Diffstat
-rw-r--r--  Makefile | 22
-rw-r--r--  README | 2
-rw-r--r--  doc/contact.html | 8
-rw-r--r--  doc/ext_buffer.html | 689
-rw-r--r--  doc/ext_c_api.html | 8
-rw-r--r--  doc/ext_ffi.html | 8
-rw-r--r--  doc/ext_ffi_api.html | 18
-rw-r--r--  doc/ext_ffi_semantics.html | 48
-rw-r--r--  doc/ext_ffi_tutorial.html | 8
-rw-r--r--  doc/ext_jit.html | 10
-rw-r--r--  doc/ext_profiler.html | 359
-rw-r--r--  doc/extensions.html | 169
-rw-r--r--  doc/install.html | 159
-rw-r--r--  doc/luajit.html | 14
-rw-r--r--  doc/running.html | 24
-rw-r--r--  dynasm/dasm_arm.h | 22
-rw-r--r--  dynasm/dasm_arm.lua | 6
-rw-r--r--  dynasm/dasm_arm64.h | 558
-rw-r--r--  dynasm/dasm_arm64.lua | 1226
-rw-r--r--  dynasm/dasm_mips.h | 52
-rw-r--r--  dynasm/dasm_mips.lua | 684
-rw-r--r--  dynasm/dasm_mips64.lua | 12
-rw-r--r--  dynasm/dasm_ppc.h | 35
-rw-r--r--  dynasm/dasm_ppc.lua | 702
-rw-r--r--  dynasm/dasm_proto.h | 4
-rw-r--r--  dynasm/dasm_x86.h | 79
-rw-r--r--  dynasm/dasm_x86.lua | 660
-rw-r--r--  dynasm/dynasm.lua | 9
-rw-r--r--  etc/luajit.pc | 2
-rw-r--r--  src/.gitignore | 2
-rw-r--r--  src/Makefile | 116
-rw-r--r--  src/Makefile.dep | 264
-rw-r--r--  src/host/buildvm.c | 36
-rw-r--r--  src/host/buildvm.h | 1
-rw-r--r--  src/host/buildvm_asm.c | 67
-rw-r--r--  src/host/buildvm_fold.c | 7
-rw-r--r--  src/host/buildvm_lib.c | 63
-rw-r--r--  src/host/buildvm_libbc.h | 81
-rw-r--r--  src/host/buildvm_peobj.c | 151
-rw-r--r--  src/host/genlibbc.lua | 234
-rw-r--r--  src/jit/bc.lua | 17
-rw-r--r--  src/jit/bcsave.lua | 306
-rw-r--r--  src/jit/dis_arm.lua | 18
-rw-r--r--  src/jit/dis_arm64.lua | 1227
-rw-r--r--  src/jit/dis_arm64be.lua | 12
-rw-r--r--  src/jit/dis_mips.lua | 372
-rw-r--r--  src/jit/dis_mips64.lua | 17
-rw-r--r--  src/jit/dis_mips64el.lua | 17
-rw-r--r--  src/jit/dis_mips64r6.lua | 17
-rw-r--r--  src/jit/dis_mips64r6el.lua | 17
-rw-r--r--  src/jit/dis_mipsel.lua | 15
-rw-r--r--  src/jit/dis_ppc.lua | 18
-rw-r--r--  src/jit/dis_x64.lua | 15
-rw-r--r--  src/jit/dis_x86.lua | 297
-rw-r--r--  src/jit/dump.lua | 83
-rw-r--r--  src/jit/p.lua | 309
-rw-r--r--  src/jit/v.lua | 24
-rw-r--r--  src/jit/zone.lua | 45
-rw-r--r--  src/lauxlib.h | 34
-rw-r--r--  src/lib_aux.c | 117
-rw-r--r--  src/lib_base.c | 167
-rw-r--r--  src/lib_bit.c | 135
-rw-r--r--  src/lib_buffer.c | 360
-rw-r--r--  src/lib_debug.c | 14
-rw-r--r--  src/lib_ffi.c | 106
-rw-r--r--  src/lib_io.c | 57
-rw-r--r--  src/lib_jit.c | 262
-rw-r--r--  src/lib_math.c | 96
-rw-r--r--  src/lib_os.c | 39
-rw-r--r--  src/lib_package.c | 73
-rw-r--r--  src/lib_string.c | 445
-rw-r--r--  src/lib_table.c | 187
-rw-r--r--  src/lj.supp | 41
-rw-r--r--  src/lj_alloc.c | 275
-rw-r--r--  src/lj_alloc.h | 3
-rw-r--r--  src/lj_api.c | 402
-rw-r--r--  src/lj_arch.h | 438
-rw-r--r--  src/lj_asm.c | 1069
-rw-r--r--  src/lj_asm_arm.h | 699
-rw-r--r--  src/lj_asm_arm64.h | 2075
-rw-r--r--  src/lj_asm_mips.h | 1817
-rw-r--r--  src/lj_asm_ppc.h | 950
-rw-r--r--  src/lj_asm_x86.h | 1384
-rw-r--r--  src/lj_assert.c | 28
-rw-r--r--  src/lj_bc.h | 4
-rw-r--r--  src/lj_bcdump.h | 10
-rw-r--r--  src/lj_bcread.c | 165
-rw-r--r--  src/lj_bcwrite.c | 351
-rw-r--r--  src/lj_buf.c | 303
-rw-r--r--  src/lj_buf.h | 198
-rw-r--r--  src/lj_carith.c | 81
-rw-r--r--  src/lj_carith.h | 10
-rw-r--r--  src/lj_ccall.c | 465
-rw-r--r--  src/lj_ccall.h | 62
-rw-r--r--  src/lj_ccallback.c | 280
-rw-r--r--  src/lj_cconv.c | 66
-rw-r--r--  src/lj_cconv.h | 5
-rw-r--r--  src/lj_cdata.c | 67
-rw-r--r--  src/lj_cdata.h | 14
-rw-r--r--  src/lj_clib.c | 47
-rw-r--r--  src/lj_cparse.c | 178
-rw-r--r--  src/lj_cparse.h | 2
-rw-r--r--  src/lj_crecord.c | 426
-rw-r--r--  src/lj_crecord.h | 12
-rw-r--r--  src/lj_ctype.c | 42
-rw-r--r--  src/lj_ctype.h | 35
-rw-r--r--  src/lj_debug.c | 204
-rw-r--r--  src/lj_debug.h | 8
-rw-r--r--  src/lj_def.h | 83
-rw-r--r--  src/lj_dispatch.c | 115
-rw-r--r--  src/lj_dispatch.h | 49
-rw-r--r--  src/lj_emit_arm.h | 71
-rw-r--r--  src/lj_emit_arm64.h | 473
-rw-r--r--  src/lj_emit_mips.h | 161
-rw-r--r--  src/lj_emit_ppc.h | 34
-rw-r--r--  src/lj_emit_x86.h | 200
-rw-r--r--  src/lj_err.c | 736
-rw-r--r--  src/lj_err.h | 19
-rw-r--r--  src/lj_errmsg.h | 24
-rw-r--r--  src/lj_ffrecord.c | 1042
-rw-r--r--  src/lj_frame.h | 160
-rw-r--r--  src/lj_func.c | 18
-rw-r--r--  src/lj_gc.c | 239
-rw-r--r--  src/lj_gc.h | 16
-rw-r--r--  src/lj_gdbjit.c | 55
-rw-r--r--  src/lj_ir.c | 175
-rw-r--r--  src/lj_ir.h | 123
-rw-r--r--  src/lj_ircall.h | 240
-rw-r--r--  src/lj_iropt.h | 23
-rw-r--r--  src/lj_jit.h | 244
-rw-r--r--  src/lj_lex.c | 392
-rw-r--r--  src/lj_lex.h | 24
-rw-r--r--  src/lj_lib.c | 149
-rw-r--r--  src/lj_lib.h | 41
-rw-r--r--  src/lj_load.c | 35
-rw-r--r--  src/lj_mcode.c | 88
-rw-r--r--  src/lj_meta.c | 134
-rw-r--r--  src/lj_meta.h | 1
-rw-r--r--  src/lj_obj.c | 18
-rw-r--r--  src/lj_obj.h | 322
-rw-r--r--  src/lj_opt_fold.c | 682
-rw-r--r--  src/lj_opt_loop.c | 45
-rw-r--r--  src/lj_opt_mem.c | 207
-rw-r--r--  src/lj_opt_narrow.c | 66
-rw-r--r--  src/lj_opt_sink.c | 14
-rw-r--r--  src/lj_opt_split.c | 198
-rw-r--r--  src/lj_parse.c | 315
-rw-r--r--  src/lj_prng.c | 259
-rw-r--r--  src/lj_prng.h | 24
-rw-r--r--  src/lj_profile.c | 371
-rw-r--r--  src/lj_profile.h | 21
-rw-r--r--  src/lj_record.c | 1016
-rw-r--r--  src/lj_record.h | 3
-rw-r--r--  src/lj_serialize.c | 539
-rw-r--r--  src/lj_serialize.h | 28
-rw-r--r--  src/lj_snap.c | 279
-rw-r--r--  src/lj_snap.h | 3
-rw-r--r--  src/lj_state.c | 138
-rw-r--r--  src/lj_state.h | 5
-rw-r--r--  src/lj_str.c | 519
-rw-r--r--  src/lj_str.h | 39
-rw-r--r--  src/lj_strfmt.c | 606
-rw-r--r--  src/lj_strfmt.h | 131
-rw-r--r--  src/lj_strfmt_num.c | 593
-rw-r--r--  src/lj_strscan.c | 80
-rw-r--r--  src/lj_strscan.h | 3
-rw-r--r--  src/lj_tab.c | 229
-rw-r--r--  src/lj_tab.h | 37
-rw-r--r--  src/lj_target.h | 20
-rw-r--r--  src/lj_target_arm.h | 5
-rw-r--r--  src/lj_target_arm64.h | 346
-rw-r--r--  src/lj_target_mips.h | 195
-rw-r--r--  src/lj_target_ppc.h | 2
-rw-r--r--  src/lj_target_x86.h | 45
-rw-r--r--  src/lj_trace.c | 293
-rw-r--r--  src/lj_trace.h | 5
-rw-r--r--  src/lj_traceerr.h | 6
-rw-r--r--  src/lj_udata.c | 28
-rw-r--r--  src/lj_udata.h | 3
-rw-r--r--  src/lj_vm.h | 42
-rw-r--r--  src/lj_vmevent.c | 1
-rw-r--r--  src/lj_vmevent.h | 7
-rw-r--r--  src/lj_vmmath.c | 110
-rw-r--r--  src/ljamalg.c | 18
-rw-r--r--  src/lua.h | 11
-rw-r--r--  src/luaconf.h | 15
-rw-r--r--  src/luajit.c | 133
-rw-r--r--  src/luajit_rolling.h | 15
-rw-r--r--  src/lualib.h | 1
-rw-r--r--  src/msvcbuild.bat | 72
-rw-r--r--  src/nxbuild.bat | 165
-rw-r--r--  src/ps4build.bat | 37
-rw-r--r--  src/ps5build.bat | 126
-rw-r--r--  src/psvitabuild.bat | 2
-rw-r--r--  src/vm_arm.dasc | 489
-rw-r--r--  src/vm_arm64.dasc | 4222
-rw-r--r--  src/vm_mips.dasc | 2697
-rw-r--r--  src/vm_mips64.dasc | 5565
-rw-r--r--  src/vm_ppc.dasc | 1704
-rw-r--r--  src/vm_ppcspe.dasc | 3699
-rw-r--r--  src/vm_x64.dasc | 4951
-rw-r--r--  src/vm_x86.dasc | 1809
-rw-r--r--  src/xb1build.bat | 104
-rw-r--r--  src/xedkbuild.bat | 2
204 files changed, 48463 insertions, 14189 deletions
diff --git a/Makefile b/Makefile
index 792d7e56..6b67f54d 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@
14############################################################################## 14##############################################################################
15 15
16MAJVER= 2 16MAJVER= 2
17MINVER= 0 17MINVER= 1
18ABIVER= 5.1 18ABIVER= 5.1
19 19
20# LuaJIT uses rolling releases. The release version is based on the time of 20# LuaJIT uses rolling releases. The release version is based on the time of
@@ -37,12 +37,13 @@ export MULTILIB= lib
37DPREFIX= $(DESTDIR)$(PREFIX) 37DPREFIX= $(DESTDIR)$(PREFIX)
38INSTALL_BIN= $(DPREFIX)/bin 38INSTALL_BIN= $(DPREFIX)/bin
39INSTALL_LIB= $(DPREFIX)/$(MULTILIB) 39INSTALL_LIB= $(DPREFIX)/$(MULTILIB)
40INSTALL_SHARE= $(DPREFIX)/share 40INSTALL_SHARE_= $(PREFIX)/share
41INSTALL_SHARE= $(DESTDIR)$(INSTALL_SHARE_)
41INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) 42INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION)
42INSTALL_INC= $(INSTALL_DEFINC) 43INSTALL_INC= $(INSTALL_DEFINC)
43 44
44INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION) 45export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit-$(MMVERSION)
45INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit 46INSTALL_JITLIB= $(DESTDIR)$(INSTALL_LJLIBD)/jit
46INSTALL_LMODD= $(INSTALL_SHARE)/lua 47INSTALL_LMODD= $(INSTALL_SHARE)/lua
47INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) 48INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
48INSTALL_CMODD= $(INSTALL_LIB)/lua 49INSTALL_CMODD= $(INSTALL_LIB)/lua
@@ -71,7 +72,7 @@ INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME)
71 72
72INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ 73INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \
73 $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) 74 $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD)
74UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \ 75UNINSTALL_DIRS= $(INSTALL_JITLIB) $(DESTDIR)$(INSTALL_LJLIBD) $(INSTALL_INC) \
75 $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) 76 $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD)
76 77
77RM= rm -f 78RM= rm -f
@@ -95,8 +96,12 @@ FILE_SO= libluajit.so
95FILE_MAN= luajit.1 96FILE_MAN= luajit.1
96FILE_PC= luajit.pc 97FILE_PC= luajit.pc
97FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h 98FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
98FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \ 99FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
99 dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua 100 dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
101 dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
102 dis_mips64.lua dis_mips64el.lua \
103 dis_mips64r6.lua dis_mips64r6el.lua \
104 vmdef.lua
100 105
101ifeq (,$(findstring Windows,$(OS))) 106ifeq (,$(findstring Windows,$(OS)))
102 HOST_SYS:= $(shell uname -s) 107 HOST_SYS:= $(shell uname -s)
@@ -110,6 +115,7 @@ ifeq (Darwin,$(TARGET_SYS))
110 INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1) 115 INSTALL_SOSHORT1= $(INSTALL_DYLIBSHORT1)
111 INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2) 116 INSTALL_SOSHORT2= $(INSTALL_DYLIBSHORT2)
112 LDCONFIG= : 117 LDCONFIG= :
118 SED_PC+= -e "s| -Wl,-E||"
113endif 119endif
114 120
115############################################################################## 121##############################################################################
@@ -126,7 +132,7 @@ install: $(INSTALL_DEP)
126 $(MKDIR) $(INSTALL_DIRS) 132 $(MKDIR) $(INSTALL_DIRS)
127 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) 133 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
128 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : 134 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
129 $(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) 135 $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
130 cd src && test -f $(FILE_SO) && \ 136 cd src && test -f $(FILE_SO) && \
131 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ 137 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
132 ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ 138 ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \
diff --git a/README b/README
index dfa7ca94..e4a69265 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
1README for LuaJIT 2.0 1README for LuaJIT 2.1
2--------------------- 2---------------------
3 3
4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. 4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
diff --git a/doc/contact.html b/doc/contact.html
index c32bc9dc..cc4d8c72 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Contact</title> 4<title>Contact</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a href="ext_jit.html">jit.* Library</a> 42<a href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 49<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html
new file mode 100644
index 00000000..54bb66f6
--- /dev/null
+++ b/doc/ext_buffer.html
@@ -0,0 +1,689 @@
1<!DOCTYPE html>
2<html>
3<head>
4<title>String Buffer Library</title>
5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
9<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
10<style type="text/css">
11.lib {
12 vertical-align: middle;
13 margin-left: 5px;
14 padding: 0 5px;
15 font-size: 60%;
16 border-radius: 5px;
17 background: #c5d5ff;
18 color: #000;
19}
20</style>
21</head>
22<body>
23<div id="site">
24<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
25</div>
26<div id="head">
27<h1>String Buffer Library</h1>
28</div>
29<div id="nav">
30<ul><li>
31<a href="luajit.html">LuaJIT</a>
32<ul><li>
33<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
34</li><li>
35<a href="install.html">Installation</a>
36</li><li>
37<a href="running.html">Running</a>
38</li></ul>
39</li><li>
40<a href="extensions.html">Extensions</a>
41<ul><li>
42<a href="ext_ffi.html">FFI Library</a>
43<ul><li>
44<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
45</li><li>
46<a href="ext_ffi_api.html">ffi.* API</a>
47</li><li>
48<a href="ext_ffi_semantics.html">FFI Semantics</a>
49</li></ul>
50</li><li>
51<a class="current" href="ext_buffer.html">String Buffers</a>
52</li><li>
53<a href="ext_jit.html">jit.* Library</a>
54</li><li>
55<a href="ext_c_api.html">Lua/C API</a>
56</li><li>
57<a href="ext_profiler.html">Profiler</a>
58</li></ul>
59</li><li>
60<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
61</li><li>
62<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
63</li><li>
64<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
65</li></ul>
66</div>
67<div id="main">
68<p>
69The string buffer library allows <b>high-performance manipulation of
70string-like data</b>.
71</p>
72<p>
73Unlike Lua strings, which are constants, string buffers are
74<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
75can be stored, formatted and encoded into a string buffer and later
76converted, extracted or decoded.
77</p>
78<p>
79The convenient string buffer API simplifies common string manipulation
80tasks that would otherwise require creating many intermediate strings.
81String buffers improve performance by eliminating redundant memory
82copies, object creation, string interning and garbage collection
83overhead. In conjunction with the FFI library, they allow zero-copy
84operations.
85</p>
86<p>
87The string buffer library also includes a high-performance
88<a href="#serialize">serializer</a> for Lua objects.
89</p>
90
91<h2 id="use">Using the String Buffer Library</h2>
92<p>
93The string buffer library is built into LuaJIT by default, but it's not
94loaded by default. Add this to the start of every Lua file that needs
95one of its functions:
96</p>
97<pre class="code">
98local buffer = require("string.buffer")
99</pre>
100<p>
101The convention for the syntax shown on this page is that <tt>buffer</tt>
102refers to the buffer library and <tt>buf</tt> refers to an individual
103buffer object.
104</p>
105<p>
106Please note the difference between a Lua function call, e.g.
107<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g.
108<tt>buf:reset()</tt> (with a colon).
109</p>
110
111<h3 id="buffer_object">Buffer Objects</h3>
112<p>
113A buffer object is a garbage-collected Lua object. After creation with
114<tt>buffer.new()</tt>, it can (and should) be reused for many operations.
115When the last reference to a buffer object is gone, it will eventually
116be freed by the garbage collector, along with the allocated buffer
117space.
118</p>
119<p>
120Buffers operate like a FIFO (first-in first-out) data structure. Data
121can be appended (written) to the end of the buffer and consumed (read)
122from the front of the buffer. These operations may be freely mixed.
123</p>
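<p>
For example, assuming the library has been loaded as shown above, a minimal
append-and-consume cycle looks like this:
</p>
<pre class="code">
local buf = buffer.new()
buf:put("Hello"):put(" ", "world")  -- Append to the end.
print(buf:get(5))                   --> Hello
print(#buf)                         --> 6   (" world" remains)
print(buf:get())                    -- Consumes the remaining " world".
</pre>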
124<p>
125The buffer space that holds the characters is managed automatically
126&mdash; it grows as needed and already consumed space is recycled. Use
127<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more
128control.
129</p>
130<p>
131The maximum size of a single buffer is the same as the maximum size of a
132Lua string, which is slightly below two gigabytes. For huge data sizes,
133neither strings nor buffers are the right data structure &mdash; use the
134FFI library to directly map memory or files up to the virtual memory
135limit of your OS.
136</p>
137
138<h3 id="buffer_overview">Buffer Method Overview</h3>
139<ul>
140<li>
141The <tt>buf:put*()</tt>-like methods append (write) characters to the
142end of the buffer.
143</li>
144<li>
145The <tt>buf:get*()</tt>-like methods consume (read) characters from the
146front of the buffer.
147</li>
148<li>
149Other methods, like <tt>buf:tostring()</tt>, only read the buffer
150contents, but don't change the buffer.
151</li>
152<li>
153The <tt>buf:set()</tt> method allows zero-copy consumption of a string
154or an FFI cdata object as a buffer.
155</li>
156<li>
157The FFI-specific methods allow zero-copy read/write-style operations or
158modifying the buffer contents in-place. Please check the
159<a href="#ffi_caveats">FFI caveats</a> below, too.
160</li>
161<li>
162Methods that don't need to return anything specific return the buffer
163object itself as a convenience. This allows method chaining, e.g.:
164<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt>
165</li>
166</ul>
167
168<h2 id="create">Buffer Creation and Management</h2>
169
170<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br>
171local buf = buffer.new([options])</tt></h3>
172<p>
173Creates a new buffer object.
174</p>
175<p>
176The optional <tt>size</tt> argument ensures a minimum initial buffer
177size. This is strictly an optimization when the required buffer size is
178known beforehand. The buffer space will grow as needed, in any case.
179</p>
180<p>
181The optional table <tt>options</tt> sets various
182<a href="#serialize_options">serialization options</a>.
183</p>
184
185<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3>
186<p>
187Reset (empty) the buffer. The allocated buffer space is not freed and
188may be reused.
189</p>
190
191<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
192<p>
193The buffer space of the buffer object is freed. The object itself
194remains intact and empty, and may be reused.
195</p>
196<p>
197Note: you normally don't need to use this method. The garbage collector
198automatically frees the buffer space when the buffer object is
199collected. Use this method if you need to free the associated memory
200immediately.
201</p>
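<p>
For illustration, a common pattern is to create a single buffer up front and
reset it for each use, instead of allocating new objects:
</p>
<pre class="code">
local buf = buffer.new(256)   -- Pre-sizing is optional; the space grows as needed.
for i = 1, 3 do
  buf:reset()                 -- Recycle the already allocated buffer space.
  buf:put("line ", i, "\n")
  io.write(buf:get())
end
</pre>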
202
203<h2 id="write">Buffer Writers</h2>
204
205<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3>
206<p>
207Appends a string <tt>str</tt>, a number <tt>num</tt> or any object
208<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer.
209Multiple arguments are appended in the given order.
210</p>
211<p>
212Appending a buffer to a buffer is possible and short-circuited
213internally. But it still involves a copy. Better combine the buffer
214writes to use a single buffer.
215</p>
216
217<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3>
218<p>
219Appends the formatted arguments to the buffer. The <tt>format</tt>
220string supports the same options as <tt>string.format()</tt>.
221</p>
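<p>
For example, plain and formatted writes can be freely mixed:
</p>
<pre class="code">
local buf = buffer.new()
buf:put("x = ", 42, "\n")
buf:putf("pi = %.3f (%s)\n", math.pi, "rounded")
io.write(buf:get())   -- Prints: x = 42 / pi = 3.142 (rounded)
</pre>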
222
223<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3>
224<p>
225Appends the given <tt>len</tt> number of bytes from the memory pointed
226to by the FFI <tt>cdata</tt> object to the buffer. The object needs to
227be convertible to a (constant) pointer.
228</p>
229
230<h3 id="buffer_set"><tt>buf = buf:set(str)<br>
231buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3>
232<p>
233This method allows zero-copy consumption of a string or an FFI cdata
234object as a buffer. It stores a reference to the passed string
235<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer
236space originally allocated is freed. This is <i>not</i> an append
237operation, unlike the <tt>buf:put*()</tt> methods.
238</p>
239<p>
240After calling this method, the buffer behaves as if
241<tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata,&nbsp;len)</tt>
242had been called. However, the data is only referenced and not copied, as
243long as the buffer is only consumed.
244</p>
245<p>
246If the buffer is written to later on, the referenced data is copied
247and the object reference is removed (copy-on-write semantics).
248</p>
249<p>
250The stored reference is an anchor for the garbage collector and keeps the
251originally passed string or FFI cdata object alive.
252</p>
253
254<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br>
255<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3>
256<p>
257The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of
258write space in the buffer. It returns an <tt>uint8_t&nbsp;*</tt> FFI
259cdata pointer <tt>ptr</tt> that points to this space.
260</p>
261<p>
262The available length in bytes is returned in <tt>len</tt>. This is at
263least <tt>size</tt> bytes, but may be more to facilitate efficient
264buffer growth. You can either make use of the additional space or ignore
265<tt>len</tt> and only use <tt>size</tt> bytes.
266</p>
267<p>
268The <tt>commit</tt> method appends the <tt>used</tt> bytes of the
269previously returned write space to the buffer data.
270</p>
271<p>
272This pair of methods allows zero-copy use of C read-style APIs:
273</p>
274<pre class="code">
275local MIN_SIZE = 65536
276repeat
277 local ptr, len = buf:reserve(MIN_SIZE)
278 local n = C.read(fd, ptr, len)
279 if n == 0 then break end -- EOF.
280 if n &lt; 0 then error("read error") end
281 buf:commit(n)
282until false
283</pre>
284<p>
285The reserved write space is <i>not</i> initialized. At least the
286<tt>used</tt> bytes <b>must</b> be written to before calling the
287<tt>commit</tt> method. There's no need to call the <tt>commit</tt>
288method if nothing is added to the buffer (e.g. on error).
289</p>
290
291<h2 id="read">Buffer Readers</h2>
292
293<h3 id="buffer_length"><tt>len = #buf</tt></h3>
294<p>
295Returns the current length of the buffer data in bytes.
296</p>
297
298<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3>
299<p>
300The Lua concatenation operator <tt>..</tt> also accepts buffers, just
301like strings or numbers. It always returns a string and not a buffer.
302</p>
303<p>
304Note that although this is supported for convenience, it thwarts one
305of the main reasons to use buffers, which is to avoid string
306allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>.
307</p>
308<p>
309Mixing this with unrelated objects that have a <tt>__concat</tt>
310metamethod may not work, since these probably only expect strings.
311</p>
312
313<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3>
314<p>
315Skips (consumes) <tt>len</tt> bytes from the buffer up to the current
316length of the buffer data.
317</p>
318
319<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3>
320<p>
321Consumes the buffer data and returns one or more strings. If called
322without arguments, the whole buffer data is consumed. If called with a
323number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument
324consumes the remaining buffer space (this only makes sense as the last
325argument). Multiple arguments consume the buffer data in the given
326order.
327</p>
328<p>
329Note: a zero length or no remaining buffer data returns an empty string
330and not <tt>nil</tt>.
331</p>
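<p>
For example:
</p>
<pre class="code">
local buf = buffer.new():put("abcdefg")
print(buf:get(2), buf:get(3))   --> ab    cde
print(buf:get())                --> fg
</pre>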
332
333<h3 id="buffer_tostring"><tt>str = buf:tostring()<br>
334str = tostring(buf)</tt></h3>
335<p>
336Creates a string from the buffer data, but doesn't consume it. The
337buffer remains unchanged.
338</p>
339<p>
340Buffer objects also define a <tt>__tostring</tt> metamethod. This means
341buffers can be passed to the global <tt>tostring()</tt> function and
342many other functions that accept this in place of strings. The important
343internal uses in functions like <tt>io.write()</tt> are short-circuited
344to avoid the creation of an intermediate string object.
345</p>
346
347<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3>
348<p>
349Returns an <tt>uint8_t&nbsp;*</tt> FFI cdata pointer <tt>ptr</tt> that
350points to the buffer data. The length of the buffer data in bytes is
351returned in <tt>len</tt>.
352</p>
353<p>
354The returned pointer can be directly passed to C functions that expect a
355buffer and a length. You can also do bytewise reads
356(<tt>local&nbsp;x&nbsp;=&nbsp;ptr[i]</tt>) or writes
357(<tt>ptr[i]&nbsp;=&nbsp;0x40</tt>) of the buffer data.
358</p>
359<p>
360In conjunction with the <tt>skip</tt> method, this allows zero-copy use
361of C write-style APIs:
362</p>
363<pre class="code">
364repeat
365 local ptr, len = buf:ref()
366 if len == 0 then break end
367 local n = C.write(fd, ptr, len)
368 if n &lt; 0 then error("write error") end
369 buf:skip(n)
370until n >= len
371</pre>
372<p>
373Unlike Lua strings, buffer data is <i>not</i> implicitly
374zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that
375expect zero-terminated strings. If you're not using <tt>len</tt>, then
376you're doing something wrong.
377</p>
378
379<h2 id="serialize">Serialization of Lua Objects</h2>
380<p>
381The following functions and methods allow <b>high-speed serialization</b>
382(encoding) of a Lua object into a string and decoding it back to a Lua
383object. This allows convenient storage and transport of <b>structured
384data</b>.
385</p>
386<p>
387The encoded data is in an <a href="#serialize_format">internal binary
388format</a>. The data can be stored in files, binary-transparent
389databases or transmitted to other LuaJIT instances across threads,
390processes or networks.
391</p>
392<p>
393Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
394server-class system, even when serializing many small objects. Decoding
395speed is mostly constrained by object creation cost.
396</p>
397<p>
398The serializer handles most Lua types, common FFI number types and
399nested structures. Functions, thread objects, other FFI cdata and full
400userdata cannot be serialized (yet).
401</p>
402<p>
403The encoder serializes nested structures as trees. Multiple references
404to a single object will be stored separately and create distinct objects
405after decoding. Circular references cause an error.
406</p>
407
408<h3 id="serialize_methods">Serialization Functions and Methods</h3>
409
410<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br>
411buf = buf:encode(obj)</tt></h3>
412<p>
413Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone
414function returns a string <tt>str</tt>. The buffer method appends the
415encoding to the buffer.
416</p>
417<p>
418<tt>obj</tt> can be any of the supported Lua types &mdash; it doesn't
419need to be a Lua table.
420</p>
421<p>
422This function may throw an error when attempting to serialize
423unsupported object types, circular references or deeply nested tables.
424</p>
425
426<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br>
427obj = buf:decode()</tt></h3>
428<p>
429The stand-alone function deserializes (decodes) the string
430<tt>str</tt>, the buffer method deserializes one object from the
431buffer. Both return a Lua object <tt>obj</tt>.
432</p>
433<p>
434The returned object may be any of the supported Lua types &mdash;
435even <tt>nil</tt>.
436</p>
437<p>
438This function may throw an error when fed with malformed or incomplete
439encoded data. The stand-alone function throws when there's left-over
440data after decoding a single top-level object. The buffer method leaves
441any left-over data in the buffer.
442</p>
443<p>
444Attempting to deserialize an FFI type will throw an error if the FFI
445library is not built-in or has not been loaded yet.
446</p>
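<p>
A simple round trip, for example:
</p>
<pre class="code">
local str = buffer.encode({ 42, x = "foo", nested = { true, false, 3.5 } })
local obj = buffer.decode(str)
print(obj[1], obj.x, obj.nested[3])   --> 42    foo    3.5
</pre>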
447
448<h3 id="serialize_options">Serialization Options</h3>
449<p>
450The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain
451the following members (all optional):
452</p>
453<ul>
454<li>
455<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that
456commonly occur as table keys of objects you are serializing. These keys
457are compactly encoded as indexes during serialization. A well-chosen
458dictionary saves space and improves serialization performance.
459</li>
460<li>
461<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b>
462for the table objects you are serializing.
463</li>
464</ul>
465<p>
466<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs
467to be an array of tables. Both start at index 1 and must not contain holes
468(no <tt>nil</tt> in between). The tables are anchored in the buffer object and
469internally modified into a two-way index (don't do this yourself, just pass
470a plain array). The tables must not be modified after they have been passed
471to <tt>buffer.new()</tt>.
472</p>
473<p>
474The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and
475decoder must be the same. Put the most common entries at the front. Extend
476at the end to ensure backwards-compatibility &mdash; older encodings can
477then still be read. You may also set some indexes to <tt>false</tt> to
478explicitly drop backwards-compatibility. Old encodings that use these
479indexes will throw an error when decoded.
480</p>
481<p>
482Metatables that are not found in the <tt>metatable</tt> dictionary are
483ignored when encoding. Decoding returns a table with a <tt>nil</tt>
484metatable.
485</p>
486<p>
487Note: parsing and preparation of the options table is somewhat
488expensive. Create a buffer object only once and recycle it for multiple
489uses. Avoid mixing encoder and decoder buffers, since the
490<tt>buf:set()</tt> method frees the already allocated buffer space:
491</p>
492<pre class="code">
493local options = {
494 dict = { "commonly", "used", "string", "keys" },
495}
496local buf_enc = buffer.new(options)
497local buf_dec = buffer.new(options)
498
499local function encode(obj)
500 return buf_enc:reset():encode(obj):get()
501end
502
503local function decode(str)
504 return buf_dec:set(str):decode()
505end
506</pre>
507
508<h3 id="serialize_stream">Streaming Serialization</h3>
509<p>
510In some contexts, it's desirable to do piecewise serialization of large
511datasets, also known as <i>streaming</i>.
512</p>
513<p>
514This serialization format can be safely concatenated and supports streaming.
515Multiple encodings can simply be appended to a buffer and later decoded
516individually:
517</p>
518<pre class="code">
519local buf = buffer.new()
520buf:encode(obj1)
521buf:encode(obj2)
522local copy1 = buf:decode()
523local copy2 = buf:decode()
524</pre>
525<p>
526Here's how to iterate over a stream:
527</p>
528<pre class="code">
529while #buf ~= 0 do
530 local obj = buf:decode()
531 -- Do something with obj.
532end
533</pre>
534<p>
535Since the serialization format doesn't prepend a length to its encoding,
536network applications may need to transmit the length, too.
537</p>
538
539<h3 id="serialize_format">Serialization Format Specification</h3>
540<p>
541This serialization format is designed for <b>internal use</b> by LuaJIT
542applications. Serialized data is upwards-compatible and portable across
543all supported LuaJIT platforms.
544</p>
545<p>
546It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
547embedded zeroes and stores embedded Lua string objects unmodified, which
548are 8-bit-clean, too. Encoded data can be safely concatenated for
549streaming and later decoded one top-level object at a time.
550</p>
551<p>
552The encoding is reasonably compact, but tuned for maximum performance,
553not for minimum space usage. It compresses well with any of the common
554byte-oriented data compression algorithms.
555</p>
556<p>
557Although documented here for reference, this format is explicitly
558<b>not</b> intended to be a 'public standard' for structured data
559interchange across computer languages (like JSON or MessagePack). Please
560do not use it as such.
561</p>
562<p>
563The specification is given below as a context-free grammar with a
564top-level <tt>object</tt> as the starting point. Alternatives are
565separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats.
566Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
567either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
568suffix.
569</p>
570<pre>
571object → nil | false | true
572 | null | lightud32 | lightud64
573 | int | num | tab | tab_mt
574 | int64 | uint64 | complex
575 | string
576
577nil → 0x00
578false → 0x01
579true → 0x02
580
581null → 0x03 // NULL lightuserdata
582lightud32 → 0x04 data.I // 32 bit lightuserdata
583lightud64 → 0x05 data.L // 64 bit lightuserdata
584
585int → 0x06 int.I // int32_t
586num → 0x07 double.L
587
588tab → 0x08 // Empty table
589 | 0x09 h.U h*{object object} // Key/value hash
590 | 0x0a a.U a*object // 0-based array
591 | 0x0b a.U h.U a*object h*{object object} // Mixed
592 | 0x0c a.U (a-1)*object // 1-based array
593 | 0x0d a.U h.U (a-1)*object h*{object object} // Mixed
594tab_mt → 0x0e (index-1).U tab // Metatable dict entry
595
596int64 → 0x10 int.L // FFI int64_t
597uint64 → 0x11 uint.L // FFI uint64_t
598complex → 0x12 re.L im.L // FFI complex
599
600string → (0x20+len).U len*char.B
601 | 0x0f (index-1).U // String dict entry
602
603.B = 8 bit
604.I = 32 bit little-endian
605.L = 64 bit little-endian
606.U = prefix-encoded 32 bit unsigned number n:
607 0x00..0xdf → n.B
608 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
609 0x1fe0.. → 0xff n.I
610</pre>
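<p>
For illustration only, here is a sketch of the <tt>.U</tt> prefix encoding in
Lua (the serializer implements this internally; this is not part of the
library API):
</p>
<pre class="code">
local bit = require("bit")
local band, rshift = bit.band, bit.rshift

local function encode_U(n)   -- n is a 32 bit unsigned number.
  if n &lt; 0xe0 then
    return string.char(n)                              -- Single byte.
  elseif n &lt; 0x1fe0 then
    local m = n - 0xe0
    return string.char(0xe0 + rshift(m, 8), band(m, 0xff))  -- Two bytes.
  else
    return string.char(0xff, band(n, 0xff), band(rshift(n, 8), 0xff),
                       band(rshift(n, 16), 0xff), band(rshift(n, 24), 0xff))
  end
end
</pre>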
611
612<h2 id="error">Error handling</h2>
613<p>
614Many of the buffer methods can throw an error. Out-of-memory or usage
615errors are best caught with an outer wrapper for larger parts of code.
616There's not much one can do after that, anyway.
617</p>
618<p>
619On the other hand, you may want to catch some errors individually. Buffer methods need
620to receive the buffer object as the first argument. The Lua colon-syntax
621<tt>obj:method()</tt> does that implicitly. But to wrap a method with
622<tt>pcall()</tt>, the arguments need to be passed like this:
623</p>
624<pre class="code">
625local ok, err = pcall(buf.encode, buf, obj)
626if not ok then
627 -- Handle error in err.
628end
629</pre>
630
631<h2 id="ffi_caveats">FFI caveats</h2>
632<p>
633The string buffer library has been designed to work well together with
634the FFI library. But due to the low-level nature of the FFI library,
635some care needs to be taken:
636</p>
637<p>
638First, please remember that FFI pointers are zero-indexed. The space
639returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the
640returned pointer and ends before <tt>len</tt> bytes after that.
641</p>
642<p>
643I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index
644is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid
645index at all. The returned pointer may even be <tt>NULL</tt>.
646</p>
647<p>
648The space pointed to by the returned pointer is only valid as long as
649the buffer is not modified in any way (neither append, nor consume, nor
650reset, etc.). The pointer is also not a GC anchor for the buffer object
651itself.
652</p>
653<p>
654Buffer data is only guaranteed to be byte-aligned. Casting the returned
655pointer to a data type with higher alignment may cause unaligned
656accesses. It depends on the CPU architecture whether this is allowed or
657not (it's always OK on x86/x64 and mostly OK on other modern
658architectures).
659</p>
660<p>
661FFI pointers or references do not count as GC anchors for an underlying
662object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is
663anchored by <tt>buf:set(array,&nbsp;len)</tt>, but not by
664<tt>buf:set(array+offset,&nbsp;len)</tt>. The addition of the offset
665creates a new pointer, even when the offset is zero. In this case, you
666need to make sure there's still a reference to the original array as
667long as its contents are in use by the buffer.
668</p>
669<p>
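<p>
A sketch of the difference, using a hypothetical <tt>buf</tt> and an FFI byte
array:
</p>
<pre class="code">
local ffi = require("ffi")
local array = ffi.new("uint8_t[?]", 100)
buf:set(array, 100)       -- OK: 'array' itself is anchored by the buffer.
buf:set(array + 10, 90)   -- New pointer: 'array' is NOT anchored.
                          -- Keep a separate reference to 'array' alive!
</pre>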
670Even though each LuaJIT VM instance is single-threaded (but you can
671create multiple VMs), FFI data structures can be accessed concurrently.
672Be careful when reading/writing FFI cdata from/to buffers to avoid
673concurrent accesses or modifications. In particular, the memory
674referenced by <tt>buf:set(cdata,&nbsp;len)</tt> must not be modified
675while buffer readers are working on it. Shared, but read-only memory
676mappings of files are OK, but only if the file does not change.
677</p>
678<br class="flush">
679</div>
680<div id="foot">
681<hr class="hide">
682Copyright &copy; 2005-2023
683<span class="noprint">
684&middot;
685<a href="contact.html">Contact</a>
686</span>
687</div>
688</body>
689</html>
diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
index 43c82047..d5e6bb60 100644
--- a/doc/ext_c_api.html
+++ b/doc/ext_c_api.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Lua/C API Extensions</title> 4<title>Lua/C API Extensions</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a href="ext_jit.html">jit.* Library</a> 42<a href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a class="current" href="ext_c_api.html">Lua/C API</a> 44<a class="current" href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 49<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
index 683c0cd0..eaa176b6 100644
--- a/doc/ext_ffi.html
+++ b/doc/ext_ffi.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>FFI Library</title> 4<title>FFI Library</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a href="ext_jit.html">jit.* Library</a> 42<a href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 49<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
index d5f7032f..500a2143 100644
--- a/doc/ext_ffi_api.html
+++ b/doc/ext_ffi_api.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>ffi.* API Functions</title> 4<title>ffi.* API Functions</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,9 +42,13 @@ td.abiparam { font-weight: bold; width: 6em; }
42<a href="ext_ffi_semantics.html">FFI Semantics</a> 42<a href="ext_ffi_semantics.html">FFI Semantics</a>
43</li></ul> 43</li></ul>
44</li><li> 44</li><li>
45<a href="ext_buffer.html">String Buffers</a>
46</li><li>
45<a href="ext_jit.html">jit.* Library</a> 47<a href="ext_jit.html">jit.* Library</a>
46</li><li> 48</li><li>
47<a href="ext_c_api.html">Lua/C API</a> 49<a href="ext_c_api.html">Lua/C API</a>
50</li><li>
51<a href="ext_profiler.html">Profiler</a>
48</li></ul> 52</li></ul>
49</li><li> 53</li><li>
50<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 54<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
@@ -458,6 +462,12 @@ otherwise. The following parameters are currently defined:
458<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr> 462<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
459<tr class="odd"> 463<tr class="odd">
460<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> 464<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
465<tr class="even">
466<td class="abiparam">pauth</td><td class="abidesc">Pointer authentication ABI</td></tr>
467<tr class="odd">
468<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
469<tr class="even">
470<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
461</table> 471</table>
462 472
463<h3 id="ffi_os"><tt>ffi.os</tt></h3> 473<h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -534,8 +544,8 @@ corresponding ctype.
534The parser for Lua source code treats numeric literals with the 544The parser for Lua source code treats numeric literals with the
535suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit 545suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
536integers. Case doesn't matter, but uppercase is recommended for 546integers. Case doesn't matter, but uppercase is recommended for
537readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal 547readability. It handles decimal (<tt>42LL</tt>), hexadecimal
538(<tt>0x2aLL</tt>) literals. 548(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
539</p> 549</p>
540<p> 550<p>
541The imaginary part of complex numbers can be specified by suffixing 551The imaginary part of complex numbers can be specified by suffixing
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index 381a2010..b56e57a1 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>FFI Semantics</title> 4<title>FFI Semantics</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,9 +42,13 @@ td.convop { font-style: italic; width: 40%; }
42<a class="current" href="ext_ffi_semantics.html">FFI Semantics</a> 42<a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
43</li></ul> 43</li></ul>
44</li><li> 44</li><li>
45<a href="ext_buffer.html">String Buffers</a>
46</li><li>
45<a href="ext_jit.html">jit.* Library</a> 47<a href="ext_jit.html">jit.* Library</a>
46</li><li> 48</li><li>
47<a href="ext_c_api.html">Lua/C API</a> 49<a href="ext_c_api.html">Lua/C API</a>
50</li><li>
51<a href="ext_profiler.html">Profiler</a>
48</li></ul> 52</li></ul>
49</li><li> 53</li><li>
50<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 54<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
@@ -175,6 +179,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored):
175<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>, 179<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
176<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li> 180<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
177 181
182<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li>
183
178</ul> 184</ul>
179<p> 185<p>
180You're encouraged to use these types in preference to 186You're encouraged to use these types in preference to
@@ -434,6 +440,19 @@ If you don't do this, the default Lua number &rarr; <tt>double</tt>
434conversion rule applies. A vararg C&nbsp;function expecting an integer 440conversion rule applies. A vararg C&nbsp;function expecting an integer
435will see a garbled or uninitialized value. 441will see a garbled or uninitialized value.
436</p> 442</p>
443<p>
444Note: this is the only place where creating a boxed scalar number type is
445actually useful. <b>Never use <tt>ffi.new("int")</tt>, <tt>ffi.new("float")</tt>
446etc. anywhere else!</b>
447</p>
448<p style="font-size: 8pt;">
449Ditto for <tt>ffi.cast()</tt>. Explicitly boxing scalars <b>does not</b>
450improve performance or force <tt>int</tt> or <tt>float</tt> arithmetic! It
451just adds costly boxing, unboxing and conversions steps. And it may lead
452to surprise results, because
453<a href="#cdata_arith">cdata arithmetic on scalar numbers</a>
454is always performed on 64 bit integers.
455</p>
437 456
438<h2 id="init">Initializers</h2> 457<h2 id="init">Initializers</h2>
439<p> 458<p>
@@ -722,6 +741,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
722number (e.g. for regular floating-point calculations) with 741number (e.g. for regular floating-point calculations) with
723<tt>tonumber()</tt>. But note this may incur a precision loss.</li> 742<tt>tonumber()</tt>. But note this may incur a precision loss.</li>
724 743
744<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
745arithmetic operators apply analogously.<br>
746
747Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
748converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
749returns a Lua number.<br>
750
751For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
752conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
753<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
754
755For all other operations, only the first argument is used to determine
756the output type. This implies that a cdata number as a shift count for
757shifts and rotates is accepted, but that alone does <em>not</em> cause
758a cdata number output (see the example after this list).
759
725</ul> 760</ul>
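<p>
For illustration, the 64&nbsp;bit bitwise rules above play out like this:
</p>
<pre class="code">
local bit = require("bit")
print(bit.band(0xffffffffffffULL, 0xff00))   --> 65280ULL  (all args converted to uint64_t)
print(bit.tobit(0x1234567800000001LL))       --> 1         (always returns a Lua number)
print(bit.lshift(1, 8ULL))                   --> 256       (first arg determines the output type)
</pre>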
726 761
727<h3 id="cdata_comp">Comparisons of cdata objects</h3> 762<h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1193,14 +1228,12 @@ The following operations are currently not compiled and may exhibit
1193suboptimal performance, especially when used in inner loops: 1228suboptimal performance, especially when used in inner loops:
1194</p> 1229</p>
1195<ul> 1230<ul>
1196<li>Bitfield accesses and initializations.</li>
1197<li>Vector operations.</li> 1231<li>Vector operations.</li>
1198<li>Table initializers.</li> 1232<li>Table initializers.</li>
1199<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> 1233<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
1200<li>Allocations of variable-length arrays or structs.</li> 1234<li>Non-default initialization of VLA/VLS or large C&nbsp;types
1201<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an 1235(&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
1202alignment &gt; 8&nbsp;bytes.</li> 1236<li>Bitfield initializations.</li>
1203<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
1204<li>Pointer differences for element sizes that are not a power of 1237<li>Pointer differences for element sizes that are not a power of
1205two.</li> 1238two.</li>
1206<li>Calls to C&nbsp;functions with aggregates passed or returned by 1239<li>Calls to C&nbsp;functions with aggregates passed or returned by
@@ -1216,7 +1249,6 @@ value.</li>
1216Other missing features: 1249Other missing features:
1217</p> 1250</p>
1218<ul> 1251<ul>
1219<li>Bit operations for 64&nbsp;bit types.</li>
1220<li>Arithmetic for <tt>complex</tt> numbers.</li> 1252<li>Arithmetic for <tt>complex</tt> numbers.</li>
1221<li>Passing structs by value to vararg C&nbsp;functions.</li> 1253<li>Passing structs by value to vararg C&nbsp;functions.</li>
1222<li><a href="extensions.html#exceptions">C++ exception interoperability</a> 1254<li><a href="extensions.html#exceptions">C++ exception interoperability</a>
diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
index 03b6ec56..a5236f0b 100644
--- a/doc/ext_ffi_tutorial.html
+++ b/doc/ext_ffi_tutorial.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>FFI Tutorial</title> 4<title>FFI Tutorial</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -44,9 +44,13 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
44<a href="ext_ffi_semantics.html">FFI Semantics</a> 44<a href="ext_ffi_semantics.html">FFI Semantics</a>
45</li></ul> 45</li></ul>
46</li><li> 46</li><li>
47<a href="ext_buffer.html">String Buffers</a>
48</li><li>
47<a href="ext_jit.html">jit.* Library</a> 49<a href="ext_jit.html">jit.* Library</a>
48</li><li> 50</li><li>
49<a href="ext_c_api.html">Lua/C API</a> 51<a href="ext_c_api.html">Lua/C API</a>
52</li><li>
53<a href="ext_profiler.html">Profiler</a>
50</li></ul> 54</li></ul>
51</li><li> 55</li><li>
52<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 56<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
diff --git a/doc/ext_jit.html b/doc/ext_jit.html
index b1dbf36c..dd136d65 100644
--- a/doc/ext_jit.html
+++ b/doc/ext_jit.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>jit.* Library</title> 4<title>jit.* Library</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a class="current" href="ext_jit.html">jit.* Library</a> 42<a class="current" href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 49<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
@@ -145,7 +149,7 @@ Contains the target OS name:
145<h3 id="jit_arch"><tt>jit.arch</tt></h3> 149<h3 id="jit_arch"><tt>jit.arch</tt></h3>
146<p> 150<p>
147Contains the target architecture name: 151Contains the target architecture name:
148"x86", "x64", "arm", "ppc", "ppcspe", or "mips". 152"x86", "x64", "arm", "arm64", "arm64be", "ppc", "mips", "mipsel", "mips64", "mips64el", "mips64r6", "mips64r6el".
149</p> 153</p>
150 154
151<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2> 155<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html
new file mode 100644
index 00000000..81b5d773
--- /dev/null
+++ b/doc/ext_profiler.html
@@ -0,0 +1,359 @@
1<!DOCTYPE html>
2<html>
3<head>
4<title>Profiler</title>
5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
9<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
10</head>
11<body>
12<div id="site">
13<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
14</div>
15<div id="head">
16<h1>Profiler</h1>
17</div>
18<div id="nav">
19<ul><li>
20<a href="luajit.html">LuaJIT</a>
21<ul><li>
22<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
23</li><li>
24<a href="install.html">Installation</a>
25</li><li>
26<a href="running.html">Running</a>
27</li></ul>
28</li><li>
29<a href="extensions.html">Extensions</a>
30<ul><li>
31<a href="ext_ffi.html">FFI Library</a>
32<ul><li>
33<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
34</li><li>
35<a href="ext_ffi_api.html">ffi.* API</a>
36</li><li>
37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul>
39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
42<a href="ext_jit.html">jit.* Library</a>
43</li><li>
44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a class="current" href="ext_profiler.html">Profiler</a>
47</li></ul>
48</li><li>
49<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
50</li><li>
51<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
52</li><li>
53<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
54</li></ul>
55</div>
56<div id="main">
57<p>
58LuaJIT has an integrated statistical profiler with very low overhead. It
59allows sampling the currently executing stack and other parameters in
60regular intervals.
61</p>
62<p>
63The integrated profiler can be accessed from three levels:
64</p>
65<ul>
66<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
67<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
68<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
69<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
70</ul>
71
72<h2 id="hl_profiler">High-Level Profiler</h2>
73<p>
74The bundled high-level profiler offers basic profiling functionality. It
75generates simple textual summaries or source code annotations. It can be
76accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
77or from Lua code by loading the underlying <tt>jit.p</tt> module.
78</p>
79<p>
80To cut to the chase &mdash; run this to get a CPU usage profile by
81function name:
82</p>
83<pre class="code">
84luajit -jp myapp.lua
85</pre>
86<p>
87It's <em>not</em> a stated goal of the bundled profiler to add every
88possible option or to cater for special profiling needs. The low-level
89profiler APIs are documented below. They may be used by third-party
90authors to implement advanced functionality, e.g. IDE integration or
91graphical profilers.
92</p>
93<p>
94Note: Sampling works for both interpreted and JIT-compiled code. The
95results for JIT-compiled code may sometimes be surprising. LuaJIT
96heavily optimizes and inlines Lua code &mdash; there's no simple
97one-to-one correspondence between source code lines and the sampled
98machine code.
99</p>
100
101<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
102<p>
103The <tt>-jp</tt> command line option starts the high-level profiler.
104When the application run by the command line terminates, the profiler
105stops and writes the results to <tt>stdout</tt> or to the specified
106<tt>output</tt> file.
107</p>
108<p>
109The <tt>options</tt> argument specifies how the profiling is to be
110performed:
111</p>
112<ul>
113<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
114This is the default mode.</li>
115<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
116<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
117<li><tt>&lt;number&gt;</tt> &mdash; stack dump depth (callee &larr;
118caller). Default: 1.</li>
119<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
120&rarr; callee).</li>
121<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
122depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
123<li><tt>p</tt> &mdash; Show full path for module names.</li>
124<li><tt>v</tt> &mdash; Show VM states.</li>
125<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
126<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
127<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
128<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
129<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
130<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
131Default: 3%.</li>
132<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
133Default: 10ms.<br>
134Note: The actual sampling precision is OS-dependent.</li>
135</ul>
136<p>
137The default output for <tt>-jp</tt> is a list of the most CPU consuming
138spots in the application. Increasing the stack dump depth with (say)
139<tt>-jp=2</tt> may help to point out the main callers or callees of
140hotspots. But sample aggregation is still flat per unique stack dump.
141</p>
142<p>
143To get a two-level view (split view) of callers/callees, use
144<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
145level are relative to the first level.
146</p>
147<p>
148To see how much time is spent in each line relative to a function, use
149<tt>-jp=fl</tt>.
150</p>
151<p>
152To see how much time is spent in different VM states or
153<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
154</p>
155<p>
156Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
157views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
158spent in a VM state or zone vs. hotspots. This can be used to answer
159questions like "Which time-consuming functions are only interpreted?" or
160"What's the garbage collector overhead for a specific function?".
161</p>
162<p>
163Multiple options can be combined &mdash; but not all combinations make
164sense; see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
165deep at 4ms intervals and shows a split view of the CPU consuming
166functions and their callers with a 1% threshold.
167</p>
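<p>
For example, to write that profile to a file instead of <tt>stdout</tt>
(the file names are placeholders):
</p>
<pre class="code">
luajit -jp=3si4m1,profile.txt myapp.lua
</pre>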
168<p>
169Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
170always flat and at the line level. Obviously, the source code files need
171to be readable by the profiler script.
172</p>
173<p>
174The high-level profiler can also be started and stopped from Lua code with:
175</p>
176<pre class="code">
177require("jit.p").start(options, output)
178...
179require("jit.p").stop()
180</pre>
181
182<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
183<p>
184Zones can be used to provide information about different parts of an
185application to the high-level profiler. E.g. a game could make use of an
186<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
187organized as a stack.
188</p>
189<p>
190The <tt>jit.zone</tt> module needs to be loaded explicitly:
191</p>
192<pre class="code">
193local zone = require("jit.zone")
194</pre>
195<ul>
196<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
197<li><tt>zone()</tt> pops the current zone from the zone stack and
198returns its name.</li>
199<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
200<li><tt>zone:flush()</tt> flushes the zone stack.</li>
201</ul>
202<p>
203To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
204spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
205</p>
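<p>
A brief usage sketch &mdash; the update functions are hypothetical
stand-ins for your application code:
</p>
<pre class="code">
local zone = require("jit.zone")

zone("AI")
update_ai()       -- samples taken here are attributed to the "AI" zone
zone()

zone("PHYS")
update_physics()  -- samples taken here are attributed to the "PHYS" zone
zone()
</pre>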
206
207<h2 id="ll_lua_api">Low-level Lua API</h2>
208<p>
209The <tt>jit.profile</tt> module gives access to the low-level API of the
210profiler from Lua code. This module needs to be loaded explicitly:
211<pre class="code">
212local profile = require("jit.profile")
213</pre>
214<p>
215This module can be used to implement your own higher-level profiler.
216A typical profiling run starts the profiler, captures stack dumps in
217the profiler callback, adds them to a hash table to aggregate the number
218of samples, stops the profiler and then analyzes all captured
219stack dumps. Other parameters can be sampled in the profiler callback,
220too. But it's important not to spend too much time in the callback,
221since this may skew the statistics.
222</p>
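<p>
A minimal sketch of such a profiler, using the functions documented below
(the profiled workload is a hypothetical placeholder):
</p>
<pre class="code">
local profile = require("jit.profile")

local counts = {}
profile.start("f", function(thread, samples, vmstate)
  -- Aggregate by a two-level stack dump (callee, then caller).
  local key = profile.dumpstack(thread, "lZ;", 2)
  counts[key] = (counts[key] or 0) + samples
end)

run_workload()  -- hypothetical: the code to be profiled

profile.stop()
for key, count in pairs(counts) do
  print(count, key)
end
</pre>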
223
224<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
225&mdash; Start profiler</h3>
226<p>
227This function starts the profiler. The <tt>mode</tt> argument is a
228string holding options:
229</p>
230<ul>
231<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
232<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
233<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
23410ms).<br>
235Note: The actual sampling precision is OS-dependent.
236</li>
237</ul>
238<p>
239The <tt>cb</tt> argument is a callback function which is called with
240three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
241called on a separate coroutine; the <tt>thread</tt> argument is the
242state that holds the stack to sample for profiling. Note: do
243<em>not</em> modify the stack of that state or call functions on it.
244</p>
245<p>
246<tt>samples</tt> gives the number of accumulated samples since the last
247callback (usually 1).
248</p>
249<p>
250<tt>vmstate</tt> holds the VM state at the time the profiling timer
251triggered. This may or may not correspond to the state of the VM when
252the profiling callback is called. The state is either <tt>'N'</tt>
253native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
254C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
255compiler.
256</p>
257
258<h3 id="profile_stop"><tt>profile.stop()</tt>
259&mdash; Stop profiler</h3>
260<p>
261This function stops the profiler.
262</p>
263
264<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
265&mdash; Dump stack </h3>
266<p>
267This function allows taking stack dumps in an efficient manner. It
268returns a string with a stack dump for the <tt>thread</tt> (coroutine),
269formatted according to the <tt>fmt</tt> argument:
270</p>
271<ul>
272<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise,
273only the file name is used.</li>
274<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise,
275use module:line.</li>
276<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
277<li><tt>l</tt> &mdash; Dump module:line.</li>
278<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
279frame.</li>
280<li>All other characters are added verbatim to the output string.</li>
281</ul>
282<p>
283The <tt>depth</tt> argument gives the number of frames to dump, starting
284at the topmost frame of the thread. A negative number dumps the frames in
285inverse order.
286</p>
287<p>
288The first example prints a list of the current module names and line
289numbers of up to 10 frames on separate lines. The second example prints
290semicolon-separated function names for all frames (up to 100) in inverse
291order:
292</p>
293<pre class="code">
294print(profile.dumpstack(thread, "l\n", 10))
295print(profile.dumpstack(thread, "lZ;", -100))
296</pre>
297
298<h2 id="ll_c_api">Low-level C API</h2>
299<p>
300The profiler can be controlled directly from C&nbsp;code, e.g. for
301use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
302<a href="ext_c_api.html">Lua/C API</a> extensions).
303</p>
304
305<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
306&mdash; Start profiler</h3>
307<p>
308This function starts the profiler. <a href="#profile_start">See
309above</a> for a description of the <tt>mode</tt> argument.
310</p>
311<p>
312The <tt>cb</tt> argument is a callback function with the following
313declaration:
314</p>
315<pre class="code">
316typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
317 int samples, int vmstate);
318</pre>
319<p>
320<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
321state that holds the stack to sample for profiling. Note: do
322<em>not</em> modify this stack or call functions on this stack &mdash;
323use a separate coroutine for this purpose. <a href="#profile_start">See
324above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
325</p>
326
327<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
328&mdash; Stop profiler</h3>
329<p>
330This function stops the profiler.
331</p>
332
333<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
334&mdash; Dump stack </h3>
335<p>
336This function allows taking stack dumps in an efficient manner.
337<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
338and <tt>depth</tt>.
339</p>
340<p>
341This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
342private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
343argument returns the length of the output string. The buffer is
344overwritten on the next call and deallocated when the profiler stops.
345You either need to consume the content immediately or copy it for later
346use.
347</p>
348<br class="flush">
349</div>
350<div id="foot">
351<hr class="hide">
352Copyright &copy; 2005-2023
353<span class="noprint">
354&middot;
355<a href="contact.html">Contact</a>
356</span>
357</div>
358</body>
359</html>
diff --git a/doc/extensions.html b/doc/extensions.html
index 04a9ae07..e9aaa096 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Extensions</title> 4<title>Extensions</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -54,9 +54,13 @@ td.excinterop {
54<a href="ext_ffi_semantics.html">FFI Semantics</a> 54<a href="ext_ffi_semantics.html">FFI Semantics</a>
55</li></ul> 55</li></ul>
56</li><li> 56</li><li>
57<a href="ext_buffer.html">String Buffers</a>
58</li><li>
57<a href="ext_jit.html">jit.* Library</a> 59<a href="ext_jit.html">jit.* Library</a>
58</li><li> 60</li><li>
59<a href="ext_c_api.html">Lua/C API</a> 61<a href="ext_c_api.html">Lua/C API</a>
62</li><li>
63<a href="ext_profiler.html">Profiler</a>
60</li></ul> 64</li></ul>
61</li><li> 65</li><li>
62<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 66<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
@@ -106,6 +110,9 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
106This module is a LuaJIT built-in &mdash; you don't need to download or 110This module is a LuaJIT built-in &mdash; you don't need to download or
107install Lua BitOp. The Lua BitOp site has full documentation for all 111install Lua BitOp. The Lua BitOp site has full documentation for all
108<a href="https://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>. 112<a href="https://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
113The FFI adds support for
114<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
115using the same API functions.
109</p> 116</p>
110<p> 117<p>
111Please make sure to <tt>require</tt> the module before using any of 118Please make sure to <tt>require</tt> the module before using any of
@@ -139,6 +146,11 @@ LuaJIT adds some
139<a href="ext_c_api.html">extra functions to the Lua/C API</a>. 146<a href="ext_c_api.html">extra functions to the Lua/C API</a>.
140</p> 147</p>
141 148
149<h3 id="profiler">Profiler</h3>
150<p>
151LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
152</p>
153
142<h2 id="library">Enhanced Standard Library Functions</h2> 154<h2 id="library">Enhanced Standard Library Functions</h2>
143 155
144<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3> 156<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -148,13 +160,33 @@ passes any arguments after the error function to the function
148which is called in a protected context. 160which is called in a protected context.
149</p> 161</p>
150 162
151<h3 id="load"><tt>loadfile()</tt> etc. handle UTF-8 source code</h3> 163<h3 id="load"><tt>load*()</tt> handle UTF-8 source code</h3>
152<p> 164<p>
153Non-ASCII characters are handled transparently by the Lua source code parser. 165Non-ASCII characters are handled transparently by the Lua source code parser.
154This allows the use of UTF-8 characters in identifiers and strings. 166This allows the use of UTF-8 characters in identifiers and strings.
155A UTF-8 BOM is skipped at the start of the source code. 167A UTF-8 BOM is skipped at the start of the source code.
156</p> 168</p>
157 169
170<h3 id="load_mode"><tt>load*()</tt> add a mode parameter</h3>
171<p>
172As an extension from Lua 5.2, the functions <tt>loadstring()</tt>,
173<tt>loadfile()</tt> and (new) <tt>load()</tt> add an optional
174<tt>mode</tt> parameter.
175</p>
176<p>
177The default mode string is <tt>"bt"</tt>, which allows loading of both
178source code and bytecode. Use <tt>"t"</tt> to allow only source code
179or <tt>"b"</tt> to allow only bytecode to be loaded.
180</p>
181<p>
182By default, the <tt>load*</tt> functions generate the native bytecode format.
183For cross-compilation purposes, add <tt>W</tt> to the mode string to
184force the 32 bit format and <tt>X</tt> to force the 64 bit format.
185Add both to force the opposite format. Note that non-native bytecode
186generated by <tt>load*</tt> cannot be run, but can still be passed
187to <tt>string.dump</tt>.
188</p>
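<p>
A short sketch of the <tt>mode</tt> parameter (the bytecode file name is a
placeholder):
</p>
<pre class="code">
-- Accept only source code; bytecode input is rejected:
local f = assert(load("return 1 + 2", "=example", "t"))

-- Accept only precompiled bytecode from a file:
local g = assert(loadfile("myapp.raw", "b"))
</pre>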
189
158<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3> 190<h3 id="tostring"><tt>tostring()</tt> etc. canonicalize NaN and &plusmn;Inf</h3>
159<p> 191<p>
160All number-to-string conversions consistently convert non-finite numbers 192All number-to-string conversions consistently convert non-finite numbers
@@ -166,7 +198,7 @@ in <tt>"-inf"</tt>.
166<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3> 198<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
167<p> 199<p>
168All string-to-number conversions consistently convert integer and 200All string-to-number conversions consistently convert integer and
169floating-point inputs in decimal and hexadecimal on all platforms. 201floating-point inputs in decimal, hexadecimal and binary on all platforms.
170<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous 202<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
171problems with poor C library implementations. The builtin conversion 203problems with poor C library implementations. The builtin conversion
172function provides full precision according to the IEEE-754 standard, it 204function provides full precision according to the IEEE-754 standard, it
@@ -174,21 +206,58 @@ works independently of the current locale and it supports hex floating-point
174numbers (e.g. <tt>0x1.5p-3</tt>). 206numbers (e.g. <tt>0x1.5p-3</tt>).
175</p> 207</p>
176 208
177<h3 id="string_dump"><tt>string.dump(f [,strip])</tt> generates portable bytecode</h3> 209<h3 id="string_dump"><tt>string.dump(f [,mode])</tt> generates portable bytecode</h3>
178<p> 210<p>
179An extra argument has been added to <tt>string.dump()</tt>. If set to 211An extra argument has been added to <tt>string.dump()</tt>. If set to
180<tt>true</tt>, 'stripped' bytecode without debug information is 212<tt>true</tt> or to a string which contains the character <tt>s</tt>,
181generated. This speeds up later bytecode loading and reduces memory 213'stripped' bytecode without debug information is generated. This speeds
182usage. See also the 214up later bytecode loading and reduces memory usage. See also the
183<a href="running.html#opt_b"><tt>-b</tt> command line option</a>. 215<a href="running.html#opt_b"><tt>-b</tt> command line option</a>.
184</p> 216</p>
185<p> 217<p>
186The generated bytecode is portable and can be loaded on any architecture 218The generated bytecode is portable and can be loaded on any architecture
187that LuaJIT supports, independent of word size or endianess. However, the 219that LuaJIT supports. However, the bytecode compatibility versions must
188bytecode compatibility versions must match. Bytecode stays compatible 220match. Bytecode only stays compatible within a major+minor version
189for dot releases (x.y.0 &rarr; x.y.1), but may change with major or 221(x.y.aaa &rarr; x.y.bbb), except for development branches. Foreign bytecode
190minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign 222(e.g. from Lua 5.1) is incompatible and cannot be loaded.
191bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. 223</p>
224<p>
225Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
226a different, incompatible bytecode format between 32 bit and 64 bit ports.
227This may be rectified in the future. In the meantime, use the <tt>W</tt>
228and <tt>X</tt> <a href="#load_mode">modes of the <tt>load*</tt> functions</a>
229for cross-compilation purposes.
230</p>
231<p>
232Due to VM hardening, bytecode is not deterministic. Add <tt>d</tt> to the
233mode string to dump it in a deterministic manner: identical source code
234always gives a byte-for-byte identical bytecode dump. This feature is
235mainly useful for reproducible builds.
236</p>
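<p>
For example, assuming <tt>f</tt> is a Lua function:
</p>
<pre class="code">
local stripped = string.dump(f, "s")   -- stripped bytecode
local repro    = string.dump(f, "sd")  -- stripped, deterministic dump
</pre>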
237
238<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
239<p>
240An extra library function <tt>table.new()</tt> can be made available via
241<tt>require("table.new")</tt>. This creates a pre-sized table, just like
242the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
243tables if the final table size is known and automatic table resizing is
244too expensive.
245</p>
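<p>
A minimal usage sketch &mdash; the sizes are placeholders for your own
estimates:
</p>
<pre class="code">
local new_tab = require("table.new")

local t = new_tab(10000, 0)  -- room for 10000 array slots, no hash part
for i = 1, 10000 do t[i] = i * i end
</pre>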
246
247<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
248<p>
249An extra library function <tt>table.clear()</tt> can be made available
250via <tt>require("table.clear")</tt>. This clears all keys and values
251from a table, but preserves the allocated array/hash sizes. This is
252useful when a table, which is linked from multiple places, needs to be
253cleared and/or when recycling a table for use by the same context. This
254avoids managing backlinks, saves an allocation and the overhead of
255incremental array/hash part growth.
256</p>
257<p>
258Please note, this function is meant for very specific situations. In most
259cases it's better to replace the (usually single) link with a new table
260and let the GC do its work.
192</p> 261</p>
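<p>
A usage sketch for recycling a scratch table &mdash; <tt>items</tt> and
<tt>consume()</tt> are hypothetical:
</p>
<pre class="code">
local clear_tab = require("table.clear")

local scratch = {}
for _, item in ipairs(items) do
  clear_tab(scratch)   -- keeps the allocated sizes, drops all contents
  consume(item, scratch)
end
</pre>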
193 262
194<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3> 263<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
@@ -196,7 +265,7 @@ bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
196LuaJIT uses a Tausworthe PRNG with period 2^223 to implement 265LuaJIT uses a Tausworthe PRNG with period 2^223 to implement
197<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of 266<tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of
198the PRNG results is much superior compared to the standard Lua 267the PRNG results is much superior compared to the standard Lua
199implementation, which uses the platform-specific ANSI rand(). 268implementation, which uses the platform-specific ANSI <tt>rand()</tt>.
200</p> 269</p>
201<p> 270<p>
202The PRNG generates the same sequences from the same seeds on all 271The PRNG generates the same sequences from the same seeds on all
@@ -207,6 +276,10 @@ It's correctly scaled up and rounded for <tt>math.random(n&nbsp;[,m])</tt> to
207preserve uniformity. 276preserve uniformity.
208</p> 277</p>
209<p> 278<p>
279Call <tt>math.randomseed()</tt> without any arguments to seed it from
280system entropy.
281</p>
282<p>
210Important: Neither this nor any other PRNG based on the simplistic 283Important: Neither this nor any other PRNG based on the simplistic
211<tt>math.random()</tt> API is suitable for cryptographic use. 284<tt>math.random()</tt> API is suitable for cryptographic use.
212</p> 285</p>
@@ -268,6 +341,26 @@ indexes for varargs.</li>
268<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle 341<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
269C&nbsp;functions.</li> 342C&nbsp;functions.</li>
270<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li> 343<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
344<li>Lua/C API extensions:
345<tt>lua_version()</tt>
346<tt>lua_upvalueid()</tt>
347<tt>lua_upvaluejoin()</tt>
348<tt>lua_loadx()</tt>
349<tt>lua_copy()</tt>
350<tt>lua_tonumberx()</tt>
351<tt>lua_tointegerx()</tt>
352<tt>luaL_fileresult()</tt>
353<tt>luaL_execresult()</tt>
354<tt>luaL_loadfilex()</tt>
355<tt>luaL_loadbufferx()</tt>
356<tt>luaL_traceback()</tt>
357<tt>luaL_setfuncs()</tt>
358<tt>luaL_pushmodule()</tt>
359<tt>luaL_newlibtable()</tt>
360<tt>luaL_newlib()</tt>
361<tt>luaL_testudata()</tt>
362<tt>luaL_setmetatable()</tt>
363</li>
271<li>Command line option <tt>-E</tt>.</li> 364<li>Command line option <tt>-E</tt>.</li>
272<li>Command line checks <tt>__tostring</tt> for errors.</li> 365<li>Command line checks <tt>__tostring</tt> for errors.</li>
273</ul> 366</ul>
@@ -293,6 +386,8 @@ exit status.</li>
293<li><tt>debug.setmetatable()</tt> returns object.</li> 386<li><tt>debug.setmetatable()</tt> returns object.</li>
294<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> 387<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
295<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> 388<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li>
389<li><tt>package.searchers</tt>.</li>
390<li><tt>module()</tt> returns the module table.</li>
296</ul> 391</ul>
297<p> 392<p>
298Note: this provides only partial compatibility with Lua 5.2 at the 393Note: this provides only partial compatibility with Lua 5.2 at the
@@ -301,6 +396,21 @@ Lua&nbsp;5.1, which prevents implementing features that would otherwise
301break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). 396break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
302</p> 397</p>
303 398
399<h2 id="lua53">Extensions from Lua 5.3</h2>
400<p>
401LuaJIT supports some extensions from Lua&nbsp;5.3:
402<ul>
403<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
404<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
405<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li>
406<li><tt>assert()</tt> accepts any type of error object.</li>
407<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
408<li><tt>coroutine.isyieldable()</tt>.</li>
409<li>Lua/C API extensions:
410<tt>lua_isyieldable()</tt>
411</li>
412</ul>
413
304<h2 id="exceptions">C++ Exception Interoperability</h2> 414<h2 id="exceptions">C++ Exception Interoperability</h2>
305<p> 415<p>
306LuaJIT has built-in support for interoperating with C++&nbsp;exceptions. 416LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
@@ -314,26 +424,21 @@ the toolchain used to compile LuaJIT:
314<td class="excinterop">Interoperability</td> 424<td class="excinterop">Interoperability</td>
315</tr> 425</tr>
316<tr class="odd separate"> 426<tr class="odd separate">
317<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> 427<td class="excplatform">External frame unwinding</td>
318<td class="exccompiler">GCC 4.3+</td> 428<td class="exccompiler">GCC, Clang, MSVC</td>
319<td class="excinterop"><b style="color: #00a000;">Full</b></td> 429<td class="excinterop"><b style="color: #00a000;">Full</b></td>
320</tr> 430</tr>
321<tr class="even"> 431<tr class="even">
322<td class="excplatform">Other platforms, DWARF2 unwinding</td> 432<td class="excplatform">Internal frame unwinding + DWARF2</td>
323<td class="exccompiler">GCC</td> 433<td class="exccompiler">GCC, Clang</td>
324<td class="excinterop"><b style="color: #c06000;">Limited</b></td> 434<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
325</tr> 435</tr>
326<tr class="odd"> 436<tr class="odd">
327<td class="excplatform">Windows/x64</td> 437<td class="excplatform">Windows 64 bit</td>
328<td class="exccompiler">MSVC</td> 438<td class="exccompiler">non-MSVC</td>
329<td class="excinterop"><b style="color: #00a000;">Full</b></td> 439<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
330</tr> 440</tr>
331<tr class="even"> 441<tr class="even">
332<td class="excplatform">Windows/x86</td>
333<td class="exccompiler">Any</td>
334<td class="excinterop"><b style="color: #a00000;">No</b></td>
335</tr>
336<tr class="odd">
337<td class="excplatform">Other platforms</td> 442<td class="excplatform">Other platforms</td>
338<td class="exccompiler">Other compilers</td> 443<td class="exccompiler">Other compilers</td>
339<td class="excinterop"><b style="color: #a00000;">No</b></td> 444<td class="excinterop"><b style="color: #a00000;">No</b></td>
@@ -352,9 +457,7 @@ the toolchain used to compile LuaJIT:
352on the C&nbsp;stack. The contents of the C++&nbsp;exception object 457on the C&nbsp;stack. The contents of the C++&nbsp;exception object
353pass through unmodified.</li> 458pass through unmodified.</li>
354<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>. 459<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
355The corresponding Lua error message can be retrieved from the Lua stack.<br> 460The corresponding Lua error message can be retrieved from the Lua stack.</li>
356For MSVC for Windows 64 bit this requires compilation of your C++ code
357with <tt>/EHa</tt>.</li>
358<li>Throwing Lua errors across C++ frames is safe. C++ destructors 461<li>Throwing Lua errors across C++ frames is safe. C++ destructors
359will be called.</li> 462will be called.</li>
360</ul> 463</ul>
@@ -384,14 +487,6 @@ C++ destructors.</li>
384<li>Lua errors <b>cannot</b> be caught on the C++ side.</li> 487<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
385<li>Throwing Lua errors across C++ frames will <b>not</b> call 488<li>Throwing Lua errors across C++ frames will <b>not</b> call
386C++ destructors.</li> 489C++ destructors.</li>
387<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
388it's <b>not</b> safe to throw a Lua error across any frames containing
389a C++ function with any try/catch construct or using variables with
390(implicit) destructors. This also applies to any functions which may be
391inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
392is called inside or outside of a try/catch or whether any object actually
393needs to be destroyed: the SEH chain is corrupted and this will eventually
394lead to the termination of the process.</li>
395</ul> 490</ul>
396<br class="flush"> 491<br class="flush">
397</div> 492</div>
diff --git a/doc/install.html b/doc/install.html
index 7f2e40e4..b6481443 100644
--- a/doc/install.html
+++ b/doc/install.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Installation</title> 4<title>Installation</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -60,9 +60,13 @@ td.compatx {
60<a href="ext_ffi_semantics.html">FFI Semantics</a> 60<a href="ext_ffi_semantics.html">FFI Semantics</a>
61</li></ul> 61</li></ul>
62</li><li> 62</li><li>
63<a href="ext_buffer.html">String Buffers</a>
64</li><li>
63<a href="ext_jit.html">jit.* Library</a> 65<a href="ext_jit.html">jit.* Library</a>
64</li><li> 66</li><li>
65<a href="ext_c_api.html">Lua/C API</a> 67<a href="ext_c_api.html">Lua/C API</a>
68</li><li>
69<a href="ext_profiler.html">Profiler</a>
66</li></ul> 70</li></ul>
67</li><li> 71</li><li>
68<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 72<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
@@ -121,6 +125,13 @@ MSVC (Visual Studio).</li>
121Please read the instructions given in these files, before changing 125Please read the instructions given in these files, before changing
122any settings. 126any settings.
123</p> 127</p>
128<p>
129All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>).
130For x64, you can select the old 32-on-64 bit mode by adding
131<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command.
132Please check the note about the
133<a href="extensions.html#string_dump">bytecode format</a> differences, too.
134</p>
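<p>
E.g. the x64 build with the old 32-on-64 bit mode:
</p>
<pre class="code">
make XCFLAGS=-DLUAJIT_DISABLE_GC64
</pre>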
124 135
125<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2> 136<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2>
126<h3>Prerequisites</h3> 137<h3>Prerequisites</h3>
@@ -154,9 +165,12 @@ You can add an extra prefix to the search paths by appending the
154make PREFIX=/home/myself/lj2 165make PREFIX=/home/myself/lj2
155</pre> 166</pre>
156<p> 167<p>
157Please use the LuaJIT 2.1 branch to compile for 168Note for macOS: you <b>must</b> set the <tt>MACOSX_DEPLOYMENT_TARGET</tt>
158<b id="osx">macOS (OSX)</b>. 169environment variable to a value supported by your toolchain:
159</p> 170</p>
171<pre class="code">
172MACOSX_DEPLOYMENT_TARGET=XX.YY make
173</pre>
160<h3>Installing LuaJIT</h3> 174<h3>Installing LuaJIT</h3>
161<p> 175<p>
162The top-level Makefile installs LuaJIT by default under 176The top-level Makefile installs LuaJIT by default under
@@ -189,7 +203,7 @@ Or install Microsoft's Visual Studio (MSVC).
189</p> 203</p>
190<h3>Building with MSVC</h3> 204<h3>Building with MSVC</h3>
191<p> 205<p>
192Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the 206Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
193directory with the source code and run these commands: 207directory with the source code and run these commands:
194</p> 208</p>
195<pre class="code"> 209<pre class="code">
@@ -200,6 +214,9 @@ msvcbuild
200Check the <tt>msvcbuild.bat</tt> file for more options. 214Check the <tt>msvcbuild.bat</tt> file for more options.
201Then follow the installation instructions below. 215Then follow the installation instructions below.
202</p> 216</p>
217<p>
218For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt>
219</p>
203<h3>Building with MinGW or Cygwin</h3> 220<h3>Building with MinGW or Cygwin</h3>
204<p> 221<p>
205Open a command prompt window and make sure the MinGW or Cygwin programs 222Open a command prompt window and make sure the MinGW or Cygwin programs
@@ -235,28 +252,37 @@ directory where <tt>luajit.exe</tt> is installed
235 252
236<h2 id="cross">Cross-compiling LuaJIT</h2> 253<h2 id="cross">Cross-compiling LuaJIT</h2>
237<p> 254<p>
255First, let's clear up some terminology:
256</p>
257<ul>
258<li>Host: This is your development system, usually based on an x64 or x86 CPU.</li>
259<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
260<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
261<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
262<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
263</ul>
264<p>
238The GNU Makefile-based build system allows cross-compiling on any host 265The GNU Makefile-based build system allows cross-compiling on any host
239for any supported target, as long as both architectures have the same 266for any supported target:
240pointer size. If you want to cross-compile to any 32 bit target on an
241x64 OS, you need to install the multilib development package (e.g.
242<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
243(<tt>HOST_CC="gcc -m32"</tt>). On some distro versions, multilib conflicts
244with cross-compilers. The workaround is to install the x86 cross-compiler
245package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part
246(<tt>HOST_CC=i686-linux-gnu-gcc</tt>).
247</p> 267</p>
268<ul>
269<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
270<li>Both host and target architectures must have the same pointer size.</li>
271<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
272<li>On some distro versions, multilib conflicts with cross-compilers. The workaround is to install the x86 cross-compiler package <tt>gcc-i686-linux-gnu</tt> and use it to build the host part (<tt>HOST_CC=i686-linux-gnu-gcc</tt>).</li>
273<li>64 bit targets always require compilation on a 64 bit host.</li>
274</ul>
248<p> 275<p>
249You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the 276You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
250target OS differ, or you'll get assembler or linker errors. E.g. if 277target OS differ, or you'll get assembler or linker errors:
251you're compiling on a Windows or macOS host for embedded Linux or Android,
252you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
253minimal target OS, you may need to disable the built-in allocator in
254<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
255specify the same <tt>TARGET_SYS</tt> for the install step, too.
256</p> 278</p>
279<ul>
280<li>E.g. if you're compiling on a Windows or macOS host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
281<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
282<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
283</ul>
257<p> 284<p>
258The examples below only show some popular targets &mdash; please check 285Here are some examples where host and target have the same CPU:
259the comments in <tt>src/Makefile</tt> for more details.
260</p> 286</p>
261<pre class="code"> 287<pre class="code">
262# Cross-compile to a 32 bit binary on a multilib x64 OS 288# Cross-compile to a 32 bit binary on a multilib x64 OS
@@ -274,34 +300,44 @@ use the canonical toolchain triplets for Linux.
274</p> 300</p>
275<p> 301<p>
276Since there's often no easy way to detect CPU features at runtime, it's 302Since there's often no easy way to detect CPU features at runtime, it's
277important to compile with the proper CPU or architecture settings. You 303important to compile with the proper CPU or architecture settings:
278can specify these when building the toolchain yourself. Or add 304</p>
279<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For 305<ul>
280ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, 306<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
281too. Otherwise, LuaJIT may not run at the full performance of your target 307<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
282CPU. 308<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
309<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
310</ul>
311<p>
312Here are some examples for targets with a different CPU than the host:
283</p> 313</p>
284<pre class="code"> 314<pre class="code">
285# ARM soft-float 315# ARM soft-float
286make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ 316make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
287 TARGET_CFLAGS="-mfloat-abi=soft" 317 TARGET_CFLAGS="-mfloat-abi=soft"
288 318
289# ARM soft-float ABI with VFP (example for Cortex-A8) 319# ARM soft-float ABI with VFP (example for Cortex-A9)
290make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ 320make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
291 TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp" 321 TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
292 322
293# ARM hard-float ABI with VFP (armhf, requires recent toolchain) 323# ARM hard-float ABI with VFP (armhf, most modern toolchains)
294make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- 324make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
295 325
326# ARM64
327make CROSS=aarch64-linux-gnu-
328
296# PPC 329# PPC
297make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- 330make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
298# PPC/e500v2 (fast interpreter only)
299make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
300 331
301# MIPS big-endian 332# MIPS32 big-endian
302make HOST_CC="gcc -m32" CROSS=mips-linux- 333make HOST_CC="gcc -m32" CROSS=mips-linux-gnu-
303# MIPS little-endian 334# MIPS32 little-endian
304make HOST_CC="gcc -m32" CROSS=mipsel-linux- 335make HOST_CC="gcc -m32" CROSS=mipsel-linux-gnu-
336
337# MIPS64 big-endian
338make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
339# MIPS64 little-endian
340make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
305</pre> 341</pre>
306<p> 342<p>
307You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>. 343You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
@@ -309,8 +345,17 @@ Please adapt the environment variables to match the install locations and the
309desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16. 345desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16.
310</p> 346</p>
311<pre class="code"> 347<pre class="code">
312# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) 348# Android/ARM64, aarch64, Android 5.0+ (L)
349NDKDIR=/opt/android/ndk
350NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
351NDKCROSS=$NDKBIN/aarch64-linux-android-
352NDKCC=$NDKBIN/aarch64-linux-android21-clang
353make CROSS=$NDKCROSS \
354 STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
355 TARGET_LD=$NDKCC TARGET_AR="$NDKBIN/llvm-ar rcus" \
356 TARGET_STRIP=$NDKBIN/llvm-strip
313 357
358# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
314NDKDIR=/opt/android/ndk 359NDKDIR=/opt/android/ndk
315NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin 360NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
316NDKCROSS=$NDKBIN/arm-linux-androideabi- 361NDKCROSS=$NDKBIN/arm-linux-androideabi-
@@ -321,9 +366,23 @@ make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
321 TARGET_STRIP=$NDKBIN/llvm-strip 366 TARGET_STRIP=$NDKBIN/llvm-strip
322</pre> 367</pre>
323<p> 368<p>
324Please use the LuaJIT 2.1 branch to compile for 369You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
325<b id="ios">iOS</b> (iPhone/iPad). 370</p>
371<p style="font-size: 8pt;">
372Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
373are not allowed to generate code at runtime. You'll only get the performance
374of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but
375much slower than the JIT compiler. Please complain to Apple, not me.
376Or use Android. :-p
326</p> 377</p>
378<pre class="code">
379# iOS/ARM64
380ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
381ICC=$(xcrun --sdk iphoneos --find clang)
382ISDKF="-arch arm64 -isysroot $ISDKP"
383make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
384 TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
385</pre>
327 386
328<h3 id="consoles">Cross-compiling for consoles</h3> 387<h3 id="consoles">Cross-compiling for consoles</h3>
329<p> 388<p>
@@ -367,15 +426,35 @@ and run the build command given in the table:
367<td class="compatx"><tt>ps4build</tt></td> 426<td class="compatx"><tt>ps4build</tt></td>
368</tr> 427</tr>
369<tr class="even"> 428<tr class="even">
429<td class="compatname"><b id="ps5">PS5</b></td>
430<td class="compatbits">64</td>
431<td class="compatx"><tt>ps5build</tt></td>
432</tr>
433<tr class="odd">
370<td class="compatname"><b id="psvita">PS Vita</b></td> 434<td class="compatname"><b id="psvita">PS Vita</b></td>
371<td class="compatbits">32</td> 435<td class="compatbits">32</td>
372<td class="compatx"><tt>psvitabuild</tt></td> 436<td class="compatx"><tt>psvitabuild</tt></td>
373</tr> 437</tr>
374<tr class="odd"> 438<tr class="even">
375<td class="compatname"><b id="xbox360">Xbox 360</b></td> 439<td class="compatname"><b id="xbox360">Xbox 360</b></td>
376<td class="compatbits">32</td> 440<td class="compatbits">32</td>
377<td class="compatx"><tt>xedkbuild</tt></td> 441<td class="compatx"><tt>xedkbuild</tt></td>
378</tr> 442</tr>
443<tr class="odd">
444<td class="compatname"><b id="xboxone">Xbox One</b></td>
445<td class="compatbits">64</td>
446<td class="compatx"><tt>xb1build</tt></td>
447</tr>
448<tr class="even">
449<td class="compatname"><b id="nx32">Nintendo Switch NX32</b></td>
450<td class="compatbits">32</td>
451<td class="compatx"><tt>nxbuild</tt></td>
452</tr>
453<tr class="odd">
454<td class="compatname"><b id="nx64">Nintendo Switch NX64</b></td>
455<td class="compatbits">64</td>
456<td class="compatx"><tt>nxbuild</tt></td>
457</tr>
379</table> 458</table>
380<p> 459<p>
381Please check out the comments in the corresponding <tt>*.bat</tt> 460Please check out the comments in the corresponding <tt>*.bat</tt>
diff --git a/doc/luajit.html b/doc/luajit.html
index 7346acb3..030cf705 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>LuaJIT</title> 4<title>LuaJIT</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -98,9 +98,13 @@ table.fcompat td {
98<a href="ext_ffi_semantics.html">FFI Semantics</a> 98<a href="ext_ffi_semantics.html">FFI Semantics</a>
99</li></ul> 99</li></ul>
100</li><li> 100</li><li>
101<a href="ext_buffer.html">String Buffers</a>
102</li><li>
101<a href="ext_jit.html">jit.* Library</a> 103<a href="ext_jit.html">jit.* Library</a>
102</li><li> 104</li><li>
103<a href="ext_c_api.html">Lua/C API</a> 105<a href="ext_c_api.html">Lua/C API</a>
106</li><li>
107<a href="ext_profiler.html">Profiler</a>
104</li></ul> 108</li></ul>
105</li><li> 109</li><li>
106<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 110<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
@@ -132,13 +136,13 @@ LuaJIT is Copyright &copy; 2005-2023 Mike Pall, released under the
132<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> 136<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
133</table> 137</table>
134<table class="feature os os3"> 138<table class="feature os os3">
135<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr> 139<tr><td>PS3</td><td>PS4<br>PS5</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td><td>Nintendo<br>Switch</td></tr>
136</table> 140</table>
137<table class="feature compiler"> 141<table class="feature compiler">
138<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr> 142<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
139</table> 143</table>
140<table class="feature cpu"> 144<table class="feature cpu">
141<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr> 145<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
142</table> 146</table>
143<table class="feature fcompat"> 147<table class="feature fcompat">
144<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr> 148<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
diff --git a/doc/running.html b/doc/running.html
index c7d9e9b6..142b810f 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Running LuaJIT</title> 4<title>Running LuaJIT</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2023"> 6<meta name="Copyright" content="Copyright (C) 2005-2023">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -59,9 +59,13 @@ td.param_default {
59<a href="ext_ffi_semantics.html">FFI Semantics</a> 59<a href="ext_ffi_semantics.html">FFI Semantics</a>
60</li></ul> 60</li></ul>
61</li><li> 61</li><li>
62<a href="ext_buffer.html">String Buffers</a>
63</li><li>
62<a href="ext_jit.html">jit.* Library</a> 64<a href="ext_jit.html">jit.* Library</a>
63</li><li> 65</li><li>
64<a href="ext_c_api.html">Lua/C API</a> 66<a href="ext_c_api.html">Lua/C API</a>
67</li><li>
68<a href="ext_profiler.html">Profiler</a>
65</li></ul> 69</li></ul>
66</li><li> 70</li><li>
67<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a> 71<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
@@ -102,10 +106,14 @@ are accepted:
102<li><tt>-l</tt> &mdash; Only list bytecode.</li> 106<li><tt>-l</tt> &mdash; Only list bytecode.</li>
103<li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li> 107<li><tt>-s</tt> &mdash; Strip debug info (this is the default).</li>
104<li><tt>-g</tt> &mdash; Keep debug info.</li> 108<li><tt>-g</tt> &mdash; Keep debug info.</li>
109<li><tt>-W</tt> &mdash; Generate 32 bit (non-GC64) bytecode.</li>
110<li><tt>-X</tt> &mdash; Generate 64 bit (GC64) bytecode.</li>
111<li><tt>-d</tt> &mdash; Generate bytecode in deterministic manner.</li>
105<li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li> 112<li><tt>-n name</tt> &mdash; Set module name (default: auto-detect from input name)</li>
106<li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li> 113<li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li>
107<li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li> 114<li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li>
108<li><tt>-o os</tt> &mdash; Override OS for object files (default: native).</li> 115<li><tt>-o os</tt> &mdash; Override OS for object files (default: native).</li>
116<li><tt>-F name</tt> &mdash; Override filename (default: input filename).</li>
109<li><tt>-e chunk</tt> &mdash; Use chunk string as input.</li> 117<li><tt>-e chunk</tt> &mdash; Use chunk string as input.</li>
110<li><tt>-</tt> (a single minus sign) &mdash; Use stdin as input and/or stdout as output.</li> 118<li><tt>-</tt> (a single minus sign) &mdash; Use stdin as input and/or stdout as output.</li>
111</ul> 119</ul>
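<p>
For example, to save a stripped, deterministic, portable bytecode file
(file names are placeholders):
</p>
<pre class="code">
luajit -b -d myapp.lua myapp.raw
</pre>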
@@ -115,7 +123,8 @@ file name:
115</p> 123</p>
116<ul> 124<ul>
117<li><tt>c</tt> &mdash; C source file, exported bytecode data.</li> 125<li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
118<li><tt>h</tt> &mdash; C header file, static bytecode data.</li> 126<li><tt>cc</tt> &mdash; C++ source file, exported bytecode data.</li>
127<li><tt>h</tt> &mdash; C/C++ header file, static bytecode data.</li>
119<li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data 128<li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
120(OS- and architecture-specific).</li> 129(OS- and architecture-specific).</li>
121<li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable). 130<li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).
@@ -171,6 +180,7 @@ Here are the available LuaJIT control commands:
171<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li> 180<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
172<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li> 181<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
173<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li> 182<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
183<li id="j_p"><tt>-jp</tt> &mdash; Start the <a href="ext_profiler.html">integrated profiler</a>.</li>
174</ul> 184</ul>
175<p> 185<p>
176The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules 186The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
@@ -215,6 +225,12 @@ mix the three forms, but note that setting an optimization level
215overrides all earlier flags. 225overrides all earlier flags.
216</p> 226</p>
217<p> 227<p>
228Note that <tt>-Ofma</tt> is not enabled by default at any level,
229because it affects floating-point result accuracy. Only enable this
230if you fully understand the trade-offs of FMA for performance (higher),
231determinism (lower) and numerical accuracy (higher).
232</p>
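<p>
E.g., assuming the usual <tt>-O+flag</tt> form described above:
</p>
<pre class="code">
luajit -O+fma myapp.lua
</pre>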
233<p>
218Here are the available flags and at what optimization levels they 234Here are the available flags and at what optimization levels they
219are enabled: 235are enabled:
220</p> 236</p>
@@ -246,6 +262,8 @@ are enabled:
246<td class="flag_name">sink</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Allocation/Store Sinking</td></tr> 262<td class="flag_name">sink</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Allocation/Store Sinking</td></tr>
247<tr class="even"> 263<tr class="even">
248<td class="flag_name">fuse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Fusion of operands into instructions</td></tr> 264<td class="flag_name">fuse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Fusion of operands into instructions</td></tr>
265<tr class="odd">
266<td class="flag_name">fma </td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_desc">Fused multiply-add</td></tr>
249</table> 267</table>
250<p> 268<p>
251Here are the parameters and their default settings: 269Here are the parameters and their default settings:
diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
index a93e831e..eaa94d9c 100644
--- a/dynasm/dasm_arm.h
+++ b/dynasm/dasm_arm.h
@@ -70,7 +70,7 @@ struct dasm_State {
70 size_t lgsize; 70 size_t lgsize;
71 int *pclabels; /* PC label chains/pos ptrs. */ 71 int *pclabels; /* PC label chains/pos ptrs. */
72 size_t pcsize; 72 size_t pcsize;
73 void **globals; /* Array of globals (bias -10). */ 73 void **globals; /* Array of globals. */
74 dasm_Section *section; /* Pointer to active section. */ 74 dasm_Section *section; /* Pointer to active section. */
75 size_t codesize; /* Total size of all code sections. */ 75 size_t codesize; /* Total size of all code sections. */
76 int maxsection; /* 0 <= sectionidx < maxsection. */ 76 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -87,7 +87,6 @@ void dasm_init(Dst_DECL, int maxsection)
87{ 87{
88 dasm_State *D; 88 dasm_State *D;
89 size_t psz = 0; 89 size_t psz = 0;
90 int i;
91 Dst_REF = NULL; 90 Dst_REF = NULL;
92 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 91 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
93 D = Dst_REF; 92 D = Dst_REF;
@@ -98,12 +97,7 @@ void dasm_init(Dst_DECL, int maxsection)
98 D->pcsize = 0; 97 D->pcsize = 0;
99 D->globals = NULL; 98 D->globals = NULL;
100 D->maxsection = maxsection; 99 D->maxsection = maxsection;
101 for (i = 0; i < maxsection; i++) { 100 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
102 D->sections[i].buf = NULL; /* Need this for pass3. */
103 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
104 D->sections[i].bsize = 0;
105 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
106 }
107} 101}
108 102
109/* Free DynASM state. */ 103/* Free DynASM state. */
@@ -123,7 +117,7 @@ void dasm_free(Dst_DECL)
123void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 117void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
124{ 118{
125 dasm_State *D = Dst_REF; 119 dasm_State *D = Dst_REF;
126 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 120 D->globals = gl;
127 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 121 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
128} 122}
129 123
@@ -148,6 +142,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
148 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 142 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
149 for (i = 0; i < D->maxsection; i++) { 143 for (i = 0; i < D->maxsection; i++) {
150 D->sections[i].pos = DASM_SEC2POS(i); 144 D->sections[i].pos = DASM_SEC2POS(i);
145 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
151 D->sections[i].ofs = 0; 146 D->sections[i].ofs = 0;
152 } 147 }
153} 148}
@@ -294,7 +289,7 @@ int dasm_link(Dst_DECL, size_t *szp)
294 289
295 { /* Handle globals not defined in this translation unit. */ 290 { /* Handle globals not defined in this translation unit. */
296 int idx; 291 int idx;
297 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 292 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
298 int n = D->lglabels[idx]; 293 int n = D->lglabels[idx];
299 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 294 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
300 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 295 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -371,7 +366,10 @@ int dasm_encode(Dst_DECL, void *buffer)
371 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; 366 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
372 break; 367 break;
373 case DASM_REL_LG: 368 case DASM_REL_LG:
374 CK(n >= 0, UNDEF_LG); 369 if (n < 0) {
370 n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp - 4);
371 goto patchrel;
372 }
375 /* fallthrough */ 373 /* fallthrough */
376 case DASM_REL_PC: 374 case DASM_REL_PC:
377 CK(n >= 0, UNDEF_PC); 375 CK(n >= 0, UNDEF_PC);
@@ -393,7 +391,7 @@ int dasm_encode(Dst_DECL, void *buffer)
393 } 391 }
394 break; 392 break;
395 case DASM_LABEL_LG: 393 case DASM_LABEL_LG:
396 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 394 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
397 break; 395 break;
398 case DASM_LABEL_PC: break; 396 case DASM_LABEL_PC: break;
399 case DASM_IMM: 397 case DASM_IMM:
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
index 960f1fe6..edb57536 100644
--- a/dynasm/dasm_arm.lua
+++ b/dynasm/dasm_arm.lua
@@ -9,9 +9,9 @@
9local _info = { 9local _info = {
10 arch = "arm", 10 arch = "arm",
11 description = "DynASM ARM module", 11 description = "DynASM ARM module",
12 version = "1.3.0", 12 version = "1.5.0",
13 vernum = 10300, 13 vernum = 10500,
14 release = "2011-05-05", 14 release = "2021-05-02",
15 author = "Mike Pall", 15 author = "Mike Pall",
16 license = "MIT", 16 license = "MIT",
17} 17}
diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
new file mode 100644
index 00000000..1c541e5d
--- /dev/null
+++ b/dynasm/dasm_arm64.h
@@ -0,0 +1,558 @@
1/*
2** DynASM ARM64 encoding engine.
3** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/
6
7#include <stddef.h>
8#include <stdarg.h>
9#include <string.h>
10#include <stdlib.h>
11
12#define DASM_ARCH "arm64"
13
14#ifndef DASM_EXTERN
15#define DASM_EXTERN(a,b,c,d) 0
16#endif
17
18/* Action definitions. */
19enum {
20 DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
25 DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
26 DASM_IMMV, DASM_VREG,
27 DASM__MAX
28};
29
30/* Maximum number of section buffer positions for a single dasm_put() call. */
31#define DASM_MAXSECPOS 25
32
33/* DynASM encoder status codes. Action list offset or number are or'ed in. */
34#define DASM_S_OK 0x00000000
35#define DASM_S_NOMEM 0x01000000
36#define DASM_S_PHASE 0x02000000
37#define DASM_S_MATCH_SEC 0x03000000
38#define DASM_S_RANGE_I 0x11000000
39#define DASM_S_RANGE_SEC 0x12000000
40#define DASM_S_RANGE_LG 0x13000000
41#define DASM_S_RANGE_PC 0x14000000
42#define DASM_S_RANGE_REL 0x15000000
43#define DASM_S_RANGE_VREG 0x16000000
44#define DASM_S_UNDEF_LG 0x21000000
45#define DASM_S_UNDEF_PC 0x22000000
46
47/* Macros to convert positions (8 bit section + 24 bit index). */
48#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
49#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
50#define DASM_SEC2POS(sec) ((sec)<<24)
51#define DASM_POS2SEC(pos) ((pos)>>24)
52#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
53
54/* Action list type. */
55typedef const unsigned int *dasm_ActList;
56
57/* Per-section structure. */
58typedef struct dasm_Section {
59 int *rbuf; /* Biased buffer pointer (negative section bias). */
60 int *buf; /* True buffer pointer. */
61 size_t bsize; /* Buffer size in bytes. */
62 int pos; /* Biased buffer position. */
63 int epos; /* End of biased buffer position - max single put. */
64 int ofs; /* Byte offset into section. */
65} dasm_Section;
66
67/* Core structure holding the DynASM encoding state. */
68struct dasm_State {
69 size_t psize; /* Allocated size of this structure. */
70 dasm_ActList actionlist; /* Current actionlist pointer. */
71 int *lglabels; /* Local/global chain/pos ptrs. */
72 size_t lgsize;
73 int *pclabels; /* PC label chains/pos ptrs. */
74 size_t pcsize;
75 void **globals; /* Array of globals. */
76 dasm_Section *section; /* Pointer to active section. */
77 size_t codesize; /* Total size of all code sections. */
78 int maxsection; /* 0 <= sectionidx < maxsection. */
79 int status; /* Status code. */
80 dasm_Section sections[1]; /* All sections. Alloc-extended. */
81};
82
83/* The size of the core structure depends on the max. number of sections. */
84#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
85
86
87/* Initialize DynASM state. */
88void dasm_init(Dst_DECL, int maxsection)
89{
90 dasm_State *D;
91 size_t psz = 0;
92 Dst_REF = NULL;
93 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
94 D = Dst_REF;
95 D->psize = psz;
96 D->lglabels = NULL;
97 D->lgsize = 0;
98 D->pclabels = NULL;
99 D->pcsize = 0;
100 D->globals = NULL;
101 D->maxsection = maxsection;
102 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
103}
104
105/* Free DynASM state. */
106void dasm_free(Dst_DECL)
107{
108 dasm_State *D = Dst_REF;
109 int i;
110 for (i = 0; i < D->maxsection; i++)
111 if (D->sections[i].buf)
112 DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
113 if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
114 if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
115 DASM_M_FREE(Dst, D, D->psize);
116}
117
118/* Setup global label array. Must be called before dasm_setup(). */
119void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
120{
121 dasm_State *D = Dst_REF;
122 D->globals = gl;
123 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
124}
125
126/* Grow PC label array. Can be called after dasm_setup(), too. */
127void dasm_growpc(Dst_DECL, unsigned int maxpc)
128{
129 dasm_State *D = Dst_REF;
130 size_t osz = D->pcsize;
131 DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
132 memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
133}
134
135/* Setup encoder. */
136void dasm_setup(Dst_DECL, const void *actionlist)
137{
138 dasm_State *D = Dst_REF;
139 int i;
140 D->actionlist = (dasm_ActList)actionlist;
141 D->status = DASM_S_OK;
142 D->section = &D->sections[0];
143 memset((void *)D->lglabels, 0, D->lgsize);
144 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
145 for (i = 0; i < D->maxsection; i++) {
146 D->sections[i].pos = DASM_SEC2POS(i);
147 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
148 D->sections[i].ofs = 0;
149 }
150}
151
152
153#ifdef DASM_CHECKS
154#define CK(x, st) \
155 do { if (!(x)) { \
156 D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
157#define CKPL(kind, st) \
158 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
159 D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
160#else
161#define CK(x, st) ((void)0)
162#define CKPL(kind, st) ((void)0)
163#endif
164
165static int dasm_imm12(unsigned int n)
166{
167 if ((n >> 12) == 0)
168 return n;
169 else if ((n & 0xff000fff) == 0)
170 return (n >> 12) | 0x1000;
171 else
172 return -1;
173}
174
175static int dasm_ffs(unsigned long long x)
176{
177 int n = -1;
178 while (x) { x >>= 1; n++; }
179 return n;
180}
181
182static int dasm_imm13(int lo, int hi)
183{
184 int inv = 0, w = 64, s = 0xfff, xa, xb;
185 unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
186 unsigned long long m = 1ULL, a, b, c;
187 if (n & 1) { n = ~n; inv = 1; }
188 a = n & (unsigned long long)-(long long)n;
189 b = (n+a)&(unsigned long long)-(long long)(n+a);
190 c = (n+a-b)&(unsigned long long)-(long long)(n+a-b);
191 xa = dasm_ffs(a); xb = dasm_ffs(b);
192 if (c) {
193 w = dasm_ffs(c) - xa;
194 if (w == 32) m = 0x0000000100000001UL;
195 else if (w == 16) m = 0x0001000100010001UL;
196 else if (w == 8) m = 0x0101010101010101UL;
197 else if (w == 4) m = 0x1111111111111111UL;
198 else if (w == 2) m = 0x5555555555555555UL;
199 else return -1;
200 s = (-2*w & 0x3f) - 1;
201 } else if (!a) {
202 return -1;
203 } else if (xb == -1) {
204 xb = 64;
205 }
206 if ((b-a) * m != n) return -1;
207 if (inv) {
208 return ((w - xb) << 6) | (s+w+xa-xb);
209 } else {
210 return ((w - xa) << 6) | (s+xb-xa);
211 }
212 return -1;
213}
214
215/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
216void dasm_put(Dst_DECL, int start, ...)
217{
218 va_list ap;
219 dasm_State *D = Dst_REF;
220 dasm_ActList p = D->actionlist + start;
221 dasm_Section *sec = D->section;
222 int pos = sec->pos, ofs = sec->ofs;
223 int *b;
224
225 if (pos >= sec->epos) {
226 DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
227 sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
228 sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
229 sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
230 }
231
232 b = sec->rbuf;
233 b[pos++] = start;
234
235 va_start(ap, start);
236 while (1) {
237 unsigned int ins = *p++;
238 unsigned int action = (ins >> 16);
239 if (action >= DASM__MAX) {
240 ofs += 4;
241 } else {
242 int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
243 switch (action) {
244 case DASM_STOP: goto stop;
245 case DASM_SECTION:
246 n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
247 D->section = &D->sections[n]; goto stop;
248 case DASM_ESC: p++; ofs += 4; break;
249 case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
250 case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
251 case DASM_REL_LG:
252 n = (ins & 2047) - 10; pl = D->lglabels + n;
253 /* Bkwd rel or global. */
254 if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
255 pl += 10; n = *pl;
256 if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
257 goto linkrel;
258 case DASM_REL_PC:
259 pl = D->pclabels + n; CKPL(pc, PC);
260 putrel:
261 n = *pl;
262 if (n < 0) { /* Label exists. Get label pos and store it. */
263 b[pos] = -n;
264 } else {
265 linkrel:
266 b[pos] = n; /* Else link to rel chain, anchored at label. */
267 *pl = pos;
268 }
269 pos++;
270 if ((ins & 0x8000)) ofs += 8;
271 break;
272 case DASM_REL_A:
273 b[pos++] = n;
274 b[pos++] = va_arg(ap, int);
275 break;
276 case DASM_LABEL_LG:
277 pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
278 case DASM_LABEL_PC:
279 pl = D->pclabels + n; CKPL(pc, PC);
280 putlabel:
281 n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
282 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
283 }
284 *pl = -pos; /* Label exists now. */
285 b[pos++] = ofs; /* Store pass1 offset estimate. */
286 break;
287 case DASM_IMM:
288 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
289 n >>= ((ins>>10)&31);
290#ifdef DASM_CHECKS
291 if ((ins & 0x8000))
292 CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
293 else
294 CK((n>>((ins>>5)&31)) == 0, RANGE_I);
295#endif
296 b[pos++] = n;
297 break;
298 case DASM_IMM6:
299 CK((n >> 6) == 0, RANGE_I);
300 b[pos++] = n;
301 break;
302 case DASM_IMM12:
303 CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
304 b[pos++] = n;
305 break;
306 case DASM_IMM13W:
307 CK(dasm_imm13(n, n) != -1, RANGE_I);
308 b[pos++] = n;
309 break;
310 case DASM_IMM13X: {
311 int m = va_arg(ap, int);
312 CK(dasm_imm13(n, m) != -1, RANGE_I);
313 b[pos++] = n;
314 b[pos++] = m;
315 break;
316 }
317 case DASM_IMML: {
318#ifdef DASM_CHECKS
319 int scale = (ins & 3);
320 CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
321 (unsigned int)(n+256) < 512, RANGE_I);
322#endif
323 b[pos++] = n;
324 break;
325 }
326 case DASM_IMMV:
327 ofs += 4;
328 b[pos++] = n;
329 break;
330 case DASM_VREG:
331 CK(n < 32, RANGE_VREG);
332 b[pos++] = n;
333 break;
334 }
335 }
336 }
337stop:
338 va_end(ap);
339 sec->pos = pos;
340 sec->ofs = ofs;
341}
342#undef CK
343
344/* Pass 2: Link sections, shrink aligns, fix label offsets. */
345int dasm_link(Dst_DECL, size_t *szp)
346{
347 dasm_State *D = Dst_REF;
348 int secnum;
349 int ofs = 0;
350
351#ifdef DASM_CHECKS
352 *szp = 0;
353 if (D->status != DASM_S_OK) return D->status;
354 {
355 int pc;
356 for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
357 if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
358 }
359#endif
360
361 { /* Handle globals not defined in this translation unit. */
362 int idx;
363 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
364 int n = D->lglabels[idx];
365 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
366 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
367 }
368 }
369
370 /* Combine all code sections. No support for data sections (yet). */
371 for (secnum = 0; secnum < D->maxsection; secnum++) {
372 dasm_Section *sec = D->sections + secnum;
373 int *b = sec->rbuf;
374 int pos = DASM_SEC2POS(secnum);
375 int lastpos = sec->pos;
376
377 while (pos != lastpos) {
378 dasm_ActList p = D->actionlist + b[pos++];
379 while (1) {
380 unsigned int ins = *p++;
381 unsigned int action = (ins >> 16);
382 switch (action) {
383 case DASM_STOP: case DASM_SECTION: goto stop;
384 case DASM_ESC: p++; break;
385 case DASM_REL_EXT: break;
386 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
387 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
388 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
389 case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
390 case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
391 case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
392 }
393 }
394 stop: (void)0;
395 }
396 ofs += sec->ofs; /* Next section starts right after current section. */
397 }
398
399 D->codesize = ofs; /* Total size of all code sections */
400 *szp = ofs;
401 return DASM_S_OK;
402}
403
404#ifdef DASM_CHECKS
405#define CK(x, st) \
406 do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
407#else
408#define CK(x, st) ((void)0)
409#endif
410
411/* Pass 3: Encode sections. */
412int dasm_encode(Dst_DECL, void *buffer)
413{
414 dasm_State *D = Dst_REF;
415 char *base = (char *)buffer;
416 unsigned int *cp = (unsigned int *)buffer;
417 int secnum;
418
419 /* Encode all code sections. No support for data sections (yet). */
420 for (secnum = 0; secnum < D->maxsection; secnum++) {
421 dasm_Section *sec = D->sections + secnum;
422 int *b = sec->buf;
423 int *endb = sec->rbuf + sec->pos;
424
425 while (b != endb) {
426 dasm_ActList p = D->actionlist + *b++;
427 while (1) {
428 unsigned int ins = *p++;
429 unsigned int action = (ins >> 16);
430 int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
431 switch (action) {
432 case DASM_STOP: case DASM_SECTION: goto stop;
433 case DASM_ESC: *cp++ = *p++; break;
434 case DASM_REL_EXT:
435 n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
436 goto patchrel;
437 case DASM_ALIGN:
438 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
439 break;
440 case DASM_REL_LG:
441 if (n < 0) {
442 ptrdiff_t na = (ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4;
443 n = (int)na;
444 CK((ptrdiff_t)n == na, RANGE_REL);
445 goto patchrel;
446 }
447 /* fallthrough */
448 case DASM_REL_PC:
449 CK(n >= 0, UNDEF_PC);
450 n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
451 patchrel:
452 if (!(ins & 0xf800)) { /* B, BL */
453 CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
454 cp[-1] |= ((n >> 2) & 0x03ffffff);
455 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */
456 CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
457 cp[-1] |= ((n << 3) & 0x00ffffe0);
458 } else if ((ins & 0x3000) == 0x2000) { /* ADR */
459 CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
460 cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
461 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */
462 cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
463 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
464 CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
465 cp[-1] |= ((n << 3) & 0x0007ffe0);
466 } else if ((ins & 0x8000)) { /* absolute */
467 cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
468 cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
469 cp += 2;
470 }
471 break;
472 case DASM_REL_A: {
473 ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
474 if ((ins & 0x3000) == 0x3000) { /* ADRP */
475 ins &= ~0x1000;
476 na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
477 } else {
478 na = na - (ptrdiff_t)cp + 4;
479 }
480 n = (int)na;
481 CK((ptrdiff_t)n == na, RANGE_REL);
482 goto patchrel;
483 }
484 case DASM_LABEL_LG:
485 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
486 break;
487 case DASM_LABEL_PC: break;
488 case DASM_IMM:
489 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
490 break;
491 case DASM_IMM6:
492 cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
493 break;
494 case DASM_IMM12:
495 cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
496 break;
497 case DASM_IMM13W:
498 cp[-1] |= (dasm_imm13(n, n) << 10);
499 break;
500 case DASM_IMM13X:
501 cp[-1] |= (dasm_imm13(n, *b++) << 10);
502 break;
503 case DASM_IMML: {
504 int scale = (ins & 3);
505 cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
506 ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
507 break;
508 }
509 case DASM_IMMV:
510 *cp++ = n;
511 break;
512 case DASM_VREG:
513 cp[-1] |= (n & 0x1f) << (ins & 0x1f);
514 break;
515 default: *cp++ = ins; break;
516 }
517 }
518 stop: (void)0;
519 }
520 }
521
522 if (base + D->codesize != (char *)cp) /* Check for phase errors. */
523 return DASM_S_PHASE;
524 return DASM_S_OK;
525}
526#undef CK
527
528/* Get PC label offset. */
529int dasm_getpclabel(Dst_DECL, unsigned int pc)
530{
531 dasm_State *D = Dst_REF;
532 if (pc*sizeof(int) < D->pcsize) {
533 int pos = D->pclabels[pc];
534 if (pos < 0) return *DASM_POS2PTR(D, -pos);
535 if (pos > 0) return -1; /* Undefined. */
536 }
537 return -2; /* Unused or out of range. */
538}
539
540#ifdef DASM_CHECKS
541/* Optional sanity checker to call between isolated encoding steps. */
542int dasm_checkstep(Dst_DECL, int secmatch)
543{
544 dasm_State *D = Dst_REF;
545 if (D->status == DASM_S_OK) {
546 int i;
547 for (i = 1; i <= 9; i++) {
548 if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
549 D->lglabels[i] = 0;
550 }
551 }
552 if (D->status == DASM_S_OK && secmatch >= 0 &&
553 D->section != &D->sections[secmatch])
554 D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
555 return D->status;
556}
557#endif
558
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
new file mode 100644
index 00000000..05ea3e22
--- /dev/null
+++ b/dynasm/dasm_arm64.lua
@@ -0,0 +1,1226 @@
1------------------------------------------------------------------------------
2-- DynASM ARM64 module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7
8-- Module information:
9local _info = {
 10 arch = "arm64",
11 description = "DynASM ARM64 module",
12 version = "1.5.0",
13 vernum = 10500,
14 release = "2021-05-02",
15 author = "Mike Pall",
16 license = "MIT",
17}
18
19-- Exported glue functions for the arch-specific module.
20local _M = { _info = _info }
21
22-- Cache library functions.
23local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
24local assert, setmetatable, rawget = assert, setmetatable, rawget
25local _s = string
26local format, byte, char = _s.format, _s.byte, _s.char
27local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
28local concat, sort, insert = table.concat, table.sort, table.insert
29local bit = bit or require("bit")
30local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
31local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
32
33-- Inherited tables and callbacks.
34local g_opt, g_arch
35local wline, werror, wfatal, wwarn
36
37-- Action name list.
38-- CHECK: Keep this in sync with the C code!
39local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "REL_A",
43 "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
44 "VREG",
45}
46
47-- Maximum number of section buffer positions for dasm_put().
48-- CHECK: Keep this in sync with the C code!
49local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
50
51-- Action name -> action number.
52local map_action = {}
53for n,name in ipairs(action_names) do
54 map_action[name] = n-1
55end
56
57-- Action list buffer.
58local actlist = {}
59
60-- Argument list for next dasm_put(). Start with offset 0 into action list.
61local actargs = { 0 }
62
63-- Current number of section buffer positions for dasm_put().
64local secpos = 1
65
66------------------------------------------------------------------------------
67
68-- Dump action names and numbers.
69local function dumpactions(out)
70 out:write("DynASM encoding engine action codes:\n")
71 for n,name in ipairs(action_names) do
72 local num = map_action[name]
73 out:write(format(" %-10s %02X %d\n", name, num, num))
74 end
75 out:write("\n")
76end
77
78-- Write action list buffer as a huge static C array.
79local function writeactions(out, name)
80 local nn = #actlist
81 if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
82 out:write("static const unsigned int ", name, "[", nn, "] = {\n")
83 for i = 1,nn-1 do
84 assert(out:write("0x", tohex(actlist[i]), ",\n"))
85 end
86 assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
87end
88
89------------------------------------------------------------------------------
90
91-- Add word to action list.
92local function wputxw(n)
93 assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
94 actlist[#actlist+1] = n
95end
96
97-- Add action to list with optional arg. Advance buffer pos, too.
98local function waction(action, val, a, num)
99 local w = assert(map_action[action], "bad action name `"..action.."'")
100 wputxw(w * 0x10000 + (val or 0))
101 if a then actargs[#actargs+1] = a end
102 if a or num then secpos = secpos + (num or 1) end
103end
104
105-- Flush action list (intervening C code or buffer pos overflow).
106local function wflush(term)
107 if #actlist == actargs[1] then return end -- Nothing to flush.
108 if not term then waction("STOP") end -- Terminate action list.
109 wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
110 actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
111 secpos = 1 -- The actionlist offset occupies a buffer position, too.
112end
113
114-- Put escaped word.
115local function wputw(n)
116 if n <= 0x000fffff then waction("ESC") end
117 wputxw(n)
118end
119
120-- Reserve position for word.
121local function wpos()
122 local pos = #actlist+1
123 actlist[pos] = ""
124 return pos
125end
126
127-- Store word to reserved position.
128local function wputpos(pos, n)
129 assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
130 if n <= 0x000fffff then
131 insert(actlist, pos+1, n)
132 n = map_action.ESC * 0x10000
133 end
134 actlist[pos] = n
135end
136
137------------------------------------------------------------------------------
138
139-- Global label name -> global label number. With auto assignment on 1st use.
140local next_global = 20
141local map_global = setmetatable({}, { __index = function(t, name)
142 if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
143 local n = next_global
144 if n > 2047 then werror("too many global labels") end
145 next_global = n + 1
146 t[name] = n
147 return n
148end})
149
150-- Dump global labels.
151local function dumpglobals(out, lvl)
152 local t = {}
153 for name, n in pairs(map_global) do t[n] = name end
154 out:write("Global labels:\n")
155 for i=20,next_global-1 do
156 out:write(format(" %s\n", t[i]))
157 end
158 out:write("\n")
159end
160
161-- Write global label enum.
162local function writeglobals(out, prefix)
163 local t = {}
164 for name, n in pairs(map_global) do t[n] = name end
165 out:write("enum {\n")
166 for i=20,next_global-1 do
167 out:write(" ", prefix, t[i], ",\n")
168 end
169 out:write(" ", prefix, "_MAX\n};\n")
170end
171
172-- Write global label names.
173local function writeglobalnames(out, name)
174 local t = {}
175 for name, n in pairs(map_global) do t[n] = name end
176 out:write("static const char *const ", name, "[] = {\n")
177 for i=20,next_global-1 do
178 out:write(" \"", t[i], "\",\n")
179 end
180 out:write(" (const char *)0\n};\n")
181end
182
183------------------------------------------------------------------------------
184
185-- Extern label name -> extern label number. With auto assignment on 1st use.
186local next_extern = 0
187local map_extern_ = {}
188local map_extern = setmetatable({}, { __index = function(t, name)
189 -- No restrictions on the name for now.
190 local n = next_extern
191 if n > 2047 then werror("too many extern labels") end
192 next_extern = n + 1
193 t[name] = n
194 map_extern_[n] = name
195 return n
196end})
197
198-- Dump extern labels.
199local function dumpexterns(out, lvl)
200 out:write("Extern labels:\n")
201 for i=0,next_extern-1 do
202 out:write(format(" %s\n", map_extern_[i]))
203 end
204 out:write("\n")
205end
206
207-- Write extern label names.
208local function writeexternnames(out, name)
209 out:write("static const char *const ", name, "[] = {\n")
210 for i=0,next_extern-1 do
211 out:write(" \"", map_extern_[i], "\",\n")
212 end
213 out:write(" (const char *)0\n};\n")
214end
215
216------------------------------------------------------------------------------
217
218-- Arch-specific maps.
219
220-- Ext. register name -> int. name.
221local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
222
223-- Int. register name -> ext. name.
224local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
225
226local map_type = {} -- Type name -> { ctype, reg }
227local ctypenum = 0 -- Type number (for Dt... macros).
228
229-- Reverse defines for registers.
230function _M.revdef(s)
231 return map_reg_rev[s] or s
232end
233
234local map_shift = { lsl = 0, lsr = 1, asr = 2, }
235
236local map_extend = {
237 uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
238 sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
239}
240
241local map_cond = {
242 eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
243 hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
244 hs = 2, lo = 3,
245}
246
247------------------------------------------------------------------------------
248
249local parse_reg_type
250
251local function parse_reg(expr, shift, no_vreg)
252 if not expr then werror("expected register name") end
253 local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
254 if not tname then
255 tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
256 end
257 local tp = map_type[tname or expr]
258 if tp then
259 local reg = ovreg or tp.reg
260 if not reg then
261 werror("type `"..(tname or expr).."' needs a register override")
262 end
263 expr = reg
264 end
265 local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
266 if r then
267 r = tonumber(r)
268 if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then
269 if not parse_reg_type then
270 parse_reg_type = rt
271 elseif parse_reg_type ~= rt then
272 werror("register size mismatch")
273 end
274 return shl(r, shift), tp
275 end
276 end
277 local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
278 if vreg then
279 if not parse_reg_type then
280 parse_reg_type = vrt
281 elseif parse_reg_type ~= vrt then
282 werror("register size mismatch")
283 end
284 if not no_vreg then waction("VREG", shift, vreg) end
285 return 0
286 end
287 werror("bad register name `"..expr.."'")
288end
289
290local function parse_reg_base(expr)
291 if expr == "sp" then return 0x3e0 end
292 local base, tp = parse_reg(expr, 5)
293 if parse_reg_type ~= "x" then werror("bad register type") end
294 parse_reg_type = false
295 return base, tp
296end
297
298local parse_ctx = {}
299
300local loadenv = setfenv and function(s)
301 local code = loadstring(s, "")
302 if code then setfenv(code, parse_ctx) end
303 return code
304end or function(s)
305 return load(s, "", nil, parse_ctx)
306end
307
308-- Try to parse simple arithmetic, too, since some basic ops are aliases.
309local function parse_number(n)
310 local x = tonumber(n)
311 if x then return x end
312 local code = loadenv("return "..n)
313 if code then
314 local ok, y = pcall(code)
315 if ok and type(y) == "number" then return y end
316 end
317 return nil
318end
319
320local function parse_imm(imm, bits, shift, scale, signed)
321 imm = match(imm, "^#(.*)$")
322 if not imm then werror("expected immediate operand") end
323 local n = parse_number(imm)
324 if n then
325 local m = sar(n, scale)
326 if shl(m, scale) == n then
327 if signed then
328 local s = sar(m, bits-1)
329 if s == 0 then return shl(m, shift)
330 elseif s == -1 then return shl(m + shl(1, bits), shift) end
331 else
332 if sar(m, bits) == 0 then return shl(m, shift) end
333 end
334 end
335 werror("out of range immediate `"..imm.."'")
336 else
337 waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
338 return 0
339 end
340end
341
342local function parse_imm12(imm)
343 imm = match(imm, "^#(.*)$")
344 if not imm then werror("expected immediate operand") end
345 local n = parse_number(imm)
346 if n then
347 if shr(n, 12) == 0 then
348 return shl(n, 10)
349 elseif band(n, 0xff000fff) == 0 then
350 return shr(n, 2) + 0x00400000
351 end
352 werror("out of range immediate `"..imm.."'")
353 else
354 waction("IMM12", 0, imm)
355 return 0
356 end
357end
358
359local function parse_imm13(imm)
360 imm = match(imm, "^#(.*)$")
361 if not imm then werror("expected immediate operand") end
362 local n = parse_number(imm)
363 local r64 = parse_reg_type == "x"
364 if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
365 local inv = false
366 if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
367 local t = {}
368 for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
369 local b = table.concat(t)
370 b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
371 local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
372 if p0 then
373 local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
374 if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
375 local s = band(-2*w, 0x3f) - 1
376 if w == 64 then s = s + 0x1000 end
377 if inv then
378 return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
379 else
380 return shl(w-#p0, 16) + shl(s+#p1, 10)
381 end
382 end
383 end
384 werror("out of range immediate `"..imm.."'")
385 elseif r64 then
386 waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
387 actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
388 return 0
389 else
390 waction("IMM13W", 0, imm)
391 return 0
392 end
393end
394
395local function parse_imm6(imm)
396 imm = match(imm, "^#(.*)$")
397 if not imm then werror("expected immediate operand") end
398 local n = parse_number(imm)
399 if n then
400 if n >= 0 and n <= 63 then
401 return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
402 end
403 werror("out of range immediate `"..imm.."'")
404 else
405 waction("IMM6", 0, imm)
406 return 0
407 end
408end
409
410local function parse_imm_load(imm, scale)
411 local n = parse_number(imm)
412 if n then
413 local m = sar(n, scale)
414 if shl(m, scale) == n and m >= 0 and m < 0x1000 then
415 return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
416 elseif n >= -256 and n < 256 then
417 return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
418 end
419 werror("out of range immediate `"..imm.."'")
420 else
421 waction("IMML", scale, imm)
422 return 0
423 end
424end
425
426local function parse_fpimm(imm)
427 imm = match(imm, "^#(.*)$")
428 if not imm then werror("expected immediate operand") end
429 local n = parse_number(imm)
430 if n then
431 local m, e = math.frexp(n)
432 local s, e2 = 0, band(e-2, 7)
433 if m < 0 then m = -m; s = 0x00100000 end
434 m = m*32-16
435 if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
436 return s + shl(e2, 17) + shl(m, 13)
437 end
438 werror("out of range immediate `"..imm.."'")
439 else
440 werror("NYI fpimm action")
441 end
442end
443
444local function parse_shift(expr)
445 local s, s2 = match(expr, "^(%S+)%s*(.*)$")
446 s = map_shift[s]
447 if not s then werror("expected shift operand") end
448 return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
449end
450
451local function parse_lslx16(expr)
452 local n = match(expr, "^lsl%s*#(%d+)$")
453 n = tonumber(n)
454 if not n then werror("expected shift operand") end
455 if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
456 werror("bad shift amount")
457 end
458 return shl(n, 17)
459end
460
461local function parse_extend(expr)
462 local s, s2 = match(expr, "^(%S+)%s*(.*)$")
463 if s == "lsl" then
464 s = parse_reg_type == "x" and 3 or 2
465 else
466 s = map_extend[s]
467 end
468 if not s then werror("expected extend operand") end
469 return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
470end
471
472local function parse_cond(expr, inv)
473 local c = map_cond[expr]
474 if not c then werror("expected condition operand") end
475 return shl(bit.bxor(c, inv), 12)
476end
477
478local function parse_load(params, nparams, n, op)
479 if params[n+2] then werror("too many operands") end
480 local scale = shr(op, 30)
481 local pn, p2 = params[n], params[n+1]
482 local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
483 if not p1 then
484 if not p2 then
485 local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
486 if reg and tailr ~= "" then
487 local base, tp = parse_reg_base(reg)
488 if tp then
489 waction("IMML", scale, format(tp.ctypefmt, tailr))
490 return op + base
491 end
492 end
493 end
494 werror("expected address operand")
495 end
496 if p2 then
497 if wb == "!" then werror("bad use of '!'") end
498 op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
499 elseif wb == "!" then
500 local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
501 if not p1a then werror("bad use of '!'") end
502 op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
503 else
504 local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
505 op = op + parse_reg_base(p1a)
506 if p2a ~= "" then
507 local imm = match(p2a, "^,%s*#(.*)$")
508 if imm then
509 op = op + parse_imm_load(imm, scale)
510 else
511 local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
512 op = op + parse_reg(p2b, 16) + 0x00200800
513 if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
514 werror("bad index register type")
515 end
516 if p3b == "" then
517 if parse_reg_type ~= "x" then werror("bad index register type") end
518 op = op + 0x6000
519 else
520 if p3s == "" or p3s == "#0" then
521 elseif p3s == "#"..scale then
522 op = op + 0x1000
523 else
524 werror("bad scale")
525 end
526 if parse_reg_type == "x" then
527 if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
528 elseif p3b == "sxtx" then op = op + 0xe000
529 else
530 werror("bad extend/shift specifier")
531 end
532 else
533 if p3b == "uxtw" then op = op + 0x4000
534 elseif p3b == "sxtw" then op = op + 0xc000
535 else
536 werror("bad extend/shift specifier")
537 end
538 end
539 end
540 end
541 else
542 if wb == "!" then werror("bad use of '!'") end
543 op = op + 0x01000000
544 end
545 end
546 return op
547end
548
549local function parse_load_pair(params, nparams, n, op)
550 if params[n+2] then werror("too many operands") end
551 local pn, p2 = params[n], params[n+1]
552 local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
553 local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
554 if not p1 then
555 if not p2 then
556 local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
557 if reg and tailr ~= "" then
558 local base, tp = parse_reg_base(reg)
559 if tp then
560 waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
561 return op + base + 0x01000000
562 end
563 end
564 end
565 werror("expected address operand")
566 end
567 if p2 then
568 if wb == "!" then werror("bad use of '!'") end
569 op = op + 0x00800000
570 else
571 local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
572 if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
573 op = op + (wb == "!" and 0x01800000 or 0x01000000)
574 end
575 return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
576end
577
578local function parse_label(label, def)
579 local prefix = label:sub(1, 2)
580 -- =>label (pc label reference)
581 if prefix == "=>" then
582 return "PC", 0, label:sub(3)
583 end
584 -- ->name (global label reference)
585 if prefix == "->" then
586 return "LG", map_global[label:sub(3)]
587 end
588 if def then
589 -- [1-9] (local label definition)
590 if match(label, "^[1-9]$") then
591 return "LG", 10+tonumber(label)
592 end
593 else
594 -- [<>][1-9] (local label reference)
595 local dir, lnum = match(label, "^([<>])([1-9])$")
596 if dir then -- Fwd: 1-9, Bkwd: 11-19.
597 return "LG", lnum + (dir == ">" and 0 or 10)
598 end
599 -- extern label (extern label reference)
600 local extname = match(label, "^extern%s+(%S+)$")
601 if extname then
602 return "EXT", map_extern[extname]
603 end
604 -- &expr (pointer)
605 if label:sub(1, 1) == "&" then
606 return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
607 end
608 end
609end
610
611local function branch_type(op)
612 if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
613 elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
614 band(op, 0x3b000000) == 0x18000000 then
615 return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
616 elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
617 elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
618 elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
619 else
620 assert(false, "unknown branch type")
621 end
622end
623
624------------------------------------------------------------------------------
625
626local map_op, op_template
627
628local function op_alias(opname, f)
629 return function(params, nparams)
630 if not params then return "-> "..opname:sub(1, -3) end
631 f(params, nparams)
632 op_template(params, map_op[opname], nparams)
633 end
634end
635
636local function alias_bfx(p)
637 p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
638end
639
640local function alias_bfiz(p)
641 parse_reg(p[1], 0, true)
642 if parse_reg_type == "w" then
643 p[3] = "#(32-("..p[3]:sub(2).."))%32"
644 p[4] = "#("..p[4]:sub(2)..")-1"
645 else
646 p[3] = "#(64-("..p[3]:sub(2).."))%64"
647 p[4] = "#("..p[4]:sub(2)..")-1"
648 end
649end
650
651local alias_lslimm = op_alias("ubfm_4", function(p)
652 parse_reg(p[1], 0, true)
653 local sh = p[3]:sub(2)
654 if parse_reg_type == "w" then
655 p[3] = "#(32-("..sh.."))%32"
656 p[4] = "#31-("..sh..")"
657 else
658 p[3] = "#(64-("..sh.."))%64"
659 p[4] = "#63-("..sh..")"
660 end
661end)
662
 663-- Template strings for ARM64 instructions.
664map_op = {
665 -- Basic data processing instructions.
666 add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
667 add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
668 adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
669 adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
670 cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
671 cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
672
673 sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
674 sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
675 subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
676 subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
677 cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
678 cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
679
680 neg_2 = "4b0003e0DMg",
681 neg_3 = "4b0003e0DMSg",
682 negs_2 = "6b0003e0DMg",
683 negs_3 = "6b0003e0DMSg",
684
685 adc_3 = "1a000000DNMg",
686 adcs_3 = "3a000000DNMg",
687 sbc_3 = "5a000000DNMg",
688 sbcs_3 = "7a000000DNMg",
689 ngc_2 = "5a0003e0DMg",
690 ngcs_2 = "7a0003e0DMg",
691
692 and_3 = "0a000000DNMg|12000000pDNig",
693 and_4 = "0a000000DNMSg",
694 orr_3 = "2a000000DNMg|32000000pDNig",
695 orr_4 = "2a000000DNMSg",
696 eor_3 = "4a000000DNMg|52000000pDNig",
697 eor_4 = "4a000000DNMSg",
698 ands_3 = "6a000000DNMg|72000000DNig",
699 ands_4 = "6a000000DNMSg",
700 tst_2 = "6a00001fNMg|7200001fNig",
701 tst_3 = "6a00001fNMSg",
702
703 bic_3 = "0a200000DNMg",
704 bic_4 = "0a200000DNMSg",
705 orn_3 = "2a200000DNMg",
706 orn_4 = "2a200000DNMSg",
707 eon_3 = "4a200000DNMg",
708 eon_4 = "4a200000DNMSg",
709 bics_3 = "6a200000DNMg",
710 bics_4 = "6a200000DNMSg",
711
712 movn_2 = "12800000DWg",
713 movn_3 = "12800000DWRg",
714 movz_2 = "52800000DWg",
715 movz_3 = "52800000DWRg",
716 movk_2 = "72800000DWg",
717 movk_3 = "72800000DWRg",
718
719 -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
720 mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
721 mov_3 = "2a0003e0DMSg",
722 mvn_2 = "2a2003e0DMg",
723 mvn_3 = "2a2003e0DMSg",
724
725 adr_2 = "10000000DBx",
726 adrp_2 = "90000000DBx",
727
728 csel_4 = "1a800000DNMCg",
729 csinc_4 = "1a800400DNMCg",
730 csinv_4 = "5a800000DNMCg",
731 csneg_4 = "5a800400DNMCg",
732 cset_2 = "1a9f07e0Dcg",
733 csetm_2 = "5a9f03e0Dcg",
734 cinc_3 = "1a800400DNmcg",
735 cinv_3 = "5a800000DNmcg",
736 cneg_3 = "5a800400DNmcg",
737
738 ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
739 ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
740
741 madd_4 = "1b000000DNMAg",
742 msub_4 = "1b008000DNMAg",
743 mul_3 = "1b007c00DNMg",
744 mneg_3 = "1b00fc00DNMg",
745
746 smaddl_4 = "9b200000DxNMwAx",
747 smsubl_4 = "9b208000DxNMwAx",
748 smull_3 = "9b207c00DxNMw",
749 smnegl_3 = "9b20fc00DxNMw",
750 smulh_3 = "9b407c00DNMx",
751 umaddl_4 = "9ba00000DxNMwAx",
752 umsubl_4 = "9ba08000DxNMwAx",
753 umull_3 = "9ba07c00DxNMw",
754 umnegl_3 = "9ba0fc00DxNMw",
755 umulh_3 = "9bc07c00DNMx",
756
757 udiv_3 = "1ac00800DNMg",
758 sdiv_3 = "1ac00c00DNMg",
759
760 -- Bit operations.
761 sbfm_4 = "13000000DN12w|93400000DN12x",
762 bfm_4 = "33000000DN12w|b3400000DN12x",
763 ubfm_4 = "53000000DN12w|d3400000DN12x",
764 extr_4 = "13800000DNM2w|93c00000DNM2x",
765
766 sxtb_2 = "13001c00DNw|93401c00DNx",
767 sxth_2 = "13003c00DNw|93403c00DNx",
768 sxtw_2 = "93407c00DxNw",
769 uxtb_2 = "53001c00DNw",
770 uxth_2 = "53003c00DNw",
771
772 sbfx_4 = op_alias("sbfm_4", alias_bfx),
773 bfxil_4 = op_alias("bfm_4", alias_bfx),
774 ubfx_4 = op_alias("ubfm_4", alias_bfx),
775 sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
776 bfi_4 = op_alias("bfm_4", alias_bfiz),
777 ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
778
779 lsl_3 = function(params, nparams)
780 if params and params[3]:byte() == 35 then
781 return alias_lslimm(params, nparams)
782 else
783 return op_template(params, "1ac02000DNMg", nparams)
784 end
785 end,
786 lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
787 asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
788 ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
789
790 clz_2 = "5ac01000DNg",
791 cls_2 = "5ac01400DNg",
792 rbit_2 = "5ac00000DNg",
793 rev_2 = "5ac00800DNw|dac00c00DNx",
794 rev16_2 = "5ac00400DNg",
795 rev32_2 = "dac00800DNx",
796
797 -- Loads and stores.
798 ["strb_*"] = "38000000DwL",
799 ["ldrb_*"] = "38400000DwL",
800 ["ldrsb_*"] = "38c00000DwL|38800000DxL",
801 ["strh_*"] = "78000000DwL",
802 ["ldrh_*"] = "78400000DwL",
803 ["ldrsh_*"] = "78c00000DwL|78800000DxL",
804 ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
805 ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
806 ["ldrsw_*"] = "98000000DxB|b8800000DxL",
807 -- NOTE: ldur etc. are handled by ldr et al.
808
809 ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
810 ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
811 ["ldpsw_*"] = "68400000DAxP",
812
813 -- Branches.
814 b_1 = "14000000B",
815 bl_1 = "94000000B",
816 blr_1 = "d63f0000Nx",
817 br_1 = "d61f0000Nx",
818 ret_0 = "d65f03c0",
819 ret_1 = "d65f0000Nx",
820 -- b.cond is added below.
821 cbz_2 = "34000000DBg",
822 cbnz_2 = "35000000DBg",
823 tbz_3 = "36000000DTBw|36000000DTBx",
824 tbnz_3 = "37000000DTBw|37000000DTBx",
825
826 -- ARM64e: Pointer authentication codes (PAC).
827 blraaz_1 = "d63f081fNx",
828 braa_2 = "d71f0800NDx",
829 braaz_1 = "d61f081fNx",
830 pacibsp_0 = "d503237f",
831 retab_0 = "d65f0fff",
832
833 -- Miscellaneous instructions.
834 -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
835 -- TODO: sys, sysl, ic, dc, at, tlbi
836 -- TODO: hint, yield, wfe, wfi, sev, sevl
837 -- TODO: clrex, dsb, dmb, isb
838 nop_0 = "d503201f",
839 brk_0 = "d4200000",
840 brk_1 = "d4200000W",
841
842 -- Floating point instructions.
843 fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
844 fabs_2 = "1e20c000DNf",
845 fneg_2 = "1e214000DNf",
846 fsqrt_2 = "1e21c000DNf",
847
848 fcvt_2 = "1e22c000DdNs|1e624000DsNd",
849
850 -- TODO: half-precision and fixed-point conversions.
851 fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
852 fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
853 fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
854 fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
855 fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
856 fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
857 fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
858 fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
859 fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
860 fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
861
862 scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
863 ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
864
865 frintn_2 = "1e244000DNf",
866 frintp_2 = "1e24c000DNf",
867 frintm_2 = "1e254000DNf",
868 frintz_2 = "1e25c000DNf",
869 frinta_2 = "1e264000DNf",
870 frintx_2 = "1e274000DNf",
871 frinti_2 = "1e27c000DNf",
872
873 fadd_3 = "1e202800DNMf",
874 fsub_3 = "1e203800DNMf",
875 fmul_3 = "1e200800DNMf",
876 fnmul_3 = "1e208800DNMf",
877 fdiv_3 = "1e201800DNMf",
878
879 fmadd_4 = "1f000000DNMAf",
880 fmsub_4 = "1f008000DNMAf",
881 fnmadd_4 = "1f200000DNMAf",
882 fnmsub_4 = "1f208000DNMAf",
883
884 fmax_3 = "1e204800DNMf",
885 fmaxnm_3 = "1e206800DNMf",
886 fmin_3 = "1e205800DNMf",
887 fminnm_3 = "1e207800DNMf",
888
889 fcmp_2 = "1e202000NMf|1e202008NZf",
890 fcmpe_2 = "1e202010NMf|1e202018NZf",
891
892 fccmp_4 = "1e200400NMVCf",
893 fccmpe_4 = "1e200410NMVCf",
894
895 fcsel_4 = "1e200c00DNMCf",
896
897 -- TODO: crc32*, aes*, sha*, pmull
898 -- TODO: SIMD instructions.
899}
900
901for cond,c in pairs(map_cond) do
902 map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
903end
904
905------------------------------------------------------------------------------
906
907-- Handle opcodes defined with template strings.
908local function parse_template(params, template, nparams, pos)
909 local op = tonumber(template:sub(1, 8), 16)
910 local n = 1
911 local rtt = {}
912
913 parse_reg_type = false
914
915 -- Process each character.
916 for p in gmatch(template:sub(9), ".") do
917 local q = params[n]
918 if p == "D" then
919 op = op + parse_reg(q, 0); n = n + 1
920 elseif p == "N" then
921 op = op + parse_reg(q, 5); n = n + 1
922 elseif p == "M" then
923 op = op + parse_reg(q, 16); n = n + 1
924 elseif p == "A" then
925 op = op + parse_reg(q, 10); n = n + 1
926 elseif p == "m" then
927 op = op + parse_reg(params[n-1], 16)
928
929 elseif p == "p" then
930 if q == "sp" then params[n] = "@x31" end
931 elseif p == "g" then
932 if parse_reg_type == "x" then
933 op = op + 0x80000000
934 elseif parse_reg_type ~= "w" then
935 werror("bad register type")
936 end
937 parse_reg_type = false
938 elseif p == "f" then
939 if parse_reg_type == "d" then
940 op = op + 0x00400000
941 elseif parse_reg_type ~= "s" then
942 werror("bad register type")
943 end
944 parse_reg_type = false
945 elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
946 if parse_reg_type ~= p then
947 werror("register size mismatch")
948 end
949 parse_reg_type = false
950
951 elseif p == "L" then
952 op = parse_load(params, nparams, n, op)
953 elseif p == "P" then
954 op = parse_load_pair(params, nparams, n, op)
955
956 elseif p == "B" then
957 local mode, v, s = parse_label(q, false); n = n + 1
958 if not mode then werror("bad label `"..q.."'") end
959 local m = branch_type(op)
960 if mode == "A" then
961 waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
962 actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
963 else
964 waction("REL_"..mode, v+m, s, 1)
965 end
966
967 elseif p == "I" then
968 op = op + parse_imm12(q); n = n + 1
969 elseif p == "i" then
970 op = op + parse_imm13(q); n = n + 1
971 elseif p == "W" then
972 op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
973 elseif p == "T" then
974 op = op + parse_imm6(q); n = n + 1
975 elseif p == "1" then
976 op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
977 elseif p == "2" then
978 op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
979 elseif p == "5" then
980 op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
981 elseif p == "V" then
982 op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
983 elseif p == "F" then
984 op = op + parse_fpimm(q); n = n + 1
985 elseif p == "Z" then
986 if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
987 n = n + 1
988
989 elseif p == "S" then
990 op = op + parse_shift(q); n = n + 1
991 elseif p == "X" then
992 op = op + parse_extend(q); n = n + 1
993 elseif p == "R" then
994 op = op + parse_lslx16(q); n = n + 1
995 elseif p == "C" then
996 op = op + parse_cond(q, 0); n = n + 1
997 elseif p == "c" then
998 op = op + parse_cond(q, 1); n = n + 1
999
1000 else
1001 assert(false)
1002 end
1003 end
1004 wputpos(pos, op)
1005end
1006
1007function op_template(params, template, nparams)
1008 if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
1009
1010 -- Limit number of section buffer positions used by a single dasm_put().
1011 -- A single opcode needs a maximum of 4 positions.
1012 if secpos+4 > maxsecpos then wflush() end
1013 local pos = wpos()
1014 local lpos, apos, spos = #actlist, #actargs, secpos
1015
1016 local ok, err
1017 for t in gmatch(template, "[^|]+") do
1018 ok, err = pcall(parse_template, params, t, nparams, pos)
1019 if ok then return end
1020 secpos = spos
1021 actlist[lpos+1] = nil
1022 actlist[lpos+2] = nil
1023 actlist[lpos+3] = nil
1024 actlist[lpos+4] = nil
1025 actargs[apos+1] = nil
1026 actargs[apos+2] = nil
1027 actargs[apos+3] = nil
1028 actargs[apos+4] = nil
1029 end
1030 error(err, 0)
1031end
1032
1033map_op[".template__"] = op_template
1034
1035------------------------------------------------------------------------------
1036
1037-- Pseudo-opcode to mark the position where the action list is to be emitted.
1038map_op[".actionlist_1"] = function(params)
1039 if not params then return "cvar" end
1040 local name = params[1] -- No syntax check. You get to keep the pieces.
1041 wline(function(out) writeactions(out, name) end)
1042end
1043
1044-- Pseudo-opcode to mark the position where the global enum is to be emitted.
1045map_op[".globals_1"] = function(params)
1046 if not params then return "prefix" end
1047 local prefix = params[1] -- No syntax check. You get to keep the pieces.
1048 wline(function(out) writeglobals(out, prefix) end)
1049end
1050
1051-- Pseudo-opcode to mark the position where the global names are to be emitted.
1052map_op[".globalnames_1"] = function(params)
1053 if not params then return "cvar" end
1054 local name = params[1] -- No syntax check. You get to keep the pieces.
1055 wline(function(out) writeglobalnames(out, name) end)
1056end
1057
1058-- Pseudo-opcode to mark the position where the extern names are to be emitted.
1059map_op[".externnames_1"] = function(params)
1060 if not params then return "cvar" end
1061 local name = params[1] -- No syntax check. You get to keep the pieces.
1062 wline(function(out) writeexternnames(out, name) end)
1063end
1064
1065------------------------------------------------------------------------------
1066
1067-- Label pseudo-opcode (converted from trailing colon form).
1068map_op[".label_1"] = function(params)
1069 if not params then return "[1-9] | ->global | =>pcexpr" end
1070 if secpos+1 > maxsecpos then wflush() end
1071 local mode, n, s = parse_label(params[1], true)
1072 if not mode or mode == "EXT" then werror("bad label definition") end
1073 waction("LABEL_"..mode, n, s, 1)
1074end
1075
1076------------------------------------------------------------------------------
1077
1078-- Pseudo-opcodes for data storage.
1079local function op_data(params)
1080 if not params then return "imm..." end
1081 local sz = params.op == ".long" and 4 or 8
1082 for _,p in ipairs(params) do
1083 local imm = parse_number(p)
1084 if imm then
1085 local n = tobit(imm)
1086 if n == imm or (n < 0 and n + 2^32 == imm) then
1087 wputw(n < 0 and n + 2^32 or n)
1088 if sz == 8 then
1089 wputw(imm < 0 and 0xffffffff or 0)
1090 end
1091 elseif sz == 4 then
1092 werror("bad immediate `"..p.."'")
1093 else
1094 imm = nil
1095 end
1096 end
1097 if not imm then
1098 local mode, v, s = parse_label(p, false)
1099 if sz == 4 then
1100 if mode then werror("label does not fit into .long") end
1101 waction("IMMV", 0, p)
1102 elseif mode and mode ~= "A" then
1103 waction("REL_"..mode, v+0x8000, s, 1)
1104 else
1105 if mode == "A" then p = s end
1106 waction("IMMV", 0, format("(unsigned int)(%s)", p))
1107 waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p))
1108 end
1109 end
1110 if secpos+2 > maxsecpos then wflush() end
1111 end
1112end
1113map_op[".long_*"] = op_data
1114map_op[".quad_*"] = op_data
1115map_op[".addr_*"] = op_data
1116
1117-- Alignment pseudo-opcode.
1118map_op[".align_1"] = function(params)
1119 if not params then return "numpow2" end
1120 if secpos+1 > maxsecpos then wflush() end
1121 local align = tonumber(params[1])
1122 if align then
1123 local x = align
1124 -- Must be a power of 2 in the range (2 ... 256).
1125 for i=1,8 do
1126 x = x / 2
1127 if x == 1 then
1128 waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
1129 return
1130 end
1131 end
1132 end
1133 werror("bad alignment")
1134end
1135
1136------------------------------------------------------------------------------
1137
1138-- Pseudo-opcode for (primitive) type definitions (map to C types).
1139map_op[".type_3"] = function(params, nparams)
1140 if not params then
1141 return nparams == 2 and "name, ctype" or "name, ctype, reg"
1142 end
1143 local name, ctype, reg = params[1], params[2], params[3]
1144 if not match(name, "^[%a_][%w_]*$") then
1145 werror("bad type name `"..name.."'")
1146 end
1147 local tp = map_type[name]
1148 if tp then
1149 werror("duplicate type `"..name.."'")
1150 end
1151 -- Add #type to defines. A bit unclean to put it in map_archdef.
1152 map_archdef["#"..name] = "sizeof("..ctype..")"
1153 -- Add new type and emit shortcut define.
1154 local num = ctypenum + 1
1155 map_type[name] = {
1156 ctype = ctype,
1157 ctypefmt = format("Dt%X(%%s)", num),
1158 reg = reg,
1159 }
1160 wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
1161 ctypenum = num
1162end
1163map_op[".type_2"] = map_op[".type_3"]
1164
1165-- Dump type definitions.
1166local function dumptypes(out, lvl)
1167 local t = {}
1168 for name in pairs(map_type) do t[#t+1] = name end
1169 sort(t)
1170 out:write("Type definitions:\n")
1171 for _,name in ipairs(t) do
1172 local tp = map_type[name]
1173 local reg = tp.reg or ""
1174 out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
1175 end
1176 out:write("\n")
1177end
1178
1179------------------------------------------------------------------------------
1180
1181-- Set the current section.
1182function _M.section(num)
1183 waction("SECTION", num)
1184 wflush(true) -- SECTION is a terminal action.
1185end
1186
1187------------------------------------------------------------------------------
1188
1189-- Dump architecture description.
1190function _M.dumparch(out)
1191 out:write(format("DynASM %s version %s, released %s\n\n",
1192 _info.arch, _info.version, _info.release))
1193 dumpactions(out)
1194end
1195
1196-- Dump all user defined elements.
1197function _M.dumpdef(out, lvl)
1198 dumptypes(out, lvl)
1199 dumpglobals(out, lvl)
1200 dumpexterns(out, lvl)
1201end
1202
1203------------------------------------------------------------------------------
1204
1205-- Pass callbacks from/to the DynASM core.
1206function _M.passcb(wl, we, wf, ww)
1207 wline, werror, wfatal, wwarn = wl, we, wf, ww
1208 return wflush
1209end
1210
1211-- Setup the arch-specific module.
1212function _M.setup(arch, opt)
1213 g_arch, g_opt = arch, opt
1214end
1215
1216-- Merge the core maps and the arch-specific maps.
1217function _M.mergemaps(map_coreop, map_def)
1218 setmetatable(map_op, { __index = map_coreop })
1219 setmetatable(map_def, { __index = map_archdef })
1220 return map_op, map_def
1221end
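mergemaps chains the arch-specific opcode and define tables to the core ones with __index metatables, so a lookup that misses the arch table transparently reaches the core pseudo-ops. A toy illustration of that fallback, using stand-in tables (both contents here are hypothetical):

-- Sketch of the lookup chain mergemaps creates.
local map_coreop = { [".macro_*"] = "core pseudo-op handler" }  -- stand-in
local map_op = setmetatable({ add_3 = "arch template" }, { __index = map_coreop })
print(map_op["add_3"], map_op[".macro_*"])   --> arch template   core pseudo-op handler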
1222
1223return _M
1224
1225------------------------------------------------------------------------------
1226
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
index 7f3d6c35..7800e933 100644
--- a/dynasm/dasm_mips.h
+++ b/dynasm/dasm_mips.h
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -69,7 +69,7 @@ struct dasm_State {
69 size_t lgsize; 69 size_t lgsize;
70 int *pclabels; /* PC label chains/pos ptrs. */ 70 int *pclabels; /* PC label chains/pos ptrs. */
71 size_t pcsize; 71 size_t pcsize;
72 void **globals; /* Array of globals (bias -10). */ 72 void **globals; /* Array of globals. */
73 dasm_Section *section; /* Pointer to active section. */ 73 dasm_Section *section; /* Pointer to active section. */
74 size_t codesize; /* Total size of all code sections. */ 74 size_t codesize; /* Total size of all code sections. */
75 int maxsection; /* 0 <= sectionidx < maxsection. */ 75 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
86{ 86{
87 dasm_State *D; 87 dasm_State *D;
88 size_t psz = 0; 88 size_t psz = 0;
89 int i;
90 Dst_REF = NULL; 89 Dst_REF = NULL;
91 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 90 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
92 D = Dst_REF; 91 D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
97 D->pcsize = 0; 96 D->pcsize = 0;
98 D->globals = NULL; 97 D->globals = NULL;
99 D->maxsection = maxsection; 98 D->maxsection = maxsection;
100 for (i = 0; i < maxsection; i++) { 99 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
101 D->sections[i].buf = NULL; /* Need this for pass3. */
102 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
103 D->sections[i].bsize = 0;
104 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
105 }
106} 100}
107 101
108/* Free DynASM state. */ 102/* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
122void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 116void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
123{ 117{
124 dasm_State *D = Dst_REF; 118 dasm_State *D = Dst_REF;
125 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 119 D->globals = gl;
126 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 120 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
127} 121}
128 122
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
147 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 141 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
148 for (i = 0; i < D->maxsection; i++) { 142 for (i = 0; i < D->maxsection; i++) {
149 D->sections[i].pos = DASM_SEC2POS(i); 143 D->sections[i].pos = DASM_SEC2POS(i);
144 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
150 D->sections[i].ofs = 0; 145 D->sections[i].ofs = 0;
151 } 146 }
152} 147}
@@ -155,10 +150,10 @@ void dasm_setup(Dst_DECL, const void *actionlist)
155#ifdef DASM_CHECKS 150#ifdef DASM_CHECKS
156#define CK(x, st) \ 151#define CK(x, st) \
157 do { if (!(x)) { \ 152 do { if (!(x)) { \
158 D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) 153 D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
159#define CKPL(kind, st) \ 154#define CKPL(kind, st) \
160 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ 155 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
161 D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) 156 D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
162#else 157#else
163#define CK(x, st) ((void)0) 158#define CK(x, st) ((void)0)
164#define CKPL(kind, st) ((void)0) 159#define CKPL(kind, st) ((void)0)
@@ -231,7 +226,7 @@ void dasm_put(Dst_DECL, int start, ...)
231 *pl = -pos; /* Label exists now. */ 226 *pl = -pos; /* Label exists now. */
232 b[pos++] = ofs; /* Store pass1 offset estimate. */ 227 b[pos++] = ofs; /* Store pass1 offset estimate. */
233 break; 228 break;
234 case DASM_IMM: 229 case DASM_IMM: case DASM_IMMS:
235#ifdef DASM_CHECKS 230#ifdef DASM_CHECKS
236 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); 231 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
237#endif 232#endif
@@ -273,7 +268,7 @@ int dasm_link(Dst_DECL, size_t *szp)
273 268
274 { /* Handle globals not defined in this translation unit. */ 269 { /* Handle globals not defined in this translation unit. */
275 int idx; 270 int idx;
276 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 271 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
277 int n = D->lglabels[idx]; 272 int n = D->lglabels[idx];
278 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 273 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
279 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 274 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 294 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 295 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 296 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 297 case DASM_IMM: case DASM_IMMS: pos++; break;
303 } 298 }
304 } 299 }
305 stop: (void)0; 300 stop: (void)0;
@@ -314,7 +309,7 @@ int dasm_link(Dst_DECL, size_t *szp)
314 309
315#ifdef DASM_CHECKS 310#ifdef DASM_CHECKS
316#define CK(x, st) \ 311#define CK(x, st) \
317 do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) 312 do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
318#else 313#else
319#define CK(x, st) ((void)0) 314#define CK(x, st) ((void)0)
320#endif 315#endif
@@ -349,25 +344,32 @@ int dasm_encode(Dst_DECL, void *buffer)
349 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; 344 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
350 break; 345 break;
351 case DASM_REL_LG: 346 case DASM_REL_LG:
352 CK(n >= 0, UNDEF_LG); 347 if (n < 0) {
348 n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
349 goto patchrel;
350 }
353 /* fallthrough */ 351 /* fallthrough */
354 case DASM_REL_PC: 352 case DASM_REL_PC:
355 CK(n >= 0, UNDEF_PC); 353 CK(n >= 0, UNDEF_PC);
356 n = *DASM_POS2PTR(D, n); 354 n = *DASM_POS2PTR(D, n);
357 if (ins & 2048) 355 if (ins & 2048)
358 n = n - (int)((char *)cp - base); 356 n = (n + (int)(size_t)base) & 0x0fffffff;
359 else 357 else
360 n = (n + (int)base) & 0x0fffffff; 358 n = n - (int)((char *)cp - base);
361 patchrel: 359 patchrel: {
360 unsigned int e = 16 + ((ins >> 12) & 15);
362 CK((n & 3) == 0 && 361 CK((n & 3) == 0 &&
363 ((n + ((ins & 2048) ? 0x00020000 : 0)) >> 362 ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
364 ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); 363 cp[-1] |= ((n>>2) & ((1<<e)-1));
365 cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); 364 }
366 break; 365 break;
367 case DASM_LABEL_LG: 366 case DASM_LABEL_LG:
368 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 367 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
369 break; 368 break;
370 case DASM_LABEL_PC: break; 369 case DASM_LABEL_PC: break;
370 case DASM_IMMS:
371 cp[-1] |= ((n>>3) & 4); n &= 0x1f;
372 /* fallthrough */
371 case DASM_IMM: 373 case DASM_IMM:
372 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 374 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
373 break; 375 break;
@@ -410,7 +412,7 @@ int dasm_checkstep(Dst_DECL, int secmatch)
410 } 412 }
411 if (D->status == DASM_S_OK && secmatch >= 0 && 413 if (D->status == DASM_S_OK && secmatch >= 0 &&
412 D->section != &D->sections[secmatch]) 414 D->section != &D->sections[secmatch])
413 D->status = DASM_S_MATCH_SEC|(D->section-D->sections); 415 D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
414 return D->status; 416 return D->status;
415} 417}
416#endif 418#endif
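The dasm_mips.h changes generalize relocation patching in the encoder: the field width now comes from bits 12-15 of the action word (e = 16 + ((ins>>12)&15)), so the same patchrel code handles 16-bit branches, the 21-bit beqzc/bnezc offsets and the 26-bit bc/balc and j/jal fields, while bit 11 of the action selects absolute versus PC-relative patching. Undefined global labels are also no longer an error but are patched PC-relative through the now-unbiased globals array. A rough Lua mirror of the new patchrel arithmetic, with made-up operand values:

-- Sketch only: width e from action bits 12-15, offset patched in words.
local bit = require("bit")
local band, bor, shr, shl = bit.band, bit.bor, bit.rshift, bit.lshift

local function patchrel(prev_ins, action, n)   -- n = byte displacement
  local e = 16 + band(shr(action, 12), 15)     -- 16, 21 or 26 bit field
  assert(band(n, 3) == 0, "target not word aligned")
  return bor(prev_ins, band(shr(n, 2), shl(1, e) - 1))
end

-- A 16-bit branch field (action bits 12-15 = 0) with a +64 byte displacement:
print(bit.tohex(patchrel(0x10000000, 0x0000, 64)))   --> 10000010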
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
index e2ff17f0..1c605b68 100644
--- a/dynasm/dasm_mips.lua
+++ b/dynasm/dasm_mips.lua
@@ -1,17 +1,20 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM MIPS module. 2-- DynASM MIPS32/MIPS64 module.
3-- 3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------ 6------------------------------------------------------------------------------
7 7
8local mips64 = mips64
9local mipsr6 = _map_def.MIPSR6
10
8-- Module information: 11-- Module information:
9local _info = { 12local _info = {
10 arch = "mips", 13 arch = mips64 and "mips64" or "mips",
11 description = "DynASM MIPS module", 14 description = "DynASM MIPS32/MIPS64 module",
12 version = "1.3.0", 15 version = "1.5.0",
13 vernum = 10300, 16 vernum = 10500,
14 release = "2012-01-23", 17 release = "2021-05-02",
15 author = "Mike Pall", 18 author = "Mike Pall",
16 license = "MIT", 19 license = "MIT",
17} 20}
@@ -27,7 +30,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
27local match, gmatch = _s.match, _s.gmatch 30local match, gmatch = _s.match, _s.gmatch
28local concat, sort = table.concat, table.sort 31local concat, sort = table.concat, table.sort
29local bit = bit or require("bit") 32local bit = bit or require("bit")
30local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex 33local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
34local tohex = bit.tohex
31 35
32-- Inherited tables and callbacks. 36-- Inherited tables and callbacks.
33local g_opt, g_arch 37local g_opt, g_arch
@@ -38,7 +42,7 @@ local wline, werror, wfatal, wwarn
38local action_names = { 42local action_names = {
39 "STOP", "SECTION", "ESC", "REL_EXT", 43 "STOP", "SECTION", "ESC", "REL_EXT",
40 "ALIGN", "REL_LG", "LABEL_LG", 44 "ALIGN", "REL_LG", "LABEL_LG",
41 "REL_PC", "LABEL_PC", "IMM", 45 "REL_PC", "LABEL_PC", "IMM", "IMMS",
42} 46}
43 47
44-- Maximum number of section buffer positions for dasm_put(). 48-- Maximum number of section buffer positions for dasm_put().
@@ -235,7 +239,6 @@ local map_op = {
235 bne_3 = "14000000STB", 239 bne_3 = "14000000STB",
236 blez_2 = "18000000SB", 240 blez_2 = "18000000SB",
237 bgtz_2 = "1c000000SB", 241 bgtz_2 = "1c000000SB",
238 addi_3 = "20000000TSI",
239 li_2 = "24000000TI", 242 li_2 = "24000000TI",
240 addiu_3 = "24000000TSI", 243 addiu_3 = "24000000TSI",
241 slti_3 = "28000000TSI", 244 slti_3 = "28000000TSI",
@@ -245,70 +248,52 @@ local map_op = {
245 ori_3 = "34000000TSU", 248 ori_3 = "34000000TSU",
246 xori_3 = "38000000TSU", 249 xori_3 = "38000000TSU",
247 lui_2 = "3c000000TU", 250 lui_2 = "3c000000TU",
248 beqzl_2 = "50000000SB", 251 daddiu_3 = mips64 and "64000000TSI",
249 beql_3 = "50000000STB", 252 ldl_2 = mips64 and "68000000TO",
250 bnezl_2 = "54000000SB", 253 ldr_2 = mips64 and "6c000000TO",
251 bnel_3 = "54000000STB",
252 blezl_2 = "58000000SB",
253 bgtzl_2 = "5c000000SB",
254 lb_2 = "80000000TO", 254 lb_2 = "80000000TO",
255 lh_2 = "84000000TO", 255 lh_2 = "84000000TO",
256 lwl_2 = "88000000TO",
257 lw_2 = "8c000000TO", 256 lw_2 = "8c000000TO",
258 lbu_2 = "90000000TO", 257 lbu_2 = "90000000TO",
259 lhu_2 = "94000000TO", 258 lhu_2 = "94000000TO",
260 lwr_2 = "98000000TO", 259 lwu_2 = mips64 and "9c000000TO",
261 sb_2 = "a0000000TO", 260 sb_2 = "a0000000TO",
262 sh_2 = "a4000000TO", 261 sh_2 = "a4000000TO",
263 swl_2 = "a8000000TO",
264 sw_2 = "ac000000TO", 262 sw_2 = "ac000000TO",
265 swr_2 = "b8000000TO",
266 cache_2 = "bc000000NO",
267 ll_2 = "c0000000TO",
268 lwc1_2 = "c4000000HO", 263 lwc1_2 = "c4000000HO",
269 pref_2 = "cc000000NO",
270 ldc1_2 = "d4000000HO", 264 ldc1_2 = "d4000000HO",
271 sc_2 = "e0000000TO", 265 ld_2 = mips64 and "dc000000TO",
272 swc1_2 = "e4000000HO", 266 swc1_2 = "e4000000HO",
273 sdc1_2 = "f4000000HO", 267 sdc1_2 = "f4000000HO",
268 sd_2 = mips64 and "fc000000TO",
274 269
275 -- Opcode SPECIAL. 270 -- Opcode SPECIAL.
276 nop_0 = "00000000", 271 nop_0 = "00000000",
277 sll_3 = "00000000DTA", 272 sll_3 = "00000000DTA",
278 movf_2 = "00000001DS", 273 sextw_2 = "00000000DT",
279 movf_3 = "00000001DSC",
280 movt_2 = "00010001DS",
281 movt_3 = "00010001DSC",
282 srl_3 = "00000002DTA", 274 srl_3 = "00000002DTA",
283 rotr_3 = "00200002DTA", 275 rotr_3 = "00200002DTA",
284 sra_3 = "00000003DTA", 276 sra_3 = "00000003DTA",
285 sllv_3 = "00000004DTS", 277 sllv_3 = "00000004DTS",
286 srlv_3 = "00000006DTS", 278 srlv_3 = "00000006DTS",
287 rotrv_3 = "00000046DTS", 279 rotrv_3 = "00000046DTS",
280 drotrv_3 = mips64 and "00000056DTS",
288 srav_3 = "00000007DTS", 281 srav_3 = "00000007DTS",
289 jr_1 = "00000008S",
290 jalr_1 = "0000f809S", 282 jalr_1 = "0000f809S",
291 jalr_2 = "00000009DS", 283 jalr_2 = "00000009DS",
292 movz_3 = "0000000aDST",
293 movn_3 = "0000000bDST",
294 syscall_0 = "0000000c", 284 syscall_0 = "0000000c",
295 syscall_1 = "0000000cY", 285 syscall_1 = "0000000cY",
296 break_0 = "0000000d", 286 break_0 = "0000000d",
297 break_1 = "0000000dY", 287 break_1 = "0000000dY",
298 sync_0 = "0000000f", 288 sync_0 = "0000000f",
299 mfhi_1 = "00000010D", 289 dsllv_3 = mips64 and "00000014DTS",
300 mthi_1 = "00000011S", 290 dsrlv_3 = mips64 and "00000016DTS",
301 mflo_1 = "00000012D", 291 dsrav_3 = mips64 and "00000017DTS",
302 mtlo_1 = "00000013S",
303 mult_2 = "00000018ST",
304 multu_2 = "00000019ST",
305 div_2 = "0000001aST",
306 divu_2 = "0000001bST",
307 add_3 = "00000020DST", 292 add_3 = "00000020DST",
308 move_2 = "00000021DS", 293 move_2 = mips64 and "00000025DS" or "00000021DS",
309 addu_3 = "00000021DST", 294 addu_3 = "00000021DST",
310 sub_3 = "00000022DST", 295 sub_3 = "00000022DST",
311 negu_2 = "00000023DT", 296 negu_2 = mips64 and "0000002fDT" or "00000023DT",
312 subu_3 = "00000023DST", 297 subu_3 = "00000023DST",
313 and_3 = "00000024DST", 298 and_3 = "00000024DST",
314 or_3 = "00000025DST", 299 or_3 = "00000025DST",
@@ -317,6 +302,10 @@ local map_op = {
317 nor_3 = "00000027DST", 302 nor_3 = "00000027DST",
318 slt_3 = "0000002aDST", 303 slt_3 = "0000002aDST",
319 sltu_3 = "0000002bDST", 304 sltu_3 = "0000002bDST",
305 dadd_3 = mips64 and "0000002cDST",
306 daddu_3 = mips64 and "0000002dDST",
307 dsub_3 = mips64 and "0000002eDST",
308 dsubu_3 = mips64 and "0000002fDST",
320 tge_2 = "00000030ST", 309 tge_2 = "00000030ST",
321 tge_3 = "00000030STZ", 310 tge_3 = "00000030STZ",
322 tgeu_2 = "00000031ST", 311 tgeu_2 = "00000031ST",
@@ -329,40 +318,36 @@ local map_op = {
329 teq_3 = "00000034STZ", 318 teq_3 = "00000034STZ",
330 tne_2 = "00000036ST", 319 tne_2 = "00000036ST",
331 tne_3 = "00000036STZ", 320 tne_3 = "00000036STZ",
321 dsll_3 = mips64 and "00000038DTa",
322 dsrl_3 = mips64 and "0000003aDTa",
323 drotr_3 = mips64 and "0020003aDTa",
324 dsra_3 = mips64 and "0000003bDTa",
325 dsll32_3 = mips64 and "0000003cDTA",
326 dsrl32_3 = mips64 and "0000003eDTA",
327 drotr32_3 = mips64 and "0020003eDTA",
328 dsra32_3 = mips64 and "0000003fDTA",
332 329
333 -- Opcode REGIMM. 330 -- Opcode REGIMM.
334 bltz_2 = "04000000SB", 331 bltz_2 = "04000000SB",
335 bgez_2 = "04010000SB", 332 bgez_2 = "04010000SB",
336 bltzl_2 = "04020000SB", 333 bltzl_2 = "04020000SB",
337 bgezl_2 = "04030000SB", 334 bgezl_2 = "04030000SB",
338 tgei_2 = "04080000SI",
339 tgeiu_2 = "04090000SI",
340 tlti_2 = "040a0000SI",
341 tltiu_2 = "040b0000SI",
342 teqi_2 = "040c0000SI",
343 tnei_2 = "040e0000SI",
344 bltzal_2 = "04100000SB",
345 bal_1 = "04110000B", 335 bal_1 = "04110000B",
346 bgezal_2 = "04110000SB",
347 bltzall_2 = "04120000SB",
348 bgezall_2 = "04130000SB",
349 synci_1 = "041f0000O", 336 synci_1 = "041f0000O",
350 337
351 -- Opcode SPECIAL2.
352 madd_2 = "70000000ST",
353 maddu_2 = "70000001ST",
354 mul_3 = "70000002DST",
355 msub_2 = "70000004ST",
356 msubu_2 = "70000005ST",
357 clz_2 = "70000020DS=",
358 clo_2 = "70000021DS=",
359 sdbbp_0 = "7000003f",
360 sdbbp_1 = "7000003fY",
361
362 -- Opcode SPECIAL3. 338 -- Opcode SPECIAL3.
363 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 339 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
340 dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
341 dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
342 dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
343 zextw_2 = mips64 and "7c00f803TS",
364 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 344 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
345 dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
346 dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
347 dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
365 wsbh_2 = "7c0000a0DT", 348 wsbh_2 = "7c0000a0DT",
349 dsbh_2 = mips64 and "7c0000a4DT",
350 dshd_2 = mips64 and "7c000164DT",
366 seb_2 = "7c000420DT", 351 seb_2 = "7c000420DT",
367 seh_2 = "7c000620DT", 352 seh_2 = "7c000620DT",
368 rdhwr_2 = "7c00003bTD", 353 rdhwr_2 = "7c00003bTD",
@@ -370,8 +355,12 @@ local map_op = {
370 -- Opcode COP0. 355 -- Opcode COP0.
371 mfc0_2 = "40000000TD", 356 mfc0_2 = "40000000TD",
372 mfc0_3 = "40000000TDW", 357 mfc0_3 = "40000000TDW",
358 dmfc0_2 = mips64 and "40200000TD",
359 dmfc0_3 = mips64 and "40200000TDW",
373 mtc0_2 = "40800000TD", 360 mtc0_2 = "40800000TD",
374 mtc0_3 = "40800000TDW", 361 mtc0_3 = "40800000TDW",
362 dmtc0_2 = mips64 and "40a00000TD",
363 dmtc0_3 = mips64 and "40a00000TDW",
375 rdpgpr_2 = "41400000DT", 364 rdpgpr_2 = "41400000DT",
376 di_0 = "41606000", 365 di_0 = "41606000",
377 di_1 = "41606000T", 366 di_1 = "41606000T",
@@ -388,21 +377,14 @@ local map_op = {
388 377
389 -- Opcode COP1. 378 -- Opcode COP1.
390 mfc1_2 = "44000000TG", 379 mfc1_2 = "44000000TG",
380 dmfc1_2 = mips64 and "44200000TG",
391 cfc1_2 = "44400000TG", 381 cfc1_2 = "44400000TG",
392 mfhc1_2 = "44600000TG", 382 mfhc1_2 = "44600000TG",
393 mtc1_2 = "44800000TG", 383 mtc1_2 = "44800000TG",
384 dmtc1_2 = mips64 and "44a00000TG",
394 ctc1_2 = "44c00000TG", 385 ctc1_2 = "44c00000TG",
395 mthc1_2 = "44e00000TG", 386 mthc1_2 = "44e00000TG",
396 387
397 bc1f_1 = "45000000B",
398 bc1f_2 = "45000000CB",
399 bc1t_1 = "45010000B",
400 bc1t_2 = "45010000CB",
401 bc1fl_1 = "45020000B",
402 bc1fl_2 = "45020000CB",
403 bc1tl_1 = "45030000B",
404 bc1tl_2 = "45030000CB",
405
406 ["add.s_3"] = "46000000FGH", 388 ["add.s_3"] = "46000000FGH",
407 ["sub.s_3"] = "46000001FGH", 389 ["sub.s_3"] = "46000001FGH",
408 ["mul.s_3"] = "46000002FGH", 390 ["mul.s_3"] = "46000002FGH",
@@ -419,51 +401,11 @@ local map_op = {
419 ["trunc.w.s_2"] = "4600000dFG", 401 ["trunc.w.s_2"] = "4600000dFG",
420 ["ceil.w.s_2"] = "4600000eFG", 402 ["ceil.w.s_2"] = "4600000eFG",
421 ["floor.w.s_2"] = "4600000fFG", 403 ["floor.w.s_2"] = "4600000fFG",
422 ["movf.s_2"] = "46000011FG",
423 ["movf.s_3"] = "46000011FGC",
424 ["movt.s_2"] = "46010011FG",
425 ["movt.s_3"] = "46010011FGC",
426 ["movz.s_3"] = "46000012FGT",
427 ["movn.s_3"] = "46000013FGT",
428 ["recip.s_2"] = "46000015FG", 404 ["recip.s_2"] = "46000015FG",
429 ["rsqrt.s_2"] = "46000016FG", 405 ["rsqrt.s_2"] = "46000016FG",
430 ["cvt.d.s_2"] = "46000021FG", 406 ["cvt.d.s_2"] = "46000021FG",
431 ["cvt.w.s_2"] = "46000024FG", 407 ["cvt.w.s_2"] = "46000024FG",
432 ["cvt.l.s_2"] = "46000025FG", 408 ["cvt.l.s_2"] = "46000025FG",
433 ["cvt.ps.s_3"] = "46000026FGH",
434 ["c.f.s_2"] = "46000030GH",
435 ["c.f.s_3"] = "46000030VGH",
436 ["c.un.s_2"] = "46000031GH",
437 ["c.un.s_3"] = "46000031VGH",
438 ["c.eq.s_2"] = "46000032GH",
439 ["c.eq.s_3"] = "46000032VGH",
440 ["c.ueq.s_2"] = "46000033GH",
441 ["c.ueq.s_3"] = "46000033VGH",
442 ["c.olt.s_2"] = "46000034GH",
443 ["c.olt.s_3"] = "46000034VGH",
444 ["c.ult.s_2"] = "46000035GH",
445 ["c.ult.s_3"] = "46000035VGH",
446 ["c.ole.s_2"] = "46000036GH",
447 ["c.ole.s_3"] = "46000036VGH",
448 ["c.ule.s_2"] = "46000037GH",
449 ["c.ule.s_3"] = "46000037VGH",
450 ["c.sf.s_2"] = "46000038GH",
451 ["c.sf.s_3"] = "46000038VGH",
452 ["c.ngle.s_2"] = "46000039GH",
453 ["c.ngle.s_3"] = "46000039VGH",
454 ["c.seq.s_2"] = "4600003aGH",
455 ["c.seq.s_3"] = "4600003aVGH",
456 ["c.ngl.s_2"] = "4600003bGH",
457 ["c.ngl.s_3"] = "4600003bVGH",
458 ["c.lt.s_2"] = "4600003cGH",
459 ["c.lt.s_3"] = "4600003cVGH",
460 ["c.nge.s_2"] = "4600003dGH",
461 ["c.nge.s_3"] = "4600003dVGH",
462 ["c.le.s_2"] = "4600003eGH",
463 ["c.le.s_3"] = "4600003eVGH",
464 ["c.ngt.s_2"] = "4600003fGH",
465 ["c.ngt.s_3"] = "4600003fVGH",
466
467 ["add.d_3"] = "46200000FGH", 409 ["add.d_3"] = "46200000FGH",
468 ["sub.d_3"] = "46200001FGH", 410 ["sub.d_3"] = "46200001FGH",
469 ["mul.d_3"] = "46200002FGH", 411 ["mul.d_3"] = "46200002FGH",
@@ -480,130 +422,410 @@ local map_op = {
480 ["trunc.w.d_2"] = "4620000dFG", 422 ["trunc.w.d_2"] = "4620000dFG",
481 ["ceil.w.d_2"] = "4620000eFG", 423 ["ceil.w.d_2"] = "4620000eFG",
482 ["floor.w.d_2"] = "4620000fFG", 424 ["floor.w.d_2"] = "4620000fFG",
483 ["movf.d_2"] = "46200011FG",
484 ["movf.d_3"] = "46200011FGC",
485 ["movt.d_2"] = "46210011FG",
486 ["movt.d_3"] = "46210011FGC",
487 ["movz.d_3"] = "46200012FGT",
488 ["movn.d_3"] = "46200013FGT",
489 ["recip.d_2"] = "46200015FG", 425 ["recip.d_2"] = "46200015FG",
490 ["rsqrt.d_2"] = "46200016FG", 426 ["rsqrt.d_2"] = "46200016FG",
491 ["cvt.s.d_2"] = "46200020FG", 427 ["cvt.s.d_2"] = "46200020FG",
492 ["cvt.w.d_2"] = "46200024FG", 428 ["cvt.w.d_2"] = "46200024FG",
493 ["cvt.l.d_2"] = "46200025FG", 429 ["cvt.l.d_2"] = "46200025FG",
494 ["c.f.d_2"] = "46200030GH",
495 ["c.f.d_3"] = "46200030VGH",
496 ["c.un.d_2"] = "46200031GH",
497 ["c.un.d_3"] = "46200031VGH",
498 ["c.eq.d_2"] = "46200032GH",
499 ["c.eq.d_3"] = "46200032VGH",
500 ["c.ueq.d_2"] = "46200033GH",
501 ["c.ueq.d_3"] = "46200033VGH",
502 ["c.olt.d_2"] = "46200034GH",
503 ["c.olt.d_3"] = "46200034VGH",
504 ["c.ult.d_2"] = "46200035GH",
505 ["c.ult.d_3"] = "46200035VGH",
506 ["c.ole.d_2"] = "46200036GH",
507 ["c.ole.d_3"] = "46200036VGH",
508 ["c.ule.d_2"] = "46200037GH",
509 ["c.ule.d_3"] = "46200037VGH",
510 ["c.sf.d_2"] = "46200038GH",
511 ["c.sf.d_3"] = "46200038VGH",
512 ["c.ngle.d_2"] = "46200039GH",
513 ["c.ngle.d_3"] = "46200039VGH",
514 ["c.seq.d_2"] = "4620003aGH",
515 ["c.seq.d_3"] = "4620003aVGH",
516 ["c.ngl.d_2"] = "4620003bGH",
517 ["c.ngl.d_3"] = "4620003bVGH",
518 ["c.lt.d_2"] = "4620003cGH",
519 ["c.lt.d_3"] = "4620003cVGH",
520 ["c.nge.d_2"] = "4620003dGH",
521 ["c.nge.d_3"] = "4620003dVGH",
522 ["c.le.d_2"] = "4620003eGH",
523 ["c.le.d_3"] = "4620003eVGH",
524 ["c.ngt.d_2"] = "4620003fGH",
525 ["c.ngt.d_3"] = "4620003fVGH",
526
527 ["add.ps_3"] = "46c00000FGH",
528 ["sub.ps_3"] = "46c00001FGH",
529 ["mul.ps_3"] = "46c00002FGH",
530 ["abs.ps_2"] = "46c00005FG",
531 ["mov.ps_2"] = "46c00006FG",
532 ["neg.ps_2"] = "46c00007FG",
533 ["movf.ps_2"] = "46c00011FG",
534 ["movf.ps_3"] = "46c00011FGC",
535 ["movt.ps_2"] = "46c10011FG",
536 ["movt.ps_3"] = "46c10011FGC",
537 ["movz.ps_3"] = "46c00012FGT",
538 ["movn.ps_3"] = "46c00013FGT",
539 ["cvt.s.pu_2"] = "46c00020FG",
540 ["cvt.s.pl_2"] = "46c00028FG",
541 ["pll.ps_3"] = "46c0002cFGH",
542 ["plu.ps_3"] = "46c0002dFGH",
543 ["pul.ps_3"] = "46c0002eFGH",
544 ["puu.ps_3"] = "46c0002fFGH",
545 ["c.f.ps_2"] = "46c00030GH",
546 ["c.f.ps_3"] = "46c00030VGH",
547 ["c.un.ps_2"] = "46c00031GH",
548 ["c.un.ps_3"] = "46c00031VGH",
549 ["c.eq.ps_2"] = "46c00032GH",
550 ["c.eq.ps_3"] = "46c00032VGH",
551 ["c.ueq.ps_2"] = "46c00033GH",
552 ["c.ueq.ps_3"] = "46c00033VGH",
553 ["c.olt.ps_2"] = "46c00034GH",
554 ["c.olt.ps_3"] = "46c00034VGH",
555 ["c.ult.ps_2"] = "46c00035GH",
556 ["c.ult.ps_3"] = "46c00035VGH",
557 ["c.ole.ps_2"] = "46c00036GH",
558 ["c.ole.ps_3"] = "46c00036VGH",
559 ["c.ule.ps_2"] = "46c00037GH",
560 ["c.ule.ps_3"] = "46c00037VGH",
561 ["c.sf.ps_2"] = "46c00038GH",
562 ["c.sf.ps_3"] = "46c00038VGH",
563 ["c.ngle.ps_2"] = "46c00039GH",
564 ["c.ngle.ps_3"] = "46c00039VGH",
565 ["c.seq.ps_2"] = "46c0003aGH",
566 ["c.seq.ps_3"] = "46c0003aVGH",
567 ["c.ngl.ps_2"] = "46c0003bGH",
568 ["c.ngl.ps_3"] = "46c0003bVGH",
569 ["c.lt.ps_2"] = "46c0003cGH",
570 ["c.lt.ps_3"] = "46c0003cVGH",
571 ["c.nge.ps_2"] = "46c0003dGH",
572 ["c.nge.ps_3"] = "46c0003dVGH",
573 ["c.le.ps_2"] = "46c0003eGH",
574 ["c.le.ps_3"] = "46c0003eVGH",
575 ["c.ngt.ps_2"] = "46c0003fGH",
576 ["c.ngt.ps_3"] = "46c0003fVGH",
577
578 ["cvt.s.w_2"] = "46800020FG", 430 ["cvt.s.w_2"] = "46800020FG",
579 ["cvt.d.w_2"] = "46800021FG", 431 ["cvt.d.w_2"] = "46800021FG",
580
581 ["cvt.s.l_2"] = "46a00020FG", 432 ["cvt.s.l_2"] = "46a00020FG",
582 ["cvt.d.l_2"] = "46a00021FG", 433 ["cvt.d.l_2"] = "46a00021FG",
583
584 -- Opcode COP1X.
585 lwxc1_2 = "4c000000FX",
586 ldxc1_2 = "4c000001FX",
587 luxc1_2 = "4c000005FX",
588 swxc1_2 = "4c000008FX",
589 sdxc1_2 = "4c000009FX",
590 suxc1_2 = "4c00000dFX",
591 prefx_2 = "4c00000fMX",
592 ["alnv.ps_4"] = "4c00001eFGHS",
593 ["madd.s_4"] = "4c000020FRGH",
594 ["madd.d_4"] = "4c000021FRGH",
595 ["madd.ps_4"] = "4c000026FRGH",
596 ["msub.s_4"] = "4c000028FRGH",
597 ["msub.d_4"] = "4c000029FRGH",
598 ["msub.ps_4"] = "4c00002eFRGH",
599 ["nmadd.s_4"] = "4c000030FRGH",
600 ["nmadd.d_4"] = "4c000031FRGH",
601 ["nmadd.ps_4"] = "4c000036FRGH",
602 ["nmsub.s_4"] = "4c000038FRGH",
603 ["nmsub.d_4"] = "4c000039FRGH",
604 ["nmsub.ps_4"] = "4c00003eFRGH",
605} 434}
606 435
436if mipsr6 then -- Instructions added with MIPSR6.
437
438 for k,v in pairs({
439
440 -- Add immediate to upper bits.
441 aui_3 = "3c000000TSI",
442 daui_3 = mips64 and "74000000TSI",
443 dahi_2 = mips64 and "04060000SI",
444 dati_2 = mips64 and "041e0000SI",
445
446 -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
447
448 -- Compact branches.
449 blezalc_2 = "18000000TB", -- rt != 0.
450 bgezalc_2 = "18000000T=SB", -- rt != 0.
451 bgtzalc_2 = "1c000000TB", -- rt != 0.
452 bltzalc_2 = "1c000000T=SB", -- rt != 0.
453
454 blezc_2 = "58000000TB", -- rt != 0.
455 bgezc_2 = "58000000T=SB", -- rt != 0.
456 bgec_3 = "58000000STB", -- rs != rt.
457 blec_3 = "58000000TSB", -- rt != rs.
458
459 bgtzc_2 = "5c000000TB", -- rt != 0.
460 bltzc_2 = "5c000000T=SB", -- rt != 0.
461 bltc_3 = "5c000000STB", -- rs != rt.
462 bgtc_3 = "5c000000TSB", -- rt != rs.
463
464 bgeuc_3 = "18000000STB", -- rs != rt.
465 bleuc_3 = "18000000TSB", -- rt != rs.
466 bltuc_3 = "1c000000STB", -- rs != rt.
467 bgtuc_3 = "1c000000TSB", -- rt != rs.
468
469 beqzalc_2 = "20000000TB", -- rt != 0.
470 bnezalc_2 = "60000000TB", -- rt != 0.
471 beqc_3 = "20000000STB", -- rs < rt.
472 bnec_3 = "60000000STB", -- rs < rt.
473 bovc_3 = "20000000STB", -- rs >= rt.
474 bnvc_3 = "60000000STB", -- rs >= rt.
475
476 beqzc_2 = "d8000000SK", -- rs != 0.
477 bnezc_2 = "f8000000SK", -- rs != 0.
478 jic_2 = "d8000000TI",
479 jialc_2 = "f8000000TI",
480 bc_1 = "c8000000L",
481 balc_1 = "e8000000L",
482
483 -- Opcode SPECIAL.
484 jr_1 = "00000009S",
485 sdbbp_0 = "0000000e",
486 sdbbp_1 = "0000000eY",
487 lsa_4 = "00000005DSTA",
488 dlsa_4 = mips64 and "00000015DSTA",
489 seleqz_3 = "00000035DST",
490 selnez_3 = "00000037DST",
491 clz_2 = "00000050DS",
492 clo_2 = "00000051DS",
493 dclz_2 = mips64 and "00000052DS",
494 dclo_2 = mips64 and "00000053DS",
495 mul_3 = "00000098DST",
496 muh_3 = "000000d8DST",
497 mulu_3 = "00000099DST",
498 muhu_3 = "000000d9DST",
499 div_3 = "0000009aDST",
500 mod_3 = "000000daDST",
501 divu_3 = "0000009bDST",
502 modu_3 = "000000dbDST",
503 dmul_3 = mips64 and "0000009cDST",
504 dmuh_3 = mips64 and "000000dcDST",
505 dmulu_3 = mips64 and "0000009dDST",
506 dmuhu_3 = mips64 and "000000ddDST",
507 ddiv_3 = mips64 and "0000009eDST",
508 dmod_3 = mips64 and "000000deDST",
509 ddivu_3 = mips64 and "0000009fDST",
510 dmodu_3 = mips64 and "000000dfDST",
511
512 -- Opcode SPECIAL3.
513 align_4 = "7c000220DSTA",
514 dalign_4 = mips64 and "7c000224DSTA",
515 bitswap_2 = "7c000020DT",
516 dbitswap_2 = mips64 and "7c000024DT",
517
518 -- Opcode COP1.
519 bc1eqz_2 = "45200000HB",
520 bc1nez_2 = "45a00000HB",
521
522 ["sel.s_3"] = "46000010FGH",
523 ["seleqz.s_3"] = "46000014FGH",
524 ["selnez.s_3"] = "46000017FGH",
525 ["maddf.s_3"] = "46000018FGH",
526 ["msubf.s_3"] = "46000019FGH",
527 ["rint.s_2"] = "4600001aFG",
528 ["class.s_2"] = "4600001bFG",
529 ["min.s_3"] = "4600001cFGH",
530 ["mina.s_3"] = "4600001dFGH",
531 ["max.s_3"] = "4600001eFGH",
532 ["maxa.s_3"] = "4600001fFGH",
533 ["cmp.af.s_3"] = "46800000FGH",
534 ["cmp.un.s_3"] = "46800001FGH",
535 ["cmp.or.s_3"] = "46800011FGH",
536 ["cmp.eq.s_3"] = "46800002FGH",
537 ["cmp.une.s_3"] = "46800012FGH",
538 ["cmp.ueq.s_3"] = "46800003FGH",
539 ["cmp.ne.s_3"] = "46800013FGH",
540 ["cmp.lt.s_3"] = "46800004FGH",
541 ["cmp.ult.s_3"] = "46800005FGH",
542 ["cmp.le.s_3"] = "46800006FGH",
543 ["cmp.ule.s_3"] = "46800007FGH",
544 ["cmp.saf.s_3"] = "46800008FGH",
545 ["cmp.sun.s_3"] = "46800009FGH",
546 ["cmp.sor.s_3"] = "46800019FGH",
547 ["cmp.seq.s_3"] = "4680000aFGH",
548 ["cmp.sune.s_3"] = "4680001aFGH",
549 ["cmp.sueq.s_3"] = "4680000bFGH",
550 ["cmp.sne.s_3"] = "4680001bFGH",
551 ["cmp.slt.s_3"] = "4680000cFGH",
552 ["cmp.sult.s_3"] = "4680000dFGH",
553 ["cmp.sle.s_3"] = "4680000eFGH",
554 ["cmp.sule.s_3"] = "4680000fFGH",
555
556 ["sel.d_3"] = "46200010FGH",
557 ["seleqz.d_3"] = "46200014FGH",
558 ["selnez.d_3"] = "46200017FGH",
559 ["maddf.d_3"] = "46200018FGH",
560 ["msubf.d_3"] = "46200019FGH",
561 ["rint.d_2"] = "4620001aFG",
562 ["class.d_2"] = "4620001bFG",
563 ["min.d_3"] = "4620001cFGH",
564 ["mina.d_3"] = "4620001dFGH",
565 ["max.d_3"] = "4620001eFGH",
566 ["maxa.d_3"] = "4620001fFGH",
567 ["cmp.af.d_3"] = "46a00000FGH",
568 ["cmp.un.d_3"] = "46a00001FGH",
569 ["cmp.or.d_3"] = "46a00011FGH",
570 ["cmp.eq.d_3"] = "46a00002FGH",
571 ["cmp.une.d_3"] = "46a00012FGH",
572 ["cmp.ueq.d_3"] = "46a00003FGH",
573 ["cmp.ne.d_3"] = "46a00013FGH",
574 ["cmp.lt.d_3"] = "46a00004FGH",
575 ["cmp.ult.d_3"] = "46a00005FGH",
576 ["cmp.le.d_3"] = "46a00006FGH",
577 ["cmp.ule.d_3"] = "46a00007FGH",
578 ["cmp.saf.d_3"] = "46a00008FGH",
579 ["cmp.sun.d_3"] = "46a00009FGH",
580 ["cmp.sor.d_3"] = "46a00019FGH",
581 ["cmp.seq.d_3"] = "46a0000aFGH",
582 ["cmp.sune.d_3"] = "46a0001aFGH",
583 ["cmp.sueq.d_3"] = "46a0000bFGH",
584 ["cmp.sne.d_3"] = "46a0001bFGH",
585 ["cmp.slt.d_3"] = "46a0000cFGH",
586 ["cmp.sult.d_3"] = "46a0000dFGH",
587 ["cmp.sle.d_3"] = "46a0000eFGH",
588 ["cmp.sule.d_3"] = "46a0000fFGH",
589
590 }) do map_op[k] = v end
591
592else -- Instructions removed by MIPSR6.
593
594 for k,v in pairs({
595 -- Traps, don't use.
596 addi_3 = "20000000TSI",
597 daddi_3 = mips64 and "60000000TSI",
598
599 -- Branch on likely, don't use.
600 beqzl_2 = "50000000SB",
601 beql_3 = "50000000STB",
602 bnezl_2 = "54000000SB",
603 bnel_3 = "54000000STB",
604 blezl_2 = "58000000SB",
605 bgtzl_2 = "5c000000SB",
606
607 lwl_2 = "88000000TO",
608 lwr_2 = "98000000TO",
609 swl_2 = "a8000000TO",
610 sdl_2 = mips64 and "b0000000TO",
611 sdr_2 = mips64 and "b1000000TO",
612 swr_2 = "b8000000TO",
613 cache_2 = "bc000000NO",
614 ll_2 = "c0000000TO",
615 pref_2 = "cc000000NO",
616 sc_2 = "e0000000TO",
617 scd_2 = mips64 and "f0000000TO",
618
619 -- Opcode SPECIAL.
620 movf_2 = "00000001DS",
621 movf_3 = "00000001DSC",
622 movt_2 = "00010001DS",
623 movt_3 = "00010001DSC",
624 jr_1 = "00000008S",
625 movz_3 = "0000000aDST",
626 movn_3 = "0000000bDST",
627 mfhi_1 = "00000010D",
628 mthi_1 = "00000011S",
629 mflo_1 = "00000012D",
630 mtlo_1 = "00000013S",
631 mult_2 = "00000018ST",
632 multu_2 = "00000019ST",
633 div_3 = "0000001aST",
634 divu_3 = "0000001bST",
635 ddiv_3 = mips64 and "0000001eST",
636 ddivu_3 = mips64 and "0000001fST",
637 dmult_2 = mips64 and "0000001cST",
638 dmultu_2 = mips64 and "0000001dST",
639
640 -- Opcode REGIMM.
641 tgei_2 = "04080000SI",
642 tgeiu_2 = "04090000SI",
643 tlti_2 = "040a0000SI",
644 tltiu_2 = "040b0000SI",
645 teqi_2 = "040c0000SI",
646 tnei_2 = "040e0000SI",
647 bltzal_2 = "04100000SB",
648 bgezal_2 = "04110000SB",
649 bltzall_2 = "04120000SB",
650 bgezall_2 = "04130000SB",
651
652 -- Opcode SPECIAL2.
653 madd_2 = "70000000ST",
654 maddu_2 = "70000001ST",
655 mul_3 = "70000002DST",
656 msub_2 = "70000004ST",
657 msubu_2 = "70000005ST",
658 clz_2 = "70000020D=TS",
659 clo_2 = "70000021D=TS",
660 dclz_2 = mips64 and "70000024D=TS",
661 dclo_2 = mips64 and "70000025D=TS",
662 sdbbp_0 = "7000003f",
663 sdbbp_1 = "7000003fY",
664
665 -- Opcode COP1.
666 bc1f_1 = "45000000B",
667 bc1f_2 = "45000000CB",
668 bc1t_1 = "45010000B",
669 bc1t_2 = "45010000CB",
670 bc1fl_1 = "45020000B",
671 bc1fl_2 = "45020000CB",
672 bc1tl_1 = "45030000B",
673 bc1tl_2 = "45030000CB",
674
675 ["movf.s_2"] = "46000011FG",
676 ["movf.s_3"] = "46000011FGC",
677 ["movt.s_2"] = "46010011FG",
678 ["movt.s_3"] = "46010011FGC",
679 ["movz.s_3"] = "46000012FGT",
680 ["movn.s_3"] = "46000013FGT",
681 ["cvt.ps.s_3"] = "46000026FGH",
682 ["c.f.s_2"] = "46000030GH",
683 ["c.f.s_3"] = "46000030VGH",
684 ["c.un.s_2"] = "46000031GH",
685 ["c.un.s_3"] = "46000031VGH",
686 ["c.eq.s_2"] = "46000032GH",
687 ["c.eq.s_3"] = "46000032VGH",
688 ["c.ueq.s_2"] = "46000033GH",
689 ["c.ueq.s_3"] = "46000033VGH",
690 ["c.olt.s_2"] = "46000034GH",
691 ["c.olt.s_3"] = "46000034VGH",
692 ["c.ult.s_2"] = "46000035GH",
693 ["c.ult.s_3"] = "46000035VGH",
694 ["c.ole.s_2"] = "46000036GH",
695 ["c.ole.s_3"] = "46000036VGH",
696 ["c.ule.s_2"] = "46000037GH",
697 ["c.ule.s_3"] = "46000037VGH",
698 ["c.sf.s_2"] = "46000038GH",
699 ["c.sf.s_3"] = "46000038VGH",
700 ["c.ngle.s_2"] = "46000039GH",
701 ["c.ngle.s_3"] = "46000039VGH",
702 ["c.seq.s_2"] = "4600003aGH",
703 ["c.seq.s_3"] = "4600003aVGH",
704 ["c.ngl.s_2"] = "4600003bGH",
705 ["c.ngl.s_3"] = "4600003bVGH",
706 ["c.lt.s_2"] = "4600003cGH",
707 ["c.lt.s_3"] = "4600003cVGH",
708 ["c.nge.s_2"] = "4600003dGH",
709 ["c.nge.s_3"] = "4600003dVGH",
710 ["c.le.s_2"] = "4600003eGH",
711 ["c.le.s_3"] = "4600003eVGH",
712 ["c.ngt.s_2"] = "4600003fGH",
713 ["c.ngt.s_3"] = "4600003fVGH",
714 ["movf.d_2"] = "46200011FG",
715 ["movf.d_3"] = "46200011FGC",
716 ["movt.d_2"] = "46210011FG",
717 ["movt.d_3"] = "46210011FGC",
718 ["movz.d_3"] = "46200012FGT",
719 ["movn.d_3"] = "46200013FGT",
720 ["c.f.d_2"] = "46200030GH",
721 ["c.f.d_3"] = "46200030VGH",
722 ["c.un.d_2"] = "46200031GH",
723 ["c.un.d_3"] = "46200031VGH",
724 ["c.eq.d_2"] = "46200032GH",
725 ["c.eq.d_3"] = "46200032VGH",
726 ["c.ueq.d_2"] = "46200033GH",
727 ["c.ueq.d_3"] = "46200033VGH",
728 ["c.olt.d_2"] = "46200034GH",
729 ["c.olt.d_3"] = "46200034VGH",
730 ["c.ult.d_2"] = "46200035GH",
731 ["c.ult.d_3"] = "46200035VGH",
732 ["c.ole.d_2"] = "46200036GH",
733 ["c.ole.d_3"] = "46200036VGH",
734 ["c.ule.d_2"] = "46200037GH",
735 ["c.ule.d_3"] = "46200037VGH",
736 ["c.sf.d_2"] = "46200038GH",
737 ["c.sf.d_3"] = "46200038VGH",
738 ["c.ngle.d_2"] = "46200039GH",
739 ["c.ngle.d_3"] = "46200039VGH",
740 ["c.seq.d_2"] = "4620003aGH",
741 ["c.seq.d_3"] = "4620003aVGH",
742 ["c.ngl.d_2"] = "4620003bGH",
743 ["c.ngl.d_3"] = "4620003bVGH",
744 ["c.lt.d_2"] = "4620003cGH",
745 ["c.lt.d_3"] = "4620003cVGH",
746 ["c.nge.d_2"] = "4620003dGH",
747 ["c.nge.d_3"] = "4620003dVGH",
748 ["c.le.d_2"] = "4620003eGH",
749 ["c.le.d_3"] = "4620003eVGH",
750 ["c.ngt.d_2"] = "4620003fGH",
751 ["c.ngt.d_3"] = "4620003fVGH",
752 ["add.ps_3"] = "46c00000FGH",
753 ["sub.ps_3"] = "46c00001FGH",
754 ["mul.ps_3"] = "46c00002FGH",
755 ["abs.ps_2"] = "46c00005FG",
756 ["mov.ps_2"] = "46c00006FG",
757 ["neg.ps_2"] = "46c00007FG",
758 ["movf.ps_2"] = "46c00011FG",
759 ["movf.ps_3"] = "46c00011FGC",
760 ["movt.ps_2"] = "46c10011FG",
761 ["movt.ps_3"] = "46c10011FGC",
762 ["movz.ps_3"] = "46c00012FGT",
763 ["movn.ps_3"] = "46c00013FGT",
764 ["cvt.s.pu_2"] = "46c00020FG",
765 ["cvt.s.pl_2"] = "46c00028FG",
766 ["pll.ps_3"] = "46c0002cFGH",
767 ["plu.ps_3"] = "46c0002dFGH",
768 ["pul.ps_3"] = "46c0002eFGH",
769 ["puu.ps_3"] = "46c0002fFGH",
770 ["c.f.ps_2"] = "46c00030GH",
771 ["c.f.ps_3"] = "46c00030VGH",
772 ["c.un.ps_2"] = "46c00031GH",
773 ["c.un.ps_3"] = "46c00031VGH",
774 ["c.eq.ps_2"] = "46c00032GH",
775 ["c.eq.ps_3"] = "46c00032VGH",
776 ["c.ueq.ps_2"] = "46c00033GH",
777 ["c.ueq.ps_3"] = "46c00033VGH",
778 ["c.olt.ps_2"] = "46c00034GH",
779 ["c.olt.ps_3"] = "46c00034VGH",
780 ["c.ult.ps_2"] = "46c00035GH",
781 ["c.ult.ps_3"] = "46c00035VGH",
782 ["c.ole.ps_2"] = "46c00036GH",
783 ["c.ole.ps_3"] = "46c00036VGH",
784 ["c.ule.ps_2"] = "46c00037GH",
785 ["c.ule.ps_3"] = "46c00037VGH",
786 ["c.sf.ps_2"] = "46c00038GH",
787 ["c.sf.ps_3"] = "46c00038VGH",
788 ["c.ngle.ps_2"] = "46c00039GH",
789 ["c.ngle.ps_3"] = "46c00039VGH",
790 ["c.seq.ps_2"] = "46c0003aGH",
791 ["c.seq.ps_3"] = "46c0003aVGH",
792 ["c.ngl.ps_2"] = "46c0003bGH",
793 ["c.ngl.ps_3"] = "46c0003bVGH",
794 ["c.lt.ps_2"] = "46c0003cGH",
795 ["c.lt.ps_3"] = "46c0003cVGH",
796 ["c.nge.ps_2"] = "46c0003dGH",
797 ["c.nge.ps_3"] = "46c0003dVGH",
798 ["c.le.ps_2"] = "46c0003eGH",
799 ["c.le.ps_3"] = "46c0003eVGH",
800 ["c.ngt.ps_2"] = "46c0003fGH",
801 ["c.ngt.ps_3"] = "46c0003fVGH",
802
803 -- Opcode COP1X.
804 lwxc1_2 = "4c000000FX",
805 ldxc1_2 = "4c000001FX",
806 luxc1_2 = "4c000005FX",
807 swxc1_2 = "4c000008FX",
808 sdxc1_2 = "4c000009FX",
809 suxc1_2 = "4c00000dFX",
810 prefx_2 = "4c00000fMX",
811 ["alnv.ps_4"] = "4c00001eFGHS",
812 ["madd.s_4"] = "4c000020FRGH",
813 ["madd.d_4"] = "4c000021FRGH",
814 ["madd.ps_4"] = "4c000026FRGH",
815 ["msub.s_4"] = "4c000028FRGH",
816 ["msub.d_4"] = "4c000029FRGH",
817 ["msub.ps_4"] = "4c00002eFRGH",
818 ["nmadd.s_4"] = "4c000030FRGH",
819 ["nmadd.d_4"] = "4c000031FRGH",
820 ["nmadd.ps_4"] = "4c000036FRGH",
821 ["nmsub.s_4"] = "4c000038FRGH",
822 ["nmsub.d_4"] = "4c000039FRGH",
823 ["nmsub.ps_4"] = "4c00003eFRGH",
824
825 }) do map_op[k] = v end
826
827end
828
607------------------------------------------------------------------------------ 829------------------------------------------------------------------------------
608 830
609local function parse_gpr(expr) 831local function parse_gpr(expr)
@@ -633,7 +855,7 @@ local function parse_fpr(expr)
633 werror("bad register name `"..expr.."'") 855 werror("bad register name `"..expr.."'")
634end 856end
635 857
636local function parse_imm(imm, bits, shift, scale, signed) 858local function parse_imm(imm, bits, shift, scale, signed, action)
637 local n = tonumber(imm) 859 local n = tonumber(imm)
638 if n then 860 if n then
639 local m = sar(n, scale) 861 local m = sar(n, scale)
@@ -651,7 +873,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
651 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then 873 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
652 werror("expected immediate operand, got register") 874 werror("expected immediate operand, got register")
653 else 875 else
654 waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) 876 waction(action or "IMM",
877 (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
655 return 0 878 return 0
656 end 879 end
657end 880end
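parse_imm now takes the action name as an extra parameter (so the new IMMS action can reuse it) and packs the action argument with explicit shifts: sign flag in bit 15, scale in bits 10-14, field width in bits 5-9 and target bit position in bits 0-4, matching how dasm_mips.h decodes (ins>>5)&31 and ins&31. A worked example for the common I-type case, a 16-bit signed, unscaled immediate at bit 0:

-- Worked example of the packed action argument (values chosen for illustration).
local bit = require("bit")
local signed, scale, bits, shift = true, 0, 16, 0
local arg = (signed and 32768 or 0) + bit.lshift(scale, 10) + bit.lshift(bits, 5) + shift
print(arg, bit.tohex(arg))   --> 33280   00008200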
@@ -756,13 +979,18 @@ map_op[".template__"] = function(params, template, nparams)
756 op = op + parse_disp(params[n]); n = n + 1 979 op = op + parse_disp(params[n]); n = n + 1
757 elseif p == "X" then 980 elseif p == "X" then
758 op = op + parse_index(params[n]); n = n + 1 981 op = op + parse_index(params[n]); n = n + 1
759 elseif p == "B" or p == "J" then 982 elseif p == "B" or p == "J" or p == "K" or p == "L" then
760 local mode, m, s = parse_label(params[n], false) 983 local mode, m, s = parse_label(params[n], false)
761 if p == "B" then m = m + 2048 end 984 if p == "J" then m = m + 0xa800
985 elseif p == "K" then m = m + 0x5000
986 elseif p == "L" then m = m + 0xa000 end
762 waction("REL_"..mode, m, s, 1) 987 waction("REL_"..mode, m, s, 1)
763 n = n + 1 988 n = n + 1
764 elseif p == "A" then 989 elseif p == "A" then
765 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 990 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
991 elseif p == "a" then
992 local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
993 op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
766 elseif p == "M" then 994 elseif p == "M" then
767 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 995 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
768 elseif p == "N" then 996 elseif p == "N" then
@@ -778,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
778 elseif p == "Z" then 1006 elseif p == "Z" then
779 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 1007 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
780 elseif p == "=" then 1008 elseif p == "=" then
781 op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. 1009 n = n - 1 -- Re-use previous parameter for next template char.
782 else 1010 else
783 assert(false) 1011 assert(false)
784 end 1012 end
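The new "a" template character (and the DASM_IMMS action for non-constant operands) handles the 6-bit shift amounts of the doubleword shifts: the low five bits land in the sa field and bit 5 is folded into bit 2 of the function code, which is exactly the difference between dsll (0x38) and dsll32 (0x3c). The "=" character, meanwhile, now simply re-reads the previous parameter, which is how templates such as "T=SB" pass the same register twice. A worked sketch of the constant-shift packing done for "a":

-- Sketch of the "a" operand packing: a constant 6-bit shift for a dsll template.
local bit = require("bit")
local band, shr, shl, tohex = bit.band, bit.rshift, bit.lshift, bit.tohex

local function pack_shift6(op, sa)            -- sa = 0..63
  local m = shl(sa, 6)                        -- what parse_imm returns (bits=6, shift=6)
  return op + band(m, 0x7c0) + band(shr(m, 9), 4)
end

-- "dsll d, t, 40": template base 0x00000038 becomes the dsll32 form with sa = 8.
print(tohex(pack_shift6(0x00000038, 40)))     --> 0000023c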
diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua
new file mode 100644
index 00000000..c97d666b
--- /dev/null
+++ b/dynasm/dasm_mips64.lua
@@ -0,0 +1,12 @@
1------------------------------------------------------------------------------
2-- DynASM MIPS64 module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
8-- All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11mips64 = true -- Using a global is an ugly, but effective solution.
12return require("dasm_mips")
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
index e2d6f1fc..4c7d7289 100644
--- a/dynasm/dasm_ppc.h
+++ b/dynasm/dasm_ppc.h
@@ -1,5 +1,5 @@
1/* 1/*
2** DynASM PPC encoding engine. 2** DynASM PPC/PPC64 encoding engine.
3** Copyright (C) 2005-2023 Mike Pall. All rights reserved. 3** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice. 4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/ 5*/
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -69,7 +69,7 @@ struct dasm_State {
69 size_t lgsize; 69 size_t lgsize;
70 int *pclabels; /* PC label chains/pos ptrs. */ 70 int *pclabels; /* PC label chains/pos ptrs. */
71 size_t pcsize; 71 size_t pcsize;
72 void **globals; /* Array of globals (bias -10). */ 72 void **globals; /* Array of globals. */
73 dasm_Section *section; /* Pointer to active section. */ 73 dasm_Section *section; /* Pointer to active section. */
74 size_t codesize; /* Total size of all code sections. */ 74 size_t codesize; /* Total size of all code sections. */
75 int maxsection; /* 0 <= sectionidx < maxsection. */ 75 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
86{ 86{
87 dasm_State *D; 87 dasm_State *D;
88 size_t psz = 0; 88 size_t psz = 0;
89 int i;
90 Dst_REF = NULL; 89 Dst_REF = NULL;
91 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 90 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
92 D = Dst_REF; 91 D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
97 D->pcsize = 0; 96 D->pcsize = 0;
98 D->globals = NULL; 97 D->globals = NULL;
99 D->maxsection = maxsection; 98 D->maxsection = maxsection;
100 for (i = 0; i < maxsection; i++) { 99 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
101 D->sections[i].buf = NULL; /* Need this for pass3. */
102 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
103 D->sections[i].bsize = 0;
104 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
105 }
106} 100}
107 101
108/* Free DynASM state. */ 102/* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
122void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 116void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
123{ 117{
124 dasm_State *D = Dst_REF; 118 dasm_State *D = Dst_REF;
125 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 119 D->globals = gl;
126 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 120 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
127} 121}
128 122
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
147 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 141 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
148 for (i = 0; i < D->maxsection; i++) { 142 for (i = 0; i < D->maxsection; i++) {
149 D->sections[i].pos = DASM_SEC2POS(i); 143 D->sections[i].pos = DASM_SEC2POS(i);
144 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
150 D->sections[i].ofs = 0; 145 D->sections[i].ofs = 0;
151 } 146 }
152} 147}
@@ -244,6 +239,10 @@ void dasm_put(Dst_DECL, int start, ...)
244#endif 239#endif
245 b[pos++] = n; 240 b[pos++] = n;
246 break; 241 break;
242 case DASM_IMMSH:
243 CK((n >> 6) == 0, RANGE_I);
244 b[pos++] = n;
245 break;
247 } 246 }
248 } 247 }
249 } 248 }
@@ -273,7 +272,7 @@ int dasm_link(Dst_DECL, size_t *szp)
273 272
274 { /* Handle globals not defined in this translation unit. */ 273 { /* Handle globals not defined in this translation unit. */
275 int idx; 274 int idx;
276 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 275 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
277 int n = D->lglabels[idx]; 276 int n = D->lglabels[idx];
278 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 277 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
279 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 278 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +298,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 298 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 299 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 300 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 301 case DASM_IMM: case DASM_IMMSH: pos++; break;
303 } 302 }
304 } 303 }
305 stop: (void)0; 304 stop: (void)0;
@@ -349,7 +348,10 @@ int dasm_encode(Dst_DECL, void *buffer)
349 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; 348 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
350 break; 349 break;
351 case DASM_REL_LG: 350 case DASM_REL_LG:
352 CK(n >= 0, UNDEF_LG); 351 if (n < 0) {
352 n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
353 goto patchrel;
354 }
353 /* fallthrough */ 355 /* fallthrough */
354 case DASM_REL_PC: 356 case DASM_REL_PC:
355 CK(n >= 0, UNDEF_PC); 357 CK(n >= 0, UNDEF_PC);
@@ -361,12 +363,15 @@ int dasm_encode(Dst_DECL, void *buffer)
361 cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); 363 cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
362 break; 364 break;
363 case DASM_LABEL_LG: 365 case DASM_LABEL_LG:
364 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 366 ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
365 break; 367 break;
366 case DASM_LABEL_PC: break; 368 case DASM_LABEL_PC: break;
367 case DASM_IMM: 369 case DASM_IMM:
368 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 370 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
369 break; 371 break;
372 case DASM_IMMSH:
373 cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
374 break;
370 default: *cp++ = ins; break; 375 default: *cp++ = ins; break;
371 } 376 }
372 } 377 }
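The new DASM_IMMSH action covers the split 6-bit operand fields of the PPC64 MD-form rotates (rldicl and friends, presumably what the Lua side emits it for): with the low action bit set it places sh[0:4] at bit 11 and sh[5] at bit 1, otherwise it places the 6-bit mask value with its low five bits at bit 6 and the sixth bit kept at bit 5. A small standalone sketch of the two packings, with arbitrary example values:

-- Sketch of the DASM_IMMSH bit packing for a 6-bit shift (n) or mask (mb) value.
local bit = require("bit")
local band, bor, shr, shl = bit.band, bit.bor, bit.rshift, bit.lshift

local function immsh(n, is_sh)
  if is_sh then
    return bor(shl(band(n, 31), 11), shr(band(n, 32), 4))  -- sh[0:4]<<11 | sh[5] at bit 1
  else
    return bor(shl(band(n, 31), 6), band(n, 32))            -- mb[0:4]<<6  | mb[5] at bit 5
  end
end

print(("%x %x"):format(immsh(40, true), immsh(20, false)))  --> 4002 500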
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index b4f5cea4..d66ae4a0 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -1,17 +1,19 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM PPC module. 2-- DynASM PPC/PPC64 module.
3-- 3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6--
7-- Support for various extensions contributed by Caio Souza Oliveira.
6------------------------------------------------------------------------------ 8------------------------------------------------------------------------------
7 9
8-- Module information: 10-- Module information:
9local _info = { 11local _info = {
10 arch = "ppc", 12 arch = "ppc",
11 description = "DynASM PPC module", 13 description = "DynASM PPC module",
12 version = "1.3.0", 14 version = "1.5.0",
13 vernum = 10300, 15 vernum = 10500,
14 release = "2011-05-05", 16 release = "2021-05-02",
15 author = "Mike Pall", 17 author = "Mike Pall",
16 license = "MIT", 18 license = "MIT",
17} 19}
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
39local action_names = { 41local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT", 42 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG", 43 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "IMM", 44 "REL_PC", "LABEL_PC", "IMM", "IMMSH"
43} 45}
44 46
45-- Maximum number of section buffer positions for dasm_put(). 47-- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
228 230
229------------------------------------------------------------------------------ 231------------------------------------------------------------------------------
230 232
233local map_op, op_template
234
235local function op_alias(opname, f)
236 return function(params, nparams)
237 if not params then return "-> "..opname:sub(1, -3) end
238 f(params, nparams)
239 op_template(params, map_op[opname], nparams)
240 end
241end
242
231-- Template strings for PPC instructions. 243-- Template strings for PPC instructions.
232local map_op = { 244map_op = {
233 tdi_3 = "08000000ARI", 245 tdi_3 = "08000000ARI",
234 twi_3 = "0c000000ARI", 246 twi_3 = "0c000000ARI",
235 mulli_3 = "1c000000RRI", 247 mulli_3 = "1c000000RRI",
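op_alias builds a pseudo-opcode on top of an existing template: the alias's callback rewrites the parameter list in place and then dispatches to the base opcode through op_template, which is why map_op is now forward-declared. The subi/rotlwi/slwi/srwi/clrlwi entries added a little further down all use it; for instance, the srwi rewrite turns a logical right shift into the canonical rlwinm form. A minimal sketch of that parameter rewrite:

-- Sketch of the srwi alias: "srwi r3, r4, 5" becomes "rlwinm r3, r4, 32-(5), 5, 31".
local p = { "r3", "r4", "5" }
p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
print(table.concat(p, ", "))   --> r3, r4, 32-(5), 5, 31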
@@ -297,6 +309,250 @@ local map_op = {
297 std_2 = "f8000000RD", 309 std_2 = "f8000000RD",
298 stdu_2 = "f8000001RD", 310 stdu_2 = "f8000001RD",
299 311
312 subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
313 subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
314 subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
315 ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
316
317 rotlwi_3 = op_alias("rlwinm_5", function(p)
318 p[4] = "0"; p[5] = "31"
319 end),
320 rotrwi_3 = op_alias("rlwinm_5", function(p)
321 p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
322 end),
323 rotlw_3 = op_alias("rlwnm_5", function(p)
324 p[4] = "0"; p[5] = "31"
325 end),
326 slwi_3 = op_alias("rlwinm_5", function(p)
327 p[5] = "31-("..p[3]..")"; p[4] = "0"
328 end),
329 srwi_3 = op_alias("rlwinm_5", function(p)
330 p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
331 end),
332 clrlwi_3 = op_alias("rlwinm_5", function(p)
333 p[4] = p[3]; p[3] = "0"; p[5] = "31"
334 end),
335 clrrwi_3 = op_alias("rlwinm_5", function(p)
336 p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
337 end),
338
339 -- Primary opcode 4:
340 mulhhwu_3 = "10000010RRR.",
341 machhwu_3 = "10000018RRR.",
342 mulhhw_3 = "10000050RRR.",
343 nmachhw_3 = "1000005cRRR.",
344 machhwsu_3 = "10000098RRR.",
345 machhws_3 = "100000d8RRR.",
346 nmachhws_3 = "100000dcRRR.",
347 mulchwu_3 = "10000110RRR.",
348 macchwu_3 = "10000118RRR.",
349 mulchw_3 = "10000150RRR.",
350 macchw_3 = "10000158RRR.",
351 nmacchw_3 = "1000015cRRR.",
352 macchwsu_3 = "10000198RRR.",
353 macchws_3 = "100001d8RRR.",
354 nmacchws_3 = "100001dcRRR.",
355 mullhw_3 = "10000350RRR.",
356 maclhw_3 = "10000358RRR.",
357 nmaclhw_3 = "1000035cRRR.",
358 maclhwsu_3 = "10000398RRR.",
359 maclhws_3 = "100003d8RRR.",
360 nmaclhws_3 = "100003dcRRR.",
361 machhwuo_3 = "10000418RRR.",
362 nmachhwo_3 = "1000045cRRR.",
363 machhwsuo_3 = "10000498RRR.",
364 machhwso_3 = "100004d8RRR.",
365 nmachhwso_3 = "100004dcRRR.",
366 macchwuo_3 = "10000518RRR.",
367 macchwo_3 = "10000558RRR.",
368 nmacchwo_3 = "1000055cRRR.",
369 macchwsuo_3 = "10000598RRR.",
370 macchwso_3 = "100005d8RRR.",
371 nmacchwso_3 = "100005dcRRR.",
372 maclhwo_3 = "10000758RRR.",
373 nmaclhwo_3 = "1000075cRRR.",
374 maclhwsuo_3 = "10000798RRR.",
375 maclhwso_3 = "100007d8RRR.",
376 nmaclhwso_3 = "100007dcRRR.",
377
378 vaddubm_3 = "10000000VVV",
379 vmaxub_3 = "10000002VVV",
380 vrlb_3 = "10000004VVV",
381 vcmpequb_3 = "10000006VVV",
382 vmuloub_3 = "10000008VVV",
383 vaddfp_3 = "1000000aVVV",
384 vmrghb_3 = "1000000cVVV",
385 vpkuhum_3 = "1000000eVVV",
386 vmhaddshs_4 = "10000020VVVV",
387 vmhraddshs_4 = "10000021VVVV",
388 vmladduhm_4 = "10000022VVVV",
389 vmsumubm_4 = "10000024VVVV",
390 vmsummbm_4 = "10000025VVVV",
391 vmsumuhm_4 = "10000026VVVV",
392 vmsumuhs_4 = "10000027VVVV",
393 vmsumshm_4 = "10000028VVVV",
394 vmsumshs_4 = "10000029VVVV",
395 vsel_4 = "1000002aVVVV",
396 vperm_4 = "1000002bVVVV",
397 vsldoi_4 = "1000002cVVVP",
398 vpermxor_4 = "1000002dVVVV",
399 vmaddfp_4 = "1000002eVVVV~",
400 vnmsubfp_4 = "1000002fVVVV~",
401 vaddeuqm_4 = "1000003cVVVV",
402 vaddecuq_4 = "1000003dVVVV",
403 vsubeuqm_4 = "1000003eVVVV",
404 vsubecuq_4 = "1000003fVVVV",
405 vadduhm_3 = "10000040VVV",
406 vmaxuh_3 = "10000042VVV",
407 vrlh_3 = "10000044VVV",
408 vcmpequh_3 = "10000046VVV",
409 vmulouh_3 = "10000048VVV",
410 vsubfp_3 = "1000004aVVV",
411 vmrghh_3 = "1000004cVVV",
412 vpkuwum_3 = "1000004eVVV",
413 vadduwm_3 = "10000080VVV",
414 vmaxuw_3 = "10000082VVV",
415 vrlw_3 = "10000084VVV",
416 vcmpequw_3 = "10000086VVV",
417 vmulouw_3 = "10000088VVV",
418 vmuluwm_3 = "10000089VVV",
419 vmrghw_3 = "1000008cVVV",
420 vpkuhus_3 = "1000008eVVV",
421 vaddudm_3 = "100000c0VVV",
422 vmaxud_3 = "100000c2VVV",
423 vrld_3 = "100000c4VVV",
424 vcmpeqfp_3 = "100000c6VVV",
425 vcmpequd_3 = "100000c7VVV",
426 vpkuwus_3 = "100000ceVVV",
427 vadduqm_3 = "10000100VVV",
428 vmaxsb_3 = "10000102VVV",
429 vslb_3 = "10000104VVV",
430 vmulosb_3 = "10000108VVV",
431 vrefp_2 = "1000010aV-V",
432 vmrglb_3 = "1000010cVVV",
433 vpkshus_3 = "1000010eVVV",
434 vaddcuq_3 = "10000140VVV",
435 vmaxsh_3 = "10000142VVV",
436 vslh_3 = "10000144VVV",
437 vmulosh_3 = "10000148VVV",
438 vrsqrtefp_2 = "1000014aV-V",
439 vmrglh_3 = "1000014cVVV",
440 vpkswus_3 = "1000014eVVV",
441 vaddcuw_3 = "10000180VVV",
442 vmaxsw_3 = "10000182VVV",
443 vslw_3 = "10000184VVV",
444 vmulosw_3 = "10000188VVV",
445 vexptefp_2 = "1000018aV-V",
446 vmrglw_3 = "1000018cVVV",
447 vpkshss_3 = "1000018eVVV",
448 vmaxsd_3 = "100001c2VVV",
449 vsl_3 = "100001c4VVV",
450 vcmpgefp_3 = "100001c6VVV",
451 vlogefp_2 = "100001caV-V",
452 vpkswss_3 = "100001ceVVV",
453 vadduhs_3 = "10000240VVV",
454 vminuh_3 = "10000242VVV",
455 vsrh_3 = "10000244VVV",
456 vcmpgtuh_3 = "10000246VVV",
457 vmuleuh_3 = "10000248VVV",
458 vrfiz_2 = "1000024aV-V",
459 vsplth_3 = "1000024cVV3",
460 vupkhsh_2 = "1000024eV-V",
461 vminuw_3 = "10000282VVV",
462 vminud_3 = "100002c2VVV",
463 vcmpgtud_3 = "100002c7VVV",
464 vrfim_2 = "100002caV-V",
465 vcmpgtsb_3 = "10000306VVV",
466 vcfux_3 = "1000030aVVA~",
467 vaddshs_3 = "10000340VVV",
468 vminsh_3 = "10000342VVV",
469 vsrah_3 = "10000344VVV",
470 vcmpgtsh_3 = "10000346VVV",
471 vmulesh_3 = "10000348VVV",
472 vcfsx_3 = "1000034aVVA~",
473 vspltish_2 = "1000034cVS",
474 vupkhpx_2 = "1000034eV-V",
475 vaddsws_3 = "10000380VVV",
476 vminsw_3 = "10000382VVV",
477 vsraw_3 = "10000384VVV",
478 vcmpgtsw_3 = "10000386VVV",
479 vmulesw_3 = "10000388VVV",
480 vctuxs_3 = "1000038aVVA~",
481 vspltisw_2 = "1000038cVS",
482 vminsd_3 = "100003c2VVV",
483 vsrad_3 = "100003c4VVV",
484 vcmpbfp_3 = "100003c6VVV",
485 vcmpgtsd_3 = "100003c7VVV",
486 vctsxs_3 = "100003caVVA~",
487 vupklpx_2 = "100003ceV-V",
488 vsububm_3 = "10000400VVV",
489 ["bcdadd._4"] = "10000401VVVy.",
490 vavgub_3 = "10000402VVV",
491 vand_3 = "10000404VVV",
492 ["vcmpequb._3"] = "10000406VVV",
493 vmaxfp_3 = "1000040aVVV",
494 vsubuhm_3 = "10000440VVV",
495 ["bcdsub._4"] = "10000441VVVy.",
496 vavguh_3 = "10000442VVV",
497 vandc_3 = "10000444VVV",
498 ["vcmpequh._3"] = "10000446VVV",
499 vminfp_3 = "1000044aVVV",
500 vpkudum_3 = "1000044eVVV",
501 vsubuwm_3 = "10000480VVV",
502 vavguw_3 = "10000482VVV",
503 vor_3 = "10000484VVV",
504 ["vcmpequw._3"] = "10000486VVV",
505 vpmsumw_3 = "10000488VVV",
506 ["vcmpeqfp._3"] = "100004c6VVV",
507 ["vcmpequd._3"] = "100004c7VVV",
508 vpkudus_3 = "100004ceVVV",
509 vavgsb_3 = "10000502VVV",
510 vavgsh_3 = "10000542VVV",
511 vorc_3 = "10000544VVV",
512 vbpermq_3 = "1000054cVVV",
513 vpksdus_3 = "1000054eVVV",
514 vavgsw_3 = "10000582VVV",
515 vsld_3 = "100005c4VVV",
516 ["vcmpgefp._3"] = "100005c6VVV",
517 vpksdss_3 = "100005ceVVV",
518 vsububs_3 = "10000600VVV",
519 mfvscr_1 = "10000604V--",
520 vsum4ubs_3 = "10000608VVV",
521 vsubuhs_3 = "10000640VVV",
522 mtvscr_1 = "10000644--V",
523 ["vcmpgtuh._3"] = "10000646VVV",
524 vsum4shs_3 = "10000648VVV",
525 vupkhsw_2 = "1000064eV-V",
526 vsubuws_3 = "10000680VVV",
527 vshasigmaw_4 = "10000682VVYp",
528 veqv_3 = "10000684VVV",
529 vsum2sws_3 = "10000688VVV",
530 vmrgow_3 = "1000068cVVV",
531 vshasigmad_4 = "100006c2VVYp",
532 vsrd_3 = "100006c4VVV",
533 ["vcmpgtud._3"] = "100006c7VVV",
534 vupklsw_2 = "100006ceV-V",
535 vupkslw_2 = "100006ceV-V",
536 vsubsbs_3 = "10000700VVV",
537 vclzb_2 = "10000702V-V",
538 vpopcntb_2 = "10000703V-V",
539 ["vcmpgtsb._3"] = "10000706VVV",
540 vsum4sbs_3 = "10000708VVV",
541 vsubshs_3 = "10000740VVV",
542 vclzh_2 = "10000742V-V",
543 vpopcnth_2 = "10000743V-V",
544 ["vcmpgtsh._3"] = "10000746VVV",
545 vsubsws_3 = "10000780VVV",
546 vclzw_2 = "10000782V-V",
547 vpopcntw_2 = "10000783V-V",
548 ["vcmpgtsw._3"] = "10000786VVV",
549 vsumsws_3 = "10000788VVV",
550 vmrgew_3 = "1000078cVVV",
551 vclzd_2 = "100007c2V-V",
552 vpopcntd_2 = "100007c3V-V",
553 ["vcmpbfp._3"] = "100007c6VVV",
554 ["vcmpgtsd._3"] = "100007c7VVV",
555
300 -- Primary opcode 19: 556 -- Primary opcode 19:
301 mcrf_2 = "4c000000XX", 557 mcrf_2 = "4c000000XX",
302 isync_0 = "4c00012c", 558 isync_0 = "4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
316 bclrl_2 = "4c000021AA", 572 bclrl_2 = "4c000021AA",
317 bcctr_2 = "4c000420AA", 573 bcctr_2 = "4c000420AA",
318 bcctrl_2 = "4c000421AA", 574 bcctrl_2 = "4c000421AA",
575 bctar_2 = "4c000460AA",
576 bctarl_2 = "4c000461AA",
319 blr_0 = "4e800020", 577 blr_0 = "4e800020",
320 blrl_0 = "4e800021", 578 blrl_0 = "4e800021",
321 bctr_0 = "4e800420", 579 bctr_0 = "4e800420",
@@ -327,6 +585,7 @@ local map_op = {
327 cmpd_3 = "7c200000XRR", 585 cmpd_3 = "7c200000XRR",
328 cmpd_2 = "7c200000-RR", 586 cmpd_2 = "7c200000-RR",
329 tw_3 = "7c000008ARR", 587 tw_3 = "7c000008ARR",
588 lvsl_3 = "7c00000cVRR",
330 subfc_3 = "7c000010RRR.", 589 subfc_3 = "7c000010RRR.",
331 subc_3 = "7c000010RRR~.", 590 subc_3 = "7c000010RRR~.",
332 mulhdu_3 = "7c000012RRR.", 591 mulhdu_3 = "7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
351 cmplw_2 = "7c000040-RR", 610 cmplw_2 = "7c000040-RR",
352 cmpld_3 = "7c200040XRR", 611 cmpld_3 = "7c200040XRR",
353 cmpld_2 = "7c200040-RR", 612 cmpld_2 = "7c200040-RR",
613 lvsr_3 = "7c00004cVRR",
354 subf_3 = "7c000050RRR.", 614 subf_3 = "7c000050RRR.",
355 sub_3 = "7c000050RRR~.", 615 sub_3 = "7c000050RRR~.",
616 lbarx_3 = "7c000068RR0R",
356 ldux_3 = "7c00006aRR0R", 617 ldux_3 = "7c00006aRR0R",
357 dcbst_2 = "7c00006c-RR", 618 dcbst_2 = "7c00006c-RR",
358 lwzux_3 = "7c00006eRR0R", 619 lwzux_3 = "7c00006eRR0R",
359 cntlzd_2 = "7c000074RR~", 620 cntlzd_2 = "7c000074RR~",
360 andc_3 = "7c000078RR~R.", 621 andc_3 = "7c000078RR~R.",
361 td_3 = "7c000088ARR", 622 td_3 = "7c000088ARR",
623 lvewx_3 = "7c00008eVRR",
362 mulhd_3 = "7c000092RRR.", 624 mulhd_3 = "7c000092RRR.",
625 addg6s_3 = "7c000094RRR",
363 mulhw_3 = "7c000096RRR.", 626 mulhw_3 = "7c000096RRR.",
627 dlmzb_3 = "7c00009cRR~R.",
364 ldarx_3 = "7c0000a8RR0R", 628 ldarx_3 = "7c0000a8RR0R",
365 dcbf_2 = "7c0000ac-RR", 629 dcbf_2 = "7c0000ac-RR",
366 lbzx_3 = "7c0000aeRR0R", 630 lbzx_3 = "7c0000aeRR0R",
631 lvx_3 = "7c0000ceVRR",
367 neg_2 = "7c0000d0RR.", 632 neg_2 = "7c0000d0RR.",
633 lharx_3 = "7c0000e8RR0R",
368 lbzux_3 = "7c0000eeRR0R", 634 lbzux_3 = "7c0000eeRR0R",
369 popcntb_2 = "7c0000f4RR~", 635 popcntb_2 = "7c0000f4RR~",
370 not_2 = "7c0000f8RR~%.", 636 not_2 = "7c0000f8RR~%.",
371 nor_3 = "7c0000f8RR~R.", 637 nor_3 = "7c0000f8RR~R.",
638 stvebx_3 = "7c00010eVRR",
372 subfe_3 = "7c000110RRR.", 639 subfe_3 = "7c000110RRR.",
373 sube_3 = "7c000110RRR~.", 640 sube_3 = "7c000110RRR~.",
374 adde_3 = "7c000114RRR.", 641 adde_3 = "7c000114RRR.",
375 stdx_3 = "7c00012aRR0R", 642 stdx_3 = "7c00012aRR0R",
376 stwcx_3 = "7c00012cRR0R.", 643 ["stwcx._3"] = "7c00012dRR0R.",
377 stwx_3 = "7c00012eRR0R", 644 stwx_3 = "7c00012eRR0R",
378 prtyw_2 = "7c000134RR~", 645 prtyw_2 = "7c000134RR~",
646 stvehx_3 = "7c00014eVRR",
379 stdux_3 = "7c00016aRR0R", 647 stdux_3 = "7c00016aRR0R",
648 ["stqcx._3"] = "7c00016dR:R0R.",
380 stwux_3 = "7c00016eRR0R", 649 stwux_3 = "7c00016eRR0R",
381 prtyd_2 = "7c000174RR~", 650 prtyd_2 = "7c000174RR~",
651 stvewx_3 = "7c00018eVRR",
382 subfze_2 = "7c000190RR.", 652 subfze_2 = "7c000190RR.",
383 addze_2 = "7c000194RR.", 653 addze_2 = "7c000194RR.",
384 stdcx_3 = "7c0001acRR0R.", 654 ["stdcx._3"] = "7c0001adRR0R.",
385 stbx_3 = "7c0001aeRR0R", 655 stbx_3 = "7c0001aeRR0R",
656 stvx_3 = "7c0001ceVRR",
386 subfme_2 = "7c0001d0RR.", 657 subfme_2 = "7c0001d0RR.",
387 mulld_3 = "7c0001d2RRR.", 658 mulld_3 = "7c0001d2RRR.",
388 addme_2 = "7c0001d4RR.", 659 addme_2 = "7c0001d4RR.",
389 mullw_3 = "7c0001d6RRR.", 660 mullw_3 = "7c0001d6RRR.",
390 dcbtst_2 = "7c0001ec-RR", 661 dcbtst_2 = "7c0001ec-RR",
391 stbux_3 = "7c0001eeRR0R", 662 stbux_3 = "7c0001eeRR0R",
663 bpermd_3 = "7c0001f8RR~R",
664 lvepxl_3 = "7c00020eVRR",
392 add_3 = "7c000214RRR.", 665 add_3 = "7c000214RRR.",
666 lqarx_3 = "7c000228R:R0R",
393 dcbt_2 = "7c00022c-RR", 667 dcbt_2 = "7c00022c-RR",
394 lhzx_3 = "7c00022eRR0R", 668 lhzx_3 = "7c00022eRR0R",
669 cdtbcd_2 = "7c000234RR~",
395 eqv_3 = "7c000238RR~R.", 670 eqv_3 = "7c000238RR~R.",
671 lvepx_3 = "7c00024eVRR",
396 eciwx_3 = "7c00026cRR0R", 672 eciwx_3 = "7c00026cRR0R",
397 lhzux_3 = "7c00026eRR0R", 673 lhzux_3 = "7c00026eRR0R",
674 cbcdtd_2 = "7c000274RR~",
398 xor_3 = "7c000278RR~R.", 675 xor_3 = "7c000278RR~R.",
399 mfspefscr_1 = "7c0082a6R", 676 mfspefscr_1 = "7c0082a6R",
400 mfxer_1 = "7c0102a6R", 677 mfxer_1 = "7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
404 lhax_3 = "7c0002aeRR0R", 681 lhax_3 = "7c0002aeRR0R",
405 mftb_1 = "7c0c42e6R", 682 mftb_1 = "7c0c42e6R",
406 mftbu_1 = "7c0d42e6R", 683 mftbu_1 = "7c0d42e6R",
684 lvxl_3 = "7c0002ceVRR",
407 lwaux_3 = "7c0002eaRR0R", 685 lwaux_3 = "7c0002eaRR0R",
408 lhaux_3 = "7c0002eeRR0R", 686 lhaux_3 = "7c0002eeRR0R",
687 popcntw_2 = "7c0002f4RR~",
688 divdeu_3 = "7c000312RRR.",
689 divweu_3 = "7c000316RRR.",
409 sthx_3 = "7c00032eRR0R", 690 sthx_3 = "7c00032eRR0R",
410 orc_3 = "7c000338RR~R.", 691 orc_3 = "7c000338RR~R.",
411 ecowx_3 = "7c00036cRR0R", 692 ecowx_3 = "7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
420 mtctr_1 = "7c0903a6R", 701 mtctr_1 = "7c0903a6R",
421 dcbi_2 = "7c0003ac-RR", 702 dcbi_2 = "7c0003ac-RR",
422 nand_3 = "7c0003b8RR~R.", 703 nand_3 = "7c0003b8RR~R.",
704 dsn_2 = "7c0003c6-RR",
705 stvxl_3 = "7c0003ceVRR",
423 divd_3 = "7c0003d2RRR.", 706 divd_3 = "7c0003d2RRR.",
424 divw_3 = "7c0003d6RRR.", 707 divw_3 = "7c0003d6RRR.",
708 popcntd_2 = "7c0003f4RR~",
425 cmpb_3 = "7c0003f8RR~R.", 709 cmpb_3 = "7c0003f8RR~R.",
426 mcrxr_1 = "7c000400X", 710 mcrxr_1 = "7c000400X",
711 lbdx_3 = "7c000406RRR",
427 subfco_3 = "7c000410RRR.", 712 subfco_3 = "7c000410RRR.",
428 subco_3 = "7c000410RRR~.", 713 subco_3 = "7c000410RRR~.",
429 addco_3 = "7c000414RRR.", 714 addco_3 = "7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
433 lfsx_3 = "7c00042eFR0R", 718 lfsx_3 = "7c00042eFR0R",
434 srw_3 = "7c000430RR~R.", 719 srw_3 = "7c000430RR~R.",
435 srd_3 = "7c000436RR~R.", 720 srd_3 = "7c000436RR~R.",
721 lhdx_3 = "7c000446RRR",
436 subfo_3 = "7c000450RRR.", 722 subfo_3 = "7c000450RRR.",
437 subo_3 = "7c000450RRR~.", 723 subo_3 = "7c000450RRR~.",
438 lfsux_3 = "7c00046eFR0R", 724 lfsux_3 = "7c00046eFR0R",
725 lwdx_3 = "7c000486RRR",
439 lswi_3 = "7c0004aaRR0A", 726 lswi_3 = "7c0004aaRR0A",
440 sync_0 = "7c0004ac", 727 sync_0 = "7c0004ac",
441 lwsync_0 = "7c2004ac", 728 lwsync_0 = "7c2004ac",
442 ptesync_0 = "7c4004ac", 729 ptesync_0 = "7c4004ac",
443 lfdx_3 = "7c0004aeFR0R", 730 lfdx_3 = "7c0004aeFR0R",
731 lddx_3 = "7c0004c6RRR",
444 nego_2 = "7c0004d0RR.", 732 nego_2 = "7c0004d0RR.",
445 lfdux_3 = "7c0004eeFR0R", 733 lfdux_3 = "7c0004eeFR0R",
734 stbdx_3 = "7c000506RRR",
446 subfeo_3 = "7c000510RRR.", 735 subfeo_3 = "7c000510RRR.",
447 subeo_3 = "7c000510RRR~.", 736 subeo_3 = "7c000510RRR~.",
448 addeo_3 = "7c000514RRR.", 737 addeo_3 = "7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
450 stswx_3 = "7c00052aRR0R", 739 stswx_3 = "7c00052aRR0R",
451 stwbrx_3 = "7c00052cRR0R", 740 stwbrx_3 = "7c00052cRR0R",
452 stfsx_3 = "7c00052eFR0R", 741 stfsx_3 = "7c00052eFR0R",
742 sthdx_3 = "7c000546RRR",
743 ["stbcx._3"] = "7c00056dRRR",
453 stfsux_3 = "7c00056eFR0R", 744 stfsux_3 = "7c00056eFR0R",
745 stwdx_3 = "7c000586RRR",
454 subfzeo_2 = "7c000590RR.", 746 subfzeo_2 = "7c000590RR.",
455 addzeo_2 = "7c000594RR.", 747 addzeo_2 = "7c000594RR.",
456 stswi_3 = "7c0005aaRR0A", 748 stswi_3 = "7c0005aaRR0A",
749 ["sthcx._3"] = "7c0005adRRR",
457 stfdx_3 = "7c0005aeFR0R", 750 stfdx_3 = "7c0005aeFR0R",
751 stddx_3 = "7c0005c6RRR",
458 subfmeo_2 = "7c0005d0RR.", 752 subfmeo_2 = "7c0005d0RR.",
459 mulldo_3 = "7c0005d2RRR.", 753 mulldo_3 = "7c0005d2RRR.",
460 addmeo_2 = "7c0005d4RR.", 754 addmeo_2 = "7c0005d4RR.",
461 mullwo_3 = "7c0005d6RRR.", 755 mullwo_3 = "7c0005d6RRR.",
462 dcba_2 = "7c0005ec-RR", 756 dcba_2 = "7c0005ec-RR",
463 stfdux_3 = "7c0005eeFR0R", 757 stfdux_3 = "7c0005eeFR0R",
758 stvepxl_3 = "7c00060eVRR",
464 addo_3 = "7c000614RRR.", 759 addo_3 = "7c000614RRR.",
465 lhbrx_3 = "7c00062cRR0R", 760 lhbrx_3 = "7c00062cRR0R",
761 lfdpx_3 = "7c00062eF:RR",
466 sraw_3 = "7c000630RR~R.", 762 sraw_3 = "7c000630RR~R.",
467 srad_3 = "7c000634RR~R.", 763 srad_3 = "7c000634RR~R.",
764 lfddx_3 = "7c000646FRR",
765 stvepx_3 = "7c00064eVRR",
468 srawi_3 = "7c000670RR~A.", 766 srawi_3 = "7c000670RR~A.",
469 sradi_3 = "7c000674RR~H.", 767 sradi_3 = "7c000674RR~H.",
470 eieio_0 = "7c0006ac", 768 eieio_0 = "7c0006ac",
471 lfiwax_3 = "7c0006aeFR0R", 769 lfiwax_3 = "7c0006aeFR0R",
770 divdeuo_3 = "7c000712RRR.",
771 divweuo_3 = "7c000716RRR.",
472 sthbrx_3 = "7c00072cRR0R", 772 sthbrx_3 = "7c00072cRR0R",
773 stfdpx_3 = "7c00072eF:RR",
473 extsh_2 = "7c000734RR~.", 774 extsh_2 = "7c000734RR~.",
775 stfddx_3 = "7c000746FRR",
776 divdeo_3 = "7c000752RRR.",
777 divweo_3 = "7c000756RRR.",
474 extsb_2 = "7c000774RR~.", 778 extsb_2 = "7c000774RR~.",
475 divduo_3 = "7c000792RRR.", 779 divduo_3 = "7c000792RRR.",
476 divwou_3 = "7c000796RRR.", 780 divwou_3 = "7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
481 divwo_3 = "7c0007d6RRR.", 785 divwo_3 = "7c0007d6RRR.",
482 dcbz_2 = "7c0007ec-RR", 786 dcbz_2 = "7c0007ec-RR",
483 787
788 ["tbegin._1"] = "7c00051d1",
789 ["tbegin._0"] = "7c00051d",
790 ["tend._1"] = "7c00055dY",
791 ["tend._0"] = "7c00055d",
792 ["tendall._0"] = "7e00055d",
793 tcheck_1 = "7c00059cX",
794 ["tsr._1"] = "7c0005dd1",
795 ["tsuspend._0"] = "7c0005dd",
796 ["tresume._0"] = "7c2005dd",
797 ["tabortwc._3"] = "7c00061dARR",
798 ["tabortdc._3"] = "7c00065dARR",
799 ["tabortwci._3"] = "7c00069dARS",
800 ["tabortdci._3"] = "7c0006ddARS",
801 ["tabort._1"] = "7c00071d-R-",
802 ["treclaim._1"] = "7c00075d-R",
803 ["trechkpt._0"] = "7c0007dd",
804
805 lxsiwzx_3 = "7c000018QRR",
806 lxsiwax_3 = "7c000098QRR",
807 mfvsrd_2 = "7c000066-Rq",
808 mfvsrwz_2 = "7c0000e6-Rq",
809 stxsiwx_3 = "7c000118QRR",
810 mtvsrd_2 = "7c000166QR",
811 mtvsrwa_2 = "7c0001a6QR",
812 lxvdsx_3 = "7c000298QRR",
813 lxsspx_3 = "7c000418QRR",
814 lxsdx_3 = "7c000498QRR",
815 stxsspx_3 = "7c000518QRR",
816 stxsdx_3 = "7c000598QRR",
817 lxvw4x_3 = "7c000618QRR",
818 lxvd2x_3 = "7c000698QRR",
819 stxvw4x_3 = "7c000718QRR",
820 stxvd2x_3 = "7c000798QRR",
821
484 -- Primary opcode 30: 822 -- Primary opcode 30:
485 rldicl_4 = "78000000RR~HM.", 823 rldicl_4 = "78000000RR~HM.",
486 rldicr_4 = "78000004RR~HM.", 824 rldicr_4 = "78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
489 rldcl_4 = "78000010RR~RM.", 827 rldcl_4 = "78000010RR~RM.",
490 rldcr_4 = "78000012RR~RM.", 828 rldcr_4 = "78000012RR~RM.",
491 829
830 rotldi_3 = op_alias("rldicl_4", function(p)
831 p[4] = "0"
832 end),
833 rotrdi_3 = op_alias("rldicl_4", function(p)
834 p[3] = "64-("..p[3]..")"; p[4] = "0"
835 end),
836 rotld_3 = op_alias("rldcl_4", function(p)
837 p[4] = "0"
838 end),
839 sldi_3 = op_alias("rldicr_4", function(p)
840 p[4] = "63-("..p[3]..")"
841 end),
842 srdi_3 = op_alias("rldicl_4", function(p)
843 p[4] = p[3]; p[3] = "64-("..p[3]..")"
844 end),
845 clrldi_3 = op_alias("rldicl_4", function(p)
846 p[4] = p[3]; p[3] = "0"
847 end),
848 clrrdi_3 = op_alias("rldicr_4", function(p)
849 p[4] = "63-("..p[3]..")"; p[3] = "0"
850 end),
851
852 -- Primary opcode 56:
853 lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8.
854
855 -- Primary opcode 57:
856 lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4.
857
492 -- Primary opcode 59: 858 -- Primary opcode 59:
493 fdivs_3 = "ec000024FFF.", 859 fdivs_3 = "ec000024FFF.",
494 fsubs_3 = "ec000028FFF.", 860 fsubs_3 = "ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
501 fmadds_4 = "ec00003aFFFF~.", 867 fmadds_4 = "ec00003aFFFF~.",
502 fnmsubs_4 = "ec00003cFFFF~.", 868 fnmsubs_4 = "ec00003cFFFF~.",
503 fnmadds_4 = "ec00003eFFFF~.", 869 fnmadds_4 = "ec00003eFFFF~.",
870 fcfids_2 = "ec00069cF-F.",
871 fcfidus_2 = "ec00079cF-F.",
872
873 dadd_3 = "ec000004FFF.",
874 dqua_4 = "ec000006FFFZ.",
875 dmul_3 = "ec000044FFF.",
876 drrnd_4 = "ec000046FFFZ.",
877 dscli_3 = "ec000084FF6.",
878 dquai_4 = "ec000086SF~FZ.",
879 dscri_3 = "ec0000c4FF6.",
880 drintx_4 = "ec0000c61F~FZ.",
881 dcmpo_3 = "ec000104XFF",
882 dtstex_3 = "ec000144XFF",
883 dtstdc_3 = "ec000184XF6",
884 dtstdg_3 = "ec0001c4XF6",
885 drintn_4 = "ec0001c61F~FZ.",
886 dctdp_2 = "ec000204F-F.",
887 dctfix_2 = "ec000244F-F.",
888 ddedpd_3 = "ec000284ZF~F.",
889 dxex_2 = "ec0002c4F-F.",
890 dsub_3 = "ec000404FFF.",
891 ddiv_3 = "ec000444FFF.",
892 dcmpu_3 = "ec000504XFF",
893 dtstsf_3 = "ec000544XFF",
894 drsp_2 = "ec000604F-F.",
895 dcffix_2 = "ec000644F-F.",
896 denbcd_3 = "ec000684YF~F.",
897 diex_3 = "ec0006c4FFF.",
898
899 -- Primary opcode 60:
900 xsaddsp_3 = "f0000000QQQ",
901 xsmaddasp_3 = "f0000008QQQ",
902 xxsldwi_4 = "f0000010QQQz",
903 xsrsqrtesp_2 = "f0000028Q-Q",
904 xssqrtsp_2 = "f000002cQ-Q",
905 xxsel_4 = "f0000030QQQQ",
906 xssubsp_3 = "f0000040QQQ",
907 xsmaddmsp_3 = "f0000048QQQ",
908 xxpermdi_4 = "f0000050QQQz",
909 xsresp_2 = "f0000068Q-Q",
910 xsmulsp_3 = "f0000080QQQ",
911 xsmsubasp_3 = "f0000088QQQ",
912 xxmrghw_3 = "f0000090QQQ",
913 xsdivsp_3 = "f00000c0QQQ",
914 xsmsubmsp_3 = "f00000c8QQQ",
915 xsadddp_3 = "f0000100QQQ",
916 xsmaddadp_3 = "f0000108QQQ",
917 xscmpudp_3 = "f0000118XQQ",
918 xscvdpuxws_2 = "f0000120Q-Q",
919 xsrdpi_2 = "f0000124Q-Q",
920 xsrsqrtedp_2 = "f0000128Q-Q",
921 xssqrtdp_2 = "f000012cQ-Q",
922 xssubdp_3 = "f0000140QQQ",
923 xsmaddmdp_3 = "f0000148QQQ",
924 xscmpodp_3 = "f0000158XQQ",
925 xscvdpsxws_2 = "f0000160Q-Q",
926 xsrdpiz_2 = "f0000164Q-Q",
927 xsredp_2 = "f0000168Q-Q",
928 xsmuldp_3 = "f0000180QQQ",
929 xsmsubadp_3 = "f0000188QQQ",
930 xxmrglw_3 = "f0000190QQQ",
931 xsrdpip_2 = "f00001a4Q-Q",
932 xstsqrtdp_2 = "f00001a8X-Q",
933 xsrdpic_2 = "f00001acQ-Q",
934 xsdivdp_3 = "f00001c0QQQ",
935 xsmsubmdp_3 = "f00001c8QQQ",
936 xsrdpim_2 = "f00001e4Q-Q",
937 xstdivdp_3 = "f00001e8XQQ",
938 xvaddsp_3 = "f0000200QQQ",
939 xvmaddasp_3 = "f0000208QQQ",
940 xvcmpeqsp_3 = "f0000218QQQ",
941 xvcvspuxws_2 = "f0000220Q-Q",
942 xvrspi_2 = "f0000224Q-Q",
943 xvrsqrtesp_2 = "f0000228Q-Q",
944 xvsqrtsp_2 = "f000022cQ-Q",
945 xvsubsp_3 = "f0000240QQQ",
946 xvmaddmsp_3 = "f0000248QQQ",
947 xvcmpgtsp_3 = "f0000258QQQ",
948 xvcvspsxws_2 = "f0000260Q-Q",
949 xvrspiz_2 = "f0000264Q-Q",
950 xvresp_2 = "f0000268Q-Q",
951 xvmulsp_3 = "f0000280QQQ",
952 xvmsubasp_3 = "f0000288QQQ",
953 xxspltw_3 = "f0000290QQg~",
954 xvcmpgesp_3 = "f0000298QQQ",
955 xvcvuxwsp_2 = "f00002a0Q-Q",
956 xvrspip_2 = "f00002a4Q-Q",
957 xvtsqrtsp_2 = "f00002a8X-Q",
958 xvrspic_2 = "f00002acQ-Q",
959 xvdivsp_3 = "f00002c0QQQ",
960 xvmsubmsp_3 = "f00002c8QQQ",
961 xvcvsxwsp_2 = "f00002e0Q-Q",
962 xvrspim_2 = "f00002e4Q-Q",
963 xvtdivsp_3 = "f00002e8XQQ",
964 xvadddp_3 = "f0000300QQQ",
965 xvmaddadp_3 = "f0000308QQQ",
966 xvcmpeqdp_3 = "f0000318QQQ",
967 xvcvdpuxws_2 = "f0000320Q-Q",
968 xvrdpi_2 = "f0000324Q-Q",
969 xvrsqrtedp_2 = "f0000328Q-Q",
970 xvsqrtdp_2 = "f000032cQ-Q",
971 xvsubdp_3 = "f0000340QQQ",
972 xvmaddmdp_3 = "f0000348QQQ",
973 xvcmpgtdp_3 = "f0000358QQQ",
974 xvcvdpsxws_2 = "f0000360Q-Q",
975 xvrdpiz_2 = "f0000364Q-Q",
976 xvredp_2 = "f0000368Q-Q",
977 xvmuldp_3 = "f0000380QQQ",
978 xvmsubadp_3 = "f0000388QQQ",
979 xvcmpgedp_3 = "f0000398QQQ",
980 xvcvuxwdp_2 = "f00003a0Q-Q",
981 xvrdpip_2 = "f00003a4Q-Q",
982 xvtsqrtdp_2 = "f00003a8X-Q",
983 xvrdpic_2 = "f00003acQ-Q",
984 xvdivdp_3 = "f00003c0QQQ",
985 xvmsubmdp_3 = "f00003c8QQQ",
986 xvcvsxwdp_2 = "f00003e0Q-Q",
987 xvrdpim_2 = "f00003e4Q-Q",
988 xvtdivdp_3 = "f00003e8XQQ",
989 xsnmaddasp_3 = "f0000408QQQ",
990 xxland_3 = "f0000410QQQ",
991 xscvdpsp_2 = "f0000424Q-Q",
992 xscvdpspn_2 = "f000042cQ-Q",
993 xsnmaddmsp_3 = "f0000448QQQ",
994 xxlandc_3 = "f0000450QQQ",
995 xsrsp_2 = "f0000464Q-Q",
996 xsnmsubasp_3 = "f0000488QQQ",
997 xxlor_3 = "f0000490QQQ",
998 xscvuxdsp_2 = "f00004a0Q-Q",
999 xsnmsubmsp_3 = "f00004c8QQQ",
1000 xxlxor_3 = "f00004d0QQQ",
1001 xscvsxdsp_2 = "f00004e0Q-Q",
1002 xsmaxdp_3 = "f0000500QQQ",
1003 xsnmaddadp_3 = "f0000508QQQ",
1004 xxlnor_3 = "f0000510QQQ",
1005 xscvdpuxds_2 = "f0000520Q-Q",
1006 xscvspdp_2 = "f0000524Q-Q",
1007 xscvspdpn_2 = "f000052cQ-Q",
1008 xsmindp_3 = "f0000540QQQ",
1009 xsnmaddmdp_3 = "f0000548QQQ",
1010 xxlorc_3 = "f0000550QQQ",
1011 xscvdpsxds_2 = "f0000560Q-Q",
1012 xsabsdp_2 = "f0000564Q-Q",
1013 xscpsgndp_3 = "f0000580QQQ",
1014 xsnmsubadp_3 = "f0000588QQQ",
1015 xxlnand_3 = "f0000590QQQ",
1016 xscvuxddp_2 = "f00005a0Q-Q",
1017 xsnabsdp_2 = "f00005a4Q-Q",
1018 xsnmsubmdp_3 = "f00005c8QQQ",
1019 xxleqv_3 = "f00005d0QQQ",
1020 xscvsxddp_2 = "f00005e0Q-Q",
1021 xsnegdp_2 = "f00005e4Q-Q",
1022 xvmaxsp_3 = "f0000600QQQ",
1023 xvnmaddasp_3 = "f0000608QQQ",
1024 ["xvcmpeqsp._3"] = "f0000618QQQ",
1025 xvcvspuxds_2 = "f0000620Q-Q",
1026 xvcvdpsp_2 = "f0000624Q-Q",
1027 xvminsp_3 = "f0000640QQQ",
1028 xvnmaddmsp_3 = "f0000648QQQ",
1029 ["xvcmpgtsp._3"] = "f0000658QQQ",
1030 xvcvspsxds_2 = "f0000660Q-Q",
1031 xvabssp_2 = "f0000664Q-Q",
1032 xvcpsgnsp_3 = "f0000680QQQ",
1033 xvnmsubasp_3 = "f0000688QQQ",
1034 ["xvcmpgesp._3"] = "f0000698QQQ",
1035 xvcvuxdsp_2 = "f00006a0Q-Q",
1036 xvnabssp_2 = "f00006a4Q-Q",
1037 xvnmsubmsp_3 = "f00006c8QQQ",
1038 xvcvsxdsp_2 = "f00006e0Q-Q",
1039 xvnegsp_2 = "f00006e4Q-Q",
1040 xvmaxdp_3 = "f0000700QQQ",
1041 xvnmaddadp_3 = "f0000708QQQ",
1042 ["xvcmpeqdp._3"] = "f0000718QQQ",
1043 xvcvdpuxds_2 = "f0000720Q-Q",
1044 xvcvspdp_2 = "f0000724Q-Q",
1045 xvmindp_3 = "f0000740QQQ",
1046 xvnmaddmdp_3 = "f0000748QQQ",
1047 ["xvcmpgtdp._3"] = "f0000758QQQ",
1048 xvcvdpsxds_2 = "f0000760Q-Q",
1049 xvabsdp_2 = "f0000764Q-Q",
1050 xvcpsgndp_3 = "f0000780QQQ",
1051 xvnmsubadp_3 = "f0000788QQQ",
1052 ["xvcmpgedp._3"] = "f0000798QQQ",
1053 xvcvuxddp_2 = "f00007a0Q-Q",
1054 xvnabsdp_2 = "f00007a4Q-Q",
1055 xvnmsubmdp_3 = "f00007c8QQQ",
1056 xvcvsxddp_2 = "f00007e0Q-Q",
1057 xvnegdp_2 = "f00007e4Q-Q",
1058
1059 -- Primary opcode 61:
1060 stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4.
1061
1062 -- Primary opcode 62:
1063 stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8.
504 1064
505 -- Primary opcode 63: 1065 -- Primary opcode 63:
506 fdiv_3 = "fc000024FFF.", 1066 fdiv_3 = "fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
526 frsp_2 = "fc000018F-F.", 1086 frsp_2 = "fc000018F-F.",
527 fctiw_2 = "fc00001cF-F.", 1087 fctiw_2 = "fc00001cF-F.",
528 fctiwz_2 = "fc00001eF-F.", 1088 fctiwz_2 = "fc00001eF-F.",
1089 ftdiv_2 = "fc000100X-F.",
1090 fctiwu_2 = "fc00011cF-F.",
1091 fctiwuz_2 = "fc00011eF-F.",
529 mtfsfi_2 = "fc00010cAA", -- NYI: upshift. 1092 mtfsfi_2 = "fc00010cAA", -- NYI: upshift.
530 fnabs_2 = "fc000110F-F.", 1093 fnabs_2 = "fc000110F-F.",
1094 ftsqrt_2 = "fc000140X-F.",
531 fabs_2 = "fc000210F-F.", 1095 fabs_2 = "fc000210F-F.",
532 frin_2 = "fc000310F-F.", 1096 frin_2 = "fc000310F-F.",
533 friz_2 = "fc000350F-F.", 1097 friz_2 = "fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
537 -- NYI: mtfsf, mtfsb0, mtfsb1. 1101 -- NYI: mtfsf, mtfsb0, mtfsb1.
538 fctid_2 = "fc00065cF-F.", 1102 fctid_2 = "fc00065cF-F.",
539 fctidz_2 = "fc00065eF-F.", 1103 fctidz_2 = "fc00065eF-F.",
1104 fmrgow_3 = "fc00068cFFF",
540 fcfid_2 = "fc00069cF-F.", 1105 fcfid_2 = "fc00069cF-F.",
1106 fctidu_2 = "fc00075cF-F.",
1107 fctiduz_2 = "fc00075eF-F.",
1108 fmrgew_3 = "fc00078cFFF",
1109 fcfidu_2 = "fc00079cF-F.",
1110
1111 daddq_3 = "fc000004F:F:F:.",
1112 dquaq_4 = "fc000006F:F:F:Z.",
1113 dmulq_3 = "fc000044F:F:F:.",
1114 drrndq_4 = "fc000046F:F:F:Z.",
1115 dscliq_3 = "fc000084F:F:6.",
1116 dquaiq_4 = "fc000086SF:~F:Z.",
1117 dscriq_3 = "fc0000c4F:F:6.",
1118 drintxq_4 = "fc0000c61F:~F:Z.",
1119 dcmpoq_3 = "fc000104XF:F:",
1120 dtstexq_3 = "fc000144XF:F:",
1121 dtstdcq_3 = "fc000184XF:6",
1122 dtstdgq_3 = "fc0001c4XF:6",
1123 drintnq_4 = "fc0001c61F:~F:Z.",
1124 dctqpq_2 = "fc000204F:-F:.",
1125 dctfixq_2 = "fc000244F:-F:.",
1126 ddedpdq_3 = "fc000284ZF:~F:.",
1127 dxexq_2 = "fc0002c4F:-F:.",
1128 dsubq_3 = "fc000404F:F:F:.",
1129 ddivq_3 = "fc000444F:F:F:.",
1130 dcmpuq_3 = "fc000504XF:F:",
1131 dtstsfq_3 = "fc000544XF:F:",
1132 drdpq_2 = "fc000604F:-F:.",
1133 dcffixq_2 = "fc000644F:-F:.",
1134 denbcdq_3 = "fc000684YF:~F:.",
1135 diexq_3 = "fc0006c4F:FF:.",
541 1136
542 -- Primary opcode 4, SPE APU extension: 1137 -- Primary opcode 4, SPE APU extension:
543 evaddw_3 = "10000200RRR", 1138 evaddw_3 = "10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
822do 1417do
823 local t = {} 1418 local t = {}
824 for k,v in pairs(map_op) do 1419 for k,v in pairs(map_op) do
825 if sub(v, -1) == "." then 1420 if type(v) == "string" and sub(v, -1) == "." then
826 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) 1421 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
827 t[sub(k, 1, -3).."."..sub(k, -2)] = v2 1422 t[sub(k, 1, -3).."."..sub(k, -2)] = v2
828 end 1423 end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
884 werror("bad register name `"..expr.."'") 1479 werror("bad register name `"..expr.."'")
885end 1480end
886 1481
1482local function parse_vr(expr)
1483 local r = match(expr, "^v([1-3]?[0-9])$")
1484 if r then
1485 r = tonumber(r)
1486 if r <= 31 then return r end
1487 end
1488 werror("bad register name `"..expr.."'")
1489end
1490
1491local function parse_vs(expr)
1492 local r = match(expr, "^vs([1-6]?[0-9])$")
1493 if r then
1494 r = tonumber(r)
1495 if r <= 63 then return r end
1496 end
1497 werror("bad register name `"..expr.."'")
1498end
1499
887local function parse_cr(expr) 1500local function parse_cr(expr)
888 local r = match(expr, "^cr([0-7])$") 1501 local r = match(expr, "^cr([0-7])$")
889 if r then return tonumber(r) end 1502 if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
900 werror("bad condition bit name `"..expr.."'") 1513 werror("bad condition bit name `"..expr.."'")
901end 1514end
902 1515
1516local parse_ctx = {}
1517
1518local loadenv = setfenv and function(s)
1519 local code = loadstring(s, "")
1520 if code then setfenv(code, parse_ctx) end
1521 return code
1522end or function(s)
1523 return load(s, "", nil, parse_ctx)
1524end
1525
1526-- Try to parse simple arithmetic, too, since some basic ops are aliases.
1527local function parse_number(n)
1528 local x = tonumber(n)
1529 if x then return x end
1530 local code = loadenv("return "..n)
1531 if code then
1532 local ok, y = pcall(code)
1533 if ok then return y end
1534 end
1535 return nil
1536end
1537
903local function parse_imm(imm, bits, shift, scale, signed) 1538local function parse_imm(imm, bits, shift, scale, signed)
904 local n = tonumber(imm) 1539 local n = parse_number(imm)
905 if n then 1540 if n then
906 local m = sar(n, scale) 1541 local m = sar(n, scale)
907 if shl(m, scale) == n then 1542 if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
914 end 1549 end
915 end 1550 end
916 werror("out of range immediate `"..imm.."'") 1551 werror("out of range immediate `"..imm.."'")
917 elseif match(imm, "^r([1-3]?[0-9])$") or 1552 elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
1553 match(imm, "^vs([1-6]?[0-9])$") or
918 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1554 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
919 werror("expected immediate operand, got register") 1555 werror("expected immediate operand, got register")
920 else 1556 else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
924end 1560end
925 1561
926local function parse_shiftmask(imm, isshift) 1562local function parse_shiftmask(imm, isshift)
927 local n = tonumber(imm) 1563 local n = parse_number(imm)
928 if n then 1564 if n then
929 if shr(n, 6) == 0 then 1565 if shr(n, 6) == 0 then
930 local lsb = band(imm, 31) 1566 local lsb = band(n, 31)
931 local msb = imm - lsb 1567 local msb = n - lsb
932 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) 1568 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
933 end 1569 end
934 werror("out of range immediate `"..imm.."'") 1570 werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
936 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1572 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
937 werror("expected immediate operand, got register") 1573 werror("expected immediate operand, got register")
938 else 1574 else
939 werror("NYI: parameterized 64 bit shift/mask") 1575 waction("IMMSH", isshift and 1 or 0, imm)
1576 return 0;
940 end 1577 end
941end 1578end
942 1579
@@ -1011,7 +1648,7 @@ end
1011------------------------------------------------------------------------------ 1648------------------------------------------------------------------------------
1012 1649
1013-- Handle opcodes defined with template strings. 1650-- Handle opcodes defined with template strings.
1014map_op[".template__"] = function(params, template, nparams) 1651op_template = function(params, template, nparams)
1015 if not params then return sub(template, 9) end 1652 if not params then return sub(template, 9) end
1016 local op = tonumber(sub(template, 1, 8), 16) 1653 local op = tonumber(sub(template, 1, 8), 16)
1017 local n, rs = 1, 26 1654 local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
1027 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 1664 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
1028 elseif p == "F" then 1665 elseif p == "F" then
1029 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 1666 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
1667 elseif p == "V" then
1668 rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
1669 elseif p == "Q" then
1670 local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
1671 local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
1672 op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
1673 elseif p == "q" then
1674 local vs = parse_vs(params[n]); n = n + 1
1675 op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
1030 elseif p == "A" then 1676 elseif p == "A" then
1031 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 1677 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
1032 elseif p == "S" then 1678 elseif p == "S" then
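(Illustrative aside, not part of the patch.) The new "Q" operand handler in the hunk above packs a 64-entry VSX register number into the usual 5-bit register field plus one extension bit whose position depends on the field slot. A minimal standalone Lua sketch of that packing, reusing the exact expression from the hunk (LuaJIT bit library assumed; register/bit positions follow the code, e.g. rs = 21/16/11 for the three operands of an XX3-form template):

  local bit = bit or require("bit")
  local band, shl, shr = bit.band, bit.lshift, bit.rshift

  -- Place VSX register vs (0-63) into the field starting at bit rs.
  local function put_vsx(op, vs, rs)
    local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
    return op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
  end

  -- xsadddp vs52, vs1, vs33 with template "f0000100QQQ":
  local op = 0xf0000100
  op = put_vsx(op, 52, 21)  -- target: bit 5 of the number lands in bit 0 (TX)
  op = put_vsx(op,  1, 16)  -- source A: its extension bit would land in bit 2 (AX)
  op = put_vsx(op, 33, 11)  -- source B: extension bit lands in bit 1 (BX)
  print(bit.tohex(op))      --> f2810903, matching what op_template would emit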
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
1047 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 1693 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
1048 elseif p == "X" then 1694 elseif p == "X" then
1049 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 1695 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
1696 elseif p == "1" then
1697 rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
1698 elseif p == "g" then
1699 rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
1700 elseif p == "3" then
1701 rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
1702 elseif p == "P" then
1703 rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1704 elseif p == "p" then
1705 op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1706 elseif p == "6" then
1707 rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
1708 elseif p == "Y" then
1709 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
1710 elseif p == "y" then
1711 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
1712 elseif p == "Z" then
1713 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
1714 elseif p == "z" then
1715 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
1050 elseif p == "W" then 1716 elseif p == "W" then
1051 op = op + parse_cr(params[n]); n = n + 1 1717 op = op + parse_cr(params[n]); n = n + 1
1052 elseif p == "G" then 1718 elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
1071 local lo = band(op, mm) 1737 local lo = band(op, mm)
1072 local hi = band(op, shl(mm, 5)) 1738 local hi = band(op, shl(mm, 5))
1073 op = op - lo - hi + shl(lo, 5) + shr(hi, 5) 1739 op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
1740 elseif p == ":" then
1741 if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
1074 elseif p == "-" then 1742 elseif p == "-" then
1075 rs = rs - 5 1743 rs = rs - 5
1076 elseif p == "." then 1744 elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
1082 wputpos(pos, op) 1750 wputpos(pos, op)
1083end 1751end
1084 1752
1753map_op[".template__"] = op_template
1754
1085------------------------------------------------------------------------------ 1755------------------------------------------------------------------------------
1086 1756
1087-- Pseudo-opcode to mark the position where the action list is to be emitted. 1757-- Pseudo-opcode to mark the position where the action list is to be emitted.
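(Illustrative aside, not part of the patch.) The dasm_ppc.lua hunks above add the 64-bit rotate/shift extended mnemonics as op_alias() rewrites onto rldicl/rldicr, and a parse_number() helper so the generated string immediates such as "64-(n)" still reduce to numbers when n is a literal. A minimal standalone sketch of both ideas, with op_alias stubbed (the real one dispatches to the base template handler, and the real parse_number additionally sandboxes the compiled chunk):

  -- Stub: the real op_alias returns a handler that rewrites the operand
  -- list and then invokes the base template; here we just return both.
  local function op_alias(base, f)
    return function(p) f(p); return base, p end
  end

  local srdi_3 = op_alias("rldicl_4", function(p)
    p[4] = p[3]; p[3] = "64-("..p[3]..")"
  end)

  -- Simplified parse_number(): accept plain numbers or simple arithmetic.
  local function parse_number(n)
    local x = tonumber(n)
    if x then return x end
    local code = (loadstring or load)("return "..n)
    if not code then return nil end
    local ok, y = pcall(code)
    if ok then return y end
  end

  -- srdi r4, r5, 3 becomes rldicl r4, r5, 64-3, 3:
  local base, p = srdi_3({"r4", "r5", "3"})
  print(base, p[1], p[2], parse_number(p[3]), parse_number(p[4]))
  --> rldicl_4   r4   r5   61   3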
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
index 73558c69..3f50f502 100644
--- a/dynasm/dasm_proto.h
+++ b/dynasm/dasm_proto.h
@@ -10,8 +10,8 @@
10#include <stddef.h> 10#include <stddef.h>
11#include <stdarg.h> 11#include <stdarg.h>
12 12
13#define DASM_IDENT "DynASM 1.3.0" 13#define DASM_IDENT "DynASM 1.5.0"
14#define DASM_VERSION 10300 /* 1.3.0 */ 14#define DASM_VERSION 10500 /* 1.5.0 */
15 15
16#ifndef Dst_DECL 16#ifndef Dst_DECL
17#define Dst_DECL dasm_State **Dst 17#define Dst_DECL dasm_State **Dst
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
index 7b031c72..aded9990 100644
--- a/dynasm/dasm_x86.h
+++ b/dynasm/dasm_x86.h
@@ -68,7 +68,7 @@ struct dasm_State {
68 size_t lgsize; 68 size_t lgsize;
69 int *pclabels; /* PC label chains/pos ptrs. */ 69 int *pclabels; /* PC label chains/pos ptrs. */
70 size_t pcsize; 70 size_t pcsize;
71 void **globals; /* Array of globals (bias -10). */ 71 void **globals; /* Array of globals. */
72 dasm_Section *section; /* Pointer to active section. */ 72 dasm_Section *section; /* Pointer to active section. */
73 size_t codesize; /* Total size of all code sections. */ 73 size_t codesize; /* Total size of all code sections. */
74 int maxsection; /* 0 <= sectionidx < maxsection. */ 74 int maxsection; /* 0 <= sectionidx < maxsection. */
@@ -85,7 +85,6 @@ void dasm_init(Dst_DECL, int maxsection)
85{ 85{
86 dasm_State *D; 86 dasm_State *D;
87 size_t psz = 0; 87 size_t psz = 0;
88 int i;
89 Dst_REF = NULL; 88 Dst_REF = NULL;
90 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 89 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
91 D = Dst_REF; 90 D = Dst_REF;
@@ -96,12 +95,7 @@ void dasm_init(Dst_DECL, int maxsection)
96 D->pcsize = 0; 95 D->pcsize = 0;
97 D->globals = NULL; 96 D->globals = NULL;
98 D->maxsection = maxsection; 97 D->maxsection = maxsection;
99 for (i = 0; i < maxsection; i++) { 98 memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
100 D->sections[i].buf = NULL; /* Need this for pass3. */
101 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
102 D->sections[i].bsize = 0;
103 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
104 }
105} 99}
106 100
107/* Free DynASM state. */ 101/* Free DynASM state. */
@@ -121,7 +115,7 @@ void dasm_free(Dst_DECL)
121void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 115void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
122{ 116{
123 dasm_State *D = Dst_REF; 117 dasm_State *D = Dst_REF;
124 D->globals = gl - 10; /* Negative bias to compensate for locals. */ 118 D->globals = gl;
125 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 119 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
126} 120}
127 121
@@ -146,6 +140,7 @@ void dasm_setup(Dst_DECL, const void *actionlist)
146 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 140 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
147 for (i = 0; i < D->maxsection; i++) { 141 for (i = 0; i < D->maxsection; i++) {
148 D->sections[i].pos = DASM_SEC2POS(i); 142 D->sections[i].pos = DASM_SEC2POS(i);
143 D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
149 D->sections[i].ofs = 0; 144 D->sections[i].ofs = 0;
150 } 145 }
151} 146}
@@ -170,7 +165,7 @@ void dasm_put(Dst_DECL, int start, ...)
170 dasm_State *D = Dst_REF; 165 dasm_State *D = Dst_REF;
171 dasm_ActList p = D->actionlist + start; 166 dasm_ActList p = D->actionlist + start;
172 dasm_Section *sec = D->section; 167 dasm_Section *sec = D->section;
173 int pos = sec->pos, ofs = sec->ofs, mrm = 4; 168 int pos = sec->pos, ofs = sec->ofs, mrm = -1;
174 int *b; 169 int *b;
175 170
176 if (pos >= sec->epos) { 171 if (pos >= sec->epos) {
@@ -193,7 +188,7 @@ void dasm_put(Dst_DECL, int start, ...)
193 b[pos++] = n; 188 b[pos++] = n;
194 switch (action) { 189 switch (action) {
195 case DASM_DISP: 190 case DASM_DISP:
196 if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } 191 if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
197 /* fallthrough */ 192 /* fallthrough */
198 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ 193 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
199 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ 194 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
@@ -204,11 +199,17 @@ void dasm_put(Dst_DECL, int start, ...)
204 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; 199 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
205 case DASM_SPACE: p++; ofs += n; break; 200 case DASM_SPACE: p++; ofs += n; break;
206 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ 201 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
207 case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); 202 case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
208 if (*p++ == 1 && *p == DASM_DISP) mrm = n; 203 if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
204 if (*p < 0x20 && (n&7) == 4) ofs++;
205 switch ((*p++ >> 3) & 3) {
206 case 3: n |= b[pos-3]; /* fallthrough */
207 case 2: n |= b[pos-2]; /* fallthrough */
208 case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
209 }
209 continue; 210 continue;
210 } 211 }
211 mrm = 4; 212 mrm = -1;
212 } else { 213 } else {
213 int *pl, n; 214 int *pl, n;
214 switch (action) { 215 switch (action) {
@@ -233,8 +234,11 @@ void dasm_put(Dst_DECL, int start, ...)
233 } 234 }
234 pos++; 235 pos++;
235 ofs += 4; /* Maximum offset needed. */ 236 ofs += 4; /* Maximum offset needed. */
236 if (action == DASM_REL_LG || action == DASM_REL_PC) 237 if (action == DASM_REL_LG || action == DASM_REL_PC) {
237 b[pos++] = ofs; /* Store pass1 offset estimate. */ 238 b[pos++] = ofs; /* Store pass1 offset estimate. */
239 } else if (sizeof(ptrdiff_t) == 8) {
240 ofs += 4;
241 }
238 break; 242 break;
239 case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; 243 case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
240 case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); 244 case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
@@ -359,10 +363,22 @@ int dasm_link(Dst_DECL, size_t *szp)
359 do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) 363 do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
360#define dasmd(x) \ 364#define dasmd(x) \
361 do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) 365 do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
366#define dasmq(x) \
367 do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
362#else 368#else
363#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) 369#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
364#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) 370#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
371#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0)
365#endif 372#endif
373static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
374{
375 if (sizeof(ptrdiff_t) == 8)
376 dasmq((unsigned long long)x);
377 else
378 dasmd((unsigned int)x);
379 return cp;
380}
381#define dasma(x) (cp = dasma_(cp, (x)))
366 382
367/* Pass 3: Encode sections. */ 383/* Pass 3: Encode sections. */
368int dasm_encode(Dst_DECL, void *buffer) 384int dasm_encode(Dst_DECL, void *buffer)
@@ -402,9 +418,29 @@ int dasm_encode(Dst_DECL, void *buffer)
402 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; 418 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
403 /* fallthrough */ 419 /* fallthrough */
404 case DASM_IMM_W: dasmw(n); break; 420 case DASM_IMM_W: dasmw(n); break;
405 case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } 421 case DASM_VREG: {
422 int t = *p++;
423 unsigned char *ex = cp - (t&7);
424 if ((n & 8) && t < 0xa0) {
425 if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
426 n &= 7;
427 } else if (n & 0x10) {
428 if (*ex & 0x80) {
429 *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
430 }
431 while (++ex < cp) ex[-1] = *ex;
432 if (mark) mark--;
433 cp--;
434 n &= 7;
435 }
436 if (t >= 0xc0) n <<= 4;
437 else if (t >= 0x40) n <<= 3;
438 else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
439 cp[-1] ^= n;
440 break;
441 }
406 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; 442 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
407 b++; n = (int)(ptrdiff_t)D->globals[-n]; 443 b++; n = (int)(ptrdiff_t)D->globals[-n-10];
408 /* fallthrough */ 444 /* fallthrough */
409 case DASM_REL_A: rel_a: 445 case DASM_REL_A: rel_a:
410 n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ 446 n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
@@ -417,17 +453,18 @@ int dasm_encode(Dst_DECL, void *buffer)
417 goto wb; 453 goto wb;
418 } 454 }
419 case DASM_IMM_LG: 455 case DASM_IMM_LG:
420 p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } 456 p++;
457 if (n < 0) { dasma((ptrdiff_t)D->globals[-n-10]); break; }
421 /* fallthrough */ 458 /* fallthrough */
422 case DASM_IMM_PC: { 459 case DASM_IMM_PC: {
423 int *pb = DASM_POS2PTR(D, n); 460 int *pb = DASM_POS2PTR(D, n);
424 n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); 461 dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
425 goto wd; 462 break;
426 } 463 }
427 case DASM_LABEL_LG: { 464 case DASM_LABEL_LG: {
428 int idx = *p++; 465 int idx = *p++;
429 if (idx >= 10) 466 if (idx >= 10)
430 D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); 467 D->globals[idx-10] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
431 break; 468 break;
432 } 469 }
433 case DASM_LABEL_PC: case DASM_SETLABEL: break; 470 case DASM_LABEL_PC: case DASM_SETLABEL: break;
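(Illustrative aside, not part of the patch.) The dasm_x86.lua changes below add 64-bit (qword) immediate support; for literal values the new wputqarg helper emits eight little-endian bytes, filling the upper half with the value's sign. A standalone sketch of that numeric path (LuaJIT bit library assumed):

  local bit = bit or require("bit")
  local band, shr = bit.band, bit.rshift

  -- Emit a value in -2^31..2^32-1 as eight little-endian bytes.
  local function qbytes(n)
    local t = {}
    for i = 0, 3 do t[i+1] = band(shr(n, 8*i), 255) end
    local sign = n < 0 and 255 or 0
    for i = 5, 8 do t[i] = sign end
    return t
  end

  print(table.concat(qbytes(-2), " "))
  --> 254 255 255 255 255 255 255 255   (0xfffffffffffffffe, little-endian)

For non-literal expressions, wputqarg instead emits two IMM_D actions that split the value into low and high 32-bit halves at C compile time, as shown in the hunk below.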
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index 8a4c93a2..7c789f82 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -11,9 +11,9 @@ local x64 = x64
11local _info = { 11local _info = {
12 arch = x64 and "x64" or "x86", 12 arch = x64 and "x64" or "x86",
13 description = "DynASM x86/x64 module", 13 description = "DynASM x86/x64 module",
14 version = "1.3.0", 14 version = "1.5.0",
15 vernum = 10300, 15 vernum = 10500,
16 release = "2011-05-05", 16 release = "2021-05-02",
17 author = "Mike Pall", 17 author = "Mike Pall",
18 license = "MIT", 18 license = "MIT",
19} 19}
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
27local _s = string 27local _s = string
28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char 28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub 29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30local concat, sort = table.concat, table.sort 30local concat, sort, remove = table.concat, table.sort, table.remove
31local bit = bit or require("bit") 31local bit = bit or require("bit")
32local band, shl, shr = bit.band, bit.lshift, bit.rshift 32local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33 33
34-- Inherited tables and callbacks. 34-- Inherited tables and callbacks.
35local g_opt, g_arch 35local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
41 -- int arg, 1 buffer pos: 41 -- int arg, 1 buffer pos:
42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", 42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num): 43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44 "VREG", "SPACE", -- !x64: VREG support NYI. 44 "VREG", "SPACE",
45 -- ptrdiff_t arg, 1 buffer pos (address): !x64 45 -- ptrdiff_t arg, 1 buffer pos (address): !x64
46 "SETLABEL", "REL_A", 46 "SETLABEL", "REL_A",
47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): 47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
83-- Current number of section buffer positions for dasm_put(). 83-- Current number of section buffer positions for dasm_put().
84local secpos = 1 84local secpos = 1
85 85
86-- VREG kind encodings, pre-shifted by 5 bits.
87local map_vreg = {
88 ["modrm.rm.m"] = 0x00,
89 ["modrm.rm.r"] = 0x20,
90 ["opcode"] = 0x20,
91 ["sib.base"] = 0x20,
92 ["sib.index"] = 0x40,
93 ["modrm.reg"] = 0x80,
94 ["vex.v"] = 0xa0,
95 ["imm.hi"] = 0xc0,
96}
97
98-- Current number of VREG actions contributing to REX/VEX shrinkage.
99local vreg_shrink_count = 0
100
86------------------------------------------------------------------------------ 101------------------------------------------------------------------------------
87 102
88-- Compute action numbers for action names. 103-- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
134 if a or num then secpos = secpos + (num or 1) end 149 if a or num then secpos = secpos + (num or 1) end
135end 150end
136 151
152-- Optionally add a VREG action.
153local function wvreg(kind, vreg, psz, sk, defer)
154 if not vreg then return end
155 waction("VREG", vreg)
156 local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
157 if b < (sk or 0) then
158 vreg_shrink_count = vreg_shrink_count + 1
159 end
160 if not defer then
161 b = b + vreg_shrink_count * 8
162 vreg_shrink_count = 0
163 end
164 wputxb(b + (psz or 0))
165end
166
137-- Add call to embedded DynASM C code. 167-- Add call to embedded DynASM C code.
138local function wcall(func, args) 168local function wcall(func, args)
139 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) 169 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
299 local iname = format("@%s%x%s", sz, i, needrex and "R" or "") 329 local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
300 if needrex then map_reg_needrex[iname] = true end 330 if needrex then map_reg_needrex[iname] = true end
301 local name 331 local name
302 if sz == "o" then name = format("xmm%d", i) 332 if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
303 elseif sz == "f" then name = format("st%d", i) 333 elseif sz == "f" then name = format("st%d", i)
304 else name = format("r%d%s", i, sz == addrsize and "" or sz) end 334 else name = format("r%d%s", i, sz == addrsize and "" or sz) end
305 map_archdef[name] = iname 335 map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) 356mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327map_reg_valid_index[map_archdef.esp] = false 357map_reg_valid_index[map_archdef.esp] = false
328if x64 then map_reg_valid_index[map_archdef.rsp] = false end 358if x64 then map_reg_valid_index[map_archdef.rsp] = false end
359if x64 then map_reg_needrex[map_archdef.Rb] = true end
329map_archdef["Ra"] = "@"..addrsize 360map_archdef["Ra"] = "@"..addrsize
330 361
331-- FP registers (internally tword sized, but use "f" as operand size). 362-- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
334-- SSE registers (oword sized, but qword and dword accessible). 365-- SSE registers (oword sized, but qword and dword accessible).
335mkrmap("o", "xmm") 366mkrmap("o", "xmm")
336 367
368-- AVX registers (yword sized, but oword, qword and dword accessible).
369mkrmap("y", "ymm")
370
337-- Operand size prefixes to codes. 371-- Operand size prefixes to codes.
338local map_opsize = { 372local map_opsize = {
339 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", 373 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
340 aword = addrsize, 374 tword = "t", aword = addrsize,
341} 375}
342 376
343-- Operand size code to number. 377-- Operand size code to number.
344local map_opsizenum = { 378local map_opsizenum = {
345 b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, 379 b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
346} 380}
347 381
348-- Operand size code to name. 382-- Operand size code to name.
349local map_opsizename = { 383local map_opsizename = {
350 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", 384 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
351 f = "fpword", 385 t = "tword", f = "fpword",
352} 386}
353 387
354-- Valid index register scale factors. 388-- Valid index register scale factors.
@@ -450,6 +484,22 @@ local function wputdarg(n)
450 end 484 end
451end 485end
452 486
487-- Put signed or unsigned qword or arg.
488local function wputqarg(n)
489 local tn = type(n)
490 if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
491 wputb(band(n, 255))
492 wputb(band(shr(n, 8), 255))
493 wputb(band(shr(n, 16), 255))
494 wputb(shr(n, 24))
495 local sign = n < 0 and 255 or 0
496 wputb(sign); wputb(sign); wputb(sign); wputb(sign)
497 else
498 waction("IMM_D", format("(unsigned int)(%s)", n))
499 waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
500 end
501end
502
453-- Put operand-size dependent number or arg (defaults to dword). 503-- Put operand-size dependent number or arg (defaults to dword).
454local function wputszarg(sz, n) 504local function wputszarg(sz, n)
455 if not sz or sz == "d" or sz == "q" then wputdarg(n) 505 if not sz or sz == "d" or sz == "q" then wputdarg(n)
@@ -460,9 +510,45 @@ local function wputszarg(sz, n)
460end 510end
461 511
462-- Put multi-byte opcode with operand-size dependent modifications. 512-- Put multi-byte opcode with operand-size dependent modifications.
463local function wputop(sz, op, rex) 513local function wputop(sz, op, rex, vex, vregr, vregxb)
514 local psz, sk = 0, nil
515 if vex then
516 local tail
517 if vex.m == 1 and band(rex, 11) == 0 then
518 if x64 and vregxb then
519 sk = map_vreg["modrm.reg"]
520 else
521 wputb(0xc5)
522 tail = shl(bxor(band(rex, 4), 4), 5)
523 psz = 3
524 end
525 end
526 if not tail then
527 wputb(0xc4)
528 wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
529 tail = shl(band(rex, 8), 4)
530 psz = 4
531 end
532 local reg, vreg = 0, nil
533 if vex.v then
534 reg = vex.v.reg
535 if not reg then werror("bad vex operand") end
536 if reg < 0 then reg = 0; vreg = vex.v.vreg end
537 end
538 if sz == "y" or vex.l then tail = tail + 4 end
539 wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
540 wvreg("vex.v", vreg)
541 rex = 0
542 if op >= 256 then werror("bad vex opcode") end
543 else
544 if rex ~= 0 then
545 if not x64 then werror("bad operand size") end
546 elseif (vregr or vregxb) and x64 then
547 rex = 0x10
548 sk = map_vreg["vex.v"]
549 end
550 end
464 local r 551 local r
465 if rex ~= 0 and not x64 then werror("bad operand size") end
466 if sz == "w" then wputb(102) end 552 if sz == "w" then wputb(102) end
467 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] 553 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
468 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end 554 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +557,20 @@ local function wputop(sz, op, rex)
471 if rex ~= 0 then 557 if rex ~= 0 then
472 local opc3 = band(op, 0xffff00) 558 local opc3 = band(op, 0xffff00)
473 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then 559 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
474 wputb(64 + band(rex, 15)); rex = 0 560 wputb(64 + band(rex, 15)); rex = 0; psz = 2
475 end 561 end
476 end 562 end
477 wputb(shr(op, 16)); op = band(op, 0xffff) 563 wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
478 end 564 end
479 if op >= 256 then 565 if op >= 256 then
480 local b = shr(op, 8) 566 local b = shr(op, 8)
481 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end 567 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
482 wputb(b) 568 wputb(b); op = band(op, 255); psz = psz + 1
483 op = band(op, 255)
484 end 569 end
485 if rex ~= 0 then wputb(64 + band(rex, 15)) end 570 if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
486 if sz == "b" then op = op - 1 end 571 if sz == "b" then op = op - 1 end
487 wputb(op) 572 wputb(op)
573 return psz, sk
488end 574end
489 575
490-- Put ModRM or SIB formatted byte. 576-- Put ModRM or SIB formatted byte.
@@ -494,7 +580,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
494end 580end
495 581
496-- Put ModRM/SIB plus optional displacement. 582-- Put ModRM/SIB plus optional displacement.
497local function wputmrmsib(t, imark, s, vsreg) 583local function wputmrmsib(t, imark, s, vsreg, psz, sk)
498 local vreg, vxreg 584 local vreg, vxreg
499 local reg, xreg = t.reg, t.xreg 585 local reg, xreg = t.reg, t.xreg
500 if reg and reg < 0 then reg = 0; vreg = t.vreg end 586 if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +590,8 @@ local function wputmrmsib(t, imark, s, vsreg)
504 -- Register mode. 590 -- Register mode.
505 if sub(t.mode, 1, 1) == "r" then 591 if sub(t.mode, 1, 1) == "r" then
506 wputmodrm(3, s, reg) 592 wputmodrm(3, s, reg)
507 if vsreg then waction("VREG", vsreg); wputxb(2) end 593 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
508 if vreg then waction("VREG", vreg); wputxb(0) end 594 wvreg("modrm.rm.r", vreg, psz+1, sk)
509 return 595 return
510 end 596 end
511 597
@@ -519,28 +605,33 @@ local function wputmrmsib(t, imark, s, vsreg)
519 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) 605 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
520 wputmodrm(0, s, 4) 606 wputmodrm(0, s, 4)
521 if imark == "I" then waction("MARK") end 607 if imark == "I" then waction("MARK") end
522 if vsreg then waction("VREG", vsreg); wputxb(2) end 608 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
523 wputmodrm(t.xsc, xreg, 5) 609 wputmodrm(t.xsc, xreg, 5)
524 if vxreg then waction("VREG", vxreg); wputxb(3) end 610 wvreg("sib.index", vxreg, psz+2, sk)
525 else 611 else
526 -- Pure 32 bit displacement. 612 -- Pure 32 bit displacement.
527 if x64 and tdisp ~= "table" then 613 if x64 and tdisp ~= "table" then
528 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) 614 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
615 wvreg("modrm.reg", vsreg, psz+1, sk)
529 if imark == "I" then waction("MARK") end 616 if imark == "I" then waction("MARK") end
530 wputmodrm(0, 4, 5) 617 wputmodrm(0, 4, 5)
531 else 618 else
532 riprel = x64 619 riprel = x64
533 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) 620 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
621 wvreg("modrm.reg", vsreg, psz+1, sk)
534 if imark == "I" then waction("MARK") end 622 if imark == "I" then waction("MARK") end
535 end 623 end
536 if vsreg then waction("VREG", vsreg); wputxb(2) end
537 end 624 end
538 if riprel then -- Emit rip-relative displacement. 625 if riprel then -- Emit rip-relative displacement.
539 if match("UWSiI", imark) then 626 if match("UWSiI", imark) then
540 werror("NYI: rip-relative displacement followed by immediate") 627 werror("NYI: rip-relative displacement followed by immediate")
541 end 628 end
542 -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. 629 -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
543 wputlabel("REL_", disp[1], 2) 630 if disp[2] == "iPJ" then
631 waction("REL_A", disp[1])
632 else
633 wputlabel("REL_", disp[1], 2)
634 end
544 else 635 else
545 wputdarg(disp) 636 wputdarg(disp)
546 end 637 end
@@ -561,16 +652,16 @@ local function wputmrmsib(t, imark, s, vsreg)
561 if xreg or band(reg, 7) == 4 then 652 if xreg or band(reg, 7) == 4 then
562 wputmodrm(m or 2, s, 4) -- ModRM. 653 wputmodrm(m or 2, s, 4) -- ModRM.
563 if m == nil or imark == "I" then waction("MARK") end 654 if m == nil or imark == "I" then waction("MARK") end
564 if vsreg then waction("VREG", vsreg); wputxb(2) end 655 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
565 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. 656 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
566 if vxreg then waction("VREG", vxreg); wputxb(3) end 657 wvreg("sib.index", vxreg, psz+2, sk, vreg)
567 if vreg then waction("VREG", vreg); wputxb(1) end 658 wvreg("sib.base", vreg, psz+2, sk)
568 else 659 else
569 wputmodrm(m or 2, s, reg) -- ModRM. 660 wputmodrm(m or 2, s, reg) -- ModRM.
570 if (imark == "I" and (m == 1 or m == 2)) or 661 if (imark == "I" and (m == 1 or m == 2)) or
571 (m == nil and (vsreg or vreg)) then waction("MARK") end 662 (m == nil and (vsreg or vreg)) then waction("MARK") end
572 if vsreg then waction("VREG", vsreg); wputxb(2) end 663 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
573 if vreg then waction("VREG", vreg); wputxb(1) end 664 wvreg("modrm.rm.m", vreg, psz+1, sk)
574 end 665 end
575 666
576 -- Put displacement. 667 -- Put displacement.
@@ -592,10 +683,16 @@ local function opmodestr(op, args)
592end 683end
593 684
594-- Convert number to valid integer or nil. 685-- Convert number to valid integer or nil.
595local function toint(expr) 686local function toint(expr, isqword)
596 local n = tonumber(expr) 687 local n = tonumber(expr)
597 if n then 688 if n then
598 if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then 689 if n % 1 ~= 0 then
690 werror("not an integer number `"..expr.."'")
691 elseif isqword then
692 if n < -2147483648 or n > 2147483647 then
693 n = nil -- Handle it as an expression to avoid precision loss.
694 end
695 elseif n < -2147483648 or n > 4294967295 then
599 werror("bad integer number `"..expr.."'") 696 werror("bad integer number `"..expr.."'")
600 end 697 end
601 return n 698 return n
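The new isqword path returns nil for anything outside the signed 32-bit range, so such values are kept as textual C expressions instead of being rounded through a Lua double. A minimal standalone sketch of that behaviour (werror replaced by error; names are illustrative):

  local function toint_sketch(expr, isqword)
    local n = tonumber(expr)
    if not n then return nil end            -- not numeric: leave as expression
    if n % 1 ~= 0 then error("not an integer number `"..expr.."'") end
    if isqword then
      -- 64-bit data: only values in the signed 32-bit range are emitted
      -- directly; anything wider stays an expression to avoid precision loss.
      if n < -2147483648 or n > 2147483647 then return nil end
    elseif n < -2147483648 or n > 4294967295 then
      error("bad integer number `"..expr.."'")
    end
    return n
  end
  print(toint_sketch("42", true))            --> 42
  print(toint_sketch("0x123456789", true))   --> nil (emitted as an expression)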
@@ -651,9 +748,9 @@ local function dispexpr(expr)
651 return imm*map_opsizenum[ops] 748 return imm*map_opsizenum[ops]
652 end 749 end
653 local mode, iexpr = immexpr(dispt) 750 local mode, iexpr = immexpr(dispt)
654 if mode == "iJ" then 751 if mode == "iJ" or mode == "iPJ" then
655 if c == "-" then werror("cannot invert label reference") end 752 if c == "-" then werror("cannot invert label reference") end
656 return { iexpr } 753 return { iexpr, mode }
657 end 754 end
658 return expr -- Need to return original signed expression. 755 return expr -- Need to return original signed expression.
659end 756end
@@ -678,7 +775,7 @@ local function rtexpr(expr)
678end 775end
679 776
680-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. 777-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
681local function parseoperand(param) 778local function parseoperand(param, isqword)
682 local t = {} 779 local t = {}
683 780
684 local expr = param 781 local expr = param
@@ -766,7 +863,7 @@ local function parseoperand(param)
766 t.disp = dispexpr(tailx) 863 t.disp = dispexpr(tailx)
767 else 864 else
768 -- imm or opsize*imm 865 -- imm or opsize*imm
769 local imm = toint(expr) 866 local imm = toint(expr, isqword)
770 if not imm and sub(expr, 1, 1) == "*" and t.opsize then 867 if not imm and sub(expr, 1, 1) == "*" and t.opsize then
771 imm = toint(sub(expr, 2)) 868 imm = toint(sub(expr, 2))
772 if imm then 869 if imm then
@@ -881,9 +978,16 @@ end
881-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. 978-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
882-- The spare 3 bits are either filled with the last hex digit or 979-- The spare 3 bits are either filled with the last hex digit or
883-- the result from a previous "r"/"R". The opcode is restored. 980-- the result from a previous "r"/"R". The opcode is restored.
981-- "u" Use VEX encoding, vvvv unused.
982-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
983-- removed from the list used by future characters).
984-- "w" Use VEX encoding, vvvv from 3rd operand.
985-- "L" Force VEX.L
884-- 986--
885-- All of the following characters force a flush of the opcode: 987-- All of the following characters force a flush of the opcode:
886-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. 988-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
989-- "s" stores a 4 bit immediate from the last register operand,
990-- followed by 4 zero bits.
887-- "S" stores a signed 8 bit immediate from the last operand. 991-- "S" stores a signed 8 bit immediate from the last operand.
888-- "U" stores an unsigned 8 bit immediate from the last operand. 992-- "U" stores an unsigned 8 bit immediate from the last operand.
889-- "W" stores an unsigned 16 bit immediate from the last operand. 993-- "W" stores an unsigned 16 bit immediate from the last operand.
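For orientation, here is one of the VEX templates generated further down by the arithmetic-op loop, split into the two halves that the characters documented above describe (the template string is real; the snippet itself is only illustrative):

  local template = "rrmoy:660FV58rM"     -- vaddpd_3
  local fmt, pat = template:match("^(.-):(.*)$")
  print(fmt)   --> rrmoy      (reg, reg, reg-or-mem; sizes "o"=xmm, "y"=ymm)
  print(pat)   --> 660FV58rM  (66 0F prefixes, "V" takes VEX.vvvv from the
               --               2nd operand, opcode 58, "r"/"M" fill ModRM)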
@@ -1047,6 +1151,8 @@ local map_op = {
1047 rep_0 = "F3", 1151 rep_0 = "F3",
1048 repe_0 = "F3", 1152 repe_0 = "F3",
1049 repz_0 = "F3", 1153 repz_0 = "F3",
1154 endbr32_0 = "F30F1EFB",
1155 endbr64_0 = "F30F1EFA",
1050 -- F4: *hlt 1156 -- F4: *hlt
1051 cmc_0 = "F5", 1157 cmc_0 = "F5",
1052 -- F6: test... mb,i; div... mb 1158 -- F6: test... mb,i; div... mb
@@ -1226,46 +1332,14 @@ local map_op = {
1226 movups_2 = "rmo:0F10rM|mro:0F11Rm", 1332 movups_2 = "rmo:0F10rM|mro:0F11Rm",
1227 orpd_2 = "rmo:660F56rM", 1333 orpd_2 = "rmo:660F56rM",
1228 orps_2 = "rmo:0F56rM", 1334 orps_2 = "rmo:0F56rM",
1229 packssdw_2 = "rmo:660F6BrM",
1230 packsswb_2 = "rmo:660F63rM",
1231 packuswb_2 = "rmo:660F67rM",
1232 paddb_2 = "rmo:660FFCrM",
1233 paddd_2 = "rmo:660FFErM",
1234 paddq_2 = "rmo:660FD4rM",
1235 paddsb_2 = "rmo:660FECrM",
1236 paddsw_2 = "rmo:660FEDrM",
1237 paddusb_2 = "rmo:660FDCrM",
1238 paddusw_2 = "rmo:660FDDrM",
1239 paddw_2 = "rmo:660FFDrM",
1240 pand_2 = "rmo:660FDBrM",
1241 pandn_2 = "rmo:660FDFrM",
1242 pause_0 = "F390", 1335 pause_0 = "F390",
1243 pavgb_2 = "rmo:660FE0rM",
1244 pavgw_2 = "rmo:660FE3rM",
1245 pcmpeqb_2 = "rmo:660F74rM",
1246 pcmpeqd_2 = "rmo:660F76rM",
1247 pcmpeqw_2 = "rmo:660F75rM",
1248 pcmpgtb_2 = "rmo:660F64rM",
1249 pcmpgtd_2 = "rmo:660F66rM",
1250 pcmpgtw_2 = "rmo:660F65rM",
1251 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. 1336 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1252 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", 1337 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1253 pmaddwd_2 = "rmo:660FF5rM",
1254 pmaxsw_2 = "rmo:660FEErM",
1255 pmaxub_2 = "rmo:660FDErM",
1256 pminsw_2 = "rmo:660FEArM",
1257 pminub_2 = "rmo:660FDArM",
1258 pmovmskb_2 = "rr/do:660FD7rM", 1338 pmovmskb_2 = "rr/do:660FD7rM",
1259 pmulhuw_2 = "rmo:660FE4rM",
1260 pmulhw_2 = "rmo:660FE5rM",
1261 pmullw_2 = "rmo:660FD5rM",
1262 pmuludq_2 = "rmo:660FF4rM",
1263 por_2 = "rmo:660FEBrM",
1264 prefetchnta_1 = "xb:n0F180m", 1339 prefetchnta_1 = "xb:n0F180m",
1265 prefetcht0_1 = "xb:n0F181m", 1340 prefetcht0_1 = "xb:n0F181m",
1266 prefetcht1_1 = "xb:n0F182m", 1341 prefetcht1_1 = "xb:n0F182m",
1267 prefetcht2_1 = "xb:n0F183m", 1342 prefetcht2_1 = "xb:n0F183m",
1268 psadbw_2 = "rmo:660FF6rM",
1269 pshufd_3 = "rmio:660F70rMU", 1343 pshufd_3 = "rmio:660F70rMU",
1270 pshufhw_3 = "rmio:F30F70rMU", 1344 pshufhw_3 = "rmio:F30F70rMU",
1271 pshuflw_3 = "rmio:F20F70rMU", 1345 pshuflw_3 = "rmio:F20F70rMU",
@@ -1279,23 +1353,6 @@ local map_op = {
1279 psrldq_2 = "rio:660F733mU", 1353 psrldq_2 = "rio:660F733mU",
1280 psrlq_2 = "rmo:660FD3rM|rio:660F732mU", 1354 psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1281 psrlw_2 = "rmo:660FD1rM|rio:660F712mU", 1355 psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1282 psubb_2 = "rmo:660FF8rM",
1283 psubd_2 = "rmo:660FFArM",
1284 psubq_2 = "rmo:660FFBrM",
1285 psubsb_2 = "rmo:660FE8rM",
1286 psubsw_2 = "rmo:660FE9rM",
1287 psubusb_2 = "rmo:660FD8rM",
1288 psubusw_2 = "rmo:660FD9rM",
1289 psubw_2 = "rmo:660FF9rM",
1290 punpckhbw_2 = "rmo:660F68rM",
1291 punpckhdq_2 = "rmo:660F6ArM",
1292 punpckhqdq_2 = "rmo:660F6DrM",
1293 punpckhwd_2 = "rmo:660F69rM",
1294 punpcklbw_2 = "rmo:660F60rM",
1295 punpckldq_2 = "rmo:660F62rM",
1296 punpcklqdq_2 = "rmo:660F6CrM",
1297 punpcklwd_2 = "rmo:660F61rM",
1298 pxor_2 = "rmo:660FEFrM",
1299 rcpps_2 = "rmo:0F53rM", 1356 rcpps_2 = "rmo:0F53rM",
1300 rcpss_2 = "rro:F30F53rM|rx/od:", 1357 rcpss_2 = "rro:F30F53rM|rx/od:",
1301 rsqrtps_2 = "rmo:0F52rM", 1358 rsqrtps_2 = "rmo:0F52rM",
@@ -1413,6 +1470,327 @@ local map_op = {
1413 movntsd_2 = "xr/qo:nF20F2BRm", 1470 movntsd_2 = "xr/qo:nF20F2BRm",
1414 movntss_2 = "xr/do:F30F2BRm", 1471 movntss_2 = "xr/do:F30F2BRm",
1415 -- popcnt is also in SSE4.2 1472 -- popcnt is also in SSE4.2
1473
1474 -- AES-NI
1475 aesdec_2 = "rmo:660F38DErM",
1476 aesdeclast_2 = "rmo:660F38DFrM",
1477 aesenc_2 = "rmo:660F38DCrM",
1478 aesenclast_2 = "rmo:660F38DDrM",
1479 aesimc_2 = "rmo:660F38DBrM",
1480 aeskeygenassist_3 = "rmio:660F3ADFrMU",
1481 pclmulqdq_3 = "rmio:660F3A44rMU",
1482
1483 -- AVX FP ops
1484 vaddsubpd_3 = "rrmoy:660FVD0rM",
1485 vaddsubps_3 = "rrmoy:F20FVD0rM",
1486 vandpd_3 = "rrmoy:660FV54rM",
1487 vandps_3 = "rrmoy:0FV54rM",
1488 vandnpd_3 = "rrmoy:660FV55rM",
1489 vandnps_3 = "rrmoy:0FV55rM",
1490 vblendpd_4 = "rrmioy:660F3AV0DrMU",
1491 vblendps_4 = "rrmioy:660F3AV0CrMU",
1492 vblendvpd_4 = "rrmroy:660F3AV4BrMs",
1493 vblendvps_4 = "rrmroy:660F3AV4ArMs",
1494 vbroadcastf128_2 = "rx/yo:660F38u1ArM",
1495 vcmppd_4 = "rrmioy:660FVC2rMU",
1496 vcmpps_4 = "rrmioy:0FVC2rMU",
1497 vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
1498 vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
1499 vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
1500 vcomiss_2 = "rro:0Fu2FrM|rx/od:",
1501 vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
1502 vcvtdq2ps_2 = "rmoy:0Fu5BrM",
1503 vcvtpd2dq_2 = "rmoy:F20FuE6rM",
1504 vcvtpd2ps_2 = "rmoy:660Fu5ArM",
1505 vcvtps2dq_2 = "rmoy:660Fu5BrM",
1506 vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
1507 vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1508 vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
1509 vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1510 vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1511 vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
1512 vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1513 vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1514 vcvttps2dq_2 = "rmoy:F30Fu5BrM",
1515 vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1516 vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1517 vdppd_4 = "rrmio:660F3AV41rMU",
1518 vdpps_4 = "rrmioy:660F3AV40rMU",
1519 vextractf128_3 = "mri/oy:660F3AuL19RmU",
1520 vextractps_3 = "mri/do:660F3Au17RmU",
1521 vhaddpd_3 = "rrmoy:660FV7CrM",
1522 vhaddps_3 = "rrmoy:F20FV7CrM",
1523 vhsubpd_3 = "rrmoy:660FV7DrM",
1524 vhsubps_3 = "rrmoy:F20FV7DrM",
1525 vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
1526 vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
1527 vldmxcsr_1 = "xd:0FuAE2m",
1528 vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1529 vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1530 vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
1531 vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
1532 vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1533 vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1534 vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
1535 vmovhlps_3 = "rrro:0FV12rM",
1536 vmovhpd_2 = "xr/qo:660Fu17Rm",
1537 vmovhpd_3 = "rrx/ooq:660FV16rM",
1538 vmovhps_2 = "xr/qo:0Fu17Rm",
1539 vmovhps_3 = "rrx/ooq:0FV16rM",
1540 vmovlhps_3 = "rrro:0FV16rM",
1541 vmovlpd_2 = "xr/qo:660Fu13Rm",
1542 vmovlpd_3 = "rrx/ooq:660FV12rM",
1543 vmovlps_2 = "xr/qo:0Fu13Rm",
1544 vmovlps_3 = "rrx/ooq:0FV12rM",
1545 vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1546 vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1547 vmovntpd_2 = "xroy:660Fu2BRm",
1548 vmovntps_2 = "xroy:0Fu2BRm",
1549 vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1550 vmovsd_3 = "rrro:F20FV10rM",
1551 vmovshdup_2 = "rmoy:F30Fu16rM",
1552 vmovsldup_2 = "rmoy:F30Fu12rM",
1553 vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1554 vmovss_3 = "rrro:F30FV10rM",
1555 vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
1556 vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
1557 vorpd_3 = "rrmoy:660FV56rM",
1558 vorps_3 = "rrmoy:0FV56rM",
1559 vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1560 vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1561 vperm2f128_4 = "rrmiy:660F3AV06rMU",
1562 vptestpd_2 = "rmoy:660F38u0FrM",
1563 vptestps_2 = "rmoy:660F38u0ErM",
1564 vrcpps_2 = "rmoy:0Fu53rM",
1565 vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
1566 vrsqrtps_2 = "rmoy:0Fu52rM",
1567 vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
1568 vroundpd_3 = "rmioy:660F3Au09rMU",
1569 vroundps_3 = "rmioy:660F3Au08rMU",
1570 vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
1571 vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
1572 vshufpd_4 = "rrmioy:660FVC6rMU",
1573 vshufps_4 = "rrmioy:0FVC6rMU",
1574 vsqrtps_2 = "rmoy:0Fu51rM",
1575 vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
1576 vsqrtpd_2 = "rmoy:660Fu51rM",
1577 vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
1578 vstmxcsr_1 = "xd:0FuAE3m",
1579 vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
1580 vucomiss_2 = "rro:0Fu2ErM|rx/od:",
1581 vunpckhpd_3 = "rrmoy:660FV15rM",
1582 vunpckhps_3 = "rrmoy:0FV15rM",
1583 vunpcklpd_3 = "rrmoy:660FV14rM",
1584 vunpcklps_3 = "rrmoy:0FV14rM",
1585 vxorpd_3 = "rrmoy:660FV57rM",
1586 vxorps_3 = "rrmoy:0FV57rM",
1587 vzeroall_0 = "0FuL77",
1588 vzeroupper_0 = "0Fu77",
1589
1590 -- AVX2 FP ops
1591 vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1592 vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
1593 -- *vgather* (!vsib)
1594 vpermpd_3 = "rmiy:660F3AuX01rMU",
1595 vpermps_3 = "rrmy:660F38V16rM",
1596
1597 -- AVX, AVX2 integer ops
1598 -- In general, xmm requires AVX, ymm requires AVX2.
1599 vaesdec_3 = "rrmo:660F38VDErM",
1600 vaesdeclast_3 = "rrmo:660F38VDFrM",
1601 vaesenc_3 = "rrmo:660F38VDCrM",
1602 vaesenclast_3 = "rrmo:660F38VDDrM",
1603 vaesimc_2 = "rmo:660F38uDBrM",
1604 vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
1605 vlddqu_2 = "rxoy:F20FuF0rM",
1606 vmaskmovdqu_2 = "rro:660FuF7rM",
1607 vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1608 vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1609 vmovntdq_2 = "xroy:660FuE7Rm",
1610 vmovntdqa_2 = "rxoy:660F38u2ArM",
1611 vmpsadbw_4 = "rrmioy:660F3AV42rMU",
1612 vpabsb_2 = "rmoy:660F38u1CrM",
1613 vpabsd_2 = "rmoy:660F38u1ErM",
1614 vpabsw_2 = "rmoy:660F38u1DrM",
1615 vpackusdw_3 = "rrmoy:660F38V2BrM",
1616 vpalignr_4 = "rrmioy:660F3AV0FrMU",
1617 vpblendvb_4 = "rrmroy:660F3AV4CrMs",
1618 vpblendw_4 = "rrmioy:660F3AV0ErMU",
1619 vpclmulqdq_4 = "rrmio:660F3AV44rMU",
1620 vpcmpeqq_3 = "rrmoy:660F38V29rM",
1621 vpcmpestri_3 = "rmio:660F3Au61rMU",
1622 vpcmpestrm_3 = "rmio:660F3Au60rMU",
1623 vpcmpgtq_3 = "rrmoy:660F38V37rM",
1624 vpcmpistri_3 = "rmio:660F3Au63rMU",
1625 vpcmpistrm_3 = "rmio:660F3Au62rMU",
1626 vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1627 vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1628 vpextrd_3 = "mri/do:660F3Au16RmU",
1629 vpextrq_3 = "mri/qo:660F3Au16RmU",
1630 vphaddw_3 = "rrmoy:660F38V01rM",
1631 vphaddd_3 = "rrmoy:660F38V02rM",
1632 vphaddsw_3 = "rrmoy:660F38V03rM",
1633 vphminposuw_2 = "rmo:660F38u41rM",
1634 vphsubw_3 = "rrmoy:660F38V05rM",
1635 vphsubd_3 = "rrmoy:660F38V06rM",
1636 vphsubsw_3 = "rrmoy:660F38V07rM",
1637 vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
1638 vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
1639 vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
1640 vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
1641 vpmaddubsw_3 = "rrmoy:660F38V04rM",
1642 vpmaxsb_3 = "rrmoy:660F38V3CrM",
1643 vpmaxsd_3 = "rrmoy:660F38V3DrM",
1644 vpmaxuw_3 = "rrmoy:660F38V3ErM",
1645 vpmaxud_3 = "rrmoy:660F38V3FrM",
1646 vpminsb_3 = "rrmoy:660F38V38rM",
1647 vpminsd_3 = "rrmoy:660F38V39rM",
1648 vpminuw_3 = "rrmoy:660F38V3ArM",
1649 vpminud_3 = "rrmoy:660F38V3BrM",
1650 vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1651 vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
1652 vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
1653 vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
1654 vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
1655 vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
1656 vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
1657 vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
1658 vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
1659 vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
1660 vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
1661 vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
1662 vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
1663 vpmuldq_3 = "rrmoy:660F38V28rM",
1664 vpmulhrsw_3 = "rrmoy:660F38V0BrM",
1665 vpmulld_3 = "rrmoy:660F38V40rM",
1666 vpshufb_3 = "rrmoy:660F38V00rM",
1667 vpshufd_3 = "rmioy:660Fu70rMU",
1668 vpshufhw_3 = "rmioy:F30Fu70rMU",
1669 vpshuflw_3 = "rmioy:F20Fu70rMU",
1670 vpsignb_3 = "rrmoy:660F38V08rM",
1671 vpsignw_3 = "rrmoy:660F38V09rM",
1672 vpsignd_3 = "rrmoy:660F38V0ArM",
1673 vpslldq_3 = "rrioy:660Fv737mU",
1674 vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1675 vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1676 vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1677 vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1678 vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1679 vpsrldq_3 = "rrioy:660Fv733mU",
1680 vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1681 vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1682 vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1683 vptest_2 = "rmoy:660F38u17rM",
1684
1685 -- AVX2 integer ops
1686 vbroadcasti128_2 = "rx/yo:660F38u5ArM",
1687 vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
1688 vextracti128_3 = "mri/oy:660F3AuL39RmU",
1689 vpblendd_4 = "rrmioy:660F3AV02rMU",
1690 vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1691 vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1692 vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1693 vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1694 vpermd_3 = "rrmy:660F38V36rM",
1695 vpermq_3 = "rmiy:660F3AuX00rMU",
1696 -- *vpgather* (!vsib)
1697 vperm2i128_4 = "rrmiy:660F3AV46rMU",
1698 vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1699 vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1700 vpsllvd_3 = "rrmoy:660F38V47rM",
1701 vpsllvq_3 = "rrmoy:660F38VX47rM",
1702 vpsravd_3 = "rrmoy:660F38V46rM",
1703 vpsrlvd_3 = "rrmoy:660F38V45rM",
1704 vpsrlvq_3 = "rrmoy:660F38VX45rM",
1705
1706 -- Intel ADX
1707 adcx_2 = "rmqd:660F38F6rM",
1708 adox_2 = "rmqd:F30F38F6rM",
1709
1710 -- BMI1
1711 andn_3 = "rrmqd:0F38VF2rM",
1712 bextr_3 = "rmrqd:0F38wF7rM",
1713 blsi_2 = "rmqd:0F38vF33m",
1714 blsmsk_2 = "rmqd:0F38vF32m",
1715 blsr_2 = "rmqd:0F38vF31m",
1716 tzcnt_2 = "rmqdw:F30FBCrM",
1717
1718 -- BMI2
1719 bzhi_3 = "rmrqd:0F38wF5rM",
1720 mulx_3 = "rrmqd:F20F38VF6rM",
1721 pdep_3 = "rrmqd:F20F38VF5rM",
1722 pext_3 = "rrmqd:F30F38VF5rM",
1723 rorx_3 = "rmSqd:F20F3AuF0rMS",
1724 sarx_3 = "rmrqd:F30F38wF7rM",
1725 shrx_3 = "rmrqd:F20F38wF7rM",
1726 shlx_3 = "rmrqd:660F38wF7rM",
1727
1728 -- FMA3
1729 vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
1730 vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
1731 vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
1732 vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
1733 vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
1734 vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
1735
1736 vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
1737 vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
1738 vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
1739 vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
1740 vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
1741 vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
1742
1743 vfmadd132pd_3 = "rrmoy:660F38VX98rM",
1744 vfmadd132ps_3 = "rrmoy:660F38V98rM",
1745 vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
1746 vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
1747 vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
1748 vfmadd213ps_3 = "rrmoy:660F38VA8rM",
1749 vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
1750 vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
1751 vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
1752 vfmadd231ps_3 = "rrmoy:660F38VB8rM",
1753 vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
1754 vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
1755
1756 vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
1757 vfmsub132ps_3 = "rrmoy:660F38V9ArM",
1758 vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
1759 vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
1760 vfmsub213pd_3 = "rrmoy:660F38VXAArM",
1761 vfmsub213ps_3 = "rrmoy:660F38VAArM",
1762 vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
1763 vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
1764 vfmsub231pd_3 = "rrmoy:660F38VXBArM",
1765 vfmsub231ps_3 = "rrmoy:660F38VBArM",
1766 vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
1767 vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
1768
1769 vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
1770 vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
1771 vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
1772 vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
1773 vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
1774 vfnmadd213ps_3 = "rrmoy:660F38VACrM",
1775 vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
1776 vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
1777 vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
1778 vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
1779 vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
1780 vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
1781
1782 vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
1783 vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
1784 vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
1785 vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
1786 vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
1787 vfnmsub213ps_3 = "rrmoy:660F38VAErM",
1788 vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
1789 vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
1790 vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
1791 vfnmsub231ps_3 = "rrmoy:660F38VBErM",
1792 vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
1793 vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
1416} 1794}
1417 1795
1418------------------------------------------------------------------------------ 1796------------------------------------------------------------------------------
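With these table entries in place, AVX, AVX2 and FMA3 instructions can be written in a .dasc file in the usual DynASM style; a hedged sketch (register choices are arbitrary):

  | vaddpd xmm1, xmm2, xmm3
  | vpaddd ymm0, ymm0, ymm1
  | vfmadd231sd xmm4, xmm5, xmm6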
@@ -1463,28 +1841,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
1463 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ 1841 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
1464end 1842end
1465 1843
1466-- SSE FP arithmetic ops. 1844-- SSE / AVX FP arithmetic ops.
1467for name,n in pairs{ sqrt = 1, add = 8, mul = 9, 1845for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
1468 sub = 12, min = 13, div = 14, max = 15 } do 1846 sub = 12, min = 13, div = 14, max = 15 } do
1469 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) 1847 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
1470 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) 1848 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
1471 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) 1849 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
1472 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) 1850 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
1851 if n ~= 1 then
1852 map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
1853 map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
1854 map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
1855 map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
1856 end
1857end
1858
1859-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
1860for name,n in pairs{
1861 paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
1862 paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
1863 packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
1864 paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
1865 pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
1866 pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
1867 pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
1868 pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
1869 pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
1870 pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
1871 psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
1872 psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
1873 punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
1874 punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
1875 punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
1876} do
1877 map_op[name.."_2"] = format("rmo:660F%02XrM", n)
1878 map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
1473end 1879end
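A quick standalone check of what the two generator loops produce: the paddb line reproduces the hand-written entry this patch removes from the table above, and the v-prefixed strings are the new 3-operand AVX forms.

  local format = string.format
  print(format("rmo:0F5%XrM", 8))           --> rmo:0F58rM        (addps_2, as before)
  print(format("rrmoy:0FV5%XrM", 8))        --> rrmoy:0FV58rM     (new vaddps_3)
  print(format("rmo:660F%02XrM", 0xFC))     --> rmo:660FFCrM      (paddb_2, as before)
  print(format("rrmoy:660FV%02XrM", 0xFC))  --> rrmoy:660FVFCrM   (new vpaddb_3)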
1474 1880
1475------------------------------------------------------------------------------ 1881------------------------------------------------------------------------------
1476 1882
1883local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
1884
1477-- Process pattern string. 1885-- Process pattern string.
1478local function dopattern(pat, args, sz, op, needrex) 1886local function dopattern(pat, args, sz, op, needrex)
1479 local digit, addin 1887 local digit, addin, vex
1480 local opcode = 0 1888 local opcode = 0
1481 local szov = sz 1889 local szov = sz
1482 local narg = 1 1890 local narg = 1
1483 local rex = 0 1891 local rex = 0
1484 1892
1485 -- Limit number of section buffer positions used by a single dasm_put(). 1893 -- Limit number of section buffer positions used by a single dasm_put().
1486 -- A single opcode needs a maximum of 5 positions. 1894 -- A single opcode needs a maximum of 6 positions.
1487 if secpos+5 > maxsecpos then wflush() end 1895 if secpos+6 > maxsecpos then wflush() end
1488 1896
1489 -- Process each character. 1897 -- Process each character.
1490 for c in gmatch(pat.."|", ".") do 1898 for c in gmatch(pat.."|", ".") do
@@ -1498,6 +1906,8 @@ local function dopattern(pat, args, sz, op, needrex)
1498 szov = nil 1906 szov = nil
1499 elseif c == "X" then -- Force REX.W. 1907 elseif c == "X" then -- Force REX.W.
1500 rex = 8 1908 rex = 8
1909 elseif c == "L" then -- Force VEX.L.
1910 vex.l = true
1501 elseif c == "r" then -- Merge 1st operand regno. into opcode. 1911 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1502 addin = args[1]; opcode = opcode + (addin.reg % 8) 1912 addin = args[1]; opcode = opcode + (addin.reg % 8)
1503 if narg < 2 then narg = 2 end 1913 if narg < 2 then narg = 2 end
@@ -1521,21 +1931,42 @@ local function dopattern(pat, args, sz, op, needrex)
1521 if t.xreg and t.xreg > 7 then rex = rex + 2 end 1931 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1522 if s > 7 then rex = rex + 4 end 1932 if s > 7 then rex = rex + 4 end
1523 if needrex then rex = rex + 16 end 1933 if needrex then rex = rex + 16 end
1524 wputop(szov, opcode, rex); opcode = nil 1934 local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1935 opcode = nil
1525 local imark = sub(pat, -1) -- Force a mark (ugly). 1936 local imark = sub(pat, -1) -- Force a mark (ugly).
1526 -- Put ModRM/SIB with regno/last digit as spare. 1937 -- Put ModRM/SIB with regno/last digit as spare.
1527 wputmrmsib(t, imark, s, addin and addin.vreg) 1938 wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1528 addin = nil 1939 addin = nil
1940 elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1941 local b = band(opcode, 255); opcode = shr(opcode, 8)
1942 local m = 1
1943 if b == 0x38 then m = 2
1944 elseif b == 0x3a then m = 3 end
1945 if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1946 if b ~= 0x0f then
1947 werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1948 "' in pattern `"..pat.."' for `"..op.."'")
1949 end
1950 local v = map_vexarg[c]
1951 if v then v = remove(args, v) end
1952 b = band(opcode, 255)
1953 local p = 0
1954 if b == 0x66 then p = 1
1955 elseif b == 0xf3 then p = 2
1956 elseif b == 0xf2 then p = 3 end
1957 if p ~= 0 then opcode = shr(opcode, 8) end
1958 if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1959 vex = { m = m, p = p, v = v }
1529 else 1960 else
1530 if opcode then -- Flush opcode. 1961 if opcode then -- Flush opcode.
1531 if szov == "q" and rex == 0 then rex = rex + 8 end 1962 if szov == "q" and rex == 0 then rex = rex + 8 end
1532 if needrex then rex = rex + 16 end 1963 if needrex then rex = rex + 16 end
1533 if addin and addin.reg == -1 then 1964 if addin and addin.reg == -1 then
1534 wputop(szov, opcode - 7, rex) 1965 local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1535 waction("VREG", addin.vreg); wputxb(0) 1966 wvreg("opcode", addin.vreg, psz, sk)
1536 else 1967 else
1537 if addin and addin.reg > 7 then rex = rex + 1 end 1968 if addin and addin.reg > 7 then rex = rex + 1 end
1538 wputop(szov, opcode, rex) 1969 wputop(szov, opcode, rex, vex)
1539 end 1970 end
1540 opcode = nil 1971 opcode = nil
1541 end 1972 end
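The new branch above only collects the VEX fields (opcode map m, implied prefix p, the operand for vvvv, and the L bit); the prefix bytes themselves are emitted later by wputop, which is outside this hunk. A hedged, standalone sketch of how such fields map onto the architectural VEX encoding (standard x86 layout, not the patch's implementation; the helper and its parameters are illustrative):

  local bit = require("bit")
  local band, bor, shl = bit.band, bit.bor, bit.lshift
  -- r/x/b/w are the REX-style extension bits, vreg the register number
  -- destined for VEX.vvvv (stored inverted).
  local function vexbytes(vex, w, r, x, b, vreg)
    local vvvv = band(bit.bnot(vreg or 0), 15)
    local lpp = (vex.l and 4 or 0) + vex.p
    if x == 0 and b == 0 and w == 0 and vex.m == 1 then
      return 0xC5, bor(shl(1 - r, 7), shl(vvvv, 3), lpp)      -- 2-byte form
    end
    return 0xC4, bor(shl(1 - r, 7), shl(1 - x, 6), shl(1 - b, 5), vex.m),
           bor(shl(w, 7), shl(vvvv, 3), lpp)                  -- 3-byte form
  end
  print(vexbytes({ m = 1, p = 1 }, 0, 0, 0, 0, 2))  --> 197  233  (0xC5 0xE9)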
@@ -1549,7 +1980,7 @@ local function dopattern(pat, args, sz, op, needrex)
1549 local a = args[narg] 1980 local a = args[narg]
1550 narg = narg + 1 1981 narg = narg + 1
1551 local mode, imm = a.mode, a.imm 1982 local mode, imm = a.mode, a.imm
1552 if mode == "iJ" and not match("iIJ", c) then 1983 if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
1553 werror("bad operand size for label") 1984 werror("bad operand size for label")
1554 end 1985 end
1555 if c == "S" then 1986 if c == "S" then
@@ -1572,6 +2003,14 @@ local function dopattern(pat, args, sz, op, needrex)
1572 else 2003 else
1573 wputlabel("REL_", imm, 2) 2004 wputlabel("REL_", imm, 2)
1574 end 2005 end
2006 elseif c == "s" then
2007 local reg = a.reg
2008 if reg < 0 then
2009 wputb(0)
2010 wvreg("imm.hi", a.vreg)
2011 else
2012 wputb(shl(reg, 4))
2013 end
1575 else 2014 else
1576 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") 2015 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1577 end 2016 end
@@ -1648,11 +2087,14 @@ map_op[".template__"] = function(params, template, nparams)
1648 if pat == "" then pat = lastpat else lastpat = pat end 2087 if pat == "" then pat = lastpat else lastpat = pat end
1649 if matchtm(tm, args) then 2088 if matchtm(tm, args) then
1650 local prefix = sub(szm, 1, 1) 2089 local prefix = sub(szm, 1, 1)
1651 if prefix == "/" then -- Match both operand sizes. 2090 if prefix == "/" then -- Exactly match leading operand sizes.
1652 if args[1].opsize == sub(szm, 2, 2) and 2091 for i = #szm,1,-1 do
1653 args[2].opsize == sub(szm, 3, 3) then 2092 if i == 1 then
1654 dopattern(pat, args, sz, params.op, needrex) -- Process pattern. 2093 dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1655 return 2094 return
2095 elseif args[i-1].opsize ~= sub(szm, i, i) then
2096 break
2097 end
1656 end 2098 end
1657 else -- Match common operand size. 2099 else -- Match common operand size.
1658 local szp = sz 2100 local szp = sz
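The rewritten "/"-prefix branch above checks every operand size in the spec, so three-operand specs such as "/ooq" now match exactly rather than only the first two. An equivalent standalone predicate (illustrative only):

  local function sizes_match(szm, args)     -- szm includes the leading "/"
    for i = #szm, 2, -1 do
      if args[i-1].opsize ~= szm:sub(i, i) then return false end
    end
    return true
  end
  print(sizes_match("/ooq", { {opsize="o"}, {opsize="o"}, {opsize="q"} }))  --> true
  print(sizes_match("/ood", { {opsize="o"}, {opsize="o"}, {opsize="q"} }))  --> false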
@@ -1717,8 +2159,8 @@ if x64 then
1717 rex = a.reg > 7 and 9 or 8 2159 rex = a.reg > 7 and 9 or 8
1718 end 2160 end
1719 end 2161 end
1720 wputop(sz, opcode, rex) 2162 local psz, sk = wputop(sz, opcode, rex, nil, vreg)
1721 if vreg then waction("VREG", vreg); wputxb(0) end 2163 wvreg("opcode", vreg, psz, sk)
1722 waction("IMM_D", format("(unsigned int)(%s)", op64)) 2164 waction("IMM_D", format("(unsigned int)(%s)", op64))
1723 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) 2165 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
1724 end 2166 end
@@ -1730,14 +2172,16 @@ end
1730local function op_data(params) 2172local function op_data(params)
1731 if not params then return "imm..." end 2173 if not params then return "imm..." end
1732 local sz = sub(params.op, 2, 2) 2174 local sz = sub(params.op, 2, 2)
1733 if sz == "a" then sz = addrsize end 2175 if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
1734 for _,p in ipairs(params) do 2176 for _,p in ipairs(params) do
1735 local a = parseoperand(p) 2177 local a = parseoperand(p, sz == "q")
1736 if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then 2178 if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
1737 werror("bad mode or size in `"..p.."'") 2179 werror("bad mode or size in `"..p.."'")
1738 end 2180 end
1739 if a.mode == "iJ" then 2181 if a.mode == "iJ" then
1740 wputlabel("IMM_", a.imm, 1) 2182 wputlabel("IMM_", a.imm, 1)
2183 elseif sz == "q" then
2184 wputqarg(a.imm)
1741 else 2185 else
1742 wputszarg(sz, a.imm) 2186 wputszarg(sz, a.imm)
1743 end 2187 end
@@ -1749,7 +2193,11 @@ map_op[".byte_*"] = op_data
1749map_op[".sbyte_*"] = op_data 2193map_op[".sbyte_*"] = op_data
1750map_op[".word_*"] = op_data 2194map_op[".word_*"] = op_data
1751map_op[".dword_*"] = op_data 2195map_op[".dword_*"] = op_data
2196map_op[".qword_*"] = op_data
1752map_op[".aword_*"] = op_data 2197map_op[".aword_*"] = op_data
2198map_op[".long_*"] = op_data
2199map_op[".quad_*"] = op_data
2200map_op[".addr_*"] = op_data
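The new data directives take 64-bit values (via the qword path in op_data above) plus the GAS-style aliases .long/.quad/.addr; in a .dasc file they are used like the existing .byte/.dword directives, e.g. (hedged sketch):

  |.qword 0x0123456789abcdef
  |.long 42
  |.quad -1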
1753 2201
1754------------------------------------------------------------------------------ 2202------------------------------------------------------------------------------
1755 2203
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
index 068efe2a..0d15a872 100644
--- a/dynasm/dynasm.lua
+++ b/dynasm/dynasm.lua
@@ -10,9 +10,9 @@
10local _info = { 10local _info = {
11 name = "DynASM", 11 name = "DynASM",
12 description = "A dynamic assembler for code generation engines", 12 description = "A dynamic assembler for code generation engines",
13 version = "1.3.0", 13 version = "1.5.0",
14 vernum = 10300, 14 vernum = 10500,
15 release = "2011-05-05", 15 release = "2021-05-02",
16 author = "Mike Pall", 16 author = "Mike Pall",
17 url = "https://luajit.org/dynasm.html", 17 url = "https://luajit.org/dynasm.html",
18 license = "MIT", 18 license = "MIT",
@@ -75,7 +75,7 @@ local function wline(line, needindent)
75 g_synclineno = g_synclineno + 1 75 g_synclineno = g_synclineno + 1
76end 76end
77 77
78-- Write assembler line as a comment, if requestd. 78-- Write assembler line as a comment, if requested.
79local function wcomment(aline) 79local function wcomment(aline)
80 if g_opt.comment then 80 if g_opt.comment then
81 wline(g_opt.comment..aline..g_opt.endcomment, true) 81 wline(g_opt.comment..aline..g_opt.endcomment, true)
@@ -630,6 +630,7 @@ end
630-- Load architecture-specific module. 630-- Load architecture-specific module.
631local function loadarch(arch) 631local function loadarch(arch)
632 if not match(arch, "^[%w_]+$") then return "bad arch name" end 632 if not match(arch, "^[%w_]+$") then return "bad arch name" end
633 _G._map_def = map_def
633 local ok, m_arch = pcall(require, "dasm_"..arch) 634 local ok, m_arch = pcall(require, "dasm_"..arch)
634 if not ok then return "cannot load module: "..m_arch end 635 if not ok then return "cannot load module: "..m_arch end
635 g_arch = m_arch 636 g_arch = m_arch
diff --git a/etc/luajit.pc b/etc/luajit.pc
index 68f8d5cc..96433008 100644
--- a/etc/luajit.pc
+++ b/etc/luajit.pc
@@ -1,6 +1,6 @@
1# Package information for LuaJIT to be used by pkg-config. 1# Package information for LuaJIT to be used by pkg-config.
2majver=2 2majver=2
3minver=0 3minver=1
4relver=ROLLING 4relver=ROLLING
5version=${majver}.${minver}.${relver} 5version=${majver}.${minver}.${relver}
6abiver=5.1 6abiver=5.1
diff --git a/src/.gitignore b/src/.gitignore
index 19f2a00e..736a729a 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -6,4 +6,4 @@ lj_ffdef.h
6lj_libdef.h 6lj_libdef.h
7lj_recdef.h 7lj_recdef.h
8lj_folddef.h 8lj_folddef.h
9lj_vm.s 9lj_vm.[sS]
diff --git a/src/Makefile b/src/Makefile
index a83b8629..99642be7 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,7 +11,7 @@
11############################################################################## 11##############################################################################
12 12
13MAJVER= 2 13MAJVER= 2
14MINVER= 0 14MINVER= 1
15ABIVER= 5.1 15ABIVER= 5.1
16NODOTABIVER= 51 16NODOTABIVER= 51
17 17
@@ -43,17 +43,14 @@ CCOPT= -O2 -fomit-frame-pointer
43# 43#
44# Target-specific compiler options: 44# Target-specific compiler options:
45# 45#
46# x86 only: it's recommended to compile at least for i686. Better yet,
47# compile for an architecture that has SSE2, too (-msse -msse2).
48#
49# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 46# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
50# the binaries to a different machine you could also use: -march=native 47# the binaries to a different machine you could also use: -march=native
51# 48#
52CCOPT_x86= -march=i686 49CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
53CCOPT_x64= 50CCOPT_x64=
54CCOPT_arm= 51CCOPT_arm=
52CCOPT_arm64=
55CCOPT_ppc= 53CCOPT_ppc=
56CCOPT_ppcspe=
57CCOPT_mips= 54CCOPT_mips=
58# 55#
59CCDEBUG= 56CCDEBUG=
@@ -112,6 +109,9 @@ XCFLAGS=
112#XCFLAGS+= -DLUAJIT_NUMMODE=1 109#XCFLAGS+= -DLUAJIT_NUMMODE=1
113#XCFLAGS+= -DLUAJIT_NUMMODE=2 110#XCFLAGS+= -DLUAJIT_NUMMODE=2
114# 111#
112# Disable LJ_GC64 mode for x64.
113#XCFLAGS+= -DLUAJIT_DISABLE_GC64
114#
115############################################################################## 115##############################################################################
116 116
117############################################################################## 117##############################################################################
@@ -123,15 +123,14 @@ XCFLAGS=
123# 123#
124# Use the system provided memory allocator (realloc) instead of the 124# Use the system provided memory allocator (realloc) instead of the
125# bundled memory allocator. This is slower, but sometimes helpful for 125# bundled memory allocator. This is slower, but sometimes helpful for
126# debugging. This option cannot be enabled on x64, since realloc usually 126# debugging. This option cannot be enabled on x64 without GC64, since
127# doesn't return addresses in the right address range. 127# realloc usually doesn't return addresses in the right address range.
128# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and 128# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
129# the only way to get useful results from it for all other architectures. 129# the only way to get useful results from it for all other architectures.
130#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 130#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
131# 131#
132# This define is required to run LuaJIT under Valgrind. The Valgrind 132# This define is required to run LuaJIT under Valgrind. The Valgrind
133# header files must be installed. You should enable debug information, too. 133# header files must be installed. You should enable debug information, too.
134# Use --suppressions=lj.supp to avoid some false positives.
135#XCFLAGS+= -DLUAJIT_USE_VALGRIND 134#XCFLAGS+= -DLUAJIT_USE_VALGRIND
136# 135#
137# This is the client for the GDB JIT API. GDB 7.0 or higher is required 136# This is the client for the GDB JIT API. GDB 7.0 or higher is required
@@ -188,7 +187,8 @@ endif
188# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows 187# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
189# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- 188# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
190 189
191CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) 190ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
191CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
192LDOPTIONS= $(CCDEBUG) $(LDFLAGS) 192LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
193 193
194HOST_CC= $(CC) 194HOST_CC= $(CC)
@@ -228,6 +228,7 @@ TARGET_XLDFLAGS=
228TARGET_XLIBS= -lm 228TARGET_XLIBS= -lm
229TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 229TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
230TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 230TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
231TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
231TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) 232TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
232TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) 233TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
233TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) 234TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -242,17 +243,29 @@ else
242ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) 243ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
243 TARGET_LJARCH= arm 244 TARGET_LJARCH= arm
244else 245else
246ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
247 ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
248 TARGET_ARCH= -D__AARCH64EB__=1
249 endif
250 TARGET_LJARCH= arm64
251else
245ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) 252ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
253 ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
254 TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
255 else
256 TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
257 endif
246 TARGET_LJARCH= ppc 258 TARGET_LJARCH= ppc
247else 259else
248ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
249 TARGET_LJARCH= ppcspe
250else
251ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) 260ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
252 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) 261 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
253 TARGET_ARCH= -D__MIPSEL__=1 262 TARGET_ARCH= -D__MIPSEL__=1
254 endif 263 endif
255 TARGET_LJARCH= mips 264 ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
265 TARGET_LJARCH= mips64
266 else
267 TARGET_LJARCH= mips
268 endif
256else 269else
257 $(error Unsupported target architecture) 270 $(error Unsupported target architecture)
258endif 271endif
@@ -266,6 +279,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
266 TARGET_SYS= PS3 279 TARGET_SYS= PS3
267 TARGET_ARCH+= -D__CELLOS_LV2__ 280 TARGET_ARCH+= -D__CELLOS_LV2__
268 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 281 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
282 TARGET_XLIBS+= -lpthread
269endif 283endif
270 284
271TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) 285TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -285,6 +299,9 @@ endif
285ifneq (,$(LMULTILIB)) 299ifneq (,$(LMULTILIB))
286 TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\" 300 TARGET_XCFLAGS+= -DLUA_LMULTILIB=\"$(LMULTILIB)\"
287endif 301endif
302ifneq (,$(INSTALL_LJLIBD))
303 TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\"
304endif
288 305
289############################################################################## 306##############################################################################
290# Target system detection. 307# Target system detection.
@@ -305,20 +322,27 @@ ifeq (Darwin,$(TARGET_SYS))
305 $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) 322 $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
306 endif 323 endif
307 TARGET_STRIP+= -x 324 TARGET_STRIP+= -x
308 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC 325 TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
326 TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
309 TARGET_DYNXLDOPTS= 327 TARGET_DYNXLDOPTS=
310 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 328 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
311 ifeq (x64,$(TARGET_LJARCH))
312 TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
313 TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
314 endif
315else 329else
316ifeq (iOS,$(TARGET_SYS)) 330ifeq (iOS,$(TARGET_SYS))
317 TARGET_STRIP+= -x 331 TARGET_STRIP+= -x
318 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC 332 TARGET_XSHLDFLAGS= -dynamiclib -undefined dynamic_lookup -fPIC
319 TARGET_DYNXLDOPTS= 333 TARGET_DYNXLDOPTS=
320 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255 334 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
335 ifeq (arm64,$(TARGET_LJARCH))
336 TARGET_XCFLAGS+= -fno-omit-frame-pointer
337 endif
321else 338else
339 ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
340 # Find out whether the target toolchain always generates unwind tables.
341 TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
342 ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
343 TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
344 endif
345 endif
322 ifneq (SunOS,$(TARGET_SYS)) 346 ifneq (SunOS,$(TARGET_SYS))
323 ifneq (PS3,$(TARGET_SYS)) 347 ifneq (PS3,$(TARGET_SYS))
324 TARGET_XLDFLAGS+= -Wl,-E 348 TARGET_XLDFLAGS+= -Wl,-E
@@ -345,7 +369,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS))
345 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX 369 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
346 else 370 else
347 ifeq (iOS,$(TARGET_SYS)) 371 ifeq (iOS,$(TARGET_SYS))
348 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX 372 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1
349 else 373 else
350 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER 374 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER
351 endif 375 endif
@@ -379,6 +403,11 @@ DASM_XFLAGS=
379DASM_AFLAGS= 403DASM_AFLAGS=
380DASM_ARCH= $(TARGET_LJARCH) 404DASM_ARCH= $(TARGET_LJARCH)
381 405
406ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
407 DASM_AFLAGS+= -D ENDIAN_LE
408else
409 DASM_AFLAGS+= -D ENDIAN_BE
410endif
382ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) 411ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
383 DASM_AFLAGS+= -D P64 412 DASM_AFLAGS+= -D P64
384endif 413endif
@@ -407,23 +436,27 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
407 DASM_AFLAGS+= -D NO_UNWIND 436 DASM_AFLAGS+= -D NO_UNWIND
408 TARGET_ARCH+= -DLUAJIT_NO_UNWIND 437 TARGET_ARCH+= -DLUAJIT_NO_UNWIND
409endif 438endif
439ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH)))
440 DASM_AFLAGS+= -D PAUTH
441 TARGET_ARCH+= -DLJ_ABI_PAUTH=1
442endif
410DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) 443DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
411ifeq (Windows,$(TARGET_SYS)) 444ifeq (Windows,$(TARGET_SYS))
412 DASM_AFLAGS+= -D WIN 445 DASM_AFLAGS+= -D WIN
413endif 446endif
414ifeq (x86,$(TARGET_LJARCH))
415 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
416 DASM_AFLAGS+= -D SSE
417 endif
418else
419ifeq (x64,$(TARGET_LJARCH)) 447ifeq (x64,$(TARGET_LJARCH))
420 DASM_ARCH= x86 448 ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
449 DASM_ARCH= x86
450 endif
421else 451else
422ifeq (arm,$(TARGET_LJARCH)) 452ifeq (arm,$(TARGET_LJARCH))
423 ifeq (iOS,$(TARGET_SYS)) 453 ifeq (iOS,$(TARGET_SYS))
424 DASM_AFLAGS+= -D IOS 454 DASM_AFLAGS+= -D IOS
425 endif 455 endif
426else 456else
457ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
458 DASM_AFLAGS+= -D MIPSR6
459endif
427ifeq (ppc,$(TARGET_LJARCH)) 460ifeq (ppc,$(TARGET_LJARCH))
428 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) 461 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
429 DASM_AFLAGS+= -D SQRT 462 DASM_AFLAGS+= -D SQRT
@@ -431,7 +464,7 @@ ifeq (ppc,$(TARGET_LJARCH))
431 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) 464 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
432 DASM_AFLAGS+= -D ROUND 465 DASM_AFLAGS+= -D ROUND
433 endif 466 endif
434 ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH))) 467 ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
435 DASM_AFLAGS+= -D GPR64 468 DASM_AFLAGS+= -D GPR64
436 endif 469 endif
437 ifeq (PS3,$(TARGET_SYS)) 470 ifeq (PS3,$(TARGET_SYS))
@@ -440,7 +473,6 @@ ifeq (ppc,$(TARGET_LJARCH))
440endif 473endif
441endif 474endif
442endif 475endif
443endif
444 476
445DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) 477DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
446DASM_DASC= vm_$(DASM_ARCH).dasc 478DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -461,19 +493,22 @@ BUILDVM_X= $(BUILDVM_T)
461HOST_O= $(MINILUA_O) $(BUILDVM_O) 493HOST_O= $(MINILUA_O) $(BUILDVM_O)
462HOST_T= $(MINILUA_T) $(BUILDVM_T) 494HOST_T= $(MINILUA_T) $(BUILDVM_T)
463 495
464LJVM_S= lj_vm.s 496LJVM_S= lj_vm.S
465LJVM_O= lj_vm.o 497LJVM_O= lj_vm.o
466LJVM_BOUT= $(LJVM_S) 498LJVM_BOUT= $(LJVM_S)
467LJVM_MODE= elfasm 499LJVM_MODE= elfasm
468 500
469LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ 501LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
470 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o 502 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
503 lib_buffer.o
471LJLIB_C= $(LJLIB_O:.o=.c) 504LJLIB_C= $(LJLIB_O:.o=.c)
472 505
473LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ 506LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
474 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ 507 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
475 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ 508 lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
476 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ 509 lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
510 lj_api.o lj_profile.o \
511 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
477 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 512 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
478 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ 513 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
479 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ 514 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -588,12 +623,15 @@ E= @echo
588default all: $(TARGET_T) 623default all: $(TARGET_T)
589 624
590amalg: 625amalg:
591 @grep "^[+|]" ljamalg.c
592 $(MAKE) all "LJCORE_O=ljamalg.o" 626 $(MAKE) all "LJCORE_O=ljamalg.o"
593 627
594clean: 628clean:
595 $(HOST_RM) $(ALL_RM) 629 $(HOST_RM) $(ALL_RM)
596 630
631libbc:
632 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
633 $(MAKE) all
634
597depend: 635depend:
598 @for file in $(ALL_HDRGEN); do \ 636 @for file in $(ALL_HDRGEN); do \
599 test -f $$file || touch $$file; \ 637 test -f $$file || touch $$file; \
@@ -608,7 +646,7 @@ depend:
608 test -s $$file || $(HOST_RM) $$file; \ 646 test -s $$file || $(HOST_RM) $$file; \
609 done 647 done
610 648
611.PHONY: default all amalg clean depend 649.PHONY: default all amalg clean libbc depend
612 650
613############################################################################## 651##############################################################################
614# Rules for generated files. 652# Rules for generated files.
@@ -670,10 +708,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
670 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 708 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
671 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 709 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
672 710
673%.o: %.s 711%.o: %.S
674 $(E) "ASM $@" 712 $(E) "ASM $@"
675 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 713 $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
676 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 714 $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
677 715
678$(LUAJIT_O): 716$(LUAJIT_O):
679 $(E) "CC $@" 717 $(E) "CC $@"
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 9e14d617..7b534b05 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -1,66 +1,80 @@
1lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 1lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
2 lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ 2 lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
3 lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h 3 lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_vmevent.h
4lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 4lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ 5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ 6 lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ 7 lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \
8 lj_lib.h lj_libdef.h 8 lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h
9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h 10 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
11 lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
12 lj_ffdef.h lj_lib.h lj_libdef.h
13lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
14 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
15 lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \
16 lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h
11lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 17lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
12 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ 18 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
13 lj_libdef.h 19 lj_libdef.h
14lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 20lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
15 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ 21 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
16 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ 22 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
17 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h 23 lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
24 lj_libdef.h
18lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h 25lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
19lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 26lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
20 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ 27 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
21 lj_ffdef.h lj_lib.h lj_libdef.h 28 lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
22lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 29lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
23 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ 30 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
24 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ 31 lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
25 lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ 32 lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
26 lj_libdef.h 33 lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
27lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 34lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
28 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h 35 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_vm.h lj_prng.h \
36 lj_libdef.h
29lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 37lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
30 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 38 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
39 lj_libdef.h
31lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 40lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
32 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h 41 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
33lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 42lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
34 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 43 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
35 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ 44 lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
36 lj_lib.h lj_libdef.h 45 lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
37lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 46lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ 47 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
39 lj_libdef.h 48 lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
40lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h 49lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \
50 lj_prng.h
41lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 51lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
42 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ 52 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
43 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ 53 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
44 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h 54 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
45lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 55lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
46 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ 56 lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
47 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 57 lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
48 lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ 58 lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
49 lj_asm_*.h 59 lj_prng.h lj_emit_*.h lj_asm_*.h
60lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
50lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ 61lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
51 lj_bcdef.h 62 lj_bcdef.h
52lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 63lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
53 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ 64 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
54 lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h 65 lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
66 lj_strfmt.h
55lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 67lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
56 lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ 68 lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
57 lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h 69 lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
70lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
71 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
58lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 72lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
59 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ 73 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
60 lj_cdata.h lj_carith.h 74 lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
61lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 75lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
62 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 76 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
63 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 77 lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
64 lj_traceerr.h 78 lj_traceerr.h
65lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ 79lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
66 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ 80 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -68,110 +82,127 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
68 lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ 82 lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
69 lj_traceerr.h lj_vm.h 83 lj_traceerr.h lj_vm.h
70lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 84lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
71 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ 85 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \
72 lj_ccallback.h 86 lj_cdata.h lj_cconv.h lj_ccallback.h
73lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 87lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
74 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 88 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
75 lj_cdata.h
76lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h 89lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
77lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 90lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
78 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ 91 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
79 lj_cdata.h lj_clib.h 92 lj_cdata.h lj_clib.h lj_strfmt.h
80lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 93lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
81 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ 94 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
82 lj_bc.h lj_vm.h lj_char.h lj_strscan.h 95 lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
83lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 96lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 97 lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
85 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ 98 lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
86 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 99 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
87 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ 100 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
88 lj_crecord.h 101 lj_crecord.h lj_strfmt.h lj_strscan.h
89lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 102lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h 103 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
104 lj_ccallback.h lj_buf.h
91lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 105lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ 106 lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
93 lj_bc.h lj_vm.h lj_jit.h lj_ir.h 107 lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
94lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 108lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
95 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ 109 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
96 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ 110 lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
97 lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 111 lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
98 lj_vm.h luajit.h 112 lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
99lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ 113lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
100 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ 114 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
101 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ 115 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
102 lj_traceerr.h lj_vm.h 116 lj_traceerr.h lj_vm.h lj_strfmt.h
103lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 117lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
104 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ 118 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
105 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 119 lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
106 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ 120 lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
107 lj_vm.h lj_strscan.h lj_recdef.h 121 lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
108lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 122lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 123 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
110 lj_traceerr.h lj_vm.h 124 lj_traceerr.h lj_vm.h
111lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 125lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ 126 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
113 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ 127 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
114 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 128 lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_vmevent.h
115lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 129lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ 130 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
117 lj_ir.h lj_dispatch.h 131 lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
118lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 132lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
119 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 133 lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
120 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 134 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
121 lj_vm.h lj_strscan.h lj_lib.h 135 lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
122lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 136lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 137 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 138 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
139 lj_strfmt.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 140lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 141 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 142 lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \
143 lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 144lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 145 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 146 lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
131lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 147lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
132 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ 148 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
133 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h 149 lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 150lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 151 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
136 lj_vm.h lj_strscan.h 152 lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 153lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 154lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 155 lj_ir.h lj_jit.h lj_iropt.h
140lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 156lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
141 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 157 lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
142 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 158 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
143 lj_strscan.h lj_folddef.h 159 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
144lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 160lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
145 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 161 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
146 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 162 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
163 lj_vm.h
147lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 164lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
148 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 165 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h
149lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 166lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
150 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 167 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
151 lj_traceerr.h lj_vm.h lj_strscan.h 168 lj_traceerr.h lj_vm.h lj_strscan.h
152lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 169lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h 170 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
154lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 171lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
155 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 172 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
156 lj_iropt.h lj_vm.h 173 lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
157lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 174lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
158 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ 175 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
159 lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 176 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
177 lj_vm.h lj_vmevent.h
178lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
179lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
180 lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
181 lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
160lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 182lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
161 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 183 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
162 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 184 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
163 lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ 185 lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
164 lj_ffrecord.h lj_snap.h lj_vm.h 186 lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
187lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
188 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
189 lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
165lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 190lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
166 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ 191 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
167 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 192 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
168 lj_target_*.h lj_ctype.h lj_cdata.h 193 lj_target_*.h lj_ctype.h lj_cdata.h
169lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 194lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
170 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 195 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
171 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 196 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
172 lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h 197 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
198 lj_alloc.h luajit.h
173lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 199lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
174 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h 200 lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
201lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
202 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \
203 lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h
204lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
205 lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
175lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 206lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
176 lj_char.h lj_strscan.h 207 lj_char.h lj_strscan.h
177lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 208lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -180,35 +211,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
180 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ 211 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \
181 lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ 212 lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
182 lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ 213 lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
183 lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h 214 lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
184lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 215lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
185 lj_gc.h lj_udata.h 216 lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
186lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 217lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
187 lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ 218 lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
188 lj_vm.h lj_vmevent.h 219 lj_vm.h lj_vmevent.h
189lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 220lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_ir.h lj_vm.h 221 lj_ir.h lj_vm.h
191ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ 222ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
192 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ 223 lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \
193 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ 224 lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 225 lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 226 lj_traceerr.h lj_vm.h lj_vmevent.h lj_err.c lj_debug.h lj_ff.h \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 227 lj_ffdef.h lj_strfmt.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 228 lj_buf.c lj_str.c lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 229 lj_strscan.h lj_lib.h lj_debug.c lj_prng.c lj_state.c lj_lex.h \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 230 lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h lj_profile.h \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 231 lj_vmevent.c lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 232 lj_serialize.c lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 233 lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 234 lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 235 lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 236 lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 237 lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
207 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ 238 lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
208 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ 239 lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
209 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ 240 lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
210 lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ 241 lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
211 lib_init.c 242 lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
243 lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
244 lib_ffi.c lib_buffer.c lib_init.c
212luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h 245luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
213host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ 246host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
214 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ 247 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
@@ -220,7 +253,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 253host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 254 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 255host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 256 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
257 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 258host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 259 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 260host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 39c2bc24..ec99e501 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -18,8 +18,10 @@
18#include "lj_obj.h" 18#include "lj_obj.h"
19#include "lj_gc.h" 19#include "lj_gc.h"
20#include "lj_bc.h" 20#include "lj_bc.h"
21#if LJ_HASJIT
21#include "lj_ir.h" 22#include "lj_ir.h"
22#include "lj_ircall.h" 23#include "lj_ircall.h"
24#endif
23#include "lj_frame.h" 25#include "lj_frame.h"
24#include "lj_dispatch.h" 26#include "lj_dispatch.h"
25#if LJ_HASFFI 27#if LJ_HASFFI
@@ -59,10 +61,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
59#include "../dynasm/dasm_x86.h" 61#include "../dynasm/dasm_x86.h"
60#elif LJ_TARGET_ARM 62#elif LJ_TARGET_ARM
61#include "../dynasm/dasm_arm.h" 63#include "../dynasm/dasm_arm.h"
64#elif LJ_TARGET_ARM64
65#include "../dynasm/dasm_arm64.h"
62#elif LJ_TARGET_PPC 66#elif LJ_TARGET_PPC
63#include "../dynasm/dasm_ppc.h" 67#include "../dynasm/dasm_ppc.h"
64#elif LJ_TARGET_PPCSPE
65#include "../dynasm/dasm_ppc.h"
66#elif LJ_TARGET_MIPS 68#elif LJ_TARGET_MIPS
67#include "../dynasm/dasm_mips.h" 69#include "../dynasm/dasm_mips.h"
68#else 70#else
@@ -110,11 +112,11 @@ static const char *sym_decorate(BuildCtx *ctx,
110 if (p) { 112 if (p) {
111#if LJ_TARGET_X86ORX64 113#if LJ_TARGET_X86ORX64
112 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) 114 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
113 name[0] = '@'; 115 name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
114 else 116 else
115 *p = '\0'; 117 *p = '\0';
116#elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE 118#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
117 /* Keep @plt. */ 119 /* Keep @plt etc. */
118#else 120#else
119 *p = '\0'; 121 *p = '\0';
120#endif 122#endif
@@ -179,6 +181,7 @@ static int build_code(BuildCtx *ctx)
179 ctx->nreloc = 0; 181 ctx->nreloc = 0;
180 182
181 ctx->globnames = globnames; 183 ctx->globnames = globnames;
184 ctx->extnames = extnames;
182 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); 185 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
183 ctx->nrelocsym = 0; 186 ctx->nrelocsym = 0;
184 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; 187 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -249,6 +252,7 @@ BCDEF(BCNAME)
249 NULL 252 NULL
250}; 253};
251 254
255#if LJ_HASJIT
252const char *const ir_names[] = { 256const char *const ir_names[] = {
253#define IRNAME(name, m, m1, m2) #name, 257#define IRNAME(name, m, m1, m2) #name,
254IRDEF(IRNAME) 258IRDEF(IRNAME)
@@ -289,7 +293,9 @@ static const char *const trace_errors[] = {
289#include "lj_traceerr.h" 293#include "lj_traceerr.h"
290 NULL 294 NULL
291}; 295};
296#endif
292 297
298#if LJ_HASJIT
293static const char *lower(char *buf, const char *s) 299static const char *lower(char *buf, const char *s)
294{ 300{
295 char *p = buf; 301 char *p = buf;
@@ -300,6 +306,7 @@ static const char *lower(char *buf, const char *s)
300 *p = '\0'; 306 *p = '\0';
301 return buf; 307 return buf;
302} 308}
309#endif
303 310
304/* Emit C source code for bytecode-related definitions. */ 311/* Emit C source code for bytecode-related definitions. */
305static void emit_bcdef(BuildCtx *ctx) 312static void emit_bcdef(BuildCtx *ctx)
@@ -317,24 +324,27 @@ static void emit_bcdef(BuildCtx *ctx)
317/* Emit VM definitions as Lua code for debug modules. */ 324/* Emit VM definitions as Lua code for debug modules. */
318static void emit_vmdef(BuildCtx *ctx) 325static void emit_vmdef(BuildCtx *ctx)
319{ 326{
327#if LJ_HASJIT
320 char buf[80]; 328 char buf[80];
329#endif
321 int i; 330 int i;
322 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); 331 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
323 fprintf(ctx->fp, "assert(require(\"jit\").version == \"%s\", \"LuaJIT core/library version mismatch\")\n\n", LUAJIT_VERSION); 332 fprintf(ctx->fp, "assert(require(\"jit\").version == \"%s\", \"LuaJIT core/library version mismatch\")\n\n", LUAJIT_VERSION);
324 fprintf(ctx->fp, "module(...)\n\n"); 333 fprintf(ctx->fp, "return {\n\n");
325 334
326 fprintf(ctx->fp, "bcnames = \""); 335 fprintf(ctx->fp, "bcnames = \"");
327 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); 336 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
328 fprintf(ctx->fp, "\"\n\n"); 337 fprintf(ctx->fp, "\",\n\n");
329 338
339#if LJ_HASJIT
330 fprintf(ctx->fp, "irnames = \""); 340 fprintf(ctx->fp, "irnames = \"");
331 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); 341 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
332 fprintf(ctx->fp, "\"\n\n"); 342 fprintf(ctx->fp, "\",\n\n");
333 343
334 fprintf(ctx->fp, "irfpm = { [0]="); 344 fprintf(ctx->fp, "irfpm = { [0]=");
335 for (i = 0; irfpm_names[i]; i++) 345 for (i = 0; irfpm_names[i]; i++)
336 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); 346 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
337 fprintf(ctx->fp, "}\n\n"); 347 fprintf(ctx->fp, "},\n\n");
338 348
339 fprintf(ctx->fp, "irfield = { [0]="); 349 fprintf(ctx->fp, "irfield = { [0]=");
340 for (i = 0; irfield_names[i]; i++) { 350 for (i = 0; irfield_names[i]; i++) {
@@ -344,17 +354,18 @@ static void emit_vmdef(BuildCtx *ctx)
344 if (p) *p = '.'; 354 if (p) *p = '.';
345 fprintf(ctx->fp, "\"%s\", ", buf); 355 fprintf(ctx->fp, "\"%s\", ", buf);
346 } 356 }
347 fprintf(ctx->fp, "}\n\n"); 357 fprintf(ctx->fp, "},\n\n");
348 358
349 fprintf(ctx->fp, "ircall = {\n[0]="); 359 fprintf(ctx->fp, "ircall = {\n[0]=");
350 for (i = 0; ircall_names[i]; i++) 360 for (i = 0; ircall_names[i]; i++)
351 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); 361 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
352 fprintf(ctx->fp, "}\n\n"); 362 fprintf(ctx->fp, "},\n\n");
353 363
354 fprintf(ctx->fp, "traceerr = {\n[0]="); 364 fprintf(ctx->fp, "traceerr = {\n[0]=");
355 for (i = 0; trace_errors[i]; i++) 365 for (i = 0; trace_errors[i]; i++)
356 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 366 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
357 fprintf(ctx->fp, "}\n\n"); 367 fprintf(ctx->fp, "},\n\n");
368#endif
358} 369}
359 370
360/* -- Argument parsing ---------------------------------------------------- */ 371/* -- Argument parsing ---------------------------------------------------- */
@@ -491,6 +502,7 @@ int main(int argc, char **argv)
491 case BUILD_vmdef: 502 case BUILD_vmdef:
492 emit_vmdef(ctx); 503 emit_vmdef(ctx);
493 emit_lib(ctx); 504 emit_lib(ctx);
505 fprintf(ctx->fp, "}\n\n");
494 break; 506 break;
495 case BUILD_ffdef: 507 case BUILD_ffdef:
496 case BUILD_libdef: 508 case BUILD_libdef:
diff --git a/src/host/buildvm.h b/src/host/buildvm.h
index ded45d94..add8ee15 100644
--- a/src/host/buildvm.h
+++ b/src/host/buildvm.h
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
82 const char *beginsym; 82 const char *beginsym;
83 /* Strings generated by DynASM. */ 83 /* Strings generated by DynASM. */
84 const char *const *globnames; 84 const char *const *globnames;
85 const char *const *extnames;
85 const char *dasm_ident; 86 const char *dasm_ident;
86 const char *dasm_arch; 87 const char *dasm_arch;
87 /* Relocations. */ 88 /* Relocations. */
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 458ce733..1b261206 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" 51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
52}; 52};
53 53
54/* Emit relocation for the incredibly stupid OSX assembler. */ 54/* Emit x86/x64 text relocations. */
55static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, 55static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
56 const char *sym) 56 const char *sym)
57{ 57{
58 const char *opname = NULL; 58 const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
71 exit(1); 71 exit(1);
72 } 72 }
73 emit_asm_bytes(ctx, cp, n); 73 emit_asm_bytes(ctx, cp, n);
74 if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
75 /* Various fixups for external symbols outside of our binary. */
76 if (ctx->mode == BUILD_elfasm) {
77 if (LJ_32)
78 fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
79 fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
80 if (LJ_32)
81 fprintf(ctx->fp, "#endif\n");
82 return;
83 } else if (LJ_32 && ctx->mode == BUILD_machasm) {
84 fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
85 return;
86 }
87 }
74 fprintf(ctx->fp, "\t%s %s\n", opname, sym); 88 fprintf(ctx->fp, "\t%s %s\n", opname, sym);
75} 89}
76#else 90#else
@@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
79{ 93{
80 int i; 94 int i;
81 for (i = 0; i < n; i += 4) { 95 for (i = 0; i < n; i += 4) {
96 uint32_t ins = *(uint32_t *)(p+i);
97#if LJ_TARGET_ARM64 && LJ_BE
98 ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
99#endif
82 if ((i & 15) == 0) 100 if ((i & 15) == 0)
83 fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); 101 fprintf(ctx->fp, "\t.long 0x%08x", ins);
84 else 102 else
85 fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); 103 fprintf(ctx->fp, ",0x%08x", ins);
86 if ((i & 15) == 12) putc('\n', ctx->fp); 104 if ((i & 15) == 12) putc('\n', ctx->fp);
87 } 105 }
88 if ((n & 15) != 0) putc('\n', ctx->fp); 106 if ((n & 15) != 0) putc('\n', ctx->fp);
@@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
107 ins, sym); 125 ins, sym);
108 exit(1); 126 exit(1);
109 } 127 }
110#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 128#elif LJ_TARGET_ARM64
129 if ((ins >> 26) == 0x25u) {
130 fprintf(ctx->fp, "\tbl %s\n", sym);
131 } else {
132 fprintf(stderr,
133 "Error: unsupported opcode %08x for %s symbol relocation.\n",
134 ins, sym);
135 exit(1);
136 }
137#elif LJ_TARGET_PPC
111#if LJ_TARGET_PS3 138#if LJ_TARGET_PS3
112#define TOCPREFIX "." 139#define TOCPREFIX "."
113#else 140#else
@@ -216,6 +243,12 @@ void emit_asm(BuildCtx *ctx)
216 243
217 fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); 244 fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
218 fprintf(ctx->fp, "\t.text\n"); 245 fprintf(ctx->fp, "\t.text\n");
246#if LJ_TARGET_MIPS32 && !LJ_ABI_SOFTFP
247 fprintf(ctx->fp, "\t.module fp=32\n");
248#endif
249#if LJ_TARGET_MIPS
250 fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n");
251#endif
219 emit_asm_align(ctx, 4); 252 emit_asm_align(ctx, 4);
220 253
221#if LJ_TARGET_PS3 254#if LJ_TARGET_PS3
@@ -228,13 +261,19 @@ void emit_asm(BuildCtx *ctx)
228 261
229#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND 262#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
230 /* This should really be moved into buildvm_arm.dasc. */ 263 /* This should really be moved into buildvm_arm.dasc. */
264#if LJ_ARCH_HASFPU
265 fprintf(ctx->fp,
266 ".fnstart\n"
267 ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
268 ".vsave {d8-d15}\n"
269 ".save {r4}\n"
270 ".pad #28\n");
271#else
231 fprintf(ctx->fp, 272 fprintf(ctx->fp,
232 ".fnstart\n" 273 ".fnstart\n"
233 ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" 274 ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
234 ".pad #28\n"); 275 ".pad #28\n");
235#endif 276#endif
236#if LJ_TARGET_MIPS
237 fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
238#endif 277#endif
239 278
240 for (i = rel = 0; i < ctx->nsym; i++) { 279 for (i = rel = 0; i < ctx->nsym; i++) {
@@ -255,8 +294,9 @@ void emit_asm(BuildCtx *ctx)
255 BuildReloc *r = &ctx->reloc[rel]; 294 BuildReloc *r = &ctx->reloc[rel];
256 int n = r->ofs - ofs; 295 int n = r->ofs - ofs;
257#if LJ_TARGET_X86ORX64 296#if LJ_TARGET_X86ORX64
258 if (ctx->mode == BUILD_machasm && r->type != 0) { 297 if (r->type != 0 &&
259 emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); 298 (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
299 emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
260 } else { 300 } else {
261 emit_asm_bytes(ctx, ctx->code+ofs, n); 301 emit_asm_bytes(ctx, ctx->code+ofs, n);
262 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); 302 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -290,10 +330,7 @@ void emit_asm(BuildCtx *ctx)
290#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) 330#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
291 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); 331 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
292#endif 332#endif
293#if LJ_TARGET_PPCSPE 333#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
294 /* Soft-float ABI + SPE. */
295 fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
296#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
297 /* Hard-float ABI. */ 334 /* Hard-float ABI. */
298 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); 335 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
299#endif 336#endif
@@ -302,6 +339,10 @@ void emit_asm(BuildCtx *ctx)
302 fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident); 339 fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
303 break; 340 break;
304 case BUILD_machasm: 341 case BUILD_machasm:
342#if defined(__apple_build_version__) && __apple_build_version__ >= 15000000 && __apple_build_version__ < 15000300
343 /* Workaround for XCode 15.0 - 15.2. */
344 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
345#endif
305 fprintf(ctx->fp, 346 fprintf(ctx->fp,
306 "\t.cstring\n" 347 "\t.cstring\n"
307 "\t.ascii \"%s\\0\"\n", ctx->dasm_ident); 348 "\t.ascii \"%s\\0\"\n", ctx->dasm_ident);
diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c
index 388a8146..cc392e93 100644
--- a/src/host/buildvm_fold.c
+++ b/src/host/buildvm_fold.c
@@ -5,6 +5,7 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#if LJ_HASJIT
8#include "lj_ir.h" 9#include "lj_ir.h"
9 10
10/* Context for the folding hash table generator. */ 11/* Context for the folding hash table generator. */
@@ -226,4 +227,10 @@ void emit_fold(BuildCtx *ctx)
226 227
227 makehash(ctx); 228 makehash(ctx);
228} 229}
230#else
231void emit_fold(BuildCtx *ctx)
232{
233 UNUSED(ctx);
234}
235#endif
229 236
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index 36797e4c..99d12a27 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
156static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
157{
158 uint32_t v = *p++;
159 if (v >= 0x80) {
160 int sh = 0; v &= 0x7f;
161 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
162 }
163 *vv = v;
164 return p;
165}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
@@ -333,6 +392,8 @@ void emit_lib(BuildCtx *ctx)
333 ok = LJ_HASJIT; 392 ok = LJ_HASJIT;
334 else if (!strcmp(buf, "#if LJ_HASFFI")) 393 else if (!strcmp(buf, "#if LJ_HASFFI"))
335 ok = LJ_HASFFI; 394 ok = LJ_HASFFI;
395 else if (!strcmp(buf, "#if LJ_HASBUFFER"))
396 ok = LJ_HASBUFFER;
336 if (!ok) { 397 if (!ok) {
337 int lvl = 1; 398 int lvl = 1;
338 while (fgets(buf, sizeof(buf), fp) != NULL) { 399 while (fgets(buf, sizeof(buf), fp) != NULL) {
@@ -380,7 +441,7 @@ void emit_lib(BuildCtx *ctx)
380 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", 441 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
381 ffasmfunc); 442 ffasmfunc);
382 } else if (ctx->mode == BUILD_vmdef) { 443 } else if (ctx->mode == BUILD_vmdef) {
383 fprintf(ctx->fp, "}\n\n"); 444 fprintf(ctx->fp, "},\n\n");
384 } else if (ctx->mode == BUILD_bcdef) { 445 } else if (ctx->mode == BUILD_bcdef) {
385 int i; 446 int i;
386 fprintf(ctx->fp, "\n};\n\n"); 447 fprintf(ctx->fp, "\n};\n\n");
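The libdef_uleb128() helper added above reads the standard unsigned LEB128 encoding used by LuaJIT's bytecode dump format: each byte carries 7 payload bits, least-significant group first, and the top bit of a byte signals that more bytes follow. libdef_fixupbc() uses it to skip the length fields in the dumped prototype header, keeping the last one, sizebc, as the number of 4-byte instructions to patch. Below is a minimal standalone sketch of the same decoding with a worked constant; the helper name and test value are illustrative only and not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Decode one unsigned LEB128 value; mirrors libdef_uleb128() above. */
static const uint8_t *uleb128_decode(const uint8_t *p, uint32_t *vv)
{
  uint32_t v = *p++;
  if (v >= 0x80) {  /* Continuation bit set: more bytes follow. */
    int sh = 0; v &= 0x7f;
    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
  }
  *vv = v;
  return p;
}

int main(void)
{
  /* 0xE5 0x8E 0x26 decodes to 0x65 | (0x0E<<7) | (0x26<<14) = 624485. */
  static const uint8_t buf[] = { 0xE5, 0x8E, 0x26 };
  uint32_t v;
  uleb128_decode(buf, &v);
  printf("%u\n", v);  /* Prints 624485. */
  return 0;
}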
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..276463b2
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,81 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
6#if LJ_FR2
7/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
8220,203,178,130,4,
9/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
10198,190,199,252,3,
11/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
12/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
13BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
14BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
15BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
16/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
172,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0,
18BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
19BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
20/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
21/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
220,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
23BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
24BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
25BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
26BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
27BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
28/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
292,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
304,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
31128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
32BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
33BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
34BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
3511,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
36#else
37/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
38220,203,178,130,4,
39/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
40198,190,199,252,3,
41/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
42/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
43BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
44BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
45BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
46/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
472,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0,
48BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
49BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
50/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
51/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
520,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
53BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
54BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
55BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
56BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
57BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
58/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
592,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
604,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
61128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
62BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
63BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
64BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
6511,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
66#endif
670
68};
69
70static const struct { const char *name; int ofs; } libbc_map[] = {
71{"math_deg",0},
72{"math_rad",25},
73{"string_len",50},
74{"table_foreachi",69},
75{"table_foreach",136},
76{"table_getn",213},
77{"table_remove",232},
78{"table_move",361},
79{NULL,508}
80};
81
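In the generated table above, each function's bytecode length is implied by the next entry's offset, with the final NULL entry carrying the total size of libbc_code[]; that is exactly how libdef_lua() in buildvm_lib.c slices the blob (len = libbc_map[i+1].ofs - ofs). A minimal lookup sketch under that assumption follows; the struct and function names are hypothetical, not taken from the sources.

#include <string.h>

/* Shape of the generated libbc_map[] entries above. */
struct libbc_entry { const char *name; int ofs; };

/* Return a pointer to the embedded bytecode for name within code[],
** storing its length in *len, or NULL if the name is not in the map.
*/
static const unsigned char *libbc_find(const struct libbc_entry *map,
                                       const unsigned char *code,
                                       const char *name, int *len)
{
  int i;
  for (i = 0; map[i].name != NULL; i++) {
    if (!strcmp(map[i].name, name)) {
      *len = map[i+1].ofs - map[i].ofs;  /* Next offset delimits this blob. */
      return code + map[i].ofs;
    }
  }
  return NULL;
}

With the table above, a lookup of "table_move" would return the blob at offset 361 with length 508 - 361 = 147 bytes.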
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
index 4fbc11b9..8f04c496 100644
--- a/src/host/buildvm_peobj.c
+++ b/src/host/buildvm_peobj.c
@@ -9,7 +9,7 @@
9#include "buildvm.h" 9#include "buildvm.h"
10#include "lj_bc.h" 10#include "lj_bc.h"
11 11
12#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 12#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
13 13
14/* Context for PE object emitter. */ 14/* Context for PE object emitter. */
15static char *strtab; 15static char *strtab;
@@ -93,12 +93,17 @@ typedef struct PEsymaux {
93#define PEOBJ_RELOC_ADDR32NB 0x03 93#define PEOBJ_RELOC_ADDR32NB 0x03
94#define PEOBJ_RELOC_OFS 0 94#define PEOBJ_RELOC_OFS 0
95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ 95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
96#elif LJ_TARGET_PPC 96#define PEOBJ_PDATA_NRELOC 6
97#define PEOBJ_ARCH_TARGET 0x01f2 97#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
98#define PEOBJ_RELOC_REL32 0x06 98#elif LJ_TARGET_ARM64
99#define PEOBJ_RELOC_DIR32 0x02 99#define PEOBJ_ARCH_TARGET 0xaa64
100#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
101#define PEOBJ_RELOC_DIR32 0x01
102#define PEOBJ_RELOC_ADDR32NB 0x02
100#define PEOBJ_RELOC_OFS (-4) 103#define PEOBJ_RELOC_OFS (-4)
101#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */ 104#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
105#define PEOBJ_PDATA_NRELOC 4
106#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
102#endif 107#endif
103 108
104/* Section numbers (0-based). */ 109/* Section numbers (0-based). */
@@ -106,9 +111,11 @@ enum {
106 PEOBJ_SECT_ABS = -2, 111 PEOBJ_SECT_ABS = -2,
107 PEOBJ_SECT_UNDEF = -1, 112 PEOBJ_SECT_UNDEF = -1,
108 PEOBJ_SECT_TEXT, 113 PEOBJ_SECT_TEXT,
109#if LJ_TARGET_X64 114#ifdef PEOBJ_PDATA_NRELOC
110 PEOBJ_SECT_PDATA, 115 PEOBJ_SECT_PDATA,
111 PEOBJ_SECT_XDATA, 116 PEOBJ_SECT_XDATA,
117#elif LJ_TARGET_X86
118 PEOBJ_SECT_SXDATA,
112#endif 119#endif
113 PEOBJ_SECT_RDATA_Z, 120 PEOBJ_SECT_RDATA_Z,
114 PEOBJ_NSECTIONS 121 PEOBJ_NSECTIONS
@@ -179,6 +186,9 @@ void emit_peobj(BuildCtx *ctx)
179 uint32_t sofs; 186 uint32_t sofs;
180 int i, nrsym; 187 int i, nrsym;
181 union { uint8_t b; uint32_t u; } host_endian; 188 union { uint8_t b; uint32_t u; } host_endian;
189#ifdef PEOBJ_PDATA_NRELOC
190 uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
191#endif
182 192
183 sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); 193 sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
184 194
@@ -192,22 +202,29 @@ void emit_peobj(BuildCtx *ctx)
192 /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ 202 /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
193 pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; 203 pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
194 204
195#if LJ_TARGET_X64 205#ifdef PEOBJ_PDATA_NRELOC
196 memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); 206 memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
197 pesect[PEOBJ_SECT_PDATA].ofs = sofs; 207 pesect[PEOBJ_SECT_PDATA].ofs = sofs;
198 sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4); 208 sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
199 pesect[PEOBJ_SECT_PDATA].relocofs = sofs; 209 pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
200 sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE; 210 sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
201 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ 211 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
202 pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; 212 pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
203 213
204 memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); 214 memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
205 pesect[PEOBJ_SECT_XDATA].ofs = sofs; 215 pesect[PEOBJ_SECT_XDATA].ofs = sofs;
206 sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */ 216 sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */
207 pesect[PEOBJ_SECT_XDATA].relocofs = sofs; 217 pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
208 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; 218 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
209 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ 219 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
210 pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; 220 pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
221#elif LJ_TARGET_X86
222 memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
223 pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
224 sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
225 pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
226 /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
227 pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
211#endif 228#endif
212 229
213 memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); 230 memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@@ -231,8 +248,8 @@ void emit_peobj(BuildCtx *ctx)
231 */ 248 */
232 nrsym = ctx->nrelocsym; 249 nrsym = ctx->nrelocsym;
233 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; 250 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
234#if LJ_TARGET_X64 251#ifdef PEOBJ_PDATA_NRELOC
235 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ 252 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
236#endif 253#endif
237 254
238 /* Write PE object header and all sections. */ 255 /* Write PE object header and all sections. */
@@ -242,15 +259,8 @@ void emit_peobj(BuildCtx *ctx)
242 /* Write .text section. */ 259 /* Write .text section. */
243 host_endian.u = 1; 260 host_endian.u = 1;
244 if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { 261 if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
245#if LJ_TARGET_PPC
246 uint32_t *p = (uint32_t *)ctx->code;
247 int n = (int)(ctx->codesz >> 2);
248 for (i = 0; i < n; i++, p++)
249 *p = lj_bswap(*p); /* Byteswap .text section. */
250#else
251 fprintf(stderr, "Error: different byte order for host and target\n"); 262 fprintf(stderr, "Error: different byte order for host and target\n");
252 exit(1); 263 exit(1);
253#endif
254 } 264 }
255 owrite(ctx, ctx->code, ctx->codesz); 265 owrite(ctx, ctx->code, ctx->codesz);
256 for (i = 0; i < ctx->nreloc; i++) { 266 for (i = 0; i < ctx->nreloc; i++) {
@@ -263,7 +273,6 @@ void emit_peobj(BuildCtx *ctx)
263 273
264#if LJ_TARGET_X64 274#if LJ_TARGET_X64
265 { /* Write .pdata section. */ 275 { /* Write .pdata section. */
266 uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
267 uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ 276 uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
268 PEreloc reloc; 277 PEreloc reloc;
269 pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; 278 pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@@ -312,6 +321,100 @@ void emit_peobj(BuildCtx *ctx)
312 reloc.type = PEOBJ_RELOC_ADDR32NB; 321 reloc.type = PEOBJ_RELOC_ADDR32NB;
313 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); 322 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
314 } 323 }
324#elif LJ_TARGET_ARM64
325 /* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
326 { /* Write .pdata section. */
327 uint32_t pdata[4];
328 PEreloc reloc;
329 pdata[0] = 0;
330 pdata[1] = 0;
331 pdata[2] = fcofs;
332 pdata[3] = 4+24+4;
333 owrite(ctx, &pdata, sizeof(pdata));
334 /* Start of .text and start of .xdata. */
335 reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
336 reloc.type = PEOBJ_RELOC_ADDR32NB;
337 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
338 reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
339 reloc.type = PEOBJ_RELOC_ADDR32NB;
340 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
341 /* Start of vm_ffi_call and start of second part of .xdata. */
342 reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
343 reloc.type = PEOBJ_RELOC_ADDR32NB;
344 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
345 reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
346 reloc.type = PEOBJ_RELOC_ADDR32NB;
347 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
348 }
349 { /* Write .xdata section. */
350 uint32_t u32;
351 uint8_t *p, uwc[24];
352 PEreloc reloc;
353
354#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
355#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
356#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
357#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
358#define CSAVE_REGS(r1,r2,o1) do { \
359 int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
360} while (0)
361#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
362#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
363#define CSAVE_FREGS(r1,r2,o1) do { \
364 int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
365} while (0)
366#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
367#define CODE_NOP 0xe3
368#define CODE_END 0xe4
369#define CEND_ALIGN do { \
370 *p++ = CODE_END; \
371 while ((p - uwc) & 3) *p++ = CODE_NOP; \
372} while (0)
373
374 /* Unwind codes for .text section with handler. */
375 p = uwc;
376 CADD_FP(192); /* +2 */
377 CSAVE_REGS(19, 28, 176); /* +5*2 */
378 CSAVE_FREGS(8, 15, 96); /* +4*2 */
379 CSAVE_FPLR(192); /* +1 */
380 CALLOC_S(208); /* +1 */
381 CEND_ALIGN; /* +1 +1 -> 24 */
382
383 u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
384 owrite(ctx, &u32, 4);
385 owrite(ctx, &uwc, 24);
386
387 u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
388 owrite(ctx, &u32, 4);
389
390 /* Unwind codes for vm_ffi_call without handler. */
391 p = uwc;
392 CADD_FP(16); /* +2 */
393 CSAVE_FPLR(16); /* +1 */
394 CSAVE_REGPX(19, -32); /* +2 */
395 CEND_ALIGN; /* +1 +2 -> 8 */
396
397 u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
398 owrite(ctx, &u32, 4);
399 owrite(ctx, &uwc, 8);
400
401 reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
402 reloc.type = PEOBJ_RELOC_ADDR32NB;
403 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
404 }
405#elif LJ_TARGET_X86
406 /* Write .sxdata section. */
407 for (i = 0; i < nrsym; i++) {
408 if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
409 uint32_t symidx = 1+2+i;
410 owrite(ctx, &symidx, 4);
411 break;
412 }
413 }
414 if (i == nrsym) {
415 fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
416 exit(1);
417 }
315#endif 418#endif
316 419
317 /* Write .rdata$Z section. */ 420 /* Write .rdata$Z section. */
@@ -330,11 +433,13 @@ void emit_peobj(BuildCtx *ctx)
330 emit_peobj_sym(ctx, ctx->relocsym[i], 0, 433 emit_peobj_sym(ctx, ctx->relocsym[i], 0,
331 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); 434 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
332 435
333#if LJ_TARGET_X64 436#ifdef PEOBJ_PDATA_NRELOC
334 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); 437 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
335 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); 438 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
336 emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, 439 emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
337 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); 440 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
441#elif LJ_TARGET_X86
442 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
338#endif 443#endif
339 444
340 emit_peobj_sym(ctx, ctx->beginsym, 0, 445 emit_peobj_sym(ctx, ctx->beginsym, 0,
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..e697fceb
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,234 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
16local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
17
18local function usage(arg)
19 io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
20 " [-o buildvm_libbc.h] lib_*.c\n")
21 os.exit(1)
22end
23
24local function parse_arg(arg)
25 local outfile = "-"
26 if not (arg and arg[1]) then
27 usage(arg)
28 end
29 if arg[1] == "-o" then
30 outfile = arg[2]
31 if not outfile then usage(arg) end
32 table.remove(arg, 1)
33 table.remove(arg, 1)
34 end
35 return outfile
36end
37
38local function read_files(names)
39 local src = ""
40 for _,name in ipairs(names) do
41 local fp = assert(io.open(name))
42 src = src .. fp:read("*a")
43 fp:close()
44 end
45 return src
46end
47
48local function transform_lua(code)
49 local fixup = {}
50 local n = -30000
51 code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
52 n = n + 1
53 fixup[n] = { "CHECK", tp }
54 return format("%s=%d", var, n)
55 end)
56 code = string.gsub(code, "PAIRS%((.-)%)", function(var)
57 fixup.PAIRS = true
58 return format("nil, %s, 0x4dp80", var)
59 end)
60 return "return "..code, fixup
61end
62
63local function read_uleb128(p)
64 local v = p[0]; p = p + 1
65 if v >= 128 then
66 local sh = 7; v = v - 128
67 repeat
68 local r = p[0]
69 v = v + bit.lshift(bit.band(r, 127), sh)
70 sh = sh + 7
71 p = p + 1
72 until r < 128
73 end
74 return p, v
75end
76
77-- ORDER LJ_T
78local name2itype = {
79 str = 5, func = 9, tab = 12, int = 14, num = 15
80}
81
82local BC, BCN = {}, {}
83for i=0,#bcnames/6-1 do
84 local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "")
85 BC[name] = i
86 BCN[i] = name
87end
88local xop, xra = isbe and 3 or 0, isbe and 2 or 1
89local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
90
91local function fixup_dump(dump, fixup)
92 local buf = ffi.new("uint8_t[?]", #dump+1, dump)
93 local p = buf+5
94 local n, sizebc
95 p, n = read_uleb128(p)
96 local start = p
97 p = p + 4
98 p = read_uleb128(p)
99 p = read_uleb128(p)
100 p, sizebc = read_uleb128(p)
101 local startbc = tonumber(p - start)
102 local rawtab = {}
103 for i=0,sizebc-1 do
104 local op = p[xop]
105 if op == BC.KSHORT then
106 local rd = p[xrc] + 256*p[xrb]
107 rd = bit.arshift(bit.lshift(rd, 16), 16)
108 local f = fixup[rd]
109 if f then
110 if f[1] == "CHECK" then
111 local tp = f[2]
112 if tp == "tab" then rawtab[p[xra]] = true end
113 p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
114 p[xrb] = 0
115 p[xrc] = name2itype[tp]
116 else
117 error("unhandled fixup type: "..f[1])
118 end
119 end
120 elseif op == BC.TGETV then
121 if rawtab[p[xrb]] then
122 p[xop] = BC.TGETR
123 end
124 elseif op == BC.TSETV then
125 if rawtab[p[xrb]] then
126 p[xop] = BC.TSETR
127 end
128 elseif op == BC.ITERC then
129 if fixup.PAIRS then
130 p[xop] = BC.ITERN
131 end
132 end
133 p = p + 4
134 end
135 local ndump = ffi.string(start, n)
136 -- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX.
137 ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f")
138 return { dump = ndump, startbc = startbc, sizebc = sizebc }
139end
140
141local function find_defs(src, mode)
142 local defs = {}
143 for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
144 local tcode, fixup = transform_lua(code)
145 local func = assert(load(tcode, "", mode))
146 defs[name] = fixup_dump(string.dump(func, mode), fixup)
147 defs[#defs+1] = name
148 end
149 return defs
150end
151
152local function gen_header(defs32, defs64)
153 local t = {}
154 local function w(x) t[#t+1] = x end
155 w("/* This is a generated file. DO NOT EDIT! */\n\n")
156 w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
157 for j,defs in ipairs{defs64, defs32} do
158 local s, sb = "", ""
159 for i,name in ipairs(defs) do
160 local d = defs[name]
161 s = s .. d.dump
162 sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
163 .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
164 .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
165 end
166 if j == 1 then
167 w("static const uint8_t libbc_code[] = {\n#if LJ_FR2\n")
168 else
169 w("\n#else\n")
170 end
171 local n = 0
172 for i=1,#s do
173 local x = string.byte(s, i)
174 local xb = string.byte(sb, i)
175 if xb == 255 then
176 local name = BCN[x]
177 local m = #name + 4
178 if n + m > 78 then n = 0; w("\n") end
179 n = n + m
180 w("BC_"); w(name)
181 else
182 local m = x < 10 and 2 or (x < 100 and 3 or 4)
183 if xb == 0 then
184 if n + m > 78 then n = 0; w("\n") end
185 else
186 local name = defs[xb]:gsub("_", ".")
187 if n ~= 0 then w("\n") end
188 w("/* "); w(name); w(" */ ")
189 n = #name + 7
190 end
191 n = n + m
192 w(x)
193 end
194 w(",")
195 end
196 end
197 w("\n#endif\n0\n};\n\n")
198 w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
199 local m32, m64 = 0, 0
200 for i,name in ipairs(defs32) do
201 assert(name == defs64[i])
202 w('{"'); w(name); w('",'); w(m32) w('},\n')
203 m32 = m32 + #defs32[name].dump
204 m64 = m64 + #defs64[name].dump
205 assert(m32 == m64)
206 end
207 w("{NULL,"); w(m32); w("}\n};\n\n")
208 return table.concat(t)
209end
210
211local function write_file(name, data)
212 if name == "-" then
213 assert(io.write(data))
214 assert(io.flush())
215 else
216 local fp = io.open(name)
217 if fp then
218 local old = fp:read("*a")
219 fp:close()
220 if data == old then return end
221 end
222 fp = assert(io.open(name, "w"))
223 assert(fp:write(data))
224 assert(fp:close())
225 end
226end
227
228local outfile = parse_arg(arg)
229local src = read_files(arg)
230local defs32 = find_defs(src, "Wdts")
231local defs64 = find_defs(src, "Xdts")
232local hdr = gen_header(defs32, defs64)
233write_file(outfile, hdr)
234
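A rough usage sketch for the new generator script, based on its own usage() text above (the invoking binary and the exact paths are assumptions, not part of this diff):

    luajit src/host/genlibbc.lua -o src/host/buildvm_libbc.h src/lib_*.c

Without -o the generated header goes to stdout, since outfile defaults to "-" and write_file() writes "-" via io.write.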
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 4b384012..f1a63b9c 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -178,13 +178,12 @@ local function bcliston(outfile)
178end 178end
179 179
180-- Public module functions. 180-- Public module functions.
181module(...) 181return {
182 182 line = bcline,
183line = bcline 183 dump = bcdump,
184dump = bcdump 184 targets = bctargets,
185targets = bctargets 185 on = bcliston,
186 186 off = bclistoff,
187on = bcliston 187 start = bcliston -- For -j command line option.
188off = bclistoff 188}
189start = bcliston -- For -j command line option.
190 189
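With module(...) gone, jit/bc.lua now hands back a plain table of functions; a minimal usage sketch (assuming the module is reachable on package.path):

    local bc = require("jit.bc")
    bc.dump(function(x) return x + 1 end)  -- list the bytecode of one function
    bc.on()                                -- start listing newly compiled functions
    bc.off()

The extra start = bcliston entry keeps the -j bc command line option working against the returned table.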
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 030e6a16..48378819 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,12 +11,16 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20099, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20199, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
18local LJBC_PREFIX = "luaJIT_BC_" 18local LJBC_PREFIX = "luaJIT_BC_"
19 19
20local type, assert = type, assert
21local format = string.format
22local tremove, tconcat = table.remove, table.concat
23
20------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
21 25
22local function usage() 26local function usage()
@@ -25,15 +29,19 @@ Save LuaJIT bytecode: luajit -b[options] input output
25 -l Only list bytecode. 29 -l Only list bytecode.
26 -s Strip debug info (default). 30 -s Strip debug info (default).
27 -g Keep debug info. 31 -g Keep debug info.
32 -W Generate 32 bit (non-GC64) bytecode.
33 -X Generate 64 bit (GC64) bytecode.
34 -d Generate bytecode in deterministic manner.
28 -n name Set module name (default: auto-detect from input name). 35 -n name Set module name (default: auto-detect from input name).
29 -t type Set output file type (default: auto-detect from output name). 36 -t type Set output file type (default: auto-detect from output name).
30 -a arch Override architecture for object files (default: native). 37 -a arch Override architecture for object files (default: native).
31 -o os Override OS for object files (default: native). 38 -o os Override OS for object files (default: native).
39 -F name Override filename (default: input filename).
32 -e chunk Use chunk string as input. 40 -e chunk Use chunk string as input.
33 -- Stop handling options. 41 -- Stop handling options.
34 - Use stdin as input and/or stdout as output. 42 - Use stdin as input and/or stdout as output.
35 43
36File types: c h obj o raw (default) 44File types: c cc h obj o raw (default)
37]] 45]]
38 os.exit(1) 46 os.exit(1)
39end 47end
@@ -45,10 +53,23 @@ local function check(ok, ...)
45 os.exit(1) 53 os.exit(1)
46end 54end
47 55
48local function readfile(input) 56local function readfile(ctx, input)
49 if type(input) == "function" then return input end 57 if ctx.string then
50 if input == "-" then input = nil end 58 return check(loadstring(input, nil, ctx.mode))
51 return check(loadfile(input)) 59 elseif ctx.filename then
60 local data
61 if input == "-" then
62 data = io.stdin:read("*a")
63 else
64 local fp = assert(io.open(input, "rb"))
65 data = assert(fp:read("*a"))
66 assert(fp:close())
67 end
68 return check(load(data, ctx.filename, ctx.mode))
69 else
70 if input == "-" then input = nil end
71 return check(loadfile(input, ctx.mode))
72 end
52end 73end
53 74
54local function savefile(name, mode) 75local function savefile(name, mode)
@@ -56,15 +77,30 @@ local function savefile(name, mode)
56 return check(io.open(name, mode)) 77 return check(io.open(name, mode))
57end 78end
58 79
80local function set_stdout_binary(ffi)
81 ffi.cdef[[int _setmode(int fd, int mode);]]
82 ffi.C._setmode(1, 0x8000)
83end
84
59------------------------------------------------------------------------------ 85------------------------------------------------------------------------------
60 86
61local map_type = { 87local map_type = {
62 raw = "raw", c = "c", h = "h", o = "obj", obj = "obj", 88 raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
63} 89}
64 90
65local map_arch = { 91local map_arch = {
66 x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, 92 x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
67 mips = true, mipsel = true, 93 x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
94 arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
95 arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
96 arm64be = { e = "be", b = 64, m = 183, },
97 ppc = { e = "be", b = 32, m = 20, },
98 mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
99 mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
100 mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
101 mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
102 mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
103 mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
68} 104}
69 105
70local map_os = { 106local map_os = {
@@ -73,33 +109,33 @@ local map_os = {
73} 109}
74 110
75local function checkarg(str, map, err) 111local function checkarg(str, map, err)
76 str = string.lower(str) 112 str = str:lower()
77 local s = check(map[str], "unknown ", err) 113 local s = check(map[str], "unknown ", err)
78 return s == true and str or s 114 return type(s) == "string" and s or str
79end 115end
80 116
81local function detecttype(str) 117local function detecttype(str)
82 local ext = string.match(string.lower(str), "%.(%a+)$") 118 local ext = str:lower():match("%.(%a+)$")
83 return map_type[ext] or "raw" 119 return map_type[ext] or "raw"
84end 120end
85 121
86local function checkmodname(str) 122local function checkmodname(str)
87 check(string.match(str, "^[%w_.%-]+$"), "bad module name") 123 check(str:match("^[%w_.%-]+$"), "bad module name")
88 return string.gsub(str, "[%.%-]", "_") 124 return str:gsub("[%.%-]", "_")
89end 125end
90 126
91local function detectmodname(str) 127local function detectmodname(str)
92 if type(str) == "string" then 128 if type(str) == "string" then
93 local tail = string.match(str, "[^/\\]+$") 129 local tail = str:match("[^/\\]+$")
94 if tail then str = tail end 130 if tail then str = tail end
95 local head = string.match(str, "^(.*)%.[^.]*$") 131 local head = str:match("^(.*)%.[^.]*$")
96 if head then str = head end 132 if head then str = head end
97 str = string.match(str, "^[%w_.%-]+") 133 str = str:match("^[%w_.%-]+")
98 else 134 else
99 str = nil 135 str = nil
100 end 136 end
101 check(str, "cannot derive module name, use -n name") 137 check(str, "cannot derive module name, use -n name")
102 return string.gsub(str, "[%.%-]", "_") 138 return str:gsub("[%.%-]", "_")
103end 139end
104 140
105------------------------------------------------------------------------------ 141------------------------------------------------------------------------------
@@ -111,6 +147,11 @@ local function bcsave_tail(fp, output, s)
111end 147end
112 148
113local function bcsave_raw(output, s) 149local function bcsave_raw(output, s)
150 if output == "-" and jit.os == "Windows" then
151 local ok, ffi = pcall(require, "ffi")
152 check(ok, "FFI library required to write binary file to stdout")
153 set_stdout_binary(ffi)
154 end
114 local fp = savefile(output, "wb") 155 local fp = savefile(output, "wb")
115 bcsave_tail(fp, output, s) 156 bcsave_tail(fp, output, s)
116end 157end
@@ -118,19 +159,19 @@ end
118local function bcsave_c(ctx, output, s) 159local function bcsave_c(ctx, output, s)
119 local fp = savefile(output, "w") 160 local fp = savefile(output, "w")
120 if ctx.type == "c" then 161 if ctx.type == "c" then
121 fp:write(string.format([[ 162 fp:write(format([[
122#ifdef __cplusplus 163#ifdef __cplusplus
123extern "C" 164extern "C"
124#endif 165#endif
125#ifdef _WIN32 166#ifdef _WIN32
126__declspec(dllexport) 167__declspec(dllexport)
127#endif 168#endif
128const char %s%s[] = { 169const unsigned char %s%s[] = {
129]], LJBC_PREFIX, ctx.modname)) 170]], LJBC_PREFIX, ctx.modname))
130 else 171 else
131 fp:write(string.format([[ 172 fp:write(format([[
132#define %s%s_SIZE %d 173#define %s%s_SIZE %d
133static const char %s%s[] = { 174static const unsigned char %s%s[] = {
134]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) 175]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
135 end 176 end
136 local t, n, m = {}, 0, 0 177 local t, n, m = {}, 0, 0
@@ -138,13 +179,13 @@ static const char %s%s[] = {
138 local b = tostring(string.byte(s, i)) 179 local b = tostring(string.byte(s, i))
139 m = m + #b + 1 180 m = m + #b + 1
140 if m > 78 then 181 if m > 78 then
141 fp:write(table.concat(t, ",", 1, n), ",\n") 182 fp:write(tconcat(t, ",", 1, n), ",\n")
142 n, m = 0, #b + 1 183 n, m = 0, #b + 1
143 end 184 end
144 n = n + 1 185 n = n + 1
145 t[n] = b 186 t[n] = b
146 end 187 end
147 bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") 188 bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
148end 189end
149 190
150local function bcsave_elfobj(ctx, output, s, ffi) 191local function bcsave_elfobj(ctx, output, s, ffi)
@@ -199,12 +240,8 @@ typedef struct {
199} ELF64obj; 240} ELF64obj;
200]] 241]]
201 local symname = LJBC_PREFIX..ctx.modname 242 local symname = LJBC_PREFIX..ctx.modname
202 local is64, isbe = false, false 243 local ai = assert(map_arch[ctx.arch])
203 if ctx.arch == "x64" then 244 local is64, isbe = ai.b == 64, ai.e == "be"
204 is64 = true
205 elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
206 isbe = true
207 end
208 245
209 -- Handle different host/target endianess. 246 -- Handle different host/target endianess.
210 local function f32(x) return x end 247 local function f32(x) return x end
@@ -237,10 +274,8 @@ typedef struct {
237 hdr.eendian = isbe and 2 or 1 274 hdr.eendian = isbe and 2 or 1
238 hdr.eversion = 1 275 hdr.eversion = 1
239 hdr.type = f16(1) 276 hdr.type = f16(1)
240 hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) 277 hdr.machine = f16(ai.m)
241 if ctx.arch == "mips" or ctx.arch == "mipsel" then 278 hdr.flags = f32(ai.f or 0)
242 hdr.flags = f32(0x50001006)
243 end
244 hdr.version = f32(1) 279 hdr.version = f32(1)
245 hdr.shofs = fofs(ffi.offsetof(o, "sect")) 280 hdr.shofs = fofs(ffi.offsetof(o, "sect"))
246 hdr.ehsize = f16(ffi.sizeof(hdr)) 281 hdr.ehsize = f16(ffi.sizeof(hdr))
@@ -336,12 +371,8 @@ typedef struct {
336} PEobj; 371} PEobj;
337]] 372]]
338 local symname = LJBC_PREFIX..ctx.modname 373 local symname = LJBC_PREFIX..ctx.modname
339 local is64 = false 374 local ai = assert(map_arch[ctx.arch])
340 if ctx.arch == "x86" then 375 local is64 = ai.b == 64
341 symname = "_"..symname
342 elseif ctx.arch == "x64" then
343 is64 = true
344 end
345 local symexport = " /EXPORT:"..symname..",DATA " 376 local symexport = " /EXPORT:"..symname..",DATA "
346 377
347 -- The file format is always little-endian. Swap if the host is big-endian. 378 -- The file format is always little-endian. Swap if the host is big-endian.
@@ -355,7 +386,7 @@ typedef struct {
355 -- Create PE object and fill in header. 386 -- Create PE object and fill in header.
356 local o = ffi.new("PEobj") 387 local o = ffi.new("PEobj")
357 local hdr = o.hdr 388 local hdr = o.hdr
358 hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) 389 hdr.arch = f16(assert(ai.p))
359 hdr.nsects = f16(2) 390 hdr.nsects = f16(2)
360 hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) 391 hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
361 hdr.nsyms = f32(6) 392 hdr.nsyms = f32(6)
@@ -411,23 +442,11 @@ typedef struct
411typedef struct { 442typedef struct {
412 uint32_t cmd, cmdsize; 443 uint32_t cmd, cmdsize;
413 char segname[16]; 444 char segname[16];
414 uint32_t vmaddr, vmsize, fileoff, filesize;
415 uint32_t maxprot, initprot, nsects, flags;
416} mach_segment_command;
417typedef struct {
418 uint32_t cmd, cmdsize;
419 char segname[16];
420 uint64_t vmaddr, vmsize, fileoff, filesize; 445 uint64_t vmaddr, vmsize, fileoff, filesize;
421 uint32_t maxprot, initprot, nsects, flags; 446 uint32_t maxprot, initprot, nsects, flags;
422} mach_segment_command_64; 447} mach_segment_command_64;
423typedef struct { 448typedef struct {
424 char sectname[16], segname[16]; 449 char sectname[16], segname[16];
425 uint32_t addr, size;
426 uint32_t offset, align, reloff, nreloc, flags;
427 uint32_t reserved1, reserved2;
428} mach_section;
429typedef struct {
430 char sectname[16], segname[16];
431 uint64_t addr, size; 450 uint64_t addr, size;
432 uint32_t offset, align, reloff, nreloc, flags; 451 uint32_t offset, align, reloff, nreloc, flags;
433 uint32_t reserved1, reserved2, reserved3; 452 uint32_t reserved1, reserved2, reserved3;
@@ -438,116 +457,55 @@ typedef struct {
438typedef struct { 457typedef struct {
439 int32_t strx; 458 int32_t strx;
440 uint8_t type, sect; 459 uint8_t type, sect;
441 int16_t desc;
442 uint32_t value;
443} mach_nlist;
444typedef struct {
445 uint32_t strx;
446 uint8_t type, sect;
447 uint16_t desc; 460 uint16_t desc;
448 uint64_t value; 461 uint64_t value;
449} mach_nlist_64; 462} mach_nlist_64;
450typedef struct
451{
452 uint32_t magic, nfat_arch;
453} mach_fat_header;
454typedef struct
455{
456 uint32_t cputype, cpusubtype, offset, size, align;
457} mach_fat_arch;
458typedef struct {
459 struct {
460 mach_header hdr;
461 mach_segment_command seg;
462 mach_section sec;
463 mach_symtab_command sym;
464 } arch[1];
465 mach_nlist sym_entry;
466 uint8_t space[4096];
467} mach_obj;
468typedef struct { 463typedef struct {
469 struct { 464 mach_header_64 hdr;
470 mach_header_64 hdr; 465 mach_segment_command_64 seg;
471 mach_segment_command_64 seg; 466 mach_section_64 sec;
472 mach_section_64 sec; 467 mach_symtab_command sym;
473 mach_symtab_command sym;
474 } arch[1];
475 mach_nlist_64 sym_entry; 468 mach_nlist_64 sym_entry;
476 uint8_t space[4096]; 469 uint8_t space[4096];
477} mach_obj_64; 470} mach_obj_64;
478typedef struct {
479 mach_fat_header fat;
480 mach_fat_arch fat_arch[4];
481 struct {
482 mach_header hdr;
483 mach_segment_command seg;
484 mach_section sec;
485 mach_symtab_command sym;
486 } arch[4];
487 mach_nlist sym_entry;
488 uint8_t space[4096];
489} mach_fat_obj;
490]] 471]]
491 local symname = '_'..LJBC_PREFIX..ctx.modname 472 local symname = '_'..LJBC_PREFIX..ctx.modname
492 local isfat, is64, align, mobj = false, false, 4, "mach_obj" 473 local cputype, cpusubtype = 0x01000007, 3
493 if ctx.arch == "x64" then 474 if ctx.arch ~= "x64" then
494 is64, align, mobj = true, 8, "mach_obj_64" 475 check(ctx.arch == "arm64", "unsupported architecture for OSX")
495 elseif ctx.arch == "arm" then 476 cputype, cpusubtype = 0x0100000c, 0
496 isfat, mobj = true, "mach_fat_obj"
497 else
498 check(ctx.arch == "x86", "unsupported architecture for OSX")
499 end 477 end
500 local function aligned(v, a) return bit.band(v+a-1, -a) end 478 local function aligned(v, a) return bit.band(v+a-1, -a) end
501 local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.
502 479
503 -- Create Mach-O object and fill in header. 480 -- Create Mach-O object and fill in header.
504 local o = ffi.new(mobj) 481 local o = ffi.new("mach_obj_64")
505 local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) 482 local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8)
506 local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
507 local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
508 if isfat then
509 o.fat.magic = be32(0xcafebabe)
510 o.fat.nfat_arch = be32(#cpusubtype)
511 end
512 483
513 -- Fill in sections and symbols. 484 -- Fill in sections and symbols.
514 for i=0,#cpusubtype-1 do 485 o.hdr.magic = 0xfeedfacf
515 local ofs = 0 486 o.hdr.cputype = cputype
516 if isfat then 487 o.hdr.cpusubtype = cpusubtype
517 local a = o.fat_arch[i] 488 o.hdr.filetype = 1
518 a.cputype = be32(cputype[i+1]) 489 o.hdr.ncmds = 2
519 a.cpusubtype = be32(cpusubtype[i+1]) 490 o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym)
520 -- Subsequent slices overlap each other to share data. 491 o.seg.cmd = 0x19
521 ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0]) 492 o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)
522 a.offset = be32(ofs) 493 o.seg.vmsize = #s
523 a.size = be32(mach_size-ofs+#s) 494 o.seg.fileoff = mach_size
524 end 495 o.seg.filesize = #s
525 local a = o.arch[i] 496 o.seg.maxprot = 1
526 a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface 497 o.seg.initprot = 1
527 a.hdr.cputype = cputype[i+1] 498 o.seg.nsects = 1
528 a.hdr.cpusubtype = cpusubtype[i+1] 499 ffi.copy(o.sec.sectname, "__data")
529 a.hdr.filetype = 1 500 ffi.copy(o.sec.segname, "__DATA")
530 a.hdr.ncmds = 2 501 o.sec.size = #s
531 a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym) 502 o.sec.offset = mach_size
532 a.seg.cmd = is64 and 0x19 or 0x1 503 o.sym.cmd = 2
533 a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec) 504 o.sym.cmdsize = ffi.sizeof(o.sym)
534 a.seg.vmsize = #s 505 o.sym.symoff = ffi.offsetof(o, "sym_entry")
535 a.seg.fileoff = mach_size-ofs 506 o.sym.nsyms = 1
536 a.seg.filesize = #s 507 o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)
537 a.seg.maxprot = 1 508 o.sym.strsize = aligned(#symname+2, 8)
538 a.seg.initprot = 1
539 a.seg.nsects = 1
540 ffi.copy(a.sec.sectname, "__data")
541 ffi.copy(a.sec.segname, "__DATA")
542 a.sec.size = #s
543 a.sec.offset = mach_size-ofs
544 a.sym.cmd = 2
545 a.sym.cmdsize = ffi.sizeof(a.sym)
546 a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs
547 a.sym.nsyms = 1
548 a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs
549 a.sym.strsize = aligned(#symname+2, align)
550 end
551 o.sym_entry.type = 0xf 509 o.sym_entry.type = 0xf
552 o.sym_entry.sect = 1 510 o.sym_entry.sect = 1
553 o.sym_entry.strx = 1 511 o.sym_entry.strx = 1
@@ -562,6 +520,9 @@ end
562local function bcsave_obj(ctx, output, s) 520local function bcsave_obj(ctx, output, s)
563 local ok, ffi = pcall(require, "ffi") 521 local ok, ffi = pcall(require, "ffi")
564 check(ok, "FFI library required to write this file type") 522 check(ok, "FFI library required to write this file type")
523 if output == "-" and jit.os == "Windows" then
524 set_stdout_binary(ffi)
525 end
565 if ctx.os == "windows" then 526 if ctx.os == "windows" then
566 return bcsave_peobj(ctx, output, s, ffi) 527 return bcsave_peobj(ctx, output, s, ffi)
567 elseif ctx.os == "osx" then 528 elseif ctx.os == "osx" then
@@ -573,14 +534,14 @@ end
573 534
574------------------------------------------------------------------------------ 535------------------------------------------------------------------------------
575 536
576local function bclist(input, output) 537local function bclist(ctx, input, output)
577 local f = readfile(input) 538 local f = readfile(ctx, input)
578 require("jit.bc").dump(f, savefile(output, "w"), true) 539 require("jit.bc").dump(f, savefile(output, "w"), true)
579end 540end
580 541
581local function bcsave(ctx, input, output) 542local function bcsave(ctx, input, output)
582 local f = readfile(input) 543 local f = readfile(ctx, input)
583 local s = string.dump(f, ctx.strip) 544 local s = string.dump(f, ctx.mode)
584 local t = ctx.type 545 local t = ctx.type
585 if not t then 546 if not t then
586 t = detecttype(output) 547 t = detecttype(output)
@@ -603,35 +564,43 @@ local function docmd(...)
603 local n = 1 564 local n = 1
604 local list = false 565 local list = false
605 local ctx = { 566 local ctx = {
606 strip = true, arch = jit.arch, os = string.lower(jit.os), 567 mode = "bt", arch = jit.arch, os = jit.os:lower(),
607 type = false, modname = false, 568 type = false, modname = false, string = false,
608 } 569 }
570 local strip = "s"
571 local gc64 = ""
609 while n <= #arg do 572 while n <= #arg do
610 local a = arg[n] 573 local a = arg[n]
611 if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then 574 if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
612 table.remove(arg, n) 575 tremove(arg, n)
613 if a == "--" then break end 576 if a == "--" then break end
614 for m=2,#a do 577 for m=2,#a do
615 local opt = string.sub(a, m, m) 578 local opt = a:sub(m, m)
616 if opt == "l" then 579 if opt == "l" then
617 list = true 580 list = true
618 elseif opt == "s" then 581 elseif opt == "s" then
619 ctx.strip = true 582 strip = "s"
620 elseif opt == "g" then 583 elseif opt == "g" then
621 ctx.strip = false 584 strip = ""
585 elseif opt == "W" or opt == "X" then
586 gc64 = opt
587 elseif opt == "d" then
588 ctx.mode = ctx.mode .. opt
622 else 589 else
623 if arg[n] == nil or m ~= #a then usage() end 590 if arg[n] == nil or m ~= #a then usage() end
624 if opt == "e" then 591 if opt == "e" then
625 if n ~= 1 then usage() end 592 if n ~= 1 then usage() end
626 arg[1] = check(loadstring(arg[1])) 593 ctx.string = true
627 elseif opt == "n" then 594 elseif opt == "n" then
628 ctx.modname = checkmodname(table.remove(arg, n)) 595 ctx.modname = checkmodname(tremove(arg, n))
629 elseif opt == "t" then 596 elseif opt == "t" then
630 ctx.type = checkarg(table.remove(arg, n), map_type, "file type") 597 ctx.type = checkarg(tremove(arg, n), map_type, "file type")
631 elseif opt == "a" then 598 elseif opt == "a" then
632 ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") 599 ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
633 elseif opt == "o" then 600 elseif opt == "o" then
634 ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") 601 ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
602 elseif opt == "F" then
603 ctx.filename = "@"..tremove(arg, n)
635 else 604 else
636 usage() 605 usage()
637 end 606 end
@@ -641,9 +610,10 @@ local function docmd(...)
641 n = n + 1 610 n = n + 1
642 end 611 end
643 end 612 end
613 ctx.mode = ctx.mode .. strip .. gc64
644 if list then 614 if list then
645 if #arg == 0 or #arg > 2 then usage() end 615 if #arg == 0 or #arg > 2 then usage() end
646 bclist(arg[1], arg[2] or "-") 616 bclist(ctx, arg[1], arg[2] or "-")
647 else 617 else
648 if #arg ~= 2 then usage() end 618 if #arg ~= 2 then usage() end
649 bcsave(ctx, arg[1], arg[2]) 619 bcsave(ctx, arg[1], arg[2])
@@ -653,7 +623,7 @@ end
653------------------------------------------------------------------------------ 623------------------------------------------------------------------------------
654 624
655-- Public module functions. 625-- Public module functions.
656module(...) 626return {
657 627 start = docmd -- Process -b command line option.
658start = docmd -- Process -b command line option. 628}
659 629
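Illustrative command lines for the new bcsave options, derived from the usage text above (file names are placeholders):

    luajit -bd myscript.lua myscript.raw     (deterministic bytecode dump)
    luajit -bW -n mymod mymod.lua mymod.c    (32 bit, non-GC64 bytecode as a C file)
    luajit -b mymod.lua mymod.cc             (".cc" now auto-detects as C output)

As the option parsing shows, the -W/-X and -d letters are simply appended to the mode string that is passed on to load() and string.dump().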
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index 4db85306..a7546a45 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
658end 658end
659 659
660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
661local function create_(code, addr, out) 661local function create(code, addr, out)
662 local ctx = {} 662 local ctx = {}
663 ctx.code = code 663 ctx.code = code
664 ctx.addr = addr or 0 664 ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
670end 670end
671 671
672-- Simple API: disassemble code (a string) at address and output via out. 672-- Simple API: disassemble code (a string) at address and output via out.
673local function disass_(code, addr, out) 673local function disass(code, addr, out)
674 create_(code, addr, out):disass() 674 create(code, addr, out):disass()
675end 675end
676 676
677-- Return register name for RID. 677-- Return register name for RID.
678local function regname_(r) 678local function regname(r)
679 if r < 16 then return map_gpr[r] end 679 if r < 16 then return map_gpr[r] end
680 return "d"..(r-16) 680 return "d"..(r-16)
681end 681end
682 682
683-- Public module functions. 683-- Public module functions.
684module(...) 684return {
685 685 create = create,
686create = create_ 686 disass = disass,
687disass = disass_ 687 regname = regname
688regname = regname_ 688}
689 689
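The same module(...) removal applies to the disassembler modules; a minimal sketch of the two APIs named in the comments above (mcode is assumed to be a string of ARM machine code, addr its start address):

    local dis = require("jit.dis_arm")
    dis.disass(mcode, addr, io.write)              -- simple API: one-shot disassembly
    local ctx = dis.create(mcode, addr, io.write)  -- extended API
    ctx:disass(0, #mcode)                          -- disassemble an ofs/len range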
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
new file mode 100644
index 00000000..2741cd2e
--- /dev/null
+++ b/src/jit/dis_arm64.lua
@@ -0,0 +1,1227 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64 disassembler module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6--
7-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
8-- Sponsored by Cisco Systems, Inc.
9----------------------------------------------------------------------------
10-- This is a helper module used by the LuaJIT machine code dumper module.
11--
12-- It disassembles most user-mode AArch64 instructions.
13-- NYI: Advanced SIMD and VFP instructions.
14------------------------------------------------------------------------------
15
16local type = type
17local sub, byte, format = string.sub, string.byte, string.format
18local match, gmatch, gsub = string.match, string.gmatch, string.gsub
19local concat = table.concat
20local bit = require("bit")
21local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex
22local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
23local ror = bit.ror
24
25------------------------------------------------------------------------------
26-- Opcode maps
27------------------------------------------------------------------------------
28
29local map_adr = { -- PC-relative addressing.
30 shift = 31, mask = 1,
31 [0] = "adrDBx", "adrpDBx"
32}
33
34local map_addsubi = { -- Add/subtract immediate.
35 shift = 29, mask = 3,
36 [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
37}
38
39local map_logi = { -- Logical immediate.
40 shift = 31, mask = 1,
41 [0] = {
42 shift = 22, mask = 1,
43 [0] = {
44 shift = 29, mask = 3,
45 [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
46 },
47 false -- unallocated
48 },
49 {
50 shift = 29, mask = 3,
51 [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
52 }
53}
54
55local map_movwi = { -- Move wide immediate.
56 shift = 31, mask = 1,
57 [0] = {
58 shift = 22, mask = 1,
59 [0] = {
60 shift = 29, mask = 3,
61 [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
62 }, false -- unallocated
63 },
64 {
65 shift = 29, mask = 3,
66 [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
67 },
68}
69
70local map_bitf = { -- Bitfield.
71 shift = 31, mask = 1,
72 [0] = {
73 shift = 22, mask = 1,
74 [0] = {
75 shift = 29, mask = 3,
76 [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
77 "bfm|bfi|bfxilDN13w",
78 "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
79 }
80 },
81 {
82 shift = 22, mask = 1,
83 {
84 shift = 29, mask = 3,
85 [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
86 "bfm|bfi|bfxilDN13x",
87 "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
88 }
89 }
90}
91
92local map_datai = { -- Data processing - immediate.
93 shift = 23, mask = 7,
94 [0] = map_adr, map_adr, map_addsubi, false,
95 map_logi, map_movwi, map_bitf,
96 {
97 shift = 15, mask = 0x1c0c1,
98 [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
99 [0x10081] = "extr|rorDNM4x"
100 }
101}
102
103local map_logsr = { -- Logical, shifted register.
104 shift = 31, mask = 1,
105 [0] = {
106 shift = 15, mask = 1,
107 [0] = {
108 shift = 29, mask = 3,
109 [0] = {
110 shift = 21, mask = 1,
111 [0] = "andDNMSg", "bicDNMSg"
112 },
113 {
114 shift = 21, mask = 1,
115 [0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
116 },
117 {
118 shift = 21, mask = 1,
119 [0] = "eorDNMSg", "eonDNMSg"
120 },
121 {
122 shift = 21, mask = 1,
123 [0] = "ands|tstD0NMSg", "bicsDNMSg"
124 }
125 },
126 false -- unallocated
127 },
128 {
129 shift = 29, mask = 3,
130 [0] = {
131 shift = 21, mask = 1,
132 [0] = "andDNMSg", "bicDNMSg"
133 },
134 {
135 shift = 21, mask = 1,
136 [0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
137 },
138 {
139 shift = 21, mask = 1,
140 [0] = "eorDNMSg", "eonDNMSg"
141 },
142 {
143 shift = 21, mask = 1,
144 [0] = "ands|tstD0NMSg", "bicsDNMSg"
145 }
146 }
147}
148
149local map_assh = {
150 shift = 31, mask = 1,
151 [0] = {
152 shift = 15, mask = 1,
153 [0] = {
154 shift = 29, mask = 3,
155 [0] = {
156 shift = 22, mask = 3,
157 [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
158 },
159 {
160 shift = 22, mask = 3,
161 [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
162 "adds|cmnD0NMSg", "adds|cmnD0NMg"
163 },
164 {
165 shift = 22, mask = 3,
166 [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
167 },
168 {
169 shift = 22, mask = 3,
170 [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
171 "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
172 },
173 },
174 false -- unallocated
175 },
176 {
177 shift = 29, mask = 3,
178 [0] = {
179 shift = 22, mask = 3,
180 [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
181 },
182 {
183 shift = 22, mask = 3,
184 [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
185 "adds|cmnD0NMg"
186 },
187 {
188 shift = 22, mask = 3,
189 [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
190 },
191 {
192 shift = 22, mask = 3,
193 [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
194 "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
195 }
196 }
197}
198
199local map_addsubsh = { -- Add/subtract, shifted register.
200 shift = 22, mask = 3,
201 [0] = map_assh, map_assh, map_assh
202}
203
204local map_addsubex = { -- Add/subtract, extended register.
205 shift = 22, mask = 3,
206 [0] = {
207 shift = 29, mask = 3,
208 [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
209 }
210}
211
212local map_addsubc = { -- Add/subtract, with carry.
213 shift = 10, mask = 63,
214 [0] = {
215 shift = 29, mask = 3,
216 [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
217 }
218}
219
220local map_ccomp = {
221 shift = 4, mask = 1,
222 [0] = {
223 shift = 10, mask = 3,
224 [0] = { -- Conditional compare register.
225 shift = 29, mask = 3,
226 "ccmnNMVCg", false, "ccmpNMVCg",
227 },
228 [2] = { -- Conditional compare immediate.
229 shift = 29, mask = 3,
230 "ccmnN5VCg", false, "ccmpN5VCg",
231 }
232 }
233}
234
235local map_csel = { -- Conditional select.
236 shift = 11, mask = 1,
237 [0] = {
238 shift = 10, mask = 1,
239 [0] = {
240 shift = 29, mask = 3,
241 [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false,
242 },
243 {
244 shift = 29, mask = 3,
245 [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false,
246 }
247 }
248}
249
250local map_data1s = { -- Data processing, 1 source.
251 shift = 29, mask = 1,
252 [0] = {
253 shift = 31, mask = 1,
254 [0] = {
255 shift = 10, mask = 0x7ff,
256 [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg"
257 },
258 {
259 shift = 10, mask = 0x7ff,
260 [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg"
261 }
262 }
263}
264
265local map_data2s = { -- Data processing, 2 sources.
266 shift = 29, mask = 1,
267 [0] = {
268 shift = 10, mask = 63,
269 false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg",
270 "lsrDNMg", "asrDNMg", "rorDNMg"
271 }
272}
273
274local map_data3s = { -- Data processing, 3 sources.
275 shift = 29, mask = 7,
276 [0] = {
277 shift = 21, mask = 7,
278 [0] = {
279 shift = 15, mask = 1,
280 [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g"
281 }
282 }, false, false, false,
283 {
284 shift = 15, mask = 1,
285 [0] = {
286 shift = 21, mask = 7,
287 [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false,
288 false, "umaddl|umullDxNMwA0x", "umulhDNMx"
289 },
290 {
291 shift = 21, mask = 7,
292 [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false,
293 false, "umsubl|umneglDxNMwA0x"
294 }
295 }
296}
297
298local map_datar = { -- Data processing, register.
299 shift = 28, mask = 1,
300 [0] = {
301 shift = 24, mask = 1,
302 [0] = map_logsr,
303 {
304 shift = 21, mask = 1,
305 [0] = map_addsubsh, map_addsubex
306 }
307 },
308 {
309 shift = 21, mask = 15,
310 [0] = map_addsubc, false, map_ccomp, false, map_csel, false,
311 {
312 shift = 30, mask = 1,
313 [0] = map_data2s, map_data1s
314 },
315 false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s,
316 map_data3s, map_data3s, map_data3s
317 }
318}
319
320local map_lrl = { -- Load register, literal.
321 shift = 26, mask = 1,
322 [0] = {
323 shift = 30, mask = 3,
324 [0] = "ldrDwB", "ldrDxB", "ldrswDxB"
325 },
326 {
327 shift = 30, mask = 3,
328 [0] = "ldrDsB", "ldrDdB"
329 }
330}
331
332local map_lsriind = { -- Load/store register, immediate pre/post-indexed.
333 shift = 30, mask = 3,
334 [0] = {
335 shift = 26, mask = 1,
336 [0] = {
337 shift = 22, mask = 3,
338 [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL"
339 }
340 },
341 {
342 shift = 26, mask = 1,
343 [0] = {
344 shift = 22, mask = 3,
345 [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL"
346 }
347 },
348 {
349 shift = 26, mask = 1,
350 [0] = {
351 shift = 22, mask = 3,
352 [0] = "strDwzL", "ldrDwzL", "ldrswDxzL"
353 },
354 {
355 shift = 22, mask = 3,
356 [0] = "strDszL", "ldrDszL"
357 }
358 },
359 {
360 shift = 26, mask = 1,
361 [0] = {
362 shift = 22, mask = 3,
363 [0] = "strDxzL", "ldrDxzL"
364 },
365 {
366 shift = 22, mask = 3,
367 [0] = "strDdzL", "ldrDdzL"
368 }
369 }
370}
371
372local map_lsriro = {
373 shift = 21, mask = 1,
374 [0] = { -- Load/store register immediate.
375 shift = 10, mask = 3,
376 [0] = { -- Unscaled immediate.
377 shift = 26, mask = 1,
378 [0] = {
379 shift = 30, mask = 3,
380 [0] = {
381 shift = 22, mask = 3,
382 [0] = "sturbDwK", "ldurbDwK"
383 },
384 {
385 shift = 22, mask = 3,
386 [0] = "sturhDwK", "ldurhDwK"
387 },
388 {
389 shift = 22, mask = 3,
390 [0] = "sturDwK", "ldurDwK"
391 },
392 {
393 shift = 22, mask = 3,
394 [0] = "sturDxK", "ldurDxK"
395 }
396 }
397 }, map_lsriind, false, map_lsriind
398 },
399 { -- Load/store register, register offset.
400 shift = 10, mask = 3,
401 [2] = {
402 shift = 26, mask = 1,
403 [0] = {
404 shift = 30, mask = 3,
405 [0] = {
406 shift = 22, mask = 3,
407 [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO"
408 },
409 {
410 shift = 22, mask = 3,
411 [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO"
412 },
413 {
414 shift = 22, mask = 3,
415 [0] = "strDwO", "ldrDwO", "ldrswDxO"
416 },
417 {
418 shift = 22, mask = 3,
419 [0] = "strDxO", "ldrDxO"
420 }
421 },
422 {
423 shift = 30, mask = 3,
424 [2] = {
425 shift = 22, mask = 3,
426 [0] = "strDsO", "ldrDsO"
427 },
428 [3] = {
429 shift = 22, mask = 3,
430 [0] = "strDdO", "ldrDdO"
431 }
432 }
433 }
434 }
435}
436
437local map_lsp = { -- Load/store register pair, offset.
438 shift = 22, mask = 1,
439 [0] = {
440 shift = 30, mask = 3,
441 [0] = {
442 shift = 26, mask = 1,
443 [0] = "stpDzAzwP", "stpDzAzsP",
444 },
445 {
446 shift = 26, mask = 1,
447 "stpDzAzdP"
448 },
449 {
450 shift = 26, mask = 1,
451 [0] = "stpDzAzxP"
452 }
453 },
454 {
455 shift = 30, mask = 3,
456 [0] = {
457 shift = 26, mask = 1,
458 [0] = "ldpDzAzwP", "ldpDzAzsP",
459 },
460 {
461 shift = 26, mask = 1,
462 [0] = "ldpswDAxP", "ldpDzAzdP"
463 },
464 {
465 shift = 26, mask = 1,
466 [0] = "ldpDzAzxP"
467 }
468 }
469}
470
471local map_ls = { -- Loads and stores.
472 shift = 24, mask = 0x31,
473 [0x10] = map_lrl, [0x30] = map_lsriro,
474 [0x20] = {
475 shift = 23, mask = 3,
476 map_lsp, map_lsp, map_lsp
477 },
478 [0x21] = {
479 shift = 23, mask = 3,
480 map_lsp, map_lsp, map_lsp
481 },
482 [0x31] = {
483 shift = 26, mask = 1,
484 [0] = {
485 shift = 30, mask = 3,
486 [0] = {
487 shift = 22, mask = 3,
488 [0] = "strbDwzU", "ldrbDwzU"
489 },
490 {
491 shift = 22, mask = 3,
492 [0] = "strhDwzU", "ldrhDwzU"
493 },
494 {
495 shift = 22, mask = 3,
496 [0] = "strDwzU", "ldrDwzU"
497 },
498 {
499 shift = 22, mask = 3,
500 [0] = "strDxzU", "ldrDxzU"
501 }
502 },
503 {
504 shift = 30, mask = 3,
505 [2] = {
506 shift = 22, mask = 3,
507 [0] = "strDszU", "ldrDszU"
508 },
509 [3] = {
510 shift = 22, mask = 3,
511 [0] = "strDdzU", "ldrDdzU"
512 }
513 }
514 },
515}
516
517local map_datafp = { -- Data processing, SIMD and FP.
518 shift = 28, mask = 7,
519 { -- 001
520 shift = 24, mask = 1,
521 [0] = {
522 shift = 21, mask = 1,
523 {
524 shift = 10, mask = 3,
525 [0] = {
526 shift = 12, mask = 1,
527 [0] = {
528 shift = 13, mask = 1,
529 [0] = {
530 shift = 14, mask = 1,
531 [0] = {
532 shift = 15, mask = 1,
533 [0] = { -- FP/int conversion.
534 shift = 31, mask = 1,
535 [0] = {
536 shift = 16, mask = 0xff,
537 [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
538 [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
539 [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
540 [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
541 [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
542 [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
543 [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
544 [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
545 [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
546 [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
547 [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
548 [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
549 [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
550 },
551 {
552 shift = 16, mask = 0xff,
553 [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
554 [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
555 [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
556 [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
557 [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
558 [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
559 [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
560 [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
561 [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
562 [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
563 [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
564 [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
565 [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
566 }
567 }
568 },
569 { -- FP data-processing, 1 source.
570 shift = 31, mask = 1,
571 [0] = {
572 shift = 22, mask = 3,
573 [0] = {
574 shift = 15, mask = 63,
575 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
576 "fsqrtDNf", false, "fcvtDdNs", false, false,
577 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
578 "frintaDNf", false, "frintxDNf", "frintiDNf",
579 },
580 {
581 shift = 15, mask = 63,
582 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
583 "fsqrtDNf", "fcvtDsNd", false, false, false,
584 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
585 "frintaDNf", false, "frintxDNf", "frintiDNf",
586 }
587 }
588 }
589 },
590 { -- FP compare.
591 shift = 31, mask = 1,
592 [0] = {
593 shift = 14, mask = 3,
594 [0] = {
595 shift = 23, mask = 1,
596 [0] = {
597 shift = 0, mask = 31,
598 [0] = "fcmpNMf", [8] = "fcmpNZf",
599 [16] = "fcmpeNMf", [24] = "fcmpeNZf",
600 }
601 }
602 }
603 }
604 },
605 { -- FP immediate.
606 shift = 31, mask = 1,
607 [0] = {
608 shift = 5, mask = 31,
609 [0] = {
610 shift = 23, mask = 1,
611 [0] = "fmovDFf"
612 }
613 }
614 }
615 },
616 { -- FP conditional compare.
617 shift = 31, mask = 1,
618 [0] = {
619 shift = 23, mask = 1,
620 [0] = {
621 shift = 4, mask = 1,
622 [0] = "fccmpNMVCf", "fccmpeNMVCf"
623 }
624 }
625 },
626 { -- FP data-processing, 2 sources.
627 shift = 31, mask = 1,
628 [0] = {
629 shift = 23, mask = 1,
630 [0] = {
631 shift = 12, mask = 15,
632 [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
633 "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
634 "fnmulDNMf"
635 }
636 }
637 },
638 { -- FP conditional select.
639 shift = 31, mask = 1,
640 [0] = {
641 shift = 23, mask = 1,
642 [0] = "fcselDNMCf"
643 }
644 }
645 }
646 },
647 { -- FP data-processing, 3 sources.
648 shift = 31, mask = 1,
649 [0] = {
650 shift = 15, mask = 1,
651 [0] = {
652 shift = 21, mask = 5,
653 [0] = "fmaddDNMAf", "fnmaddDNMAf"
654 },
655 {
656 shift = 21, mask = 5,
657 [0] = "fmsubDNMAf", "fnmsubDNMAf"
658 }
659 }
660 }
661 },
662 { -- 010
663 shift = 0, mask = 0x81f8fc00,
664 [0x100e400] = "moviDdG"
665 }
666}
667
668local map_br = { -- Branches, exception generating and system instructions.
669 shift = 29, mask = 7,
670 [0] = "bB",
671 { -- Compare & branch, immediate.
672 shift = 24, mask = 3,
673 [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
674 },
675 { -- Conditional branch, immediate.
676 shift = 24, mask = 3,
677 [0] = {
678 shift = 4, mask = 1,
679 [0] = {
680 shift = 0, mask = 15,
681 [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
682 "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
683 }
684 }
685 }, false, "blB",
686 { -- Compare & branch, immediate.
687 shift = 24, mask = 3,
688 [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
689 },
690 {
691 shift = 24, mask = 3,
692 [0] = { -- Exception generation.
693 shift = 0, mask = 0xe0001f,
694 [0x200000] = "brkW"
695 },
696 { -- System instructions.
697 shift = 0, mask = 0x3fffff,
698 [0x03201f] = "nop"
699 },
700 { -- Unconditional branch, register.
701 shift = 0, mask = 0xfffc1f,
702 [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
703 [0x5f0000] = "retNx"
704 },
705 }
706}
707
708local map_init = {
709 shift = 25, mask = 15,
710 [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
711 map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
712}
713
714------------------------------------------------------------------------------
715
716local map_regs = { x = {}, w = {}, d = {}, s = {} }
717
718for i=0,30 do
719 map_regs.x[i] = "x"..i
720 map_regs.w[i] = "w"..i
721 map_regs.d[i] = "d"..i
722 map_regs.s[i] = "s"..i
723end
724map_regs.x[31] = "sp"
725map_regs.w[31] = "wsp"
726map_regs.d[31] = "d31"
727map_regs.s[31] = "s31"
728
729local map_cond = {
730 [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
731 "hi", "ls", "ge", "lt", "gt", "le", "al",
732}
733
734local map_shift = { [0] = "lsl", "lsr", "asr", "ror"}
735
736local map_extend = {
737 [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
738}
739
740------------------------------------------------------------------------------
741
742-- Output a nicely formatted line with an opcode and operands.
743local function putop(ctx, text, operands)
744 local pos = ctx.pos
745 local extra = ""
746 if ctx.rel then
747 local sym = ctx.symtab[ctx.rel]
748 if sym then
749 extra = "\t->"..sym
750 end
751 end
752 if ctx.hexdump > 0 then
753 ctx.out(format("%08x %s %-5s %s%s\n",
754 ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
755 else
756 ctx.out(format("%08x %-5s %s%s\n",
757 ctx.addr+pos, text, concat(operands, ", "), extra))
758 end
759 ctx.pos = pos + 4
760end
761
762-- Fallback for unknown opcodes.
763local function unknown(ctx)
764 return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
765end
766
767local function match_reg(p, pat, regnum)
768 return map_regs[match(pat, p.."%w-([xwds])")][regnum]
769end
770
771local function fmt_hex32(x)
772 if x < 0 then
773 return tohex(x)
774 else
775 return format("%x", x)
776 end
777end
778
779local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }
780
781local function decode_imm13(op)
782 local imms = band(rshift(op, 10), 63)
783 local immr = band(rshift(op, 16), 63)
784 if band(op, 0x00400000) == 0 then
785 local len = 5
786 if imms >= 56 then
787 if imms >= 60 then len = 1 else len = 2 end
788 elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
789 local l = lshift(1, len)-1
790 local s = band(imms, l)
791 local r = band(immr, l)
792 local imm = ror(rshift(-1, 31-s), r)
793 if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end
794 imm = imm * imm13_rep[len]
795 local ix = fmt_hex32(imm)
796 if rshift(op, 31) ~= 0 then
797 return ix..tohex(imm)
798 else
799 return ix
800 end
801 else
802 local lo, hi = -1, 0
803 if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end
804 if immr ~= 0 then
805 lo, hi = ror(lo, immr), ror(hi, immr)
806 local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
807 lo, hi = bxor(lo, x), bxor(hi, x)
808 if immr >= 32 then lo, hi = hi, lo end
809 end
810 if hi ~= 0 then
811 return fmt_hex32(hi)..tohex(lo)
812 else
813 return fmt_hex32(lo)
814 end
815 end
816end
817
818local function parse_immpc(op, name)
819 if name == "b" or name == "bl" then
820 return arshift(lshift(op, 6), 4)
821 elseif name == "adr" or name == "adrp" then
822 local immlo = band(rshift(op, 29), 3)
823 local immhi = lshift(arshift(lshift(op, 8), 13), 2)
824 return bor(immhi, immlo)
825 elseif name == "tbz" or name == "tbnz" then
826 return lshift(arshift(lshift(op, 13), 18), 2)
827 else
828 return lshift(arshift(lshift(op, 8), 13), 2)
829 end
830end
831
832local function parse_fpimm8(op)
833 local sign = band(op, 0x100000) == 0 and 1 or -1
834 local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
835 local frac = 16+band(rshift(op, 13), 15)
836 return sign * frac * 2^exp
837end
838
839local function decode_fpmovi(op)
840 local lo = rshift(op, 5)
841 local hi = rshift(op, 9)
842 lo = bor(band(lo, 1) * 0xff, band(lo, 2) * 0x7f80, band(lo, 4) * 0x3fc000,
843 band(lo, 8) * 0x1fe00000)
844 hi = bor(band(hi, 1) * 0xff, band(hi, 0x80) * 0x1fe,
845 band(hi, 0x100) * 0xff00, band(hi, 0x200) * 0x7f8000)
846 if hi ~= 0 then
847 return fmt_hex32(hi)..tohex(lo)
848 else
849 return fmt_hex32(lo)
850 end
851end
852
853local function prefer_bfx(sf, uns, imms, immr)
854 if imms < immr or imms == 31 or imms == 63 then
855 return false
856 end
857 if immr == 0 then
858 if sf == 0 and (imms == 7 or imms == 15) then
859 return false
860 end
861 if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then
862 return false
863 end
864 end
865 return true
866end
867
868-- Disassemble a single instruction.
869local function disass_ins(ctx)
870 local pos = ctx.pos
871 local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
872 local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
873 local operands = {}
874 local suffix = ""
875 local last, name, pat
876 local map_reg
877 ctx.op = op
878 ctx.rel = nil
879 last = nil
880 local opat
881 opat = map_init[band(rshift(op, 25), 15)]
882 while type(opat) ~= "string" do
883 if not opat then return unknown(ctx) end
884 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
885 end
886 name, pat = match(opat, "^([a-z0-9]*)(.*)")
887 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
888 if altname then pat = pat2 end
889 if sub(pat, 1, 1) == "." then
890 local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
891 suffix = suffix..s2
892 pat = p2
893 end
894
895 local rt = match(pat, "[gf]")
896 if rt then
897 if rt == "g" then
898 map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
899 else
900 map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
901 end
902 end
903
904 local second0, immr
905
906 for p in gmatch(pat, ".") do
907 local x = nil
908 if p == "D" then
909 local regnum = band(op, 31)
910 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
911 elseif p == "N" then
912 local regnum = band(rshift(op, 5), 31)
913 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
914 elseif p == "M" then
915 local regnum = band(rshift(op, 16), 31)
916 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
917 elseif p == "A" then
918 local regnum = band(rshift(op, 10), 31)
919 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
920 elseif p == "B" then
921 local addr = ctx.addr + pos + parse_immpc(op, name)
922 ctx.rel = addr
923 x = "0x"..tohex(addr)
924 elseif p == "T" then
925 x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
926 elseif p == "V" then
927 x = band(op, 15)
928 elseif p == "C" then
929 x = map_cond[band(rshift(op, 12), 15)]
930 elseif p == "c" then
931 local rn = band(rshift(op, 5), 31)
932 local rm = band(rshift(op, 16), 31)
933 local cond = band(rshift(op, 12), 15)
934 local invc = bxor(cond, 1)
935 x = map_cond[cond]
936 if altname and cond ~= 14 and cond ~= 15 then
937 local a1, a2 = match(altname, "([^|]*)|(.*)")
938 if rn == rm then
939 local n = #operands
940 operands[n] = nil
941 x = map_cond[invc]
942 if rn ~= 31 then
943 if a1 then name = a1 else name = altname end
944 else
945 operands[n-1] = nil
946 name = a2
947 end
948 end
949 end
950 elseif p == "W" then
951 x = band(rshift(op, 5), 0xffff)
952 elseif p == "Y" then
953 x = band(rshift(op, 5), 0xffff)
954 local hw = band(rshift(op, 21), 3)
955 if altname and (hw == 0 or x ~= 0) then
956 name = altname
957 end
958 elseif p == "L" then
959 local rn = map_regs.x[band(rshift(op, 5), 31)]
960 local imm9 = arshift(lshift(op, 11), 23)
961 if band(op, 0x800) ~= 0 then
962 x = "["..rn..", #"..imm9.."]!"
963 else
964 x = "["..rn.."], #"..imm9
965 end
966 elseif p == "U" then
967 local rn = map_regs.x[band(rshift(op, 5), 31)]
968 local sz = band(rshift(op, 30), 3)
969 local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
970 if imm12 ~= 0 then
971 x = "["..rn..", #"..imm12.."]"
972 else
973 x = "["..rn.."]"
974 end
975 elseif p == "K" then
976 local rn = map_regs.x[band(rshift(op, 5), 31)]
977 local imm9 = arshift(lshift(op, 11), 23)
978 if imm9 ~= 0 then
979 x = "["..rn..", #"..imm9.."]"
980 else
981 x = "["..rn.."]"
982 end
983 elseif p == "O" then
984 local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
985 local m = band(rshift(op, 13), 1)
986 if m == 0 then
987 rm = map_regs.w[band(rshift(op, 16), 31)]
988 else
989 rm = map_regs.x[band(rshift(op, 16), 31)]
990 end
991 x = "["..rn..", "..rm
992 local opt = band(rshift(op, 13), 7)
993 local s = band(rshift(op, 12), 1)
994 local sz = band(rshift(op, 30), 3)
995 -- extension to be applied
996 if opt == 3 then
997 if s == 0 then x = x.."]"
998 else x = x..", lsl #"..sz.."]" end
999 elseif opt == 2 or opt == 6 or opt == 7 then
1000 if s == 0 then x = x..", "..map_extend[opt].."]"
1001 else x = x..", "..map_extend[opt].." #"..sz.."]" end
1002 else
1003 x = x.."]"
1004 end
1005 elseif p == "P" then
1006 local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
1007 local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
1008 local rn = map_regs.x[band(rshift(op, 5), 31)]
1009 local ind = band(rshift(op, 23), 3)
1010 if ind == 1 then
1011 x = "["..rn.."], #"..imm7
1012 elseif ind == 2 then
1013 if imm7 == 0 then
1014 x = "["..rn.."]"
1015 else
1016 x = "["..rn..", #"..imm7.."]"
1017 end
1018 elseif ind == 3 then
1019 x = "["..rn..", #"..imm7.."]!"
1020 end
1021 elseif p == "I" then
1022 local shf = band(rshift(op, 22), 3)
1023 local imm12 = band(rshift(op, 10), 0x0fff)
1024 local rn, rd = band(rshift(op, 5), 31), band(op, 31)
1025 if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
1026 name = altname
1027 x = nil
1028 elseif shf == 0 then
1029 x = imm12
1030 elseif shf == 1 then
1031 x = imm12..", lsl #12"
1032 end
1033 elseif p == "i" then
1034 x = "#0x"..decode_imm13(op)
1035 elseif p == "1" then
1036 immr = band(rshift(op, 16), 63)
1037 x = immr
1038 elseif p == "2" then
1039 x = band(rshift(op, 10), 63)
1040 if altname then
1041 local a1, a2, a3, a4, a5, a6 =
1042 match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
1043 local sf = band(rshift(op, 26), 32)
1044 local uns = band(rshift(op, 30), 1)
1045 if prefer_bfx(sf, uns, x, immr) then
1046 name = a2
1047 x = x - immr + 1
1048 elseif immr == 0 and x == 7 then
1049 local n = #operands
1050 operands[n] = nil
1051 if sf ~= 0 then
1052 operands[n-1] = gsub(operands[n-1], "x", "w")
1053 end
1054 last = operands[n-1]
1055 name = a6
1056 x = nil
1057 elseif immr == 0 and x == 15 then
1058 local n = #operands
1059 operands[n] = nil
1060 if sf ~= 0 then
1061 operands[n-1] = gsub(operands[n-1], "x", "w")
1062 end
1063 last = operands[n-1]
1064 name = a5
1065 x = nil
1066 elseif x == 31 or x == 63 then
1067 if x == 31 and immr == 0 and name == "sbfm" then
1068 name = a4
1069 local n = #operands
1070 operands[n] = nil
1071 if sf ~= 0 then
1072 operands[n-1] = gsub(operands[n-1], "x", "w")
1073 end
1074 last = operands[n-1]
1075 else
1076 name = a3
1077 end
1078 x = nil
1079 elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
1080 name = a4
1081 last = "#"..(sf+32 - immr)
1082 operands[#operands] = last
1083 x = nil
1084 elseif x < immr then
1085 name = a1
1086 last = "#"..(sf+32 - immr)
1087 operands[#operands] = last
1088 x = x + 1
1089 end
1090 end
1091 elseif p == "3" then
1092 x = band(rshift(op, 10), 63)
1093 if altname then
1094 local a1, a2 = match(altname, "([^|]*)|(.*)")
1095 if x < immr then
1096 name = a1
1097 local sf = band(rshift(op, 26), 32)
1098 last = "#"..(sf+32 - immr)
1099 operands[#operands] = last
1100 x = x + 1
1101 else
1102 name = a2
1103 x = x - immr + 1
1104 end
1105 end
1106 elseif p == "4" then
1107 x = band(rshift(op, 10), 63)
1108 local rn = band(rshift(op, 5), 31)
1109 local rm = band(rshift(op, 16), 31)
1110 if altname and rn == rm then
1111 local n = #operands
1112 operands[n] = nil
1113 last = operands[n-1]
1114 name = altname
1115 end
1116 elseif p == "5" then
1117 x = band(rshift(op, 16), 31)
1118 elseif p == "S" then
1119 x = band(rshift(op, 10), 63)
1120 if x == 0 then x = nil
1121 else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
1122 elseif p == "X" then
1123 local opt = band(rshift(op, 13), 7)
1124 -- Width specifier <R>.
1125 if opt ~= 3 and opt ~= 7 then
1126 last = map_regs.w[band(rshift(op, 16), 31)]
1127 operands[#operands] = last
1128 end
1129 x = band(rshift(op, 10), 7)
1130 -- Extension.
1131 if opt == 2 + band(rshift(op, 31), 1) and
1132 band(rshift(op, second0 and 5 or 0), 31) == 31 then
1133 if x == 0 then x = nil
1134 else x = "lsl #"..x end
1135 else
1136 if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
1137 else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
1138 end
1139 elseif p == "R" then
1140 x = band(rshift(op,21), 3)
1141 if x == 0 then x = nil
1142 else x = "lsl #"..x*16 end
1143 elseif p == "z" then
1144 local n = #operands
1145 if operands[n] == "sp" then operands[n] = "xzr"
1146 elseif operands[n] == "wsp" then operands[n] = "wzr"
1147 end
1148 elseif p == "Z" then
1149 x = 0
1150 elseif p == "F" then
1151 x = parse_fpimm8(op)
1152 elseif p == "G" then
1153 x = "#0x"..decode_fpmovi(op)
1154 elseif p == "g" or p == "f" or p == "x" or p == "w" or
1155 p == "d" or p == "s" then
1156 -- These are handled in D/N/M/A.
1157 elseif p == "0" then
1158 if last == "sp" or last == "wsp" then
1159 local n = #operands
1160 operands[n] = nil
1161 last = operands[n-1]
1162 if altname then
1163 local a1, a2 = match(altname, "([^|]*)|(.*)")
1164 if not a1 then
1165 name = altname
1166 elseif second0 then
1167 name, altname = a2, a1
1168 else
1169 name, altname = a1, a2
1170 end
1171 end
1172 end
1173 second0 = true
1174 else
1175 assert(false)
1176 end
1177 if x then
1178 last = x
1179 if type(x) == "number" then x = "#"..x end
1180 operands[#operands+1] = x
1181 end
1182 end
1183
1184 return putop(ctx, name..suffix, operands)
1185end
1186
1187------------------------------------------------------------------------------
1188
1189-- Disassemble a block of code.
1190local function disass_block(ctx, ofs, len)
1191 if not ofs then ofs = 0 end
1192 local stop = len and ofs+len or #ctx.code
1193 ctx.pos = ofs
1194 ctx.rel = nil
1195 while ctx.pos < stop do disass_ins(ctx) end
1196end
1197
1198-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
1199local function create(code, addr, out)
1200 local ctx = {}
1201 ctx.code = code
1202 ctx.addr = addr or 0
1203 ctx.out = out or io.write
1204 ctx.symtab = {}
1205 ctx.disass = disass_block
1206 ctx.hexdump = 8
1207 return ctx
1208end
1209
1210-- Simple API: disassemble code (a string) at address and output via out.
1211local function disass(code, addr, out)
1212 create(code, addr, out):disass()
1213end
1214
1215-- Return register name for RID.
1216local function regname(r)
1217 if r < 32 then return map_regs.x[r] end
1218 return map_regs.d[r-32]
1219end
1220
1221-- Public module functions.
1222return {
1223 create = create,
1224 disass = disass,
1225 regname = regname
1226}
1227
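For orientation, here is a minimal usage sketch of the ARM64 module defined above. It is not part of the diff; the byte string and address are made up, and the module path assumes the usual jit.* layout.

  local dis = require("jit.dis_arm64")

  -- Four raw bytes of machine code (0xd503201f, an ARM64 NOP) at a made-up address.
  local mcode, addr = "\31\32\3\213", 0x10000

  dis.disass(mcode, addr)                 -- simple API: decode the whole string to stdout
  local ctx = dis.create(mcode, addr)     -- extended API: reusable context
  ctx:disass(0, #mcode)                   -- disassemble a sub-range via (ofs, len)
  print(dis.regname(0), dis.regname(32))  -- RID names, e.g. "x0" and "d0"
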
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
new file mode 100644
index 00000000..f7a56352
--- /dev/null
+++ b/src/jit/dis_arm64be.lua
@@ -0,0 +1,12 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64BE disassembler wrapper module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- ARM64 instructions are always little-endian. So just forward to the
8-- common ARM64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11return require((string.match(..., ".*%.") or "").."dis_arm64")
12
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index ebfed56a..b0e99df4 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift 19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
20 20
21------------------------------------------------------------------------------ 21------------------------------------------------------------------------------
22-- Primary and extended opcode maps 22-- Extended opcode maps common to all MIPS releases
23------------------------------------------------------------------------------ 23------------------------------------------------------------------------------
24 24
25local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
26local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } 25local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
27local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } 26local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
28 27
28local map_cop0 = {
29 shift = 25, mask = 1,
30 [0] = {
31 shift = 21, mask = 15,
32 [0] = "mfc0TDW", [4] = "mtc0TDW",
33 [10] = "rdpgprDT",
34 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
35 [14] = "wrpgprDT",
36 }, {
37 shift = 0, mask = 63,
38 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
39 [24] = "eret", [31] = "deret",
40 [32] = "wait",
41 },
42}
43
44------------------------------------------------------------------------------
45-- Primary and extended opcode maps for MIPS R1-R5
46------------------------------------------------------------------------------
47
48local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
49
29local map_special = { 50local map_special = {
30 shift = 0, mask = 63, 51 shift = 0, mask = 63,
31 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, 52 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
@@ -34,15 +55,17 @@ local map_special = {
34 "jrS", "jalrD1S", "movzDST", "movnDST", 55 "jrS", "jalrD1S", "movzDST", "movnDST",
35 "syscallY", "breakY", false, "sync", 56 "syscallY", "breakY", false, "sync",
36 "mfhiD", "mthiS", "mfloD", "mtloS", 57 "mfhiD", "mthiS", "mfloD", "mtloS",
37 false, false, false, false, 58 "dsllvDST", false, "dsrlvDST", "dsravDST",
38 "multST", "multuST", "divST", "divuST", 59 "multST", "multuST", "divST", "divuST",
39 false, false, false, false, 60 "dmultST", "dmultuST", "ddivST", "ddivuST",
40 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", 61 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
41 "andDST", "orDST", "xorDST", "nor|notDST0", 62 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
42 false, false, "sltDST", "sltuDST", 63 false, false, "sltDST", "sltuDST",
43 false, false, false, false, 64 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
44 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", 65 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
45 "teqSTZ", false, "tneSTZ", 66 "teqSTZ", false, "tneSTZ", false,
67 "dsllDTA", false, "dsrlDTA", "dsraDTA",
68 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
46} 69}
47 70
48local map_special2 = { 71local map_special2 = {
@@ -60,11 +83,17 @@ local map_bshfl = {
60 [24] = "sehDT", 83 [24] = "sehDT",
61} 84}
62 85
86local map_dbshfl = {
87 shift = 6, mask = 31,
88 [2] = "dsbhDT",
89 [5] = "dshdDT",
90}
91
63local map_special3 = { 92local map_special3 = {
64 shift = 0, mask = 63, 93 shift = 0, mask = 63,
65 [0] = "extTSAK", [4] = "insTSAL", 94 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
66 [32] = map_bshfl, 95 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
67 [59] = "rdhwrTD", 96 [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD",
68} 97}
69 98
70local map_regimm = { 99local map_regimm = {
@@ -79,22 +108,6 @@ local map_regimm = {
79 false, false, false, "synciSO", 108 false, false, false, "synciSO",
80} 109}
81 110
82local map_cop0 = {
83 shift = 25, mask = 1,
84 [0] = {
85 shift = 21, mask = 15,
86 [0] = "mfc0TDW", [4] = "mtc0TDW",
87 [10] = "rdpgprDT",
88 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
89 [14] = "wrpgprDT",
90 }, {
91 shift = 0, mask = 63,
92 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
93 [24] = "eret", [31] = "deret",
94 [32] = "wait",
95 },
96}
97
98local map_cop1s = { 111local map_cop1s = {
99 shift = 0, mask = 63, 112 shift = 0, mask = 63,
100 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", 113 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
@@ -178,8 +191,8 @@ local map_cop1bc = {
178 191
179local map_cop1 = { 192local map_cop1 = {
180 shift = 21, mask = 31, 193 shift = 21, mask = 31,
181 [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", 194 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
182 "mtc1TG", false, "ctc1TG", "mthc1TG", 195 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
183 map_cop1bc, false, false, false, 196 map_cop1bc, false, false, false,
184 false, false, false, false, 197 false, false, false, false,
185 map_cop1s, map_cop1d, false, false, 198 map_cop1s, map_cop1d, false, false,
@@ -213,16 +226,218 @@ local map_pri = {
213 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", 226 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU",
214 map_cop0, map_cop1, false, map_cop1x, 227 map_cop0, map_cop1, false, map_cop1x,
215 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", 228 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
216 false, false, false, false, 229 "daddiTSI", "daddiuTSI", false, false,
217 map_special2, false, false, map_special3, 230 map_special2, "jalxJ", false, map_special3,
218 "lbTSO", "lhTSO", "lwlTSO", "lwTSO", 231 "lbTSO", "lhTSO", "lwlTSO", "lwTSO",
219 "lbuTSO", "lhuTSO", "lwrTSO", false, 232 "lbuTSO", "lhuTSO", "lwrTSO", false,
220 "sbTSO", "shTSO", "swlTSO", "swTSO", 233 "sbTSO", "shTSO", "swlTSO", "swTSO",
221 false, false, "swrTSO", "cacheNSO", 234 false, false, "swrTSO", "cacheNSO",
222 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", 235 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO",
223 false, "ldc1HSO", "ldc2TSO", false, 236 false, "ldc1HSO", "ldc2TSO", "ldTSO",
224 "scTSO", "swc1HSO", "swc2TSO", false, 237 "scTSO", "swc1HSO", "swc2TSO", false,
225 false, "sdc1HSO", "sdc2TSO", false, 238 false, "sdc1HSO", "sdc2TSO", "sdTSO",
239}
240
241------------------------------------------------------------------------------
242-- Primary and extended opcode maps for MIPS R6
243------------------------------------------------------------------------------
244
245local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
246local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
247local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
248local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
249local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
250local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
251local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
252local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
253
254local map_special_r6 = {
255 shift = 0, mask = 63,
256 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
257 false, map_srl, "sraDTA",
258 "sllvDTS", false, map_srlv, "sravDTS",
259 "jrS", "jalrD1S", false, false,
260 "syscallY", "breakY", false, "sync",
261 "clzDS", "cloDS", "dclzDS", "dcloDS",
262 "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
263 map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
264 map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
265 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
266 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
267 false, false, "sltDST", "sltuDST",
268 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
269 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
270 "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
271 "dsllDTA", false, "dsrlDTA", "dsraDTA",
272 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
273}
274
275local map_bshfl_r6 = {
276 shift = 9, mask = 3,
277 [1] = "alignDSTa",
278 _ = {
279 shift = 6, mask = 31,
280 [0] = "bitswapDT",
281 [2] = "wsbhDT",
282 [16] = "sebDT",
283 [24] = "sehDT",
284 }
285}
286
287local map_dbshfl_r6 = {
288 shift = 9, mask = 3,
289 [1] = "dalignDSTa",
290 _ = {
291 shift = 6, mask = 31,
292 [0] = "dbitswapDT",
293 [2] = "dsbhDT",
294 [5] = "dshdDT",
295 }
296}
297
298local map_special3_r6 = {
299 shift = 0, mask = 63,
300 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
301 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
302 [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
303}
304
305local map_regimm_r6 = {
306 shift = 16, mask = 31,
307 [0] = "bltzSB", [1] = "bgezSB",
308 [6] = "dahiSI", [30] = "datiSI",
309 [23] = "sigrieI", [31] = "synciSO",
310}
311
312local map_pcrel_r6 = {
313 shift = 19, mask = 3,
314 [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
315 shift = 18, mask = 1,
316 [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
317 }
318}
319
320local map_cop1s_r6 = {
321 shift = 0, mask = 63,
322 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
323 "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
324 "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
325 "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
326 "sel.sFGH", false, false, false,
327 "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
328 "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
329 "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
330 false, "cvt.d.sFG", false, false,
331 "cvt.w.sFG", "cvt.l.sFG",
332}
333
334local map_cop1d_r6 = {
335 shift = 0, mask = 63,
336 [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
337 "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
338 "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
339 "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
340 "sel.dFGH", false, false, false,
341 "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
342 "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
343 "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
344 "cvt.s.dFG", false, false, false,
345 "cvt.w.dFG", "cvt.l.dFG",
346}
347
348local map_cop1w_r6 = {
349 shift = 0, mask = 63,
350 [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
351 "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
352 "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
353 "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
354 false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
355 false, false, false, false,
356 false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
357 false, false, false, false,
358 "cvt.s.wFG", "cvt.d.wFG",
359}
360
361local map_cop1l_r6 = {
362 shift = 0, mask = 63,
363 [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
364 "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
365 "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
366 "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
367 false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
368 false, false, false, false,
369 false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
370 false, false, false, false,
371 "cvt.s.lFG", "cvt.d.lFG",
372}
373
374local map_cop1_r6 = {
375 shift = 21, mask = 31,
376 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
377 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
378 false, "bc1eqzHB", false, false,
379 false, "bc1nezHB", false, false,
380 map_cop1s_r6, map_cop1d_r6, false, false,
381 map_cop1w_r6, map_cop1l_r6,
382}
383
384local function maprs_popTS(rs, rt)
385 if rt == 0 then return 0 elseif rs == 0 then return 1
386 elseif rs == rt then return 2 else return 3 end
387end
388
389local map_pop06_r6 = {
390 maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
391}
392local map_pop07_r6 = {
393 maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
394}
395local map_pop26_r6 = {
396 maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
397}
398local map_pop27_r6 = {
399 maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
400}
401
402local function maprs_popS(rs, rt)
403 if rs == 0 then return 0 else return 1 end
404end
405
406local map_pop66_r6 = {
407 maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
408}
409local map_pop76_r6 = {
410 maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
411}
412
413local function maprs_popST(rs, rt)
414 if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end
415end
416
417local map_pop10_r6 = {
418 maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
419}
420local map_pop30_r6 = {
421 maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
422}
423
424local map_pri_r6 = {
425 [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
426 "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
427 map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
428 "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
429 map_cop0, map_cop1_r6, false, false,
430 false, false, map_pop26_r6, map_pop27_r6,
431 map_pop30_r6, "daddiuTSI", false, false,
432 false, "dauiTSI", false, map_special3_r6,
433 "lbTSO", "lhTSO", false, "lwTSO",
434 "lbuTSO", "lhuTSO", false, false,
435 "sbTSO", "shTSO", false, "swTSO",
436 false, false, false, false,
437 false, "lwc1HSO", "bc#", false,
438 false, "ldc1HSO", map_pop66_r6, "ldTSO",
439 false, "swc1HSO", "balc#", map_pcrel_r6,
440 false, "sdc1HSO", map_pop76_r6, "sdTSO",
226} 441}
227 442
228------------------------------------------------------------------------------ 443------------------------------------------------------------------------------
@@ -279,10 +494,14 @@ local function disass_ins(ctx)
279 ctx.op = op 494 ctx.op = op
280 ctx.rel = nil 495 ctx.rel = nil
281 496
282 local opat = map_pri[rshift(op, 26)] 497 local opat = ctx.map_pri[rshift(op, 26)]
283 while type(opat) ~= "string" do 498 while type(opat) ~= "string" do
284 if not opat then return unknown(ctx) end 499 if not opat then return unknown(ctx) end
285 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ 500 if opat.maprs then
501 opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
502 else
503 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
504 end
286 end 505 end
287 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") 506 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
288 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") 507 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
@@ -306,6 +525,10 @@ local function disass_ins(ctx)
306 x = "f"..band(rshift(op, 21), 31) 525 x = "f"..band(rshift(op, 21), 31)
307 elseif p == "A" then 526 elseif p == "A" then
308 x = band(rshift(op, 6), 31) 527 x = band(rshift(op, 6), 31)
528 elseif p == "a" then
529 x = band(rshift(op, 6), 7)
530 elseif p == "E" then
531 x = band(rshift(op, 6), 31) + 32
309 elseif p == "M" then 532 elseif p == "M" then
310 x = band(rshift(op, 11), 31) 533 x = band(rshift(op, 11), 31)
311 elseif p == "N" then 534 elseif p == "N" then
@@ -315,10 +538,18 @@ local function disass_ins(ctx)
315 if x == 0 then x = nil end 538 if x == 0 then x = nil end
316 elseif p == "K" then 539 elseif p == "K" then
317 x = band(rshift(op, 11), 31) + 1 540 x = band(rshift(op, 11), 31) + 1
541 elseif p == "P" then
542 x = band(rshift(op, 11), 31) + 33
318 elseif p == "L" then 543 elseif p == "L" then
319 x = band(rshift(op, 11), 31) - last + 1 544 x = band(rshift(op, 11), 31) - last + 1
545 elseif p == "Q" then
546 x = band(rshift(op, 11), 31) - last + 33
320 elseif p == "I" then 547 elseif p == "I" then
321 x = arshift(lshift(op, 16), 16) 548 x = arshift(lshift(op, 16), 16)
549 elseif p == "2" then
550 x = arshift(lshift(op, 13), 11)
551 elseif p == "3" then
552 x = arshift(lshift(op, 14), 11)
322 elseif p == "U" then 553 elseif p == "U" then
323 x = band(op, 0xffff) 554 x = band(op, 0xffff)
324 elseif p == "O" then 555 elseif p == "O" then
@@ -328,13 +559,22 @@ local function disass_ins(ctx)
328 local index = map_gpr[band(rshift(op, 16), 31)] 559 local index = map_gpr[band(rshift(op, 16), 31)]
329 operands[#operands] = format("%s(%s)", index, last) 560 operands[#operands] = format("%s(%s)", index, last)
330 elseif p == "B" then 561 elseif p == "B" then
331 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 562 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
563 ctx.rel = x
564 x = format("0x%08x", x)
565 elseif p == "b" then
566 x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
332 ctx.rel = x 567 ctx.rel = x
333 x = "0x"..tohex(x) 568 x = format("0x%08x", x)
569 elseif p == "#" then
570 x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
571 ctx.rel = x
572 x = format("0x%08x", x)
334 elseif p == "J" then 573 elseif p == "J" then
335 x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 574 local a = ctx.addr + ctx.pos
575 x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
336 ctx.rel = x 576 ctx.rel = x
337 x = "0x"..tohex(x) 577 x = format("0x%08x", x)
338 elseif p == "V" then 578 elseif p == "V" then
339 x = band(rshift(op, 8), 7) 579 x = band(rshift(op, 8), 7)
340 if x == 0 then x = nil end 580 if x == 0 then x = nil end
@@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len)
384end 624end
385 625
386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 626-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
387local function create_(code, addr, out) 627local function create(code, addr, out)
388 local ctx = {} 628 local ctx = {}
389 ctx.code = code 629 ctx.code = code
390 ctx.addr = addr or 0 630 ctx.addr = addr or 0
@@ -393,36 +633,62 @@ local function create_(code, addr, out)
393 ctx.disass = disass_block 633 ctx.disass = disass_block
394 ctx.hexdump = 8 634 ctx.hexdump = 8
395 ctx.get = get_be 635 ctx.get = get_be
636 ctx.map_pri = map_pri
637 return ctx
638end
639
640local function create_el(code, addr, out)
641 local ctx = create(code, addr, out)
642 ctx.get = get_le
643 return ctx
644end
645
646local function create_r6(code, addr, out)
647 local ctx = create(code, addr, out)
648 ctx.map_pri = map_pri_r6
396 return ctx 649 return ctx
397end 650end
398 651
399local function create_el_(code, addr, out) 652local function create_r6_el(code, addr, out)
400 local ctx = create_(code, addr, out) 653 local ctx = create(code, addr, out)
401 ctx.get = get_le 654 ctx.get = get_le
655 ctx.map_pri = map_pri_r6
402 return ctx 656 return ctx
403end 657end
404 658
405-- Simple API: disassemble code (a string) at address and output via out. 659-- Simple API: disassemble code (a string) at address and output via out.
406local function disass_(code, addr, out) 660local function disass(code, addr, out)
407 create_(code, addr, out):disass() 661 create(code, addr, out):disass()
662end
663
664local function disass_el(code, addr, out)
665 create_el(code, addr, out):disass()
408end 666end
409 667
410local function disass_el_(code, addr, out) 668local function disass_r6(code, addr, out)
411 create_el_(code, addr, out):disass() 669 create_r6(code, addr, out):disass()
670end
671
672local function disass_r6_el(code, addr, out)
673 create_r6_el(code, addr, out):disass()
412end 674end
413 675
414-- Return register name for RID. 676-- Return register name for RID.
415local function regname_(r) 677local function regname(r)
416 if r < 32 then return map_gpr[r] end 678 if r < 32 then return map_gpr[r] end
417 return "f"..(r-32) 679 return "f"..(r-32)
418end 680end
419 681
420-- Public module functions. 682-- Public module functions.
421module(...) 683return {
422 684 create = create,
423create = create_ 685 create_el = create_el,
424create_el = create_el_ 686 create_r6 = create_r6,
425disass = disass_ 687 create_r6_el = create_r6_el,
426disass_el = disass_el_ 688 disass = disass,
427regname = regname_ 689 disass_el = disass_el,
690 disass_r6 = disass_r6,
691 disass_r6_el = disass_r6_el,
692 regname = regname
693}
428 694
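The reworked MIPS module now exports one create/disass pair per combination of endianness and ISA release; the thin wrapper modules added below simply re-export one of these pairs under the generic names. A minimal sketch (the byte string and address are illustrative, not from the source):

  local dis_mips = require("jit.dis_mips")

  local mcode, addr = "\0\0\0\0", 0x80000000   -- one word of machine code (a MIPS NOP)
  dis_mips.disass(mcode, addr)        -- big-endian, pre-R6 opcode maps
  dis_mips.disass_el(mcode, addr)     -- little-endian, pre-R6
  dis_mips.disass_r6(mcode, addr)     -- big-endian, R6 opcode maps
  dis_mips.disass_r6_el(mcode, addr)  -- little-endian, R6
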
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
new file mode 100644
index 00000000..5f3a4dab
--- /dev/null
+++ b/src/jit/dis_mips64.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create,
14 disass = dis_mips.disass,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
new file mode 100644
index 00000000..ea513649
--- /dev/null
+++ b/src/jit/dis_mips64el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_el,
14 disass = dis_mips.disass_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua
new file mode 100644
index 00000000..1d948411
--- /dev/null
+++ b/src/jit/dis_mips64r6.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_r6,
14 disass = dis_mips.disass_r6,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua
new file mode 100644
index 00000000..26592e17
--- /dev/null
+++ b/src/jit/dis_mips64r6el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_r6_el,
14 disass = dis_mips.disass_r6_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index 6c14800e..6906a779 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
8-- MIPS disassembler module. All the interesting stuff is there. 8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12 12return {
13module(...) 13 create = dis_mips.create_el,
14 14 disass = dis_mips.disass_el,
15local dis_mips = require(_PACKAGE.."dis_mips") 15 regname = dis_mips.regname
16 16}
17create = dis_mips.create_el
18disass = dis_mips.disass_el
19regname = dis_mips.regname
20 17
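The conversion away from module() repeats across all of these wrappers: instead of relying on the _PACKAGE field that module() used to provide, the parent package prefix is now recovered from the module name that require passes in `...`. A standalone sketch of that idiom (the module name is hard-coded here only for illustration):

  -- When this file is loaded as "jit.dis_mipsel", require passes that name in `...`.
  local modname = "jit.dis_mipsel"                    -- stand-in for (...)
  local prefix = string.match(modname, ".*%.") or ""  -- -> "jit."
  print(prefix .. "dis_mips")                         -- -> "jit.dis_mips", the module to require
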
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index 26a6b343..95c3da84 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
560end 560end
561 561
562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
563local function create_(code, addr, out) 563local function create(code, addr, out)
564 local ctx = {} 564 local ctx = {}
565 ctx.code = code 565 ctx.code = code
566 ctx.addr = addr or 0 566 ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
572end 572end
573 573
574-- Simple API: disassemble code (a string) at address and output via out. 574-- Simple API: disassemble code (a string) at address and output via out.
575local function disass_(code, addr, out) 575local function disass(code, addr, out)
576 create_(code, addr, out):disass() 576 create(code, addr, out):disass()
577end 577end
578 578
579-- Return register name for RID. 579-- Return register name for RID.
580local function regname_(r) 580local function regname(r)
581 if r < 32 then return map_gpr[r] end 581 if r < 32 then return map_gpr[r] end
582 return "f"..(r-32) 582 return "f"..(r-32)
583end 583end
584 584
585-- Public module functions. 585-- Public module functions.
586module(...) 586return {
587 587 create = create,
588create = create_ 588 disass = disass,
589disass = disass_ 589 regname = regname
590regname = regname_ 590}
591 591
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index 3ffd67e2..eb21f044 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
8-- x86/x64 disassembler module. All the interesting stuff is there. 8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
12 12return {
13module(...) 13 create = dis_x86.create64,
14 14 disass = dis_x86.disass64,
15local dis_x86 = require(_PACKAGE.."dis_x86") 15 regname = dis_x86.regname64
16 16}
17create = dis_x86.create64
18disass = dis_x86.disass64
19regname = dis_x86.regname64
20 17
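The dis_x86.lua changes that follow teach the shared x86/x64 decoder about VEX-encoded (AVX/AVX2) instructions. A quick way to exercise that through the x64 wrapper above, with a hand-assembled instruction (the bytes and address are illustrative, not taken from the source):

  local dis = require("jit.dis_x64")
  -- c5 f8 58 c1 is a 2-byte VEX encoding of vaddps xmm0, xmm0, xmm1.
  dis.disass("\197\248\88\193", 0)
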
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index 77702b89..40b8218e 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -15,19 +15,20 @@
15-- Intel and AMD manuals. The supported instruction set is quite extensive 15-- Intel and AMD manuals. The supported instruction set is quite extensive
16-- and reflects what a current generation Intel or AMD CPU implements in 16-- and reflects what a current generation Intel or AMD CPU implements in
17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, 17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
18-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) 18-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
19-- instructions. 19-- (VMX/SVM) instructions.
20-- 20--
21-- Notes: 21-- Notes:
22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. 22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
23-- * No attempt at optimization has been made -- it's fast enough for my needs. 23-- * No attempt at optimization has been made -- it's fast enough for my needs.
24-- * The public API may change when more architectures are added.
25------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
26 25
27local type = type 26local type = type
28local sub, byte, format = string.sub, string.byte, string.format 27local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub 28local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep 29local lower, rep = string.lower, string.rep
30local bit = require("bit")
31local tohex = bit.tohex
31 32
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 33-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = { 34local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
76"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", 77"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
77"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", 78"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
78--Cx 79--Cx
79"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", 80"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
80"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", 81"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
81--Dx 82--Dx
82"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", 83"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
101 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", 102 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
102 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", 103 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
103 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", 104 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
104 [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, 105 [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
105 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, 106 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
106}, { __index = map_opc1_32 }) 107}, { __index = map_opc1_32 })
107 108
@@ -112,12 +113,12 @@ local map_opc2 = {
112[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", 113[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
113"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", 114"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
114--1x 115--1x
115"movupsXrm|movssXrm|movupdXrm|movsdXrm", 116"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
116"movupsXmr|movssXmr|movupdXmr|movsdXmr", 117"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
117"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", 118"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
118"movlpsXmr||movlpdXmr", 119"movlpsXmr||movlpdXmr",
119"unpcklpsXrm||unpcklpdXrm", 120"unpcklpsXrvm||unpcklpdXrvm",
120"unpckhpsXrm||unpckhpdXrm", 121"unpckhpsXrvm||unpckhpdXrvm",
121"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", 122"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
122"movhpsXmr||movhpdXmr", 123"movhpsXmr||movhpdXmr",
123"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", 124"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
126"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, 127"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
127"movapsXrm||movapdXrm", 128"movapsXrm||movapdXrm",
128"movapsXmr||movapdXmr", 129"movapsXmr||movapdXmr",
129"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", 130"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
130"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", 131"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
131"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", 132"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
132"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", 133"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
142"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", 143"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
143--5x 144--5x
144"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", 145"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
145"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", 146"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
146"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", 147"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
147"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", 148"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
148"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", 149"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
149"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", 150"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
150"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", 151"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
151"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", 152"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
152"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", 153"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
153--6x 154--6x
154"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", 155"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
155"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", 156"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
156"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", 157"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
157"||punpcklqdqXrm","||punpckhqdqXrm", 158"||punpcklqdqXrvm","||punpckhqdqXrvm",
158"movPrVSm","movqMrm|movdquXrm|movdqaXrm", 159"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
159--7x 160--7x
160"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", 161"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
161"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", 162"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
162"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", 163"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
163"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", 164"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
164nil,nil, 165nil,nil,
165"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", 166"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
166"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", 167"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
167--8x 168--8x
168"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", 169"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
180"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", 181"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
181--Cx 182--Cx
182"xaddBmr","xaddVmr", 183"xaddBmr","xaddVmr",
183"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", 184"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
184"pinsrwPrWmu","pextrwDrPmu", 185"pinsrwPrvWmu","pextrwDrPmu",
185"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", 186"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
186"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", 187"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
187--Dx 188--Dx
188"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", 189"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
189"paddqPrm","pmullwPrm", 190"paddqPrvm","pmullwPrvm",
190"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", 191"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
191"psubusbPrm","psubuswPrm","pminubPrm","pandPrm", 192"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
192"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", 193"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
193--Ex 194--Ex
194"pavgbPrm","psrawPrm","psradPrm","pavgwPrm", 195"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
195"pmulhuwPrm","pmulhwPrm", 196"pmulhuwPrvm","pmulhwPrvm",
196"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", 197"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
197"psubsbPrm","psubswPrm","pminswPrm","porPrm", 198"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
198"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", 199"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
199--Fx 200--Fx
200"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", 201"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
201"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", 202"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
202"psubbPrm","psubwPrm","psubdPrm","psubqPrm", 203"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
203"paddbPrm","paddwPrm","padddPrm","ud", 204"paddbPrvm","paddwPrvm","padddPrvm","ud",
204} 205}
205assert(map_opc2[255] == "ud") 206assert(map_opc2[255] == "ud")
206 207
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud")
208local map_opc3 = { 209local map_opc3 = {
209["38"] = { -- [66] 0f 38 xx 210["38"] = { -- [66] 0f 38 xx
210--0x 211--0x
211[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", 212[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
212"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", 213"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
213"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", 214"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
214nil,nil,nil,nil, 215"||permilpsXrvm","||permilpdXrvm",nil,nil,
215--1x 216--1x
216"||pblendvbXrma",nil,nil,nil, 217"||pblendvbXrma",nil,nil,nil,
217"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", 218"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
218nil,nil,nil,nil, 219"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
219"pabsbPrm","pabswPrm","pabsdPrm",nil, 220"pabsbPrm","pabswPrm","pabsdPrm",nil,
220--2x 221--2x
221"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", 222"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
222"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, 223"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
223"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", 224"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
224nil,nil,nil,nil, 225"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
225--3x 226--3x
226"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", 227"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
227"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", 228"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
228"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", 229"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
229"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", 230"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
230--4x 231--4x
231"||pmulddXrm","||phminposuwXrm", 232"||pmulddXrvm","||phminposuwXrm",nil,nil,
233nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
234--5x
235[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
236[0x5a] = "||broadcasti128XrlXm",
237--7x
238[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
239--8x
240[0x8c] = "||pmaskmovXrvVSm",
241[0x8e] = "||pmaskmovVSmXvr",
242--9x
243[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
244[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
245[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
246[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
247[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
248--Ax
249[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
250[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
251[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
252[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
253[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
254--Bx
255[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
256[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
257[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
258[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
259[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
260--Dx
261[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
262[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
232--Fx 263--Fx
233[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", 264[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
265[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
234}, 266},
235 267
236["3a"] = { -- [66] 0f 3a xx 268["3a"] = { -- [66] 0f 3a xx
237--0x 269--0x
238[0x00]=nil,nil,nil,nil,nil,nil,nil,nil, 270[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
239"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", 271"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
240"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", 272"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
273"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
241--1x 274--1x
242nil,nil,nil,nil, 275nil,nil,nil,nil,
243"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", 276"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
244nil,nil,nil,nil,nil,nil,nil,nil, 277"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
278nil,nil,nil,nil,
245--2x 279--2x
246"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, 280"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
281--3x
282[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
247--4x 283--4x
248[0x40] = "||dppsXrmu", 284[0x40] = "||dppsXrvmu",
249[0x41] = "||dppdXrmu", 285[0x41] = "||dppdXrvmu",
250[0x42] = "||mpsadbwXrmu", 286[0x42] = "||mpsadbwXrvmu",
287[0x44] = "||pclmulqdqXrvmu",
288[0x46] = "||perm2i128Xrvmu",
289[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
290[0x4c] = "||pblendvbXrvmb",
251--6x 291--6x
252[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", 292[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
253[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", 293[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
294[0xdf] = "||aeskeygenassistXrmu",
295--Fx
296[0xf0] = "||| rorxVrmu",
254}, 297},
255} 298}
256 299
@@ -354,17 +397,19 @@ local map_regs = {
354 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! 397 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
355 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 398 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
356 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, 399 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
400 Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
401 "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
357} 402}
358local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } 403local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
359 404
360-- Maps for size names. 405-- Maps for size names.
361local map_sz2n = { 406local map_sz2n = {
362 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, 407 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
363} 408}
364local map_sz2prefix = { 409local map_sz2prefix = {
365 B = "byte", W = "word", D = "dword", 410 B = "byte", W = "word", D = "dword",
366 Q = "qword", 411 Q = "qword",
367 M = "qword", X = "xword", 412 M = "qword", X = "xword", Y = "yword",
368 F = "dword", G = "qword", -- No need for sizes/register names for these two. 413 F = "dword", G = "qword", -- No need for sizes/register names for these two.
369} 414}
370 415
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands)
387 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end 432 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
388 if ctx.rex then 433 if ctx.rex then
389 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. 434 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
390 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") 435 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
391 if t ~= "" then text = "rex."..t.." "..text end 436 (ctx.vexl and "l" or "")
437 if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
438 if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "")
439 elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end
392 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 440 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
393 ctx.rex = false 441 ctx.rex = false; ctx.vexl = false; ctx.vexv = false
394 end 442 end
395 if ctx.seg then 443 if ctx.seg then
396 local text2, n = gsub(text, "%[", "["..ctx.seg..":") 444 local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands)
405 end 453 end
406 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) 454 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text))
407 ctx.mrm = false 455 ctx.mrm = false
456 ctx.vexv = false
408 ctx.start = pos 457 ctx.start = pos
409 ctx.imm = nil 458 ctx.imm = nil
410end 459end
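-- (Illustrative sketch, not part of the diff.) The mnemonic prefixing added to
-- putop() above either prints an explicit "vex.<flags>" prefix, when l/v (or the
-- usual rex flags) need to be shown, or simply prepends "v" to the mnemonic.
-- Names below are made up; the logic mirrors the lines above in simplified form.
local function vex_text(rex, vexl, vexv, text)
  local t = (vexl and "l" or "")
  if vexv and vexv ~= 0 then t = t.."v"..vexv end
  if t ~= "" then return rex.."."..t.." "..text end
  if rex == "vex" then return "v"..text end
  return text
end
print(vex_text("vex", false, 0, "addps xmm0, xmm0, xmm1"))  -- -> vaddps xmm0, xmm0, xmm1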
@@ -413,7 +462,7 @@ end
413local function clearprefixes(ctx) 462local function clearprefixes(ctx)
414 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false 463 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
415 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 464 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
416 ctx.rex = false; ctx.a32 = false 465 ctx.rex = false; ctx.a32 = false; ctx.vexl = false
417end 466end
418 467
419-- Fallback for incomplete opcodes at the end. 468-- Fallback for incomplete opcodes at the end.
@@ -450,9 +499,9 @@ end
450-- Process pattern string and generate the operands. 499-- Process pattern string and generate the operands.
451local function putpat(ctx, name, pat) 500local function putpat(ctx, name, pat)
452 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp 501 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
453 local code, pos, stop = ctx.code, ctx.pos, ctx.stop 502 local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
454 503
455 -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz 504 -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
456 for p in gmatch(pat, ".") do 505 for p in gmatch(pat, ".") do
457 local x = nil 506 local x = nil
458 if p == "V" or p == "U" then 507 if p == "V" or p == "U" then
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat)
467 elseif p == "B" then 516 elseif p == "B" then
468 sz = "B" 517 sz = "B"
469 regs = ctx.rex and map_regs.B64 or map_regs.B 518 regs = ctx.rex and map_regs.B64 or map_regs.B
470 elseif match(p, "[WDQMXFG]") then 519 elseif match(p, "[WDQMXYFG]") then
471 sz = p 520 sz = p
521 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
472 regs = map_regs[sz] 522 regs = map_regs[sz]
473 elseif p == "P" then 523 elseif p == "P" then
474 sz = ctx.o16 and "X" or "M"; ctx.o16 = false 524 sz = ctx.o16 and "X" or "M"; ctx.o16 = false
525 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
475 regs = map_regs[sz] 526 regs = map_regs[sz]
527 elseif p == "H" then
528 name = name..(ctx.rexw and "d" or "s")
529 ctx.rexw = false
476 elseif p == "S" then 530 elseif p == "S" then
477 name = name..lower(sz) 531 name = name..lower(sz)
478 elseif p == "s" then 532 elseif p == "s" then
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat)
484 local imm = getimm(ctx, pos, 1); if not imm then return end 538 local imm = getimm(ctx, pos, 1); if not imm then return end
485 x = format("0x%02x", imm) 539 x = format("0x%02x", imm)
486 pos = pos+1 540 pos = pos+1
541 elseif p == "b" then
542 local imm = getimm(ctx, pos, 1); if not imm then return end
543 x = regs[imm/16+1]
544 pos = pos+1
487 elseif p == "w" then 545 elseif p == "w" then
488 local imm = getimm(ctx, pos, 2); if not imm then return end 546 local imm = getimm(ctx, pos, 2); if not imm then return end
489 x = format("0x%x", imm) 547 x = format("0x%x", imm)
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat)
532 local lo = imm % 0x1000000 590 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) 591 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else 592 else
535 x = format("0x%08x", imm) 593 x = "0x"..tohex(imm)
536 end 594 end
537 elseif p == "R" then 595 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8 596 local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat)
616 else 674 else
617 x = "CR"..sp 675 x = "CR"..sp
618 end 676 end
677 elseif p == "v" then
678 if ctx.vexv then
679 x = regs[ctx.vexv+1]; ctx.vexv = false
680 end
619 elseif p == "y" then x = "DR"..sp 681 elseif p == "y" then x = "DR"..sp
620 elseif p == "z" then x = "TR"..sp 682 elseif p == "z" then x = "TR"..sp
683 elseif p == "l" then vexl = false
621 elseif p == "t" then 684 elseif p == "t" then
622 else 685 else
623 error("bad pattern `"..pat.."'") 686 error("bad pattern `"..pat.."'")
@@ -692,7 +755,8 @@ map_act = {
692 B = putpat, W = putpat, D = putpat, Q = putpat, 755 B = putpat, W = putpat, D = putpat, Q = putpat,
693 V = putpat, U = putpat, T = putpat, 756 V = putpat, U = putpat, T = putpat,
694 M = putpat, X = putpat, P = putpat, 757 M = putpat, X = putpat, P = putpat,
695 F = putpat, G = putpat, 758 F = putpat, G = putpat, Y = putpat,
759 H = putpat,
696 760
697 -- Collect prefixes. 761 -- Collect prefixes.
698 [":"] = function(ctx, name, pat) 762 [":"] = function(ctx, name, pat)
@@ -753,15 +817,68 @@ map_act = {
753 817
754 -- REX prefix. 818 -- REX prefix.
755 rex = function(ctx, name, pat) 819 rex = function(ctx, name, pat)
756 if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. 820 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
757 for p in gmatch(pat, ".") do ctx["rex"..p] = true end 821 for p in gmatch(pat, ".") do ctx["rex"..p] = true end
758 ctx.rex = true 822 ctx.rex = "rex"
823 end,
824
825 -- VEX prefix.
826 vex = function(ctx, name, pat)
827 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
828 ctx.rex = "vex"
829 local pos = ctx.pos
830 if ctx.mrm then
831 ctx.mrm = nil
832 pos = pos-1
833 end
834 local b = byte(ctx.code, pos, pos)
835 if not b then return incomplete(ctx) end
836 pos = pos+1
837 if b < 128 then ctx.rexr = true end
838 local m = 1
839 if pat == "3" then
840 m = b%32; b = (b-m)/32
841 local nb = b%2; b = (b-nb)/2
842 if nb == 0 then ctx.rexb = true end
843 local nx = b%2
844 if nx == 0 then ctx.rexx = true end
845 b = byte(ctx.code, pos, pos)
846 if not b then return incomplete(ctx) end
847 pos = pos+1
848 if b >= 128 then ctx.rexw = true end
849 end
850 ctx.pos = pos
851 local map
852 if m == 1 then map = map_opc2
853 elseif m == 2 then map = map_opc3["38"]
854 elseif m == 3 then map = map_opc3["3a"]
855 else return unknown(ctx) end
856 local p = b%4; b = (b-p)/4
857 if p == 1 then ctx.o16 = "o16"
858 elseif p == 2 then ctx.rep = "rep"
859 elseif p == 3 then ctx.rep = "repne" end
860 local l = b%2; b = (b-l)/2
861 if l ~= 0 then ctx.vexl = true end
862 ctx.vexv = (-1-b)%16
863 return dispatchmap(ctx, map)
759 end, 864 end,
760 865
761 -- Special case for nop with REX prefix. 866 -- Special case for nop with REX prefix.
762 nop = function(ctx, name, pat) 867 nop = function(ctx, name, pat)
763 return dispatch(ctx, ctx.rex and pat or "nop") 868 return dispatch(ctx, ctx.rex and pat or "nop")
764 end, 869 end,
870
871 -- Special case for 0F 77.
872 emms = function(ctx, name, pat)
873 if ctx.rex ~= "vex" then
874 return putop(ctx, "emms")
875 elseif ctx.vexl then
876 ctx.vexl = false
877 return putop(ctx, "zeroall")
878 else
879 return putop(ctx, "zeroupper")
880 end
881 end,
765} 882}
766 883
767------------------------------------------------------------------------------ 884------------------------------------------------------------------------------
@@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len)
782end 899end
783 900
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 901-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out) 902local function create(code, addr, out)
786 local ctx = {} 903 local ctx = {}
787 ctx.code = code 904 ctx.code = code
788 ctx.addr = (addr or 0) - 1 905 ctx.addr = (addr or 0) - 1
@@ -796,8 +913,8 @@ local function create_(code, addr, out)
796 return ctx 913 return ctx
797end 914end
798 915
799local function create64_(code, addr, out) 916local function create64(code, addr, out)
800 local ctx = create_(code, addr, out) 917 local ctx = create(code, addr, out)
801 ctx.x64 = true 918 ctx.x64 = true
802 ctx.map1 = map_opc1_64 919 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q 920 ctx.aregs = map_regs.Q
@@ -805,32 +922,32 @@ local function create64_(code, addr, out)
805end 922end
806 923
807-- Simple API: disassemble code (a string) at address and output via out. 924-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out) 925local function disass(code, addr, out)
809 create_(code, addr, out):disass() 926 create(code, addr, out):disass()
810end 927end
811 928
812local function disass64_(code, addr, out) 929local function disass64(code, addr, out)
813 create64_(code, addr, out):disass() 930 create64(code, addr, out):disass()
814end 931end
815 932
816-- Return register name for RID. 933-- Return register name for RID.
817local function regname_(r) 934local function regname(r)
818 if r < 8 then return map_regs.D[r+1] end 935 if r < 8 then return map_regs.D[r+1] end
819 return map_regs.X[r-7] 936 return map_regs.X[r-7]
820end 937end
821 938
822local function regname64_(r) 939local function regname64(r)
823 if r < 16 then return map_regs.Q[r+1] end 940 if r < 16 then return map_regs.Q[r+1] end
824 return map_regs.X[r-15] 941 return map_regs.X[r-15]
825end 942end
826 943
827-- Public module functions. 944-- Public module functions.
828module(...) 945return {
829 946 create = create,
830create = create_ 947 create64 = create64,
831create64 = create64_ 948 disass = disass,
832disass = disass_ 949 disass64 = disass64,
833disass64 = disass64_ 950 regname = regname,
834regname = regname_ 951 regname64 = regname64
835regname64 = regname64_ 952}
836 953
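For context on the API change in dis_x86.lua above (the module now returns a plain table of functions instead of calling module(...)), here is a minimal usage sketch. It is not part of the patch; the byte string and the base address are arbitrary stand-ins chosen only for illustration.

  local dis = require("jit.dis_x86")
  -- "\x90\x90" is two x86 NOP bytes; any machine-code string works here.
  dis.disass("\x90\x90", 0x10000, io.write)
  -- Or keep a context around for repeated use, per the extended API:
  local ctx = dis.create("\x90\x90", 0x10000, io.write)
  ctx:disass()
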
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 86f11e26..f296a517 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -62,7 +62,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
62local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap 62local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
63local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr 63local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
64local bit = require("bit") 64local bit = require("bit")
65local band, shr = bit.band, bit.rshift 65local band, shr, tohex = bit.band, bit.rshift, bit.tohex
66local sub, gsub, format = string.sub, string.gsub, string.format 66local sub, gsub, format = string.sub, string.gsub, string.format
67local byte, rep = string.byte, string.rep 67local byte, rep = string.byte, string.rep
68local type, tostring = type, tostring 68local type, tostring = type, tostring
@@ -84,12 +84,13 @@ local nexitsym = 0
84local function fillsymtab_tr(tr, nexit) 84local function fillsymtab_tr(tr, nexit)
85 local t = {} 85 local t = {}
86 symtabmt.__index = t 86 symtabmt.__index = t
87 if jit.arch == "mips" or jit.arch == "mipsel" then 87 if jit.arch:sub(1, 4) == "mips" then
88 t[traceexitstub(tr, 0)] = "exit" 88 t[traceexitstub(tr, 0)] = "exit"
89 return 89 return
90 end 90 end
91 for i=0,nexit-1 do 91 for i=0,nexit-1 do
92 local addr = traceexitstub(tr, i) 92 local addr = traceexitstub(tr, i)
93 if addr < 0 then addr = addr + 2^32 end
93 t[addr] = tostring(i) 94 t[addr] = tostring(i)
94 end 95 end
95 local addr = traceexitstub(tr, nexit) 96 local addr = traceexitstub(tr, nexit)
@@ -100,10 +101,15 @@ end
100local function fillsymtab(tr, nexit) 101local function fillsymtab(tr, nexit)
101 local t = symtab 102 local t = symtab
102 if nexitsym == 0 then 103 if nexitsym == 0 then
104 local maskaddr = jit.arch == "arm" and -2
103 local ircall = vmdef.ircall 105 local ircall = vmdef.ircall
104 for i=0,#ircall do 106 for i=0,#ircall do
105 local addr = ircalladdr(i) 107 local addr = ircalladdr(i)
106 if addr ~= 0 then t[addr] = ircall[i] end 108 if addr ~= 0 then
109 if maskaddr then addr = band(addr, maskaddr) end
110 if addr < 0 then addr = addr + 2^32 end
111 t[addr] = ircall[i]
112 end
107 end 113 end
108 end 114 end
109 if nexitsym == 1000000 then -- Per-trace exit stubs. 115 if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -117,6 +123,7 @@ local function fillsymtab(tr, nexit)
117 nexit = 1000000 123 nexit = 1000000
118 break 124 break
119 end 125 end
126 if addr < 0 then addr = addr + 2^32 end
120 t[addr] = tostring(i) 127 t[addr] = tostring(i)
121 end 128 end
122 nexitsym = nexit 129 nexitsym = nexit
@@ -135,6 +142,7 @@ local function dump_mcode(tr)
135 local mcode, addr, loop = tracemc(tr) 142 local mcode, addr, loop = tracemc(tr)
136 if not mcode then return end 143 if not mcode then return end
137 if not disass then disass = require("jit.dis_"..jit.arch) end 144 if not disass then disass = require("jit.dis_"..jit.arch) end
145 if addr < 0 then addr = addr + 2^32 end
138 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") 146 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
139 local ctx = disass.create(mcode, addr, dumpwrite) 147 local ctx = disass.create(mcode, addr, dumpwrite)
140 ctx.hexdump = 0 148 ctx.hexdump = 0
@@ -210,8 +218,10 @@ local function colorize_text(s)
210 return s 218 return s
211end 219end
212 220
213local function colorize_ansi(s, t) 221local function colorize_ansi(s, t, extra)
214 return format(colortype_ansi[t], s) 222 local out = format(colortype_ansi[t], s)
223 if extra then out = "\027[3m"..out end
224 return out
215end 225end
216 226
217local irtype_ansi = setmetatable({}, 227local irtype_ansi = setmetatable({},
@@ -220,9 +230,10 @@ local irtype_ansi = setmetatable({},
220 230
221local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", } 231local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
222 232
223local function colorize_html(s, t) 233local function colorize_html(s, t, extra)
224 s = gsub(s, "[<>&]", html_escape) 234 s = gsub(s, "[<>&]", html_escape)
225 return format('<span class="irt_%s">%s</span>', irtype_text[t], s) 235 return format('<span class="irt_%s%s">%s</span>',
236 irtype_text[t], extra and " irt_extra" or "", s)
226end 237end
227 238
228local irtype_html = setmetatable({}, 239local irtype_html = setmetatable({},
@@ -247,6 +258,7 @@ span.irt_tab { color: #c00000; }
247span.irt_udt, span.irt_lud { color: #00c0c0; } 258span.irt_udt, span.irt_lud { color: #00c0c0; }
248span.irt_num { color: #4040c0; } 259span.irt_num { color: #4040c0; }
249span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } 260span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
261span.irt_extra { font-style: italic; }
250</style> 262</style>
251]] 263]]
252 264
@@ -262,6 +274,7 @@ local litname = {
262 if band(mode, 8) ~= 0 then s = s.."C" end 274 if band(mode, 8) ~= 0 then s = s.."C" end
263 if band(mode, 16) ~= 0 then s = s.."R" end 275 if band(mode, 16) ~= 0 then s = s.."R" end
264 if band(mode, 32) ~= 0 then s = s.."I" end 276 if band(mode, 32) ~= 0 then s = s.."I" end
277 if band(mode, 64) ~= 0 then s = s.."K" end
265 t[mode] = s 278 t[mode] = s
266 return s 279 return s
267 end}), 280 end}),
@@ -269,16 +282,20 @@ local litname = {
269 ["CONV "] = setmetatable({}, { __index = function(t, mode) 282 ["CONV "] = setmetatable({}, { __index = function(t, mode)
270 local s = irtype[band(mode, 31)] 283 local s = irtype[band(mode, 31)]
271 s = irtype[band(shr(mode, 5), 31)].."."..s 284 s = irtype[band(shr(mode, 5), 31)].."."..s
272 if band(mode, 0x400) ~= 0 then s = s.." trunc" 285 if band(mode, 0x800) ~= 0 then s = s.." sext" end
273 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
274 local c = shr(mode, 12) 286 local c = shr(mode, 12)
275 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 287 if c == 1 then s = s.." none"
288 elseif c == 2 then s = s.." index"
289 elseif c == 3 then s = s.." check" end
276 t[mode] = s 290 t[mode] = s
277 return s 291 return s
278 end}), 292 end}),
279 ["FLOAD "] = vmdef.irfield, 293 ["FLOAD "] = vmdef.irfield,
280 ["FREF "] = vmdef.irfield, 294 ["FREF "] = vmdef.irfield,
281 ["FPMATH"] = vmdef.irfpm, 295 ["FPMATH"] = vmdef.irfpm,
296 ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
297 ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
298 ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
282} 299}
283 300
284local function ctlsub(c) 301local function ctlsub(c)
@@ -302,15 +319,19 @@ local function fmtfunc(func, pc)
302 end 319 end
303end 320end
304 321
305local function formatk(tr, idx) 322local function formatk(tr, idx, sn)
306 local k, t, slot = tracek(tr, idx) 323 local k, t, slot = tracek(tr, idx)
307 local tn = type(k) 324 local tn = type(k)
308 local s 325 local s
309 if tn == "number" then 326 if tn == "number" then
310 if k == 2^52+2^51 then 327 if t < 12 then
328 s = k == 0 and "NULL" or format("[0x%08x]", k)
329 elseif band(sn or 0, 0x30000) ~= 0 then
330 s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
331 elseif k == 2^52+2^51 then
311 s = "bias" 332 s = "bias"
312 else 333 else
313 s = format("%+.14g", k) 334 s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
314 end 335 end
315 elseif tn == "string" then 336 elseif tn == "string" then
316 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) 337 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -328,10 +349,12 @@ local function formatk(tr, idx)
328 elseif t == 21 then -- int64_t 349 elseif t == 21 then -- int64_t
329 s = sub(tostring(k), 1, -3) 350 s = sub(tostring(k), 1, -3)
330 if sub(s, 1, 1) ~= "-" then s = "+"..s end 351 if sub(s, 1, 1) ~= "-" then s = "+"..s end
352 elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)
353 return "----" -- Special case for LJ_FR2 slot 1.
331 else 354 else
332 s = tostring(k) -- For primitives. 355 s = tostring(k) -- For primitives.
333 end 356 end
334 s = colorize(format("%-4s", s), t) 357 s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0)
335 if slot then 358 if slot then
336 s = format("%s @%d", s, slot) 359 s = format("%s @%d", s, slot)
337 end 360 end
@@ -346,12 +369,12 @@ local function printsnap(tr, snap)
346 n = n + 1 369 n = n + 1
347 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS 370 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
348 if ref < 0 then 371 if ref < 0 then
349 out:write(formatk(tr, ref)) 372 out:write(formatk(tr, ref, sn))
350 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM 373 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
351 out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) 374 out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
352 else 375 else
353 local m, ot, op1, op2 = traceir(tr, ref) 376 local m, ot, op1, op2 = traceir(tr, ref)
354 out:write(colorize(format("%04d", ref), band(ot, 31))) 377 out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0))
355 end 378 end
356 out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME 379 out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
357 else 380 else
@@ -529,7 +552,12 @@ local recdepth = 0
529local function fmterr(err, info) 552local function fmterr(err, info)
530 if type(err) == "number" then 553 if type(err) == "number" then
531 if type(info) == "function" then info = fmtfunc(info) end 554 if type(info) == "function" then info = fmtfunc(info) end
532 err = format(vmdef.traceerr[err], info) 555 local fmt = vmdef.traceerr[err]
556 if fmt == "NYI: bytecode %s" then
557 local oidx = 6 * info
558 info = sub(vmdef.bcnames, oidx+1, oidx+6)
559 end
560 err = format(fmt, info)
533 end 561 end
534 return err 562 return err
535end 563end
@@ -544,7 +572,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
544 if what == "start" then 572 if what == "start" then
545 if dumpmode.H then out:write('<pre class="ljdump">\n') end 573 if dumpmode.H then out:write('<pre class="ljdump">\n') end
546 out:write("---- TRACE ", tr, " ", what) 574 out:write("---- TRACE ", tr, " ", what)
547 if otr then out:write(" ", otr, "/", oex) end 575 if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
548 out:write(" ", fmtfunc(func, pc), "\n") 576 out:write(" ", fmtfunc(func, pc), "\n")
549 elseif what == "stop" or what == "abort" then 577 elseif what == "stop" or what == "abort" then
550 out:write("---- TRACE ", tr, " ", what) 578 out:write("---- TRACE ", tr, " ", what)
@@ -594,23 +622,26 @@ end
594 622
595------------------------------------------------------------------------------ 623------------------------------------------------------------------------------
596 624
625local gpr64 = jit.arch:match("64")
626local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel"
627
597-- Dump taken trace exits. 628-- Dump taken trace exits.
598local function dump_texit(tr, ex, ngpr, nfpr, ...) 629local function dump_texit(tr, ex, ngpr, nfpr, ...)
599 out:write("---- TRACE ", tr, " exit ", ex, "\n") 630 out:write("---- TRACE ", tr, " exit ", ex, "\n")
600 if dumpmode.X then 631 if dumpmode.X then
601 local regs = {...} 632 local regs = {...}
602 if jit.arch == "x64" then 633 if gpr64 then
603 for i=1,ngpr do 634 for i=1,ngpr do
604 out:write(format(" %016x", regs[i])) 635 out:write(format(" %016x", regs[i]))
605 if i % 4 == 0 then out:write("\n") end 636 if i % 4 == 0 then out:write("\n") end
606 end 637 end
607 else 638 else
608 for i=1,ngpr do 639 for i=1,ngpr do
609 out:write(format(" %08x", regs[i])) 640 out:write(" ", tohex(regs[i]))
610 if i % 8 == 0 then out:write("\n") end 641 if i % 8 == 0 then out:write("\n") end
611 end 642 end
612 end 643 end
613 if jit.arch == "mips" or jit.arch == "mipsel" then 644 if fprmips32 then
614 for i=1,nfpr,2 do 645 for i=1,nfpr,2 do
615 out:write(format(" %+17.14g", regs[ngpr+i])) 646 out:write(format(" %+17.14g", regs[ngpr+i]))
616 if i % 8 == 7 then out:write("\n") end 647 if i % 8 == 7 then out:write("\n") end
@@ -691,9 +722,9 @@ local function dumpon(opt, outfile)
691end 722end
692 723
693-- Public module functions. 724-- Public module functions.
694module(...) 725return {
695 726 on = dumpon,
696on = dumpon 727 off = dumpoff,
697off = dumpoff 728 start = dumpon -- For -j command line option.
698start = dumpon -- For -j command line option. 729}
699 730
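The dump module keeps its on/off entry points but now hands them back as a returned table. A hedged sketch of driving it from Lua rather than via -jdump follows; the "tbim" option letters are assumed from the module's usual flag set and are not shown in this hunk.

  local dump = require("jit.dump")
  dump.on("tbim", "trace.out")   -- flag letters assumed; second argument is the output file
  -- ... run the workload whose traces should be dumped ...
  dump.off()
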
diff --git a/src/jit/p.lua b/src/jit/p.lua
new file mode 100644
index 00000000..ef2ee82f
--- /dev/null
+++ b/src/jit/p.lua
@@ -0,0 +1,309 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module is a simple command line interface to the built-in
9-- low-overhead profiler of LuaJIT.
10--
11-- The lower-level API of the profiler is accessible via the "jit.profile"
12-- module or the luaJIT_profile_* C API.
13--
14-- Example usage:
15--
16-- luajit -jp myapp.lua
17-- luajit -jp=s myapp.lua
18-- luajit -jp=-s myapp.lua
19-- luajit -jp=vl myapp.lua
20-- luajit -jp=G,profile.txt myapp.lua
21--
22-- The following dump features are available:
23--
24-- f Stack dump: function name, otherwise module:line. Default mode.
25-- F Stack dump: ditto, but always prepend module.
26-- l Stack dump: module:line.
27-- <number> stack dump depth (callee < caller). Default: 1.
28-- -<number> Inverse stack dump depth (caller > callee).
29-- s Split stack dump after first stack level. Implies abs(depth) >= 2.
30-- p Show full path for module names.
31-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv.
32-- z Show zones. Can be combined with stack dumps, e.g. zf or fz.
33-- r Show raw sample counts. Default: show percentages.
34-- a Annotate excerpts from source code files.
35-- A Annotate complete source code files.
36-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
37-- m<number> Minimum sample percentage to be shown. Default: 3.
38-- i<number> Sampling interval in milliseconds. Default: 10.
39--
40----------------------------------------------------------------------------
41
42-- Cache some library functions and objects.
43local jit = require("jit")
44local profile = require("jit.profile")
45local vmdef = require("jit.vmdef")
46local math = math
47local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
48local sort, format = table.sort, string.format
49local stdout = io.stdout
50local zone -- Load jit.zone module on demand.
51
52-- Output file handle.
53local out
54
55------------------------------------------------------------------------------
56
57local prof_ud
58local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
59local prof_ann, prof_count1, prof_count2, prof_samples
60
61local map_vmmode = {
62 N = "Compiled",
63 I = "Interpreted",
64 C = "C code",
65 G = "Garbage Collector",
66 J = "JIT Compiler",
67}
68
69-- Profiler callback.
70local function prof_cb(th, samples, vmmode)
71 prof_samples = prof_samples + samples
72 local key_stack, key_stack2, key_state
73 -- Collect keys for sample.
74 if prof_states then
75 if prof_states == "v" then
76 key_state = map_vmmode[vmmode] or vmmode
77 else
78 key_state = zone:get() or "(none)"
79 end
80 end
81 if prof_fmt then
82 key_stack = profile.dumpstack(th, prof_fmt, prof_depth)
83 key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x)
84 return vmdef.ffnames[tonumber(x)]
85 end)
86 if prof_split == 2 then
87 local k1, k2 = key_stack:match("(.-) [<>] (.*)")
88 if k2 then key_stack, key_stack2 = k1, k2 end
89 elseif prof_split == 3 then
90 key_stack2 = profile.dumpstack(th, "l", 1)
91 end
92 end
93 -- Order keys.
94 local k1, k2
95 if prof_split == 1 then
96 if key_state then
97 k1 = key_state
98 if key_stack then k2 = key_stack end
99 end
100 elseif key_stack then
101 k1 = key_stack
102 if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end
103 end
104 -- Coalesce samples in one or two levels.
105 if k1 then
106 local t1 = prof_count1
107 t1[k1] = (t1[k1] or 0) + samples
108 if k2 then
109 local t2 = prof_count2
110 local t3 = t2[k1]
111 if not t3 then t3 = {}; t2[k1] = t3 end
112 t3[k2] = (t3[k2] or 0) + samples
113 end
114 end
115end
116
117------------------------------------------------------------------------------
118
119-- Show top N list.
120local function prof_top(count1, count2, samples, indent)
121 local t, n = {}, 0
122 for k in pairs(count1) do
123 n = n + 1
124 t[n] = k
125 end
126 sort(t, function(a, b) return count1[a] > count1[b] end)
127 for i=1,n do
128 local k = t[i]
129 local v = count1[k]
130 local pct = floor(v*100/samples + 0.5)
131 if pct < prof_min then break end
132 if not prof_raw then
133 out:write(format("%s%2d%% %s\n", indent, pct, k))
134 elseif prof_raw == "r" then
135 out:write(format("%s%5d %s\n", indent, v, k))
136 else
137 out:write(format("%s %d\n", k, v))
138 end
139 if count2 then
140 local r = count2[k]
141 if r then
142 prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and " -- " or
143 (prof_depth < 0 and " -> " or " <- "))
144 end
145 end
146 end
147end
148
149-- Annotate source code
150local function prof_annotate(count1, samples)
151 local files = {}
152 local ms = 0
153 for k, v in pairs(count1) do
154 local pct = floor(v*100/samples + 0.5)
155 ms = math.max(ms, v)
156 if pct >= prof_min then
157 local file, line = k:match("^(.*):(%d+)$")
158 if not file then file = k; line = 0 end
159 local fl = files[file]
160 if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
161 line = tonumber(line)
162 fl[line] = prof_raw and v or pct
163 end
164 end
165 sort(files)
166 local fmtv, fmtn = " %3d%% | %s\n", " | %s\n"
167 if prof_raw then
168 local n = math.max(5, math.ceil(math.log10(ms)))
169 fmtv = "%"..n.."d | %s\n"
170 fmtn = (" "):rep(n).." | %s\n"
171 end
172 local ann = prof_ann
173 for _, file in ipairs(files) do
174 local f0 = file:byte()
175 if f0 == 40 or f0 == 91 then
176 out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
177 break
178 end
179 local fp, err = io.open(file)
180 if not fp then
181 out:write(format("====== ERROR: %s: %s\n", file, err))
182 break
183 end
184 out:write(format("\n====== %s ======\n", file))
185 local fl = files[file]
186 local n, show = 1, false
187 if ann ~= 0 then
188 for i=1,ann do
189 if fl[i] then show = true; out:write("@@ 1 @@\n"); break end
190 end
191 end
192 for line in fp:lines() do
193 if line:byte() == 27 then
194 out:write("[Cannot annotate bytecode file]\n")
195 break
196 end
197 local v = fl[n]
198 if ann ~= 0 then
199 local v2 = fl[n+ann]
200 if show then
201 if v2 then show = n+ann elseif v then show = n
202 elseif show+ann < n then show = false end
203 elseif v2 then
204 show = n+ann
205 out:write(format("@@ %d @@\n", n))
206 end
207 if not show then goto next end
208 end
209 if v then
210 out:write(format(fmtv, v, line))
211 else
212 out:write(format(fmtn, line))
213 end
214 ::next::
215 n = n + 1
216 end
217 fp:close()
218 end
219end
220
221------------------------------------------------------------------------------
222
223-- Finish profiling and dump result.
224local function prof_finish()
225 if prof_ud then
226 profile.stop()
227 local samples = prof_samples
228 if samples == 0 then
229 if prof_raw ~= true then out:write("[No samples collected]\n") end
230 elseif prof_ann then
231 prof_annotate(prof_count1, samples)
232 else
233 prof_top(prof_count1, prof_count2, samples, "")
234 end
235 prof_count1 = nil
236 prof_count2 = nil
237 prof_ud = nil
238 if out ~= stdout then out:close() end
239 end
240end
241
242-- Start profiling.
243local function prof_start(mode)
244 local interval = ""
245 mode = mode:gsub("i%d*", function(s) interval = s; return "" end)
246 prof_min = 3
247 mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end)
248 prof_depth = 1
249 mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end)
250 local m = {}
251 for c in mode:gmatch(".") do m[c] = c end
252 prof_states = m.z or m.v
253 if prof_states == "z" then zone = require("jit.zone") end
254 local scope = m.l or m.f or m.F or (prof_states and "" or "f")
255 local flags = (m.p or "")
256 prof_raw = m.r
257 if m.s then
258 prof_split = 2
259 if prof_depth == -1 or m["-"] then prof_depth = -2
260 elseif prof_depth == 1 then prof_depth = 2 end
261 elseif mode:find("[fF].*l") then
262 scope = "l"
263 prof_split = 3
264 else
265 prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0
266 end
267 prof_ann = m.A and 0 or (m.a and 3)
268 if prof_ann then
269 scope = "l"
270 prof_fmt = "pl"
271 prof_split = 0
272 prof_depth = 1
273 elseif m.G and scope ~= "" then
274 prof_fmt = flags..scope.."Z;"
275 prof_depth = -100
276 prof_raw = true
277 prof_min = 0
278 elseif scope == "" then
279 prof_fmt = false
280 else
281 local sc = prof_split == 3 and m.f or m.F or scope
282 prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ")
283 end
284 prof_count1 = {}
285 prof_count2 = {}
286 prof_samples = 0
287 profile.start(scope:lower()..interval, prof_cb)
288 prof_ud = newproxy(true)
289 getmetatable(prof_ud).__gc = prof_finish
290end
291
292------------------------------------------------------------------------------
293
294local function start(mode, outfile)
295 if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end
296 if outfile then
297 out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
298 else
299 out = stdout
300 end
301 prof_start(mode or "f")
302end
303
304-- Public module functions.
305return {
306 start = start, -- For -j command line option.
307 stop = prof_finish
308}
309
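The feature list in the header above describes the -jp command-line forms; the module can also be driven directly through the start/stop functions it returns. A small sketch, where the "vf" mode string simply combines the v and f features listed above:

  local prof = require("jit.p")
  prof.start("vf", "profile.txt")   -- VM states plus a one-level function stack dump
  -- ... code to be profiled ...
  prof.stop()
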
diff --git a/src/jit/v.lua b/src/jit/v.lua
index 29edcf2b..45a663d7 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -62,7 +62,7 @@ local jit = require("jit")
62local jutil = require("jit.util") 62local jutil = require("jit.util")
63local vmdef = require("jit.vmdef") 63local vmdef = require("jit.vmdef")
64local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 64local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
65local type, format = type, string.format 65local type, sub, format = type, string.sub, string.format
66local stdout, stderr = io.stdout, io.stderr 66local stdout, stderr = io.stdout, io.stderr
67 67
68-- Active flag and output file handle. 68-- Active flag and output file handle.
@@ -89,7 +89,12 @@ end
89local function fmterr(err, info) 89local function fmterr(err, info)
90 if type(err) == "number" then 90 if type(err) == "number" then
91 if type(info) == "function" then info = fmtfunc(info) end 91 if type(info) == "function" then info = fmtfunc(info) end
92 err = format(vmdef.traceerr[err], info) 92 local fmt = vmdef.traceerr[err]
93 if fmt == "NYI: bytecode %s" then
94 local oidx = 6 * info
95 info = sub(vmdef.bcnames, oidx+1, oidx+6)
96 end
97 err = format(fmt, info)
93 end 98 end
94 return err 99 return err
95end 100end
@@ -98,7 +103,7 @@ end
98local function dump_trace(what, tr, func, pc, otr, oex) 103local function dump_trace(what, tr, func, pc, otr, oex)
99 if what == "start" then 104 if what == "start" then
100 startloc = fmtfunc(func, pc) 105 startloc = fmtfunc(func, pc)
101 startex = otr and "("..otr.."/"..oex..") " or "" 106 startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or ""
102 else 107 else
103 if what == "abort" then 108 if what == "abort" then
104 local loc = fmtfunc(func, pc) 109 local loc = fmtfunc(func, pc)
@@ -115,6 +120,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
115 if ltype == "interpreter" then 120 if ltype == "interpreter" then
116 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", 121 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
117 tr, startex, startloc)) 122 tr, startex, startloc))
123 elseif ltype == "stitch" then
124 out:write(format("[TRACE %3s %s%s %s %s]\n",
125 tr, startex, startloc, ltype, fmtfunc(func, pc)))
118 elseif link == tr or link == 0 then 126 elseif link == tr or link == 0 then
119 out:write(format("[TRACE %3s %s%s %s]\n", 127 out:write(format("[TRACE %3s %s%s %s]\n",
120 tr, startex, startloc, ltype)) 128 tr, startex, startloc, ltype))
@@ -158,9 +166,9 @@ local function dumpon(outfile)
158end 166end
159 167
160-- Public module functions. 168-- Public module functions.
161module(...) 169return {
162 170 on = dumpon,
163on = dumpon 171 off = dumpoff,
164off = dumpoff 172 start = dumpon -- For -j command line option.
165start = dumpon -- For -j command line option. 173}
166 174
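As with dump.lua, jit.v now returns its on/off/start functions as a table. A minimal sketch of enabling the verbose trace log from Lua code; the behaviour when the filename is omitted is assumed to fall back to the module's default output stream.

  local verbose = require("jit.v")
  verbose.on("verbose.txt")   -- omit the filename to use the default output stream
  -- ... traces started, stitched or aborted while running are reported here ...
  verbose.off()
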
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
new file mode 100644
index 00000000..55dc76d3
--- /dev/null
+++ b/src/jit/zone.lua
@@ -0,0 +1,45 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler zones.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module implements a simple hierarchical zone model.
9--
10-- Example usage:
11--
12-- local zone = require("jit.zone")
13-- zone("AI")
14-- ...
15-- zone("A*")
16-- ...
17-- print(zone:get()) --> "A*"
18-- ...
19-- zone()
20-- ...
21-- print(zone:get()) --> "AI"
22-- ...
23-- zone()
24--
25----------------------------------------------------------------------------
26
27local remove = table.remove
28
29return setmetatable({
30 flush = function(t)
31 for i=#t,1,-1 do t[i] = nil end
32 end,
33 get = function(t)
34 return t[#t]
35 end
36}, {
37 __call = function(t, zone)
38 if zone then
39 t[#t+1] = zone
40 else
41 return (assert(remove(t), "empty zone stack"))
42 end
43 end
44})
45
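The header above already shows the push/pop zone API; the sketch below only adds the tie-in with the profiler's z mode documented in p.lua (run the script with luajit -jp=z so samples are attributed to the active zone). Not part of the patch; the zone name is arbitrary.

  local zone = require("jit.zone")
  zone("render")
  -- ... samples taken here are attributed to the "render" zone under -jp=z ...
  zone()
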
diff --git a/src/lauxlib.h b/src/lauxlib.h
index fed1491b..a44f0272 100644
--- a/src/lauxlib.h
+++ b/src/lauxlib.h
@@ -15,9 +15,6 @@
15#include "lua.h" 15#include "lua.h"
16 16
17 17
18#define luaL_getn(L,i) ((int)lua_objlen(L, i))
19#define luaL_setn(L,i,j) ((void)0) /* no op! */
20
21/* extra error code for `luaL_load' */ 18/* extra error code for `luaL_load' */
22#define LUA_ERRFILE (LUA_ERRERR+1) 19#define LUA_ERRFILE (LUA_ERRERR+1)
23 20
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
58LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, 55LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
59 const char *const lst[]); 56 const char *const lst[]);
60 57
58/* pre-defined references */
59#define LUA_NOREF (-2)
60#define LUA_REFNIL (-1)
61
61LUALIB_API int (luaL_ref) (lua_State *L, int t); 62LUALIB_API int (luaL_ref) (lua_State *L, int t);
62LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); 63LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
63 64
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz,
84 const char *name, const char *mode); 85 const char *name, const char *mode);
85LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, 86LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
86 int level); 87 int level);
88LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup);
89LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname,
90 int sizehint);
91LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname);
92LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname);
87 93
88 94
89/* 95/*
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
113 119
114#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) 120#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
115 121
122/* From Lua 5.2. */
123#define luaL_newlibtable(L, l) \
124 lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1)
125#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0))
126
116/* 127/*
117** {====================================================== 128** {======================================================
118** Generic Buffer manipulation 129** Generic Buffer manipulation
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
147 158
148/* }====================================================== */ 159/* }====================================================== */
149 160
150
151/* compatibility with ref system */
152
153/* pre-defined references */
154#define LUA_NOREF (-2)
155#define LUA_REFNIL (-1)
156
157#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
158 (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
159
160#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref))
161
162#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
163
164
165#define luaL_reg luaL_Reg
166
167#endif 161#endif
diff --git a/src/lib_aux.c b/src/lib_aux.c
index 8a64f185..7e81ac30 100644
--- a/src/lib_aux.c
+++ b/src/lib_aux.c
@@ -21,6 +21,7 @@
21#include "lj_state.h" 21#include "lj_state.h"
22#include "lj_trace.h" 22#include "lj_trace.h"
23#include "lj_lib.h" 23#include "lj_lib.h"
24#include "lj_vmevent.h"
24 25
25#if LJ_TARGET_POSIX 26#if LJ_TARGET_POSIX
26#include <sys/wait.h> 27#include <sys/wait.h>
@@ -107,38 +108,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
107static int libsize(const luaL_Reg *l) 108static int libsize(const luaL_Reg *l)
108{ 109{
109 int size = 0; 110 int size = 0;
110 for (; l->name; l++) size++; 111 for (; l && l->name; l++) size++;
111 return size; 112 return size;
112} 113}
113 114
115LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint)
116{
117 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
118 lua_getfield(L, -1, modname);
119 if (!lua_istable(L, -1)) {
120 lua_pop(L, 1);
121 if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL)
122 lj_err_callerv(L, LJ_ERR_BADMODN, modname);
123 lua_pushvalue(L, -1);
124 lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */
125 }
126 lua_remove(L, -2); /* Remove _LOADED table. */
127}
128
114LUALIB_API void luaL_openlib(lua_State *L, const char *libname, 129LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
115 const luaL_Reg *l, int nup) 130 const luaL_Reg *l, int nup)
116{ 131{
117 lj_lib_checkfpu(L); 132 lj_lib_checkfpu(L);
118 if (libname) { 133 if (libname) {
119 int size = libsize(l); 134 luaL_pushmodule(L, libname, libsize(l));
120 /* check whether lib already exists */ 135 lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */
121 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
122 lua_getfield(L, -1, libname); /* get _LOADED[libname] */
123 if (!lua_istable(L, -1)) { /* not found? */
124 lua_pop(L, 1); /* remove previous result */
125 /* try global variable (and create one if it does not exist) */
126 if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
127 lj_err_callerv(L, LJ_ERR_BADMODN, libname);
128 lua_pushvalue(L, -1);
129 lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
130 }
131 lua_remove(L, -2); /* remove _LOADED table */
132 lua_insert(L, -(nup+1)); /* move library table to below upvalues */
133 } 136 }
134 for (; l->name; l++) { 137 if (l)
135 int i; 138 luaL_setfuncs(L, l, nup);
136 for (i = 0; i < nup; i++) /* copy upvalues to the top */ 139 else
137 lua_pushvalue(L, -nup); 140 lua_pop(L, nup); /* Remove upvalues. */
138 lua_pushcclosure(L, l->func, nup);
139 lua_setfield(L, -(nup+2), l->name);
140 }
141 lua_pop(L, nup); /* remove upvalues */
142} 141}
143 142
144LUALIB_API void luaL_register(lua_State *L, const char *libname, 143LUALIB_API void luaL_register(lua_State *L, const char *libname,
@@ -147,6 +146,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname,
147 luaL_openlib(L, libname, l, 0); 146 luaL_openlib(L, libname, l, 0);
148} 147}
149 148
149LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
150{
151 luaL_checkstack(L, nup, "too many upvalues");
152 for (; l->name; l++) {
153 int i;
154 for (i = 0; i < nup; i++) /* Copy upvalues to the top. */
155 lua_pushvalue(L, -nup);
156 lua_pushcclosure(L, l->func, nup);
157 lua_setfield(L, -(nup + 2), l->name);
158 }
159 lua_pop(L, nup); /* Remove upvalues. */
160}
161
150LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, 162LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
151 const char *p, const char *r) 163 const char *p, const char *r)
152{ 164{
@@ -207,8 +219,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
207 219
208LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) 220LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
209{ 221{
210 while (l--) 222 if (l <= bufffree(B)) {
211 luaL_addchar(B, *s++); 223 memcpy(B->p, s, l);
224 B->p += l;
225 } else {
226 emptybuffer(B);
227 lua_pushlstring(B->L, s, l);
228 B->lvl++;
229 adjuststack(B);
230 }
212} 231}
213 232
214LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) 233LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
@@ -300,9 +319,21 @@ static int panic(lua_State *L)
300 return 0; 319 return 0;
301} 320}
302 321
322#ifndef LUAJIT_DISABLE_VMEVENT
323static int error_finalizer(lua_State *L)
324{
325 const char *s = lua_tostring(L, -1);
326 fputs("ERROR in finalizer: ", stderr);
327 fputs(s ? s : "?", stderr);
328 fputc('\n', stderr);
329 fflush(stderr);
330 return 0;
331}
332#endif
333
303#ifdef LUAJIT_USE_SYSMALLOC 334#ifdef LUAJIT_USE_SYSMALLOC
304 335
305#if LJ_64 && !defined(LUAJIT_USE_VALGRIND) 336#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
306#error "Must use builtin allocator for 64 bit target" 337#error "Must use builtin allocator for 64 bit target"
307#endif 338#endif
308 339
@@ -321,29 +352,43 @@ static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
321LUALIB_API lua_State *luaL_newstate(void) 352LUALIB_API lua_State *luaL_newstate(void)
322{ 353{
323 lua_State *L = lua_newstate(mem_alloc, NULL); 354 lua_State *L = lua_newstate(mem_alloc, NULL);
324 if (L) G(L)->panic = panic; 355 if (L) {
356 G(L)->panic = panic;
357#ifndef LUAJIT_DISABLE_VMEVENT
358 luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
359 lua_pushcfunction(L, error_finalizer);
360 lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN));
361 G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN);
362 L->top--;
363#endif
364 }
325 return L; 365 return L;
326} 366}
327 367
328#else 368#else
329 369
330#include "lj_alloc.h"
331
332LUALIB_API lua_State *luaL_newstate(void) 370LUALIB_API lua_State *luaL_newstate(void)
333{ 371{
334 lua_State *L; 372 lua_State *L;
335 void *ud = lj_alloc_create(); 373#if LJ_64 && !LJ_GC64
336 if (ud == NULL) return NULL; 374 L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL);
337#if LJ_64
338 L = lj_state_newstate(lj_alloc_f, ud);
339#else 375#else
340 L = lua_newstate(lj_alloc_f, ud); 376 L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL);
341#endif 377#endif
342 if (L) G(L)->panic = panic; 378 if (L) {
379 G(L)->panic = panic;
380#ifndef LUAJIT_DISABLE_VMEVENT
381 luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
382 lua_pushcfunction(L, error_finalizer);
383 lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN));
384 G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN);
385 L->top--;
386#endif
387 }
343 return L; 388 return L;
344} 389}
345 390
346#if LJ_64 391#if LJ_64 && !LJ_GC64
347LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) 392LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
348{ 393{
349 UNUSED(f); UNUSED(ud); 394 UNUSED(f); UNUSED(ud);
diff --git a/src/lib_base.c b/src/lib_base.c
index a687411f..d644b4f2 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -19,10 +19,12 @@
19#include "lj_gc.h" 19#include "lj_gc.h"
20#include "lj_err.h" 20#include "lj_err.h"
21#include "lj_debug.h" 21#include "lj_debug.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_tab.h" 24#include "lj_tab.h"
24#include "lj_meta.h" 25#include "lj_meta.h"
25#include "lj_state.h" 26#include "lj_state.h"
27#include "lj_frame.h"
26#if LJ_HASFFI 28#if LJ_HASFFI
27#include "lj_ctype.h" 29#include "lj_ctype.h"
28#include "lj_cconv.h" 30#include "lj_cconv.h"
@@ -32,6 +34,7 @@
32#include "lj_dispatch.h" 34#include "lj_dispatch.h"
33#include "lj_char.h" 35#include "lj_char.h"
34#include "lj_strscan.h" 36#include "lj_strscan.h"
37#include "lj_strfmt.h"
35#include "lj_lib.h" 38#include "lj_lib.h"
36 39
37/* -- Base library: checks ------------------------------------------------ */ 40/* -- Base library: checks ------------------------------------------------ */
@@ -40,13 +43,13 @@
40 43
41LJLIB_ASM(assert) LJLIB_REC(.) 44LJLIB_ASM(assert) LJLIB_REC(.)
42{ 45{
43 GCstr *s;
44 lj_lib_checkany(L, 1); 46 lj_lib_checkany(L, 1);
45 s = lj_lib_optstr(L, 2); 47 if (L->top == L->base+1)
46 if (s)
47 lj_err_callermsg(L, strdata(s));
48 else
49 lj_err_caller(L, LJ_ERR_ASSERT); 48 lj_err_caller(L, LJ_ERR_ASSERT);
49 else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
50 lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
51 else
52 lj_err_run(L);
50 return FFH_UNREACHABLE; 53 return FFH_UNREACHABLE;
51} 54}
52 55
@@ -73,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.)
73/* This solves a circular dependency problem -- change FF_next_N as needed. */ 76/* This solves a circular dependency problem -- change FF_next_N as needed. */
74LJ_STATIC_ASSERT((int)FF_next == FF_next_N); 77LJ_STATIC_ASSERT((int)FF_next == FF_next_N);
75 78
76LJLIB_ASM(next) 79LJLIB_ASM(next) LJLIB_REC(.)
77{ 80{
78 lj_lib_checktab(L, 1); 81 lj_lib_checktab(L, 1);
82 lj_err_msg(L, LJ_ERR_NEXTIDX);
79 return FFH_UNREACHABLE; 83 return FFH_UNREACHABLE;
80} 84}
81 85
@@ -86,10 +90,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
86 cTValue *mo = lj_meta_lookup(L, o, mm); 90 cTValue *mo = lj_meta_lookup(L, o, mm);
87 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { 91 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
88 L->top = o+1; /* Only keep one argument. */ 92 L->top = o+1; /* Only keep one argument. */
89 copyTV(L, L->base-1, mo); /* Replace callable. */ 93 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
90 return FFH_TAILCALL; 94 return FFH_TAILCALL;
91 } else { 95 } else {
92 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); 96 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
97 if (LJ_FR2) { copyTV(L, o-1, o); o--; }
93 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); 98 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
94 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); 99 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
95 return FFH_RES(3); 100 return FFH_RES(3);
@@ -100,7 +105,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
100#endif 105#endif
101 106
102LJLIB_PUSH(lastcl) 107LJLIB_PUSH(lastcl)
103LJLIB_ASM(pairs) 108LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
104{ 109{
105 return ffh_pairs(L, MM_pairs); 110 return ffh_pairs(L, MM_pairs);
106} 111}
@@ -113,7 +118,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
113} 118}
114 119
115LJLIB_PUSH(lastcl) 120LJLIB_PUSH(lastcl)
116LJLIB_ASM(ipairs) LJLIB_REC(.) 121LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
117{ 122{
118 return ffh_pairs(L, MM_ipairs); 123 return ffh_pairs(L, MM_ipairs);
119} 124}
@@ -131,11 +136,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.)
131 lj_err_caller(L, LJ_ERR_PROTMT); 136 lj_err_caller(L, LJ_ERR_PROTMT);
132 setgcref(t->metatable, obj2gco(mt)); 137 setgcref(t->metatable, obj2gco(mt));
133 if (mt) { lj_gc_objbarriert(L, t, mt); } 138 if (mt) { lj_gc_objbarriert(L, t, mt); }
134 settabV(L, L->base-1, t); 139 settabV(L, L->base-1-LJ_FR2, t);
135 return FFH_RES(1); 140 return FFH_RES(1);
136} 141}
137 142
138LJLIB_CF(getfenv) 143LJLIB_CF(getfenv) LJLIB_REC(.)
139{ 144{
140 GCfunc *fn; 145 GCfunc *fn;
141 cTValue *o = L->base; 146 cTValue *o = L->base;
@@ -144,6 +149,7 @@ LJLIB_CF(getfenv)
144 o = lj_debug_frame(L, level, &level); 149 o = lj_debug_frame(L, level, &level);
145 if (o == NULL) 150 if (o == NULL)
146 lj_err_arg(L, 1, LJ_ERR_INVLVL); 151 lj_err_arg(L, 1, LJ_ERR_INVLVL);
152 if (LJ_FR2) o--;
147 } 153 }
148 fn = &gcval(o)->fn; 154 fn = &gcval(o)->fn;
149 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); 155 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +171,7 @@ LJLIB_CF(setfenv)
165 o = lj_debug_frame(L, level, &level); 171 o = lj_debug_frame(L, level, &level);
166 if (o == NULL) 172 if (o == NULL)
167 lj_err_arg(L, 1, LJ_ERR_INVLVL); 173 lj_err_arg(L, 1, LJ_ERR_INVLVL);
174 if (LJ_FR2) o--;
168 } 175 }
169 fn = &gcval(o)->fn; 176 fn = &gcval(o)->fn;
170 if (!isluafunc(fn)) 177 if (!isluafunc(fn))
@@ -259,7 +266,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
259 if (base == 10) { 266 if (base == 10) {
260 TValue *o = lj_lib_checkany(L, 1); 267 TValue *o = lj_lib_checkany(L, 1);
261 if (lj_strscan_numberobj(o)) { 268 if (lj_strscan_numberobj(o)) {
262 copyTV(L, L->base-1, o); 269 copyTV(L, L->base-1-LJ_FR2, o);
263 return FFH_RES(1); 270 return FFH_RES(1);
264 } 271 }
265#if LJ_HASFFI 272#if LJ_HASFFI
@@ -272,11 +279,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
272 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { 279 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
273 int32_t i; 280 int32_t i;
274 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); 281 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
275 setintV(L->base-1, i); 282 setintV(L->base-1-LJ_FR2, i);
276 return FFH_RES(1); 283 return FFH_RES(1);
277 } 284 }
278 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), 285 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
279 (uint8_t *)&(L->base-1)->n, o, 0); 286 (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
280 return FFH_RES(1); 287 return FFH_RES(1);
281 } 288 }
282 } 289 }
@@ -284,53 +291,46 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
284 } else { 291 } else {
285 const char *p = strdata(lj_lib_checkstr(L, 1)); 292 const char *p = strdata(lj_lib_checkstr(L, 1));
286 char *ep; 293 char *ep;
294 unsigned int neg = 0;
287 unsigned long ul; 295 unsigned long ul;
288 if (base < 2 || base > 36) 296 if (base < 2 || base > 36)
289 lj_err_arg(L, 2, LJ_ERR_BASERNG); 297 lj_err_arg(L, 2, LJ_ERR_BASERNG);
290 ul = strtoul(p, &ep, base); 298 while (lj_char_isspace((unsigned char)(*p))) p++;
291 if (p != ep) { 299 if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
292 while (lj_char_isspace((unsigned char)(*ep))) ep++; 300 if (lj_char_isalnum((unsigned char)(*p))) {
293 if (*ep == '\0') { 301 ul = strtoul(p, &ep, base);
294 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) 302 if (p != ep) {
295 setintV(L->base-1, (int32_t)ul); 303 while (lj_char_isspace((unsigned char)(*ep))) ep++;
296 else 304 if (*ep == '\0') {
297 setnumV(L->base-1, (lua_Number)ul); 305 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
298 return FFH_RES(1); 306 if (neg) ul = ~ul+1u;
307 setintV(L->base-1-LJ_FR2, (int32_t)ul);
308 } else {
309 lua_Number n = (lua_Number)ul;
310 if (neg) n = -n;
311 setnumV(L->base-1-LJ_FR2, n);
312 }
313 return FFH_RES(1);
314 }
299 } 315 }
300 } 316 }
301 } 317 }
302 setnilV(L->base-1); 318 setnilV(L->base-1-LJ_FR2);
303 return FFH_RES(1); 319 return FFH_RES(1);
304} 320}
305 321
306LJLIB_PUSH("nil")
307LJLIB_PUSH("false")
308LJLIB_PUSH("true")
309LJLIB_ASM(tostring) LJLIB_REC(.) 322LJLIB_ASM(tostring) LJLIB_REC(.)
310{ 323{
311 TValue *o = lj_lib_checkany(L, 1); 324 TValue *o = lj_lib_checkany(L, 1);
312 cTValue *mo; 325 cTValue *mo;
313 L->top = o+1; /* Only keep one argument. */ 326 L->top = o+1; /* Only keep one argument. */
314 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 327 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
315 copyTV(L, L->base-1, mo); /* Replace callable. */ 328 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
316 return FFH_TAILCALL; 329 return FFH_TAILCALL;
317 } else {
318 GCstr *s;
319 if (tvisnumber(o)) {
320 s = lj_str_fromnumber(L, o);
321 } else if (tvispri(o)) {
322 s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
323 } else {
324 if (tvisfunc(o) && isffunc(funcV(o)))
325 lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
326 else
327 lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
328 /* Note: lua_pushfstring calls the GC which may invalidate o. */
329 s = strV(L->top-1);
330 }
331 setstrV(L, L->base-1, s);
332 return FFH_RES(1);
333 } 330 }
331 lj_gc_check(L);
332 setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
333 return FFH_RES(1);
334} 334}
335 335
336/* -- Base library: throw and catch errors -------------------------------- */ 336/* -- Base library: throw and catch errors -------------------------------- */
@@ -359,8 +359,12 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
359 359
360static int load_aux(lua_State *L, int status, int envarg) 360static int load_aux(lua_State *L, int status, int envarg)
361{ 361{
362 if (status == 0) { 362 if (status == LUA_OK) {
363 if (tvistab(L->base+envarg-1)) { 363 /*
364 ** Set environment table for top-level function.
365 ** Don't do this for non-native bytecode, which returns a prototype.
366 */
367 if (tvistab(L->base+envarg-1) && tvisfunc(L->top-1)) {
364 GCfunc *fn = funcV(L->top-1); 368 GCfunc *fn = funcV(L->top-1);
365 GCtab *t = tabV(L->base+envarg-1); 369 GCtab *t = tabV(L->base+envarg-1);
366 setgcref(fn->c.env, obj2gco(t)); 370 setgcref(fn->c.env, obj2gco(t));
@@ -408,10 +412,22 @@ LJLIB_CF(load)
408 GCstr *name = lj_lib_optstr(L, 2); 412 GCstr *name = lj_lib_optstr(L, 2);
409 GCstr *mode = lj_lib_optstr(L, 3); 413 GCstr *mode = lj_lib_optstr(L, 3);
410 int status; 414 int status;
411 if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) { 415 if (L->base < L->top &&
412 GCstr *s = lj_lib_checkstr(L, 1); 416 (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) {
417 const char *s;
418 MSize len;
419 if (tvisbuf(L->base)) {
420 SBufExt *sbx = bufV(L->base);
421 s = sbx->r;
422 len = sbufxlen(sbx);
423 if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */
424 } else {
425 GCstr *str = lj_lib_checkstr(L, 1);
426 s = strdata(str);
427 len = str->len;
428 }
413 lua_settop(L, 4); /* Ensure env arg exists. */ 429 lua_settop(L, 4); /* Ensure env arg exists. */
414 status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s), 430 status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s,
415 mode ? strdata(mode) : NULL); 431 mode ? strdata(mode) : NULL);
416 } else { 432 } else {
417 lj_lib_checkfunc(L, 1); 433 lj_lib_checkfunc(L, 1);
@@ -432,7 +448,7 @@ LJLIB_CF(dofile)
432 GCstr *fname = lj_lib_optstr(L, 1); 448 GCstr *fname = lj_lib_optstr(L, 1);
433 setnilV(L->top); 449 setnilV(L->top);
434 L->top = L->base+1; 450 L->top = L->base+1;
435 if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) 451 if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK)
436 lua_error(L); 452 lua_error(L);
437 lua_call(L, 0, LUA_MULTRET); 453 lua_call(L, 0, LUA_MULTRET);
438 return (int)(L->top - L->base) - 1; 454 return (int)(L->top - L->base) - 1;
@@ -442,20 +458,20 @@ LJLIB_CF(dofile)
442 458
443LJLIB_CF(gcinfo) 459LJLIB_CF(gcinfo)
444{ 460{
445 setintV(L->top++, (G(L)->gc.total >> 10)); 461 setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
446 return 1; 462 return 1;
447} 463}
448 464
449LJLIB_CF(collectgarbage) 465LJLIB_CF(collectgarbage)
450{ 466{
451 int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ 467 int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */
452 "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); 468 "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
453 int32_t data = lj_lib_optint(L, 2, 0); 469 int32_t data = lj_lib_optint(L, 2, 0);
454 if (opt == LUA_GCCOUNT) { 470 if (opt == LUA_GCCOUNT) {
455 setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); 471 setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
456 } else { 472 } else {
457 int res = lua_gc(L, opt, data); 473 int res = lua_gc(L, opt, data);
458 if (opt == LUA_GCSTEP) 474 if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
459 setboolV(L->top, res); 475 setboolV(L->top, res);
460 else 476 else
461 setintV(L->top, res); 477 setintV(L->top, res);
@@ -507,23 +523,14 @@ LJLIB_CF(print)
507 tv = L->top-1; 523 tv = L->top-1;
508 } 524 }
509 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) && 525 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) &&
510 !gcrefu(basemt_it(G(L), LJ_TNUMX)); 526 !gcrefu(basemt_it(G(L), LJ_TNUMX));
511 for (i = 0; i < nargs; i++) { 527 for (i = 0; i < nargs; i++) {
528 cTValue *o = &L->base[i];
512 const char *str; 529 const char *str;
513 size_t size; 530 size_t size;
514 cTValue *o = &L->base[i]; 531 MSize len;
515 if (shortcut && tvisstr(o)) { 532 if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) {
516 str = strVdata(o); 533 size = len;
517 size = strV(o)->len;
518 } else if (shortcut && tvisint(o)) {
519 char buf[LJ_STR_INTBUF];
520 char *p = lj_str_bufint(buf, intV(o));
521 size = (size_t)(buf+LJ_STR_INTBUF-p);
522 str = p;
523 } else if (shortcut && tvisnum(o)) {
524 char buf[LJ_STR_NUMBUF];
525 size = lj_str_bufnum(buf, o);
526 str = buf;
527 } else { 534 } else {
528 copyTV(L, L->top+1, o); 535 copyTV(L, L->top+1, o);
529 copyTV(L, L->top, L->top-1); 536 copyTV(L, L->top, L->top-1);
@@ -560,8 +567,8 @@ LJLIB_CF(coroutine_status)
560 co = threadV(L->base); 567 co = threadV(L->base);
561 if (co == L) s = "running"; 568 if (co == L) s = "running";
562 else if (co->status == LUA_YIELD) s = "suspended"; 569 else if (co->status == LUA_YIELD) s = "suspended";
563 else if (co->status != 0) s = "dead"; 570 else if (co->status != LUA_OK) s = "dead";
564 else if (co->base > tvref(co->stack)+1) s = "normal"; 571 else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
565 else if (co->top == co->base) s = "dead"; 572 else if (co->top == co->base) s = "dead";
566 else s = "suspended"; 573 else s = "suspended";
567 lua_pushstring(L, s); 574 lua_pushstring(L, s);
@@ -581,6 +588,12 @@ LJLIB_CF(coroutine_running)
581#endif 588#endif
582} 589}
583 590
591LJLIB_CF(coroutine_isyieldable)
592{
593 setboolV(L->top++, cframe_canyield(L->cframe));
594 return 1;
595}
596
584LJLIB_CF(coroutine_create) 597LJLIB_CF(coroutine_create)
585{ 598{
586 lua_State *L1; 599 lua_State *L1;
@@ -600,14 +613,17 @@ LJLIB_ASM(coroutine_yield)
600static int ffh_resume(lua_State *L, lua_State *co, int wrap) 613static int ffh_resume(lua_State *L, lua_State *co, int wrap)
601{ 614{
602 if (co->cframe != NULL || co->status > LUA_YIELD || 615 if (co->cframe != NULL || co->status > LUA_YIELD ||
603 (co->status == 0 && co->top == co->base)) { 616 (co->status == LUA_OK && co->top == co->base)) {
604 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; 617 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
605 if (wrap) lj_err_caller(L, em); 618 if (wrap) lj_err_caller(L, em);
606 setboolV(L->base-1, 0); 619 setboolV(L->base-1-LJ_FR2, 0);
607 setstrV(L, L->base, lj_err_str(L, em)); 620 setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
608 return FFH_RES(2); 621 return FFH_RES(2);
609 } 622 }
610 lj_state_growstack(co, (MSize)(L->top - L->base)); 623 if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
624 cTValue *msg = --co->top;
625 lj_err_callermsg(L, strVdata(msg));
626 }
611 return FFH_RETRY; 627 return FFH_RETRY;
612} 628}
613 629
@@ -645,9 +661,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
645 661
646LJLIB_CF(coroutine_wrap) 662LJLIB_CF(coroutine_wrap)
647{ 663{
664 GCfunc *fn;
648 lj_cf_coroutine_create(L); 665 lj_cf_coroutine_create(L);
649 lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); 666 fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
650 setpc_wrap_aux(L, funcV(L->top-1)); 667 setpc_wrap_aux(L, fn);
651 return 1; 668 return 1;
652} 669}
653 670
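The lib_base.c changes above extend load()/loadstring() to accept string buffer objects in addition to strings and numbers, add the "isrunning" option to collectgarbage(), and introduce coroutine.isyieldable(). A minimal Lua-level sketch of the resulting behaviour (the buffer object is assumed to come from the new string.buffer module added further below):

  local buffer = require("string.buffer")
  local buf = buffer.new():put("return 1 + 2")
  local f = assert(load(buf))              -- load() now accepts a string buffer.
  assert(f() == 3)

  print(collectgarbage("isrunning"))       -- New option, returns a boolean.

  print(coroutine.isyieldable())           -- Typically false on the main thread.
  coroutine.wrap(function()
    print(coroutine.isyieldable())         -- True inside a running coroutine.
  end)()
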
diff --git a/src/lib_bit.c b/src/lib_bit.c
index d7c79aeb..ada9614d 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -12,26 +12,99 @@
12 12
13#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_str.h" 15#include "lj_buf.h"
16#include "lj_strscan.h"
17#include "lj_strfmt.h"
18#if LJ_HASFFI
19#include "lj_ctype.h"
20#include "lj_cdata.h"
21#include "lj_cconv.h"
22#include "lj_carith.h"
23#endif
24#include "lj_ff.h"
16#include "lj_lib.h" 25#include "lj_lib.h"
17 26
18/* ------------------------------------------------------------------------ */ 27/* ------------------------------------------------------------------------ */
19 28
20#define LJLIB_MODULE_bit 29#define LJLIB_MODULE_bit
21 30
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) 31#if LJ_HASFFI
32static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
23{ 33{
34 GCcdata *cd = lj_cdata_new_(L, id, 8);
35 *(uint64_t *)cdataptr(cd) = x;
36 setcdataV(L, L->base-1-LJ_FR2, cd);
37 return FFH_RES(1);
38}
39#else
40static int32_t bit_checkbit(lua_State *L, int narg)
41{
42 TValue *o = L->base + narg-1;
43 if (!(o < L->top && lj_strscan_numberobj(o)))
44 lj_err_argt(L, narg, LUA_TNUMBER);
45 if (LJ_LIKELY(tvisint(o))) {
46 return intV(o);
47 } else {
48 int32_t i = lj_num2bit(numV(o));
49 if (LJ_DUALNUM) setintV(o, i);
50 return i;
51 }
52}
53#endif
54
55LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
56{
57#if LJ_HASFFI
58 CTypeID id = 0;
59 setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
60 return FFH_RES(1);
61#else
62 lj_lib_checknumber(L, 1);
63 return FFH_RETRY;
64#endif
65}
66
67LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
68{
69#if LJ_HASFFI
70 CTypeID id = 0;
71 uint64_t x = lj_carith_check64(L, 1, &id);
72 return id ? bit_result64(L, id, ~x) : FFH_RETRY;
73#else
24 lj_lib_checknumber(L, 1); 74 lj_lib_checknumber(L, 1);
25 return FFH_RETRY; 75 return FFH_RETRY;
76#endif
77}
78
79LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
80{
81#if LJ_HASFFI
82 CTypeID id = 0;
83 uint64_t x = lj_carith_check64(L, 1, &id);
84 return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
85#else
86 lj_lib_checknumber(L, 1);
87 return FFH_RETRY;
88#endif
26} 89}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29 90
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) 91LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{ 92{
93#if LJ_HASFFI
94 CTypeID id = 0, id2 = 0;
95 uint64_t x = lj_carith_check64(L, 1, &id);
96 int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
97 if (id) {
98 x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
99 return bit_result64(L, id, x);
100 }
101 setintV(L->base+1, sh);
102 return FFH_RETRY;
103#else
32 lj_lib_checknumber(L, 1); 104 lj_lib_checknumber(L, 1);
33 lj_lib_checkbit(L, 2); 105 bit_checkbit(L, 2);
34 return FFH_RETRY; 106 return FFH_RETRY;
107#endif
35} 108}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) 109LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) 110LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,59 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40 113
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) 114LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{ 115{
116#if LJ_HASFFI
117 CTypeID id = 0;
118 TValue *o = L->base, *top = L->top;
119 int i = 0;
120 do { lj_carith_check64(L, ++i, &id); } while (++o < top);
121 if (id) {
122 CTState *cts = ctype_cts(L);
123 CType *ct = ctype_get(cts, id);
124 int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
125 uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
126 o = L->base;
127 do {
128 lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
129 if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
130 } while (++o < top);
131 return bit_result64(L, id, y);
132 }
133 return FFH_RETRY;
134#else
43 int i = 0; 135 int i = 0;
44 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); 136 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY; 137 return FFH_RETRY;
138#endif
46} 139}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) 140LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) 141LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
49 142
50/* ------------------------------------------------------------------------ */ 143/* ------------------------------------------------------------------------ */
51 144
52LJLIB_CF(bit_tohex) 145LJLIB_CF(bit_tohex) LJLIB_REC(.)
53{ 146{
54 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); 147#if LJ_HASFFI
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); 148 CTypeID id = 0, id2 = 0;
56 const char *hexdigits = "0123456789abcdef"; 149 uint64_t b = lj_carith_check64(L, 1, &id);
57 char buf[8]; 150 int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
58 if (n < 0) { n = (int32_t)(~(uint32_t)n+1u); hexdigits = "0123456789ABCDEF"; } 151 (int32_t)lj_carith_check64(L, 2, &id2);
59 if ((uint32_t)n > 8) n = 8; 152#else
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } 153 uint32_t b = (uint32_t)bit_checkbit(L, 1);
61 lua_pushlstring(L, buf, (size_t)n); 154 int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
155#endif
156 SBuf *sb = lj_buf_tmp_(L);
157 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
158 if (n < 0) { n = (int32_t)(~(uint32_t)n+1u); sf |= STRFMT_F_UPPER; }
159 if ((uint32_t)n > 254) n = 254;
160 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
161#if LJ_HASFFI
162 if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
163#else
164 if (n < 8) b &= (1u << 4*n)-1;
165#endif
166 sb = lj_strfmt_putfxint(sb, sf, b);
167 setstrV(L, L->top-1, lj_buf_str(L, sb));
168 lj_gc_check(L);
62 return 1; 169 return 1;
63} 170}
64 171
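With LJ_HASFFI enabled, the rewritten bit.* fast functions above also accept 64-bit cdata operands (checked via lj_carith_check64) and return 64-bit cdata results, and bit.tohex() is reimplemented on top of lj_strfmt_putfxint with a default width of 16 digits for 64-bit inputs. A hedged usage sketch:

  local bit = require("bit")
  print(bit.tohex(0x12345678))       -- "12345678": default width 8 for plain numbers.
  print(bit.tohex(255, -4))          -- "00FF": a negative width selects upper case.
  print(bit.tohex(1ULL))             -- "0000000000000001": 16 digits for 64-bit cdata.
  print(bit.band(0xF0F0F0F0ULL, 0xFFFFULL))  -- 64-bit cdata result (0xf0f0).
  print(bit.bnot(0ULL))              -- All bits set, again as 64-bit cdata.
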
diff --git a/src/lib_buffer.c b/src/lib_buffer.c
new file mode 100644
index 00000000..e4ec9d9d
--- /dev/null
+++ b/src/lib_buffer.c
@@ -0,0 +1,360 @@
1/*
2** Buffer library.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lib_buffer_c
7#define LUA_LIB
8
9#include "lua.h"
10#include "lauxlib.h"
11#include "lualib.h"
12
13#include "lj_obj.h"
14
15#if LJ_HASBUFFER
16#include "lj_gc.h"
17#include "lj_err.h"
18#include "lj_buf.h"
19#include "lj_str.h"
20#include "lj_tab.h"
21#include "lj_udata.h"
22#include "lj_meta.h"
23#if LJ_HASFFI
24#include "lj_ctype.h"
25#include "lj_cdata.h"
26#include "lj_cconv.h"
27#endif
28#include "lj_strfmt.h"
29#include "lj_serialize.h"
30#include "lj_lib.h"
31
32/* -- Helper functions ---------------------------------------------------- */
33
34/* Check that the first argument is a string buffer. */
35static SBufExt *buffer_tobuf(lua_State *L)
36{
37 if (!(L->base < L->top && tvisbuf(L->base)))
38 lj_err_argtype(L, 1, "buffer");
39 return bufV(L->base);
40}
41
42/* Ditto, but for writers. */
43static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
44{
45 SBufExt *sbx = buffer_tobuf(L);
46 setsbufXL_(sbx, L);
47 return sbx;
48}
49
50#define buffer_toudata(sbx) ((GCudata *)(sbx)-1)
51
52/* -- Buffer methods ------------------------------------------------------ */
53
54#define LJLIB_MODULE_buffer_method
55
56LJLIB_CF(buffer_method_free)
57{
58 SBufExt *sbx = buffer_tobuf(L);
59 lj_bufx_free(L, sbx);
60 L->top = L->base+1; /* Chain buffer object. */
61 return 1;
62}
63
64LJLIB_CF(buffer_method_reset) LJLIB_REC(.)
65{
66 SBufExt *sbx = buffer_tobuf(L);
67 lj_bufx_reset(sbx);
68 L->top = L->base+1; /* Chain buffer object. */
69 return 1;
70}
71
72LJLIB_CF(buffer_method_skip) LJLIB_REC(.)
73{
74 SBufExt *sbx = buffer_tobuf(L);
75 MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
76 MSize len = sbufxlen(sbx);
77 if (n < len) {
78 sbx->r += n;
79 } else if (sbufiscow(sbx)) {
80 sbx->r = sbx->w;
81 } else {
82 sbx->r = sbx->w = sbx->b;
83 }
84 L->top = L->base+1; /* Chain buffer object. */
85 return 1;
86}
87
88LJLIB_CF(buffer_method_set) LJLIB_REC(.)
89{
90 SBufExt *sbx = buffer_tobuf(L);
91 GCobj *ref;
92 const char *p;
93 MSize len;
94#if LJ_HASFFI
95 if (tviscdata(L->base+1)) {
96 CTState *cts = ctype_cts(L);
97 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
98 L->base+1, CCF_ARG(2));
99 len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
100 } else
101#endif
102 {
103 GCstr *str = lj_lib_checkstrx(L, 2);
104 p = strdata(str);
105 len = str->len;
106 }
107 lj_bufx_free(L, sbx);
108 lj_bufx_set_cow(L, sbx, p, len);
109 ref = gcV(L->base+1);
110 setgcref(sbx->cowref, ref);
111 lj_gc_objbarrier(L, buffer_toudata(sbx), ref);
112 L->top = L->base+1; /* Chain buffer object. */
113 return 1;
114}
115
116LJLIB_CF(buffer_method_put) LJLIB_REC(.)
117{
118 SBufExt *sbx = buffer_tobufw(L);
119 ptrdiff_t arg, narg = L->top - L->base;
120 for (arg = 1; arg < narg; arg++) {
121 cTValue *o = &L->base[arg], *mo = NULL;
122 retry:
123 if (tvisstr(o)) {
124 lj_buf_putstr((SBuf *)sbx, strV(o));
125 } else if (tvisint(o)) {
126 lj_strfmt_putint((SBuf *)sbx, intV(o));
127 } else if (tvisnum(o)) {
128 lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o));
129 } else if (tvisbuf(o)) {
130 SBufExt *sbx2 = bufV(o);
131 if (sbx2 == sbx) lj_err_arg(L, (int)(arg+1), LJ_ERR_BUFFER_SELF);
132 lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2));
133 } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
134 /* Call __tostring metamethod inline. */
135 copyTV(L, L->top++, mo);
136 copyTV(L, L->top++, o);
137 lua_call(L, 1, 1);
138 o = &L->base[arg]; /* The stack may have been reallocated. */
139 copyTV(L, &L->base[arg], L->top-1);
140 L->top = L->base + narg;
141 goto retry; /* Retry with the result. */
142 } else {
143 lj_err_argtype(L, (int)(arg+1), "string/number/__tostring");
144 }
145 /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */
146 }
147 L->top = L->base+1; /* Chain buffer object. */
148 lj_gc_check(L);
149 return 1;
150}
151
152LJLIB_CF(buffer_method_putf) LJLIB_REC(.)
153{
154 SBufExt *sbx = buffer_tobufw(L);
155 lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
156 L->top = L->base+1; /* Chain buffer object. */
157 lj_gc_check(L);
158 return 1;
159}
160
161LJLIB_CF(buffer_method_get) LJLIB_REC(.)
162{
163 SBufExt *sbx = buffer_tobuf(L);
164 ptrdiff_t arg, narg = L->top - L->base;
165 if (narg == 1) {
166 narg++;
167 setnilV(L->top++); /* get() is the same as get(nil). */
168 }
169 for (arg = 1; arg < narg; arg++) {
170 TValue *o = &L->base[arg];
171 MSize n = tvisnil(o) ? LJ_MAX_BUF :
172 (MSize) lj_lib_checkintrange(L, (int)(arg+1), 0, LJ_MAX_BUF);
173 MSize len = sbufxlen(sbx);
174 if (n > len) n = len;
175 setstrV(L, o, lj_str_new(L, sbx->r, n));
176 sbx->r += n;
177 }
178 if (sbx->r == sbx->w && !sbufiscow(sbx)) sbx->r = sbx->w = sbx->b;
179 lj_gc_check(L);
180 return (int)(narg-1);
181}
182
183#if LJ_HASFFI
184LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.)
185{
186 SBufExt *sbx = buffer_tobufw(L);
187 const char *p;
188 MSize len;
189 if (tviscdata(L->base+1)) {
190 CTState *cts = ctype_cts(L);
191 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
192 L->base+1, CCF_ARG(2));
193 } else {
194 lj_err_argtype(L, 2, "cdata");
195 }
196 len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
197 lj_buf_putmem((SBuf *)sbx, p, len);
198 L->top = L->base+1; /* Chain buffer object. */
199 return 1;
200}
201
202LJLIB_CF(buffer_method_reserve) LJLIB_REC(.)
203{
204 SBufExt *sbx = buffer_tobufw(L);
205 MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
206 GCcdata *cd;
207 lj_buf_more((SBuf *)sbx, sz);
208 ctype_loadffi(L);
209 cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
210 *(void **)cdataptr(cd) = sbx->w;
211 setcdataV(L, L->top++, cd);
212 setintV(L->top++, sbufleft(sbx));
213 return 2;
214}
215
216LJLIB_CF(buffer_method_commit) LJLIB_REC(.)
217{
218 SBufExt *sbx = buffer_tobuf(L);
219 MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
220 if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG);
221 sbx->w += len;
222 L->top = L->base+1; /* Chain buffer object. */
223 return 1;
224}
225
226LJLIB_CF(buffer_method_ref) LJLIB_REC(.)
227{
228 SBufExt *sbx = buffer_tobuf(L);
229 GCcdata *cd;
230 ctype_loadffi(L);
231 cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
232 *(void **)cdataptr(cd) = sbx->r;
233 setcdataV(L, L->top++, cd);
234 setintV(L->top++, sbufxlen(sbx));
235 return 2;
236}
237#endif
238
239LJLIB_CF(buffer_method_encode) LJLIB_REC(.)
240{
241 SBufExt *sbx = buffer_tobufw(L);
242 cTValue *o = lj_lib_checkany(L, 2);
243 lj_serialize_put(sbx, o);
244 lj_gc_check(L);
245 L->top = L->base+1; /* Chain buffer object. */
246 return 1;
247}
248
249LJLIB_CF(buffer_method_decode) LJLIB_REC(.)
250{
251 SBufExt *sbx = buffer_tobufw(L);
252 setnilV(L->top++);
253 sbx->r = lj_serialize_get(sbx, L->top-1);
254 lj_gc_check(L);
255 return 1;
256}
257
258LJLIB_CF(buffer_method___gc)
259{
260 SBufExt *sbx = buffer_tobuf(L);
261 lj_bufx_free(L, sbx);
262 return 0;
263}
264
265LJLIB_CF(buffer_method___tostring) LJLIB_REC(.)
266{
267 SBufExt *sbx = buffer_tobuf(L);
268 setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));
269 lj_gc_check(L);
270 return 1;
271}
272
273LJLIB_CF(buffer_method___len) LJLIB_REC(.)
274{
275 SBufExt *sbx = buffer_tobuf(L);
276 setintV(L->top-1, (int32_t)sbufxlen(sbx));
277 return 1;
278}
279
280LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
281LJLIB_PUSH(top-1) LJLIB_SET(__index)
282
283/* -- Buffer library functions -------------------------------------------- */
284
285#define LJLIB_MODULE_buffer
286
287LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
288
289LJLIB_CF(buffer_new)
290{
291 MSize sz = 0;
292 int targ = 1;
293 GCtab *env, *dict_str = NULL, *dict_mt = NULL;
294 GCudata *ud;
295 SBufExt *sbx;
296 if (L->base < L->top && !tvistab(L->base)) {
297 targ = 2;
298 if (!tvisnil(L->base))
299 sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
300 }
301 if (L->base+targ-1 < L->top) {
302 GCtab *options = lj_lib_checktab(L, targ);
303 cTValue *opt_dict, *opt_mt;
304 opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
305 if (opt_dict && tvistab(opt_dict)) {
306 dict_str = tabV(opt_dict);
307 lj_serialize_dict_prep_str(L, dict_str);
308 }
309 opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
310 if (opt_mt && tvistab(opt_mt)) {
311 dict_mt = tabV(opt_mt);
312 lj_serialize_dict_prep_mt(L, dict_mt);
313 }
314 }
315 env = tabref(curr_func(L)->c.env);
316 ud = lj_udata_new(L, sizeof(SBufExt), env);
317 ud->udtype = UDTYPE_BUFFER;
318 /* NOBARRIER: The GCudata is new (marked white). */
319 setgcref(ud->metatable, obj2gco(env));
320 setudataV(L, L->top++, ud);
321 sbx = (SBufExt *)uddata(ud);
322 lj_bufx_init(L, sbx);
323 setgcref(sbx->dict_str, obj2gco(dict_str));
324 setgcref(sbx->dict_mt, obj2gco(dict_mt));
325 if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
326 lj_gc_check(L);
327 return 1;
328}
329
330LJLIB_CF(buffer_encode) LJLIB_REC(.)
331{
332 cTValue *o = lj_lib_checkany(L, 1);
333 setstrV(L, L->top++, lj_serialize_encode(L, o));
334 lj_gc_check(L);
335 return 1;
336}
337
338LJLIB_CF(buffer_decode) LJLIB_REC(.)
339{
340 GCstr *str = lj_lib_checkstrx(L, 1);
341 setnilV(L->top++);
342 lj_serialize_decode(L, L->top-1, str);
343 lj_gc_check(L);
344 return 1;
345}
346
347/* ------------------------------------------------------------------------ */
348
349#include "lj_libdef.h"
350
351int luaopen_string_buffer(lua_State *L)
352{
353 LJ_LIB_REG(L, NULL, buffer_method);
354 lua_getfield(L, -1, "__tostring");
355 lua_setfield(L, -2, "tostring");
356 LJ_LIB_REG(L, NULL, buffer);
357 return 1;
358}
359
360#endif
diff --git a/src/lib_debug.c b/src/lib_debug.c
index c5f3040f..2ad51a74 100644
--- a/src/lib_debug.c
+++ b/src/lib_debug.c
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
29 return 1; 29 return 1;
30} 30}
31 31
32LJLIB_CF(debug_getmetatable) 32LJLIB_CF(debug_getmetatable) LJLIB_REC(.)
33{ 33{
34 lj_lib_checkany(L, 1); 34 lj_lib_checkany(L, 1);
35 if (!lua_getmetatable(L, 1)) { 35 if (!lua_getmetatable(L, 1)) {
@@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid)
231 int32_t n = lj_lib_checkint(L, 2) - 1; 231 int32_t n = lj_lib_checkint(L, 2) - 1;
232 if ((uint32_t)n >= fn->l.nupvalues) 232 if ((uint32_t)n >= fn->l.nupvalues)
233 lj_err_arg(L, 2, LJ_ERR_IDXRNG); 233 lj_err_arg(L, 2, LJ_ERR_IDXRNG);
234 setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : 234 lua_pushlightuserdata(L, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
235 (void *)&fn->c.upvalue[n]); 235 (void *)&fn->c.upvalue[n]);
236 return 1; 236 return 1;
237} 237}
238 238
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
283 283
284/* ------------------------------------------------------------------------ */ 284/* ------------------------------------------------------------------------ */
285 285
286static const char KEY_HOOK = 'h'; 286#define KEY_HOOK (U64x(80000000,00000000)|'h')
287 287
288static void hookf(lua_State *L, lua_Debug *ar) 288static void hookf(lua_State *L, lua_Debug *ar)
289{ 289{
290 static const char *const hooknames[] = 290 static const char *const hooknames[] =
291 {"call", "return", "line", "count", "tail return"}; 291 {"call", "return", "line", "count", "tail return"};
292 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 292 (L->top++)->u64 = KEY_HOOK;
293 lua_rawget(L, LUA_REGISTRYINDEX); 293 lua_rawget(L, LUA_REGISTRYINDEX);
294 if (lua_isfunction(L, -1)) { 294 if (lua_isfunction(L, -1)) {
295 lua_pushstring(L, hooknames[(int)ar->event]); 295 lua_pushstring(L, hooknames[(int)ar->event]);
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
334 count = luaL_optint(L, arg+3, 0); 334 count = luaL_optint(L, arg+3, 0);
335 func = hookf; mask = makemask(smask, count); 335 func = hookf; mask = makemask(smask, count);
336 } 336 }
337 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 337 (L->top++)->u64 = KEY_HOOK;
338 lua_pushvalue(L, arg+1); 338 lua_pushvalue(L, arg+1);
339 lua_rawset(L, LUA_REGISTRYINDEX); 339 lua_rawset(L, LUA_REGISTRYINDEX);
340 lua_sethook(L, func, mask, count); 340 lua_sethook(L, func, mask, count);
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
349 if (hook != NULL && hook != hookf) { /* external hook? */ 349 if (hook != NULL && hook != hookf) { /* external hook? */
350 lua_pushliteral(L, "external hook"); 350 lua_pushliteral(L, "external hook");
351 } else { 351 } else {
352 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 352 (L->top++)->u64 = KEY_HOOK;
353 lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ 353 lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */
354 } 354 }
355 lua_pushstring(L, unmakemask(mask, buff)); 355 lua_pushstring(L, unmakemask(mask, buff));
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index 1422dea6..ddeb10c5 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -29,6 +29,7 @@
29#include "lj_ccall.h" 29#include "lj_ccall.h"
30#include "lj_ccallback.h" 30#include "lj_ccallback.h"
31#include "lj_clib.h" 31#include "lj_clib.h"
32#include "lj_strfmt.h"
32#include "lj_ff.h" 33#include "lj_ff.h"
33#include "lj_lib.h" 34#include "lj_lib.h"
34 35
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
137 } 138 }
138 } 139 }
139 copyTV(L, base, L->top); 140 copyTV(L, base, L->top);
140 tv = L->top-1; 141 tv = L->top-1-LJ_FR2;
141 } 142 }
142 return lj_meta_tailcall(L, tv); 143 return lj_meta_tailcall(L, tv);
143} 144}
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
318 } 319 }
319 } 320 }
320 } 321 }
321 lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); 322 lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
322checkgc: 323checkgc:
323 lj_gc_check(L); 324 lj_gc_check(L);
324 return 1; 325 return 1;
@@ -504,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
504 } 505 }
505 if (sz == CTSIZE_INVALID) 506 if (sz == CTSIZE_INVALID)
506 lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); 507 lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
507 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) 508 cd = lj_cdata_newx(cts, id, sz, info);
508 cd = lj_cdata_new(cts, id, sz);
509 else
510 cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
511 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ 509 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
512 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), 510 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
513 o, (MSize)(L->top - o)); /* Initialize cdata. */ 511 o, (MSize)(L->top - o)); /* Initialize cdata. */
@@ -515,7 +513,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
515 /* Handle ctype __gc metamethod. Use the fast lookup here. */ 513 /* Handle ctype __gc metamethod. Use the fast lookup here. */
516 cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id); 514 cTValue *tv = lj_tab_getinth(cts->miscmap, -(int32_t)id);
517 if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) { 515 if (tv && tvistab(tv) && (tv = lj_meta_fast(L, tabV(tv), MM_gc))) {
518 GCtab *t = cts->finalizer; 516 GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]);
519 if (gcref(t->metatable)) { 517 if (gcref(t->metatable)) {
520 /* Add to finalizer table, if still enabled. */ 518 /* Add to finalizer table, if still enabled. */
521 copyTV(L, lj_tab_set(L, t, o-1), tv); 519 copyTV(L, lj_tab_set(L, t, o-1), tv);
@@ -558,6 +556,32 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.)
558 return 1; 556 return 1;
559} 557}
560 558
559/* Internal and unsupported API. */
560LJLIB_CF(ffi_typeinfo)
561{
562 CTState *cts = ctype_cts(L);
563 CTypeID id = (CTypeID)ffi_checkint(L, 1);
564 if (id > 0 && id < cts->top) {
565 CType *ct = ctype_get(cts, id);
566 GCtab *t;
567 lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */
568 t = tabV(L->top-1);
569 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
570 if (ct->size != CTSIZE_INVALID)
571 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
572 if (ct->sib)
573 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
574 if (gcref(ct->name)) {
575 GCstr *s = gco2str(gcref(ct->name));
576 if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s));
577 setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
578 }
579 lj_gc_check(L);
580 return 1;
581 }
582 return 0;
583}
584
561LJLIB_CF(ffi_istype) LJLIB_REC(.) 585LJLIB_CF(ffi_istype) LJLIB_REC(.)
562{ 586{
563 CTState *cts = ctype_cts(L); 587 CTState *cts = ctype_cts(L);
@@ -697,45 +721,51 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.)
697 return 0; 721 return 0;
698} 722}
699 723
700#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
701
702/* Test ABI string. */ 724/* Test ABI string. */
703LJLIB_CF(ffi_abi) LJLIB_REC(.) 725LJLIB_CF(ffi_abi) LJLIB_REC(.)
704{ 726{
705 GCstr *s = lj_lib_checkstr(L, 1); 727 GCstr *s = lj_lib_checkstr(L, 1);
706 int b = 0; 728 int b = lj_cparse_case(s,
707 switch (s->hash) {
708#if LJ_64 729#if LJ_64
709 case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ 730 "\00564bit"
710#else 731#else
711 case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ 732 "\00532bit"
712#endif 733#endif
713#if LJ_ARCH_HASFPU 734#if LJ_ARCH_HASFPU
714 case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ 735 "\003fpu"
715#endif 736#endif
716#if LJ_ABI_SOFTFP 737#if LJ_ABI_SOFTFP
717 case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ 738 "\006softfp"
718#else 739#else
719 case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ 740 "\006hardfp"
720#endif 741#endif
721#if LJ_ABI_EABI 742#if LJ_ABI_EABI
722 case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ 743 "\004eabi"
723#endif 744#endif
724#if LJ_ABI_WIN 745#if LJ_ABI_WIN
725 case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ 746 "\003win"
726#endif 747#endif
727 case H_(3af93066,1f001464): b = 1; break; /* le/be */ 748#if LJ_ABI_PAUTH
728 default: 749 "\005pauth"
729 break; 750#endif
730 } 751#if LJ_TARGET_UWP
752 "\003uwp"
753#endif
754#if LJ_LE
755 "\002le"
756#else
757 "\002be"
758#endif
759#if LJ_GC64
760 "\004gc64"
761#endif
762 ) >= 0;
731 setboolV(L->top-1, b); 763 setboolV(L->top-1, b);
732 setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ 764 setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */
733 return 1; 765 return 1;
734} 766}
735 767
736#undef H_ 768LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to miscmap table. */
737
738LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
739 769
740LJLIB_CF(ffi_metatype) 770LJLIB_CF(ffi_metatype)
741{ 771{
@@ -761,26 +791,16 @@ LJLIB_CF(ffi_metatype)
761 return 1; 791 return 1;
762} 792}
763 793
764LJLIB_PUSH(top-7) LJLIB_SET(!) /* Store reference to finalizer table. */
765
766LJLIB_CF(ffi_gc) LJLIB_REC(.) 794LJLIB_CF(ffi_gc) LJLIB_REC(.)
767{ 795{
768 GCcdata *cd = ffi_checkcdata(L, 1); 796 GCcdata *cd = ffi_checkcdata(L, 1);
769 TValue *fin = lj_lib_checkany(L, 2); 797 TValue *fin = lj_lib_checkany(L, 2);
770 CTState *cts = ctype_cts(L); 798 CTState *cts = ctype_cts(L);
771 GCtab *t = cts->finalizer;
772 CType *ct = ctype_raw(cts, cd->ctypeid); 799 CType *ct = ctype_raw(cts, cd->ctypeid);
773 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || 800 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
774 ctype_isrefarray(ct->info))) 801 ctype_isrefarray(ct->info)))
775 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); 802 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
776 if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ 803 lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
777 copyTV(L, lj_tab_set(L, t, L->base), fin);
778 lj_gc_anybarriert(L, t);
779 if (!tvisnil(fin))
780 cd->marked |= LJ_GC_CDATA_FIN;
781 else
782 cd->marked &= ~LJ_GC_CDATA_FIN;
783 }
784 L->top = L->base+1; /* Pass through the cdata object. */ 804 L->top = L->base+1; /* Pass through the cdata object. */
785 return 1; 805 return 1;
786} 806}
@@ -803,19 +823,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(arch)
803 823
804/* ------------------------------------------------------------------------ */ 824/* ------------------------------------------------------------------------ */
805 825
806/* Create special weak-keyed finalizer table. */
807static GCtab *ffi_finalizer(lua_State *L)
808{
809 /* NOBARRIER: The table is new (marked white). */
810 GCtab *t = lj_tab_new(L, 0, 1);
811 settabV(L, L->top++, t);
812 setgcref(t->metatable, obj2gco(t));
813 setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
814 lj_str_newlit(L, "k"));
815 t->nomm = (uint8_t)(~(1u<<MM_mode));
816 return t;
817}
818
819/* Register FFI module as loaded. */ 826/* Register FFI module as loaded. */
820static void ffi_register_module(lua_State *L) 827static void ffi_register_module(lua_State *L)
821{ 828{
@@ -831,7 +838,6 @@ LUALIB_API int luaopen_ffi(lua_State *L)
831{ 838{
832 CTState *cts = lj_ctype_init(L); 839 CTState *cts = lj_ctype_init(L);
833 settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1))); 840 settabV(L, L->top++, (cts->miscmap = lj_tab_new(L, 0, 1)));
834 cts->finalizer = ffi_finalizer(L);
835 LJ_LIB_REG(L, NULL, ffi_meta); 841 LJ_LIB_REG(L, NULL, ffi_meta);
836 /* NOBARRIER: basemt is a GC root. */ 842 /* NOBARRIER: basemt is a GC root. */
837 setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1))); 843 setgcref(basemt_it(G(L), LJ_TCDATA), obj2gco(tabV(L->top-1)));
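Among the lib_ffi.c changes, ffi.abi() is rewritten from hash-based matching to lj_cparse_case() and gains new parameter strings ("pauth", "uwp", "gc64"), while ffi.gc() now records the finalizer directly on the cdata via lj_cdata_setfin() instead of using the removed weak-keyed finalizer table. A brief Lua-level sketch:

  local ffi = require("ffi")
  print(ffi.abi("64bit"), ffi.abi("gc64"), ffi.abi("win"))  -- Booleans per build configuration.

  ffi.cdef[[ void *malloc(size_t size); void free(void *ptr); ]]
  local p = ffi.gc(ffi.C.malloc(100), ffi.C.free)  -- Associate a finalizer, pass the cdata through.
  p = nil  -- ffi.C.free() runs when the cdata object is collected.
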
diff --git a/src/lib_io.c b/src/lib_io.c
index f7db083e..a13d4315 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,8 +19,10 @@
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_state.h" 24#include "lj_state.h"
25#include "lj_strfmt.h"
24#include "lj_ff.h" 26#include "lj_ff.h"
25#include "lj_lib.h" 27#include "lj_lib.h"
26 28
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
84 IOFileUD *iof = io_file_new(L); 86 IOFileUD *iof = io_file_new(L);
85 iof->fp = fopen(fname, mode); 87 iof->fp = fopen(fname, mode);
86 if (iof->fp == NULL) 88 if (iof->fp == NULL)
87 luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); 89 luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
88 return iof; 90 return iof;
89} 91}
90 92
@@ -97,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
97 int stat = -1; 99 int stat = -1;
98#if LJ_TARGET_POSIX 100#if LJ_TARGET_POSIX
99 stat = pclose(iof->fp); 101 stat = pclose(iof->fp);
100#elif LJ_TARGET_WINDOWS 102#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
101 stat = _pclose(iof->fp); 103 stat = _pclose(iof->fp);
102#else
103 lua_assert(0);
104 return 0;
105#endif 104#endif
106#if LJ_52 105#if LJ_52
107 iof->fp = NULL; 106 iof->fp = NULL;
@@ -110,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
110 ok = (stat != -1); 109 ok = (stat != -1);
111#endif 110#endif
112 } else { 111 } else {
113 lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); 112 lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF,
113 "close of unknown FILE* type");
114 setnilV(L->top++); 114 setnilV(L->top++);
115 lua_pushliteral(L, "cannot close standard file"); 115 lua_pushliteral(L, "cannot close standard file");
116 return 2; 116 return 2;
@@ -145,7 +145,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; 145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
146 char *buf; 146 char *buf;
147 for (;;) { 147 for (;;) {
148 buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 148 buf = lj_buf_tmp(L, m);
149 if (fgets(buf+n, m-n, fp) == NULL) break; 149 if (fgets(buf+n, m-n, fp) == NULL) break;
150 n += (MSize)strlen(buf+n); 150 n += (MSize)strlen(buf+n);
151 ok |= n; 151 ok |= n;
@@ -161,7 +161,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
161{ 161{
162 MSize m, n; 162 MSize m, n;
163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { 163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
164 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 164 char *buf = lj_buf_tmp(L, m);
165 n += (MSize)fread(buf+n, 1, m-n, fp); 165 n += (MSize)fread(buf+n, 1, m-n, fp);
166 if (n != m) { 166 if (n != m) {
167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +174,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
174static int io_file_readlen(lua_State *L, FILE *fp, MSize m) 174static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
175{ 175{
176 if (m) { 176 if (m) {
177 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 177 char *buf = lj_buf_tmp(L, m);
178 MSize n = (MSize)fread(buf, 1, m, fp); 178 MSize n = (MSize)fread(buf, 1, m, fp);
179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
180 lj_gc_check(L); 180 lj_gc_check(L);
@@ -202,13 +202,12 @@ static int io_file_read(lua_State *L, IOFileUD *iof, int start)
202 for (n = start; nargs-- && ok; n++) { 202 for (n = start; nargs-- && ok; n++) {
203 if (tvisstr(L->base+n)) { 203 if (tvisstr(L->base+n)) {
204 const char *p = strVdata(L->base+n); 204 const char *p = strVdata(L->base+n);
205 if (p[0] != '*') 205 if (p[0] == '*') p++;
206 lj_err_arg(L, n+1, LJ_ERR_INVOPT); 206 if (p[0] == 'n')
207 if (p[1] == 'n')
208 ok = io_file_readnum(L, fp); 207 ok = io_file_readnum(L, fp);
209 else if ((p[1] & ~0x20) == 'L') 208 else if ((p[0] & ~0x20) == 'L')
210 ok = io_file_readline(L, fp, (p[1] == 'l')); 209 ok = io_file_readline(L, fp, (p[0] == 'l'));
211 else if (p[1] == 'a') 210 else if (p[0] == 'a')
212 io_file_readall(L, fp); 211 io_file_readall(L, fp);
213 else 212 else
214 lj_err_arg(L, n+1, LJ_ERR_INVFMT); 213 lj_err_arg(L, n+1, LJ_ERR_INVFMT);
@@ -232,19 +231,11 @@ static int io_file_write(lua_State *L, IOFileUD *iof, int start)
232 cTValue *tv; 231 cTValue *tv;
233 int status = 1; 232 int status = 1;
234 for (tv = L->base+start; tv < L->top; tv++) { 233 for (tv = L->base+start; tv < L->top; tv++) {
235 if (tvisstr(tv)) { 234 MSize len;
236 MSize len = strV(tv)->len; 235 const char *p = lj_strfmt_wstrnum(L, tv, &len);
237 status = status && (fwrite(strVdata(tv), 1, len, fp) == len); 236 if (!p)
238 } else if (tvisint(tv)) {
239 char buf[LJ_STR_INTBUF];
240 char *p = lj_str_bufint(buf, intV(tv));
241 size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
242 status = status && (fwrite(p, 1, len, fp) == len);
243 } else if (tvisnum(tv)) {
244 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
245 } else {
246 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); 237 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
247 } 238 status = status && (fwrite(p, 1, len, fp) == len);
248 } 239 }
249 if (LJ_52 && status) { 240 if (LJ_52 && status) {
250 L->top = L->base+1; 241 L->top = L->base+1;
@@ -319,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
319 return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); 310 return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
320} 311}
321 312
313#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24
314/* The Android NDK is such an unmatched marvel of engineering. */
315extern int fseeko32(FILE *, long int, int) __asm__("fseeko");
316extern long int ftello32(FILE *) __asm__("ftello");
317#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence)))
318#define ftello(fp) (ftello32((fp)))
319#endif
320
322LJLIB_CF(io_method_seek) 321LJLIB_CF(io_method_seek)
323{ 322{
324 FILE *fp = io_tofile(L)->fp; 323 FILE *fp = io_tofile(L)->fp;
@@ -419,7 +418,7 @@ LJLIB_CF(io_open)
419 418
420LJLIB_CF(io_popen) 419LJLIB_CF(io_popen)
421{ 420{
422#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS 421#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
423 const char *fname = strdata(lj_lib_checkstr(L, 1)); 422 const char *fname = strdata(lj_lib_checkstr(L, 1));
424 GCstr *s = lj_lib_optstr(L, 2); 423 GCstr *s = lj_lib_optstr(L, 2);
425 const char *mode = s ? strdata(s) : "r"; 424 const char *mode = s ? strdata(s) : "r";
@@ -440,7 +439,7 @@ LJLIB_CF(io_popen)
440LJLIB_CF(io_tmpfile) 439LJLIB_CF(io_tmpfile)
441{ 440{
442 IOFileUD *iof = io_file_new(L); 441 IOFileUD *iof = io_file_new(L);
443#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA 442#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
444 iof->fp = NULL; errno = ENOSYS; 443 iof->fp = NULL; errno = ENOSYS;
445#else 444#else
446 iof->fp = tmpfile(); 445 iof->fp = tmpfile();
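The lib_io.c hunks make the leading '*' in read formats optional, route write() arguments through lj_strfmt_wstrnum(), and switch line/chunk reads to the lj_buf temporary buffer API. Illustrative use (the file name is hypothetical):

  local f = assert(io.open("example.txt", "r"))
  local first = f:read("l")        -- Same as the old "*l": one line without the newline.
  local rest  = f:read("a")        -- Same as "*a": the remainder of the file.
  f:close()

  io.write("count = ", 42, "\n")   -- Numbers are formatted internally, no tostring() needed.
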
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 04a564c7..b83c865a 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -10,13 +10,17 @@
10#include "lauxlib.h" 10#include "lauxlib.h"
11#include "lualib.h" 11#include "lualib.h"
12 12
13#include "lj_arch.h"
14#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_gc.h"
15#include "lj_err.h" 15#include "lj_err.h"
16#include "lj_debug.h" 16#include "lj_debug.h"
17#include "lj_str.h" 17#include "lj_str.h"
18#include "lj_tab.h" 18#include "lj_tab.h"
19#include "lj_state.h"
19#include "lj_bc.h" 20#include "lj_bc.h"
21#if LJ_HASFFI
22#include "lj_ctype.h"
23#endif
20#if LJ_HASJIT 24#if LJ_HASJIT
21#include "lj_ir.h" 25#include "lj_ir.h"
22#include "lj_jit.h" 26#include "lj_jit.h"
@@ -24,6 +28,7 @@
24#include "lj_iropt.h" 28#include "lj_iropt.h"
25#include "lj_target.h" 29#include "lj_target.h"
26#endif 30#endif
31#include "lj_trace.h"
27#include "lj_dispatch.h" 32#include "lj_dispatch.h"
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "lj_vmevent.h" 34#include "lj_vmevent.h"
@@ -99,8 +104,8 @@ LJLIB_CF(jit_status)
99 jit_State *J = L2J(L); 104 jit_State *J = L2J(L);
100 L->top = L->base; 105 L->top = L->base;
101 setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); 106 setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
102 flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); 107 flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
103 flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); 108 flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
104 return (int)(L->top - L->base); 109 return (int)(L->top - L->base);
105#else 110#else
106 setboolV(L->top++, 0); 111 setboolV(L->top++, 0);
@@ -108,6 +113,13 @@ LJLIB_CF(jit_status)
108#endif 113#endif
109} 114}
110 115
116LJLIB_CF(jit_security)
117{
118 int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING);
119 setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3));
120 return 1;
121}
122
111LJLIB_CF(jit_attach) 123LJLIB_CF(jit_attach)
112{ 124{
113#ifdef LUAJIT_DISABLE_VMEVENT 125#ifdef LUAJIT_DISABLE_VMEVENT
@@ -149,24 +161,6 @@ LJLIB_PUSH(top-2) LJLIB_SET(version)
149 161
150/* -- Reflection API for Lua functions ------------------------------------ */ 162/* -- Reflection API for Lua functions ------------------------------------ */
151 163
152/* Return prototype of first argument (Lua function or prototype object) */
153static GCproto *check_Lproto(lua_State *L, int nolua)
154{
155 TValue *o = L->base;
156 if (L->top > o) {
157 if (tvisproto(o)) {
158 return protoV(o);
159 } else if (tvisfunc(o)) {
160 if (isluafunc(funcV(o)))
161 return funcproto(funcV(o));
162 else if (nolua)
163 return NULL;
164 }
165 }
166 lj_err_argt(L, 1, LUA_TFUNCTION);
167 return NULL; /* unreachable */
168}
169
170static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) 164static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
171{ 165{
172 setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); 166 setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
@@ -175,7 +169,7 @@ static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
175/* local info = jit.util.funcinfo(func [,pc]) */ 169/* local info = jit.util.funcinfo(func [,pc]) */
176LJLIB_CF(jit_util_funcinfo) 170LJLIB_CF(jit_util_funcinfo)
177{ 171{
178 GCproto *pt = check_Lproto(L, 1); 172 GCproto *pt = lj_lib_checkLproto(L, 1, 1);
179 if (pt) { 173 if (pt) {
180 BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); 174 BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
181 GCtab *t; 175 GCtab *t;
@@ -217,12 +211,12 @@ LJLIB_CF(jit_util_funcinfo)
217/* local ins, m = jit.util.funcbc(func, pc) */ 211/* local ins, m = jit.util.funcbc(func, pc) */
218LJLIB_CF(jit_util_funcbc) 212LJLIB_CF(jit_util_funcbc)
219{ 213{
220 GCproto *pt = check_Lproto(L, 0); 214 GCproto *pt = lj_lib_checkLproto(L, 1, 0);
221 BCPos pc = (BCPos)lj_lib_checkint(L, 2); 215 BCPos pc = (BCPos)lj_lib_checkint(L, 2);
222 if (pc < pt->sizebc) { 216 if (pc < pt->sizebc) {
223 BCIns ins = proto_bc(pt)[pc]; 217 BCIns ins = proto_bc(pt)[pc];
224 BCOp op = bc_op(ins); 218 BCOp op = bc_op(ins);
225 lua_assert(op < BC__MAX); 219 lj_assertL(op < BC__MAX, "bad bytecode op %d", op);
226 setintV(L->top, ins); 220 setintV(L->top, ins);
227 setintV(L->top+1, lj_bc_mode[op]); 221 setintV(L->top+1, lj_bc_mode[op]);
228 L->top += 2; 222 L->top += 2;
@@ -234,7 +228,7 @@ LJLIB_CF(jit_util_funcbc)
234/* local k = jit.util.funck(func, idx) */ 228/* local k = jit.util.funck(func, idx) */
235LJLIB_CF(jit_util_funck) 229LJLIB_CF(jit_util_funck)
236{ 230{
237 GCproto *pt = check_Lproto(L, 0); 231 GCproto *pt = lj_lib_checkLproto(L, 1, 0);
238 ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2); 232 ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2);
239 if (idx >= 0) { 233 if (idx >= 0) {
240 if (idx < (ptrdiff_t)pt->sizekn) { 234 if (idx < (ptrdiff_t)pt->sizekn) {
@@ -254,7 +248,7 @@ LJLIB_CF(jit_util_funck)
254/* local name = jit.util.funcuvname(func, idx) */ 248/* local name = jit.util.funcuvname(func, idx) */
255LJLIB_CF(jit_util_funcuvname) 249LJLIB_CF(jit_util_funcuvname)
256{ 250{
257 GCproto *pt = check_Lproto(L, 0); 251 GCproto *pt = lj_lib_checkLproto(L, 1, 0);
258 uint32_t idx = (uint32_t)lj_lib_checkint(L, 2); 252 uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
259 if (idx < pt->sizeuv) { 253 if (idx < pt->sizeuv) {
260 setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx))); 254 setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx)));
@@ -280,7 +274,7 @@ static GCtrace *jit_checktrace(lua_State *L)
280/* Names of link types. ORDER LJ_TRLINK */ 274/* Names of link types. ORDER LJ_TRLINK */
281static const char *const jit_trlinkname[] = { 275static const char *const jit_trlinkname[] = {
282 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", 276 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
283 "interpreter", "return" 277 "interpreter", "return", "stitch"
284}; 278};
285 279
286/* local info = jit.util.traceinfo(tr) */ 280/* local info = jit.util.traceinfo(tr) */
@@ -333,6 +327,9 @@ LJLIB_CF(jit_util_tracek)
333 slot = ir->op2; 327 slot = ir->op2;
334 ir = &T->ir[ir->op1]; 328 ir = &T->ir[ir->op1];
335 } 329 }
330#if LJ_HASFFI
331 if (ir->o == IR_KINT64) ctype_loadffi(L);
332#endif
336 lj_ir_kvalue(L, L->top-2, ir); 333 lj_ir_kvalue(L, L->top-2, ir);
337 setintV(L->top-1, (int32_t)irt_type(ir->t)); 334 setintV(L->top-1, (int32_t)irt_type(ir->t));
338 if (slot == -1) 335 if (slot == -1)
@@ -407,7 +404,8 @@ LJLIB_CF(jit_util_ircalladdr)
407{ 404{
408 uint32_t idx = (uint32_t)lj_lib_checkint(L, 1); 405 uint32_t idx = (uint32_t)lj_lib_checkint(L, 1);
409 if (idx < IRCALL__MAX) { 406 if (idx < IRCALL__MAX) {
410 setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func); 407 ASMFunction func = lj_ir_callinfo[idx].func;
408 setintptrV(L->top-1, (intptr_t)(void *)lj_ptr_strip(func));
411 return 1; 409 return 1;
412 } 410 }
413 return 0; 411 return 0;
@@ -417,6 +415,12 @@ LJLIB_CF(jit_util_ircalladdr)
417 415
418#include "lj_libdef.h" 416#include "lj_libdef.h"
419 417
418static int luaopen_jit_util(lua_State *L)
419{
420 LJ_LIB_REG(L, NULL, jit_util);
421 return 1;
422}
423
420/* -- jit.opt module ------------------------------------------------------ */ 424/* -- jit.opt module ------------------------------------------------------ */
421 425
422#if LJ_HASJIT 426#if LJ_HASJIT
@@ -453,7 +457,7 @@ static int jitopt_flag(jit_State *J, const char *str)
453 str += str[2] == '-' ? 3 : 2; 457 str += str[2] == '-' ? 3 : 2;
454 set = 0; 458 set = 0;
455 } 459 }
456 for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { 460 for (opt = JIT_F_OPT; ; opt <<= 1) {
457 size_t len = *(const uint8_t *)lst; 461 size_t len = *(const uint8_t *)lst;
458 if (len == 0) 462 if (len == 0)
459 break; 463 break;
@@ -473,7 +477,7 @@ static int jitopt_param(jit_State *J, const char *str)
473 int i; 477 int i;
474 for (i = 0; i < JIT_P__MAX; i++) { 478 for (i = 0; i < JIT_P__MAX; i++) {
475 size_t len = *(const uint8_t *)lst; 479 size_t len = *(const uint8_t *)lst;
476 lua_assert(len != 0); 480 lj_assertJ(len != 0, "bad JIT_P_STRING");
477 if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { 481 if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
478 int32_t n = 0; 482 int32_t n = 0;
479 const char *p = &str[len+1]; 483 const char *p = &str[len+1];
@@ -514,6 +518,104 @@ LJLIB_CF(jit_opt_start)
514 518
515#endif 519#endif
516 520
521/* -- jit.profile module -------------------------------------------------- */
522
523#if LJ_HASPROFILE
524
525#define LJLIB_MODULE_jit_profile
526
527/* Not loaded by default, use: local profile = require("jit.profile") */
528
529#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t')
530#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f')
531
532static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
533 int vmstate)
534{
535 TValue key;
536 cTValue *tv;
537 key.u64 = KEY_PROFILE_FUNC;
538 tv = lj_tab_get(L, tabV(registry(L)), &key);
539 if (tvisfunc(tv)) {
540 char vmst = (char)vmstate;
541 int status;
542 setfuncV(L2, L2->top++, funcV(tv));
543 setthreadV(L2, L2->top++, L);
544 setintV(L2->top++, samples);
545 setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
546 status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */
547 if (status) {
548 if (G(L2)->panic) G(L2)->panic(L2);
549 exit(EXIT_FAILURE);
550 }
551 lj_trace_abort(G(L2));
552 }
553}
554
555/* profile.start(mode, cb) */
556LJLIB_CF(jit_profile_start)
557{
558 GCtab *registry = tabV(registry(L));
559 GCstr *mode = lj_lib_optstr(L, 1);
560 GCfunc *func = lj_lib_checkfunc(L, 2);
561 lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
562 TValue key;
563 /* Anchor thread and function in registry. */
564 key.u64 = KEY_PROFILE_THREAD;
565 setthreadV(L, lj_tab_set(L, registry, &key), L2);
566 key.u64 = KEY_PROFILE_FUNC;
567 setfuncV(L, lj_tab_set(L, registry, &key), func);
568 lj_gc_anybarriert(L, registry);
569 luaJIT_profile_start(L, mode ? strdata(mode) : "",
570 (luaJIT_profile_callback)jit_profile_callback, L2);
571 return 0;
572}
573
574/* profile.stop() */
575LJLIB_CF(jit_profile_stop)
576{
577 GCtab *registry;
578 TValue key;
579 luaJIT_profile_stop(L);
580 registry = tabV(registry(L));
581 key.u64 = KEY_PROFILE_THREAD;
582 setnilV(lj_tab_set(L, registry, &key));
583 key.u64 = KEY_PROFILE_FUNC;
584 setnilV(lj_tab_set(L, registry, &key));
585 lj_gc_anybarriert(L, registry);
586 return 0;
587}
588
589/* dump = profile.dumpstack([thread,] fmt, depth) */
590LJLIB_CF(jit_profile_dumpstack)
591{
592 lua_State *L2 = L;
593 int arg = 0;
594 size_t len;
595 int depth;
596 GCstr *fmt;
597 const char *p;
598 if (L->top > L->base && tvisthread(L->base)) {
599 L2 = threadV(L->base);
600 arg = 1;
601 }
602 fmt = lj_lib_checkstr(L, arg+1);
603 depth = lj_lib_checkint(L, arg+2);
604 p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
605 lua_pushlstring(L, p, len);
606 return 1;
607}
608
609#include "lj_libdef.h"
610
611static int luaopen_jit_profile(lua_State *L)
612{
613 LJ_LIB_REG(L, NULL, jit_profile);
614 return 1;
615}
616
617#endif
618
517/* -- JIT compiler initialization ----------------------------------------- */ 619/* -- JIT compiler initialization ----------------------------------------- */
518 620
519#if LJ_HASJIT 621#if LJ_HASJIT
@@ -524,66 +626,41 @@ JIT_PARAMDEF(JIT_PARAMINIT)
524#undef JIT_PARAMINIT 626#undef JIT_PARAMINIT
525 0 627 0
526}; 628};
527#endif
528 629
529#if LJ_TARGET_ARM && LJ_TARGET_LINUX 630#if LJ_TARGET_ARM && LJ_TARGET_LINUX
530#include <sys/utsname.h> 631#include <sys/utsname.h>
531#endif 632#endif
532 633
533/* Arch-dependent CPU detection. */ 634/* Arch-dependent CPU feature detection. */
534static uint32_t jit_cpudetect(lua_State *L) 635static uint32_t jit_cpudetect(void)
535{ 636{
536 uint32_t flags = 0; 637 uint32_t flags = 0;
537#if LJ_TARGET_X86ORX64 638#if LJ_TARGET_X86ORX64
639
538 uint32_t vendor[4]; 640 uint32_t vendor[4];
539 uint32_t features[4]; 641 uint32_t features[4];
540 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 642 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
541#if !LJ_HASJIT
542#define JIT_F_CMOV 1
543#define JIT_F_SSE2 2
544#endif
545 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
546 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
547#if LJ_HASJIT
548 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 643 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
549 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 644 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
550 if (vendor[2] == 0x6c65746e) { /* Intel. */ 645 if (vendor[0] >= 7) {
551 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 646 uint32_t xfeatures[4];
552 flags |= JIT_F_P4; /* Currently unused. */ 647 lj_vm_cpuid(7, xfeatures);
553 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ 648 flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
554 flags |= JIT_F_LEA_AGU;
555 } else if (vendor[2] == 0x444d4163) { /* AMD. */
556 uint32_t fam = (features[0] & 0x0ff00f00);
557 if (fam == 0x00000f00) /* K8. */
558 flags |= JIT_F_SPLIT_XMM;
559 if (fam >= 0x00000f00) /* K8, K10. */
560 flags |= JIT_F_PREFER_IMUL;
561 } 649 }
562#endif
563 } 650 }
564 /* Check for required instruction set support on x86 (unnecessary on x64). */ 651 /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
565#if LJ_TARGET_X86 652
566#if !defined(LUAJIT_CPU_NOCMOV)
567 if (!(flags & JIT_F_CMOV))
568 luaL_error(L, "CPU not supported");
569#endif
570#if defined(LUAJIT_CPU_SSE2)
571 if (!(flags & JIT_F_SSE2))
572 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
573#endif
574#endif
575#elif LJ_TARGET_ARM 653#elif LJ_TARGET_ARM
576#if LJ_HASJIT 654
577 int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ 655 int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
578#if LJ_TARGET_LINUX 656#if LJ_TARGET_LINUX
579 if (ver < 70) { /* Runtime ARM CPU detection. */ 657 if (ver < 70) { /* Runtime ARM CPU detection. */
580 struct utsname ut; 658 struct utsname ut;
581 uname(&ut); 659 uname(&ut);
582 if (strncmp(ut.machine, "armv", 4) == 0) { 660 if (strncmp(ut.machine, "armv", 4) == 0) {
583 if (ut.machine[4] >= '7') 661 if (ut.machine[4] >= '8') ver = 80;
584 ver = 70; 662 else if (ut.machine[4] == '7') ver = 70;
585 else if (ut.machine[4] == '6') 663 else if (ut.machine[4] == '6') ver = 60;
586 ver = 60;
587 } 664 }
588 } 665 }
589#endif 666#endif
@@ -591,74 +668,77 @@ static uint32_t jit_cpudetect(lua_State *L)
591 ver >= 61 ? JIT_F_ARMV6T2_ : 668 ver >= 61 ? JIT_F_ARMV6T2_ :
592 ver >= 60 ? JIT_F_ARMV6_ : 0; 669 ver >= 60 ? JIT_F_ARMV6_ : 0;
593 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; 670 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
594#endif 671
672#elif LJ_TARGET_ARM64
673
674 /* No optional CPU features to detect (for now). */
675
595#elif LJ_TARGET_PPC 676#elif LJ_TARGET_PPC
596#if LJ_HASJIT 677
597#if LJ_ARCH_SQRT 678#if LJ_ARCH_SQRT
598 flags |= JIT_F_SQRT; 679 flags |= JIT_F_SQRT;
599#endif 680#endif
600#if LJ_ARCH_ROUND 681#if LJ_ARCH_ROUND
601 flags |= JIT_F_ROUND; 682 flags |= JIT_F_ROUND;
602#endif 683#endif
603#endif 684
604#elif LJ_TARGET_PPCSPE
605 /* Nothing to do. */
606#elif LJ_TARGET_MIPS 685#elif LJ_TARGET_MIPS
607#if LJ_HASJIT 686
608 /* Compile-time MIPS CPU detection. */ 687 /* Compile-time MIPS CPU detection. */
609#if LJ_ARCH_VERSION >= 20 688#if LJ_ARCH_VERSION >= 20
610 flags |= JIT_F_MIPS32R2; 689 flags |= JIT_F_MIPSXXR2;
611#endif 690#endif
612 /* Runtime MIPS CPU detection. */ 691 /* Runtime MIPS CPU detection. */
613#if defined(__GNUC__) 692#if defined(__GNUC__)
614 if (!(flags & JIT_F_MIPS32R2)) { 693 if (!(flags & JIT_F_MIPSXXR2)) {
615 int x; 694 int x;
695#ifdef __mips16
696 x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
697#else
616 /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ 698 /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
617 __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); 699 __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
618 if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */
619 }
620#endif 700#endif
701 if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
702 }
621#endif 703#endif
704
622#else 705#else
623#error "Missing CPU detection for this architecture" 706#error "Missing CPU detection for this architecture"
624#endif 707#endif
625 UNUSED(L);
626 return flags; 708 return flags;
627} 709}
628 710
629/* Initialize JIT compiler. */ 711/* Initialize JIT compiler. */
630static void jit_init(lua_State *L) 712static void jit_init(lua_State *L)
631{ 713{
632 uint32_t flags = jit_cpudetect(L);
633#if LJ_HASJIT
634 jit_State *J = L2J(L); 714 jit_State *J = L2J(L);
635#if LJ_TARGET_X86 715 J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
636 /* Silently turn off the JIT compiler on CPUs without SSE2. */
637 if ((flags & JIT_F_SSE2))
638#endif
639 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
640 memcpy(J->param, jit_param_default, sizeof(J->param)); 716 memcpy(J->param, jit_param_default, sizeof(J->param));
641 lj_dispatch_update(G(L)); 717 lj_dispatch_update(G(L));
642#else
643 UNUSED(flags);
644#endif
645} 718}
719#endif
646 720
647LUALIB_API int luaopen_jit(lua_State *L) 721LUALIB_API int luaopen_jit(lua_State *L)
648{ 722{
723#if LJ_HASJIT
724 jit_init(L);
725#endif
649 lua_pushliteral(L, LJ_OS_NAME); 726 lua_pushliteral(L, LJ_OS_NAME);
650 lua_pushliteral(L, LJ_ARCH_NAME); 727 lua_pushliteral(L, LJ_ARCH_NAME);
651 lua_pushinteger(L, LUAJIT_VERSION_NUM); /* Deprecated. */ 728 lua_pushinteger(L, LUAJIT_VERSION_NUM); /* Deprecated. */
652 lua_pushliteral(L, LUAJIT_VERSION); 729 lua_pushliteral(L, LUAJIT_VERSION);
653 LJ_LIB_REG(L, LUA_JITLIBNAME, jit); 730 LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
731#if LJ_HASPROFILE
732 lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
733 tabref(L->env));
734#endif
654#ifndef LUAJIT_DISABLE_JITUTIL 735#ifndef LUAJIT_DISABLE_JITUTIL
655 LJ_LIB_REG(L, "jit.util", jit_util); 736 lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
656#endif 737#endif
657#if LJ_HASJIT 738#if LJ_HASJIT
658 LJ_LIB_REG(L, "jit.opt", jit_opt); 739 LJ_LIB_REG(L, "jit.opt", jit_opt);
659#endif 740#endif
660 L->top -= 2; 741 L->top -= 2;
661 jit_init(L);
662 return 1; 742 return 1;
663} 743}
664 744
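lib_jit.c adds jit.security(), reworks CPU feature detection, and registers jit.util plus the new jit.profile module lazily via lj_lib_prereg(). The profiler's Lua API follows the signatures given in the comments above: profile.start(mode, cb), profile.stop() and profile.dumpstack([thread,] fmt, depth). A rough usage sketch; the mode and format strings here are illustrative, see doc/ext_profiler.html for the full set:

  local profile = require("jit.profile")

  local counts = {}
  profile.start("li1", function(thread, samples, vmstate)
    -- Runs on a separate coroutine; keep the callback cheap.
    local key = profile.dumpstack(thread, "l", 1)
    counts[key] = (counts[key] or 0) + samples
  end)

  -- ... run the workload to be profiled ...

  profile.stop()
  for loc, n in pairs(counts) do print(n, loc) end
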
diff --git a/src/lib_math.c b/src/lib_math.c
index 56644746..08bb7673 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -13,8 +13,10 @@
13#include "lualib.h" 13#include "lualib.h"
14 14
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_err.h"
16#include "lj_lib.h" 17#include "lj_lib.h"
17#include "lj_vm.h" 18#include "lj_vm.h"
19#include "lj_prng.h"
18 20
19/* ------------------------------------------------------------------------ */ 21/* ------------------------------------------------------------------------ */
20 22
@@ -33,25 +35,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT)
33 lj_lib_checknum(L, 1); 35 lj_lib_checknum(L, 1);
34 return FFH_RETRY; 36 return FFH_RETRY;
35} 37}
36LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) 38LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10)
37LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) 39LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp)
38LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) 40LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin)
39LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) 41LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos)
40LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) 42LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan)
41LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) 43LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin)
42LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) 44LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos)
43LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) 45LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan)
44LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) 46LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh)
45LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) 47LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh)
46LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) 48LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh)
47LJLIB_ASM_(math_frexp) 49LJLIB_ASM_(math_frexp)
48LJLIB_ASM_(math_modf) LJLIB_REC(.) 50LJLIB_ASM_(math_modf)
49
50LJLIB_PUSH(57.29577951308232)
51LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
52
53LJLIB_PUSH(0.017453292519943295)
54LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
55 51
56LJLIB_ASM(math_log) LJLIB_REC(math_log) 52LJLIB_ASM(math_log) LJLIB_REC(math_log)
57{ 53{
@@ -63,12 +59,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
63#else 59#else
64 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); 60 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
65#endif 61#endif
66 setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */ 62 setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */
67 return FFH_RES(1); 63 return FFH_RES(1);
68 } 64 }
69 return FFH_RETRY; 65 return FFH_RETRY;
70} 66}
71 67
68LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
69LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
70
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 71LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 72{
74 lj_lib_checknum(L, 1); 73 lj_lib_checknum(L, 1);
@@ -108,34 +107,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge)
108** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. 107** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
109*/ 108*/
110 109
111/* PRNG state. */
112struct RandomState {
113 uint64_t gen[4]; /* State of the 4 LFSR generators. */
114 int valid; /* State is valid. */
115};
116
117/* Union needed for bit-pattern conversion between uint64_t and double. */ 110/* Union needed for bit-pattern conversion between uint64_t and double. */
118typedef union { uint64_t u64; double d; } U64double; 111typedef union { uint64_t u64; double d; } U64double;
119 112
120/* Update generator i and compute a running xor of all states. */ 113/* PRNG seeding function. */
121#define TW223_GEN(i, k, q, s) \ 114static void random_seed(PRNGState *rs, double d)
122 z = rs->gen[i]; \
123 z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
124 r ^= z; rs->gen[i] = z;
125
126/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
127LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
128{
129 uint64_t z, r = 0;
130 TW223_GEN(0, 63, 31, 18)
131 TW223_GEN(1, 58, 19, 28)
132 TW223_GEN(2, 55, 24, 7)
133 TW223_GEN(3, 47, 21, 8)
134 return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
135}
136
137/* PRNG initialization function. */
138static void random_init(RandomState *rs, double d)
139{ 115{
140 uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ 116 uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
141 int i; 117 int i;
@@ -144,24 +120,22 @@ static void random_init(RandomState *rs, double d)
144 uint32_t m = 1u << (r&255); 120 uint32_t m = 1u << (r&255);
145 r >>= 8; 121 r >>= 8;
146 u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; 122 u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
147 if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ 123 if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */
148 rs->gen[i] = u.u64; 124 rs->u[i] = u.u64;
149 } 125 }
150 rs->valid = 1;
151 for (i = 0; i < 10; i++) 126 for (i = 0; i < 10; i++)
152 lj_math_random_step(rs); 127 (void)lj_prng_u64(rs);
153} 128}
154 129
155/* PRNG extract function. */ 130/* PRNG extract function. */
156LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ 131LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
157LJLIB_CF(math_random) LJLIB_REC(.) 132LJLIB_CF(math_random) LJLIB_REC(.)
158{ 133{
159 int n = (int)(L->top - L->base); 134 int n = (int)(L->top - L->base);
160 RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); 135 PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
161 U64double u; 136 U64double u;
162 double d; 137 double d;
163 if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); 138 u.u64 = lj_prng_u64d(rs);
164 u.u64 = lj_math_random_step(rs);
165 d = u.d - 1.0; 139 d = u.d - 1.0;
166 if (n > 0) { 140 if (n > 0) {
167#if LJ_DUALNUM 141#if LJ_DUALNUM
@@ -206,11 +180,14 @@ LJLIB_CF(math_random) LJLIB_REC(.)
206} 180}
207 181
208/* PRNG seed function. */ 182/* PRNG seed function. */
209LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ 183LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
210LJLIB_CF(math_randomseed) 184LJLIB_CF(math_randomseed)
211{ 185{
212 RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); 186 PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
213 random_init(rs, lj_lib_checknum(L, 1)); 187 if (L->base != L->top)
188 random_seed(rs, lj_lib_checknum(L, 1));
189 else if (!lj_prng_seed_secure(rs))
190 lj_err_caller(L, LJ_ERR_PRNGSD);
214 return 0; 191 return 0;
215} 192}
216 193
@@ -220,14 +197,9 @@ LJLIB_CF(math_randomseed)
220 197
221LUALIB_API int luaopen_math(lua_State *L) 198LUALIB_API int luaopen_math(lua_State *L)
222{ 199{
223 RandomState *rs; 200 PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState));
224 rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); 201 lj_prng_seed_fixed(rs);
225 rs->valid = 0; /* Use lazy initialization to save some time on startup. */
226 LJ_LIB_REG(L, LUA_MATHLIBNAME, math); 202 LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
227#if defined(LUA_COMPAT_MOD) && !LJ_52
228 lua_getfield(L, -1, "fmod");
229 lua_setfield(L, -2, "mod");
230#endif
231 return 1; 203 return 1;
232} 204}
233 205
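
The hunks above replace the library-private RandomState/TW223 generator with the shared PRNGState from lj_prng, seed it eagerly in luaopen_math(), and give math.randomseed() a zero-argument form that asks lj_prng_seed_secure() for entropy. A hedged Lua sketch of the call-level behaviour:

-- Seeding sketch; a failed secure seed raises the LJ_ERR_PRNGSD error.
math.randomseed(42)                 -- deterministic re-seed via random_seed()
local a = math.random()             -- double in [0, 1)
math.randomseed(42)
assert(math.random() == a)          -- same seed, same first draw

local ok = pcall(math.randomseed)   -- no argument: secure seeding, may raise
print(ok)
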
diff --git a/src/lib_os.c b/src/lib_os.c
index 029cb75d..cf0df281 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -17,7 +17,10 @@
17#include "lualib.h" 17#include "lualib.h"
18 18
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h"
20#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
23#include "lj_str.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23#if LJ_TARGET_POSIX 26#if LJ_TARGET_POSIX
@@ -73,7 +76,7 @@ LJLIB_CF(os_rename)
73 76
74LJLIB_CF(os_tmpname) 77LJLIB_CF(os_tmpname)
75{ 78{
76#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA 79#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
77 lj_err_caller(L, LJ_ERR_OSUNIQF); 80 lj_err_caller(L, LJ_ERR_OSUNIQF);
78 return 0; 81 return 0;
79#else 82#else
@@ -188,7 +191,7 @@ LJLIB_CF(os_date)
188#endif 191#endif
189 } 192 }
190 if (stm == NULL) { /* Invalid date? */ 193 if (stm == NULL) { /* Invalid date? */
191 setnilV(L->top-1); 194 setnilV(L->top++);
192 } else if (strcmp(s, "*t") == 0) { 195 } else if (strcmp(s, "*t") == 0) {
193 lua_createtable(L, 0, 9); /* 9 = number of fields */ 196 lua_createtable(L, 0, 9); /* 9 = number of fields */
194 setfield(L, "sec", stm->tm_sec); 197 setfield(L, "sec", stm->tm_sec);
@@ -200,23 +203,25 @@ LJLIB_CF(os_date)
200 setfield(L, "wday", stm->tm_wday+1); 203 setfield(L, "wday", stm->tm_wday+1);
201 setfield(L, "yday", stm->tm_yday+1); 204 setfield(L, "yday", stm->tm_yday+1);
202 setboolfield(L, "isdst", stm->tm_isdst); 205 setboolfield(L, "isdst", stm->tm_isdst);
203 } else { 206 } else if (*s) {
204 char cc[3]; 207 SBuf *sb = &G(L)->tmpbuf;
205 luaL_Buffer b; 208 MSize sz = 0, retry = 4;
206 cc[0] = '%'; cc[2] = '\0'; 209 const char *q;
207 luaL_buffinit(L, &b); 210 for (q = s; *q; q++)
208 for (; *s; s++) { 211 sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
209 if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ 212 setsbufL(sb, L);
210 luaL_addchar(&b, *s); 213 while (retry--) { /* Limit growth for invalid format or empty result. */
211 } else { 214 char *buf = lj_buf_need(sb, sz);
212 size_t reslen; 215 size_t len = strftime(buf, sbufsz(sb), s, stm);
213 char buff[200]; /* Should be big enough for any conversion result. */ 216 if (len) {
214 cc[1] = *(++s); 217 setstrV(L, L->top++, lj_str_new(L, buf, len));
215 reslen = strftime(buff, sizeof(buff), cc, stm); 218 lj_gc_check(L);
216 luaL_addlstring(&b, buff, reslen); 219 break;
217 } 220 }
221 sz += (sz|1);
218 } 222 }
219 luaL_pushresult(&b); 223 } else {
224 setstrV(L, L->top++, &G(L)->strempty);
220 } 225 }
221 return 1; 226 return 1;
222} 227}
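
os.date() above now hands the whole format string to strftime() in one call, sizing a temporary SBuf from the number of '%' conversions and retrying with a larger buffer if the result does not fit. Usage is unchanged; a small sketch:

-- os.date() sketch (behaviour as before, only the buffering strategy changed).
print(os.date("%Y-%m-%d %H:%M:%S"))   -- formatted local time
print(os.date("!%H:%M", 0))           --> 00:00  (UTC, fixed timestamp)
local t = os.date("*t")               -- broken-down table, separate code path
print(t.year, t.isdst)
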
diff --git a/src/lib_package.c b/src/lib_package.c
index 6ec763a5..7e24afa5 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
76BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); 76BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
77#endif 77#endif
78 78
79#if LJ_TARGET_UWP
80void *LJ_WIN_LOADLIBA(const char *path)
81{
82 DWORD err = GetLastError();
83 wchar_t wpath[256];
84 HANDLE lib = NULL;
85 if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
86 lib = LoadPackagedLibrary(wpath, 0);
87 }
88 SetLastError(err);
89 return lib;
90}
91#endif
92
79#undef setprogdir 93#undef setprogdir
80 94
81static void setprogdir(lua_State *L) 95static void setprogdir(lua_State *L)
@@ -96,9 +110,17 @@ static void setprogdir(lua_State *L)
96static void pusherror(lua_State *L) 110static void pusherror(lua_State *L)
97{ 111{
98 DWORD error = GetLastError(); 112 DWORD error = GetLastError();
113#if LJ_TARGET_XBOXONE
114 wchar_t wbuffer[128];
115 char buffer[128*2];
116 if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
117 NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
118 WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
119#else
99 char buffer[128]; 120 char buffer[128];
100 if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, 121 if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
101 NULL, error, 0, buffer, sizeof(buffer), NULL)) 122 NULL, error, 0, buffer, sizeof(buffer), NULL))
123#endif
102 lua_pushstring(L, buffer); 124 lua_pushstring(L, buffer);
103 else 125 else
104 lua_pushfstring(L, "system error %d\n", error); 126 lua_pushfstring(L, "system error %d\n", error);
@@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib)
111 133
112static void *ll_load(lua_State *L, const char *path, int gl) 134static void *ll_load(lua_State *L, const char *path, int gl)
113{ 135{
114 HINSTANCE lib = LoadLibraryA(path); 136 HINSTANCE lib = LJ_WIN_LOADLIBA(path);
115 if (lib == NULL) pusherror(L); 137 if (lib == NULL) pusherror(L);
116 UNUSED(gl); 138 UNUSED(gl);
117 return lib; 139 return lib;
@@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
124 return f; 146 return f;
125} 147}
126 148
149#if LJ_TARGET_UWP
150EXTERN_C IMAGE_DOS_HEADER __ImageBase;
151#endif
152
127static const char *ll_bcsym(void *lib, const char *sym) 153static const char *ll_bcsym(void *lib, const char *sym)
128{ 154{
129 if (lib) { 155 if (lib) {
130 return (const char *)GetProcAddress((HINSTANCE)lib, sym); 156 return (const char *)GetProcAddress((HINSTANCE)lib, sym);
131 } else { 157 } else {
158#if LJ_TARGET_UWP
159 return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
160#else
132 HINSTANCE h = GetModuleHandleA(NULL); 161 HINSTANCE h = GetModuleHandleA(NULL);
133 const char *p = (const char *)GetProcAddress(h, sym); 162 const char *p = (const char *)GetProcAddress(h, sym);
134 if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, 163 if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
135 (const char *)ll_bcsym, &h)) 164 (const char *)ll_bcsym, &h))
136 p = (const char *)GetProcAddress(h, sym); 165 p = (const char *)GetProcAddress(h, sym);
137 return p; 166 return p;
167#endif
138 } 168 }
139} 169}
140 170
@@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path)
185 lua_pop(L, 1); 215 lua_pop(L, 1);
186 plib = (void **)lua_newuserdata(L, sizeof(void *)); 216 plib = (void **)lua_newuserdata(L, sizeof(void *));
187 *plib = NULL; 217 *plib = NULL;
188 luaL_getmetatable(L, "_LOADLIB"); 218 luaL_setmetatable(L, "_LOADLIB");
189 lua_setmetatable(L, -2);
190 lua_pushfstring(L, "LOADLIB: %s", path); 219 lua_pushfstring(L, "LOADLIB: %s", path);
191 lua_pushvalue(L, -2); 220 lua_pushvalue(L, -2);
192 lua_settable(L, LUA_REGISTRYINDEX); 221 lua_settable(L, LUA_REGISTRYINDEX);
@@ -396,8 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
396 425
397/* ------------------------------------------------------------------------ */ 426/* ------------------------------------------------------------------------ */
398 427
399static const int sentinel_ = 0; 428#define KEY_SENTINEL (U64x(80000000,00000000)|'s')
400#define sentinel ((void *)&sentinel_)
401 429
402static int lj_cf_package_require(lua_State *L) 430static int lj_cf_package_require(lua_State *L)
403{ 431{
@@ -407,7 +435,7 @@ static int lj_cf_package_require(lua_State *L)
407 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); 435 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
408 lua_getfield(L, 2, name); 436 lua_getfield(L, 2, name);
409 if (lua_toboolean(L, -1)) { /* is it there? */ 437 if (lua_toboolean(L, -1)) { /* is it there? */
410 if (lua_touserdata(L, -1) == sentinel) /* check loops */ 438 if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */
411 luaL_error(L, "loop or previous error loading module " LUA_QS, name); 439 luaL_error(L, "loop or previous error loading module " LUA_QS, name);
412 return 1; /* package is already loaded */ 440 return 1; /* package is already loaded */
413 } 441 }
@@ -430,14 +458,14 @@ static int lj_cf_package_require(lua_State *L)
430 else 458 else
431 lua_pop(L, 1); 459 lua_pop(L, 1);
432 } 460 }
433 lua_pushlightuserdata(L, sentinel); 461 (L->top++)->u64 = KEY_SENTINEL;
434 lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ 462 lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
435 lua_pushstring(L, name); /* pass name as argument to module */ 463 lua_pushstring(L, name); /* pass name as argument to module */
436 lua_call(L, 1, 1); /* run loaded module */ 464 lua_call(L, 1, 1); /* run loaded module */
437 if (!lua_isnil(L, -1)) /* non-nil return? */ 465 if (!lua_isnil(L, -1)) /* non-nil return? */
438 lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ 466 lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
439 lua_getfield(L, 2, name); 467 lua_getfield(L, 2, name);
440 if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */ 468 if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */
441 lua_pushboolean(L, 1); /* use true as result */ 469 lua_pushboolean(L, 1); /* use true as result */
442 lua_pushvalue(L, -1); /* extra copy to be returned */ 470 lua_pushvalue(L, -1); /* extra copy to be returned */
443 lua_setfield(L, 2, name); /* _LOADED[name] = true */ 471 lua_setfield(L, 2, name); /* _LOADED[name] = true */
@@ -487,29 +515,19 @@ static void modinit(lua_State *L, const char *modname)
487static int lj_cf_package_module(lua_State *L) 515static int lj_cf_package_module(lua_State *L)
488{ 516{
489 const char *modname = luaL_checkstring(L, 1); 517 const char *modname = luaL_checkstring(L, 1);
490 int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ 518 int lastarg = (int)(L->top - L->base);
491 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); 519 luaL_pushmodule(L, modname, 1);
492 lua_getfield(L, loaded, modname); /* get _LOADED[modname] */
493 if (!lua_istable(L, -1)) { /* not found? */
494 lua_pop(L, 1); /* remove previous result */
495 /* try global variable (and create one if it does not exist) */
496 if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
497 lj_err_callerv(L, LJ_ERR_BADMODN, modname);
498 lua_pushvalue(L, -1);
499 lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */
500 }
501 /* check whether table already has a _NAME field */
502 lua_getfield(L, -1, "_NAME"); 520 lua_getfield(L, -1, "_NAME");
503 if (!lua_isnil(L, -1)) { /* is table an initialized module? */ 521 if (!lua_isnil(L, -1)) { /* Module already initialized? */
504 lua_pop(L, 1); 522 lua_pop(L, 1);
505 } else { /* no; initialize it */ 523 } else {
506 lua_pop(L, 1); 524 lua_pop(L, 1);
507 modinit(L, modname); 525 modinit(L, modname);
508 } 526 }
509 lua_pushvalue(L, -1); 527 lua_pushvalue(L, -1);
510 setfenv(L); 528 setfenv(L);
511 dooptions(L, loaded - 1); 529 dooptions(L, lastarg);
512 return 0; 530 return LJ_52;
513} 531}
514 532
515static int lj_cf_package_seeall(lua_State *L) 533static int lj_cf_package_seeall(lua_State *L)
@@ -580,13 +598,16 @@ LUALIB_API int luaopen_package(lua_State *L)
580 lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); 598 lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
581 lua_setfield(L, -2, "__gc"); 599 lua_setfield(L, -2, "__gc");
582 luaL_register(L, LUA_LOADLIBNAME, package_lib); 600 luaL_register(L, LUA_LOADLIBNAME, package_lib);
583 lua_pushvalue(L, -1); 601 lua_copy(L, -1, LUA_ENVIRONINDEX);
584 lua_replace(L, LUA_ENVIRONINDEX);
585 lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); 602 lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
586 for (i = 0; package_loaders[i] != NULL; i++) { 603 for (i = 0; package_loaders[i] != NULL; i++) {
587 lj_lib_pushcf(L, package_loaders[i], 1); 604 lj_lib_pushcf(L, package_loaders[i], 1);
588 lua_rawseti(L, -2, i+1); 605 lua_rawseti(L, -2, i+1);
589 } 606 }
607#if LJ_52
608 lua_pushvalue(L, -1);
609 lua_setfield(L, -3, "searchers");
610#endif
590 lua_setfield(L, -2, "loaders"); 611 lua_setfield(L, -2, "loaders");
591 lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); 612 lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
592 noenv = lua_toboolean(L, -1); 613 noenv = lua_toboolean(L, -1);
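
Two user-visible details from the lib_package.c hunks: the in-progress sentinel stored in _LOADED is now a tagged 64-bit constant instead of a lightuserdata (the "loop or previous error loading module" check is unchanged), and under LJ_52 the loaders table is additionally exposed as package.searchers. Sketch, assuming a -DLUAJIT_ENABLE_LUA52COMPAT build for the alias:

-- package loader table alias (LJ_52 builds only).
print(#package.loaders)                               -- number of registered loaders
print(rawequal(package.loaders, package.searchers))   --> true
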
diff --git a/src/lib_string.c b/src/lib_string.c
index 3e3653b6..255689ce 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -6,8 +6,6 @@
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h 6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 7*/
8 8
9#include <stdio.h>
10
11#define lib_string_c 9#define lib_string_c
12#define LUA_LIB 10#define LUA_LIB
13 11
@@ -18,6 +16,7 @@
18#include "lj_obj.h" 16#include "lj_obj.h"
19#include "lj_gc.h" 17#include "lj_gc.h"
20#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
21#include "lj_str.h" 20#include "lj_str.h"
22#include "lj_tab.h" 21#include "lj_tab.h"
23#include "lj_meta.h" 22#include "lj_meta.h"
@@ -25,17 +24,19 @@
25#include "lj_ff.h" 24#include "lj_ff.h"
26#include "lj_bcdump.h" 25#include "lj_bcdump.h"
27#include "lj_char.h" 26#include "lj_char.h"
27#include "lj_strfmt.h"
28#include "lj_lib.h" 28#include "lj_lib.h"
29 29
30/* ------------------------------------------------------------------------ */ 30/* ------------------------------------------------------------------------ */
31 31
32#define LJLIB_MODULE_string 32#define LJLIB_MODULE_string
33 33
34LJLIB_ASM(string_len) LJLIB_REC(.) 34LJLIB_LUA(string_len) /*
35{ 35 function(s)
36 lj_lib_checkstr(L, 1); 36 CHECK_str(s)
37 return FFH_RETRY; 37 return #s
38} 38 end
39*/
39 40
40LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) 41LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
41{ 42{
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
57 lj_state_checkstack(L, (MSize)n); 58 lj_state_checkstack(L, (MSize)n);
58 p = (const unsigned char *)strdata(s) + start; 59 p = (const unsigned char *)strdata(s) + start;
59 for (i = 0; i < n; i++) 60 for (i = 0; i < n; i++)
60 setintV(L->base + i-1, p[i]); 61 setintV(L->base + i-1-LJ_FR2, p[i]);
61 return FFH_RES(n); 62 return FFH_RES(n);
62} 63}
63 64
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char) LJLIB_REC(.)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); 68 char *buf = lj_buf_tmp(L, (MSize)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
71 lj_err_arg(L, i, LJ_ERR_BADVAL); 72 lj_err_arg(L, i, LJ_ERR_BADVAL);
72 buf[i-1] = (char)k; 73 buf[i-1] = (char)k;
73 } 74 }
74 setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); 75 setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
75 return FFH_RES(1); 76 return FFH_RES(1);
76} 77}
77 78
@@ -83,89 +84,73 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
83 return FFH_RETRY; 84 return FFH_RETRY;
84} 85}
85 86
86LJLIB_ASM(string_rep) 87LJLIB_CF(string_rep) LJLIB_REC(.)
87{ 88{
88 GCstr *s = lj_lib_checkstr(L, 1); 89 GCstr *s = lj_lib_checkstr(L, 1);
89 int32_t k = lj_lib_checkint(L, 2); 90 int32_t rep = lj_lib_checkint(L, 2);
90 GCstr *sep = lj_lib_optstr(L, 3); 91 GCstr *sep = lj_lib_optstr(L, 3);
91 int32_t len = (int32_t)s->len; 92 SBuf *sb = lj_buf_tmp_(L);
92 global_State *g = G(L); 93 if (sep && rep > 1) {
93 int64_t tlen; 94 GCstr *s2 = lj_buf_cat2str(L, sep, s);
94 const char *src; 95 lj_buf_reset(sb);
95 char *buf; 96 lj_buf_putstr(sb, s);
96 if (k <= 0) { 97 s = s2;
97 empty: 98 rep--;
98 setstrV(L, L->base-1, &g->strempty);
99 return FFH_RES(1);
100 }
101 if (sep) {
102 tlen = (int64_t)len + sep->len;
103 if (tlen > LJ_MAX_STR)
104 lj_err_caller(L, LJ_ERR_STROV);
105 tlen *= k;
106 if (tlen > LJ_MAX_STR)
107 lj_err_caller(L, LJ_ERR_STROV);
108 } else {
109 tlen = (int64_t)k * len;
110 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV);
112 } 99 }
113 if (tlen == 0) goto empty; 100 sb = lj_buf_putstr_rep(sb, s, rep);
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen); 101 setstrV(L, L->top-1, lj_buf_str(L, sb));
115 src = strdata(s); 102 lj_gc_check(L);
116 if (sep) { 103 return 1;
117 tlen -= sep->len; /* Ignore trailing separator. */
118 if (k > 1) { /* Paste one string and one separator. */
119 int32_t i;
120 i = 0; while (i < len) *buf++ = src[i++];
121 src = strdata(sep); len = sep->len;
122 i = 0; while (i < len) *buf++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */
124 }
125 }
126 do {
127 int32_t i = 0;
128 do { *buf++ = src[i++]; } while (i < len);
129 } while (--k > 0);
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1);
132} 104}
133 105
134LJLIB_ASM(string_reverse) 106LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
135{ 107{
136 GCstr *s = lj_lib_checkstr(L, 1); 108 lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
138 return FFH_RETRY; 109 return FFH_RETRY;
139} 110}
140LJLIB_ASM_(string_lower) 111LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
141LJLIB_ASM_(string_upper) 112LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
142 113
143/* ------------------------------------------------------------------------ */ 114/* ------------------------------------------------------------------------ */
144 115
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 116static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 117{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 118 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 119 UNUSED(L);
149 return 0; 120 return 0;
150} 121}
151 122
152LJLIB_CF(string_dump) 123LJLIB_CF(string_dump)
153{ 124{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 125 GCproto *pt = lj_lib_checkLproto(L, 1, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 126 uint32_t flags = 0;
156 luaL_Buffer b; 127 SBuf *sb;
128 TValue *o = L->base+1;
129 if (o < L->top) {
130 if (tvisstr(o)) {
131 const char *mode = strVdata(o);
132 char c;
133 while ((c = *mode++)) {
134 if (c == 's') flags |= BCDUMP_F_STRIP;
135 if (c == 'd') flags |= BCDUMP_F_DETERMINISTIC;
136 }
137 } else if (tvistruecond(o)) {
138 flags |= BCDUMP_F_STRIP;
139 }
140 }
141 sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
157 L->top = L->base+1; 142 L->top = L->base+1;
158 luaL_buffinit(L, &b); 143 if (!pt || lj_bcwrite(L, pt, writer_buf, sb, flags))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 144 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 145 setstrV(L, L->top-1, lj_buf_str(L, sb));
146 lj_gc_check(L);
162 return 1; 147 return 1;
163} 148}
164 149
165/* ------------------------------------------------------------------------ */ 150/* ------------------------------------------------------------------------ */
166 151
167/* macro to `unsign' a character */ 152/* macro to `unsign' a character */
168#define uchar(c) ((unsigned char)(c)) 153#define uchar(c) ((unsigned char)(c))
169 154
170#define CAP_UNFINISHED (-1) 155#define CAP_UNFINISHED (-1)
171#define CAP_POSITION (-2) 156#define CAP_POSITION (-2)
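
string.rep() above is rebuilt on the SBuf API (lj_buf_putstr_rep) and string.dump() now also accepts a mode string, where 's' maps to BCDUMP_F_STRIP and 'd' to BCDUMP_F_DETERMINISTIC; a plain true still strips. A usage sketch:

-- string.rep with separator and string.dump mode flags (per the parsing above).
print(string.rep("ab", 3, "-"))          --> ab-ab-ab
local function f(x) return x + 1 end
local full     = string.dump(f)          -- keep debug info
local stripped = string.dump(f, "s")     -- strip, same effect as the old `true`
local det      = string.dump(f, "sd")    -- strip + deterministic dump
assert(#stripped <= #full)
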
@@ -183,7 +168,6 @@ typedef struct MatchState {
183} MatchState; 168} MatchState;
184 169
185#define L_ESC '%' 170#define L_ESC '%'
186#define SPECIALS "^$*+?.([%-"
187 171
188static int check_capture(MatchState *ms, int l) 172static int check_capture(MatchState *ms, int l)
189{ 173{
@@ -450,30 +434,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
450 return s; 434 return s;
451} 435}
452 436
453static const char *lmemfind(const char *s1, size_t l1,
454 const char *s2, size_t l2)
455{
456 if (l2 == 0) {
457 return s1; /* empty strings are everywhere */
458 } else if (l2 > l1) {
459 return NULL; /* avoids a negative `l1' */
460 } else {
461 const char *init; /* to search for a `*s2' inside `s1' */
462 l2--; /* 1st char will be checked by `memchr' */
463 l1 = l1-l2; /* `s2' cannot be found after that */
464 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
465 init++; /* 1st char is already checked */
466 if (memcmp(init, s2+1, l2) == 0) {
467 return init-1;
468 } else { /* correct `l1' and `s1' to try again */
469 l1 -= (size_t)(init-s1);
470 s1 = init;
471 }
472 }
473 return NULL; /* not found */
474 }
475}
476
477static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) 437static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
478{ 438{
479 if (i >= ms->level) { 439 if (i >= ms->level) {
@@ -501,64 +461,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
501 return nlevels; /* number of strings pushed */ 461 return nlevels; /* number of strings pushed */
502} 462}
503 463
504static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
505{
506 /* relative string position: negative means back from end */
507 if (pos < 0) pos += (ptrdiff_t)len + 1;
508 return (pos >= 0) ? pos : 0;
509}
510
511static int str_find_aux(lua_State *L, int find) 464static int str_find_aux(lua_State *L, int find)
512{ 465{
513 size_t l1, l2; 466 GCstr *s = lj_lib_checkstr(L, 1);
514 const char *s = luaL_checklstring(L, 1, &l1); 467 GCstr *p = lj_lib_checkstr(L, 2);
515 const char *p = luaL_checklstring(L, 2, &l2); 468 int32_t start = lj_lib_optint(L, 3, 1);
516 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; 469 MSize st;
517 if (init < 0) { 470 if (start < 0) start += (int32_t)s->len; else start--;
518 init = 0; 471 if (start < 0) start = 0;
519 } else if ((size_t)(init) > l1) { 472 st = (MSize)start;
473 if (st > s->len) {
520#if LJ_52 474#if LJ_52
521 setnilV(L->top-1); 475 setnilV(L->top-1);
522 return 1; 476 return 1;
523#else 477#else
524 init = (ptrdiff_t)l1; 478 st = s->len;
525#endif 479#endif
526 } 480 }
527 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 481 if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
528 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 482 !lj_str_haspattern(p))) { /* Search for fixed string. */
529 /* do a plain search */ 483 const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
530 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); 484 if (q) {
531 if (s2) { 485 setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
532 lua_pushinteger(L, s2-s+1); 486 setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
533 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
534 return 2; 487 return 2;
535 } 488 }
536 } else { 489 } else { /* Search for pattern. */
537 MatchState ms; 490 MatchState ms;
538 int anchor = (*p == '^') ? (p++, 1) : 0; 491 const char *pstr = strdata(p);
539 const char *s1=s+init; 492 const char *sstr = strdata(s) + st;
493 int anchor = 0;
494 if (*pstr == '^') { pstr++; anchor = 1; }
540 ms.L = L; 495 ms.L = L;
541 ms.src_init = s; 496 ms.src_init = strdata(s);
542 ms.src_end = s+l1; 497 ms.src_end = strdata(s) + s->len;
543 do { 498 do { /* Loop through string and try to match the pattern. */
544 const char *res; 499 const char *q;
545 ms.level = ms.depth = 0; 500 ms.level = ms.depth = 0;
546 if ((res=match(&ms, s1, p)) != NULL) { 501 q = match(&ms, sstr, pstr);
502 if (q) {
547 if (find) { 503 if (find) {
548 lua_pushinteger(L, s1-s+1); /* start */ 504 setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
549 lua_pushinteger(L, res-s); /* end */ 505 setintV(L->top++, (int32_t)(q-strdata(s)));
550 return push_captures(&ms, NULL, 0) + 2; 506 return push_captures(&ms, NULL, NULL) + 2;
551 } else { 507 } else {
552 return push_captures(&ms, s1, res); 508 return push_captures(&ms, sstr, q);
553 } 509 }
554 } 510 }
555 } while (s1++ < ms.src_end && !anchor); 511 } while (sstr++ < ms.src_end && !anchor);
556 } 512 }
557 lua_pushnil(L); /* not found */ 513 setnilV(L->top-1); /* Not found. */
558 return 1; 514 return 1;
559} 515}
560 516
561LJLIB_CF(string_find) 517LJLIB_CF(string_find) LJLIB_REC(.)
562{ 518{
563 return str_find_aux(L, 1); 519 return str_find_aux(L, 1);
564} 520}
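
str_find_aux() above now decides between the fixed-string and pattern paths with lj_str_haspattern()/lj_str_find() instead of strpbrk()/lmemfind(), and string.find gains LJLIB_REC so it can be recorded by the JIT. The observable results are the same:

-- Both search paths, exercised from Lua.
print(string.find("a+b=c", "+", 1, true))   --> 2  2   (explicit plain search)
print(string.find("hello world", "wor"))    --> 7  9   (no specials: fixed-string path)
print(string.find("hello world", "o+"))     --> 5  5   (pattern matcher path)
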
@@ -698,222 +654,16 @@ LJLIB_CF(string_gsub)
698 654
699/* ------------------------------------------------------------------------ */ 655/* ------------------------------------------------------------------------ */
700 656
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 657LJLIB_CF(string_format) LJLIB_REC(.)
702#define MAX_FMTITEM 512
703/* valid flags in a format specification */
704#define FMT_FLAGS "-+ #0"
705/*
706** maximum size of each format specification (such as '%-099.99d')
707** (+10 accounts for %99.99x plus margin of error)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
712{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str);
716 luaL_addchar(b, '"');
717 while (len--) {
718 uint32_t c = uchar(*s);
719 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\');
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d;
723 luaL_addchar(b, '\\');
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens;
727 } else if (c >= 10) {
728 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
730 }
731 c += '0';
732 }
733 luaL_addchar(b, c);
734 s++;
735 }
736 luaL_addchar(b, '"');
737}
738
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
740{ 658{
741 const char *p = strfrmt; 659 int retry = 0;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ 660 SBuf *sb;
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) 661 do {
744 lj_err_caller(L, LJ_ERR_STRFMTR); 662 sb = lj_buf_tmp_(L);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ 663 retry = lj_strfmt_putarg(L, sb, 1, -retry);
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ 664 } while (retry > 0);
747 if (*p == '.') { 665 setstrV(L, L->top-1, lj_buf_str(L, sb));
748 p++; 666 lj_gc_check(L);
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
751 }
752 if (lj_char_isdigit(uchar(*p)))
753 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
756 form += p - strfrmt + 1;
757 *form = '\0';
758 return p;
759}
760
761static void addintlen(char *form)
762{
763 size_t l = strlen(form);
764 char spec = form[l - 1];
765 strcpy(form + l - 1, LUA_INTFRMLEN);
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
768}
769
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
771{
772 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
774 } else {
775 cTValue *o;
776 lj_lib_checknumber(L, arg);
777 o = L->base+arg-1;
778 if (tvisint(o))
779 return (LUA_INTFRM_T)intV(o);
780 else
781 return (LUA_INTFRM_T)numV(o);
782 }
783}
784
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
786{
787 if (sizeof(LUA_INTFRM_T) == 4) {
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
789 } else {
790 cTValue *o;
791 lj_lib_checknumber(L, arg);
792 o = L->base+arg-1;
793 if (tvisint(o))
794 return (unsigned LUA_INTFRM_T)intV(o);
795 else if ((int32_t)o->u32.hi < 0)
796 return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
797 else
798 return (unsigned LUA_INTFRM_T)numV(o);
799 }
800}
801
802static GCstr *meta_tostring(lua_State *L, int arg)
803{
804 TValue *o = L->base+arg-1;
805 cTValue *mo;
806 lua_assert(o < L->top); /* Caller already checks for existence. */
807 if (LJ_LIKELY(tvisstr(o)))
808 return strV(o);
809 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
810 copyTV(L, L->top++, mo);
811 copyTV(L, L->top++, o);
812 lua_call(L, 1, 1);
813 L->top--;
814 if (tvisstr(L->top))
815 return strV(L->top);
816 o = L->base+arg-1;
817 copyTV(L, o, L->top);
818 }
819 if (tvisnumber(o)) {
820 return lj_str_fromnumber(L, o);
821 } else if (tvisnil(o)) {
822 return lj_str_newlit(L, "nil");
823 } else if (tvisfalse(o)) {
824 return lj_str_newlit(L, "false");
825 } else if (tvistrue(o)) {
826 return lj_str_newlit(L, "true");
827 } else {
828 if (tvisfunc(o) && isffunc(funcV(o)))
829 lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
830 else
831 lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
832 L->top--;
833 return strV(L->top);
834 }
835}
836
837LJLIB_CF(string_format)
838{
839 int arg = 1, top = (int)(L->top - L->base);
840 GCstr *fmt = lj_lib_checkstr(L, arg);
841 const char *strfrmt = strdata(fmt);
842 const char *strfrmt_end = strfrmt + fmt->len;
843 luaL_Buffer b;
844 luaL_buffinit(L, &b);
845 while (strfrmt < strfrmt_end) {
846 if (*strfrmt != L_ESC) {
847 luaL_addchar(&b, *strfrmt++);
848 } else if (*++strfrmt == L_ESC) {
849 luaL_addchar(&b, *strfrmt++); /* %% */
850 } else { /* format item */
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */
852 char buff[MAX_FMTITEM]; /* to store the formatted item */
853 int n = 0;
854 if (++arg > top)
855 luaL_argerror(L, arg, lj_obj_typename[0]);
856 strfrmt = scanformat(L, strfrmt, form);
857 switch (*strfrmt++) {
858 case 'c':
859 n = sprintf(buff, form, lj_lib_checkint(L, arg));
860 break;
861 case 'd': case 'i':
862 addintlen(form);
863 n = sprintf(buff, form, num2intfrm(L, arg));
864 break;
865 case 'o': case 'u': case 'x': case 'X':
866 addintlen(form);
867 n = sprintf(buff, form, num2uintfrm(L, arg));
868 break;
869 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
870 TValue tv;
871 tv.n = lj_lib_checknum(L, arg);
872 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
873 /* Canonicalize output of non-finite values. */
874 char *p, nbuf[LJ_STR_NUMBUF];
875 size_t len = lj_str_bufnum(nbuf, &tv);
876 if (strfrmt[-1] < 'a') {
877 nbuf[len-3] = nbuf[len-3] - 0x20;
878 nbuf[len-2] = nbuf[len-2] - 0x20;
879 nbuf[len-1] = nbuf[len-1] - 0x20;
880 }
881 nbuf[len] = '\0';
882 for (p = form; *p < 'A' && *p != '.'; p++) ;
883 *p++ = 's'; *p = '\0';
884 n = sprintf(buff, form, nbuf);
885 break;
886 }
887 n = sprintf(buff, form, (double)tv.n);
888 break;
889 }
890 case 'q':
891 addquoted(L, &b, arg);
892 continue;
893 case 'p':
894 lj_str_pushf(L, "%p", lua_topointer(L, arg));
895 luaL_addvalue(&b);
896 continue;
897 case 's': {
898 GCstr *str = meta_tostring(L, arg);
899 if (!strchr(form, '.') && str->len >= 100) {
900 /* no precision and string is too long to be formatted;
901 keep original string */
902 setstrV(L, L->top++, str);
903 luaL_addvalue(&b);
904 continue;
905 }
906 n = sprintf(buff, form, strdata(str));
907 break;
908 }
909 default:
910 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
911 break;
912 }
913 luaL_addlstring(&b, buff, n);
914 }
915 }
916 luaL_pushresult(&b);
917 return 1; 667 return 1;
918} 668}
919 669
@@ -926,16 +676,15 @@ LUALIB_API int luaopen_string(lua_State *L)
926 GCtab *mt; 676 GCtab *mt;
927 global_State *g; 677 global_State *g;
928 LJ_LIB_REG(L, LUA_STRLIBNAME, string); 678 LJ_LIB_REG(L, LUA_STRLIBNAME, string);
929#if defined(LUA_COMPAT_GFIND) && !LJ_52
930 lua_getfield(L, -1, "gmatch");
931 lua_setfield(L, -2, "gfind");
932#endif
933 mt = lj_tab_new(L, 0, 1); 679 mt = lj_tab_new(L, 0, 1);
934 /* NOBARRIER: basemt is a GC root. */ 680 /* NOBARRIER: basemt is a GC root. */
935 g = G(L); 681 g = G(L);
936 setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); 682 setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
937 settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); 683 settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
938 mt->nomm = (uint8_t)(~(1u<<MM_index)); 684 mt->nomm = (uint8_t)(~(1u<<MM_index));
685#if LJ_HASBUFFER
686 lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
687#endif
939 return 1; 688 return 1;
940} 689}
941 690
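
luaopen_string() above drops the gfind compatibility alias and pre-registers the new string buffer library (LJ_HASBUFFER), so it resolves lazily through require. A short sketch of that path, assuming a build with the buffer extension enabled:

-- Lazily loaded string.buffer module (see lj_lib_prereg above).
local buffer = require("string.buffer")
local buf = buffer.new()
buf:put("hello", " ", 42)
print(buf:tostring())   --> hello 42
print(#buf)             --> 8
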
diff --git a/src/lib_table.c b/src/lib_table.c
index c2a1f18f..97ba2847 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,57 +16,43 @@
16#include "lj_obj.h" 16#include "lj_obj.h"
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
21#include "lj_ff.h"
20#include "lj_lib.h" 22#include "lj_lib.h"
21 23
22/* ------------------------------------------------------------------------ */ 24/* ------------------------------------------------------------------------ */
23 25
24#define LJLIB_MODULE_table 26#define LJLIB_MODULE_table
25 27
26LJLIB_CF(table_foreachi) 28LJLIB_LUA(table_foreachi) /*
27{ 29 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 30 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 31 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 32 for i=1,#t do
31 for (i = 1; i <= n; i++) { 33 local r = f(i, t[i])
32 cTValue *val; 34 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 35 end
34 setintV(L->top+1, i); 36 end
35 val = lj_tab_getint(t, (int32_t)i); 37*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 38
46LJLIB_CF(table_foreach) 39LJLIB_LUA(table_foreach) /*
47{ 40 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 41 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 42 CHECK_func(f)
50 L->top = L->base+3; 43 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 44 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 45 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 46 end
54 copyTV(L, L->top+1, L->top-1); 47 end
55 setfuncV(L, L->top, func); 48*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 49
65LJLIB_ASM(table_getn) LJLIB_REC(.) 50LJLIB_LUA(table_getn) /*
66{ 51 function(t)
67 lj_lib_checktab(L, 1); 52 CHECK_tab(t)
68 return FFH_UNREACHABLE; 53 return #t
69} 54 end
55*/
70 56
71LJLIB_CF(table_maxn) 57LJLIB_CF(table_maxn)
72{ 58{
@@ -119,52 +105,67 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
119 return 0; 105 return 0;
120} 106}
121 107
122LJLIB_CF(table_remove) LJLIB_REC(.) 108LJLIB_LUA(table_remove) /*
123{ 109 function(t, pos)
124 GCtab *t = lj_lib_checktab(L, 1); 110 CHECK_tab(t)
125 int32_t e = (int32_t)lj_tab_len(t); 111 local len = #t
126 int32_t pos = lj_lib_optint(L, 2, e); 112 if pos == nil then
127 if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ 113 if len ~= 0 then
128 return 0; 114 local old = t[len]
129 lua_rawgeti(L, 1, pos); /* Get previous value. */ 115 t[len] = nil
130 /* NOBARRIER: This just moves existing elements around. */ 116 return old
131 for (; pos < e; pos++) { 117 end
132 cTValue *src = lj_tab_getint(t, pos+1); 118 else
133 TValue *dst = lj_tab_setint(L, t, pos); 119 CHECK_int(pos)
134 if (src) { 120 if pos >= 1 and pos <= len then
135 copyTV(L, dst, src); 121 local old = t[pos]
136 } else { 122 for i=pos+1,len do
137 setnilV(dst); 123 t[i-1] = t[i]
138 } 124 end
139 } 125 t[len] = nil
140 setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ 126 return old
141 return 1; /* Return previous value. */ 127 end
142} 128 end
129 end
130*/
131
132LJLIB_LUA(table_move) /*
133 function(a1, f, e, t, a2)
134 CHECK_tab(a1)
135 CHECK_int(f)
136 CHECK_int(e)
137 CHECK_int(t)
138 if a2 == nil then a2 = a1 end
139 CHECK_tab(a2)
140 if e >= f then
141 local d = t - f
142 if t > e or t <= f or a2 ~= a1 then
143 for i=f,e do a2[i+d] = a1[i] end
144 else
145 for i=e,f,-1 do a2[i+d] = a1[i] end
146 end
147 end
148 return a2
149 end
150*/
143 151
144LJLIB_CF(table_concat) 152LJLIB_CF(table_concat) LJLIB_REC(.)
145{ 153{
146 luaL_Buffer b;
147 GCtab *t = lj_lib_checktab(L, 1); 154 GCtab *t = lj_lib_checktab(L, 1);
148 GCstr *sep = lj_lib_optstr(L, 2); 155 GCstr *sep = lj_lib_optstr(L, 2);
149 MSize seplen = sep ? sep->len : 0;
150 int32_t i = lj_lib_optint(L, 3, 1); 156 int32_t i = lj_lib_optint(L, 3, 1);
151 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? 157 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
152 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); 158 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
153 luaL_buffinit(L, &b); 159 SBuf *sb = lj_buf_tmp_(L);
154 if (i <= e) { 160 SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
155 for (;;) { 161 if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
156 cTValue *o; 162 int32_t idx = (int32_t)(intptr_t)sb->w;
157 lua_rawgeti(L, 1, i); 163 cTValue *o = lj_tab_getint(t, idx);
158 o = L->top-1; 164 lj_err_callerv(L, LJ_ERR_TABCAT,
159 if (!(tvisstr(o) || tvisnumber(o))) 165 lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
160 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
161 luaL_addvalue(&b);
162 if (i++ == e) break;
163 if (seplen)
164 luaL_addlstring(&b, strdata(sep), seplen);
165 }
166 } 166 }
167 luaL_pushresult(&b); 167 setstrV(L, L->top-1, lj_buf_str(L, sbx));
168 lj_gc_check(L);
168 return 1; 169 return 1;
169} 170}
170 171
@@ -284,6 +285,30 @@ LJLIB_CF(table_pack)
284} 285}
285#endif 286#endif
286 287
288LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.)
289{
290 int32_t a = lj_lib_checkint(L, 1);
291 int32_t h = lj_lib_checkint(L, 2);
292 lua_createtable(L, a, h);
293 return 1;
294}
295
296LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.)
297{
298 lj_tab_clear(lj_lib_checktab(L, 1));
299 return 0;
300}
301
302static int luaopen_table_new(lua_State *L)
303{
304 return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
305}
306
307static int luaopen_table_clear(lua_State *L)
308{
309 return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
310}
311
287/* ------------------------------------------------------------------------ */ 312/* ------------------------------------------------------------------------ */
288 313
289#include "lj_libdef.h" 314#include "lj_libdef.h"
@@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L)
295 lua_getglobal(L, "unpack"); 320 lua_getglobal(L, "unpack");
296 lua_setfield(L, -2, "unpack"); 321 lua_setfield(L, -2, "unpack");
297#endif 322#endif
323 lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
324 lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
298 return 1; 325 return 1;
299} 326}
300 327
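
table.new() and table.clear() are compiled but not registered by default (LJLIB_NOREG); the lj_lib_prereg() calls above make them loadable as modules. A sketch of the intended usage:

-- Pre-sized and reusable tables via the pre-registered helpers.
local tnew   = require("table.new")
local tclear = require("table.clear")
local t = tnew(1000, 0)     -- preallocate 1000 array slots, 0 hash slots
for i = 1, 1000 do t[i] = i end
tclear(t)                   -- drop all keys, keep the table and its allocation
print(next(t))              --> nil
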
diff --git a/src/lj.supp b/src/lj.supp
deleted file mode 100644
index 217f7c89..00000000
--- a/src/lj.supp
+++ /dev/null
@@ -1,41 +0,0 @@
1# Valgrind suppression file for LuaJIT 2.0.
2{
3 Optimized string compare
4 Memcheck:Addr4
5 fun:lj_str_cmp
6}
7{
8 Optimized string compare
9 Memcheck:Addr1
10 fun:lj_str_cmp
11}
12{
13 Optimized string compare
14 Memcheck:Addr4
15 fun:lj_str_new
16}
17{
18 Optimized string compare
19 Memcheck:Addr1
20 fun:lj_str_new
21}
22{
23 Optimized string compare
24 Memcheck:Cond
25 fun:lj_str_new
26}
27{
28 Optimized string compare
29 Memcheck:Addr4
30 fun:str_fastcmp
31}
32{
33 Optimized string compare
34 Memcheck:Addr1
35 fun:str_fastcmp
36}
37{
38 Optimized string compare
39 Memcheck:Cond
40 fun:str_fastcmp
41}
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 0c0c0c4f..cb704f7b 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -31,6 +31,7 @@
31#include "lj_def.h" 31#include "lj_def.h"
32#include "lj_arch.h" 32#include "lj_arch.h"
33#include "lj_alloc.h" 33#include "lj_alloc.h"
34#include "lj_prng.h"
34 35
35#ifndef LUAJIT_USE_SYSMALLOC 36#ifndef LUAJIT_USE_SYSMALLOC
36 37
@@ -72,15 +73,58 @@
72 73
73#define IS_DIRECT_BIT (SIZE_T_ONE) 74#define IS_DIRECT_BIT (SIZE_T_ONE)
74 75
76
77/* Determine system-specific block allocation method. */
75#if LJ_TARGET_WINDOWS 78#if LJ_TARGET_WINDOWS
76 79
77#define WIN32_LEAN_AND_MEAN 80#define WIN32_LEAN_AND_MEAN
78#include <windows.h> 81#include <windows.h>
79 82
83#define LJ_ALLOC_VIRTUALALLOC 1
84
85#if LJ_64 && !LJ_GC64
86#define LJ_ALLOC_NTAVM 1
87#endif
88
89#else
90
91#include <errno.h>
92/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
93#include <sys/mman.h>
94
95#define LJ_ALLOC_MMAP 1
96
80#if LJ_64 97#if LJ_64
81 98
99#define LJ_ALLOC_MMAP_PROBE 1
100
101#if LJ_GC64
102#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
103#elif LJ_TARGET_X64 && LJ_HASJIT
104/* Due to limitations in the x64 compiler backend. */
105#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
106#else
107#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
108#endif
109
110#endif
111
112#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
113#define LJ_ALLOC_MMAP32 1
114#endif
115
116#if LJ_TARGET_LINUX
117#define LJ_ALLOC_MREMAP 1
118#endif
119
120#endif
121
122
123#if LJ_ALLOC_VIRTUALALLOC
124
125#if LJ_ALLOC_NTAVM
82/* Undocumented, but hey, that's what we all love so much about Windows. */ 126/* Undocumented, but hey, that's what we all love so much about Windows. */
83typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, 127typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits,
84 size_t *size, ULONG alloctype, ULONG prot); 128 size_t *size, ULONG alloctype, ULONG prot);
85static PNTAVM ntavm; 129static PNTAVM ntavm;
86 130
@@ -89,14 +133,15 @@ static PNTAVM ntavm;
89*/ 133*/
90#define NTAVM_ZEROBITS 1 134#define NTAVM_ZEROBITS 1
91 135
92static void INIT_MMAP(void) 136static void init_mmap(void)
93{ 137{
94 ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), 138 ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
95 "NtAllocateVirtualMemory"); 139 "NtAllocateVirtualMemory");
96} 140}
141#define INIT_MMAP() init_mmap()
97 142
98/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ 143/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
99static LJ_AINLINE void *CALL_MMAP(size_t size) 144static void *mmap_plain(size_t size)
100{ 145{
101 DWORD olderr = GetLastError(); 146 DWORD olderr = GetLastError();
102 void *ptr = NULL; 147 void *ptr = NULL;
@@ -107,7 +152,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
107} 152}
108 153
109/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ 154/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
110static LJ_AINLINE void *DIRECT_MMAP(size_t size) 155static void *direct_mmap(size_t size)
111{ 156{
112 DWORD olderr = GetLastError(); 157 DWORD olderr = GetLastError();
113 void *ptr = NULL; 158 void *ptr = NULL;
@@ -119,31 +164,32 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
119 164
120#else 165#else
121 166
122#define INIT_MMAP() ((void)0)
123
124/* Win32 MMAP via VirtualAlloc */ 167/* Win32 MMAP via VirtualAlloc */
125static LJ_AINLINE void *CALL_MMAP(size_t size) 168static void *mmap_plain(size_t size)
126{ 169{
127 DWORD olderr = GetLastError(); 170 DWORD olderr = GetLastError();
128 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); 171 void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
129 SetLastError(olderr); 172 SetLastError(olderr);
130 return ptr ? ptr : MFAIL; 173 return ptr ? ptr : MFAIL;
131} 174}
132 175
133/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ 176/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
134static LJ_AINLINE void *DIRECT_MMAP(size_t size) 177static void *direct_mmap(size_t size)
135{ 178{
136 DWORD olderr = GetLastError(); 179 DWORD olderr = GetLastError();
137 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, 180 void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
138 PAGE_READWRITE); 181 PAGE_READWRITE);
139 SetLastError(olderr); 182 SetLastError(olderr);
140 return ptr ? ptr : MFAIL; 183 return ptr ? ptr : MFAIL;
141} 184}
142 185
143#endif 186#endif
144 187
188#define CALL_MMAP(prng, size) mmap_plain(size)
189#define DIRECT_MMAP(prng, size) direct_mmap(size)
190
145/* This function supports releasing coalesed segments */ 191/* This function supports releasing coalesed segments */
146static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) 192static int CALL_MUNMAP(void *ptr, size_t size)
147{ 193{
148 DWORD olderr = GetLastError(); 194 DWORD olderr = GetLastError();
149 MEMORY_BASIC_INFORMATION minfo; 195 MEMORY_BASIC_INFORMATION minfo;
@@ -163,10 +209,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
163 return 0; 209 return 0;
164} 210}
165 211
166#else 212#elif LJ_ALLOC_MMAP
167
168#include <errno.h>
169#include <sys/mman.h>
170 213
171#define MMAP_PROT (PROT_READ|PROT_WRITE) 214#define MMAP_PROT (PROT_READ|PROT_WRITE)
172#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) 215#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@@ -174,105 +217,134 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
174#endif 217#endif
175#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) 218#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
176 219
177#if LJ_64 220#if LJ_ALLOC_MMAP_PROBE
178/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
179 221
180#if defined(MAP_32BIT) 222#ifdef MAP_TRYFIXED
181 223#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED)
182#if defined(__sun__)
183#define MMAP_REGION_START ((uintptr_t)0x1000)
184#else 224#else
185/* Actually this only gives us max. 1GB in current Linux kernels. */ 225#define MMAP_FLAGS_PROBE MMAP_FLAGS
186#define MMAP_REGION_START ((uintptr_t)0)
187#endif 226#endif
188 227
189static LJ_AINLINE void *CALL_MMAP(size_t size) 228#define LJ_ALLOC_MMAP_PROBE_MAX 30
229#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
230
231#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
232
233static void *mmap_probe(PRNGState *rs, size_t size)
190{ 234{
235 /* Hint for next allocation. Doesn't need to be thread-safe. */
236 static uintptr_t hint_addr = 0;
191 int olderr = errno; 237 int olderr = errno;
192 void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); 238 int retry;
239 for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
240 void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
241 uintptr_t addr = (uintptr_t)p;
242 if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
243 ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
244 /* We got a suitable address. Bump the hint address. */
245 hint_addr = addr + size;
246 errno = olderr;
247 return p;
248 }
249 if (p != MFAIL) {
250 munmap(p, size);
251 } else if (errno == ENOMEM) {
252 return MFAIL;
253 }
254 if (hint_addr) {
255 /* First, try linear probing. */
256 if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
257 hint_addr += 0x1000000;
258 if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
259 hint_addr = 0;
260 continue;
261 } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
262 /* Next, try a no-hint probe to get back an ASLR address. */
263 hint_addr = 0;
264 continue;
265 }
266 }
267 /* Finally, try pseudo-random probing. */
268 do {
269 hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE);
270 } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
271 }
193 errno = olderr; 272 errno = olderr;
194 return ptr; 273 return MFAIL;
195} 274}
196 275
197#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN 276#endif
277
278#if LJ_ALLOC_MMAP32
198 279
199/* OSX and FreeBSD mmap() use a naive first-fit linear search. 280#if LJ_TARGET_SOLARIS
200** That's perfect for us. Except that -pagezero_size must be set for OSX, 281#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
201** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
202** to be reduced to 250MB on FreeBSD.
203*/
204#if LJ_TARGET_OSX || defined(__DragonFly__)
205#define MMAP_REGION_START ((uintptr_t)0x10000)
206#elif LJ_TARGET_PS4
207#define MMAP_REGION_START ((uintptr_t)0x4000)
208#else 282#else
209#define MMAP_REGION_START ((uintptr_t)0x10000000) 283#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
210#endif 284#endif
211#define MMAP_REGION_END ((uintptr_t)0x80000000)
212 285
213#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 286#if LJ_ALLOC_MMAP_PROBE
214#include <sys/resource.h> 287static void *mmap_map32(PRNGState *rs, size_t size)
288#else
289static void *mmap_map32(size_t size)
215#endif 290#endif
216
217static LJ_AINLINE void *CALL_MMAP(size_t size)
218{ 291{
219 int olderr = errno; 292#if LJ_ALLOC_MMAP_PROBE
220 /* Hint for next allocation. Doesn't need to be thread-safe. */ 293 static int fallback = 0;
221 static uintptr_t alloc_hint = MMAP_REGION_START; 294 if (fallback)
222 int retry = 0; 295 return mmap_probe(rs, size);
223#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
224 static int rlimit_modified = 0;
225 if (LJ_UNLIKELY(rlimit_modified == 0)) {
226 struct rlimit rlim;
227 rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
228 setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */
229 rlimit_modified = 1;
230 }
231#endif 296#endif
232 for (;;) { 297 {
233 void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); 298 int olderr = errno;
234 if ((uintptr_t)p >= MMAP_REGION_START && 299 void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
235 (uintptr_t)p + size < MMAP_REGION_END) { 300 errno = olderr;
236 alloc_hint = (uintptr_t)p + size; 301 /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */
237 errno = olderr; 302#if LJ_ALLOC_MMAP_PROBE
238 return p; 303 if (ptr == MFAIL) {
304 fallback = 1;
305 return mmap_probe(rs, size);
239 } 306 }
240 if (p != CMFAIL) munmap(p, size);
241#if defined(__sun__) || defined(__DragonFly__)
242 alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */
243 if (alloc_hint + size < MMAP_REGION_END) continue;
244#endif 307#endif
245 if (retry) break; 308 return ptr;
246 retry = 1;
247 alloc_hint = MMAP_REGION_START;
248 } 309 }
249 errno = olderr;
250 return CMFAIL;
251} 310}
252 311
253#else
254
255#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
256
257#endif 312#endif
258 313
314#if LJ_ALLOC_MMAP32
315#if LJ_ALLOC_MMAP_PROBE
316#define CALL_MMAP(prng, size) mmap_map32(prng, size)
259#else 317#else
260 318#define CALL_MMAP(prng, size) mmap_map32(size)
261/* 32 bit mode is easy. */ 319#endif
262static LJ_AINLINE void *CALL_MMAP(size_t size) 320#elif LJ_ALLOC_MMAP_PROBE
321#define CALL_MMAP(prng, size) mmap_probe(prng, size)
322#else
323static void *mmap_plain(size_t size)
263{ 324{
264 int olderr = errno; 325 int olderr = errno;
265 void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); 326 void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
266 errno = olderr; 327 errno = olderr;
267 return ptr; 328 return ptr;
268} 329}
269
330#define CALL_MMAP(prng, size) mmap_plain(size)
270#endif 331#endif
271 332
272#define INIT_MMAP() ((void)0)
333#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 && !LJ_TARGET_PS5
273#define DIRECT_MMAP(s) CALL_MMAP(s)
334
335#include <sys/resource.h>
274 336
275static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
337static void init_mmap(void)
338{
339 struct rlimit rlim;
340 rlim.rlim_cur = rlim.rlim_max = 0x10000;
341 setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
342}
343#define INIT_MMAP() init_mmap()
344
345#endif
346
347static int CALL_MUNMAP(void *ptr, size_t size)
276{ 348{
277 int olderr = errno; 349 int olderr = errno;
278 int ret = munmap(ptr, size); 350 int ret = munmap(ptr, size);
@@ -280,10 +352,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
280 return ret; 352 return ret;
281} 353}
282 354
283#if LJ_TARGET_LINUX
355#if LJ_ALLOC_MREMAP
284/* Need to define _GNU_SOURCE to get the mremap prototype. */ 356/* Need to define _GNU_SOURCE to get the mremap prototype. */
285static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
357static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
286 int flags)
287{ 358{
288 int olderr = errno; 359 int olderr = errno;
289 ptr = mremap(ptr, osz, nsz, flags); 360 ptr = mremap(ptr, osz, nsz, flags);
@@ -294,7 +365,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
294#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) 365#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
295#define CALL_MREMAP_NOMOVE 0 366#define CALL_MREMAP_NOMOVE 0
296#define CALL_MREMAP_MAYMOVE 1 367#define CALL_MREMAP_MAYMOVE 1
297#if LJ_64
368#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
298#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE 369#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
299#else 370#else
300#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE 371#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
@@ -303,6 +374,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
303 374
304#endif 375#endif
305 376
377
378#ifndef INIT_MMAP
379#define INIT_MMAP() ((void)0)
380#endif
381
382#ifndef DIRECT_MMAP
383#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s)
384#endif
385
306#ifndef CALL_MREMAP 386#ifndef CALL_MREMAP
307#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) 387#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
308#endif 388#endif
@@ -459,6 +539,7 @@ struct malloc_state {
459 mchunkptr smallbins[(NSMALLBINS+1)*2]; 539 mchunkptr smallbins[(NSMALLBINS+1)*2];
460 tbinptr treebins[NTREEBINS]; 540 tbinptr treebins[NTREEBINS];
461 msegment seg; 541 msegment seg;
542 PRNGState *prng;
462}; 543};
463 544
464typedef struct malloc_state *mstate; 545typedef struct malloc_state *mstate;
@@ -516,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss)
516 noncontiguous segments are added. 597 noncontiguous segments are added.
517*/ 598*/
518#define TOP_FOOT_SIZE\ 599#define TOP_FOOT_SIZE\
519 (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
600 (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
520 601
521/* ---------------------------- Indexing Bins ---------------------------- */ 602/* ---------------------------- Indexing Bins ---------------------------- */
522 603
@@ -741,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss)
741 822
742/* ----------------------- Direct-mmapping chunks ----------------------- */ 823/* ----------------------- Direct-mmapping chunks ----------------------- */
743 824
744static void *direct_alloc(size_t nb)
825static void *direct_alloc(mstate m, size_t nb)
745{ 826{
746 size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); 827 size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
747 if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ 828 if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */
748 char *mm = (char *)(DIRECT_MMAP(mmsize));
829 char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize));
749 if (mm != CMFAIL) { 830 if (mm != CMFAIL) {
750 size_t offset = align_offset(chunk2mem(mm)); 831 size_t offset = align_offset(chunk2mem(mm));
751 size_t psize = mmsize - offset - DIRECT_FOOT_PAD; 832 size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
@@ -757,6 +838,7 @@ static void *direct_alloc(size_t nb)
757 return chunk2mem(p); 838 return chunk2mem(p);
758 } 839 }
759 } 840 }
841 UNUSED(m);
760 return NULL; 842 return NULL;
761} 843}
762 844
@@ -905,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb)
905 987
906 /* Directly map large chunks */ 988 /* Directly map large chunks */
907 if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { 989 if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
908 void *mem = direct_alloc(nb);
990 void *mem = direct_alloc(m, nb);
909 if (mem != 0) 991 if (mem != 0)
910 return mem; 992 return mem;
911 } 993 }
@@ -914,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb)
914 size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; 996 size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
915 size_t rsize = granularity_align(req); 997 size_t rsize = granularity_align(req);
916 if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ 998 if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
917 char *mp = (char *)(CALL_MMAP(rsize));
999 char *mp = (char *)(CALL_MMAP(m->prng, rsize));
918 if (mp != CMFAIL) { 1000 if (mp != CMFAIL) {
919 tbase = mp; 1001 tbase = mp;
920 tsize = rsize; 1002 tsize = rsize;
@@ -1141,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb)
1141 1223
1142/* ----------------------------------------------------------------------- */ 1224/* ----------------------------------------------------------------------- */
1143 1225
1144void *lj_alloc_create(void)
1226void *lj_alloc_create(PRNGState *rs)
1145{ 1227{
1146 size_t tsize = DEFAULT_GRANULARITY; 1228 size_t tsize = DEFAULT_GRANULARITY;
1147 char *tbase; 1229 char *tbase;
1148 INIT_MMAP(); 1230 INIT_MMAP();
1149 tbase = (char *)(CALL_MMAP(tsize));
1231 UNUSED(rs);
1232 tbase = (char *)(CALL_MMAP(rs, tsize));
1150 if (tbase != CMFAIL) { 1233 if (tbase != CMFAIL) {
1151 size_t msize = pad_request(sizeof(struct malloc_state)); 1234 size_t msize = pad_request(sizeof(struct malloc_state));
1152 mchunkptr mn; 1235 mchunkptr mn;
@@ -1165,6 +1248,12 @@ void *lj_alloc_create(void)
1165 return NULL; 1248 return NULL;
1166} 1249}
1167 1250
1251void lj_alloc_setprng(void *msp, PRNGState *rs)
1252{
1253 mstate ms = (mstate)msp;
1254 ms->prng = rs;
1255}
1256
1168void lj_alloc_destroy(void *msp) 1257void lj_alloc_destroy(void *msp)
1169{ 1258{
1170 mstate ms = (mstate)msp; 1259 mstate ms = (mstate)msp;
diff --git a/src/lj_alloc.h b/src/lj_alloc.h
index f87a7cf3..669f50b7 100644
--- a/src/lj_alloc.h
+++ b/src/lj_alloc.h
@@ -9,7 +9,8 @@
9#include "lj_def.h" 9#include "lj_def.h"
10 10
11#ifndef LUAJIT_USE_SYSMALLOC 11#ifndef LUAJIT_USE_SYSMALLOC
12LJ_FUNC void *lj_alloc_create(void); 12LJ_FUNC void *lj_alloc_create(PRNGState *rs);
13LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs);
13LJ_FUNC void lj_alloc_destroy(void *msp); 14LJ_FUNC void lj_alloc_destroy(void *msp);
14LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); 15LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
15#endif 16#endif
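
Note (not part of the commit): the hunks above change the allocator's public surface. lj_alloc_create() now takes a PRNGState used for address probing, and lj_alloc_setprng() installs the long-lived PRNG into the mstate afterwards. A minimal host-side sketch of the call order, assuming PRNGState is made visible through the headers lj_alloc.h already includes and that lj_alloc_f() keeps its lua_Alloc-style (ptr, osize, nsize) contract shown above:

/* Sketch only. The bootstrap/final PRNG split is an assumption drawn from
** the separate lj_alloc_setprng() hook; seeding is left to the caller. */
#include "lj_def.h"     /* assumed to make PRNGState visible */
#include "lj_alloc.h"

static int alloc_roundtrip(PRNGState *bootstrap_rs, PRNGState *final_rs)
{
  void *msp = lj_alloc_create(bootstrap_rs);  /* may probe mmap with the PRNG */
  void *p;
  if (msp == NULL) return -1;
  lj_alloc_setprng(msp, final_rs);       /* PRNG used by later DIRECT_MMAP calls */
  p = lj_alloc_f(msp, NULL, 0, 64);      /* ptr == NULL: allocate 64 bytes */
  p = lj_alloc_f(msp, p, 64, 128);       /* grow the block to 128 bytes */
  lj_alloc_f(msp, p, 128, 0);            /* nsize == 0: free the block */
  lj_alloc_destroy(msp);
  return 0;
}
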
diff --git a/src/lj_api.c b/src/lj_api.c
index d40ade30..1ad71678 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -24,11 +24,12 @@
24#include "lj_trace.h" 24#include "lj_trace.h"
25#include "lj_vm.h" 25#include "lj_vm.h"
26#include "lj_strscan.h" 26#include "lj_strscan.h"
27#include "lj_strfmt.h"
27 28
28/* -- Common helper functions --------------------------------------------- */ 29/* -- Common helper functions --------------------------------------------- */
29 30
30#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base))
31#define lj_checkapi_slot(idx) \
31#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L))
32 lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx))
32 33
33static TValue *index2adr(lua_State *L, int idx) 34static TValue *index2adr(lua_State *L, int idx)
34{ 35{
@@ -36,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx)
36 TValue *o = L->base + (idx - 1); 37 TValue *o = L->base + (idx - 1);
37 return o < L->top ? o : niltv(L); 38 return o < L->top ? o : niltv(L);
38 } else if (idx > LUA_REGISTRYINDEX) { 39 } else if (idx > LUA_REGISTRYINDEX) {
39 api_check(L, idx != 0 && -idx <= L->top - L->base); 40 lj_checkapi(idx != 0 && -idx <= L->top - L->base,
41 "bad stack slot %d", idx);
40 return L->top + idx; 42 return L->top + idx;
41 } else if (idx == LUA_GLOBALSINDEX) { 43 } else if (idx == LUA_GLOBALSINDEX) {
42 TValue *o = &G(L)->tmptv; 44 TValue *o = &G(L)->tmptv;
@@ -46,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx)
46 return registry(L); 48 return registry(L);
47 } else { 49 } else {
48 GCfunc *fn = curr_func(L); 50 GCfunc *fn = curr_func(L);
49 api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); 51 lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn),
52 "calling frame is not a C function");
50 if (idx == LUA_ENVIRONINDEX) { 53 if (idx == LUA_ENVIRONINDEX) {
51 TValue *o = &G(L)->tmptv; 54 TValue *o = &G(L)->tmptv;
52 settabV(L, o, tabref(fn->c.env)); 55 settabV(L, o, tabref(fn->c.env));
@@ -58,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx)
58 } 61 }
59} 62}
60 63
61static TValue *stkindex2adr(lua_State *L, int idx) 64static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx)
65{
66 TValue *o = index2adr(L, idx);
67 lj_checkapi(o != niltv(L), "invalid stack slot %d", idx);
68 return o;
69}
70
71static TValue *index2adr_stack(lua_State *L, int idx)
62{ 72{
63 if (idx > 0) { 73 if (idx > 0) {
64 TValue *o = L->base + (idx - 1); 74 TValue *o = L->base + (idx - 1);
75 if (o < L->top) {
76 return o;
77 } else {
78 lj_checkapi(0, "invalid stack slot %d", idx);
79 return niltv(L);
80 }
65 return o < L->top ? o : niltv(L); 81 return o < L->top ? o : niltv(L);
66 } else { 82 } else {
67 api_check(L, idx != 0 && -idx <= L->top - L->base); 83 lj_checkapi(idx != 0 && -idx <= L->top - L->base,
84 "invalid stack slot %d", idx);
68 return L->top + idx; 85 return L->top + idx;
69 } 86 }
70} 87}
@@ -87,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size)
87 if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { 104 if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
88 return 0; /* Stack overflow. */ 105 return 0; /* Stack overflow. */
89 } else if (size > 0) { 106 } else if (size > 0) {
90 lj_state_checkstack(L, (MSize)size);
107 int avail = (int)(mref(L->maxstack, TValue) - L->top);
108 if (size > avail &&
109 lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
110 L->top--;
111 return 0; /* Out of memory. */
112 }
91 } 113 }
92 return 1; 114 return 1;
93} 115}
@@ -98,17 +120,24 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
98 lj_err_callerv(L, LJ_ERR_STKOVM, msg); 120 lj_err_callerv(L, LJ_ERR_STKOVM, msg);
99} 121}
100 122
101LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) 123LUA_API void lua_xmove(lua_State *L, lua_State *to, int n)
102{ 124{
103 TValue *f, *t; 125 TValue *f, *t;
104 if (from == to) return; 126 if (L == to) return;
105 api_checknelems(from, n); 127 lj_checkapi_slot(n);
106 api_check(from, G(from) == G(to)); 128 lj_checkapi(G(L) == G(to), "move across global states");
107 lj_state_checkstack(to, (MSize)n); 129 lj_state_checkstack(to, (MSize)n);
108 f = from->top; 130 f = L->top;
109 t = to->top = to->top + n; 131 t = to->top = to->top + n;
110 while (--n >= 0) copyTV(to, --t, --f); 132 while (--n >= 0) copyTV(to, --t, --f);
111 from->top = f; 133 L->top = f;
134}
135
136LUA_API const lua_Number *lua_version(lua_State *L)
137{
138 static const lua_Number version = LUA_VERSION_NUM;
139 UNUSED(L);
140 return &version;
112} 141}
113 142
114/* -- Stack manipulation -------------------------------------------------- */ 143/* -- Stack manipulation -------------------------------------------------- */
@@ -121,7 +150,7 @@ LUA_API int lua_gettop(lua_State *L)
121LUA_API void lua_settop(lua_State *L, int idx) 150LUA_API void lua_settop(lua_State *L, int idx)
122{ 151{
123 if (idx >= 0) { 152 if (idx >= 0) {
124 api_check(L, idx <= tvref(L->maxstack) - L->base); 153 lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx);
125 if (L->base + idx > L->top) { 154 if (L->base + idx > L->top) {
126 if (L->base + idx >= tvref(L->maxstack)) 155 if (L->base + idx >= tvref(L->maxstack))
127 lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); 156 lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
@@ -130,51 +159,58 @@ LUA_API void lua_settop(lua_State *L, int idx)
130 L->top = L->base + idx; 159 L->top = L->base + idx;
131 } 160 }
132 } else { 161 } else {
133 api_check(L, -(idx+1) <= (L->top - L->base)); 162 lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx);
134 L->top += idx+1; /* Shrinks top (idx < 0). */ 163 L->top += idx+1; /* Shrinks top (idx < 0). */
135 } 164 }
136} 165}
137 166
138LUA_API void lua_remove(lua_State *L, int idx) 167LUA_API void lua_remove(lua_State *L, int idx)
139{ 168{
140 TValue *p = stkindex2adr(L, idx); 169 TValue *p = index2adr_stack(L, idx);
141 api_checkvalidindex(L, p);
142 while (++p < L->top) copyTV(L, p-1, p); 170 while (++p < L->top) copyTV(L, p-1, p);
143 L->top--; 171 L->top--;
144} 172}
145 173
146LUA_API void lua_insert(lua_State *L, int idx) 174LUA_API void lua_insert(lua_State *L, int idx)
147{ 175{
148 TValue *q, *p = stkindex2adr(L, idx); 176 TValue *q, *p = index2adr_stack(L, idx);
149 api_checkvalidindex(L, p);
150 for (q = L->top; q > p; q--) copyTV(L, q, q-1); 177 for (q = L->top; q > p; q--) copyTV(L, q, q-1);
151 copyTV(L, p, L->top); 178 copyTV(L, p, L->top);
152} 179}
153 180
154LUA_API void lua_replace(lua_State *L, int idx) 181static void copy_slot(lua_State *L, TValue *f, int idx)
155{ 182{
156 api_checknelems(L, 1);
157 if (idx == LUA_GLOBALSINDEX) { 183 if (idx == LUA_GLOBALSINDEX) {
158 api_check(L, tvistab(L->top-1)); 184 lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
159 /* NOBARRIER: A thread (i.e. L) is never black. */ 185 /* NOBARRIER: A thread (i.e. L) is never black. */
160 setgcref(L->env, obj2gco(tabV(L->top-1))); 186 setgcref(L->env, obj2gco(tabV(f)));
161 } else if (idx == LUA_ENVIRONINDEX) { 187 } else if (idx == LUA_ENVIRONINDEX) {
162 GCfunc *fn = curr_func(L); 188 GCfunc *fn = curr_func(L);
163 if (fn->c.gct != ~LJ_TFUNC) 189 if (fn->c.gct != ~LJ_TFUNC)
164 lj_err_msg(L, LJ_ERR_NOENV); 190 lj_err_msg(L, LJ_ERR_NOENV);
165 api_check(L, tvistab(L->top-1)); 191 lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
166 setgcref(fn->c.env, obj2gco(tabV(L->top-1))); 192 setgcref(fn->c.env, obj2gco(tabV(f)));
167 lj_gc_barrier(L, fn, L->top-1); 193 lj_gc_barrier(L, fn, f);
168 } else { 194 } else {
169 TValue *o = index2adr(L, idx); 195 TValue *o = index2adr_check(L, idx);
170 api_checkvalidindex(L, o); 196 copyTV(L, o, f);
171 copyTV(L, o, L->top-1);
172 if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ 197 if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
173 lj_gc_barrier(L, curr_func(L), L->top-1); 198 lj_gc_barrier(L, curr_func(L), f);
174 } 199 }
200}
201
202LUA_API void lua_replace(lua_State *L, int idx)
203{
204 lj_checkapi_slot(1);
205 copy_slot(L, L->top - 1, idx);
175 L->top--; 206 L->top--;
176} 207}
177 208
209LUA_API void lua_copy(lua_State *L, int fromidx, int toidx)
210{
211 copy_slot(L, index2adr(L, fromidx), toidx);
212}
213
178LUA_API void lua_pushvalue(lua_State *L, int idx) 214LUA_API void lua_pushvalue(lua_State *L, int idx)
179{ 215{
180 copyTV(L, L->top, index2adr(L, idx)); 216 copyTV(L, L->top, index2adr(L, idx));
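
Note (not part of the commit): the new lua_copy() above overwrites a slot in place without the pushvalue/replace dance. A usage sketch in assumed host code:

#include "lua.h"

/* Keep only the value on top of the stack, writing it over slot 1. */
static void keep_top_only(lua_State *L)
{
  lua_copy(L, -1, 1);   /* copy the top value into stack slot 1 */
  lua_settop(L, 1);     /* drop everything above it */
}
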
@@ -188,7 +224,7 @@ LUA_API int lua_type(lua_State *L, int idx)
188 cTValue *o = index2adr(L, idx); 224 cTValue *o = index2adr(L, idx);
189 if (tvisnumber(o)) { 225 if (tvisnumber(o)) {
190 return LUA_TNUMBER; 226 return LUA_TNUMBER;
191#if LJ_64 227#if LJ_64 && !LJ_GC64
192 } else if (tvislightud(o)) { 228 } else if (tvislightud(o)) {
193 return LUA_TLIGHTUSERDATA; 229 return LUA_TLIGHTUSERDATA;
194#endif 230#endif
@@ -201,7 +237,7 @@ LUA_API int lua_type(lua_State *L, int idx)
201#else 237#else
202 int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); 238 int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u);
203#endif 239#endif
204 lua_assert(tt != LUA_TNIL || tvisnil(o)); 240 lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion");
205 return tt; 241 return tt;
206 } 242 }
207} 243}
@@ -268,7 +304,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
268 return 0; 304 return 0;
269 } else if (tvispri(o1)) { 305 } else if (tvispri(o1)) {
270 return o1 != niltv(L) && o2 != niltv(L); 306 return o1 != niltv(L) && o2 != niltv(L);
271#if LJ_64 307#if LJ_64 && !LJ_GC64
272 } else if (tvislightud(o1)) { 308 } else if (tvislightud(o1)) {
273 return o1->u64 == o2->u64; 309 return o1->u64 == o2->u64;
274#endif 310#endif
@@ -283,8 +319,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
283 } else { 319 } else {
284 L->top = base+2; 320 L->top = base+2;
285 lj_vm_call(L, base, 1+1); 321 lj_vm_call(L, base, 1+1);
286 L->top -= 2; 322 L->top -= 2+LJ_FR2;
287 return tvistruecond(L->top+1); 323 return tvistruecond(L->top+1+LJ_FR2);
288 } 324 }
289 } 325 }
290} 326}
@@ -306,8 +342,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
306 } else { 342 } else {
307 L->top = base+2; 343 L->top = base+2;
308 lj_vm_call(L, base, 1+1); 344 lj_vm_call(L, base, 1+1);
309 L->top -= 2; 345 L->top -= 2+LJ_FR2;
310 return tvistruecond(L->top+1); 346 return tvistruecond(L->top+1+LJ_FR2);
311 } 347 }
312 } 348 }
313} 349}
@@ -324,6 +360,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
324 return 0; 360 return 0;
325} 361}
326 362
363LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok)
364{
365 cTValue *o = index2adr(L, idx);
366 TValue tmp;
367 if (LJ_LIKELY(tvisnumber(o))) {
368 if (ok) *ok = 1;
369 return numberVnum(o);
370 } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) {
371 if (ok) *ok = 1;
372 return numV(&tmp);
373 } else {
374 if (ok) *ok = 0;
375 return 0;
376 }
377}
378
327LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) 379LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
328{ 380{
329 cTValue *o = index2adr(L, idx); 381 cTValue *o = index2adr(L, idx);
@@ -361,7 +413,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
361 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) 413 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
362 return 0; 414 return 0;
363 if (tvisint(&tmp)) 415 if (tvisint(&tmp))
364 return (lua_Integer)intV(&tmp); 416 return intV(&tmp);
365 n = numV(&tmp); 417 n = numV(&tmp);
366 } 418 }
367#if LJ_64 419#if LJ_64
@@ -371,6 +423,35 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
371#endif 423#endif
372} 424}
373 425
426LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
427{
428 cTValue *o = index2adr(L, idx);
429 TValue tmp;
430 lua_Number n;
431 if (LJ_LIKELY(tvisint(o))) {
432 if (ok) *ok = 1;
433 return intV(o);
434 } else if (LJ_LIKELY(tvisnum(o))) {
435 n = numV(o);
436 } else {
437 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) {
438 if (ok) *ok = 0;
439 return 0;
440 }
441 if (tvisint(&tmp)) {
442 if (ok) *ok = 1;
443 return intV(&tmp);
444 }
445 n = numV(&tmp);
446 }
447 if (ok) *ok = 1;
448#if LJ_64
449 return (lua_Integer)n;
450#else
451 return lj_num2int(n);
452#endif
453}
454
374LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) 455LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
375{ 456{
376 cTValue *o = index2adr(L, idx); 457 cTValue *o = index2adr(L, idx);
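
Note (not part of the commit): lua_tonumberx() and lua_tointegerx() added above report convertibility through the ok out-parameter instead of silently returning 0. A small caller sketch (assumed extension code):

#include "lua.h"

/* Return the integer at idx, or def when the slot holds neither a number
** nor a numeric string; ok distinguishes "not convertible" from a real 0. */
static lua_Integer opt_int(lua_State *L, int idx, lua_Integer def)
{
  int ok;
  lua_Integer n = lua_tointegerx(L, idx, &ok);
  return ok ? n : def;
}
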
@@ -434,7 +515,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
434 } else if (tvisnumber(o)) { 515 } else if (tvisnumber(o)) {
435 lj_gc_check(L); 516 lj_gc_check(L);
436 o = index2adr(L, idx); /* GC may move the stack. */ 517 o = index2adr(L, idx); /* GC may move the stack. */
437 s = lj_str_fromnumber(L, o); 518 s = lj_strfmt_number(L, o);
438 setstrV(L, o, s); 519 setstrV(L, o, s);
439 } else { 520 } else {
440 if (len != NULL) *len = 0; 521 if (len != NULL) *len = 0;
@@ -453,7 +534,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
453 } else if (tvisnumber(o)) { 534 } else if (tvisnumber(o)) {
454 lj_gc_check(L); 535 lj_gc_check(L);
455 o = index2adr(L, idx); /* GC may move the stack. */ 536 o = index2adr(L, idx); /* GC may move the stack. */
456 s = lj_str_fromnumber(L, o); 537 s = lj_strfmt_number(L, o);
457 setstrV(L, o, s); 538 setstrV(L, o, s);
458 } else { 539 } else {
459 lj_err_argt(L, idx, LUA_TSTRING); 540 lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +556,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
475 } else if (tvisnumber(o)) { 556 } else if (tvisnumber(o)) {
476 lj_gc_check(L); 557 lj_gc_check(L);
477 o = index2adr(L, idx); /* GC may move the stack. */ 558 o = index2adr(L, idx); /* GC may move the stack. */
478 s = lj_str_fromnumber(L, o); 559 s = lj_strfmt_number(L, o);
479 setstrV(L, o, s); 560 setstrV(L, o, s);
480 } else { 561 } else {
481 lj_err_argt(L, idx, LUA_TSTRING); 562 lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +588,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
507 } else if (tvisudata(o)) { 588 } else if (tvisudata(o)) {
508 return udataV(o)->len; 589 return udataV(o)->len;
509 } else if (tvisnumber(o)) { 590 } else if (tvisnumber(o)) {
510 GCstr *s = lj_str_fromnumber(L, o); 591 GCstr *s = lj_strfmt_number(L, o);
511 setstrV(L, o, s); 592 setstrV(L, o, s);
512 return s->len; 593 return s->len;
513 } else { 594 } else {
@@ -532,7 +613,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx)
532 if (tvisudata(o)) 613 if (tvisudata(o))
533 return uddata(udataV(o)); 614 return uddata(udataV(o));
534 else if (tvislightud(o)) 615 else if (tvislightud(o))
535 return lightudV(o); 616 return lightudV(G(L), o);
536 else 617 else
537 return NULL; 618 return NULL;
538} 619}
@@ -545,17 +626,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
545 626
546LUA_API const void *lua_topointer(lua_State *L, int idx) 627LUA_API const void *lua_topointer(lua_State *L, int idx)
547{ 628{
548 cTValue *o = index2adr(L, idx); 629 return lj_obj_ptr(G(L), index2adr(L, idx));
549 if (tvisudata(o))
550 return uddata(udataV(o));
551 else if (tvislightud(o))
552 return lightudV(o);
553 else if (tviscdata(o))
554 return cdataptr(cdataV(o));
555 else if (tvisgcv(o))
556 return gcV(o);
557 else
558 return NULL;
559} 630}
560 631
561/* -- Stack setters (object creation) ------------------------------------- */ 632/* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +677,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
606 va_list argp) 677 va_list argp)
607{ 678{
608 lj_gc_check(L); 679 lj_gc_check(L);
609 return lj_str_pushvf(L, fmt, argp); 680 return lj_strfmt_pushvf(L, fmt, argp);
610} 681}
611 682
612LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) 683LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +686,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
615 va_list argp; 686 va_list argp;
616 lj_gc_check(L); 687 lj_gc_check(L);
617 va_start(argp, fmt); 688 va_start(argp, fmt);
618 ret = lj_str_pushvf(L, fmt, argp); 689 ret = lj_strfmt_pushvf(L, fmt, argp);
619 va_end(argp); 690 va_end(argp);
620 return ret; 691 return ret;
621} 692}
@@ -624,14 +695,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
624{ 695{
625 GCfunc *fn; 696 GCfunc *fn;
626 lj_gc_check(L); 697 lj_gc_check(L);
627 api_checknelems(L, n); 698 lj_checkapi_slot(n);
628 fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); 699 fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
629 fn->c.f = f; 700 fn->c.f = f;
630 L->top -= n; 701 L->top -= n;
631 while (n--) 702 while (n--)
632 copyTV(L, &fn->c.upvalue[n], L->top+n); 703 copyTV(L, &fn->c.upvalue[n], L->top+n);
633 setfuncV(L, L->top, fn); 704 setfuncV(L, L->top, fn);
634 lua_assert(iswhite(obj2gco(fn))); 705 lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white");
635 incr_top(L); 706 incr_top(L);
636} 707}
637 708
@@ -643,16 +714,17 @@ LUA_API void lua_pushboolean(lua_State *L, int b)
643 714
644LUA_API void lua_pushlightuserdata(lua_State *L, void *p) 715LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
645{ 716{
646 setlightudV(L->top, checklightudptr(L, p)); 717#if LJ_64
718 p = lj_lightud_intern(L, p);
719#endif
720 setrawlightudV(L->top, p);
647 incr_top(L); 721 incr_top(L);
648} 722}
649 723
650LUA_API void lua_createtable(lua_State *L, int narray, int nrec) 724LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
651{ 725{
652 GCtab *t;
653 lj_gc_check(L); 726 lj_gc_check(L);
654 t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); 727 settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
655 settabV(L, L->top, t);
656 incr_top(L); 728 incr_top(L);
657} 729}
658 730
@@ -703,7 +775,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size)
703 775
704LUA_API void lua_concat(lua_State *L, int n) 776LUA_API void lua_concat(lua_State *L, int n)
705{ 777{
706 api_checknelems(L, n); 778 lj_checkapi_slot(n);
707 if (n >= 2) { 779 if (n >= 2) {
708 n--; 780 n--;
709 do { 781 do {
@@ -712,11 +784,11 @@ LUA_API void lua_concat(lua_State *L, int n)
712 L->top -= n; 784 L->top -= n;
713 break; 785 break;
714 } 786 }
715 n -= (int)(L->top - top); 787 n -= (int)(L->top - (top - 2*LJ_FR2));
716 L->top = top+2; 788 L->top = top+2;
717 lj_vm_call(L, top, 1+1); 789 lj_vm_call(L, top, 1+1);
718 L->top--; 790 L->top -= 1+LJ_FR2;
719 copyTV(L, L->top-1, L->top); 791 copyTV(L, L->top-1, L->top+LJ_FR2);
720 } while (--n > 0); 792 } while (--n > 0);
721 } else if (n == 0) { /* Push empty string. */ 793 } else if (n == 0) { /* Push empty string. */
722 setstrV(L, L->top, &G(L)->strempty); 794 setstrV(L, L->top, &G(L)->strempty);
@@ -729,30 +801,28 @@ LUA_API void lua_concat(lua_State *L, int n)
729 801
730LUA_API void lua_gettable(lua_State *L, int idx) 802LUA_API void lua_gettable(lua_State *L, int idx)
731{ 803{
732 cTValue *v, *t = index2adr(L, idx); 804 cTValue *t = index2adr_check(L, idx);
733 api_checkvalidindex(L, t); 805 cTValue *v = lj_meta_tget(L, t, L->top-1);
734 v = lj_meta_tget(L, t, L->top-1);
735 if (v == NULL) { 806 if (v == NULL) {
736 L->top += 2; 807 L->top += 2;
737 lj_vm_call(L, L->top-2, 1+1); 808 lj_vm_call(L, L->top-2, 1+1);
738 L->top -= 2; 809 L->top -= 2+LJ_FR2;
739 v = L->top+1; 810 v = L->top+1+LJ_FR2;
740 } 811 }
741 copyTV(L, L->top-1, v); 812 copyTV(L, L->top-1, v);
742} 813}
743 814
744LUA_API void lua_getfield(lua_State *L, int idx, const char *k) 815LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
745{ 816{
746 cTValue *v, *t = index2adr(L, idx); 817 cTValue *v, *t = index2adr_check(L, idx);
747 TValue key; 818 TValue key;
748 api_checkvalidindex(L, t);
749 setstrV(L, &key, lj_str_newz(L, k)); 819 setstrV(L, &key, lj_str_newz(L, k));
750 v = lj_meta_tget(L, t, &key); 820 v = lj_meta_tget(L, t, &key);
751 if (v == NULL) { 821 if (v == NULL) {
752 L->top += 2; 822 L->top += 2;
753 lj_vm_call(L, L->top-2, 1+1); 823 lj_vm_call(L, L->top-2, 1+1);
754 L->top -= 2; 824 L->top -= 2+LJ_FR2;
755 v = L->top+1; 825 v = L->top+1+LJ_FR2;
756 } 826 }
757 copyTV(L, L->top, v); 827 copyTV(L, L->top, v);
758 incr_top(L); 828 incr_top(L);
@@ -761,14 +831,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
761LUA_API void lua_rawget(lua_State *L, int idx) 831LUA_API void lua_rawget(lua_State *L, int idx)
762{ 832{
763 cTValue *t = index2adr(L, idx); 833 cTValue *t = index2adr(L, idx);
764 api_check(L, tvistab(t)); 834 lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
765 copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); 835 copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
766} 836}
767 837
768LUA_API void lua_rawgeti(lua_State *L, int idx, int n) 838LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
769{ 839{
770 cTValue *v, *t = index2adr(L, idx); 840 cTValue *v, *t = index2adr(L, idx);
771 api_check(L, tvistab(t)); 841 lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
772 v = lj_tab_getint(tabV(t), n); 842 v = lj_tab_getint(tabV(t), n);
773 if (v) { 843 if (v) {
774 copyTV(L, L->top, v); 844 copyTV(L, L->top, v);
@@ -810,8 +880,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
810 880
811LUA_API void lua_getfenv(lua_State *L, int idx) 881LUA_API void lua_getfenv(lua_State *L, int idx)
812{ 882{
813 cTValue *o = index2adr(L, idx); 883 cTValue *o = index2adr_check(L, idx);
814 api_checkvalidindex(L, o);
815 if (tvisfunc(o)) { 884 if (tvisfunc(o)) {
816 settabV(L, L->top, tabref(funcV(o)->c.env)); 885 settabV(L, L->top, tabref(funcV(o)->c.env));
817 } else if (tvisudata(o)) { 886 } else if (tvisudata(o)) {
@@ -828,12 +897,14 @@ LUA_API int lua_next(lua_State *L, int idx)
828{ 897{
829 cTValue *t = index2adr(L, idx); 898 cTValue *t = index2adr(L, idx);
830 int more; 899 int more;
831 api_check(L, tvistab(t)); 900 lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
832 more = lj_tab_next(L, tabV(t), L->top-1); 901 more = lj_tab_next(tabV(t), L->top-1, L->top-1);
833 if (more) { 902 if (more > 0) {
834 incr_top(L); /* Return new key and value slot. */ 903 incr_top(L); /* Return new key and value slot. */
835 } else { /* End of traversal. */ 904 } else if (!more) { /* End of traversal. */
836 L->top--; /* Remove key slot. */ 905 L->top--; /* Remove key slot. */
906 } else {
907 lj_err_msg(L, LJ_ERR_NEXTIDX);
837 } 908 }
838 return more; 909 return more;
839} 910}
@@ -854,7 +925,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n)
854{ 925{
855 GCfunc *fn = funcV(index2adr(L, idx)); 926 GCfunc *fn = funcV(index2adr(L, idx));
856 n--; 927 n--;
857 api_check(L, (uint32_t)n < fn->l.nupvalues); 928 lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n);
858 return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : 929 return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
859 (void *)&fn->c.upvalue[n]; 930 (void *)&fn->c.upvalue[n];
860} 931}
@@ -864,13 +935,15 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
864 GCfunc *fn1 = funcV(index2adr(L, idx1)); 935 GCfunc *fn1 = funcV(index2adr(L, idx1));
865 GCfunc *fn2 = funcV(index2adr(L, idx2)); 936 GCfunc *fn2 = funcV(index2adr(L, idx2));
866 n1--; n2--; 937 n1--; n2--;
867 api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues); 938 lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1);
868 api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues); 939 lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2);
940 lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1);
941 lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1);
869 setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); 942 setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]);
870 lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); 943 lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
871} 944}
872 945
873LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) 946LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname)
874{ 947{
875 cTValue *o = index2adr(L, idx); 948 cTValue *o = index2adr(L, idx);
876 if (tvisudata(o)) { 949 if (tvisudata(o)) {
@@ -879,8 +952,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
879 if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) 952 if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
880 return uddata(ud); 953 return uddata(ud);
881 } 954 }
882 lj_err_argtype(L, idx, tname); 955 return NULL; /* value is not a userdata with a metatable */
883 return NULL; /* unreachable */ 956}
957
958LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
959{
960 void *p = luaL_testudata(L, idx, tname);
961 if (!p) lj_err_argtype(L, idx, tname);
962 return p;
884} 963}
885 964
886/* -- Object setters ------------------------------------------------------ */ 965/* -- Object setters ------------------------------------------------------ */
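
Note (not part of the commit): luaL_checkudata() is now split into a non-throwing luaL_testudata() plus the old throwing wrapper. A sketch of the non-throwing path; the metatable names are hypothetical:

#include "lua.h"
#include "lauxlib.h"

/* Accept either of two userdata types at idx without raising on the first
** mismatch; only error out when neither metatable matches. */
static void *check_stream(lua_State *L, int idx)
{
  void *p = luaL_testudata(L, idx, "demo.file");       /* hypothetical tname */
  if (p == NULL) p = luaL_testudata(L, idx, "demo.pipe");
  if (p == NULL) luaL_argerror(L, idx, "file or pipe expected");
  return p;
}
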
@@ -888,19 +967,19 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
888LUA_API void lua_settable(lua_State *L, int idx) 967LUA_API void lua_settable(lua_State *L, int idx)
889{ 968{
890 TValue *o; 969 TValue *o;
891 cTValue *t = index2adr(L, idx); 970 cTValue *t = index2adr_check(L, idx);
892 api_checknelems(L, 2); 971 lj_checkapi_slot(2);
893 api_checkvalidindex(L, t);
894 o = lj_meta_tset(L, t, L->top-2); 972 o = lj_meta_tset(L, t, L->top-2);
895 if (o) { 973 if (o) {
896 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 974 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
897 copyTV(L, o, L->top-1);
898 L->top -= 2; 975 L->top -= 2;
976 copyTV(L, o, L->top+1);
899 } else { 977 } else {
900 L->top += 3; 978 TValue *base = L->top;
901 copyTV(L, L->top-1, L->top-6); 979 copyTV(L, base+2, base-3-2*LJ_FR2);
902 lj_vm_call(L, L->top-3, 0+1); 980 L->top = base+3;
903 L->top -= 3; 981 lj_vm_call(L, base, 0+1);
982 L->top -= 3+LJ_FR2;
904 } 983 }
905} 984}
906 985
@@ -908,20 +987,19 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
908{ 987{
909 TValue *o; 988 TValue *o;
910 TValue key; 989 TValue key;
911 cTValue *t = index2adr(L, idx); 990 cTValue *t = index2adr_check(L, idx);
912 api_checknelems(L, 1); 991 lj_checkapi_slot(1);
913 api_checkvalidindex(L, t);
914 setstrV(L, &key, lj_str_newz(L, k)); 992 setstrV(L, &key, lj_str_newz(L, k));
915 o = lj_meta_tset(L, t, &key); 993 o = lj_meta_tset(L, t, &key);
916 if (o) { 994 if (o) {
917 L->top--;
918 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 995 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
919 copyTV(L, o, L->top); 996 copyTV(L, o, --L->top);
920 } else { 997 } else {
921 L->top += 3; 998 TValue *base = L->top;
922 copyTV(L, L->top-1, L->top-6); 999 copyTV(L, base+2, base-3-2*LJ_FR2);
923 lj_vm_call(L, L->top-3, 0+1); 1000 L->top = base+3;
924 L->top -= 2; 1001 lj_vm_call(L, base, 0+1);
1002 L->top -= 2+LJ_FR2;
925 } 1003 }
926} 1004}
927 1005
@@ -929,7 +1007,7 @@ LUA_API void lua_rawset(lua_State *L, int idx)
929{ 1007{
930 GCtab *t = tabV(index2adr(L, idx)); 1008 GCtab *t = tabV(index2adr(L, idx));
931 TValue *dst, *key; 1009 TValue *dst, *key;
932 api_checknelems(L, 2); 1010 lj_checkapi_slot(2);
933 key = L->top-2; 1011 key = L->top-2;
934 dst = lj_tab_set(L, t, key); 1012 dst = lj_tab_set(L, t, key);
935 copyTV(L, dst, key+1); 1013 copyTV(L, dst, key+1);
@@ -941,7 +1019,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n)
941{ 1019{
942 GCtab *t = tabV(index2adr(L, idx)); 1020 GCtab *t = tabV(index2adr(L, idx));
943 TValue *dst, *src; 1021 TValue *dst, *src;
944 api_checknelems(L, 1); 1022 lj_checkapi_slot(1);
945 dst = lj_tab_setint(L, t, n); 1023 dst = lj_tab_setint(L, t, n);
946 src = L->top-1; 1024 src = L->top-1;
947 copyTV(L, dst, src); 1025 copyTV(L, dst, src);
@@ -953,13 +1031,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
953{ 1031{
954 global_State *g; 1032 global_State *g;
955 GCtab *mt; 1033 GCtab *mt;
956 cTValue *o = index2adr(L, idx); 1034 cTValue *o = index2adr_check(L, idx);
957 api_checknelems(L, 1); 1035 lj_checkapi_slot(1);
958 api_checkvalidindex(L, o);
959 if (tvisnil(L->top-1)) { 1036 if (tvisnil(L->top-1)) {
960 mt = NULL; 1037 mt = NULL;
961 } else { 1038 } else {
962 api_check(L, tvistab(L->top-1)); 1039 lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
963 mt = tabV(L->top-1); 1040 mt = tabV(L->top-1);
964 } 1041 }
965 g = G(L); 1042 g = G(L);
@@ -989,13 +1066,18 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
989 return 1; 1066 return 1;
990} 1067}
991 1068
1069LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
1070{
1071 lua_getfield(L, LUA_REGISTRYINDEX, tname);
1072 lua_setmetatable(L, -2);
1073}
1074
992LUA_API int lua_setfenv(lua_State *L, int idx) 1075LUA_API int lua_setfenv(lua_State *L, int idx)
993{ 1076{
994 cTValue *o = index2adr(L, idx); 1077 cTValue *o = index2adr_check(L, idx);
995 GCtab *t; 1078 GCtab *t;
996 api_checknelems(L, 1); 1079 lj_checkapi_slot(1);
997 api_checkvalidindex(L, o); 1080 lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
998 api_check(L, tvistab(L->top-1));
999 t = tabV(L->top-1); 1081 t = tabV(L->top-1);
1000 if (tvisfunc(o)) { 1082 if (tvisfunc(o)) {
1001 setgcref(funcV(o)->c.env, obj2gco(t)); 1083 setgcref(funcV(o)->c.env, obj2gco(t));
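
Note (not part of the commit): luaL_setmetatable() added in the hunk above applies a registry metatable to the value on top of the stack. A typical pairing with luaL_newmetatable(); the registry name is hypothetical:

#include "lua.h"
#include "lauxlib.h"

/* Create a userdata and attach the metatable registered under "demo.obj". */
static void *new_obj(lua_State *L, size_t sz)
{
  void *p = lua_newuserdata(L, sz);
  luaL_setmetatable(L, "demo.obj");  /* assumes luaL_newmetatable("demo.obj") ran earlier */
  return p;
}
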
@@ -1018,7 +1100,7 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
1018 TValue *val; 1100 TValue *val;
1019 GCobj *o; 1101 GCobj *o;
1020 const char *name; 1102 const char *name;
1021 api_checknelems(L, 1); 1103 lj_checkapi_slot(1);
1022 name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); 1104 name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o);
1023 if (name) { 1105 if (name) {
1024 L->top--; 1106 L->top--;
@@ -1030,11 +1112,25 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
1030 1112
1031/* -- Calls --------------------------------------------------------------- */ 1113/* -- Calls --------------------------------------------------------------- */
1032 1114
1115#if LJ_FR2
1116static TValue *api_call_base(lua_State *L, int nargs)
1117{
1118 TValue *o = L->top, *base = o - nargs;
1119 L->top = o+1;
1120 for (; o > base; o--) copyTV(L, o, o-1);
1121 setnilV(o);
1122 return o+1;
1123}
1124#else
1125#define api_call_base(L, nargs) (L->top - (nargs))
1126#endif
1127
1033LUA_API void lua_call(lua_State *L, int nargs, int nresults) 1128LUA_API void lua_call(lua_State *L, int nargs, int nresults)
1034{ 1129{
1035 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1130 lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
1036 api_checknelems(L, nargs+1); 1131 "thread called in wrong state %d", L->status);
1037 lj_vm_call(L, L->top - nargs, nresults+1); 1132 lj_checkapi_slot(nargs+1);
1133 lj_vm_call(L, api_call_base(L, nargs), nresults+1);
1038} 1134}
1039 1135
1040LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) 1136LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1043,16 +1139,16 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1043 uint8_t oldh = hook_save(g); 1139 uint8_t oldh = hook_save(g);
1044 ptrdiff_t ef; 1140 ptrdiff_t ef;
1045 int status; 1141 int status;
1046 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1142 lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
1047 api_checknelems(L, nargs+1); 1143 "thread called in wrong state %d", L->status);
1144 lj_checkapi_slot(nargs+1);
1048 if (errfunc == 0) { 1145 if (errfunc == 0) {
1049 ef = 0; 1146 ef = 0;
1050 } else { 1147 } else {
1051 cTValue *o = stkindex2adr(L, errfunc); 1148 cTValue *o = index2adr_stack(L, errfunc);
1052 api_checkvalidindex(L, o);
1053 ef = savestack(L, o); 1149 ef = savestack(L, o);
1054 } 1150 }
1055 status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); 1151 status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
1056 if (status) hook_restore(g, oldh); 1152 if (status) hook_restore(g, oldh);
1057 return status; 1153 return status;
1058} 1154}
@@ -1060,12 +1156,17 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1060static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) 1156static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
1061{ 1157{
1062 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); 1158 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
1159 TValue *top = L->top;
1063 fn->c.f = func; 1160 fn->c.f = func;
1064 setfuncV(L, L->top, fn); 1161 setfuncV(L, top++, fn);
1065 setlightudV(L->top+1, checklightudptr(L, ud)); 1162 if (LJ_FR2) setnilV(top++);
1163#if LJ_64
1164 ud = lj_lightud_intern(L, ud);
1165#endif
1166 setrawlightudV(top++, ud);
1066 cframe_nres(L->cframe) = 1+0; /* Zero results. */ 1167 cframe_nres(L->cframe) = 1+0; /* Zero results. */
1067 L->top += 2; 1168 L->top = top;
1068 return L->top-1; /* Now call the newly allocated C function. */ 1169 return top-1; /* Now call the newly allocated C function. */
1069} 1170}
1070 1171
1071LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) 1172LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1073,7 +1174,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
1073 global_State *g = G(L); 1174 global_State *g = G(L);
1074 uint8_t oldh = hook_save(g); 1175 uint8_t oldh = hook_save(g);
1075 int status; 1176 int status;
1076 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1177 lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
1178 "thread called in wrong state %d", L->status);
1077 status = lj_vm_cpcall(L, func, ud, cpcall); 1179 status = lj_vm_cpcall(L, func, ud, cpcall);
1078 if (status) hook_restore(g, oldh); 1180 if (status) hook_restore(g, oldh);
1079 return status; 1181 return status;
@@ -1082,10 +1184,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
1082LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) 1184LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
1083{ 1185{
1084 if (luaL_getmetafield(L, idx, field)) { 1186 if (luaL_getmetafield(L, idx, field)) {
1085 TValue *base = L->top--; 1187 TValue *top = L->top--;
1086 copyTV(L, base, index2adr(L, idx)); 1188 if (LJ_FR2) setnilV(top++);
1087 L->top = base+1; 1189 copyTV(L, top++, index2adr(L, idx));
1088 lj_vm_call(L, base, 1+1); 1190 L->top = top;
1191 lj_vm_call(L, top-1, 1+1);
1089 return 1; 1192 return 1;
1090 } 1193 }
1091 return 0; 1194 return 0;
@@ -1093,6 +1196,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
1093 1196
1094/* -- Coroutine yield and resume ------------------------------------------ */ 1197/* -- Coroutine yield and resume ------------------------------------------ */
1095 1198
1199LUA_API int lua_isyieldable(lua_State *L)
1200{
1201 return cframe_canyield(L->cframe);
1202}
1203
1096LUA_API int lua_yield(lua_State *L, int nresults) 1204LUA_API int lua_yield(lua_State *L, int nresults)
1097{ 1205{
1098 void *cf = L->cframe; 1206 void *cf = L->cframe;
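
Note (not part of the commit): lua_isyieldable() above exposes cframe_canyield(), so C code can ask whether a yield would cross a C-call boundary before attempting it. A guard sketch in assumed caller code:

#include "lua.h"

/* Yield nresults values when the running coroutine may yield here,
** otherwise just return them to the caller normally. */
static int maybe_yield(lua_State *L, int nresults)
{
  if (lua_isyieldable(L))
    return lua_yield(L, nresults);
  return nresults;
}
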
@@ -1112,13 +1220,16 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1112 } else { /* Yield from hook: add a pseudo-frame. */ 1220 } else { /* Yield from hook: add a pseudo-frame. */
1113 TValue *top = L->top; 1221 TValue *top = L->top;
1114 hook_leave(g); 1222 hook_leave(g);
1115 top->u64 = cframe_multres(cf); 1223 (top++)->u64 = cframe_multres(cf);
1116 setcont(top+1, lj_cont_hook); 1224 setcont(top, lj_cont_hook);
1117 setframe_pc(top+1, cframe_pc(cf)-1); 1225 if (LJ_FR2) top++;
1118 setframe_gc(top+2, obj2gco(L)); 1226 setframe_pc(top, cframe_pc(cf)-1);
1119 setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT); 1227 top++;
1120 L->top = L->base = top+3; 1228 setframe_gc(top, obj2gco(L), LJ_TTHREAD);
1121#if LJ_TARGET_X64 1229 if (LJ_FR2) top++;
1230 setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
1231 L->top = L->base = top+1;
1232#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS
1122 lj_err_throw(L, LUA_YIELD); 1233 lj_err_throw(L, LUA_YIELD);
1123#else 1234#else
1124 L->cframe = NULL; 1235 L->cframe = NULL;
@@ -1134,7 +1245,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1134LUA_API int lua_resume(lua_State *L, int nargs) 1245LUA_API int lua_resume(lua_State *L, int nargs)
1135{ 1246{
1136 if (L->cframe == NULL && L->status <= LUA_YIELD) 1247 if (L->cframe == NULL && L->status <= LUA_YIELD)
1137 return lj_vm_resume(L, L->top - nargs, 0, 0); 1248 return lj_vm_resume(L,
1249 L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs,
1250 0, 0);
1138 L->top = L->base; 1251 L->top = L->base;
1139 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); 1252 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
1140 incr_top(L); 1253 incr_top(L);
@@ -1164,7 +1277,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
1164 res = (int)(g->gc.total & 0x3ff); 1277 res = (int)(g->gc.total & 0x3ff);
1165 break; 1278 break;
1166 case LUA_GCSTEP: { 1279 case LUA_GCSTEP: {
1167 MSize a = (MSize)data << 10; 1280 GCSize a = (GCSize)data << 10;
1168 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; 1281 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
1169 while (g->gc.total >= g->gc.threshold) 1282 while (g->gc.total >= g->gc.threshold)
1170 if (lj_gc_step(L) > 0) { 1283 if (lj_gc_step(L) > 0) {
@@ -1181,6 +1294,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
1181 res = (int)(g->gc.stepmul); 1294 res = (int)(g->gc.stepmul);
1182 g->gc.stepmul = (MSize)data; 1295 g->gc.stepmul = (MSize)data;
1183 break; 1296 break;
1297 case LUA_GCISRUNNING:
1298 res = (g->gc.threshold != LJ_MAX_MEM);
1299 break;
1184 default: 1300 default:
1185 res = -1; /* Invalid option. */ 1301 res = -1; /* Invalid option. */
1186 } 1302 }
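
Note (not part of the commit): the new LUA_GCISRUNNING option reports whether the collector is enabled (threshold != LJ_MAX_MEM). A host-side sketch that pauses the GC around a latency-critical section, using the standard lua_gc() stop/restart constants:

#include "lua.h"

/* Temporarily stop the collector; restart it only if it was running before. */
static void with_gc_paused(lua_State *L, void (*fn)(lua_State *L))
{
  int was_running = lua_gc(L, LUA_GCISRUNNING, 0);
  lua_gc(L, LUA_GCSTOP, 0);
  fn(L);
  if (was_running) lua_gc(L, LUA_GCRESTART, 0);
}
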
diff --git a/src/lj_arch.h b/src/lj_arch.h
index e77865d9..e6264398 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -8,6 +8,8 @@
8 8
9#include "lua.h" 9#include "lua.h"
10 10
11/* -- Target definitions -------------------------------------------------- */
12
11/* Target endianess. */ 13/* Target endianess. */
12#define LUAJIT_LE 0 14#define LUAJIT_LE 0
13#define LUAJIT_BE 1 15#define LUAJIT_BE 1
@@ -19,12 +21,16 @@
19#define LUAJIT_ARCH_x64 2 21#define LUAJIT_ARCH_x64 2
20#define LUAJIT_ARCH_ARM 3 22#define LUAJIT_ARCH_ARM 3
21#define LUAJIT_ARCH_arm 3 23#define LUAJIT_ARCH_arm 3
22#define LUAJIT_ARCH_PPC 4 24#define LUAJIT_ARCH_ARM64 4
23#define LUAJIT_ARCH_ppc 4 25#define LUAJIT_ARCH_arm64 4
24#define LUAJIT_ARCH_PPCSPE 5 26#define LUAJIT_ARCH_PPC 5
25#define LUAJIT_ARCH_ppcspe 5 27#define LUAJIT_ARCH_ppc 5
26#define LUAJIT_ARCH_MIPS 6 28#define LUAJIT_ARCH_MIPS 6
27#define LUAJIT_ARCH_mips 6 29#define LUAJIT_ARCH_mips 6
30#define LUAJIT_ARCH_MIPS32 6
31#define LUAJIT_ARCH_mips32 6
32#define LUAJIT_ARCH_MIPS64 7
33#define LUAJIT_ARCH_mips64 7
28 34
29/* Target OS. */ 35/* Target OS. */
30#define LUAJIT_OS_OTHER 0 36#define LUAJIT_OS_OTHER 0
@@ -34,6 +40,14 @@
34#define LUAJIT_OS_BSD 4 40#define LUAJIT_OS_BSD 4
35#define LUAJIT_OS_POSIX 5 41#define LUAJIT_OS_POSIX 5
36 42
43/* Number mode. */
44#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
45#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
46#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
47#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
48
49/* -- Target detection ---------------------------------------------------- */
50
37/* Select native target if no target defined. */ 51/* Select native target if no target defined. */
38#ifndef LUAJIT_TARGET 52#ifndef LUAJIT_TARGET
39 53
@@ -43,14 +57,14 @@
43#define LUAJIT_TARGET LUAJIT_ARCH_X64 57#define LUAJIT_TARGET LUAJIT_ARCH_X64
44#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) 58#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
45#define LUAJIT_TARGET LUAJIT_ARCH_ARM 59#define LUAJIT_TARGET LUAJIT_ARCH_ARM
60#elif defined(__aarch64__) || defined(_M_ARM64)
61#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
46#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) 62#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
47#ifdef __NO_FPRS__
48#define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE
49#else
50#define LUAJIT_TARGET LUAJIT_ARCH_PPC 63#define LUAJIT_TARGET LUAJIT_ARCH_PPC
51#endif
64#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
65#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
52#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) 66#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
53#define LUAJIT_TARGET LUAJIT_ARCH_MIPS 67#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
54#else 68#else
55#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" 69#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
56#endif 70#endif
@@ -65,16 +79,23 @@
65#elif defined(__linux__) 79#elif defined(__linux__)
66#define LUAJIT_OS LUAJIT_OS_LINUX 80#define LUAJIT_OS LUAJIT_OS_LINUX
67#elif defined(__MACH__) && defined(__APPLE__) 81#elif defined(__MACH__) && defined(__APPLE__)
82#include "TargetConditionals.h"
68#define LUAJIT_OS LUAJIT_OS_OSX 83#define LUAJIT_OS LUAJIT_OS_OSX
69#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ 84#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
70 defined(__NetBSD__) || defined(__OpenBSD__) || \ 85 defined(__NetBSD__) || defined(__OpenBSD__) || \
71 defined(__DragonFly__)) && !defined(__ORBIS__) 86 defined(__DragonFly__)) && !defined(__ORBIS__) && !defined(__PROSPERO__)
72#define LUAJIT_OS LUAJIT_OS_BSD 87#define LUAJIT_OS LUAJIT_OS_BSD
73#elif (defined(__sun__) && defined(__svr4__)) 88#elif (defined(__sun__) && defined(__svr4__))
89#define LJ_TARGET_SOLARIS 1
90#define LUAJIT_OS LUAJIT_OS_POSIX
91#elif defined(__HAIKU__)
74#define LUAJIT_OS LUAJIT_OS_POSIX 92#define LUAJIT_OS LUAJIT_OS_POSIX
75#elif defined(__CYGWIN__) 93#elif defined(__CYGWIN__)
76#define LJ_TARGET_CYGWIN 1 94#define LJ_TARGET_CYGWIN 1
77#define LUAJIT_OS LUAJIT_OS_POSIX 95#define LUAJIT_OS LUAJIT_OS_POSIX
96#elif defined(__QNX__)
97#define LJ_TARGET_QNX 1
98#define LUAJIT_OS LUAJIT_OS_POSIX
78#else 99#else
79#define LUAJIT_OS LUAJIT_OS_OTHER 100#define LUAJIT_OS LUAJIT_OS_OTHER
80#endif 101#endif
@@ -99,10 +120,16 @@
99#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) 120#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
100#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) 121#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
101#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) 122#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
102#define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
123#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
103#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) 124#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
104#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX 125#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
105 126
127#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
128#define LJ_TARGET_IOS 1
129#else
130#define LJ_TARGET_IOS 0
131#endif
132
106#ifdef __CELLOS_LV2__ 133#ifdef __CELLOS_LV2__
107#define LJ_TARGET_PS3 1 134#define LJ_TARGET_PS3 1
108#define LJ_TARGET_CONSOLE 1 135#define LJ_TARGET_CONSOLE 1
@@ -115,6 +142,13 @@
115#define NULL ((void*)0) 142#define NULL ((void*)0)
116#endif 143#endif
117 144
145#ifdef __PROSPERO__
146#define LJ_TARGET_PS5 1
147#define LJ_TARGET_CONSOLE 1
148#undef NULL
149#define NULL ((void*)0)
150#endif
151
118#ifdef __psp2__ 152#ifdef __psp2__
119#define LJ_TARGET_PSVITA 1 153#define LJ_TARGET_PSVITA 1
120#define LJ_TARGET_CONSOLE 1 154#define LJ_TARGET_CONSOLE 1
@@ -125,10 +159,27 @@
125#define LJ_TARGET_CONSOLE 1 159#define LJ_TARGET_CONSOLE 1
126#endif 160#endif
127 161
128#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
162#ifdef _DURANGO
129#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
163#define LJ_TARGET_XBOXONE 1
130#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
164#define LJ_TARGET_CONSOLE 1
131#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
165#define LJ_TARGET_GC64 1
166#endif
167
168#ifdef __NX__
169#define LJ_TARGET_NX 1
170#define LJ_TARGET_CONSOLE 1
171#undef NULL
172#define NULL ((void*)0)
173#endif
174
175#ifdef _UWP
176#define LJ_TARGET_UWP 1
177#if LUAJIT_TARGET == LUAJIT_ARCH_X64
178#define LJ_TARGET_GC64 1
179#endif
180#endif
181
182/* -- Arch-specific settings ---------------------------------------------- */
132 183
133/* Set target architecture properties. */ 184/* Set target architecture properties. */
134#if LUAJIT_TARGET == LUAJIT_ARCH_X86 185#if LUAJIT_TARGET == LUAJIT_ARCH_X86
@@ -136,14 +187,10 @@
136#define LJ_ARCH_NAME "x86" 187#define LJ_ARCH_NAME "x86"
137#define LJ_ARCH_BITS 32 188#define LJ_ARCH_BITS 32
138#define LJ_ARCH_ENDIAN LUAJIT_LE 189#define LJ_ARCH_ENDIAN LUAJIT_LE
139#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
140#define LJ_ABI_WIN 1
141#else
142#define LJ_ABI_WIN 0
143#endif
144#define LJ_TARGET_X86 1 190#define LJ_TARGET_X86 1
145#define LJ_TARGET_X86ORX64 1 191#define LJ_TARGET_X86ORX64 1
146#define LJ_TARGET_EHRETREG 0 192#define LJ_TARGET_EHRETREG 0
193#define LJ_TARGET_EHRAREG 8
147#define LJ_TARGET_MASKSHIFT 1 194#define LJ_TARGET_MASKSHIFT 1
148#define LJ_TARGET_MASKROT 1 195#define LJ_TARGET_MASKROT 1
149#define LJ_TARGET_UNALIGNED 1 196#define LJ_TARGET_UNALIGNED 1
@@ -154,19 +201,20 @@
154#define LJ_ARCH_NAME "x64" 201#define LJ_ARCH_NAME "x64"
155#define LJ_ARCH_BITS 64 202#define LJ_ARCH_BITS 64
156#define LJ_ARCH_ENDIAN LUAJIT_LE 203#define LJ_ARCH_ENDIAN LUAJIT_LE
157#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
158#define LJ_ABI_WIN 1
159#else
160#define LJ_ABI_WIN 0
161#endif
162#define LJ_TARGET_X64 1 204#define LJ_TARGET_X64 1
163#define LJ_TARGET_X86ORX64 1 205#define LJ_TARGET_X86ORX64 1
164#define LJ_TARGET_EHRETREG 0 206#define LJ_TARGET_EHRETREG 0
207#define LJ_TARGET_EHRAREG 16
165#define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */ 208#define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */
166#define LJ_TARGET_MASKSHIFT 1 209#define LJ_TARGET_MASKSHIFT 1
167#define LJ_TARGET_MASKROT 1 210#define LJ_TARGET_MASKROT 1
168#define LJ_TARGET_UNALIGNED 1 211#define LJ_TARGET_UNALIGNED 1
169#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL 212#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
213#ifndef LUAJIT_DISABLE_GC64
214#define LJ_TARGET_GC64 1
215#elif LJ_TARGET_OSX
216#error "macOS requires GC64 -- don't disable it"
217#endif
170 218
171#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM 219#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
172 220
@@ -182,6 +230,7 @@
182#define LJ_ABI_EABI 1 230#define LJ_ABI_EABI 1
183#define LJ_TARGET_ARM 1 231#define LJ_TARGET_ARM 1
184#define LJ_TARGET_EHRETREG 0 232#define LJ_TARGET_EHRETREG 0
233#define LJ_TARGET_EHRAREG 14
185#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ 234#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
186#define LJ_TARGET_MASKSHIFT 0 235#define LJ_TARGET_MASKSHIFT 0
187#define LJ_TARGET_MASKROT 1 236#define LJ_TARGET_MASKROT 1
@@ -200,22 +249,90 @@
200#define LJ_ARCH_VERSION 50 249#define LJ_ARCH_VERSION 50
201#endif 250#endif
202 251
252#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
253
254#define LJ_ARCH_BITS 64
255#if defined(__AARCH64EB__)
256#define LJ_ARCH_NAME "arm64be"
257#define LJ_ARCH_ENDIAN LUAJIT_BE
258#else
259#define LJ_ARCH_NAME "arm64"
260#define LJ_ARCH_ENDIAN LUAJIT_LE
261#endif
262#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__)
263#define LJ_ABI_PAUTH 1
264#endif
265#define LJ_TARGET_ARM64 1
266#define LJ_TARGET_EHRETREG 0
267#define LJ_TARGET_EHRAREG 30
268#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
269#define LJ_TARGET_MASKSHIFT 1
270#define LJ_TARGET_MASKROT 1
271#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
272#define LJ_TARGET_GC64 1
273#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
274
275#define LJ_ARCH_VERSION 80
276
203#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC 277#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
204 278
205#define LJ_ARCH_NAME "ppc"
279#ifndef LJ_ARCH_ENDIAN
280#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
281#define LJ_ARCH_ENDIAN LUAJIT_LE
282#else
283#define LJ_ARCH_ENDIAN LUAJIT_BE
284#endif
285#endif
286
206#if _LP64 287#if _LP64
207#define LJ_ARCH_BITS 64 288#define LJ_ARCH_BITS 64
289#if LJ_ARCH_ENDIAN == LUAJIT_LE
290#define LJ_ARCH_NAME "ppc64le"
291#else
292#define LJ_ARCH_NAME "ppc64"
293#endif
208#else 294#else
209#define LJ_ARCH_BITS 32 295#define LJ_ARCH_BITS 32
296#define LJ_ARCH_NAME "ppc"
297
298#if !defined(LJ_ARCH_HASFPU)
299#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
300#define LJ_ARCH_HASFPU 0
301#else
302#define LJ_ARCH_HASFPU 1
210#endif 303#endif
211#define LJ_ARCH_ENDIAN LUAJIT_BE 304#endif
305
306#if !defined(LJ_ABI_SOFTFP)
307#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
308#define LJ_ABI_SOFTFP 1
309#else
310#define LJ_ABI_SOFTFP 0
311#endif
312#endif
313#endif
314
315#if LJ_ABI_SOFTFP
316#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
317#else
318#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
319#endif
320
212#define LJ_TARGET_PPC 1 321#define LJ_TARGET_PPC 1
213#define LJ_TARGET_EHRETREG 3 322#define LJ_TARGET_EHRETREG 3
323#define LJ_TARGET_EHRAREG 65
214#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ 324#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
215#define LJ_TARGET_MASKSHIFT 0 325#define LJ_TARGET_MASKSHIFT 0
216#define LJ_TARGET_MASKROT 1 326#define LJ_TARGET_MASKROT 1
217#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ 327#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
218#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE 328
329#if LJ_TARGET_CONSOLE
330#define LJ_ARCH_PPC32ON64 1
331#define LJ_ARCH_NOFFI 1
332#elif LJ_ARCH_BITS == 64
333#error "No support for PPC64"
334#undef LJ_TARGET_PPC
335#endif
219 336
220#if _ARCH_PWR7 337#if _ARCH_PWR7
221#define LJ_ARCH_VERSION 70 338#define LJ_ARCH_VERSION 70
@@ -230,10 +347,6 @@
230#else 347#else
231#define LJ_ARCH_VERSION 0 348#define LJ_ARCH_VERSION 0
232#endif 349#endif
233#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
234#define LJ_ARCH_PPC64 1
235#define LJ_ARCH_NOFFI 1
236#endif
237#if _ARCH_PPCSQ 350#if _ARCH_PPCSQ
238#define LJ_ARCH_SQRT 1 351#define LJ_ARCH_SQRT 1
239#endif 352#endif
@@ -247,44 +360,80 @@
247#define LJ_ARCH_XENON 1 360#define LJ_ARCH_XENON 1
248#endif 361#endif
249 362
250#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE 363#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
251
252#define LJ_ARCH_NAME "ppcspe"
253#define LJ_ARCH_BITS 32
254#define LJ_ARCH_ENDIAN LUAJIT_BE
255#ifndef LJ_ABI_SOFTFP
256#define LJ_ABI_SOFTFP 1
257#endif
258#define LJ_ABI_EABI 1
259#define LJ_TARGET_PPCSPE 1
260#define LJ_TARGET_EHRETREG 3
261#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
262#define LJ_TARGET_MASKSHIFT 0
263#define LJ_TARGET_MASKROT 1
264#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
265#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
266#define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */
267#define LJ_ARCH_NOJIT 1
268
269#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
270 364
271#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) 365#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
366#if __mips_isa_rev >= 6
367#define LJ_TARGET_MIPSR6 1
368#define LJ_TARGET_UNALIGNED 1
369#endif
370#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
371#if LJ_TARGET_MIPSR6
372#define LJ_ARCH_NAME "mips32r6el"
373#else
272#define LJ_ARCH_NAME "mipsel" 374#define LJ_ARCH_NAME "mipsel"
375#endif
376#else
377#if LJ_TARGET_MIPSR6
378#define LJ_ARCH_NAME "mips64r6el"
379#else
380#define LJ_ARCH_NAME "mips64el"
381#endif
382#endif
273#define LJ_ARCH_ENDIAN LUAJIT_LE 383#define LJ_ARCH_ENDIAN LUAJIT_LE
274#else 384#else
385#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
386#if LJ_TARGET_MIPSR6
387#define LJ_ARCH_NAME "mips32r6"
388#else
275#define LJ_ARCH_NAME "mips" 389#define LJ_ARCH_NAME "mips"
390#endif
391#else
392#if LJ_TARGET_MIPSR6
393#define LJ_ARCH_NAME "mips64r6"
394#else
395#define LJ_ARCH_NAME "mips64"
396#endif
397#endif
276#define LJ_ARCH_ENDIAN LUAJIT_BE 398#define LJ_ARCH_ENDIAN LUAJIT_BE
277#endif 399#endif
400
401#if !defined(LJ_ARCH_HASFPU)
402#ifdef __mips_soft_float
403#define LJ_ARCH_HASFPU 0
404#else
405#define LJ_ARCH_HASFPU 1
406#endif
407#endif
408
409#if !defined(LJ_ABI_SOFTFP)
410#ifdef __mips_soft_float
411#define LJ_ABI_SOFTFP 1
412#else
413#define LJ_ABI_SOFTFP 0
414#endif
415#endif
416
417#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
278#define LJ_ARCH_BITS 32 418#define LJ_ARCH_BITS 32
419#define LJ_TARGET_MIPS32 1
420#else
421#define LJ_ARCH_BITS 64
422#define LJ_TARGET_MIPS64 1
423#define LJ_TARGET_GC64 1
424#endif
279#define LJ_TARGET_MIPS 1 425#define LJ_TARGET_MIPS 1
280#define LJ_TARGET_EHRETREG 4 426#define LJ_TARGET_EHRETREG 4
427#define LJ_TARGET_EHRAREG 31
281#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ 428#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
282#define LJ_TARGET_MASKSHIFT 1 429#define LJ_TARGET_MASKSHIFT 1
283#define LJ_TARGET_MASKROT 1 430#define LJ_TARGET_MASKROT 1
284#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 431#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
285#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE 432#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
286 433
287#if _MIPS_ARCH_MIPS32R2 434#if LJ_TARGET_MIPSR6
435#define LJ_ARCH_VERSION 60
436#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
288#define LJ_ARCH_VERSION 20 437#define LJ_ARCH_VERSION 20
289#else 438#else
290#define LJ_ARCH_VERSION 10 439#define LJ_ARCH_VERSION 10
@@ -294,9 +443,7 @@
294#error "No target architecture defined" 443#error "No target architecture defined"
295#endif 444#endif
296 445
297#ifndef LJ_PAGESIZE 446/* -- Checks for requirements --------------------------------------------- */
298#define LJ_PAGESIZE 4096
299#endif
300 447
301/* Check for minimum required compiler versions. */ 448/* Check for minimum required compiler versions. */
302#if defined(__GNUC__) 449#if defined(__GNUC__)
@@ -312,12 +459,28 @@
312#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) 459#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
313#error "Need at least GCC 4.2 or newer" 460#error "Need at least GCC 4.2 or newer"
314#endif 461#endif
462#elif LJ_TARGET_ARM64
463#if __clang__
464#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
465#error "Need at least Clang 3.5 or newer"
466#endif
467#else
468#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
469#error "Need at least GCC 4.8 or newer"
470#endif
471#endif
315#elif !LJ_TARGET_PS3 472#elif !LJ_TARGET_PS3
473#if __clang__
474#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5))
475#error "Need at least Clang 3.5 or newer"
476#endif
477#else
316#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) 478#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
317#error "Need at least GCC 4.3 or newer" 479#error "Need at least GCC 4.3 or newer"
318#endif 480#endif
319#endif 481#endif
320#endif 482#endif
483#endif
321 484
322/* Check target-specific constraints. */ 485/* Check target-specific constraints. */
323#ifndef _BUILDVM_H 486#ifndef _BUILDVM_H
@@ -338,31 +501,41 @@
338#error "Only ARM EABI or iOS 3.0+ ABI is supported" 501#error "Only ARM EABI or iOS 3.0+ ABI is supported"
339#undef LJ_TARGET_ARM 502#undef LJ_TARGET_ARM
340#endif 503#endif
341#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 504#elif LJ_TARGET_ARM64
342#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) 505#if defined(_ILP32)
343#error "No support for PowerPC CPUs without double-precision FPU, use LuaJIT v2.1" 506#error "No support for ILP32 model on ARM64"
344#undef LJ_TARGET_PPC 507#undef LJ_TARGET_ARM64
345#endif 508#endif
509#elif LJ_TARGET_PPC
346#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) 510#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
347#error "No support for little-endian PowerPC" 511#error "No support for little-endian PPC32"
348#undef LJ_TARGET_PPC 512#undef LJ_TARGET_PPC
349#endif 513#endif
350#if defined(_LP64) 514#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
351#error "No support for PowerPC 64 bit mode" 515#error "No support for PPC/e500, use LuaJIT 2.0"
352#undef LJ_TARGET_PPC 516#undef LJ_TARGET_PPC
353#endif 517#endif
354#elif LJ_TARGET_MIPS 518#elif LJ_TARGET_MIPS32
355#if defined(__mips_soft_float) 519#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
356#error "No support for MIPS CPUs without FPU, use LuaJIT v2.1+" 520#error "Only o32 ABI supported for MIPS32"
357#undef LJ_TARGET_MIPS 521#undef LJ_TARGET_MIPS
358#endif 522#endif
359#if defined(_LP64) 523#if LJ_TARGET_MIPSR6
360#error "No support for MIPS64, use LuaJIT v2.1+" 524/* Not that useful, since most available r6 CPUs are 64 bit. */
525#error "No support for MIPS32R6"
526#undef LJ_TARGET_MIPS
527#endif
528#elif LJ_TARGET_MIPS64
529#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
530/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
531#error "Only n64 ABI supported for MIPS64"
361#undef LJ_TARGET_MIPS 532#undef LJ_TARGET_MIPS
362#endif 533#endif
363#endif 534#endif
364#endif 535#endif
365 536
537/* -- Derived defines ----------------------------------------------------- */
538
366/* Enable or disable the dual-number mode for the VM. */ 539/* Enable or disable the dual-number mode for the VM. */
367#if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ 540#if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \
368 (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) 541 (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1)
@@ -384,6 +557,20 @@
384#endif 557#endif
385#endif 558#endif
386 559
560/* 64 bit GC references. */
561#if LJ_TARGET_GC64
562#define LJ_GC64 1
563#else
564#define LJ_GC64 0
565#endif
566
567/* 2-slot frame info. */
568#if LJ_GC64
569#define LJ_FR2 1
570#else
571#define LJ_FR2 0
572#endif
573
387/* Disable or enable the JIT compiler. */ 574/* Disable or enable the JIT compiler. */
388#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) 575#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
389#define LJ_HASJIT 0 576#define LJ_HASJIT 0
@@ -398,6 +585,28 @@
398#define LJ_HASFFI 1 585#define LJ_HASFFI 1
399#endif 586#endif
400 587
588/* Disable or enable the string buffer extension. */
589#if defined(LUAJIT_DISABLE_BUFFER)
590#define LJ_HASBUFFER 0
591#else
592#define LJ_HASBUFFER 1
593#endif
594
595#if defined(LUAJIT_DISABLE_PROFILE)
596#define LJ_HASPROFILE 0
597#elif LJ_TARGET_POSIX
598#define LJ_HASPROFILE 1
599#define LJ_PROFILE_SIGPROF 1
600#elif LJ_TARGET_PS3
601#define LJ_HASPROFILE 1
602#define LJ_PROFILE_PTHREAD 1
603#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
604#define LJ_HASPROFILE 1
605#define LJ_PROFILE_WTHREAD 1
606#else
607#define LJ_HASPROFILE 0
608#endif
609
401#ifndef LJ_ARCH_HASFPU 610#ifndef LJ_ARCH_HASFPU
402#define LJ_ARCH_HASFPU 1 611#define LJ_ARCH_HASFPU 1
403#endif 612#endif
@@ -405,6 +614,11 @@
405#define LJ_ABI_SOFTFP 0 614#define LJ_ABI_SOFTFP 0
406#endif 615#endif
407#define LJ_SOFTFP (!LJ_ARCH_HASFPU) 616#define LJ_SOFTFP (!LJ_ARCH_HASFPU)
617#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
618
619#ifndef LJ_ABI_PAUTH
620#define LJ_ABI_PAUTH 0
621#endif
408 622
409#if LJ_ARCH_ENDIAN == LUAJIT_BE 623#if LJ_ARCH_ENDIAN == LUAJIT_BE
410#define LJ_LE 0 624#define LJ_LE 0
@@ -430,26 +644,52 @@
430#define LJ_TARGET_UNALIGNED 0 644#define LJ_TARGET_UNALIGNED 0
431#endif 645#endif
432 646
433/* Various workarounds for embedded operating systems. */ 647#ifndef LJ_PAGESIZE
434#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 648#define LJ_PAGESIZE 4096
435#define LUAJIT_NO_LOG2
436#endif 649#endif
437#if defined(__symbian__) 650
438#define LUAJIT_NO_EXP2 651/* Various workarounds for embedded operating systems or weak C runtimes. */
652#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
653#define LUAJIT_NO_LOG2
439#endif 654#endif
440#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) 655#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
441#define LJ_NO_SYSTEM 1 656#define LJ_NO_SYSTEM 1
442#endif 657#endif
443 658
444#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ 659#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
445/* NYI: no support for compact unwind specification, yet. */ 660#define LJ_ABI_WIN 1
446#define LUAJIT_NO_UNWIND 1 661#else
662#define LJ_ABI_WIN 0
663#endif
664
665#if LJ_TARGET_WINDOWS
666#if LJ_TARGET_UWP
667#define LJ_WIN_VALLOC VirtualAllocFromApp
668#define LJ_WIN_VPROTECT VirtualProtectFromApp
669extern void *LJ_WIN_LOADLIBA(const char *path);
670#else
671#define LJ_WIN_VALLOC VirtualAlloc
672#define LJ_WIN_VPROTECT VirtualProtect
673#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
674#endif
447#endif 675#endif
448 676
449#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 677#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5
450#define LJ_NO_UNWIND 1 678#define LJ_NO_UNWIND 1
451#endif 679#endif
452 680
681#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
682#define LJ_UNWIND_EXT 1
683#else
684#define LJ_UNWIND_EXT 0
685#endif
686
687#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
688#define LJ_UNWIND_JIT 1
689#else
690#define LJ_UNWIND_JIT 0
691#endif
692
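To make the unwind define chain above concrete, here is a worked expansion for a stock Windows/x64 build with the JIT enabled (an illustration derived from the macros above, not part of the patch):

/* Worked expansion (illustrative), default build options assumed:
**   LJ_NO_UNWIND   -> not defined  (none of the listed platforms match)
**   LJ_UNWIND_EXT  -> 1            (LJ_ABI_WIN is 1)
**   LJ_UNWIND_JIT  -> 1            (JIT on, target is neither ARM nor Windows/x86)
*/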
453/* Compatibility with Lua 5.1 vs. 5.2. */ 693/* Compatibility with Lua 5.1 vs. 5.2. */
454#ifdef LUAJIT_ENABLE_LUA52COMPAT 694#ifdef LUAJIT_ENABLE_LUA52COMPAT
455#define LJ_52 1 695#define LJ_52 1
@@ -457,4 +697,46 @@
457#define LJ_52 0 697#define LJ_52 0
458#endif 698#endif
459 699
700/* -- VM security --------------------------------------------------------- */
701
702/* Don't make any changes here. Instead build with:
703** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value"
704**
705** Important note to distro maintainers: DO NOT change the defaults for a
706** regular distro build -- neither upwards, nor downwards!
707** These build-time configurable security flags are intended for embedders
708** who may have specific needs wrt. security vs. performance.
709*/
710
711/* Security defaults. */
712#ifndef LUAJIT_SECURITY_PRNG
713/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */
714#define LUAJIT_SECURITY_PRNG 1
715#endif
716
717#ifndef LUAJIT_SECURITY_STRHASH
718/* String hash: 0 = sparse only, 1 = sparse + dense. */
719#define LUAJIT_SECURITY_STRHASH 1
720#endif
721
722#ifndef LUAJIT_SECURITY_STRID
723/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */
724#define LUAJIT_SECURITY_STRID 1
725#endif
726
727#ifndef LUAJIT_SECURITY_MCODE
728/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */
729#define LUAJIT_SECURITY_MCODE 1
730#endif
731
732#define LJ_SECURITY_MODE \
733 ( 0u \
734 | ((LUAJIT_SECURITY_PRNG & 3) << 0) \
735 | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \
736 | ((LUAJIT_SECURITY_STRID & 3) << 4) \
737 | ((LUAJIT_SECURITY_MCODE & 3) << 6) \
738 )
739#define LJ_SECURITY_MODESTRING \
740 "\004prng\007strhash\005strid\005mcode"
741
460#endif 742#endif
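As a reading aid for the security block that closes lj_arch.h (a sketch, not part of the patch): each knob occupies a 2-bit field of LJ_SECURITY_MODE, and LJ_SECURITY_MODESTRING lists the field names, each prefixed by its length, presumably so the packed mode can be decoded for diagnostics elsewhere. With the defaults (all four knobs set to 1) the packed value works out to 0x55:

/* Illustrative packing with the default settings; mirrors the macro above. */
unsigned mode = (1u << 0)   /* LUAJIT_SECURITY_PRNG    */
              | (1u << 2)   /* LUAJIT_SECURITY_STRHASH */
              | (1u << 4)   /* LUAJIT_SECURITY_STRID   */
              | (1u << 6);  /* LUAJIT_SECURITY_MCODE   */
/* mode == 0x55; in the mode string, "\004prng" is the 4-char name "prng",
** "\007strhash" the 7-char name "strhash", and so on. */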
diff --git a/src/lj_asm.c b/src/lj_asm.c
index c217609b..c31dd1de 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_buf.h"
14#include "lj_str.h" 15#include "lj_str.h"
15#include "lj_tab.h" 16#include "lj_tab.h"
16#include "lj_frame.h" 17#include "lj_frame.h"
@@ -28,6 +29,7 @@
28#include "lj_dispatch.h" 29#include "lj_dispatch.h"
29#include "lj_vm.h" 30#include "lj_vm.h"
30#include "lj_target.h" 31#include "lj_target.h"
32#include "lj_prng.h"
31 33
32#ifdef LUA_USE_ASSERT 34#ifdef LUA_USE_ASSERT
33#include <stdio.h> 35#include <stdio.h>
@@ -71,6 +73,7 @@ typedef struct ASMState {
71 IRRef snaprename; /* Rename highwater mark for snapshot check. */ 73 IRRef snaprename; /* Rename highwater mark for snapshot check. */
72 SnapNo snapno; /* Current snapshot number. */ 74 SnapNo snapno; /* Current snapshot number. */
73 SnapNo loopsnapno; /* Loop snapshot number. */ 75 SnapNo loopsnapno; /* Loop snapshot number. */
76 int snapalloc; /* Current snapshot needs allocation. */
74 BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ 77 BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */
75 78
76 IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ 79 IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
@@ -85,18 +88,31 @@ typedef struct ASMState {
85 88
86 MCode *mcbot; /* Bottom of reserved MCode. */ 89 MCode *mcbot; /* Bottom of reserved MCode. */
87 MCode *mctop; /* Top of generated MCode. */ 90 MCode *mctop; /* Top of generated MCode. */
91 MCode *mctoporig; /* Original top of generated MCode. */
88 MCode *mcloop; /* Pointer to loop MCode (or NULL). */ 92 MCode *mcloop; /* Pointer to loop MCode (or NULL). */
89 MCode *invmcp; /* Points to invertible loop branch (or NULL). */ 93 MCode *invmcp; /* Points to invertible loop branch (or NULL). */
90 MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ 94 MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
91 MCode *realign; /* Realign loop if not NULL. */ 95 MCode *realign; /* Realign loop if not NULL. */
92 96
97#ifdef LUAJIT_RANDOM_RA
98 /* Randomize register allocation. OK for fuzz testing, not for production. */
99 uint64_t prngbits;
100 PRNGState prngstate;
101#endif
102
93#ifdef RID_NUM_KREF 103#ifdef RID_NUM_KREF
94 int32_t krefk[RID_NUM_KREF]; 104 intptr_t krefk[RID_NUM_KREF];
95#endif 105#endif
96 IRRef1 phireg[RID_MAX]; /* PHI register references. */ 106 IRRef1 phireg[RID_MAX]; /* PHI register references. */
97 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ 107 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */
98} ASMState; 108} ASMState;
99 109
110#ifdef LUA_USE_ASSERT
111#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
112#else
113#define lj_assertA(c, ...) ((void)as)
114#endif
115
100#define IR(ref) (&as->ir[(ref)]) 116#define IR(ref) (&as->ir[(ref)])
101 117
102#define ASMREF_TMP1 REF_TRUE /* Temp. register. */ 118#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
@@ -128,9 +144,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
128#ifdef LUA_USE_ASSERT 144#ifdef LUA_USE_ASSERT
129 if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { 145 if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
130 IRIns *ir = IR(as->curins+1); 146 IRIns *ir = IR(as->curins+1);
131 fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, 147 lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
132 as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); 148 as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
133 lua_assert(0);
134 } 149 }
135#endif 150#endif
136 if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); 151 if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
@@ -144,7 +159,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
144#define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) 159#define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref)))
145#define ra_krefk(as, ref) (as->krefk[(ref)]) 160#define ra_krefk(as, ref) (as->krefk[(ref)])
146 161
147static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) 162static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
148{ 163{
149 IRRef ref = (IRRef)(r - RID_MIN_KREF); 164 IRRef ref = (IRRef)(r - RID_MIN_KREF);
150 as->krefk[ref] = k; 165 as->krefk[ref] = k;
@@ -165,12 +180,49 @@ IRFLDEF(FLOFS)
165 0 180 0
166}; 181};
167 182
183#ifdef LUAJIT_RANDOM_RA
184/* Return a fixed number of random bits from the local PRNG state. */
185static uint32_t ra_random_bits(ASMState *as, uint32_t nbits) {
186 uint64_t b = as->prngbits;
187 uint32_t res = (1u << nbits) - 1u;
188 if (b <= res) b = lj_prng_u64(&as->prngstate) | (1ull << 63);
189 res &= (uint32_t)b;
190 as->prngbits = b >> nbits;
191 return res;
192}
193
194/* Pick a random register from a register set. */
195static Reg rset_pickrandom(ASMState *as, RegSet rs)
196{
197 Reg r = rset_pickbot_(rs);
198 rs >>= r;
199 if (rs > 1) { /* More than one bit set? */
200 while (1) {
201 /* We need to sample max. the GPR or FPR half of the set. */
202 uint32_t d = ra_random_bits(as, RSET_BITS-1);
203 if ((rs >> d) & 1) {
204 r += d;
205 break;
206 }
207 }
208 }
209 return r;
210}
211#define rset_picktop(rs) rset_pickrandom(as, rs)
212#define rset_pickbot(rs) rset_pickrandom(as, rs)
213#else
214#define rset_picktop(rs) rset_picktop_(rs)
215#define rset_pickbot(rs) rset_pickbot_(rs)
216#endif
217
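A minimal standalone model of the bit-buffering trick used by ra_random_bits() above; the names are hypothetical and the PRNG source is left abstract. The sentinel bit set on refill keeps the cached word from looking exhausted while usable bits remain:

#include <stdint.h>

/* Hand out nbits pseudo-random bits at a time from a cached 64-bit word. */
static uint32_t take_bits(uint64_t *buf, uint32_t nbits, uint64_t (*next64)(void))
{
  uint32_t mask = (1u << nbits) - 1u;
  if (*buf <= mask) *buf = next64() | (1ull << 63);  /* Refill + sentinel bit. */
  {
    uint32_t res = (uint32_t)*buf & mask;
    *buf >>= nbits;
    return res;
  }
}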
168/* -- Target-specific instruction emitter --------------------------------- */ 218/* -- Target-specific instruction emitter --------------------------------- */
169 219
170#if LJ_TARGET_X86ORX64 220#if LJ_TARGET_X86ORX64
171#include "lj_emit_x86.h" 221#include "lj_emit_x86.h"
172#elif LJ_TARGET_ARM 222#elif LJ_TARGET_ARM
173#include "lj_emit_arm.h" 223#include "lj_emit_arm.h"
224#elif LJ_TARGET_ARM64
225#include "lj_emit_arm64.h"
174#elif LJ_TARGET_PPC 226#elif LJ_TARGET_PPC
175#include "lj_emit_ppc.h" 227#include "lj_emit_ppc.h"
176#elif LJ_TARGET_MIPS 228#elif LJ_TARGET_MIPS
@@ -179,6 +231,12 @@ IRFLDEF(FLOFS)
179#error "Missing instruction emitter for target CPU" 231#error "Missing instruction emitter for target CPU"
180#endif 232#endif
181 233
234/* Generic load/store of register from/to stack slot. */
235#define emit_spload(as, ir, r, ofs) \
236 emit_loadofs(as, ir, (r), RID_SP, (ofs))
237#define emit_spstore(as, ir, r, ofs) \
238 emit_storeofs(as, ir, (r), RID_SP, (ofs))
239
182/* -- Register allocator debugging ---------------------------------------- */ 240/* -- Register allocator debugging ---------------------------------------- */
183 241
184/* #define LUAJIT_DEBUG_RA */ 242/* #define LUAJIT_DEBUG_RA */
@@ -236,7 +294,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
236 *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; 294 *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
237 } else { 295 } else {
238 *p++ = '?'; 296 *p++ = '?';
239 lua_assert(0); 297 lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
240 } 298 }
241 } else if (e[1] == 'f' || e[1] == 'i') { 299 } else if (e[1] == 'f' || e[1] == 'i') {
242 IRRef ref; 300 IRRef ref;
@@ -254,7 +312,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
254 } else if (e[1] == 'x') { 312 } else if (e[1] == 'x') {
255 p += sprintf(p, "%08x", va_arg(argp, int32_t)); 313 p += sprintf(p, "%08x", va_arg(argp, int32_t));
256 } else { 314 } else {
257 lua_assert(0); 315 lj_assertA(0, "bad debug format code");
258 } 316 }
259 fmt = e+2; 317 fmt = e+2;
260 } 318 }
@@ -313,37 +371,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
313 Reg r; 371 Reg r;
314 if (ra_iskref(ref)) { 372 if (ra_iskref(ref)) {
315 r = ra_krefreg(ref); 373 r = ra_krefreg(ref);
316 lua_assert(!rset_test(as->freeset, r)); 374 lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
317 ra_free(as, r); 375 ra_free(as, r);
318 ra_modified(as, r); 376 ra_modified(as, r);
377#if LJ_64
378 emit_loadu64(as, r, ra_krefk(as, ref));
379#else
319 emit_loadi(as, r, ra_krefk(as, ref)); 380 emit_loadi(as, r, ra_krefk(as, ref));
381#endif
320 return r; 382 return r;
321 } 383 }
322 ir = IR(ref); 384 ir = IR(ref);
323 r = ir->r; 385 r = ir->r;
324 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); 386 lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
387 lj_assertA(!ra_hasspill(ir->s),
388 "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
325 ra_free(as, r); 389 ra_free(as, r);
326 ra_modified(as, r); 390 ra_modified(as, r);
327 ir->r = RID_INIT; /* Do not keep any hint. */ 391 ir->r = RID_INIT; /* Do not keep any hint. */
328 RA_DBGX((as, "remat $i $r", ir, r)); 392 RA_DBGX((as, "remat $i $r", ir, r));
329#if !LJ_SOFTFP 393#if !LJ_SOFTFP32
330 if (ir->o == IR_KNUM) { 394 if (ir->o == IR_KNUM) {
331 emit_loadn(as, r, ir_knum(ir)); 395 emit_loadk64(as, r, ir);
332 } else 396 } else
333#endif 397#endif
334 if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { 398 if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
335 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ 399 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
336 emit_getgl(as, r, jit_base); 400 emit_getgl(as, r, jit_base);
337 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { 401 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
338 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ 402 /* REF_NIL stores ASMREF_L register. */
339 emit_getgl(as, r, jit_L); 403 lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
404 emit_getgl(as, r, cur_L);
340#if LJ_64 405#if LJ_64
341 } else if (ir->o == IR_KINT64) { 406 } else if (ir->o == IR_KINT64) {
342 emit_loadu64(as, r, ir_kint64(ir)->u64); 407 emit_loadu64(as, r, ir_kint64(ir)->u64);
408#if LJ_GC64
409 } else if (ir->o == IR_KGC) {
410 emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
411 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
412 emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
413#endif
343#endif 414#endif
344 } else { 415 } else {
345 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 416 lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
346 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 417 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
418 "rematk of bad IR op %d", ir->o);
347 emit_loadi(as, r, ir->i); 419 emit_loadi(as, r, ir->i);
348 } 420 }
349 return r; 421 return r;
@@ -353,7 +425,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
353static int32_t ra_spill(ASMState *as, IRIns *ir) 425static int32_t ra_spill(ASMState *as, IRIns *ir)
354{ 426{
355 int32_t slot = ir->s; 427 int32_t slot = ir->s;
356 lua_assert(ir >= as->ir + REF_TRUE); 428 lj_assertA(ir >= as->ir + REF_TRUE,
429 "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
357 if (!ra_hasspill(slot)) { 430 if (!ra_hasspill(slot)) {
358 if (irt_is64(ir->t)) { 431 if (irt_is64(ir->t)) {
359 slot = as->evenspill; 432 slot = as->evenspill;
@@ -378,7 +451,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
378{ 451{
379 IRIns *ir = IR(ref); 452 IRIns *ir = IR(ref);
380 Reg r = ir->r; 453 Reg r = ir->r;
381 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); 454 lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
455 lj_assertA(!ra_hasspill(ir->s),
456 "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
382 ra_free(as, r); 457 ra_free(as, r);
383 ra_modified(as, r); 458 ra_modified(as, r);
384 ir->r = RID_INIT; 459 ir->r = RID_INIT;
@@ -394,7 +469,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
394 IRIns *ir = IR(ref); 469 IRIns *ir = IR(ref);
395 int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ 470 int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */
396 Reg r = ir->r; 471 Reg r = ir->r;
397 lua_assert(ra_hasreg(r)); 472 lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
398 ra_sethint(ir->r, r); /* Keep hint. */ 473 ra_sethint(ir->r, r); /* Keep hint. */
399 ra_free(as, r); 474 ra_free(as, r);
400 if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ 475 if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
@@ -423,14 +498,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
423{ 498{
424 IRRef ref; 499 IRRef ref;
425 RegCost cost = ~(RegCost)0; 500 RegCost cost = ~(RegCost)0;
426 lua_assert(allow != RSET_EMPTY); 501 lj_assertA(allow != RSET_EMPTY, "evict from empty set");
427 if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { 502 if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
428 GPRDEF(MINCOST) 503 GPRDEF(MINCOST)
429 } else { 504 } else {
430 FPRDEF(MINCOST) 505 FPRDEF(MINCOST)
431 } 506 }
432 ref = regcost_ref(cost); 507 ref = regcost_ref(cost);
433 lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); 508 lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
509 "evict of out-of-range IR %04d", ref - REF_BIAS);
434 /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ 510 /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
435 if (!irref_isk(ref) && (as->weakset & allow)) { 511 if (!irref_isk(ref) && (as->weakset & allow)) {
436 IRIns *ir = IR(ref); 512 IRIns *ir = IR(ref);
@@ -512,7 +588,7 @@ static void ra_evictk(ASMState *as)
512 588
513#ifdef RID_NUM_KREF 589#ifdef RID_NUM_KREF
514/* Allocate a register for a constant. */ 590/* Allocate a register for a constant. */
515static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) 591static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
516{ 592{
517 /* First try to find a register which already holds the same constant. */ 593 /* First try to find a register which already holds the same constant. */
518 RegSet pick, work = ~as->freeset & RSET_GPR; 594 RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -521,9 +597,35 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
521 IRRef ref; 597 IRRef ref;
522 r = rset_pickbot(work); 598 r = rset_pickbot(work);
523 ref = regcost_ref(as->cost[r]); 599 ref = regcost_ref(as->cost[r]);
600#if LJ_64
601 if (ref < ASMREF_L) {
602 if (ra_iskref(ref)) {
603 if (k == ra_krefk(as, ref))
604 return r;
605 } else {
606 IRIns *ir = IR(ref);
607 if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
608#if LJ_GC64
609#if LJ_TARGET_ARM64
610 (ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
611#else
612 (ir->o == IR_KINT && k == ir->i) ||
613#endif
614 (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
615 ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
616 k == (intptr_t)ir_kptr(ir))
617#else
618 (ir->o != IR_KINT64 && k == ir->i)
619#endif
620 )
621 return r;
622 }
623 }
624#else
524 if (ref < ASMREF_L && 625 if (ref < ASMREF_L &&
525 k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) 626 k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
526 return r; 627 return r;
628#endif
527 rset_clear(work, r); 629 rset_clear(work, r);
528 } 630 }
529 pick = as->freeset & allow; 631 pick = as->freeset & allow;
@@ -543,7 +645,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
543} 645}
544 646
545/* Allocate a specific register for a constant. */ 647/* Allocate a specific register for a constant. */
546static void ra_allockreg(ASMState *as, int32_t k, Reg r) 648static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
547{ 649{
548 Reg kr = ra_allock(as, k, RID2RSET(r)); 650 Reg kr = ra_allock(as, k, RID2RSET(r));
549 if (kr != r) { 651 if (kr != r) {
@@ -566,7 +668,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
566 IRIns *ir = IR(ref); 668 IRIns *ir = IR(ref);
567 RegSet pick = as->freeset & allow; 669 RegSet pick = as->freeset & allow;
568 Reg r; 670 Reg r;
569 lua_assert(ra_noreg(ir->r)); 671 lj_assertA(ra_noreg(ir->r),
672 "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
570 if (pick) { 673 if (pick) {
571 /* First check register hint from propagation or PHI. */ 674 /* First check register hint from propagation or PHI. */
572 if (ra_hashint(ir->r)) { 675 if (ra_hashint(ir->r)) {
@@ -613,15 +716,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
613 return r; 716 return r;
614} 717}
615 718
719/* Add a register rename to the IR. */
720static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
721{
722 IRRef ren;
723 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
724 ren = tref_ref(lj_ir_emit(as->J));
725 as->J->cur.ir[ren].r = (uint8_t)down;
726 as->J->cur.ir[ren].s = SPS_NONE;
727}
728
616/* Rename register allocation and emit move. */ 729/* Rename register allocation and emit move. */
617static void ra_rename(ASMState *as, Reg down, Reg up) 730static void ra_rename(ASMState *as, Reg down, Reg up)
618{ 731{
619 IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); 732 IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
620 IRIns *ir = IR(ref); 733 IRIns *ir = IR(ref);
621 ir->r = (uint8_t)up; 734 ir->r = (uint8_t)up;
622 as->cost[down] = 0; 735 as->cost[down] = 0;
623 lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); 736 lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
624 lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); 737 "rename between GPR/FPR %d and %d", down, up);
738 lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
739 lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
625 ra_free(as, down); /* 'down' is free ... */ 740 ra_free(as, down); /* 'down' is free ... */
626 ra_modified(as, down); 741 ra_modified(as, down);
627 rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ 742 rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
@@ -629,11 +744,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
629 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); 744 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
630 emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ 745 emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
631 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ 746 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
632 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); 747 /*
633 ren = tref_ref(lj_ir_emit(as->J)); 748 ** The rename is effective at the subsequent (already emitted) exit
634 as->ir = as->T->ir; /* The IR may have been reallocated. */ 749 ** branch. This is for the current snapshot (as->snapno). Except if we
635 IR(ren)->r = (uint8_t)down; 750 ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
636 IR(ren)->s = SPS_NONE; 751 ** then it belongs to the next snapshot.
752 ** See also the discussion at asm_snap_checkrename().
753 */
754 ra_addrename(as, down, ref, as->snapno + as->snapalloc);
637 } 755 }
638} 756}
639 757
@@ -666,7 +784,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
666{ 784{
667 Reg dest = ra_dest(as, ir, RID2RSET(r)); 785 Reg dest = ra_dest(as, ir, RID2RSET(r));
668 if (dest != r) { 786 if (dest != r) {
669 lua_assert(rset_test(as->freeset, r)); 787 lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
670 ra_modified(as, r); 788 ra_modified(as, r);
671 emit_movrr(as, ir, dest, r); 789 emit_movrr(as, ir, dest, r);
672 } 790 }
@@ -683,20 +801,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
683 if (ra_noreg(left)) { 801 if (ra_noreg(left)) {
684 if (irref_isk(lref)) { 802 if (irref_isk(lref)) {
685 if (ir->o == IR_KNUM) { 803 if (ir->o == IR_KNUM) {
686 cTValue *tv = ir_knum(ir);
687 /* FP remat needs a load except for +0. Still better than eviction. */ 804 /* FP remat needs a load except for +0. Still better than eviction. */
688 if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { 805 if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
689 emit_loadn(as, dest, tv); 806 emit_loadk64(as, dest, ir);
690 return; 807 return;
691 } 808 }
692#if LJ_64 809#if LJ_64
693 } else if (ir->o == IR_KINT64) { 810 } else if (ir->o == IR_KINT64) {
694 emit_loadu64(as, dest, ir_kint64(ir)->u64); 811 emit_loadk64(as, dest, ir);
812 return;
813#if LJ_GC64
814 } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
815 emit_loadk64(as, dest, ir);
695 return; 816 return;
696#endif 817#endif
697 } else { 818#endif
698 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 819 } else if (ir->o != IR_KPRI) {
699 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 820 lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
821 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
822 "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
700 emit_loadi(as, dest, ir->i); 823 emit_loadi(as, dest, ir->i);
701 return; 824 return;
702 } 825 }
@@ -741,11 +864,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
741} 864}
742#endif 865#endif
743 866
744#if !LJ_64
745/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ 867/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
746static void ra_destpair(ASMState *as, IRIns *ir) 868static void ra_destpair(ASMState *as, IRIns *ir)
747{ 869{
748 Reg destlo = ir->r, desthi = (ir+1)->r; 870 Reg destlo = ir->r, desthi = (ir+1)->r;
871 IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
749 /* First spill unrelated refs blocking the destination registers. */ 872 /* First spill unrelated refs blocking the destination registers. */
750 if (!rset_test(as->freeset, RID_RETLO) && 873 if (!rset_test(as->freeset, RID_RETLO) &&
751 destlo != RID_RETLO && desthi != RID_RETLO) 874 destlo != RID_RETLO && desthi != RID_RETLO)
@@ -769,29 +892,29 @@ static void ra_destpair(ASMState *as, IRIns *ir)
769 /* Check for conflicts and shuffle the registers as needed. */ 892 /* Check for conflicts and shuffle the registers as needed. */
770 if (destlo == RID_RETHI) { 893 if (destlo == RID_RETHI) {
771 if (desthi == RID_RETLO) { 894 if (desthi == RID_RETLO) {
772#if LJ_TARGET_X86 895#if LJ_TARGET_X86ORX64
773 *--as->mcp = XI_XCHGa + RID_RETHI; 896 *--as->mcp = XI_XCHGa + RID_RETHI;
897 if (LJ_64 && irt_is64(irx->t)) *--as->mcp = 0x48;
774#else 898#else
775 emit_movrr(as, ir, RID_RETHI, RID_TMP); 899 emit_movrr(as, irx, RID_RETHI, RID_TMP);
776 emit_movrr(as, ir, RID_RETLO, RID_RETHI); 900 emit_movrr(as, irx, RID_RETLO, RID_RETHI);
777 emit_movrr(as, ir, RID_TMP, RID_RETLO); 901 emit_movrr(as, irx, RID_TMP, RID_RETLO);
778#endif 902#endif
779 } else { 903 } else {
780 emit_movrr(as, ir, RID_RETHI, RID_RETLO); 904 emit_movrr(as, irx, RID_RETHI, RID_RETLO);
781 if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); 905 if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
782 } 906 }
783 } else if (desthi == RID_RETLO) { 907 } else if (desthi == RID_RETLO) {
784 emit_movrr(as, ir, RID_RETLO, RID_RETHI); 908 emit_movrr(as, irx, RID_RETLO, RID_RETHI);
785 if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); 909 if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
786 } else { 910 } else {
787 if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); 911 if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
788 if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); 912 if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
789 } 913 }
790 /* Restore spill slots (if any). */ 914 /* Restore spill slots (if any). */
791 if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); 915 if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
792 if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); 916 if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
793} 917}
794#endif
795 918
796/* -- Snapshot handling --------- ----------------------------------------- */ 919/* -- Snapshot handling --------- ----------------------------------------- */
797 920
@@ -841,11 +964,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
841#endif 964#endif
842 { /* Allocate stored values for TNEW, TDUP and CNEW. */ 965 { /* Allocate stored values for TNEW, TDUP and CNEW. */
843 IRIns *irs; 966 IRIns *irs;
844 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); 967 lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
968 "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
845 for (irs = IR(as->snapref-1); irs > ir; irs--) 969 for (irs = IR(as->snapref-1); irs > ir; irs--)
846 if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { 970 if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
847 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || 971 lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
848 irs->o == IR_FSTORE || irs->o == IR_XSTORE); 972 irs->o == IR_FSTORE || irs->o == IR_XSTORE,
973 "sunk store IR %04d has bad op %d",
974 (int)(irs - as->ir) - REF_BIAS, irs->o);
849 asm_snap_alloc1(as, irs->op2); 975 asm_snap_alloc1(as, irs->op2);
850 if (LJ_32 && (irs+1)->o == IR_HIOP) 976 if (LJ_32 && (irs+1)->o == IR_HIOP)
851 asm_snap_alloc1(as, (irs+1)->op2); 977 asm_snap_alloc1(as, (irs+1)->op2);
@@ -881,9 +1007,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
881} 1007}
882 1008
883/* Allocate refs escaping to a snapshot. */ 1009/* Allocate refs escaping to a snapshot. */
884static void asm_snap_alloc(ASMState *as) 1010static void asm_snap_alloc(ASMState *as, int snapno)
885{ 1011{
886 SnapShot *snap = &as->T->snap[as->snapno]; 1012 SnapShot *snap = &as->T->snap[snapno];
887 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 1013 SnapEntry *map = &as->T->snapmap[snap->mapofs];
888 MSize n, nent = snap->nent; 1014 MSize n, nent = snap->nent;
889 as->snapfilt1 = as->snapfilt2 = 0; 1015 as->snapfilt1 = as->snapfilt2 = 0;
@@ -893,7 +1019,9 @@ static void asm_snap_alloc(ASMState *as)
893 if (!irref_isk(ref)) { 1019 if (!irref_isk(ref)) {
894 asm_snap_alloc1(as, ref); 1020 asm_snap_alloc1(as, ref);
895 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { 1021 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
896 lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); 1022 lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
1023 "snap %d[%d] points to bad SOFTFP IR %04d",
1024 snapno, n, ref - REF_BIAS);
897 asm_snap_alloc1(as, ref+1); 1025 asm_snap_alloc1(as, ref+1);
898 } 1026 }
899 } 1027 }
@@ -919,67 +1047,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
919 return 0; /* Not found. */ 1047 return 0; /* Not found. */
920} 1048}
921 1049
922/* Prepare snapshot for next guard instruction. */ 1050/* Prepare snapshot for next guard or throwing instruction. */
923static void asm_snap_prep(ASMState *as) 1051static void asm_snap_prep(ASMState *as)
924{ 1052{
925 if (as->curins < as->snapref) { 1053 if (as->snapalloc) {
926 do { 1054 /* Alloc on first invocation for each snapshot. */
927 if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ 1055 as->snapalloc = 0;
928 as->snapno--; 1056 asm_snap_alloc(as, as->snapno);
929 as->snapref = as->T->snap[as->snapno].ref;
930 } while (as->curins < as->snapref);
931 asm_snap_alloc(as);
932 as->snaprename = as->T->nins; 1057 as->snaprename = as->T->nins;
933 } else { 1058 } else {
934 /* Process any renames above the highwater mark. */ 1059 /* Check any renames above the highwater mark. */
935 for (; as->snaprename < as->T->nins; as->snaprename++) { 1060 for (; as->snaprename < as->T->nins; as->snaprename++) {
936 IRIns *ir = IR(as->snaprename); 1061 IRIns *ir = &as->T->ir[as->snaprename];
937 if (asm_snap_checkrename(as, ir->op1)) 1062 if (asm_snap_checkrename(as, ir->op1))
938 ir->op2 = REF_BIAS-1; /* Kill rename. */ 1063 ir->op2 = REF_BIAS-1; /* Kill rename. */
939 } 1064 }
940 } 1065 }
941} 1066}
942 1067
943/* -- Miscellaneous helpers ----------------------------------------------- */ 1068/* Move to previous snapshot when we cross the current snapshot ref. */
944 1069static void asm_snap_prev(ASMState *as)
945/* Collect arguments from CALL* and CARG instructions. */
946static void asm_collectargs(ASMState *as, IRIns *ir,
947 const CCallInfo *ci, IRRef *args)
948{ 1070{
949 uint32_t n = CCI_NARGS(ci); 1071 if (as->curins < as->snapref) {
950 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ 1072 uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);
951 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } 1073 if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
952 while (n-- > 1) { 1074 do {
953 ir = IR(ir->op1); 1075 if (as->snapno == 0) return;
954 lua_assert(ir->o == IR_CARG); 1076 as->snapno--;
955 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; 1077 as->snapref = as->T->snap[as->snapno].ref;
1078 as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */
1079 } while (as->curins < as->snapref); /* May have no ins inbetween. */
1080 as->snapalloc = 1;
956 } 1081 }
957 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
958 lua_assert(IR(ir->op1)->o != IR_CARG);
959} 1082}
960 1083
961/* Reconstruct CCallInfo flags for CALLX*. */ 1084/* Fixup snapshot mcode offsets. */
962static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) 1085static void asm_snap_fixup_mcofs(ASMState *as)
963{ 1086{
964 uint32_t nargs = 0; 1087 uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
965 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ 1088 SnapShot *snap = as->T->snap;
966 IRIns *ira = IR(ir->op1); 1089 SnapNo i;
967 nargs++; 1090 for (i = as->T->nsnap-1; i > 0; i--) {
968 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } 1091 /* Compute offset from mcode start and store in correct snapshot. */
1092 snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
969 } 1093 }
970#if LJ_HASFFI 1094 snap[0].mcofs = 0;
971 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
972 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
973 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
974 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
975#if LJ_TARGET_X86
976 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
977#endif
978 }
979#endif
980 return (nargs | (ir->t.irt << CCI_OTSHIFT));
981} 1095}
982 1096
1097/* -- Miscellaneous helpers ----------------------------------------------- */
1098
983/* Calculate stack adjustment. */ 1099/* Calculate stack adjustment. */
984static int32_t asm_stack_adjust(ASMState *as) 1100static int32_t asm_stack_adjust(ASMState *as)
985{ 1101{
@@ -989,21 +1105,26 @@ static int32_t asm_stack_adjust(ASMState *as)
989} 1105}
990 1106
991/* Must match with hash*() in lj_tab.c. */ 1107/* Must match with hash*() in lj_tab.c. */
992static uint32_t ir_khash(IRIns *ir) 1108static uint32_t ir_khash(ASMState *as, IRIns *ir)
993{ 1109{
994 uint32_t lo, hi; 1110 uint32_t lo, hi;
1111 UNUSED(as);
995 if (irt_isstr(ir->t)) { 1112 if (irt_isstr(ir->t)) {
996 return ir_kstr(ir)->hash; 1113 return ir_kstr(ir)->sid;
997 } else if (irt_isnum(ir->t)) { 1114 } else if (irt_isnum(ir->t)) {
998 lo = ir_knum(ir)->u32.lo; 1115 lo = ir_knum(ir)->u32.lo;
999 hi = ir_knum(ir)->u32.hi << 1; 1116 hi = ir_knum(ir)->u32.hi << 1;
1000 } else if (irt_ispri(ir->t)) { 1117 } else if (irt_ispri(ir->t)) {
1001 lua_assert(!irt_isnil(ir->t)); 1118 lj_assertA(!irt_isnil(ir->t), "hash of nil key");
1002 return irt_type(ir->t)-IRT_FALSE; 1119 return irt_type(ir->t)-IRT_FALSE;
1003 } else { 1120 } else {
1004 lua_assert(irt_isgcv(ir->t)); 1121 lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
1005 lo = u32ptr(ir_kgc(ir)); 1122 lo = u32ptr(ir_kgc(ir));
1123#if LJ_GC64
1124 hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
1125#else
1006 hi = lo + HASH_BIAS; 1126 hi = lo + HASH_BIAS;
1127#endif
1007 } 1128 }
1008 return hashrot(lo, hi); 1129 return hashrot(lo, hi);
1009} 1130}
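Two observations on the ir_khash() change, offered as the editor's reading of the patch rather than something it states: constant string keys now hash by the string's sid, whose assignment policy is exactly what the LUAJIT_SECURITY_STRID knob added to lj_arch.h controls, and under GC64 the upper pointer bits and the type tag are folded into hi so that 64-bit GC references still yield a 32-bit hash. Both must stay in lockstep with hash*() in lj_tab.c, as the comment above the function already demands.

/* Contract sketch (editor's note): the hash computed at trace-assembly time for a
** constant key must select the same slot the interpreter computes at runtime. */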
@@ -1017,6 +1138,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
1017{ 1138{
1018 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; 1139 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
1019 IRRef args[3]; 1140 IRRef args[3];
1141 asm_snap_prep(as);
1020 args[0] = ASMREF_L; /* lua_State *L */ 1142 args[0] = ASMREF_L; /* lua_State *L */
1021 args[1] = ir->op1; /* const char *str */ 1143 args[1] = ir->op1; /* const char *str */
1022 args[2] = ir->op2; /* size_t len */ 1144 args[2] = ir->op2; /* size_t len */
@@ -1029,6 +1151,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
1029{ 1151{
1030 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; 1152 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
1031 IRRef args[2]; 1153 IRRef args[2];
1154 asm_snap_prep(as);
1032 args[0] = ASMREF_L; /* lua_State *L */ 1155 args[0] = ASMREF_L; /* lua_State *L */
1033 args[1] = ASMREF_TMP1; /* uint32_t ahsize */ 1156 args[1] = ASMREF_TMP1; /* uint32_t ahsize */
1034 as->gcsteps++; 1157 as->gcsteps++;
@@ -1041,6 +1164,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
1041{ 1164{
1042 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; 1165 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
1043 IRRef args[2]; 1166 IRRef args[2];
1167 asm_snap_prep(as);
1044 args[0] = ASMREF_L; /* lua_State *L */ 1168 args[0] = ASMREF_L; /* lua_State *L */
1045 args[1] = ir->op1; /* const GCtab *kt */ 1169 args[1] = ir->op1; /* const GCtab *kt */
1046 as->gcsteps++; 1170 as->gcsteps++;
@@ -1064,6 +1188,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1064 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1188 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1065} 1189}
1066 1190
1191/* -- Buffer operations --------------------------------------------------- */
1192
1193static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
1194#if LJ_HASBUFFER
1195static void asm_bufhdr_write(ASMState *as, Reg sb);
1196#endif
1197
1198static void asm_bufhdr(ASMState *as, IRIns *ir)
1199{
1200 Reg sb = ra_dest(as, ir, RSET_GPR);
1201 switch (ir->op2) {
1202 case IRBUFHDR_RESET: {
1203 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1204 IRIns irbp;
1205 irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */
1206 emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
1207 emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
1208 break;
1209 }
1210 case IRBUFHDR_APPEND: {
1211 /* Rematerialize const buffer pointer instead of likely spill. */
1212 IRIns *irp = IR(ir->op1);
1213 if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1214 (irp == ir-2 && !ra_used(ir-1)))) {
1215 while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
1216 irp = IR(irp->op1);
1217 if (irref_isk(irp->op1)) {
1218 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1219 ir = irp;
1220 }
1221 }
1222 break;
1223 }
1224#if LJ_HASBUFFER
1225 case IRBUFHDR_WRITE:
1226 asm_bufhdr_write(as, sb);
1227 break;
1228#endif
1229 default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
1230 }
1231#if LJ_TARGET_X86ORX64
1232 ra_left(as, sb, ir->op1);
1233#else
1234 ra_leftov(as, sb, ir->op1);
1235#endif
1236}
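In plain C the IRBUFHDR_RESET case above is equivalent to resetting the buffer's write pointer to its base; a sketch of the effect, with the SBuf field names taken from the offsets used in the emitted load/store pair:

/* Net effect of the RESET path (illustrative, not the emitted code): */
sb->w = sb->b;   /* Drop accumulated output; the underlying allocation is kept. */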
1237
1238static void asm_bufput(ASMState *as, IRIns *ir)
1239{
1240 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1241 IRRef args[3];
1242 IRIns *irs;
1243 int kchar = -129;
1244 args[0] = ir->op1; /* SBuf * */
1245 args[1] = ir->op2; /* GCstr * */
1246 irs = IR(ir->op2);
1247 lj_assertA(irt_isstr(irs->t),
1248 "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
1249 if (irs->o == IR_KGC) {
1250 GCstr *s = ir_kstr(irs);
1251 if (s->len == 1) { /* Optimize put of single-char string constant. */
1252 kchar = (int8_t)strdata(s)[0]; /* Signed! */
1253 args[1] = ASMREF_TMP1; /* int, truncated to char */
1254 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1255 }
1256 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1257 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1258 if (irs->op2 == IRTOSTR_NUM) {
1259 args[1] = ASMREF_TMP1; /* TValue * */
1260 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1261 } else {
1262 lj_assertA(irt_isinteger(IR(irs->op1)->t),
1263 "TOSTR of non-numeric IR %04d", irs->op1);
1264 args[1] = irs->op1; /* int */
1265 if (irs->op2 == IRTOSTR_INT)
1266 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1267 else
1268 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1269 }
1270 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1271 args[1] = irs->op1; /* const void * */
1272 args[2] = irs->op2; /* MSize */
1273 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1274 }
1275 }
1276 asm_setupresult(as, ir, ci); /* SBuf * */
1277 asm_gencall(as, ci, args);
1278 if (args[1] == ASMREF_TMP1) {
1279 Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1280 if (kchar == -129)
1281 asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
1282 else
1283 ra_allockreg(as, kchar, tmp);
1284 }
1285}
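One detail of asm_bufput() worth spelling out: kchar starts at -129, just outside the range of a signed char, so it doubles as a sentinel that can never collide with a real fused character; when the argument is a single-character string constant, the call is specialized from lj_buf_putstr to lj_buf_putchar and the character is passed through ASMREF_TMP1.

/* Sentinel reasoning (editor's note): (int8_t)strdata(s)[0] is always in -128..127,
** so kchar == -129 reliably means "no single-char constant was fused". */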
1286
1287static void asm_bufstr(ASMState *as, IRIns *ir)
1288{
1289 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1290 IRRef args[1];
1291 args[0] = ir->op1; /* SBuf *sb */
1292 as->gcsteps++;
1293 asm_setupresult(as, ir, ci); /* GCstr * */
1294 asm_gencall(as, ci, args);
1295}
1296
1297/* -- Type conversions ---------------------------------------------------- */
1298
1299static void asm_tostr(ASMState *as, IRIns *ir)
1300{
1301 const CCallInfo *ci;
1302 IRRef args[2];
1303 asm_snap_prep(as);
1304 args[0] = ASMREF_L;
1305 as->gcsteps++;
1306 if (ir->op2 == IRTOSTR_NUM) {
1307 args[1] = ASMREF_TMP1; /* cTValue * */
1308 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1309 } else {
1310 args[1] = ir->op1; /* int32_t k */
1311 if (ir->op2 == IRTOSTR_INT)
1312 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1313 else
1314 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1315 }
1316 asm_setupresult(as, ir, ci); /* GCstr * */
1317 asm_gencall(as, ci, args);
1318 if (ir->op2 == IRTOSTR_NUM)
1319 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
1320}
1321
1322#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1323static void asm_conv64(ASMState *as, IRIns *ir)
1324{
1325 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1326 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1327 IRCallID id;
1328 IRRef args[2];
1329 lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
1330 "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
1331 args[LJ_BE] = (ir-1)->op1;
1332 args[LJ_LE] = ir->op1;
1333 if (st == IRT_NUM || st == IRT_FLOAT) {
1334 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1335 ir--;
1336 } else {
1337 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1338 }
1339 {
1340#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1341 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1342 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1343#else
1344 const CCallInfo *ci = &lj_ir_callinfo[id];
1345#endif
1346 asm_setupresult(as, ir, ci);
1347 asm_gencall(as, ci, args);
1348 }
1349}
1350#endif
1351
1352/* -- Memory references --------------------------------------------------- */
1353
1354static void asm_newref(ASMState *as, IRIns *ir)
1355{
1356 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1357 IRRef args[3];
1358 if (ir->r == RID_SINK)
1359 return;
1360 asm_snap_prep(as);
1361 args[0] = ASMREF_L; /* lua_State *L */
1362 args[1] = ir->op1; /* GCtab *t */
1363 args[2] = ASMREF_TMP1; /* cTValue *key */
1364 asm_setupresult(as, ir, ci); /* TValue * */
1365 asm_gencall(as, ci, args);
1366 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
1367}
1368
1369static void asm_tmpref(ASMState *as, IRIns *ir)
1370{
1371 Reg r = ra_dest(as, ir, RSET_GPR);
1372 asm_tvptr(as, r, ir->op1, ir->op2);
1373}
1374
1375static void asm_lref(ASMState *as, IRIns *ir)
1376{
1377 Reg r = ra_dest(as, ir, RSET_GPR);
1378#if LJ_TARGET_X86ORX64
1379 ra_left(as, r, ASMREF_L);
1380#else
1381 ra_leftov(as, r, ASMREF_L);
1382#endif
1383}
1384
1385/* -- Calls --------------------------------------------------------------- */
1386
1387/* Collect arguments from CALL* and CARG instructions. */
1388static void asm_collectargs(ASMState *as, IRIns *ir,
1389 const CCallInfo *ci, IRRef *args)
1390{
1391 uint32_t n = CCI_XNARGS(ci);
1392 /* Account for split args. */
1393 lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
1394 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1395 while (n-- > 1) {
1396 ir = IR(ir->op1);
1397 lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
1398 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1399 }
1400 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1401 lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
1402}
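For orientation, the argument chain asm_collectargs() walks looks like this for a three-argument call (a sketch inferred from the loop and its asserts): the CALL's op1 points at the last CARG, each CARG's op1 links toward the earlier arguments, and op2 carries the argument itself, which is why args[] is filled from the back.

/* CALL* op1 -> CARG { op1 -> CARG { op1 = a, op2 = b }, op2 = c }
** The loop stores c into args[2], b into args[1], and finally a into args[0]. */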
1403
1404/* Reconstruct CCallInfo flags for CALLX*. */
1405static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1406{
1407 uint32_t nargs = 0;
1408 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1409 IRIns *ira = IR(ir->op1);
1410 nargs++;
1411 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1412 }
1413#if LJ_HASFFI
1414 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1415 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1416 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1417 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1418#if LJ_TARGET_X86
1419 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1420#endif
1421 }
1422#endif
1423 return (nargs | (ir->t.irt << CCI_OTSHIFT));
1424}
1425
1426static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1427{
1428 const CCallInfo *ci = &lj_ir_callinfo[id];
1429 IRRef args[2];
1430 args[0] = ir->op1;
1431 args[1] = ir->op2;
1432 asm_setupresult(as, ir, ci);
1433 asm_gencall(as, ci, args);
1434}
1435
1436static void asm_call(ASMState *as, IRIns *ir)
1437{
1438 IRRef args[CCI_NARGS_MAX];
1439 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1440 asm_collectargs(as, ir, ci, args);
1441 asm_setupresult(as, ir, ci);
1442 asm_gencall(as, ci, args);
1443}
1444
1067/* -- PHI and loop handling ----------------------------------------------- */ 1445/* -- PHI and loop handling ----------------------------------------------- */
1068 1446
1069/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1447/* Break a PHI cycle by renaming to a free register (evict if needed). */
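
The asm_collectargs helper added in this hunk walks the CARG chain backwards: the CALL's op1 names the last CARG, each CARG contributes one argument via op2, and the innermost CARG's op1 holds the first argument. A minimal stand-alone model of that walk (toy types and refs, not LuaJIT's IRIns/IRRef; the REF_NIL and CCI_L special cases are left out):

#include <stdio.h>

/* Toy IR arena: field names mimic IRIns, but everything is a plain int. */
typedef struct { int o, op1, op2; } Ins;
enum { OP_CARG = 1, OP_CALL = 2 };

/* Mirror of asm_collectargs' loop: descend op1 from the CALL, take one
** argument ref from each CARG's op2, and the first one from the innermost
** CARG's op1.
*/
static void collect_args(const Ins *ir, int callref, int n, int *args)
{
  const Ins *p = &ir[callref];
  while (n-- > 1) {
    p = &ir[p->op1];
    args[n] = p->op2;
  }
  args[0] = p->op1;
}

int main(void)
{
  Ins ir[8] = {{0}};
  /* Pretend refs 1-3 are the argument instructions of a 3-arg call. */
  ir[4] = (Ins){OP_CARG, 1, 2};   /* CARG(arg#1, arg#2) */
  ir[5] = (Ins){OP_CARG, 4, 3};   /* CARG(^, arg#3) */
  ir[6] = (Ins){OP_CALL, 5, 0};   /* CALL with op1 -> last CARG */
  int args[3];
  collect_args(ir, 6, 3, args);
  printf("%d %d %d\n", args[0], args[1], args[2]);  /* prints: 1 2 3 */
  return 0;
}

The argument refs come out in call order even though the chain is traversed from the last argument inward.
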
@@ -1249,12 +1627,7 @@ static void asm_phi_fixup(ASMState *as)
1249 irt_clearmark(ir->t); 1627 irt_clearmark(ir->t);
1250 /* Left PHI gained a spill slot before the loop? */ 1628 /* Left PHI gained a spill slot before the loop? */
1251 if (ra_hasspill(ir->s)) { 1629 if (ra_hasspill(ir->s)) {
1252 IRRef ren; 1630 ra_addrename(as, r, lref, as->loopsnapno);
1253 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
1254 ren = tref_ref(lj_ir_emit(as->J));
1255 as->ir = as->T->ir; /* The IR may have been reallocated. */
1256 IR(ren)->r = (uint8_t)r;
1257 IR(ren)->s = SPS_NONE;
1258 } 1631 }
1259 } 1632 }
1260 rset_clear(work, r); 1633 rset_clear(work, r);
@@ -1329,6 +1702,8 @@ static void asm_loop(ASMState *as)
1329#include "lj_asm_x86.h" 1702#include "lj_asm_x86.h"
1330#elif LJ_TARGET_ARM 1703#elif LJ_TARGET_ARM
1331#include "lj_asm_arm.h" 1704#include "lj_asm_arm.h"
1705#elif LJ_TARGET_ARM64
1706#include "lj_asm_arm64.h"
1332#elif LJ_TARGET_PPC 1707#elif LJ_TARGET_PPC
1333#include "lj_asm_ppc.h" 1708#include "lj_asm_ppc.h"
1334#elif LJ_TARGET_MIPS 1709#elif LJ_TARGET_MIPS
@@ -1337,6 +1712,200 @@ static void asm_loop(ASMState *as)
1337#error "Missing assembler for target CPU" 1712#error "Missing assembler for target CPU"
1338#endif 1713#endif
1339 1714
1715/* -- Common instruction helpers ------------------------------------------ */
1716
1717#if !LJ_SOFTFP32
1718#if !LJ_TARGET_X86ORX64
1719#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1720#endif
1721
1722static void asm_pow(ASMState *as, IRIns *ir)
1723{
1724#if LJ_64 && LJ_HASFFI
1725 if (!irt_isnum(ir->t))
1726 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1727 IRCALL_lj_carith_powu64);
1728 else
1729#endif
1730 asm_callid(as, ir, IRCALL_pow);
1731}
1732
1733static void asm_div(ASMState *as, IRIns *ir)
1734{
1735#if LJ_64 && LJ_HASFFI
1736 if (!irt_isnum(ir->t))
1737 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1738 IRCALL_lj_carith_divu64);
1739 else
1740#endif
1741 asm_fpdiv(as, ir);
1742}
1743#endif
1744
1745static void asm_mod(ASMState *as, IRIns *ir)
1746{
1747#if LJ_64 && LJ_HASFFI
1748 if (!irt_isint(ir->t))
1749 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1750 IRCALL_lj_carith_modu64);
1751 else
1752#endif
1753 asm_callid(as, ir, IRCALL_lj_vm_modi);
1754}
1755
1756static void asm_fuseequal(ASMState *as, IRIns *ir)
1757{
1758 /* Fuse HREF + EQ/NE. */
1759 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1760 as->curins--;
1761 asm_href(as, ir-1, (IROp)ir->o);
1762 } else {
1763 asm_equal(as, ir);
1764 }
1765}
1766
1767static void asm_alen(ASMState *as, IRIns *ir)
1768{
1769 asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
1770 IRCALL_lj_tab_len_hint);
1771}
1772
1773/* -- Instruction dispatch ------------------------------------------------ */
1774
1775/* Assemble a single instruction. */
1776static void asm_ir(ASMState *as, IRIns *ir)
1777{
1778 switch ((IROp)ir->o) {
1779 /* Miscellaneous ops. */
1780 case IR_LOOP: asm_loop(as); break;
1781 case IR_NOP: case IR_XBAR:
1782 lj_assertA(!ra_used(ir),
1783 "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
1784 break;
1785 case IR_USE:
1786 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1787 case IR_PHI: asm_phi(as, ir); break;
1788 case IR_HIOP: asm_hiop(as, ir); break;
1789 case IR_GCSTEP: asm_gcstep(as, ir); break;
1790 case IR_PROF: asm_prof(as, ir); break;
1791
1792 /* Guarded assertions. */
1793 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1794 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1795 case IR_ABC:
1796 asm_comp(as, ir);
1797 break;
1798 case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
1799
1800 case IR_RETF: asm_retf(as, ir); break;
1801
1802 /* Bit ops. */
1803 case IR_BNOT: asm_bnot(as, ir); break;
1804 case IR_BSWAP: asm_bswap(as, ir); break;
1805 case IR_BAND: asm_band(as, ir); break;
1806 case IR_BOR: asm_bor(as, ir); break;
1807 case IR_BXOR: asm_bxor(as, ir); break;
1808 case IR_BSHL: asm_bshl(as, ir); break;
1809 case IR_BSHR: asm_bshr(as, ir); break;
1810 case IR_BSAR: asm_bsar(as, ir); break;
1811 case IR_BROL: asm_brol(as, ir); break;
1812 case IR_BROR: asm_bror(as, ir); break;
1813
1814 /* Arithmetic ops. */
1815 case IR_ADD: asm_add(as, ir); break;
1816 case IR_SUB: asm_sub(as, ir); break;
1817 case IR_MUL: asm_mul(as, ir); break;
1818 case IR_MOD: asm_mod(as, ir); break;
1819 case IR_NEG: asm_neg(as, ir); break;
1820#if LJ_SOFTFP32
1821 case IR_DIV: case IR_POW: case IR_ABS:
1822 case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
1823 /* Unused for LJ_SOFTFP32. */
1824 lj_assertA(0, "IR %04d with unused op %d",
1825 (int)(ir - as->ir) - REF_BIAS, ir->o);
1826 break;
1827#else
1828 case IR_DIV: asm_div(as, ir); break;
1829 case IR_POW: asm_pow(as, ir); break;
1830 case IR_ABS: asm_abs(as, ir); break;
1831 case IR_LDEXP: asm_ldexp(as, ir); break;
1832 case IR_FPMATH: asm_fpmath(as, ir); break;
1833 case IR_TOBIT: asm_tobit(as, ir); break;
1834#endif
1835 case IR_MIN: asm_min(as, ir); break;
1836 case IR_MAX: asm_max(as, ir); break;
1837
1838 /* Overflow-checking arithmetic ops. */
1839 case IR_ADDOV: asm_addov(as, ir); break;
1840 case IR_SUBOV: asm_subov(as, ir); break;
1841 case IR_MULOV: asm_mulov(as, ir); break;
1842
1843 /* Memory references. */
1844 case IR_AREF: asm_aref(as, ir); break;
1845 case IR_HREF: asm_href(as, ir, 0); break;
1846 case IR_HREFK: asm_hrefk(as, ir); break;
1847 case IR_NEWREF: asm_newref(as, ir); break;
1848 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1849 case IR_FREF: asm_fref(as, ir); break;
1850 case IR_TMPREF: asm_tmpref(as, ir); break;
1851 case IR_STRREF: asm_strref(as, ir); break;
1852 case IR_LREF: asm_lref(as, ir); break;
1853
1854 /* Loads and stores. */
1855 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1856 asm_ahuvload(as, ir);
1857 break;
1858 case IR_FLOAD: asm_fload(as, ir); break;
1859 case IR_XLOAD: asm_xload(as, ir); break;
1860 case IR_SLOAD: asm_sload(as, ir); break;
1861 case IR_ALEN: asm_alen(as, ir); break;
1862
1863 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1864 case IR_FSTORE: asm_fstore(as, ir); break;
1865 case IR_XSTORE: asm_xstore(as, ir); break;
1866
1867 /* Allocations. */
1868 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1869 case IR_TNEW: asm_tnew(as, ir); break;
1870 case IR_TDUP: asm_tdup(as, ir); break;
1871 case IR_CNEW: case IR_CNEWI:
1872#if LJ_HASFFI
1873 asm_cnew(as, ir);
1874#else
1875 lj_assertA(0, "IR %04d with unused op %d",
1876 (int)(ir - as->ir) - REF_BIAS, ir->o);
1877#endif
1878 break;
1879
1880 /* Buffer operations. */
1881 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1882 case IR_BUFPUT: asm_bufput(as, ir); break;
1883 case IR_BUFSTR: asm_bufstr(as, ir); break;
1884
1885 /* Write barriers. */
1886 case IR_TBAR: asm_tbar(as, ir); break;
1887 case IR_OBAR: asm_obar(as, ir); break;
1888
1889 /* Type conversions. */
1890 case IR_CONV: asm_conv(as, ir); break;
1891 case IR_TOSTR: asm_tostr(as, ir); break;
1892 case IR_STRTO: asm_strto(as, ir); break;
1893
1894 /* Calls. */
1895 case IR_CALLA:
1896 as->gcsteps++;
1897 /* fallthrough */
1898 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1899 case IR_CALLXS: asm_callx(as, ir); break;
1900 case IR_CARG: break;
1901
1902 default:
1903 setintV(&as->J->errinfo, ir->o);
1904 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1905 break;
1906 }
1907}
1908
1340/* -- Head of trace ------------------------------------------------------- */ 1909/* -- Head of trace ------------------------------------------------------- */
1341 1910
1342/* Head of a root trace. */ 1911/* Head of a root trace. */
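
asm_mod earlier in this hunk lowers plain integer modulo to a call to lj_vm_modi instead of an inline division. The reason, presumably, is that Lua defines a % b as a - floor(a/b)*b, while hardware division (and C's %) truncates toward zero. A sketch of the floored variant such a helper is expected to compute:

#include <stdio.h>

/* Floored modulo for int, matching Lua's a % b == a - floor(a/b)*b.
** C's % truncates toward zero, so the sign of the result can differ.
*/
static int modi(int a, int b)
{
  int r = a % b;                 /* Truncated remainder. */
  if (r != 0 && (r ^ b) < 0)     /* Signs differ: adjust toward floor. */
    r += b;
  return r;
}

int main(void)
{
  printf("%d %d\n", modi(-5, 3), -5 % 3);  /* prints: 1 -2 */
  return 0;
}

For -5 % 3 the floored result is 1 while the truncated remainder is -2; the two only agree when both operands have the same sign, which is why a plain hardware remainder is not enough.
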
@@ -1375,8 +1944,7 @@ static void asm_head_side(ASMState *as)
1375 1944
1376 if (as->snapno && as->topslot > as->parent->topslot) { 1945 if (as->snapno && as->topslot > as->parent->topslot) {
1377 /* Force snap #0 alloc to prevent register overwrite in stack check. */ 1946 /* Force snap #0 alloc to prevent register overwrite in stack check. */
1378 as->snapno = 0; 1947 asm_snap_alloc(as, 0);
1379 asm_snap_alloc(as);
1380 } 1948 }
1381 pbase = asm_head_side_base(as, irp); 1949 pbase = asm_head_side_base(as, irp);
1382 if (pbase != RID_NONE) { 1950 if (pbase != RID_NONE) {
@@ -1388,8 +1956,10 @@ static void asm_head_side(ASMState *as)
1388 for (i = as->stopins; i > REF_BASE; i--) { 1956 for (i = as->stopins; i > REF_BASE; i--) {
1389 IRIns *ir = IR(i); 1957 IRIns *ir = IR(i);
1390 RegSP rs; 1958 RegSP rs;
1391 lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || 1959 lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
1392 (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); 1960 (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
1961 "IR %04d has bad parent op %d",
1962 (int)(ir - as->ir) - REF_BIAS, ir->o);
1393 rs = as->parentmap[i - REF_FIRST]; 1963 rs = as->parentmap[i - REF_FIRST];
1394 if (ra_hasreg(ir->r)) { 1964 if (ra_hasreg(ir->r)) {
1395 rset_clear(allow, ir->r); 1965 rset_clear(allow, ir->r);
@@ -1542,7 +2112,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
1542 SnapEntry sn = map[n-1]; 2112 SnapEntry sn = map[n-1];
1543 if ((sn & SNAP_FRAME)) { 2113 if ((sn & SNAP_FRAME)) {
1544 *gotframe = 1; 2114 *gotframe = 1;
1545 return snap_slot(sn); 2115 return snap_slot(sn) - LJ_FR2;
1546 } 2116 }
1547 } 2117 }
1548 return 0; 2118 return 0;
@@ -1562,19 +2132,23 @@ static void asm_tail_link(ASMState *as)
1562 2132
1563 if (as->T->link == 0) { 2133 if (as->T->link == 0) {
1564 /* Setup fixed registers for exit to interpreter. */ 2134 /* Setup fixed registers for exit to interpreter. */
1565 const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); 2135 const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
1566 int32_t mres; 2136 int32_t mres;
1567 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ 2137 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
1568 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; 2138 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
1569 if (bc_isret(bc_op(*retpc))) 2139 if (bc_isret(bc_op(*retpc)))
1570 pc = retpc; 2140 pc = retpc;
1571 } 2141 }
2142#if LJ_GC64
2143 emit_loadu64(as, RID_LPC, u64ptr(pc));
2144#else
1572 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); 2145 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
1573 ra_allockreg(as, i32ptr(pc), RID_LPC); 2146 ra_allockreg(as, i32ptr(pc), RID_LPC);
1574 mres = (int32_t)(snap->nslots - baseslot); 2147#endif
2148 mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
1575 switch (bc_op(*pc)) { 2149 switch (bc_op(*pc)) {
1576 case BC_CALLM: case BC_CALLMT: 2150 case BC_CALLM: case BC_CALLMT:
1577 mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; 2151 mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
1578 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; 2152 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
1579 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; 2153 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
1580 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; 2154 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1586,6 +2160,11 @@ static void asm_tail_link(ASMState *as)
1586 } 2160 }
1587 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); 2161 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
1588 2162
2163 if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */
2164 setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
2165 IR(as->J->ktrace)->o = IR_KGC;
2166 }
2167
1589 /* Sync the interpreter state with the on-trace state. */ 2168 /* Sync the interpreter state with the on-trace state. */
1590 asm_stack_restore(as, snap); 2169 asm_stack_restore(as, snap);
1591 2170
@@ -1609,22 +2188,32 @@ static void asm_setup_regsp(ASMState *as)
1609#endif 2188#endif
1610 2189
1611 ra_setup(as); 2190 ra_setup(as);
2191#if LJ_TARGET_ARM64
2192 ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
2193#endif
1612 2194
1613 /* Clear reg/sp for constants. */ 2195 /* Clear reg/sp for constants. */
1614 for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) 2196 for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
1615 ir->prev = REGSP_INIT; 2197 ir->prev = REGSP_INIT;
2198 if (irt_is64(ir->t) && ir->o != IR_KNULL) {
2199#if LJ_GC64
2200 /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
2201 ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
2202#else
2203 /* Make life easier for backends by putting address of constant in i. */
2204 ir->i = (int32_t)(intptr_t)(ir+1);
2205#endif
2206 ir++;
2207 }
2208 }
1616 2209
1617 /* REF_BASE is used for implicit references to the BASE register. */ 2210 /* REF_BASE is used for implicit references to the BASE register. */
1618 lastir->prev = REGSP_HINT(RID_BASE); 2211 lastir->prev = REGSP_HINT(RID_BASE);
1619 2212
1620 ir = IR(nins-1);
1621 if (ir->o == IR_RENAME) {
1622 do { ir--; nins--; } while (ir->o == IR_RENAME);
1623 T->nins = nins; /* Remove any renames left over from ASM restart. */
1624 }
1625 as->snaprename = nins; 2213 as->snaprename = nins;
1626 as->snapref = nins; 2214 as->snapref = nins;
1627 as->snapno = T->nsnap; 2215 as->snapno = T->nsnap;
2216 as->snapalloc = 0;
1628 2217
1629 as->stopins = REF_BASE; 2218 as->stopins = REF_BASE;
1630 as->orignins = nins; 2219 as->orignins = nins;
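
The constant setup added above reflects that a 64-bit IR constant occupies two IR slots: the instruction itself plus a payload slot at ir+1. On non-GC64 builds, where all addresses fit in 32 bits, the payload's address is cached in the 32-bit i field so backends can reach it directly; on GC64 it stays zero until it becomes a RIP-relative offset. A toy two-slot layout (field names are illustrative, the real IRIns differs):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy 8-byte IR slot; the real IRIns layout differs. */
typedef union Slot {
  struct { int32_t i; uint16_t op; uint16_t prev; } ins;
  uint64_t k64;                     /* Payload slot of a 64-bit constant. */
} Slot;

int main(void)
{
  Slot ir[2];
  memset(ir, 0, sizeof(ir));
  ir[1].k64 = 0x123456789abcdef0ULL;     /* KINT64-style payload at ir+1. */
  const uint64_t *kp = &ir[1].k64;
  /* Non-GC64 shortcut from asm_setup_regsp: squeeze the payload address
  ** into the 32-bit i field; only sound when pointers fit in 32 bits.
  */
  ir[0].ins.i = (int32_t)(intptr_t)kp;
  printf("payload 0x%016llx lives at %p\n",
         (unsigned long long)*kp, (const void *)kp);
  return 0;
}
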
@@ -1634,7 +2223,7 @@ static void asm_setup_regsp(ASMState *as)
1634 ir = IR(REF_FIRST); 2223 ir = IR(REF_FIRST);
1635 if (as->parent) { 2224 if (as->parent) {
1636 uint16_t *p; 2225 uint16_t *p;
1637 lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); 2226 lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
1638 if (lastir - ir > LJ_MAX_JSLOTS) 2227 if (lastir - ir > LJ_MAX_JSLOTS)
1639 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 2228 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1640 as->stopins = (IRRef)((lastir-1) - as->ir); 2229 as->stopins = (IRRef)((lastir-1) - as->ir);
@@ -1673,6 +2262,10 @@ static void asm_setup_regsp(ASMState *as)
1673 ir->prev = (uint16_t)REGSP_HINT((rload & 15)); 2262 ir->prev = (uint16_t)REGSP_HINT((rload & 15));
1674 rload = lj_ror(rload, 4); 2263 rload = lj_ror(rload, 4);
1675 continue; 2264 continue;
2265 case IR_TMPREF:
2266 if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
2267 as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */
2268 break;
1676#endif 2269#endif
1677 case IR_CALLXS: { 2270 case IR_CALLXS: {
1678 CCallInfo ci; 2271 CCallInfo ci;
@@ -1682,7 +2275,17 @@ static void asm_setup_regsp(ASMState *as)
1682 as->modset |= RSET_SCRATCH; 2275 as->modset |= RSET_SCRATCH;
1683 continue; 2276 continue;
1684 } 2277 }
1685 case IR_CALLN: case IR_CALLL: case IR_CALLS: { 2278 case IR_CALLL:
2279 /* lj_vm_next needs two TValues on the stack. */
2280#if LJ_TARGET_X64 && LJ_ABI_WIN
2281 if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
2282 as->evenspill = SPS_FIRST + 4;
2283#else
2284 if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
2285 as->evenspill = 4;
2286#endif
2287 /* fallthrough */
2288 case IR_CALLN: case IR_CALLA: case IR_CALLS: {
1686 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 2289 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1687 ir->prev = asm_setup_call_slots(as, ir, ci); 2290 ir->prev = asm_setup_call_slots(as, ir, ci);
1688 if (inloop) 2291 if (inloop)
@@ -1690,7 +2293,6 @@ static void asm_setup_regsp(ASMState *as)
1690 (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; 2293 (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
1691 continue; 2294 continue;
1692 } 2295 }
1693#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
1694 case IR_HIOP: 2296 case IR_HIOP:
1695 switch ((ir-1)->o) { 2297 switch ((ir-1)->o) {
1696#if LJ_SOFTFP && LJ_TARGET_ARM 2298#if LJ_SOFTFP && LJ_TARGET_ARM
@@ -1701,15 +2303,15 @@ static void asm_setup_regsp(ASMState *as)
1701 } 2303 }
1702 break; 2304 break;
1703#endif 2305#endif
1704#if !LJ_SOFTFP && LJ_NEED_FP64 2306#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
1705 case IR_CONV: 2307 case IR_CONV:
1706 if (irt_isfp((ir-1)->t)) { 2308 if (irt_isfp((ir-1)->t)) {
1707 ir->prev = REGSP_HINT(RID_FPRET); 2309 ir->prev = REGSP_HINT(RID_FPRET);
1708 continue; 2310 continue;
1709 } 2311 }
1710 /* fallthrough */
1711#endif 2312#endif
1712 case IR_CALLN: case IR_CALLXS: 2313 /* fallthrough */
2314 case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
1713#if LJ_SOFTFP 2315#if LJ_SOFTFP
1714 case IR_MIN: case IR_MAX: 2316 case IR_MIN: case IR_MAX:
1715#endif 2317#endif
@@ -1720,18 +2322,29 @@ static void asm_setup_regsp(ASMState *as)
1720 break; 2322 break;
1721 } 2323 }
1722 break; 2324 break;
1723#endif
1724#if LJ_SOFTFP 2325#if LJ_SOFTFP
1725 case IR_MIN: case IR_MAX: 2326 case IR_MIN: case IR_MAX:
1726 if ((ir+1)->o != IR_HIOP) break; 2327 if ((ir+1)->o != IR_HIOP) break;
1727#endif 2328#endif
1728 /* fallthrough */ 2329 /* fallthrough */
1729 /* C calls evict all scratch regs and return results in RID_RET. */ 2330 /* C calls evict all scratch regs and return results in RID_RET. */
1730 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2331 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1731 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2332 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1732 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2333 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
2334#if LJ_TARGET_X86 && LJ_HASFFI
2335 if (0) {
2336 case IR_CNEW:
2337 if (ir->op2 != REF_NIL && as->evenspill < 4)
2338 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2339 }
2340 /* fallthrough */
2341#else
2342 /* fallthrough */
2343 case IR_CNEW:
2344#endif
1733 /* fallthrough */ 2345 /* fallthrough */
1734 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2346 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2347 case IR_BUFSTR:
1735 ir->prev = REGSP_HINT(RID_RET); 2348 ir->prev = REGSP_HINT(RID_RET);
1736 if (inloop) 2349 if (inloop)
1737 as->modset = RSET_SCRATCH; 2350 as->modset = RSET_SCRATCH;
@@ -1740,58 +2353,73 @@ static void asm_setup_regsp(ASMState *as)
1740 if (inloop) 2353 if (inloop)
1741 as->modset = RSET_SCRATCH; 2354 as->modset = RSET_SCRATCH;
1742 break; 2355 break;
1743#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP 2356#if !LJ_SOFTFP
1744 case IR_ATAN2: case IR_LDEXP: 2357#if !LJ_TARGET_X86ORX64
2358 case IR_LDEXP:
2359#endif
1745#endif 2360#endif
2361 /* fallthrough */
1746 case IR_POW: 2362 case IR_POW:
1747 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2363 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1748#if LJ_TARGET_X86ORX64
1749 ir->prev = REGSP_HINT(RID_XMM0);
1750 if (inloop) 2364 if (inloop)
1751 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); 2365 as->modset |= RSET_SCRATCH;
2366#if LJ_TARGET_X86
2367 if (irt_isnum(IR(ir->op2)->t)) {
2368 if (as->evenspill < 4) /* Leave room to call pow(). */
2369 as->evenspill = 4;
2370 }
2371 break;
1752#else 2372#else
1753 ir->prev = REGSP_HINT(RID_FPRET); 2373 ir->prev = REGSP_HINT(RID_FPRET);
1754 if (inloop)
1755 as->modset |= RSET_SCRATCH;
1756#endif
1757 continue; 2374 continue;
2375#endif
1758 } 2376 }
1759 /* fallthrough */ /* for integer POW */ 2377 /* fallthrough */ /* for integer POW */
1760 case IR_DIV: case IR_MOD: 2378 case IR_DIV: case IR_MOD:
1761 if (!irt_isnum(ir->t)) { 2379 if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
1762 ir->prev = REGSP_HINT(RID_RET); 2380 ir->prev = REGSP_HINT(RID_RET);
1763 if (inloop) 2381 if (inloop)
1764 as->modset |= (RSET_SCRATCH & RSET_GPR); 2382 as->modset |= (RSET_SCRATCH & RSET_GPR);
1765 continue; 2383 continue;
1766 } 2384 }
1767 break; 2385 break;
1768 case IR_FPMATH: 2386#if LJ_64 && LJ_SOFTFP
1769#if LJ_TARGET_X86ORX64 2387 case IR_ADD: case IR_SUB: case IR_MUL:
1770 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2388 if (irt_isnum(ir->t)) {
1771 ir->prev = REGSP_HINT(RID_XMM0); 2389 ir->prev = REGSP_HINT(RID_RET);
1772#if !LJ_64
1773 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
1774 as->evenspill = 4;
1775#endif
1776 if (inloop)
1777 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1778 continue;
1779 } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
1780 ir->prev = REGSP_HINT(RID_XMM0);
1781 if (inloop) 2390 if (inloop)
1782 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); 2391 as->modset |= (RSET_SCRATCH & RSET_GPR);
1783 continue; 2392 continue;
1784 } 2393 }
1785 break; 2394 break;
1786#else 2395#endif
1787 ir->prev = REGSP_HINT(RID_FPRET); 2396 case IR_FPMATH:
2397#if LJ_TARGET_X86ORX64
2398 if (ir->op2 <= IRFPM_TRUNC) {
2399 if (!(as->flags & JIT_F_SSE4_1)) {
2400 ir->prev = REGSP_HINT(RID_XMM0);
2401 if (inloop)
2402 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2403 continue;
2404 }
2405 break;
2406 }
2407#endif
1788 if (inloop) 2408 if (inloop)
1789 as->modset |= RSET_SCRATCH; 2409 as->modset |= RSET_SCRATCH;
2410#if LJ_TARGET_X86
2411 break;
2412#else
2413 ir->prev = REGSP_HINT(RID_FPRET);
1790 continue; 2414 continue;
1791#endif 2415#endif
1792#if LJ_TARGET_X86ORX64 2416#if LJ_TARGET_X86ORX64
1793 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ 2417 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
1794 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 2418 case IR_BSHL: case IR_BSHR: case IR_BSAR:
2419 if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
2420 break;
2421 /* fallthrough */
2422 case IR_BROL: case IR_BROR:
1795 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { 2423 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
1796 IR(ir->op2)->r = REGSP_HINT(RID_ECX); 2424 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
1797 if (inloop) 2425 if (inloop)
@@ -1835,34 +2463,80 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1835{ 2463{
1836 ASMState as_; 2464 ASMState as_;
1837 ASMState *as = &as_; 2465 ASMState *as = &as_;
1838 MCode *origtop; 2466
2467 /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
2468 {
2469 IRRef nins = T->nins;
2470 IRIns *ir = &T->ir[nins-1];
2471 if (ir->o == IR_NOP || ir->o == IR_RENAME) {
2472 do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
2473 T->nins = nins;
2474 }
2475 }
1839 2476
1840 /* Ensure an initialized instruction beyond the last one for HIOP checks. */ 2477 /* Ensure an initialized instruction beyond the last one for HIOP checks. */
1841 J->cur.nins = lj_ir_nextins(J); 2478 /* This also allows one RENAME to be added without reallocating curfinal. */
1842 lj_ir_nop(&J->cur.ir[J->cur.nins]); 2479 as->orignins = lj_ir_nextins(J);
2480 lj_ir_nop(&J->cur.ir[as->orignins]);
1843 2481
1844 /* Setup initial state. Copy some fields to reduce indirections. */ 2482 /* Setup initial state. Copy some fields to reduce indirections. */
1845 as->J = J; 2483 as->J = J;
1846 as->T = T; 2484 as->T = T;
1847 as->ir = T->ir; 2485 J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
1848 as->flags = J->flags; 2486 as->flags = J->flags;
1849 as->loopref = J->loopref; 2487 as->loopref = J->loopref;
1850 as->realign = NULL; 2488 as->realign = NULL;
1851 as->loopinv = 0; 2489 as->loopinv = 0;
1852 as->parent = J->parent ? traceref(J, J->parent) : NULL; 2490 as->parent = J->parent ? traceref(J, J->parent) : NULL;
2491#ifdef LUAJIT_RANDOM_RA
2492 (void)lj_prng_u64(&J2G(J)->prng); /* Ensure PRNG step between traces. */
2493#endif
1853 2494
1854 /* Reserve MCode memory. */ 2495 /* Reserve MCode memory. */
1855 as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); 2496 as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
1856 as->mcp = as->mctop; 2497 as->mcp = as->mctop;
1857 as->mclim = as->mcbot + MCLIM_REDZONE; 2498 as->mclim = as->mcbot + MCLIM_REDZONE;
1858 asm_setup_target(as); 2499 asm_setup_target(as);
1859 2500
1860 do { 2501 /*
2502 ** This is a loop, because the MCode may have to be (re-)assembled
2503 ** multiple times:
2504 **
2505 ** 1. as->realign is set (and the assembly aborted), if the arch-specific
2506 ** backend wants the MCode to be aligned differently.
2507 **
2508 ** This is currently only the case on x86/x64, where small loops get
2509 ** an aligned loop body plus a short branch. Not much effort is wasted,
2510 ** because the abort happens very quickly and only once.
2511 **
2512 ** 2. The IR is immovable, since the MCode embeds pointers to various
2513 ** constants inside the IR. But RENAMEs may need to be added to the IR
2514 ** during assembly, which might grow and reallocate the IR. We check
2515 ** at the end if the IR (in J->cur.ir) has actually grown, resize the
2516 ** copy (in J->curfinal.ir) and try again.
2517 **
2518 ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
2519 ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
2520 ** always have one spare slot in the IR (see above), which means we
2521 ** have to redo the assembly for only ~2% of all traces.
2522 **
2523 ** Very, very rarely, this needs to be done repeatedly, since the
2524 ** location of constants inside the IR (actually, reachability from
2525 ** a global pointer) may affect register allocation and thus the
2526 ** number of RENAMEs.
2527 */
2528 for (;;) {
1861 as->mcp = as->mctop; 2529 as->mcp = as->mctop;
1862#ifdef LUA_USE_ASSERT 2530#ifdef LUA_USE_ASSERT
1863 as->mcp_prev = as->mcp; 2531 as->mcp_prev = as->mcp;
1864#endif 2532#endif
1865 as->curins = T->nins; 2533 as->ir = J->curfinal->ir; /* Use the copied IR. */
2534 as->curins = J->cur.nins = as->orignins;
2535#ifdef LUAJIT_RANDOM_RA
2536 as->prngstate = J2G(J)->prng; /* Must (re)start from identical state. */
2537 as->prngbits = 0;
2538#endif
2539
1866 RA_DBG_START(); 2540 RA_DBG_START();
1867 RA_DBGX((as, "===== STOP =====")); 2541 RA_DBGX((as, "===== STOP ====="));
1868 2542
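
The comment block above boils down to a retry loop: assemble into a copy of the IR that carries one spare slot, and if register allocation appended more RENAMEs than fit, discard the copy, grow it, and assemble again. A minimal sketch of that control flow (the assemble() stand-in is hypothetical, with no relation to the real allocator):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical pass: pretend two RENAMEs get appended on every attempt,
** so the copy sized with a single spare slot is too small the first time.
*/
static size_t assemble(size_t nins)
{
  return nins + 2;                         /* Slots actually needed. */
}

int main(void)
{
  size_t nins = 16, cap = nins + 1;        /* One spare slot for a RENAME. */
  int *copy = malloc(cap * sizeof(*copy));
  int attempt = 0;
  for (;;) {                               /* Same shape as lj_asm_trace. */
    size_t need = assemble(nins);
    attempt++;
    if (need <= cap) break;                /* IR didn't grow: keep this copy. */
    free(copy);                            /* Grew: retry with a bigger one. */
    cap = need;
    copy = malloc(cap * sizeof(*copy));
  }
  printf("done after %d attempt(s), capacity %zu\n", attempt, cap);
  free(copy);
  return 0;
}
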
@@ -1881,7 +2555,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1881 /* Assemble a trace in linear backwards order. */ 2555 /* Assemble a trace in linear backwards order. */
1882 for (as->curins--; as->curins > as->stopins; as->curins--) { 2556 for (as->curins--; as->curins > as->stopins; as->curins--) {
1883 IRIns *ir = IR(as->curins); 2557 IRIns *ir = IR(as->curins);
1884 lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ 2558 /* 64 bit types handled by SPLIT for 32 bit archs. */
2559 lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
2560 "IR %04d has unsplit 64 bit type",
2561 (int)(ir - as->ir) - REF_BIAS);
2562 asm_snap_prev(as);
1885 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) 2563 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
1886 continue; /* Dead-code elimination can be soooo easy. */ 2564 continue; /* Dead-code elimination can be soooo easy. */
1887 if (irt_isguard(ir->t)) 2565 if (irt_isguard(ir->t))
@@ -1890,22 +2568,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1890 checkmclim(as); 2568 checkmclim(as);
1891 asm_ir(as, ir); 2569 asm_ir(as, ir);
1892 } 2570 }
1893 } while (as->realign); /* Retry in case the MCode needs to be realigned. */
1894 2571
1895 /* Emit head of trace. */ 2572 if (as->realign && J->curfinal->nins >= T->nins)
1896 RA_DBG_REF(); 2573 continue; /* Retry in case only the MCode needs to be realigned. */
1897 checkmclim(as); 2574
1898 if (as->gcsteps > 0) { 2575 /* Emit head of trace. */
1899 as->curins = as->T->snap[0].ref; 2576 RA_DBG_REF();
1900 asm_snap_prep(as); /* The GC check is a guard. */ 2577 checkmclim(as);
1901 asm_gc_check(as); 2578 if (as->gcsteps > 0) {
2579 as->curins = as->T->snap[0].ref;
2580 asm_snap_prep(as); /* The GC check is a guard. */
2581 asm_gc_check(as);
2582 as->curins = as->stopins;
2583 }
2584 ra_evictk(as);
2585 if (as->parent)
2586 asm_head_side(as);
2587 else
2588 asm_head_root(as);
2589 asm_phi_fixup(as);
2590
2591 if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
2592 lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
2593 memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
2594 (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
2595 T->nins = J->curfinal->nins;
2596 /* Fill mcofs of any unprocessed snapshots. */
2597 as->curins = REF_FIRST;
2598 asm_snap_prev(as);
2599 break; /* Done. */
2600 }
2601
2602 /* Otherwise try again with a bigger IR. */
2603 lj_trace_free(J2G(J), J->curfinal);
2604 J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
2605 J->curfinal = lj_trace_alloc(J->L, T);
2606 as->realign = NULL;
1902 } 2607 }
1903 ra_evictk(as);
1904 if (as->parent)
1905 asm_head_side(as);
1906 else
1907 asm_head_root(as);
1908 asm_phi_fixup(as);
1909 2608
1910 RA_DBGX((as, "===== START ====")); 2609 RA_DBGX((as, "===== START ===="));
1911 RA_DBG_FLUSH(); 2610 RA_DBG_FLUSH();
@@ -1915,10 +2614,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1915 /* Set trace entry point before fixing up tail to allow link to self. */ 2614 /* Set trace entry point before fixing up tail to allow link to self. */
1916 T->mcode = as->mcp; 2615 T->mcode = as->mcp;
1917 T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; 2616 T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
1918 if (!as->loopref) 2617 if (as->loopref)
2618 asm_loop_tail_fixup(as);
2619 else
1919 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ 2620 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
1920 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); 2621 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
1921 lj_mcode_sync(T->mcode, origtop); 2622 asm_snap_fixup_mcofs(as);
2623#if LJ_TARGET_MCODE_FIXUP
2624 asm_mcode_fixup(T->mcode, T->szmcode);
2625#endif
2626 lj_mcode_sync(T->mcode, as->mctoporig);
1922} 2627}
1923 2628
1924#undef IR 2629#undef IR
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 8869af32..bd5fbeb1 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)
41 } 41 }
42 } 42 }
43 } 43 }
44 lua_assert(rset_test(RSET_GPREVEN, r)); 44 lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
45 ra_modified(as, r); 45 ra_modified(as, r);
46 ra_modified(as, r+1); 46 ra_modified(as, r+1);
47 RA_DBGX((as, "scratchpair $r $r", r, r+1)); 47 RA_DBGX((as, "scratchpair $r $r", r, r+1));
@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
185 *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ 185 *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */
186 return ra_allock(as, (ofs & ~255), allow); 186 return ra_allock(as, (ofs & ~255), allow);
187 } 187 }
188 } else if (ir->o == IR_TMPREF) {
189 *ofsp = 0;
190 return RID_SP;
188 } 191 }
189 } 192 }
190 *ofsp = 0; 193 *ofsp = 0;
@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
269 return; 272 return;
270 } 273 }
271 } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { 274 } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
272 lua_assert(ofs == 0); 275 lj_assertA(ofs == 0, "bad usage");
273 ofs = (int32_t)sizeof(GCstr); 276 ofs = (int32_t)sizeof(GCstr);
274 if (irref_isk(ir->op2)) { 277 if (irref_isk(ir->op2)) {
275 ofs += IR(ir->op2)->i; 278 ofs += IR(ir->op2)->i;
@@ -310,7 +313,11 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
310} 313}
311 314
312#if !LJ_SOFTFP 315#if !LJ_SOFTFP
313/* Fuse to multiply-add/sub instruction. */ 316/*
317** Fuse to multiply-add/sub instruction.
318** VMLA rounds twice (UMA, not FMA) -- no need to check for JIT_F_OPT_FMA.
319** VFMA needs VFPv4, which is uncommon on the remaining ARM32 targets.
320*/
314static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) 321static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
315{ 322{
316 IRRef lref = ir->op1, rref = ir->op2; 323 IRRef lref = ir->op1, rref = ir->op2;
@@ -338,7 +345,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 345/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 346static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 347{
341 uint32_t n, nargs = CCI_NARGS(ci); 348 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 349 int32_t ofs = 0;
343#if LJ_SOFTFP 350#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 351 Reg gpr = REGARG_FIRSTGPR;
@@ -389,9 +396,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
389 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 396 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
390 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; 397 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
391 if (gpr <= REGARG_LASTGPR) { 398 if (gpr <= REGARG_LASTGPR) {
392 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ 399 lj_assertA(rset_test(as->freeset, gpr),
400 "reg %d not free", gpr); /* Must have been evicted. */
393 if (irt_isnum(ir->t)) { 401 if (irt_isnum(ir->t)) {
394 lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ 402 lj_assertA(rset_test(as->freeset, gpr+1),
403 "reg %d not free", gpr+1); /* Ditto. */
395 emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); 404 emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
396 gpr += 2; 405 gpr += 2;
397 } else { 406 } else {
@@ -408,7 +417,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
408#endif 417#endif
409 { 418 {
410 if (gpr <= REGARG_LASTGPR) { 419 if (gpr <= REGARG_LASTGPR) {
411 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ 420 lj_assertA(rset_test(as->freeset, gpr),
421 "reg %d not free", gpr); /* Must have been evicted. */
412 if (ref) ra_leftov(as, gpr, ref); 422 if (ref) ra_leftov(as, gpr, ref);
413 gpr++; 423 gpr++;
414 } else { 424 } else {
@@ -433,7 +443,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
433 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 443 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
434 ra_evictset(as, drop); /* Evictions must be performed first. */ 444 ra_evictset(as, drop); /* Evictions must be performed first. */
435 if (ra_used(ir)) { 445 if (ra_used(ir)) {
436 lua_assert(!irt_ispri(ir->t)); 446 lj_assertA(!irt_ispri(ir->t), "PRI dest");
437 if (!LJ_SOFTFP && irt_isfp(ir->t)) { 447 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
438 if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { 448 if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
439 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); 449 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
@@ -453,15 +463,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 463 UNUSED(ci);
454} 464}
455 465
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 466static void asm_callx(ASMState *as, IRIns *ir)
466{ 467{
467 IRRef args[CCI_NARGS_MAX*2]; 468 IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +491,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
490{ 491{
491 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 492 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
492 void *pc = ir_kptr(IR(ir->op2)); 493 void *pc = ir_kptr(IR(ir->op2));
493 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 494 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
494 as->topslot -= (BCReg)delta; 495 as->topslot -= (BCReg)delta;
495 if ((int32_t)as->topslot < 0) as->topslot = 0; 496 if ((int32_t)as->topslot < 0) as->topslot = 0;
496 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 497 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -504,6 +505,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
504 emit_lso(as, ARMI_LDR, RID_TMP, base, -4); 505 emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
505} 506}
506 507
508/* -- Buffer operations --------------------------------------------------- */
509
510#if LJ_HASBUFFER
511static void asm_bufhdr_write(ASMState *as, Reg sb)
512{
513 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
514 IRIns irgc;
515 int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
516 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
517 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
518 if ((as->flags & JIT_F_ARMV6T2)) {
519 emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
520 } else {
521 emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
522 emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
523 }
524 emit_lso(as, ARMI_LDR, RID_TMP,
525 ra_allock(as, (addr & ~4095),
526 rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
527 (addr & 4095));
528 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
529}
530#endif
531
507/* -- Type conversions ---------------------------------------------------- */ 532/* -- Type conversions ---------------------------------------------------- */
508 533
509#if !LJ_SOFTFP 534#if !LJ_SOFTFP
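
asm_bufhdr_write above refreshes the L field of an SBuf: it loads the current lua_State from g->cur_L and merges in the flag bits kept in the low bits of the old field, with a single BFI on ARMv6T2 or an AND/ORR pair otherwise. A plain-C statement of that merge (the SBUF_MASK_FLAG value below is a placeholder; the real mask comes from lj_buf.h):

#include <stdint.h>
#include <stdio.h>

#define SBUF_MASK_FLAG 0x7u   /* Placeholder value for the demo. */

/* What the BFI (or AND+ORR) sequence computes: the new cur_L word with the
** flag bits of the previous sb->L word preserved. cur_L is an aligned
** pointer, so its low flag bits are assumed to be zero already.
*/
static uint32_t merge_L(uint32_t cur_L, uint32_t old_sbL)
{
  return cur_L | (old_sbL & SBUF_MASK_FLAG);
}

int main(void)
{
  printf("0x%x\n", merge_L(0x20000u, 0x10005u));  /* prints: 0x20005 */
  return 0;
}
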
@@ -539,13 +564,17 @@ static void asm_conv(ASMState *as, IRIns *ir)
539#endif 564#endif
540 IRRef lref = ir->op1; 565 IRRef lref = ir->op1;
541 /* 64 bit integer conversions are handled by SPLIT. */ 566 /* 64 bit integer conversions are handled by SPLIT. */
542 lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); 567 lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
568 "IR %04d has unsplit 64 bit type",
569 (int)(ir - as->ir) - REF_BIAS);
543#if LJ_SOFTFP 570#if LJ_SOFTFP
544 /* FP conversions are handled by SPLIT. */ 571 /* FP conversions are handled by SPLIT. */
545 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); 572 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
573 "IR %04d has FP type",
574 (int)(ir - as->ir) - REF_BIAS);
546 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ 575 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
547#else 576#else
548 lua_assert(irt_type(ir->t) != st); 577 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
549 if (irt_isfp(ir->t)) { 578 if (irt_isfp(ir->t)) {
550 Reg dest = ra_dest(as, ir, RSET_FPR); 579 Reg dest = ra_dest(as, ir, RSET_FPR);
551 if (stfp) { /* FP to FP conversion. */ 580 if (stfp) { /* FP to FP conversion. */
@@ -562,7 +591,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
562 } else if (stfp) { /* FP to integer conversion. */ 591 } else if (stfp) { /* FP to integer conversion. */
563 if (irt_isguard(ir->t)) { 592 if (irt_isguard(ir->t)) {
564 /* Checked conversions are only supported from number to int. */ 593 /* Checked conversions are only supported from number to int. */
565 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 594 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
595 "bad type for checked CONV");
566 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 596 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
567 } else { 597 } else {
568 Reg left = ra_alloc1(as, lref, RSET_FPR); 598 Reg left = ra_alloc1(as, lref, RSET_FPR);
@@ -581,7 +611,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
581 Reg dest = ra_dest(as, ir, RSET_GPR); 611 Reg dest = ra_dest(as, ir, RSET_GPR);
582 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 612 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
583 Reg left = ra_alloc1(as, lref, RSET_GPR); 613 Reg left = ra_alloc1(as, lref, RSET_GPR);
584 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 614 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
585 if ((as->flags & JIT_F_ARMV6)) { 615 if ((as->flags & JIT_F_ARMV6)) {
586 ARMIns ai = st == IRT_I8 ? ARMI_SXTB : 616 ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
587 st == IRT_U8 ? ARMI_UXTB : 617 st == IRT_U8 ? ARMI_UXTB :
@@ -601,31 +631,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
601 } 631 }
602} 632}
603 633
604#if !LJ_SOFTFP && LJ_HASFFI
605static void asm_conv64(ASMState *as, IRIns *ir)
606{
607 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
608 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
609 IRCallID id;
610 CCallInfo ci;
611 IRRef args[2];
612 args[0] = (ir-1)->op1;
613 args[1] = ir->op1;
614 if (st == IRT_NUM || st == IRT_FLOAT) {
615 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
616 ir--;
617 } else {
618 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
619 }
620 ci = lj_ir_callinfo[id];
621#if !LJ_ABI_SOFTFP
622 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
623#endif
624 asm_setupresult(as, ir, &ci);
625 asm_gencall(as, &ci, args);
626}
627#endif
628
629static void asm_strto(ASMState *as, IRIns *ir) 634static void asm_strto(ASMState *as, IRIns *ir)
630{ 635{
631 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 636 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,60 +694,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
689 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 694 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
690} 695}
691 696
697/* -- Memory references --------------------------------------------------- */
698
692/* Get pointer to TValue. */ 699/* Get pointer to TValue. */
693static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 700static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
694{ 701{
695 IRIns *ir = IR(ref); 702 if ((mode & IRTMPREF_IN1)) {
696 if (irt_isnum(ir->t)) { 703 IRIns *ir = IR(ref);
697 if (irref_isk(ref)) { 704 if (irt_isnum(ir->t)) {
698 /* Use the number constant itself as a TValue. */ 705 if ((mode & IRTMPREF_OUT1)) {
699 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 706#if LJ_SOFTFP
700 } else { 707 lj_assertA(irref_isk(ref), "unsplit FP op");
708 emit_dm(as, ARMI_MOV, dest, RID_SP);
709 emit_lso(as, ARMI_STR,
710 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
711 RID_SP, 0);
712 emit_lso(as, ARMI_STR,
713 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
714 RID_SP, 4);
715#else
716 Reg src = ra_alloc1(as, ref, RSET_FPR);
717 emit_dm(as, ARMI_MOV, dest, RID_SP);
718 emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
719#endif
720 } else if (irref_isk(ref)) {
721 /* Use the number constant itself as a TValue. */
722 ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
723 } else {
701#if LJ_SOFTFP 724#if LJ_SOFTFP
702 lua_assert(0); 725 lj_assertA(0, "unsplit FP op");
703#else 726#else
704 /* Otherwise force a spill and use the spill slot. */ 727 /* Otherwise force a spill and use the spill slot. */
705 emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); 728 emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
706#endif 729#endif
730 }
731 } else {
732 /* Otherwise use [sp] and [sp+4] to hold the TValue.
733 ** This assumes the following call has max. 4 args.
734 */
735 Reg type;
736 emit_dm(as, ARMI_MOV, dest, RID_SP);
737 if (!irt_ispri(ir->t)) {
738 Reg src = ra_alloc1(as, ref, RSET_GPR);
739 emit_lso(as, ARMI_STR, src, RID_SP, 0);
740 }
741 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
742 type = ra_alloc1(as, ref+1, RSET_GPR);
743 else
744 type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
745 emit_lso(as, ARMI_STR, type, RID_SP, 4);
707 } 746 }
708 } else { 747 } else {
709 /* Otherwise use [sp] and [sp+4] to hold the TValue. */
710 RegSet allow = rset_exclude(RSET_GPR, dest);
711 Reg type;
712 emit_dm(as, ARMI_MOV, dest, RID_SP); 748 emit_dm(as, ARMI_MOV, dest, RID_SP);
713 if (!irt_ispri(ir->t)) {
714 Reg src = ra_alloc1(as, ref, allow);
715 emit_lso(as, ARMI_STR, src, RID_SP, 0);
716 }
717 if ((ir+1)->o == IR_HIOP)
718 type = ra_alloc1(as, ref+1, allow);
719 else
720 type = ra_allock(as, irt_toitype(ir->t), allow);
721 emit_lso(as, ARMI_STR, type, RID_SP, 4);
722 }
723}
724
725static void asm_tostr(ASMState *as, IRIns *ir)
726{
727 IRRef args[2];
728 args[0] = ASMREF_L;
729 as->gcsteps++;
730 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
731 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
732 args[1] = ASMREF_TMP1; /* const lua_Number * */
733 asm_setupresult(as, ir, ci); /* GCstr * */
734 asm_gencall(as, ci, args);
735 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
736 } else {
737 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
738 args[1] = ir->op1; /* int32_t k */
739 asm_setupresult(as, ir, ci); /* GCstr * */
740 asm_gencall(as, ci, args);
741 } 749 }
742} 750}
743 751
744/* -- Memory references --------------------------------------------------- */
745
746static void asm_aref(ASMState *as, IRIns *ir) 752static void asm_aref(ASMState *as, IRIns *ir)
747{ 753{
748 Reg dest = ra_dest(as, ir, RSET_GPR); 754 Reg dest = ra_dest(as, ir, RSET_GPR);
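
The reworked asm_tvptr above materialises a temporary TValue at [sp]/[sp+4] when it cannot simply point at an existing one: the payload word goes to [sp] and the type tag word to [sp+4] (numbers are stored as a full double instead). A rough model of that two-word layout on 32-bit ARM (the tag constants are placeholders, not LuaJIT's irt_toitype() encoding):

#include <stdint.h>
#include <stdio.h>

/* Rough 32-bit TValue: payload at offset 0, type tag at offset 4, i.e. the
** two STRs to [sp] and [sp+4] in asm_tvptr. Tag values are made up here.
*/
typedef struct TV32 { uint32_t gcptr; int32_t it; } TV32;
enum { TAG_NIL = -1, TAG_FALSE = -2, TAG_TRUE = -3, TAG_STR = -5 };

static void tv_setgc(TV32 *tv, uint32_t gcptr, int32_t tag)
{
  tv->gcptr = gcptr;   /* STR src,  [sp]   */
  tv->it = tag;        /* STR type, [sp+4] */
}

int main(void)
{
  TV32 scratch;                       /* Stands in for the stack slot. */
  tv_setgc(&scratch, 0xCAFE0u, TAG_STR);
  printf("payload=0x%x tag=%d\n", scratch.gcptr, scratch.it);
  return 0;
}
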
@@ -864,16 +870,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
864 *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); 870 *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
865 871
866 /* Load main position relative to tab->node into dest. */ 872 /* Load main position relative to tab->node into dest. */
867 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 873 khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
868 if (khash == 0) { 874 if (khash == 0) {
869 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 875 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
870 } else { 876 } else {
871 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); 877 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
872 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); 878 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
873 if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ 879 if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */
874 emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); 880 emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
875 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 881 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
876 emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); 882 emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
877 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); 883 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
878 } else if (irref_isk(refkey)) { 884 } else if (irref_isk(refkey)) {
879 emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, 885 emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
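
The pair of ADDs with shifted operands just above is the main-position computation for the hash part: dest = node + (hash & hmask) * sizeof(Node), done as tmp = 3*tmp followed by dest += tmp*8, because sizeof(Node) is 24 bytes on 32-bit builds (assumed here, not re-measured). A tiny arithmetic check:

#include <stdint.h>
#include <stdio.h>

/* node + ((hash & hmask) * 24), expressed with the same shift-adds as the
** emitted ARM code: x*24 == ((x + (x << 1)) << 3).
*/
static uint32_t mainpos(uint32_t node, uint32_t hash, uint32_t hmask)
{
  uint32_t idx = hash & hmask;       /* AND with hmask (2^n - 1). */
  uint32_t t3 = idx + (idx << 1);    /* ADD ..., LSL #1  -> 3*idx */
  return node + (t3 << 3);           /* ADD ..., LSL #3  -> node + 24*idx */
}

int main(void)
{
  /* 0x12345678 & 63 = 56, and 56 * 24 = 1344. */
  printf("%u\n", mainpos(0, 0x12345678u, 63));
  return 0;
}
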
@@ -920,7 +926,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
920 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 926 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
921 Reg key = RID_NONE, type = RID_TMP, idx = node; 927 Reg key = RID_NONE, type = RID_TMP, idx = node;
922 RegSet allow = rset_exclude(RSET_GPR, node); 928 RegSet allow = rset_exclude(RSET_GPR, node);
923 lua_assert(ofs % sizeof(Node) == 0); 929 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
924 if (ofs > 4095) { 930 if (ofs > 4095) {
925 idx = dest; 931 idx = dest;
926 rset_clear(allow, dest); 932 rset_clear(allow, dest);
@@ -960,48 +966,42 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
960 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 966 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
961} 967}
962 968
963static void asm_newref(ASMState *as, IRIns *ir)
964{
965 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
966 IRRef args[3];
967 if (ir->r == RID_SINK)
968 return;
969 args[0] = ASMREF_L; /* lua_State *L */
970 args[1] = ir->op1; /* GCtab *t */
971 args[2] = ASMREF_TMP1; /* cTValue *key */
972 asm_setupresult(as, ir, ci); /* TValue * */
973 asm_gencall(as, ci, args);
974 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
975}
976
977static void asm_uref(ASMState *as, IRIns *ir) 969static void asm_uref(ASMState *as, IRIns *ir)
978{ 970{
979 Reg dest = ra_dest(as, ir, RSET_GPR); 971 Reg dest = ra_dest(as, ir, RSET_GPR);
980 if (irref_isk(ir->op1)) { 972 int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
973 if (irref_isk(ir->op1) && !guarded) {
981 GCfunc *fn = ir_kfunc(IR(ir->op1)); 974 GCfunc *fn = ir_kfunc(IR(ir->op1));
982 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 975 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
983 emit_lsptr(as, ARMI_LDR, dest, v); 976 emit_lsptr(as, ARMI_LDR, dest, v);
984 } else { 977 } else {
985 Reg uv = ra_scratch(as, RSET_GPR); 978 if (guarded) {
986 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 979 asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
987 if (ir->o == IR_UREFC) {
988 asm_guardcc(as, CC_NE);
989 emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); 980 emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
990 emit_opk(as, ARMI_ADD, dest, uv, 981 }
982 if (ir->o == IR_UREFC)
983 emit_opk(as, ARMI_ADD, dest, dest,
991 (int32_t)offsetof(GCupval, tv), RSET_GPR); 984 (int32_t)offsetof(GCupval, tv), RSET_GPR);
992 emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); 985 else
986 emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
987 if (guarded)
988 emit_lso(as, ARMI_LDRB, RID_TMP, dest,
989 (int32_t)offsetof(GCupval, closed));
990 if (irref_isk(ir->op1)) {
991 GCfunc *fn = ir_kfunc(IR(ir->op1));
992 int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
993 emit_loadi(as, dest, k);
993 } else { 994 } else {
994 emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); 995 emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
996 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
995 } 997 }
996 emit_lso(as, ARMI_LDR, uv, func,
997 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
998 } 998 }
999} 999}
1000 1000
1001static void asm_fref(ASMState *as, IRIns *ir) 1001static void asm_fref(ASMState *as, IRIns *ir)
1002{ 1002{
1003 UNUSED(as); UNUSED(ir); 1003 UNUSED(as); UNUSED(ir);
1004 lua_assert(!ra_used(ir)); 1004 lj_assertA(!ra_used(ir), "unfused FREF");
1005} 1005}
1006 1006
1007static void asm_strref(ASMState *as, IRIns *ir) 1007static void asm_strref(ASMState *as, IRIns *ir)
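
The reworked asm_uref above distinguishes guarded and unguarded upvalue refs: when guarded, it loads uv->closed and exits the trace unless the upvalue is closed (UREFC) or still open (UREFO); UREFC then yields &uv->tv while UREFO loads uv->v. A simplified model of open vs. closed upvalues behind that check (field names trimmed down from GCupval):

#include <stdio.h>

/* Toy upvalue: while open it aliases a stack slot; closing copies the value
** into its own tv and repoints v there.
*/
typedef struct Upval {
  unsigned char closed;
  double *v;          /* Where the value currently lives. */
  double tv;          /* Own storage once closed. */
} Upval;

/* UREFC wants a closed upvalue and returns &tv; UREFO wants an open one and
** returns v. Returning NULL models the guard taking a trace exit.
*/
static double *uref(Upval *uv, int want_closed)
{
  if (want_closed != uv->closed) return NULL;
  return want_closed ? &uv->tv : uv->v;
}

int main(void)
{
  double stack_slot = 42.0;
  Upval uv = { 0, &stack_slot, 0.0 };
  printf("open  : %g\n", *uref(&uv, 0));
  uv.tv = *uv.v; uv.v = &uv.tv; uv.closed = 1;   /* Close the upvalue. */
  printf("closed: %g\n", *uref(&uv, 1));
  return 0;
}
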
@@ -1038,25 +1038,27 @@ static void asm_strref(ASMState *as, IRIns *ir)
1038 1038
1039/* -- Loads and stores ---------------------------------------------------- */ 1039/* -- Loads and stores ---------------------------------------------------- */
1040 1040
1041static ARMIns asm_fxloadins(IRIns *ir) 1041static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
1042{ 1042{
1043 UNUSED(as);
1043 switch (irt_type(ir->t)) { 1044 switch (irt_type(ir->t)) {
1044 case IRT_I8: return ARMI_LDRSB; 1045 case IRT_I8: return ARMI_LDRSB;
1045 case IRT_U8: return ARMI_LDRB; 1046 case IRT_U8: return ARMI_LDRB;
1046 case IRT_I16: return ARMI_LDRSH; 1047 case IRT_I16: return ARMI_LDRSH;
1047 case IRT_U16: return ARMI_LDRH; 1048 case IRT_U16: return ARMI_LDRH;
1048 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; 1049 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
1049 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ 1050 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
1050 default: return ARMI_LDR; 1051 default: return ARMI_LDR;
1051 } 1052 }
1052} 1053}
1053 1054
1054static ARMIns asm_fxstoreins(IRIns *ir) 1055static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
1055{ 1056{
1057 UNUSED(as);
1056 switch (irt_type(ir->t)) { 1058 switch (irt_type(ir->t)) {
1057 case IRT_I8: case IRT_U8: return ARMI_STRB; 1059 case IRT_I8: case IRT_U8: return ARMI_STRB;
1058 case IRT_I16: case IRT_U16: return ARMI_STRH; 1060 case IRT_I16: case IRT_U16: return ARMI_STRH;
1059 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; 1061 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
1060 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ 1062 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
1061 default: return ARMI_STR; 1063 default: return ARMI_STR;
1062 } 1064 }
@@ -1065,17 +1067,23 @@ static ARMIns asm_fxstoreins(IRIns *ir)
1065static void asm_fload(ASMState *as, IRIns *ir) 1067static void asm_fload(ASMState *as, IRIns *ir)
1066{ 1068{
1067 Reg dest = ra_dest(as, ir, RSET_GPR); 1069 Reg dest = ra_dest(as, ir, RSET_GPR);
1068 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1070 ARMIns ai = asm_fxloadins(as, ir);
1069 ARMIns ai = asm_fxloadins(ir); 1071 Reg idx;
1070 int32_t ofs; 1072 int32_t ofs;
1071 if (ir->op2 == IRFL_TAB_ARRAY) { 1073 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
1072 ofs = asm_fuseabase(as, ir->op1); 1074 idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
1073 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1075 ofs = 0;
1074 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); 1076 } else {
1075 return; 1077 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1078 if (ir->op2 == IRFL_TAB_ARRAY) {
1079 ofs = asm_fuseabase(as, ir->op1);
1080 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1081 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
1082 return;
1083 }
1076 } 1084 }
1085 ofs = field_ofs[ir->op2];
1077 } 1086 }
1078 ofs = field_ofs[ir->op2];
1079 if ((ai & 0x04000000)) 1087 if ((ai & 0x04000000))
1080 emit_lso(as, ai, dest, idx, ofs); 1088 emit_lso(as, ai, dest, idx, ofs);
1081 else 1089 else
@@ -1089,7 +1097,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
1089 IRIns *irf = IR(ir->op1); 1097 IRIns *irf = IR(ir->op1);
1090 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 1098 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
1091 int32_t ofs = field_ofs[irf->op2]; 1099 int32_t ofs = field_ofs[irf->op2];
1092 ARMIns ai = asm_fxstoreins(ir); 1100 ARMIns ai = asm_fxstoreins(as, ir);
1093 if ((ai & 0x04000000)) 1101 if ((ai & 0x04000000))
1094 emit_lso(as, ai, src, idx, ofs); 1102 emit_lso(as, ai, src, idx, ofs);
1095 else 1103 else
@@ -1101,20 +1109,22 @@ static void asm_xload(ASMState *as, IRIns *ir)
1101{ 1109{
1102 Reg dest = ra_dest(as, ir, 1110 Reg dest = ra_dest(as, ir,
1103 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1111 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1104 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1112 lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
1105 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1113 asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
1106} 1114}
1107 1115
1108static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1116static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1109{ 1117{
1110 if (ir->r != RID_SINK) { 1118 if (ir->r != RID_SINK) {
1111 Reg src = ra_alloc1(as, ir->op2, 1119 Reg src = ra_alloc1(as, ir->op2,
1112 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1120 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1113 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 1121 asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
1114 rset_exclude(RSET_GPR, src), ofs); 1122 rset_exclude(RSET_GPR, src), ofs);
1115 } 1123 }
1116} 1124}
1117 1125
1126#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1127
1118static void asm_ahuvload(ASMState *as, IRIns *ir) 1128static void asm_ahuvload(ASMState *as, IRIns *ir)
1119{ 1129{
1120 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1130 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1127,13 +1137,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1127 rset_clear(allow, type); 1137 rset_clear(allow, type);
1128 } 1138 }
1129 if (ra_used(ir)) { 1139 if (ra_used(ir)) {
1130 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1140 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
1131 irt_isint(ir->t) || irt_isaddr(ir->t)); 1141 irt_isint(ir->t) || irt_isaddr(ir->t),
1142 "bad load type %d", irt_type(ir->t));
1132 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1143 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
1133 rset_clear(allow, dest); 1144 rset_clear(allow, dest);
1134 } 1145 }
1135 idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1146 idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
1136 (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); 1147 (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
1148 if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
1137 if (!hiop || type == RID_NONE) { 1149 if (!hiop || type == RID_NONE) {
1138 rset_clear(allow, idx); 1150 rset_clear(allow, idx);
1139 if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && 1151 if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
@@ -1194,10 +1206,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
1194 IRType t = hiop ? IRT_NUM : irt_type(ir->t); 1206 IRType t = hiop ? IRT_NUM : irt_type(ir->t);
1195 Reg dest = RID_NONE, type = RID_NONE, base; 1207 Reg dest = RID_NONE, type = RID_NONE, base;
1196 RegSet allow = RSET_GPR; 1208 RegSet allow = RSET_GPR;
1197 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1209 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1198 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1210 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1211 lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1212 "inconsistent SLOAD variant");
1199#if LJ_SOFTFP 1213#if LJ_SOFTFP
1200 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ 1214 lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
1215 "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
1201 if (hiop && ra_used(ir+1)) { 1216 if (hiop && ra_used(ir+1)) {
1202 type = ra_dest(as, ir+1, allow); 1217 type = ra_dest(as, ir+1, allow);
1203 rset_clear(allow, type); 1218 rset_clear(allow, type);
@@ -1213,8 +1228,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1213 Reg tmp = RID_NONE; 1228 Reg tmp = RID_NONE;
1214 if ((ir->op2 & IRSLOAD_CONVERT)) 1229 if ((ir->op2 & IRSLOAD_CONVERT))
1215 tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); 1230 tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
1216 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1231 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
1217 irt_isint(ir->t) || irt_isaddr(ir->t)); 1232 irt_isint(ir->t) || irt_isaddr(ir->t),
1233 "bad SLOAD type %d", irt_type(ir->t));
1218 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1234 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
1219 rset_clear(allow, dest); 1235 rset_clear(allow, dest);
1220 base = ra_alloc1(as, REF_BASE, allow); 1236 base = ra_alloc1(as, REF_BASE, allow);
@@ -1246,7 +1262,12 @@ dotypecheck:
1246 } 1262 }
1247 } 1263 }
1248 asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); 1264 asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
1249 emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); 1265 if ((ir->op2 & IRSLOAD_KEYINDEX)) {
1266 emit_n(as, ARMI_CMN|ARMI_K12|1, type);
1267 emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type);
1268 } else {
1269 emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
1270 }
1250 } 1271 }
1251 if (ra_hasreg(dest)) { 1272 if (ra_hasreg(dest)) {
1252#if !LJ_SOFTFP 1273#if !LJ_SOFTFP
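Note: the added IRSLOAD_KEYINDEX branch checks the type word against LJ_KEYINDEX with an EOR of the inverted constant followed by CMN #1: the EOR yields all-ones exactly when the type equals LJ_KEYINDEX, and CMN #1 sets the Z flag exactly for an all-ones operand. A minimal standalone sketch of that identity; the constant below is an illustrative stand-in, the real LJ_KEYINDEX value is defined in lj_obj.h.

#include <assert.h>
#include <stdint.h>

#define KEYINDEX_TAG 0xfffe7fffu  /* Illustrative stand-in for LJ_KEYINDEX. */

/* Mirrors: EOR type,type,#~KEYINDEX_TAG ; CMN type,#1 ; guard on Z. */
static int is_keyindex(uint32_t type)
{
  uint32_t t = type ^ ~KEYINDEX_TAG;  /* All-ones iff type == KEYINDEX_TAG. */
  return (uint32_t)(t + 1) == 0;      /* CMN #1 sets Z iff t == ~0u. */
}

int main(void)
{
  assert(is_keyindex(KEYINDEX_TAG));
  assert(!is_keyindex(KEYINDEX_TAG + 1));
  assert(!is_keyindex(0));
  return 0;
}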
@@ -1272,19 +1293,17 @@ dotypecheck:
1272static void asm_cnew(ASMState *as, IRIns *ir) 1293static void asm_cnew(ASMState *as, IRIns *ir)
1273{ 1294{
1274 CTState *cts = ctype_ctsG(J2G(as->J)); 1295 CTState *cts = ctype_ctsG(J2G(as->J));
1275 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
1276 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
1277 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
1296 CTypeID id = (CTypeID)IR(ir->op1)->i;
1297 CTSize sz;
1298 CTInfo info = lj_ctype_info(cts, id, &sz);
1278 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1299 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1279 IRRef args[2]; 1300 IRRef args[4];
1280 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1301 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1281 RegSet drop = RSET_SCRATCH; 1302 RegSet drop = RSET_SCRATCH;
1282 lua_assert(sz != CTSIZE_INVALID); 1303 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1304 "bad CNEW/CNEWI operands");
1283 1305
1284 args[0] = ASMREF_L; /* lua_State *L */
1285 args[1] = ASMREF_TMP1; /* MSize size */
1286 as->gcsteps++; 1306 as->gcsteps++;
1287
1288 if (ra_hasreg(ir->r)) 1307 if (ra_hasreg(ir->r))
1289 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1308 rset_clear(drop, ir->r); /* Dest reg handled below. */
1290 ra_evictset(as, drop); 1309 ra_evictset(as, drop);
@@ -1294,10 +1313,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1294 /* Initialize immutable cdata object. */ 1313 /* Initialize immutable cdata object. */
1295 if (ir->o == IR_CNEWI) { 1314 if (ir->o == IR_CNEWI) {
1296 int32_t ofs = sizeof(GCcdata); 1315 int32_t ofs = sizeof(GCcdata);
1297 lua_assert(sz == 4 || sz == 8); 1316 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1298 if (sz == 8) { 1317 if (sz == 8) {
1299 ofs += 4; ir++; 1318 ofs += 4; ir++;
1300 lua_assert(ir->o == IR_HIOP); 1319 lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
1301 } 1320 }
1302 for (;;) { 1321 for (;;) {
1303 Reg r = ra_alloc1(as, ir->op2, allow); 1322 Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1306,22 +1325,32 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1306 if (ofs == sizeof(GCcdata)) break; 1325 if (ofs == sizeof(GCcdata)) break;
1307 ofs -= 4; ir--; 1326 ofs -= 4; ir--;
1308 } 1327 }
1328 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1329 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1330 args[0] = ASMREF_L; /* lua_State *L */
1331 args[1] = ir->op1; /* CTypeID id */
1332 args[2] = ir->op2; /* CTSize sz */
1333 args[3] = ASMREF_TMP1; /* CTSize align */
1334 asm_gencall(as, ci, args);
1335 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1336 return;
1309 } 1337 }
1338
1310 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1339 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1311 { 1340 {
1312 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1341 uint32_t k = emit_isk12(ARMI_MOV, id);
1313 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1342 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1314 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1343 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1315 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1344 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1316 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1345 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1317 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1346 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1318 } 1347 }
1348 args[0] = ASMREF_L; /* lua_State *L */
1349 args[1] = ASMREF_TMP1; /* MSize size */
1319 asm_gencall(as, ci, args); 1350 asm_gencall(as, ci, args);
1320 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1351 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1321 ra_releasetmp(as, ASMREF_TMP1)); 1352 ra_releasetmp(as, ASMREF_TMP1));
1322} 1353}
1323#else
1324#define asm_cnew(as, ir) ((void)0)
1325#endif 1354#endif
1326 1355
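Note: several paths in this file (the ctypeid MOV just above, the stack-check slot offset further down) go through emit_isk12() to decide whether a constant can be encoded directly in the instruction. As a rough sketch of the underlying A32 rule, a constant is encodable when it is an 8-bit value rotated right by an even amount; the real emit_isk12() also considers inverted/negated encodings (switching e.g. MOV to MVN or ADD to SUB), which this standalone check leaves out.

#include <stdint.h>
#include <stdio.h>

/* Returns 1 if k fits the A32 "imm8 rotated right by 2*n" immediate form. */
static int is_rotated_imm8(uint32_t k)
{
  uint32_t rot;
  for (rot = 0; rot < 32; rot += 2) {
    uint32_t v = (k << rot) | (k >> ((32 - rot) & 31));  /* Rotate left to undo. */
    if (v <= 255) return 1;
  }
  return 0;
}

int main(void)
{
  printf("%d %d %d\n", is_rotated_imm8(0xff),    /* 1: fits as-is. */
                       is_rotated_imm8(0xff0),   /* 1: 0xff rotated. */
                       is_rotated_imm8(0x101));  /* 0: set bits too far apart. */
  return 0;
}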
1327/* -- Write barriers ------------------------------------------------------ */ 1356/* -- Write barriers ------------------------------------------------------ */
@@ -1353,7 +1382,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1353 MCLabel l_end; 1382 MCLabel l_end;
1354 Reg obj, val, tmp; 1383 Reg obj, val, tmp;
1355 /* No need for other object barriers (yet). */ 1384 /* No need for other object barriers (yet). */
1356 lua_assert(IR(ir->op1)->o == IR_UREFC); 1385 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1357 ra_evictset(as, RSET_SCRATCH); 1386 ra_evictset(as, RSET_SCRATCH);
1358 l_end = emit_label(as); 1387 l_end = emit_label(as);
1359 args[0] = ASMREF_TMP1; /* global_State *g */ 1388 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1392,23 +1421,36 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1392 emit_dm(as, ai, (dest & 15), (left & 15)); 1421 emit_dm(as, ai, (dest & 15), (left & 15));
1393} 1422}
1394 1423
1395static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1396{
1397 IRIns *irp = IR(ir->op1);
1398 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1399 IRIns *irpp = IR(irp->op1);
1400 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1401 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1402 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1403 IRRef args[2];
1404 args[0] = irpp->op1;
1405 args[1] = irp->op2;
1406 asm_setupresult(as, ir, ci);
1407 asm_gencall(as, ci, args);
1408 return 1;
1409 }
1410 }
1411 return 0;
1424static void asm_callround(ASMState *as, IRIns *ir, int id)
1425{
1426 /* The modified regs must match with the *.dasc implementation. */
1427 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1428 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1429 RegSet of;
1430 Reg dest, src;
1431 ra_evictset(as, drop);
1432 dest = ra_dest(as, ir, RSET_FPR);
1433 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1434 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1435 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1436 (void *)lj_vm_trunc_sf);
1437 /* Workaround to protect argument GPRs from being used for remat. */
1438 of = as->freeset;
1439 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1440 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1441 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1442 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1443 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1444}
1445
1446static void asm_fpmath(ASMState *as, IRIns *ir)
1447{
1448 if (ir->op2 <= IRFPM_TRUNC)
1449 asm_callround(as, ir, ir->op2);
1450 else if (ir->op2 == IRFPM_SQRT)
1451 asm_fpunary(as, ir, ARMI_VSQRT_D);
1452 else
1453 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1412} 1454}
1413#endif 1455#endif
1414 1456
@@ -1474,19 +1516,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1474 asm_intop(as, ir, asm_drop_cmp0(as, ai)); 1516 asm_intop(as, ir, asm_drop_cmp0(as, ai));
1475} 1517}
1476 1518
1477static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1478{
1479 ai = asm_drop_cmp0(as, ai);
1480 if (ir->op2 == 0) {
1481 Reg dest = ra_dest(as, ir, RSET_GPR);
1482 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1483 emit_d(as, ai^m, dest);
1484 } else {
1485 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1486 asm_intop(as, ir, ai);
1487 }
1488}
1489
1490static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1519static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1491{ 1520{
1492 Reg dest = ra_dest(as, ir, RSET_GPR); 1521 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1552,6 +1581,15 @@ static void asm_mul(ASMState *as, IRIns *ir)
1552 asm_intmul(as, ir); 1581 asm_intmul(as, ir);
1553} 1582}
1554 1583
1584#define asm_addov(as, ir) asm_add(as, ir)
1585#define asm_subov(as, ir) asm_sub(as, ir)
1586#define asm_mulov(as, ir) asm_mul(as, ir)
1587
1588#if !LJ_SOFTFP
1589#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1590#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1591#endif
1592
1555static void asm_neg(ASMState *as, IRIns *ir) 1593static void asm_neg(ASMState *as, IRIns *ir)
1556{ 1594{
1557#if !LJ_SOFTFP 1595#if !LJ_SOFTFP
@@ -1563,41 +1601,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
1563 asm_intneg(as, ir, ARMI_RSB); 1601 asm_intneg(as, ir, ARMI_RSB);
1564} 1602}
1565 1603
1566static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1567{
1568 const CCallInfo *ci = &lj_ir_callinfo[id];
1569 IRRef args[2];
1570 args[0] = ir->op1;
1571 args[1] = ir->op2;
1572 asm_setupresult(as, ir, ci);
1573 asm_gencall(as, ci, args);
1574}
1575
1576#if !LJ_SOFTFP
1577static void asm_callround(ASMState *as, IRIns *ir, int id)
1578{
1579 /* The modified regs must match with the *.dasc implementation. */
1580 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1581 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1582 RegSet of;
1583 Reg dest, src;
1584 ra_evictset(as, drop);
1585 dest = ra_dest(as, ir, RSET_FPR);
1586 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1587 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1588 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1589 (void *)lj_vm_trunc_sf);
1590 /* Workaround to protect argument GPRs from being used for remat. */
1591 of = as->freeset;
1592 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1593 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1594 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1595 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1596 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1597}
1598#endif
1604static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1605{
1606 ai = asm_drop_cmp0(as, ai);
1607 if (ir->op2 == 0) {
1608 Reg dest = ra_dest(as, ir, RSET_GPR);
1609 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1610 emit_d(as, ai^m, dest);
1611 } else {
1612 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1613 asm_intop(as, ir, ai);
1614 }
1615}
1616
1617#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1599 1618
1600static void asm_bitswap(ASMState *as, IRIns *ir) 1619static void asm_bswap(ASMState *as, IRIns *ir)
1601{ 1620{
1602 Reg dest = ra_dest(as, ir, RSET_GPR); 1621 Reg dest = ra_dest(as, ir, RSET_GPR);
1603 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1622 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1614,6 +1633,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1614 } 1633 }
1615} 1634}
1616 1635
1636#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1637#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1638#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1639
1617static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1640static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1618{ 1641{
1619 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1642 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1631,6 +1654,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1631 } 1654 }
1632} 1655}
1633 1656
1657#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1658#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1659#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1660#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1661#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
1662
1634static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1663static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1635{ 1664{
1636 uint32_t kcmp = 0, kmov = 0; 1665 uint32_t kcmp = 0, kmov = 0;
@@ -1704,6 +1733,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1704 asm_intmin_max(as, ir, cc); 1733 asm_intmin_max(as, ir, cc);
1705} 1734}
1706 1735
1736#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
1737#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
1738
1707/* -- Comparisons --------------------------------------------------------- */ 1739/* -- Comparisons --------------------------------------------------------- */
1708 1740
1709/* Map of comparisons to flags. ORDER IR. */ 1741/* Map of comparisons to flags. ORDER IR. */
@@ -1777,7 +1809,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
1777 Reg left; 1809 Reg left;
1778 uint32_t m; 1810 uint32_t m;
1779 int cmpprev0 = 0; 1811 int cmpprev0 = 0;
1780 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); 1812 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
1813 "bad comparison data type %d", irt_type(ir->t));
1781 if (asm_swapops(as, lref, rref)) { 1814 if (asm_swapops(as, lref, rref)) {
1782 Reg tmp = lref; lref = rref; rref = tmp; 1815 Reg tmp = lref; lref = rref; rref = tmp;
1783 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ 1816 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
@@ -1819,6 +1852,18 @@ notst:
1819 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1852 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1820} 1853}
1821 1854
1855static void asm_comp(ASMState *as, IRIns *ir)
1856{
1857#if !LJ_SOFTFP
1858 if (irt_isnum(ir->t))
1859 asm_fpcomp(as, ir);
1860 else
1861#endif
1862 asm_intcomp(as, ir);
1863}
1864
1865#define asm_equal(as, ir) asm_comp(as, ir)
1866
1822#if LJ_HASFFI 1867#if LJ_HASFFI
1823/* 64 bit integer comparisons. */ 1868/* 64 bit integer comparisons. */
1824static void asm_int64comp(ASMState *as, IRIns *ir) 1869static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1857,15 +1902,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
1857} 1902}
1858#endif 1903#endif
1859 1904
1860/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ 1905/* -- Split register ops -------------------------------------------------- */
1861 1906
1862/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 1907/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
1863static void asm_hiop(ASMState *as, IRIns *ir) 1908static void asm_hiop(ASMState *as, IRIns *ir)
1864{ 1909{
1865#if LJ_HASFFI || LJ_SOFTFP
1866 /* HIOP is marked as a store because it needs its own DCE logic. */ 1910 /* HIOP is marked as a store because it needs its own DCE logic. */
1867 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 1911 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1868 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 1912 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1913#if LJ_HASFFI || LJ_SOFTFP
1869 if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ 1914 if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */
1870 as->curins--; /* Always skip the loword comparison. */ 1915 as->curins--; /* Always skip the loword comparison. */
1871#if LJ_SOFTFP 1916#if LJ_SOFTFP
@@ -1882,7 +1927,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1882 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { 1927 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1883 as->curins--; /* Always skip the loword min/max. */ 1928 as->curins--; /* Always skip the loword min/max. */
1884 if (uselo || usehi) 1929 if (uselo || usehi)
1885 asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); 1930 asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
1886 return; 1931 return;
1887#elif LJ_HASFFI 1932#elif LJ_HASFFI
1888 } else if ((ir-1)->o == IR_CONV) { 1933 } else if ((ir-1)->o == IR_CONV) {
@@ -1893,9 +1938,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1893#endif 1938#endif
1894 } else if ((ir-1)->o == IR_XSTORE) { 1939 } else if ((ir-1)->o == IR_XSTORE) {
1895 if ((ir-1)->r != RID_SINK) 1940 if ((ir-1)->r != RID_SINK)
1896 asm_xstore(as, ir, 4); 1941 asm_xstore_(as, ir, 4);
1897 return; 1942 return;
1898 } 1943 }
1944#endif
1899 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1945 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1900 switch ((ir-1)->o) { 1946 switch ((ir-1)->o) {
1901#if LJ_HASFFI 1947#if LJ_HASFFI
@@ -1914,6 +1960,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1914 asm_intneg(as, ir, ARMI_RSC); 1960 asm_intneg(as, ir, ARMI_RSC);
1915 asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); 1961 asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
1916 break; 1962 break;
1963 case IR_CNEWI:
1964 /* Nothing to do here. Handled by lo op itself. */
1965 break;
1917#endif 1966#endif
1918#if LJ_SOFTFP 1967#if LJ_SOFTFP
1919 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: 1968 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1921,24 +1970,26 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1921 if (!uselo) 1970 if (!uselo)
1922 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ 1971 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
1923 break; 1972 break;
1973 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
1974 /* Nothing to do here. Handled by lo op itself. */
1975 break;
1924#endif 1976#endif
1925 case IR_CALLN:
1926 case IR_CALLS:
1927 case IR_CALLXS:
1977 case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
1928 if (!uselo) 1978 if (!uselo)
1929 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 1979 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1930 break; 1980 break;
1931#if LJ_SOFTFP
1932 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
1933#endif
1934 case IR_CNEWI:
1935 /* Nothing to do here. Handled by lo op itself. */
1936 break;
1937 default: lua_assert(0); break;
1981 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
1938 } 1982 }
1939#else
1940 UNUSED(as); UNUSED(ir); lua_assert(0);
1941#endif
1983}
1984
1985/* -- Profiling ----------------------------------------------------------- */
1986
1987static void asm_prof(ASMState *as, IRIns *ir)
1988{
1989 UNUSED(ir);
1990 asm_guardcc(as, CC_NE);
1991 emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
1992 emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1942} 1993}
1943 1994
1944/* -- Stack handling ------------------------------------------------------ */ 1995/* -- Stack handling ------------------------------------------------------ */
@@ -1953,7 +2004,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1953 if (irp) { 2004 if (irp) {
1954 if (!ra_hasspill(irp->s)) { 2005 if (!ra_hasspill(irp->s)) {
1955 pbase = irp->r; 2006 pbase = irp->r;
1956 lua_assert(ra_hasreg(pbase)); 2007 lj_assertA(ra_hasreg(pbase), "base reg lost");
1957 } else if (allow) { 2008 } else if (allow) {
1958 pbase = rset_pickbot(allow); 2009 pbase = rset_pickbot(allow);
1959 } else { 2010 } else {
@@ -1967,13 +2018,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1967 if (savereg) 2018 if (savereg)
1968 emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ 2019 emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
1969 k = emit_isk12(0, (int32_t)(8*topslot)); 2020 k = emit_isk12(0, (int32_t)(8*topslot));
1970 lua_assert(k); 2021 lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
1971 emit_n(as, ARMI_CMP^k, RID_TMP); 2022 emit_n(as, ARMI_CMP^k, RID_TMP);
1972 emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); 2023 emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
1973 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, 2024 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
1974 (int32_t)offsetof(lua_State, maxstack)); 2025 (int32_t)offsetof(lua_State, maxstack));
1975 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 2026 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1976 int32_t i = i32ptr(&J2G(as->J)->jit_L); 2027 int32_t i = i32ptr(&J2G(as->J)->cur_L);
1977 if (ra_hasspill(irp->s)) 2028 if (ra_hasspill(irp->s))
1978 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); 2029 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
1979 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); 2030 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1981,7 +2032,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1981 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ 2032 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
1982 emit_loadi(as, RID_TMP, (i & ~4095)); 2033 emit_loadi(as, RID_TMP, (i & ~4095));
1983 } else { 2034 } else {
1984 emit_getgl(as, RID_TMP, jit_L); 2035 emit_getgl(as, RID_TMP, cur_L);
1985 } 2036 }
1986} 2037}
1987 2038
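Note: in the side-trace path above, the absolute address of g->cur_L is materialized in two pieces because A32 LDR/STR immediate offsets are limited to 12 bits: emit_loadi() gets the 4 KB-aligned base and the low 12 bits become the load offset. A tiny sketch of that split; the address below is made up.

#include <assert.h>
#include <stdint.h>

int main(void)
{
  uint32_t addr = 0x2001abcdu;       /* Hypothetical address of g->cur_L. */
  uint32_t base = addr & ~4095u;     /* emit_loadi(as, RID_TMP, i & ~4095) */
  uint32_t ofs  = addr & 4095u;      /* emit_lso(..., RID_TMP, i & 4095)   */
  assert(base + ofs == addr);        /* Two-step load reaches the address. */
  assert(ofs <= 4095);               /* Offset fits the 12-bit field.      */
  return 0;
}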
@@ -2005,7 +2056,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2005#if LJ_SOFTFP 2056#if LJ_SOFTFP
2006 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); 2057 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
2007 Reg tmp; 2058 Reg tmp;
2008 lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ 2059 /* LJ_SOFTFP: must be a number constant. */
2060 lj_assertA(irref_isk(ref), "unsplit FP op");
2009 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, 2061 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
2010 rset_exclude(RSET_GPREVEN, RID_BASE)); 2062 rset_exclude(RSET_GPREVEN, RID_BASE));
2011 emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); 2063 emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
@@ -2025,7 +2077,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2025 } else { 2077 } else {
2026 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); 2078 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
2027 Reg type; 2079 Reg type;
2028 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2080 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2081 "restore of IR type %d", irt_type(ir->t));
2029 if (!irt_ispri(ir->t)) { 2082 if (!irt_ispri(ir->t)) {
2030 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); 2083 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
2031 emit_lso(as, ARMI_STR, src, RID_BASE, ofs); 2084 emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
@@ -2038,6 +2091,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2038 } else if ((sn & SNAP_SOFTFPNUM)) { 2091 } else if ((sn & SNAP_SOFTFPNUM)) {
2039 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); 2092 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
2040#endif 2093#endif
2094 } else if ((sn & SNAP_KEYINDEX)) {
2095 type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
2041 } else { 2096 } else {
2042 type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); 2097 type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
2043 } 2098 }
@@ -2046,7 +2101,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2046 checkmclim(as); 2101 checkmclim(as);
2047 } 2102 }
2048 emit_addptr(as, RID_BASE, bias); 2103 emit_addptr(as, RID_BASE, bias);
2049 lua_assert(map + nent == flinks); 2104 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
2050} 2105}
2051 2106
2052/* -- GC handling --------------------------------------------------------- */ 2107/* -- GC handling --------------------------------------------------------- */
@@ -2100,15 +2155,21 @@ static void asm_loop_fixup(ASMState *as)
2100 } 2155 }
2101} 2156}
2102 2157
2158/* Fixup the tail of the loop. */
2159static void asm_loop_tail_fixup(ASMState *as)
2160{
2161 UNUSED(as); /* Nothing to do. */
2162}
2163
2103/* -- Head of trace ------------------------------------------------------- */ 2164/* -- Head of trace ------------------------------------------------------- */
2104 2165
2105/* Reload L register from g->jit_L. */ 2166/* Reload L register from g->cur_L. */
2106static void asm_head_lreg(ASMState *as) 2167static void asm_head_lreg(ASMState *as)
2107{ 2168{
2108 IRIns *ir = IR(ASMREF_L); 2169 IRIns *ir = IR(ASMREF_L);
2109 if (ra_used(ir)) { 2170 if (ra_used(ir)) {
2110 Reg r = ra_dest(as, ir, RSET_GPR); 2171 Reg r = ra_dest(as, ir, RSET_GPR);
2111 emit_getgl(as, r, jit_L); 2172 emit_getgl(as, r, cur_L);
2112 ra_evictk(as); 2173 ra_evictk(as);
2113 } 2174 }
2114} 2175}
@@ -2136,7 +2197,7 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
2136 return ra_dest(as, ir, RSET_GPR); 2197 return ra_dest(as, ir, RSET_GPR);
2137 } else { 2198 } else {
2138 Reg r = irp->r; 2199 Reg r = irp->r;
2139 lua_assert(ra_hasreg(r)); 2200 lj_assertA(ra_hasreg(r), "base reg lost");
2140 if (r != ir->r && !rset_test(as->freeset, r)) 2201 if (r != ir->r && !rset_test(as->freeset, r))
2141 ra_restore(as, regcost_ref(as->cost[r])); 2202 ra_restore(as, regcost_ref(as->cost[r]));
2142 ra_destreg(as, ir, r); 2203 ra_destreg(as, ir, r);
@@ -2157,7 +2218,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2157 } else { 2218 } else {
2158 /* Patch stack adjustment. */ 2219 /* Patch stack adjustment. */
2159 uint32_t k = emit_isk12(ARMI_ADD, spadj); 2220 uint32_t k = emit_isk12(ARMI_ADD, spadj);
2160 lua_assert(k); 2221 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
2161 p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); 2222 p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
2162 } 2223 }
2163 /* Patch exit branch. */ 2224 /* Patch exit branch. */
@@ -2178,143 +2239,13 @@ static void asm_tail_prep(ASMState *as)
2178 *p = 0; /* Prevent load/store merging. */ 2239 *p = 0; /* Prevent load/store merging. */
2179} 2240}
2180 2241
2181/* -- Instruction dispatch ------------------------------------------------ */
2182
2183/* Assemble a single instruction. */
2184static void asm_ir(ASMState *as, IRIns *ir)
2185{
2186 switch ((IROp)ir->o) {
2187 /* Miscellaneous ops. */
2188 case IR_LOOP: asm_loop(as); break;
2189 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2190 case IR_USE:
2191 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2192 case IR_PHI: asm_phi(as, ir); break;
2193 case IR_HIOP: asm_hiop(as, ir); break;
2194 case IR_GCSTEP: asm_gcstep(as, ir); break;
2195
2196 /* Guarded assertions. */
2197 case IR_EQ: case IR_NE:
2198 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2199 as->curins--;
2200 asm_href(as, ir-1, (IROp)ir->o);
2201 break;
2202 }
2203 /* fallthrough */
2204 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2205 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2206 case IR_ABC:
2207#if !LJ_SOFTFP
2208 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2209#endif
2210 asm_intcomp(as, ir);
2211 break;
2212
2213 case IR_RETF: asm_retf(as, ir); break;
2214
2215 /* Bit ops. */
2216 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2217 case IR_BSWAP: asm_bitswap(as, ir); break;
2218
2219 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2220 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2221 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2222
2223 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2224 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2225 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2226 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2227 case IR_BROL: lua_assert(0); break;
2228
2229 /* Arithmetic ops. */
2230 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2231 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2232 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2233 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2234 case IR_NEG: asm_neg(as, ir); break;
2235
2236#if LJ_SOFTFP
2237 case IR_DIV: case IR_POW: case IR_ABS:
2238 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2239 lua_assert(0); /* Unused for LJ_SOFTFP. */
2240 break;
2241#else
2242 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2243 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2244 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2245 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2246 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2247 case IR_FPMATH:
2248 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2249 break;
2250 if (ir->op2 <= IRFPM_TRUNC)
2251 asm_callround(as, ir, ir->op2);
2252 else if (ir->op2 == IRFPM_SQRT)
2253 asm_fpunary(as, ir, ARMI_VSQRT_D);
2254 else
2255 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2256 break;
2257 case IR_TOBIT: asm_tobit(as, ir); break;
2258#endif
2259
2260 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2261 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2262
2263 /* Memory references. */
2264 case IR_AREF: asm_aref(as, ir); break;
2265 case IR_HREF: asm_href(as, ir, 0); break;
2266 case IR_HREFK: asm_hrefk(as, ir); break;
2267 case IR_NEWREF: asm_newref(as, ir); break;
2268 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2269 case IR_FREF: asm_fref(as, ir); break;
2270 case IR_STRREF: asm_strref(as, ir); break;
2271
2272 /* Loads and stores. */
2273 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2274 asm_ahuvload(as, ir);
2275 break;
2276 case IR_FLOAD: asm_fload(as, ir); break;
2277 case IR_XLOAD: asm_xload(as, ir); break;
2278 case IR_SLOAD: asm_sload(as, ir); break;
2279
2280 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2281 case IR_FSTORE: asm_fstore(as, ir); break;
2282 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2283
2284 /* Allocations. */
2285 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2286 case IR_TNEW: asm_tnew(as, ir); break;
2287 case IR_TDUP: asm_tdup(as, ir); break;
2288 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2289
2290 /* Write barriers. */
2291 case IR_TBAR: asm_tbar(as, ir); break;
2292 case IR_OBAR: asm_obar(as, ir); break;
2293
2294 /* Type conversions. */
2295 case IR_CONV: asm_conv(as, ir); break;
2296 case IR_TOSTR: asm_tostr(as, ir); break;
2297 case IR_STRTO: asm_strto(as, ir); break;
2298
2299 /* Calls. */
2300 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2301 case IR_CALLXS: asm_callx(as, ir); break;
2302 case IR_CARG: break;
2303
2304 default:
2305 setintV(&as->J->errinfo, ir->o);
2306 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2307 break;
2308 }
2309}
2310
2311/* -- Trace setup --------------------------------------------------------- */ 2242/* -- Trace setup --------------------------------------------------------- */
2312 2243
2313/* Ensure there are enough stack slots for call arguments. */ 2244/* Ensure there are enough stack slots for call arguments. */
2314static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2245static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2315{ 2246{
2316 IRRef args[CCI_NARGS_MAX*2]; 2247 IRRef args[CCI_NARGS_MAX*2];
2317 uint32_t i, nargs = (int)CCI_NARGS(ci); 2248 uint32_t i, nargs = CCI_XNARGS(ci);
2318 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2249 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2319 asm_collectargs(as, ir, ci, args); 2250 asm_collectargs(as, ir, ci, args);
2320 for (i = 0; i < nargs; i++) { 2251 for (i = 0; i < nargs; i++) {
@@ -2340,7 +2271,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2340 } 2271 }
2341 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2272 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2342 as->evenspill = nslots; 2273 as->evenspill = nslots;
2343 return REGSP_HINT(RID_RET); 2274 return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
2344} 2275}
2345 2276
2346static void asm_setup_target(ASMState *as) 2277static void asm_setup_target(ASMState *as)
@@ -2370,7 +2301,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2370 if (!cstart) cstart = p; 2301 if (!cstart) cstart = p;
2371 } 2302 }
2372 } 2303 }
2373 lua_assert(cstart != NULL); 2304 lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
2374 lj_mcode_sync(cstart, cend); 2305 lj_mcode_sync(cstart, cend);
2375 lj_mcode_patch(J, mcarea, 1); 2306 lj_mcode_patch(J, mcarea, 1);
2376} 2307}
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
new file mode 100644
index 00000000..0e5aee9a
--- /dev/null
+++ b/src/lj_asm_arm64.h
@@ -0,0 +1,2075 @@
1/*
2** ARM64 IR assembler (SSA IR -> machine code).
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4**
5** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6** Sponsored by Cisco Systems, Inc.
7*/
8
9/* -- Register allocator extensions --------------------------------------- */
10
11/* Allocate a register with a hint. */
12static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
13{
14 Reg r = IR(ref)->r;
15 if (ra_noreg(r)) {
16 if (!ra_hashint(r) && !iscrossref(as, ref))
17 ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
18 r = ra_allocref(as, ref, allow);
19 }
20 ra_noweak(as, r);
21 return r;
22}
23
24/* Allocate two source registers for three-operand instructions. */
25static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
26{
27 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
28 Reg left = irl->r, right = irr->r;
29 if (ra_hasreg(left)) {
30 ra_noweak(as, left);
31 if (ra_noreg(right))
32 right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
33 else
34 ra_noweak(as, right);
35 } else if (ra_hasreg(right)) {
36 ra_noweak(as, right);
37 left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
38 } else if (ra_hashint(right)) {
39 right = ra_allocref(as, ir->op2, allow);
40 left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
41 } else {
42 left = ra_allocref(as, ir->op1, allow);
43 right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
44 }
45 return left | (right << 8);
46}
47
48/* -- Guard handling ------------------------------------------------------ */
49
50/* Setup all needed exit stubs. */
51static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
52{
53 ExitNo i;
54 MCode *mxp = as->mctop;
55 if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
56 asm_mclimit(as);
57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
58 for (i = nexits-1; (int32_t)i >= 0; i--)
59 *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
60 *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
61 mxp--;
62 *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
63 *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
64 as->mctop = mxp;
65}
66
67static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
68{
69 /* Keep this in-sync with exitstub_trace_addr(). */
70 return as->mctop + exitno + 3;
71}
72
73/* Emit conditional branch to exit for guard. */
74static void asm_guardcc(ASMState *as, A64CC cc)
75{
76 MCode *target = asm_exitstub_addr(as, as->snapno);
77 MCode *p = as->mcp;
78 if (LJ_UNLIKELY(p == as->invmcp)) {
79 as->loopinv = 1;
80 *p = A64I_B | A64F_S26(target-p);
81 emit_cond_branch(as, cc^1, p-1);
82 return;
83 }
84 emit_cond_branch(as, cc, target);
85}
86
87/* Emit test and branch instruction to exit for guard, if in range. */
88static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
89{
90 MCode *target = asm_exitstub_addr(as, as->snapno);
91 MCode *p = as->mcp;
92 ptrdiff_t delta = target - p;
93 if (LJ_UNLIKELY(p == as->invmcp)) {
94 if (as->orignins > 1023) return 0; /* Delta might end up too large. */
95 as->loopinv = 1;
96 *p = A64I_B | A64F_S26(delta);
97 ai ^= 0x01000000u;
98 target = p-1;
99 } else if (LJ_UNLIKELY(delta >= 0x1fff)) {
100 return 0;
101 }
102 emit_tnb(as, ai, r, bit, target);
103 return 1;
104}
105
106/* Emit compare and branch instruction to exit for guard. */
107static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
108{
109 MCode *target = asm_exitstub_addr(as, as->snapno);
110 MCode *p = as->mcp;
111 if (LJ_UNLIKELY(p == as->invmcp)) {
112 as->loopinv = 1;
113 *p = A64I_B | A64F_S26(target-p);
114 emit_cnb(as, ai^0x01000000u, r, p-1);
115 return;
116 }
117 emit_cnb(as, ai, r, target);
118}
119
120/* -- Operand fusion ------------------------------------------------------ */
121
122/* Limit linear search to this distance. Avoids O(n^2) behavior. */
123#define CONFLICT_SEARCH_LIM 31
124
125static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
126{
127 if (irref_isk(ref)) {
128 IRIns *ir = IR(ref);
129 if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
130 *k = ir->i;
131 return 1;
132 } else if (checki32((int64_t)ir_k64(ir)->u64)) {
133 *k = (int32_t)ir_k64(ir)->u64;
134 return 1;
135 }
136 }
137 return 0;
138}
139
140/* Check if there's no conflicting instruction between curins and ref. */
141static int noconflict(ASMState *as, IRRef ref, IROp conflict)
142{
143 IRIns *ir = as->ir;
144 IRRef i = as->curins;
145 if (i > ref + CONFLICT_SEARCH_LIM)
146 return 0; /* Give up, ref is too far away. */
147 while (--i > ref)
148 if (ir[i].o == conflict)
149 return 0; /* Conflict found. */
150 return 1; /* Ok, no conflict. */
151}
152
153/* Fuse the array base of colocated arrays. */
154static int32_t asm_fuseabase(ASMState *as, IRRef ref)
155{
156 IRIns *ir = IR(ref);
157 if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
158 !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
159 return (int32_t)sizeof(GCtab);
160 return 0;
161}
162
163#define FUSE_REG 0x40000000
164
165/* Fuse array/hash/upvalue reference into register+offset operand. */
166static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
167 A64Ins ins)
168{
169 IRIns *ir = IR(ref);
170 if (ra_noreg(ir->r)) {
171 if (ir->o == IR_AREF) {
172 if (mayfuse(as, ref)) {
173 if (irref_isk(ir->op2)) {
174 IRRef tab = IR(ir->op1)->op1;
175 int32_t ofs = asm_fuseabase(as, tab);
176 IRRef refa = ofs ? tab : ir->op1;
177 ofs += 8*IR(ir->op2)->i;
178 if (emit_checkofs(ins, ofs)) {
179 *ofsp = ofs;
180 return ra_alloc1(as, refa, allow);
181 }
182 } else {
183 Reg base = ra_alloc1(as, ir->op1, allow);
184 *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
185 return base;
186 }
187 }
188 } else if (ir->o == IR_HREFK) {
189 if (mayfuse(as, ref)) {
190 int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
191 if (emit_checkofs(ins, ofs)) {
192 *ofsp = ofs;
193 return ra_alloc1(as, ir->op1, allow);
194 }
195 }
196 } else if (ir->o == IR_UREFC) {
197 if (irref_isk(ir->op1)) {
198 GCfunc *fn = ir_kfunc(IR(ir->op1));
199 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
200 int64_t ofs = glofs(as, &uv->tv);
201 if (emit_checkofs(ins, ofs)) {
202 *ofsp = (int32_t)ofs;
203 return RID_GL;
204 }
205 }
206 } else if (ir->o == IR_TMPREF) {
207 *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
208 return RID_GL;
209 }
210 }
211 *ofsp = 0;
212 return ra_alloc1(as, ref, allow);
213}
214
215/* Fuse m operand into arithmetic/logic instructions. */
216static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
217{
218 IRIns *ir = IR(ref);
219 int logical = (ai & 0x1f000000) == 0x0a000000;
220 if (ra_hasreg(ir->r)) {
221 ra_noweak(as, ir->r);
222 return A64F_M(ir->r);
223 } else if (irref_isk(ref)) {
224 int64_t k = get_k64val(as, ref);
225 uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
226 emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
227 if (m)
228 return m;
229 } else if (mayfuse(as, ref)) {
230 if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
231 (ir->o == IR_ADD && ir->op1 == ir->op2)) {
232 A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
233 ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
234 int shift = ir->o == IR_ADD ? 1 :
235 (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
236 IRIns *irl = IR(ir->op1);
237 if (sh == A64SH_LSL &&
238 irl->o == IR_CONV && !logical &&
239 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
240 shift <= 4 &&
241 canfuse(as, irl)) {
242 Reg m = ra_alloc1(as, irl->op1, allow);
243 return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
244 } else {
245 Reg m = ra_alloc1(as, ir->op1, allow);
246 return A64F_M(m) | A64F_SH(sh, shift);
247 }
248 } else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
249 Reg m = ra_alloc1(as, ir->op1, allow);
250 int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
251 return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
252 } else if (ir->o == IR_CONV && !logical &&
253 ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
254 Reg m = ra_alloc1(as, ir->op1, allow);
255 return A64F_M(m) | A64F_EX(A64EX_SXTW);
256 }
257 }
258 return A64F_M(ra_allocref(as, ref, allow));
259}
260
261/* Fuse XLOAD/XSTORE reference into load/store operand. */
262static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
263 RegSet allow)
264{
265 IRIns *ir = IR(ref);
266 Reg base;
267 int32_t ofs = 0;
268 if (ra_noreg(ir->r) && canfuse(as, ir)) {
269 if (ir->o == IR_ADD) {
270 if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
271 ref = ir->op1;
272 } else {
273 Reg rn, rm;
274 IRRef lref = ir->op1, rref = ir->op2;
275 IRIns *irl = IR(lref);
276 if (mayfuse(as, irl->op1)) {
277 unsigned int shift = 4;
278 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
279 shift = (IR(irl->op2)->i & 63);
280 } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
281 shift = 1;
282 }
283 if ((ai >> 30) == shift) {
284 lref = irl->op1;
285 irl = IR(lref);
286 ai |= A64I_LS_SH;
287 }
288 }
289 if (irl->o == IR_CONV &&
290 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
291 canfuse(as, irl)) {
292 lref = irl->op1;
293 ai |= A64I_LS_SXTWx;
294 } else {
295 ai |= A64I_LS_LSLx;
296 }
297 rm = ra_alloc1(as, lref, allow);
298 rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
299 emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
300 return;
301 }
302 } else if (ir->o == IR_STRREF) {
303 if (asm_isk32(as, ir->op2, &ofs)) {
304 ref = ir->op1;
305 } else if (asm_isk32(as, ir->op1, &ofs)) {
306 ref = ir->op2;
307 } else {
308 Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
309 Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
310 Reg rn = ra_alloc1(as, refv, allow);
311 IRIns *irr = IR(refk);
312 uint32_t m;
313 if (irr+1 == ir && !ra_used(irr) &&
314 irr->o == IR_ADD && irref_isk(irr->op2)) {
315 ofs = sizeof(GCstr) + IR(irr->op2)->i;
316 if (emit_checkofs(ai, ofs)) {
317 Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
318 m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
319 goto skipopm;
320 }
321 }
322 m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
323 ofs = sizeof(GCstr);
324 skipopm:
325 emit_lso(as, ai, rd, rd, ofs);
326 emit_dn(as, A64I_ADDx^m, rd, rn);
327 return;
328 }
329 ofs += sizeof(GCstr);
330 if (!emit_checkofs(ai, ofs)) {
331 Reg rn = ra_alloc1(as, ref, allow);
332 Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
333 emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
334 return;
335 }
336 }
337 }
338 base = ra_alloc1(as, ref, allow);
339 emit_lso(as, ai, (rd & 31), base, ofs);
340}
341
342/* Fuse FP multiply-add/sub. */
343static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
344{
345 IRRef lref = ir->op1, rref = ir->op2;
346 IRIns *irm;
347 if ((as->flags & JIT_F_OPT_FMA) &&
348 lref != rref &&
349 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
350 ra_noreg(irm->r)) ||
351 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
352 (rref = lref, ai = air, ra_noreg(irm->r))))) {
353 Reg dest = ra_dest(as, ir, RSET_FPR);
354 Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
355 Reg left = ra_alloc2(as, irm,
356 rset_exclude(rset_exclude(RSET_FPR, dest), add));
357 Reg right = (left >> 8); left &= 255;
358 emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
359 return 1;
360 }
361 return 0;
362}
363
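Note: asm_fusemadd() only fires when JIT_F_OPT_FMA is set because a fused multiply-add rounds once, so it can give a different result than the separate multiply and add instructions. A small, self-contained illustration of that difference using the C99 fma() from <math.h>:

#include <math.h>
#include <stdio.h>

int main(void)
{
  double a = 1.0 + 0x1p-52, b = 1.0 - 0x1p-52, c = -1.0;
  double separate = a * b + c;   /* a*b rounds to 1.0 first, so this is 0. */
  double fused = fma(a, b, c);   /* Single rounding keeps the -0x1p-104 term. */
  printf("%a vs %a\n", separate, fused);
  return 0;
}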
364/* Fuse BAND + BSHL/BSHR into UBFM. */
365static int asm_fuseandshift(ASMState *as, IRIns *ir)
366{
367 IRIns *irl = IR(ir->op1);
368 lj_assertA(ir->o == IR_BAND, "bad usage");
369 if (canfuse(as, irl) && irref_isk(ir->op2)) {
370 uint64_t mask = get_k64val(as, ir->op2);
371 if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
372 int32_t shmask = irt_is64(irl->t) ? 63 : 31;
373 int32_t shift = (IR(irl->op2)->i & shmask);
374 int32_t imms = shift;
375 if (irl->o == IR_BSHL) {
376 mask >>= shift;
377 shift = (shmask-shift+1) & shmask;
378 imms = 0;
379 }
380 if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */
381 Reg dest = ra_dest(as, ir, RSET_GPR);
382 Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
383 A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
384 imms += 63 - emit_clz64(mask);
385 if (imms > shmask) imms = shmask;
386 emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
387 return 1;
388 }
389 }
390 }
391 return 0;
392}
393
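Note: asm_fuseandshift() only emits UBFM when the (possibly shifted-down) BAND mask is a run of 1-bits anchored at bit 0, i.e. of the form 2^n - 1; the (mask+1) & mask test checks exactly that. A standalone version of the predicate:

#include <assert.h>
#include <stdint.h>

/* Adding 1 to a low mask (2^n - 1) clears all of its bits. */
static int is_low_mask(uint64_t mask)
{
  return mask != 0 && ((mask + 1) & mask) == 0;
}

int main(void)
{
  assert(is_low_mask(0xff));          /* 8 low bits: UBFM candidate. */
  assert(is_low_mask(0x7fffffff));
  assert(!is_low_mask(0xf0));         /* Bits not anchored at bit 0. */
  assert(!is_low_mask(0));
  return 0;
}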
394/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */
395static int asm_fuseorshift(ASMState *as, IRIns *ir)
396{
397 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
398 lj_assertA(ir->o == IR_BOR, "bad usage");
399 if (canfuse(as, irl) && canfuse(as, irr) &&
400 ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
401 (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
402 if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
403 IRRef lref = irl->op1, rref = irr->op1;
404 uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
405 if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */
406 uint32_t tmp2;
407 IRRef tmp1 = lref; lref = rref; rref = tmp1;
408 tmp2 = lshift; lshift = rshift; rshift = tmp2;
409 }
410 if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
411 A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
412 Reg dest = ra_dest(as, ir, RSET_GPR);
413 Reg left = ra_alloc1(as, lref, RSET_GPR);
414 Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
415 emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
416 return 1;
417 }
418 }
419 }
420 return 0;
421}
422
423/* -- Calls --------------------------------------------------------------- */
424
425/* Generate a call to a C function. */
426static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
427{
428 uint32_t n, nargs = CCI_XNARGS(ci);
429 int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7;
430 Reg gpr, fpr = REGARG_FIRSTFPR;
431 if (ci->func)
432 emit_call(as, ci->func);
433 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
434 as->cost[gpr] = REGCOST(~0u, ASMREF_L);
435 gpr = REGARG_FIRSTGPR;
436#if LJ_HASFFI && LJ_ABI_WIN
437 if ((ci->flags & CCI_VARARG)) {
438 fpr = REGARG_LASTFPR+1;
439 }
440#endif
441 for (n = 0; n < nargs; n++) { /* Setup args. */
442 IRRef ref = args[n];
443 IRIns *ir = IR(ref);
444 if (ref) {
445 if (irt_isfp(ir->t)) {
446 if (fpr <= REGARG_LASTFPR) {
447 lj_assertA(rset_test(as->freeset, fpr),
448 "reg %d not free", fpr); /* Must have been evicted. */
449 ra_leftov(as, fpr, ref);
450 fpr++;
451#if LJ_HASFFI && LJ_ABI_WIN
452 } else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
453 Reg rf = ra_alloc1(as, ref, RSET_FPR);
454 emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
455#endif
456 } else {
457 Reg r = ra_alloc1(as, ref, RSET_FPR);
458 int32_t al = spalign;
459#if LJ_HASFFI && LJ_TARGET_OSX
460 al |= irt_isnum(ir->t) ? 7 : 3;
461#endif
462 spofs = (spofs + al) & ~al;
463 if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
464 emit_spstore(as, ir, r, spofs);
465 spofs += al + 1;
466 }
467 } else {
468 if (gpr <= REGARG_LASTGPR) {
469 lj_assertA(rset_test(as->freeset, gpr),
470 "reg %d not free", gpr); /* Must have been evicted. */
471 ra_leftov(as, gpr, ref);
472 gpr++;
473 } else {
474 Reg r = ra_alloc1(as, ref, RSET_GPR);
475 int32_t al = spalign;
476#if LJ_HASFFI && LJ_TARGET_OSX
477 al |= irt_size(ir->t) - 1;
478#endif
479 spofs = (spofs + al) & ~al;
480 if (al >= 3) {
481 if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
482 emit_spstore(as, ir, r, spofs);
483 } else {
484 lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
485 emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
486 }
487 spofs += al + 1;
488 }
489 }
490#if LJ_HASFFI && LJ_TARGET_OSX
491 } else { /* Marker for start of varargs. */
492 gpr = REGARG_LASTGPR+1;
493 fpr = REGARG_LASTFPR+1;
494 spalign = 7;
495#endif
496 }
497 }
498}
499
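Note: in the stack-argument loop above, `al` is kept as the required alignment minus one (with the OSX/ABI branches adjusting it per argument), so `spofs = (spofs + al) & ~al` is the usual round-up-to-alignment idiom. A minimal check of that arithmetic:

#include <assert.h>
#include <stdint.h>

/* Round spofs up to the next multiple of (al + 1). */
static int32_t align_up(int32_t spofs, int32_t al)
{
  return (spofs + al) & ~al;
}

int main(void)
{
  assert(align_up(0, 7) == 0);   /* Already 8-byte aligned. */
  assert(align_up(4, 7) == 8);   /* Next 8-byte slot. */
  assert(align_up(5, 3) == 8);   /* 4-byte alignment (al = 3). */
  assert(align_up(8, 0) == 8);   /* Byte argument, no padding. */
  return 0;
}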
500/* Setup result reg/sp for call. Evict scratch regs. */
501static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
502{
503 RegSet drop = RSET_SCRATCH;
504 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
505 if (ra_hasreg(ir->r))
506 rset_clear(drop, ir->r); /* Dest reg handled below. */
507 if (hiop && ra_hasreg((ir+1)->r))
508 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
509 ra_evictset(as, drop); /* Evictions must be performed first. */
510 if (ra_used(ir)) {
511 lj_assertA(!irt_ispri(ir->t), "PRI dest");
512 if (irt_isfp(ir->t)) {
513 if (ci->flags & CCI_CASTU64) {
514 Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
515 emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
516 dest, RID_RET);
517 } else {
518 ra_destreg(as, ir, RID_FPRET);
519 }
520 } else if (hiop) {
521 ra_destpair(as, ir);
522 } else {
523 ra_destreg(as, ir, RID_RET);
524 }
525 }
526 UNUSED(ci);
527}
528
529static void asm_callx(ASMState *as, IRIns *ir)
530{
531 IRRef args[CCI_NARGS_MAX*2];
532 CCallInfo ci;
533 IRRef func;
534 IRIns *irf;
535 ci.flags = asm_callx_flags(as, ir);
536 asm_collectargs(as, ir, &ci, args);
537 asm_setupresult(as, ir, &ci);
538 func = ir->op2; irf = IR(func);
539 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
540 if (irref_isk(func)) { /* Call to constant address. */
541 ci.func = (ASMFunction)(ir_k64(irf)->u64);
542 } else { /* Need a non-argument register for indirect calls. */
543 Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
544 emit_n(as, A64I_BLR_AUTH, freg);
545 ci.func = (ASMFunction)(void *)0;
546 }
547 asm_gencall(as, &ci, args);
548}
549
550/* -- Returns ------------------------------------------------------------- */
551
552/* Return to lower frame. Guard that it goes to the right spot. */
553static void asm_retf(ASMState *as, IRIns *ir)
554{
555 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
556 void *pc = ir_kptr(IR(ir->op2));
557 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
558 as->topslot -= (BCReg)delta;
559 if ((int32_t)as->topslot < 0) as->topslot = 0;
560 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
561 emit_setgl(as, base, jit_base);
562 emit_addptr(as, base, -8*delta);
563 asm_guardcc(as, CC_NE);
564 emit_nm(as, A64I_CMPx, RID_TMP,
565 ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
566 emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
567}
568
569/* -- Buffer operations --------------------------------------------------- */
570
571#if LJ_HASBUFFER
572static void asm_bufhdr_write(ASMState *as, Reg sb)
573{
574 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
575 IRIns irgc;
576 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
577 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
578 emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);
579 emit_getgl(as, RID_TMP, cur_L);
580 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
581}
582#endif
583
584/* -- Type conversions ---------------------------------------------------- */
585
586static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
587{
588 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
589 Reg dest = ra_dest(as, ir, RSET_GPR);
590 asm_guardcc(as, CC_NE);
591 emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
592 emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
593 emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
594}
595
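Note: asm_tointg() above implements the guarded number-to-integer conversion as: convert to int, convert back to double, compare with the original and take the exit on mismatch, which rejects fractional, out-of-range and NaN inputs. A rough C model of the idea; the C cast is only well-defined for in-range values, so the sketch sticks to those.

#include <assert.h>

/* Convert, convert back, compare: mirrors the convert/compare/guard pair. */
static int checked_toint(double d, int *out)
{
  int i = (int)d;
  if ((double)i != d) return 0;  /* Would take the CC_NE exit. */
  *out = i;
  return 1;
}

int main(void)
{
  int i;
  assert(checked_toint(42.0, &i) && i == 42);
  assert(checked_toint(-7.0, &i) && i == -7);
  assert(!checked_toint(42.5, &i));   /* Fractional values fail the guard. */
  return 0;
}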
596static void asm_tobit(ASMState *as, IRIns *ir)
597{
598 RegSet allow = RSET_FPR;
599 Reg left = ra_alloc1(as, ir->op1, allow);
600 Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
601 Reg tmp = ra_scratch(as, rset_clear(allow, right));
602 Reg dest = ra_dest(as, ir, RSET_GPR);
603 emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
604 emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
605}
606
607static void asm_conv(ASMState *as, IRIns *ir)
608{
609 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
610 int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
611 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
612 IRRef lref = ir->op1;
613 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
614 if (irt_isfp(ir->t)) {
615 Reg dest = ra_dest(as, ir, RSET_FPR);
616 if (stfp) { /* FP to FP conversion. */
617 emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
618 (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
619 } else { /* Integer to FP conversion. */
620 Reg left = ra_alloc1(as, lref, RSET_GPR);
621 A64Ins ai = irt_isfloat(ir->t) ?
622 (((IRT_IS64 >> st) & 1) ?
623 (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
624 (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
625 (((IRT_IS64 >> st) & 1) ?
626 (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
627 (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
628 emit_dn(as, ai, (dest & 31), left);
629 }
630 } else if (stfp) { /* FP to integer conversion. */
631 if (irt_isguard(ir->t)) {
632 /* Checked conversions are only supported from number to int. */
633 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
634 "bad type for checked CONV");
635 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
636 } else {
637 Reg left = ra_alloc1(as, lref, RSET_FPR);
638 Reg dest = ra_dest(as, ir, RSET_GPR);
639 A64Ins ai = irt_is64(ir->t) ?
640 (st == IRT_NUM ?
641 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
642 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
643 (st == IRT_NUM ?
644 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
645 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
646 emit_dn(as, ai, dest, (left & 31));
647 }
648 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
649 Reg dest = ra_dest(as, ir, RSET_GPR);
650 Reg left = ra_alloc1(as, lref, RSET_GPR);
651 A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
652 st == IRT_U8 ? A64I_UXTBw :
653 st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
654 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
655 emit_dn(as, ai, dest, left);
656 } else {
657 Reg dest = ra_dest(as, ir, RSET_GPR);
658 if (irt_is64(ir->t)) {
659 if (st64 || !(ir->op2 & IRCONV_SEXT)) {
660 /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
661 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
662 } else { /* 32 to 64 bit sign extension. */
663 Reg left = ra_alloc1(as, lref, RSET_GPR);
664 emit_dn(as, A64I_SXTW, dest, left);
665 }
666 } else {
667 if (st64 && !(ir->op2 & IRCONV_NONE)) {
668 /* This is either a 32 bit reg/reg mov which zeroes the hiword
669 ** or a load of the loword from a 64 bit address.
670 */
671 Reg left = ra_alloc1(as, lref, RSET_GPR);
672 emit_dm(as, A64I_MOVw, dest, left);
673 } else { /* 32/32 bit no-op (cast). */
674 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
675 }
676 }
677 }
678}
679
680static void asm_strto(ASMState *as, IRIns *ir)
681{
682 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
683 IRRef args[2];
684 Reg tmp;
685 int32_t ofs = 0;
686 ra_evictset(as, RSET_SCRATCH);
687 if (ra_used(ir)) {
688 if (ra_hasspill(ir->s)) {
689 ofs = sps_scale(ir->s);
690 if (ra_hasreg(ir->r)) {
691 ra_free(as, ir->r);
692 ra_modified(as, ir->r);
693 emit_spload(as, ir, ir->r, ofs);
694 }
695 } else {
696 Reg dest = ra_dest(as, ir, RSET_FPR);
697 emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
698 }
699 }
700 asm_guardcnb(as, A64I_CBZ, RID_RET);
701 args[0] = ir->op1; /* GCstr *str */
702 args[1] = ASMREF_TMP1; /* TValue *n */
703 asm_gencall(as, ci, args);
704 tmp = ra_releasetmp(as, ASMREF_TMP1);
705 emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
706}
707
708/* -- Memory references --------------------------------------------------- */
709
710/* Store tagged value for ref at base+ofs. */
711static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
712{
713 RegSet allow = rset_exclude(RSET_GPR, base);
714 IRIns *ir = IR(ref);
715 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
716 "store of IR type %d", irt_type(ir->t));
717 if (irref_isk(ref)) {
718 TValue k;
719 lj_ir_kvalue(as->J->L, &k, ir);
720 emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
721 } else {
722 Reg src = ra_alloc1(as, ref, allow);
723 rset_clear(allow, src);
724 if (irt_isinteger(ir->t)) {
725 Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
726 emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
727 emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
728 } else {
729 Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
730 emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
731 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
732 }
733 }
734}
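/* Illustrative note (not emitted code): the tagged 64-bit value built above
** keeps the type code in bits 47 and up with the payload below, which is why
** a single ADDx of a pre-shifted type constant suffices:
**
**   integer:    ((int64_t)itype << 47) + (uint64_t)(uint32_t)i   (ADDx UXTW)
**   GC object:  ((int64_t)itype << 47) + (uint64_t)gcptr         (ADDx LSL #47)
**   primitive:  ~((int64_t)~itype << 47)                         (constant)
*/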
735
736/* Get pointer to TValue. */
737static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
738{
739 if ((mode & IRTMPREF_IN1)) {
740 IRIns *ir = IR(ref);
741 if (irt_isnum(ir->t)) {
742 if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
743 /* Use the number constant itself as a TValue. */
744 ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
745 return;
746 }
747 emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
748 } else {
749 asm_tvstore64(as, dest, 0, ref);
750 }
751 }
752 /* g->tmptv holds the TValue(s). */
753 emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
754}
755
756static void asm_aref(ASMState *as, IRIns *ir)
757{
758 Reg dest = ra_dest(as, ir, RSET_GPR);
759 Reg idx, base;
760 if (irref_isk(ir->op2)) {
761 IRRef tab = IR(ir->op1)->op1;
762 int32_t ofs = asm_fuseabase(as, tab);
763 IRRef refa = ofs ? tab : ir->op1;
764 uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
765 if (k) {
766 base = ra_alloc1(as, refa, RSET_GPR);
767 emit_dn(as, A64I_ADDx^k, dest, base);
768 return;
769 }
770 }
771 base = ra_alloc1(as, ir->op1, RSET_GPR);
772 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
773 emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
774}
775
776/* Inlined hash lookup. Specialized for key type and for const keys.
777** The equivalent C code is:
778** Node *n = hashkey(t, key);
779** do {
780** if (lj_obj_equal(&n->key, key)) return &n->val;
781** } while ((n = nextnode(n)));
782** return niltv(L);
783*/
784static void asm_href(ASMState *as, IRIns *ir, IROp merge)
785{
786 RegSet allow = RSET_GPR;
787 int destused = ra_used(ir);
788 Reg dest = ra_dest(as, ir, allow);
789 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
790 Reg tmp = RID_TMP, type = RID_NONE, key = RID_NONE, tkey;
791 IRRef refkey = ir->op2;
792 IRIns *irkey = IR(refkey);
793 int isk = irref_isk(refkey);
794 IRType1 kt = irkey->t;
795 uint32_t k = 0;
796 uint32_t khash;
797 MCLabel l_end, l_loop;
798 rset_clear(allow, tab);
799
800 /* Allocate register for tkey outside of the loop. */
801 if (isk) {
802 int64_t kk;
803 if (irt_isaddr(kt)) {
804 kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
805 } else if (irt_isnum(kt)) {
806 kk = (int64_t)ir_knum(irkey)->u64;
807 /* Assumes -0.0 is already canonicalized to +0.0. */
808 } else {
809 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
810 kk = ~((int64_t)~irt_toitype(kt) << 47);
811 }
812 k = emit_isk12(kk);
813 tkey = k ? 0 : ra_allock(as, kk, allow);
814 } else {
815 tkey = ra_scratch(as, allow);
816 }
817
818 /* Key not found in chain: jump to exit (if merged) or load niltv. */
819 l_end = emit_label(as);
820 as->invmcp = NULL;
821 if (merge == IR_NE) {
822 asm_guardcc(as, CC_AL);
823 } else if (destused) {
824 uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
825 lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
826 emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
827 }
828
829 /* Follow hash chain until the end. */
830 l_loop = --as->mcp;
831 if (destused)
832 emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
833
834 /* Type and value comparison. */
835 if (merge == IR_EQ)
836 asm_guardcc(as, CC_EQ);
837 else
838 emit_cond_branch(as, CC_EQ, l_end);
839 emit_nm(as, A64I_CMPx^k, tmp, tkey);
840 if (!destused)
841 emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
842 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
843 *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
844
845 /* Construct tkey as canonicalized or tagged key. */
846 if (!isk) {
847 if (irt_isnum(kt)) {
848 key = ra_alloc1(as, refkey, RSET_FPR);
849 emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
850 /* A64I_FMOV_R_D from key to tkey done below. */
851 } else {
852 lj_assertA(irt_isaddr(kt), "bad HREF key type");
853 key = ra_alloc1(as, refkey, allow);
854 type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
855 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
856 }
857 }
858
859 /* Load main position relative to tab->node into dest. */
860 khash = isk ? ir_khash(as, irkey) : 1;
861 if (khash == 0) {
862 emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
863 } else {
864 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
865 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
866 emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
867 if (isk) {
868 Reg tmphash = ra_allock(as, khash, allow);
869 emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
870 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
871 } else if (irt_isstr(kt)) {
872 emit_dnm(as, A64I_ANDw, dest, dest, tmp);
873 emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
874 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
875 } else { /* Must match with hash*() in lj_tab.c. */
876 emit_dnm(as, A64I_ANDw, dest, dest, tmp);
877 emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
878 emit_dnm(as, A64I_SUBw, dest, dest, tmp);
879 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
880 emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
881 emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
882 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
883 if (irt_isnum(kt)) {
884 emit_dnm(as, A64I_EORw, tmp, tkey, dest);
885 emit_dnm(as, A64I_ADDw, dest, dest, dest);
886 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
887 emit_nm(as, A64I_FCMPZd, (key & 31), 0);
888 emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
889 } else {
890 emit_dnm(as, A64I_EORw, tmp, key, dest);
891 emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
892 }
893 }
894 }
895}
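/* Sketch (not emitted code) of the main-position lookup computed by the
** khash block above: the mixed 32-bit hash is masked with t->hmask and then
** scaled by sizeof(Node), 24 bytes here, hence the *3 followed by the <<3:
**
**   Node *n = (Node *)((char *)noderef(t->node) +
**                      (khash & t->hmask) * sizeof(Node));
**
** For non-constant keys the bit mixing itself (the EOR/EXTR/SUB sequence)
** has to stay in sync with hashrot() and the hash*() helpers in lj_tab.c,
** as the comment above notes.
*/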
896
897static void asm_hrefk(ASMState *as, IRIns *ir)
898{
899 IRIns *kslot = IR(ir->op2);
900 IRIns *irkey = IR(kslot->op1);
901 int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
902 int32_t kofs = ofs + (int32_t)offsetof(Node, key);
903 int bigofs = !emit_checkofs(A64I_LDRx, kofs);
904 Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
905 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
906 Reg idx = node;
907 RegSet allow = rset_exclude(RSET_GPR, node);
908 uint64_t k;
909 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
910 if (bigofs) {
911 idx = dest;
912 rset_clear(allow, dest);
913 kofs = (int32_t)offsetof(Node, key);
914 } else if (ra_hasreg(dest)) {
915 emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
916 }
917 asm_guardcc(as, CC_NE);
918 if (irt_ispri(irkey->t)) {
919 k = ~((int64_t)~irt_toitype(irkey->t) << 47);
920 } else if (irt_isnum(irkey->t)) {
921 k = ir_knum(irkey)->u64;
922 } else {
923 k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
924 }
925 emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
926 emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
927 if (bigofs)
928 emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
929}
930
931static void asm_uref(ASMState *as, IRIns *ir)
932{
933 Reg dest = ra_dest(as, ir, RSET_GPR);
934 int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
935 if (irref_isk(ir->op1) && !guarded) {
936 GCfunc *fn = ir_kfunc(IR(ir->op1));
937 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
938 emit_lsptr(as, A64I_LDRx, dest, v);
939 } else {
940 if (guarded)
941 asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
942 if (ir->o == IR_UREFC)
943 emit_opk(as, A64I_ADDx, dest, dest,
944 (int32_t)offsetof(GCupval, tv), RSET_GPR);
945 else
946 emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
947 if (guarded)
948 emit_lso(as, A64I_LDRB, RID_TMP, dest,
949 (int32_t)offsetof(GCupval, closed));
950 if (irref_isk(ir->op1)) {
951 GCfunc *fn = ir_kfunc(IR(ir->op1));
952 uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
953 emit_loadu64(as, dest, k);
954 } else {
955 emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
956 (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
957 }
958 }
959}
960
961static void asm_fref(ASMState *as, IRIns *ir)
962{
963 UNUSED(as); UNUSED(ir);
964 lj_assertA(!ra_used(ir), "unfused FREF");
965}
966
967static void asm_strref(ASMState *as, IRIns *ir)
968{
969 RegSet allow = RSET_GPR;
970 Reg dest = ra_dest(as, ir, allow);
971 Reg base = ra_alloc1(as, ir->op1, allow);
972 IRIns *irr = IR(ir->op2);
973 int32_t ofs = sizeof(GCstr);
974 uint32_t m;
975 rset_clear(allow, base);
976 if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
977 emit_dn(as, A64I_ADDx^m, dest, base);
978 } else {
979 emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
980 emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
981 }
982}
983
984/* -- Loads and stores ---------------------------------------------------- */
985
986static A64Ins asm_fxloadins(IRIns *ir)
987{
988 switch (irt_type(ir->t)) {
989 case IRT_I8: return A64I_LDRB ^ A64I_LS_S;
990 case IRT_U8: return A64I_LDRB;
991 case IRT_I16: return A64I_LDRH ^ A64I_LS_S;
992 case IRT_U16: return A64I_LDRH;
993 case IRT_NUM: return A64I_LDRd;
994 case IRT_FLOAT: return A64I_LDRs;
995 default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw;
996 }
997}
998
999static A64Ins asm_fxstoreins(IRIns *ir)
1000{
1001 switch (irt_type(ir->t)) {
1002 case IRT_I8: case IRT_U8: return A64I_STRB;
1003 case IRT_I16: case IRT_U16: return A64I_STRH;
1004 case IRT_NUM: return A64I_STRd;
1005 case IRT_FLOAT: return A64I_STRs;
1006 default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw;
1007 }
1008}
1009
1010static void asm_fload(ASMState *as, IRIns *ir)
1011{
1012 Reg dest = ra_dest(as, ir, RSET_GPR);
1013 Reg idx;
1014 A64Ins ai = asm_fxloadins(ir);
1015 int32_t ofs;
1016 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
1017 idx = RID_GL;
1018 ofs = (ir->op2 << 2) - GG_OFS(g);
1019 } else {
1020 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1021 if (ir->op2 == IRFL_TAB_ARRAY) {
1022 ofs = asm_fuseabase(as, ir->op1);
1023 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1024 emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
1025 return;
1026 }
1027 }
1028 ofs = field_ofs[ir->op2];
1029 }
1030 emit_lso(as, ai, (dest & 31), idx, ofs);
1031}
1032
1033static void asm_fstore(ASMState *as, IRIns *ir)
1034{
1035 if (ir->r != RID_SINK) {
1036 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1037 IRIns *irf = IR(ir->op1);
1038 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
1039 int32_t ofs = field_ofs[irf->op2];
1040 emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
1041 }
1042}
1043
1044static void asm_xload(ASMState *as, IRIns *ir)
1045{
1046 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1047 lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
1048 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
1049}
1050
1051static void asm_xstore(ASMState *as, IRIns *ir)
1052{
1053 if (ir->r != RID_SINK) {
1054 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1055 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
1056 rset_exclude(RSET_GPR, src));
1057 }
1058}
1059
1060static void asm_ahuvload(ASMState *as, IRIns *ir)
1061{
1062 Reg idx, tmp;
1063 int32_t ofs = 0;
1064 RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1065 lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1066 irt_isint(ir->t),
1067 "bad load type %d", irt_type(ir->t));
1068 if (ra_used(ir)) {
1069 Reg dest = ra_dest(as, ir, allow);
1070 tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
1071 if (irt_isaddr(ir->t)) {
1072 emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1073 } else if (irt_isnum(ir->t)) {
1074 emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1075 } else if (irt_isint(ir->t)) {
1076 emit_dm(as, A64I_MOVw, dest, dest);
1077 }
1078 } else {
1079 tmp = ra_scratch(as, gpr);
1080 }
1081 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
1082 rset_clear(gpr, idx);
1083 if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
1084 if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
1085 /* Always do the type check, even if the load result is unused. */
1086 asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
1087 if (irt_type(ir->t) >= IRT_NUM) {
1088 lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
1089 "bad load type %d", irt_type(ir->t));
1090 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1091 ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
1092 } else if (irt_isaddr(ir->t)) {
1093 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
1094 emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
1095 } else if (irt_isnil(ir->t)) {
1096 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1097 } else {
1098 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1099 ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
1100 }
1101 if (ofs & FUSE_REG)
1102 emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1103 else
1104 emit_lso(as, A64I_LDRx, tmp, idx, ofs);
1105}
1106
1107static void asm_ahustore(ASMState *as, IRIns *ir)
1108{
1109 if (ir->r != RID_SINK) {
1110 RegSet allow = RSET_GPR;
1111 Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
1112 int32_t ofs = 0;
1113 if (irt_isnum(ir->t)) {
1114 src = ra_alloc1(as, ir->op2, RSET_FPR);
1115 idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
1116 if (ofs & FUSE_REG)
1117	emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs & 31));
1118 else
1119 emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
1120 } else {
1121 if (!irt_ispri(ir->t)) {
1122 src = ra_alloc1(as, ir->op2, allow);
1123 rset_clear(allow, src);
1124 if (irt_isinteger(ir->t))
1125 type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
1126 else
1127 type = ra_allock(as, irt_toitype(ir->t), allow);
1128 } else {
1129 tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
1130 }
1131 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
1132 A64I_STRx);
1133 if (ofs & FUSE_REG)
1134 emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1135 else
1136 emit_lso(as, A64I_STRx, tmp, idx, ofs);
1137 if (ra_hasreg(src)) {
1138 if (irt_isinteger(ir->t)) {
1139 emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
1140 } else {
1141 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
1142 }
1143 }
1144 }
1145 }
1146}
1147
1148static void asm_sload(ASMState *as, IRIns *ir)
1149{
1150 int32_t ofs = 8*((int32_t)ir->op1-2);
1151 IRType1 t = ir->t;
1152 Reg dest = RID_NONE, base;
1153 RegSet allow = RSET_GPR;
1154 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1155 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1156 lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1157 "inconsistent SLOAD variant");
1158 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1159 dest = ra_scratch(as, RSET_FPR);
1160 asm_tointg(as, ir, dest);
1161 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1162 } else if (ra_used(ir)) {
1163 Reg tmp = RID_NONE;
1164 if ((ir->op2 & IRSLOAD_CONVERT))
1165 tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
1166 lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
1167 "bad SLOAD type %d", irt_type(t));
1168 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
1169 base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
1170 if (irt_isaddr(t)) {
1171 emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1172 } else if ((ir->op2 & IRSLOAD_CONVERT)) {
1173 if (irt_isint(t)) {
1174 emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
1175 /* If value is already loaded for type check, move it to FPR. */
1176 if ((ir->op2 & IRSLOAD_TYPECHECK))
1177 emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
1178 else
1179 dest = tmp;
1180 t.irt = IRT_NUM; /* Check for original type. */
1181 } else {
1182 emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
1183 dest = tmp;
1184 t.irt = IRT_INT; /* Check for original type. */
1185 }
1186 } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
1187 emit_dm(as, A64I_MOVw, dest, dest);
1188 }
1189 goto dotypecheck;
1190 }
1191 base = ra_alloc1(as, REF_BASE, allow);
1192dotypecheck:
1193 rset_clear(allow, base);
1194 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1195 Reg tmp;
1196 if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
1197 tmp = dest;
1198 } else {
1199 tmp = ra_scratch(as, allow);
1200 rset_clear(allow, tmp);
1201 }
1202 if (ra_hasreg(dest) && tmp != dest)
1203 emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1204 /* Need type check, even if the load result is unused. */
1205 asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
1206 if (irt_type(t) >= IRT_NUM) {
1207 lj_assertA(irt_isinteger(t) || irt_isnum(t),
1208 "bad SLOAD type %d", irt_type(t));
1209 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1210 ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : (LJ_TISNUM << 15), allow), tmp);
1211 } else if (irt_isnil(t)) {
1212 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1213 } else if (irt_ispri(t)) {
1214 emit_nm(as, A64I_CMPx,
1215 ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
1216 } else {
1217 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
1218 emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
1219 }
1220 emit_lso(as, A64I_LDRx, tmp, base, ofs);
1221 return;
1222 }
1223 if (ra_hasreg(dest)) {
1224 emit_lso(as, irt_isnum(t) ? A64I_LDRd :
1225 (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
1226 ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
1227 }
1228}
1229
1230/* -- Allocations --------------------------------------------------------- */
1231
1232#if LJ_HASFFI
1233static void asm_cnew(ASMState *as, IRIns *ir)
1234{
1235 CTState *cts = ctype_ctsG(J2G(as->J));
1236 CTypeID id = (CTypeID)IR(ir->op1)->i;
1237 CTSize sz;
1238 CTInfo info = lj_ctype_info(cts, id, &sz);
1239 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1240 IRRef args[4];
1241 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1242 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1243 "bad CNEW/CNEWI operands");
1244
1245 as->gcsteps++;
1246 asm_setupresult(as, ir, ci); /* GCcdata * */
1247 /* Initialize immutable cdata object. */
1248 if (ir->o == IR_CNEWI) {
1249 int32_t ofs = sizeof(GCcdata);
1250 Reg r = ra_alloc1(as, ir->op2, allow);
1251 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1252 emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
1253 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1254 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1255 args[0] = ASMREF_L; /* lua_State *L */
1256 args[1] = ir->op1; /* CTypeID id */
1257 args[2] = ir->op2; /* CTSize sz */
1258 args[3] = ASMREF_TMP1; /* CTSize align */
1259 asm_gencall(as, ci, args);
1260 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1261 return;
1262 }
1263
1264 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1265 {
1266 Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
1267 emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1268 emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1269 emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
1270 if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
1271 }
1272 args[0] = ASMREF_L; /* lua_State *L */
1273 args[1] = ASMREF_TMP1; /* MSize size */
1274 asm_gencall(as, ci, args);
1275 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1276 ra_releasetmp(as, ASMREF_TMP1));
1277}
1278#endif
1279
1280/* -- Write barriers ------------------------------------------------------ */
1281
1282static void asm_tbar(ASMState *as, IRIns *ir)
1283{
1284 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1285 Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1286 Reg mark = RID_TMP;
1287 MCLabel l_end = emit_label(as);
1288 emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1289 /* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
1290 emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
1291 emit_setgl(as, tab, gc.grayagain);
1292 emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
1293 emit_getgl(as, link, gc.grayagain);
1294 emit_cond_branch(as, CC_EQ, l_end);
1295 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
1296 emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1297}
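/* Roughly equivalent C for the barrier above: a black table is turned gray
** again and linked onto the grayagain list, anything else takes the early
** exit right after the LDRB/TST pair:
**
**   if (t->marked & LJ_GC_BLACK) {
**     setgcrefr(t->gclist, g->gc.grayagain);
**     setgcref(g->gc.grayagain, obj2gco(t));
**     t->marked &= (uint8_t)~LJ_GC_BLACK;
**   }
*/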
1298
1299static void asm_obar(ASMState *as, IRIns *ir)
1300{
1301 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
1302 IRRef args[2];
1303 MCLabel l_end;
1304 Reg obj, val, tmp;
1305 /* No need for other object barriers (yet). */
1306 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1307 ra_evictset(as, RSET_SCRATCH);
1308 l_end = emit_label(as);
1309 args[0] = ASMREF_TMP1; /* global_State *g */
1310 args[1] = ir->op1; /* TValue *tv */
1311 asm_gencall(as, ci, args);
1312 emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
1313 obj = IR(ir->op1)->r;
1314 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
1315 emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
1316 emit_cond_branch(as, CC_EQ, l_end);
1317 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
1318 val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
1319 emit_lso(as, A64I_LDRB, tmp, obj,
1320 (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
1321 emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
1322}
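/* Sketch of the barrier condition above: the lj_gc_barrieruv() call is only
** reached when the closed upvalue is black and the freshly stored GC value
** is still white, roughly:
**
**   if ((uv->marked & LJ_GC_BLACK) && (o->gch.marked & LJ_GC_WHITES))
**     lj_gc_barrieruv(g, tv);
*/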
1323
1324/* -- Arithmetic and logic operations ------------------------------------- */
1325
1326static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
1327{
1328 Reg dest = ra_dest(as, ir, RSET_FPR);
1329 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1330 right = (left >> 8); left &= 255;
1331 emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));
1332}
1333
1334static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
1335{
1336 Reg dest = ra_dest(as, ir, RSET_FPR);
1337 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1338 emit_dn(as, ai, (dest & 31), (left & 31));
1339}
1340
1341static void asm_fpmath(ASMState *as, IRIns *ir)
1342{
1343 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1344 if (fpm == IRFPM_SQRT) {
1345 asm_fpunary(as, ir, A64I_FSQRTd);
1346 } else if (fpm <= IRFPM_TRUNC) {
1347 asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
1348 fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
1349 } else {
1350 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1351 }
1352}
1353
1354static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
1355{
1356 IRIns *ir;
1357 if (irref_isk(rref))
1358 return 0; /* Don't swap constants to the left. */
1359 if (irref_isk(lref))
1360 return 1; /* But swap constants to the right. */
1361 ir = IR(rref);
1362 if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
1363 (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1364 (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1365 return 0; /* Don't swap fusable operands to the left. */
1366 ir = IR(lref);
1367 if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
1368 (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1369 (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1370 return 1; /* But swap fusable operands to the right. */
1371 return 0; /* Otherwise don't swap. */
1372}
1373
1374static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
1375{
1376 IRRef lref = ir->op1, rref = ir->op2;
1377 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1378 uint32_t m;
1379 if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
1380 IRRef tmp = lref; lref = rref; rref = tmp;
1381 }
1382 left = ra_hintalloc(as, lref, dest, RSET_GPR);
1383 if (irt_is64(ir->t)) ai |= A64I_X;
1384 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1385 if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */
1386 asm_guardcc(as, CC_VS);
1387 ai |= A64I_S;
1388 }
1389 emit_dn(as, ai^m, dest, left);
1390}
1391
1392static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
1393{
1394 if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
1395 as->flagmcp = NULL;
1396 as->mcp++;
1397 ai |= A64I_S;
1398 }
1399 asm_intop(as, ir, ai);
1400}
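/* Note on the flagmcp handling above: if the most recently emitted
** instruction is the "cmp Rd, #0" belonging to a following guard, it is
** dropped (as->mcp++) and the arithmetic op is made flag-setting instead,
** e.g. "adds w0, w1, w2; b.eq ->exit" rather than
** "add w0, w1, w2; cmp w0, #0; b.eq ->exit".
*/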
1401
1402static void asm_intneg(ASMState *as, IRIns *ir)
1403{
1404 Reg dest = ra_dest(as, ir, RSET_GPR);
1405 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1406 emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
1407}
1408
1409/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
1410static void asm_intmul(ASMState *as, IRIns *ir)
1411{
1412 Reg dest = ra_dest(as, ir, RSET_GPR);
1413 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1414 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1415 if (irt_isguard(ir->t)) { /* IR_MULOV */
1416 asm_guardcc(as, CC_NE);
1417 emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
1418 emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
1419 emit_dnm(as, A64I_SMULL, dest, right, left);
1420 } else {
1421 emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
1422 }
1423}
1424
1425static void asm_add(ASMState *as, IRIns *ir)
1426{
1427 if (irt_isnum(ir->t)) {
1428 if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
1429 asm_fparith(as, ir, A64I_FADDd);
1430 return;
1431 }
1432 asm_intop_s(as, ir, A64I_ADDw);
1433}
1434
1435static void asm_sub(ASMState *as, IRIns *ir)
1436{
1437 if (irt_isnum(ir->t)) {
1438 if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
1439 asm_fparith(as, ir, A64I_FSUBd);
1440 return;
1441 }
1442 asm_intop_s(as, ir, A64I_SUBw);
1443}
1444
1445static void asm_mul(ASMState *as, IRIns *ir)
1446{
1447 if (irt_isnum(ir->t)) {
1448 asm_fparith(as, ir, A64I_FMULd);
1449 return;
1450 }
1451 asm_intmul(as, ir);
1452}
1453
1454#define asm_addov(as, ir) asm_add(as, ir)
1455#define asm_subov(as, ir) asm_sub(as, ir)
1456#define asm_mulov(as, ir) asm_mul(as, ir)
1457
1458#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd)
1459#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
1460
1461static void asm_neg(ASMState *as, IRIns *ir)
1462{
1463 if (irt_isnum(ir->t)) {
1464 asm_fpunary(as, ir, A64I_FNEGd);
1465 return;
1466 }
1467 asm_intneg(as, ir);
1468}
1469
1470static void asm_band(ASMState *as, IRIns *ir)
1471{
1472 A64Ins ai = A64I_ANDw;
1473 if (asm_fuseandshift(as, ir))
1474 return;
1475 if (as->flagmcp == as->mcp) {
1476 /* Try to drop cmp r, #0. */
1477 as->flagmcp = NULL;
1478 as->mcp++;
1479 ai = A64I_ANDSw;
1480 }
1481 asm_intop(as, ir, ai);
1482}
1483
1484static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
1485{
1486 IRRef lref = ir->op1, rref = ir->op2;
1487 IRIns *irl = IR(lref), *irr = IR(rref);
1488 if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
1489 (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
1490 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1491 uint32_t m;
1492 if (irl->o == IR_BNOT) {
1493 IRRef tmp = lref; lref = rref; rref = tmp;
1494 }
1495 left = ra_alloc1(as, lref, RSET_GPR);
1496 ai |= A64I_ON;
1497 if (irt_is64(ir->t)) ai |= A64I_X;
1498 m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
1499 emit_dn(as, ai^m, dest, left);
1500 } else {
1501 asm_intop(as, ir, ai);
1502 }
1503}
1504
1505static void asm_bor(ASMState *as, IRIns *ir)
1506{
1507 if (asm_fuseorshift(as, ir))
1508 return;
1509 asm_borbxor(as, ir, A64I_ORRw);
1510}
1511
1512#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw)
1513
1514static void asm_bnot(ASMState *as, IRIns *ir)
1515{
1516 A64Ins ai = A64I_MVNw;
1517 Reg dest = ra_dest(as, ir, RSET_GPR);
1518 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1519 if (irt_is64(ir->t)) ai |= A64I_X;
1520 emit_d(as, ai^m, dest);
1521}
1522
1523static void asm_bswap(ASMState *as, IRIns *ir)
1524{
1525 Reg dest = ra_dest(as, ir, RSET_GPR);
1526 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1527 emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
1528}
1529
1530static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
1531{
1532 int32_t shmask = irt_is64(ir->t) ? 63 : 31;
1533 if (irref_isk(ir->op2)) { /* Constant shifts. */
1534 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1535 int32_t shift = (IR(ir->op2)->i & shmask);
1536 IRIns *irl = IR(ir->op1);
1537 if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;
1538
1539 /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
1540 if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
1541 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
1542 int32_t shift2 = (IR(irl->op2)->i & shmask);
1543 shift = ((shift - shift2) & shmask);
1544 shmask -= shift2;
1545 ir = irl;
1546 }
1547 }
1548
1549 left = ra_alloc1(as, ir->op1, RSET_GPR);
1550 switch (sh) {
1551 case A64SH_LSL:
1552 emit_dn(as, ai | A64F_IMMS(shmask-shift) |
1553 A64F_IMMR((shmask-shift+1)&shmask), dest, left);
1554 break;
1555 case A64SH_LSR: case A64SH_ASR:
1556 emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
1557 break;
1558 case A64SH_ROR:
1559 emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
1560 break;
1561 }
1562 } else { /* Variable-length shifts. */
1563 Reg dest = ra_dest(as, ir, RSET_GPR);
1564 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1565 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1566 emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
1567 }
1568}
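/* Worked example (assuming 32-bit operands) for the constant-shift fusion
** above: a BSHL by 8 feeding a BSHR by 12 adjusts shift to (12-8)&31 = 4 and
** shmask to 31-8 = 23, so a single UBFMw with immr=4, imms=23 is emitted,
** i.e. "ubfx dest, src, #4, #20", which keeps the same bits as
** (src << 8) >> 12.
*/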
1569
1570#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
1571#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
1572#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
1573#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
1574#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
1575
1576static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
1577{
1578 Reg dest = ra_dest(as, ir, RSET_GPR);
1579 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1580 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1581 emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
1582 emit_nm(as, A64I_CMPw, left, right);
1583}
1584
1585static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
1586{
1587 Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
1588 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1589 right = ((left >> 8) & 31); left &= 31;
1590 emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left);
1591 emit_nm(as, A64I_FCMPd, left, right);
1592}
1593
1594static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
1595{
1596 if (irt_isnum(ir->t))
1597 asm_fpmin_max(as, ir, fcc);
1598 else
1599 asm_intmin_max(as, ir, cc);
1600}
1601
1602#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL)
1603#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE)
1604
1605/* -- Comparisons --------------------------------------------------------- */
1606
1607/* Map of comparisons to flags. ORDER IR. */
1608static const uint8_t asm_compmap[IR_ABC+1] = {
1609 /* op FP swp int cc FP cc */
1610 /* LT */ CC_GE + (CC_HS << 4),
1611 /* GE x */ CC_LT + (CC_HI << 4),
1612 /* LE */ CC_GT + (CC_HI << 4),
1613 /* GT x */ CC_LE + (CC_HS << 4),
1614 /* ULT x */ CC_HS + (CC_LS << 4),
1615 /* UGE */ CC_LO + (CC_LO << 4),
1616 /* ULE x */ CC_HI + (CC_LO << 4),
1617 /* UGT */ CC_LS + (CC_LS << 4),
1618 /* EQ */ CC_NE + (CC_NE << 4),
1619 /* NE */ CC_EQ + (CC_EQ << 4),
1620 /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */
1621};
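/* Each entry packs the *negated* conditions used by the exit guard: the low
** nibble is the integer CC, the high nibble the FP CC, and rows marked 'x'
** compare with the FP operands swapped. E.g. for IR_LT the guard leaves the
** trace on CC_GE (integer) or CC_HS (FP), so an unordered NaN comparison
** exits as well.
*/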
1622
1623/* FP comparisons. */
1624static void asm_fpcomp(ASMState *as, IRIns *ir)
1625{
1626 Reg left, right;
1627 A64Ins ai;
1628 int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
1629 if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
1630 left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
1631 right = 0;
1632 ai = A64I_FCMPZd;
1633 } else {
1634 left = ra_alloc2(as, ir, RSET_FPR);
1635 if (swp) {
1636 right = (left & 31); left = ((left >> 8) & 31);
1637 } else {
1638 right = ((left >> 8) & 31); left &= 31;
1639 }
1640 ai = A64I_FCMPd;
1641 }
1642 asm_guardcc(as, (asm_compmap[ir->o] >> 4));
1643 emit_nm(as, ai, left, right);
1644}
1645
1646/* Integer comparisons. */
1647static void asm_intcomp(ASMState *as, IRIns *ir)
1648{
1649 A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
1650 A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
1651 IRRef lref = ir->op1, rref = ir->op2;
1652 Reg left;
1653 uint32_t m;
1654 int cmpprev0 = 0;
1655 lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
1656 irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
1657 "bad comparison data type %d", irt_type(ir->t));
1658 if (asm_swapops(as, lref, rref)) {
1659 IRRef tmp = lref; lref = rref; rref = tmp;
1660 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
1661 else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
1662 }
1663 oldcc = cc;
1664 if (irref_isk(rref) && get_k64val(as, rref) == 0) {
1665 IRIns *irl = IR(lref);
1666 if (cc == CC_GE) cc = CC_PL;
1667 else if (cc == CC_LT) cc = CC_MI;
1668 else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */
1669 cmpprev0 = (irl+1 == ir);
1670 /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
1671 if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
1672 IRRef blref = irl->op1, brref = irl->op2;
1673 uint32_t m2 = 0;
1674 Reg bleft;
1675 if (asm_swapops(as, blref, brref)) {
1676 Reg tmp = blref; blref = brref; brref = tmp;
1677 }
1678 bleft = ra_alloc1(as, blref, RSET_GPR);
1679 if (irref_isk(brref)) {
1680 uint64_t k = get_k64val(as, brref);
1681 if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
1682 asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
1683 emit_ctz64(k)))
1684 return;
1685 m2 = emit_isk13(k, irt_is64(irl->t));
1686 }
1687 ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
1688 if (!m2)
1689 m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
1690 asm_guardcc(as, cc);
1691 emit_n(as, ai^m2, bleft);
1692 return;
1693 }
1694 if (cc == CC_EQ || cc == CC_NE) {
1695 /* Combine cmp-bcc into cbz/cbnz. */
1696 ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
1697 if (irt_is64(ir->t)) ai |= A64I_X;
1698 asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
1699 return;
1700 }
1701 }
1702nocombine:
1703 left = ra_alloc1(as, lref, RSET_GPR);
1704 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1705 asm_guardcc(as, cc);
1706 emit_n(as, ai^m, left);
1707 /* Signed comparison with zero and referencing previous ins? */
1708 if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
1709 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1710}
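/* Sketch of the single-bit combine above: when a BAND against a power-of-two
** constant is only compared against zero for EQ/NE, the IR pair collapses
** into one test-bit branch, e.g. a guard that must exit when (x & 0x10) == 0
** becomes "tbz x, #4, ->exit" via emit_ctz64(0x10) == 4; other constant
** masks fall back to TST with a K13 immediate or a fused operand.
*/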
1711
1712static void asm_comp(ASMState *as, IRIns *ir)
1713{
1714 if (irt_isnum(ir->t))
1715 asm_fpcomp(as, ir);
1716 else
1717 asm_intcomp(as, ir);
1718}
1719
1720#define asm_equal(as, ir) asm_comp(as, ir)
1721
1722/* -- Split register ops -------------------------------------------------- */
1723
1724/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */
1725static void asm_hiop(ASMState *as, IRIns *ir)
1726{
1727 /* HIOP is marked as a store because it needs its own DCE logic. */
1728 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1729 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1730 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1731 switch ((ir-1)->o) {
1732 case IR_CALLN:
1733 case IR_CALLL:
1734 case IR_CALLS:
1735 case IR_CALLXS:
1736 if (!uselo)
1737 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1738 break;
1739 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
1740 }
1741}
1742
1743/* -- Profiling ----------------------------------------------------------- */
1744
1745static void asm_prof(ASMState *as, IRIns *ir)
1746{
1747 uint32_t k = emit_isk13(HOOK_PROFILE, 0);
1748 lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13");
1749 UNUSED(ir);
1750 asm_guardcc(as, CC_NE);
1751 emit_n(as, A64I_TSTw^k, RID_TMP);
1752 emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1753}
1754
1755/* -- Stack handling ------------------------------------------------------ */
1756
1757/* Check Lua stack size for overflow. Use exit handler as fallback. */
1758static void asm_stack_check(ASMState *as, BCReg topslot,
1759 IRIns *irp, RegSet allow, ExitNo exitno)
1760{
1761 uint32_t k;
1762 Reg pbase = RID_BASE;
1763 if (irp) {
1764 pbase = irp->r;
1765 if (!ra_hasreg(pbase))
1766 pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
1767 }
1768 emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
1769 if (pbase & 0x80) /* Restore temp. register. */
1770 emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
1771 k = emit_isk12((8*topslot));
1772 lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
1773 emit_n(as, A64I_CMPx^k, RID_TMP);
1774 emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
1775 emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
1776 (int32_t)offsetof(lua_State, maxstack));
1777 if (pbase & 0x40) {
1778 emit_getgl(as, (pbase & 31), jit_base);
1779 if (pbase & 0x80) /* Save temp register. */
1780 emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
1781 }
1782 emit_getgl(as, RID_TMP, cur_L);
1783}
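/* Note on the pbase encoding above: if the parent BASE has no register, flag
** bits are carried in the Reg value itself. Bit 6 (0x40) means the value
** must first be reloaded from g->jit_base, bit 7 (0x80) means the borrowed
** register (RID_RET) is live and is saved to/restored from [sp] around the
** check; (pbase & 31) is the actual register number.
*/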
1784
1785/* Restore Lua stack from on-trace state. */
1786static void asm_stack_restore(ASMState *as, SnapShot *snap)
1787{
1788 SnapEntry *map = &as->T->snapmap[snap->mapofs];
1789#ifdef LUA_USE_ASSERT
1790 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
1791#endif
1792 MSize n, nent = snap->nent;
1793 /* Store the value of all modified slots to the Lua stack. */
1794 for (n = 0; n < nent; n++) {
1795 SnapEntry sn = map[n];
1796 BCReg s = snap_slot(sn);
1797 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1798 IRRef ref = snap_ref(sn);
1799 IRIns *ir = IR(ref);
1800 if ((sn & SNAP_NORESTORE))
1801 continue;
1802 if ((sn & SNAP_KEYINDEX)) {
1803 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1804 Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) :
1805 ra_alloc1(as, ref, allow);
1806 rset_clear(allow, r);
1807 emit_lso(as, A64I_STRw, r, RID_BASE, ofs);
1808 emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4);
1809 } else if (irt_isnum(ir->t)) {
1810 Reg src = ra_alloc1(as, ref, RSET_FPR);
1811 emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
1812 } else {
1813 asm_tvstore64(as, RID_BASE, ofs, ref);
1814 }
1815 checkmclim(as);
1816 }
1817 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
1818}
1819
1820/* -- GC handling --------------------------------------------------------- */
1821
1822/* Marker to prevent patching the GC check exit. */
1823#define ARM64_NOPATCH_GC_CHECK \
1824 (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
1825
1826/* Check GC threshold and do one or more GC steps. */
1827static void asm_gc_check(ASMState *as)
1828{
1829 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
1830 IRRef args[2];
1831 MCLabel l_end;
1832 Reg tmp2;
1833 ra_evictset(as, RSET_SCRATCH);
1834 l_end = emit_label(as);
1835 /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
1836 asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
1837 *--as->mcp = ARM64_NOPATCH_GC_CHECK;
1838 args[0] = ASMREF_TMP1; /* global_State *g */
1839 args[1] = ASMREF_TMP2; /* MSize steps */
1840 asm_gencall(as, ci, args);
1841 emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
1842 tmp2 = ra_releasetmp(as, ASMREF_TMP2);
1843 emit_loadi(as, tmp2, as->gcsteps);
1844 /* Jump around GC step if GC total < GC threshold. */
1845 emit_cond_branch(as, CC_LS, l_end);
1846 emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
1847 emit_getgl(as, tmp2, gc.threshold);
1848 emit_getgl(as, RID_TMP, gc.total);
1849 as->gcsteps = 0;
1850 checkmclim(as);
1851}
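/* Rough control flow of the check above. lj_gc_step_jit() returns nonzero in
** the GCSatomic/GCSfinalize phases, which forces the trace exit guarded by
** the NOPATCH marker:
**
**   if (g->gc.total > g->gc.threshold) {
**     if (lj_gc_step_jit(g, nsteps)) goto exit_trace;
**   }
*/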
1852
1853/* -- Loop handling ------------------------------------------------------- */
1854
1855/* Fixup the loop branch. */
1856static void asm_loop_fixup(ASMState *as)
1857{
1858 MCode *p = as->mctop;
1859 MCode *target = as->mcp;
1860 if (as->loopinv) { /* Inverted loop branch? */
1861 uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
1862 ptrdiff_t delta = target - (p - 2);
1863 /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
1864 p[-2] |= ((uint32_t)delta & mask) << 5;
1865 } else {
1866 ptrdiff_t delta = target - (p - 1);
1867 p[-1] = A64I_B | A64F_S26(delta);
1868 }
1869}
1870
1871/* Fixup the tail of the loop. */
1872static void asm_loop_tail_fixup(ASMState *as)
1873{
1874 UNUSED(as); /* Nothing to do. */
1875}
1876
1877/* -- Head of trace ------------------------------------------------------- */
1878
1879/* Coalesce BASE register for a root trace. */
1880static void asm_head_root_base(ASMState *as)
1881{
1882 IRIns *ir = IR(REF_BASE);
1883 Reg r = ir->r;
1884 if (ra_hasreg(r)) {
1885 ra_free(as, r);
1886 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
1887 ir->r = RID_INIT; /* No inheritance for modified BASE register. */
1888 if (r != RID_BASE)
1889 emit_movrr(as, ir, r, RID_BASE);
1890 }
1891}
1892
1893/* Coalesce BASE register for a side trace. */
1894static Reg asm_head_side_base(ASMState *as, IRIns *irp)
1895{
1896 IRIns *ir = IR(REF_BASE);
1897 Reg r = ir->r;
1898 if (ra_hasreg(r)) {
1899 ra_free(as, r);
1900 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
1901 ir->r = RID_INIT; /* No inheritance for modified BASE register. */
1902 if (irp->r == r) {
1903 return r; /* Same BASE register already coalesced. */
1904 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
1905 /* Move from coalesced parent reg. */
1906 emit_movrr(as, ir, r, irp->r);
1907 return irp->r;
1908 } else {
1909 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
1910 }
1911 }
1912 return RID_NONE;
1913}
1914
1915/* -- Tail of trace ------------------------------------------------------- */
1916
1917/* Fixup the tail code. */
1918static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1919{
1920 MCode *p = as->mctop;
1921 MCode *target;
1922 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1923 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
1924 if (spadj == 0) {
1925 *--p = A64I_LE(A64I_NOP);
1926 as->mctop = p;
1927 } else {
1928 /* Patch stack adjustment. */
1929 uint32_t k = emit_isk12(spadj);
1930 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
1931 p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
1932 }
1933 /* Patch exit branch. */
1934 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
1935 p[-1] = A64I_B | A64F_S26((target-p)+1);
1936}
1937
1938/* Prepare tail of code. */
1939static void asm_tail_prep(ASMState *as)
1940{
1941 MCode *p = as->mctop - 1; /* Leave room for exit branch. */
1942 if (as->loopref) {
1943 as->invmcp = as->mcp = p;
1944 } else {
1945 as->mcp = p-1; /* Leave room for stack pointer adjustment. */
1946 as->invmcp = NULL;
1947 }
1948 *p = 0; /* Prevent load/store merging. */
1949}
1950
1951/* -- Trace setup --------------------------------------------------------- */
1952
1953/* Ensure there are enough stack slots for call arguments. */
1954static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1955{
1956#if LJ_HASFFI
1957 uint32_t i, nargs = CCI_XNARGS(ci);
1958 if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
1959 (LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
1960 IRRef args[CCI_NARGS_MAX*2];
1961 int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
1962 int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
1963 asm_collectargs(as, ir, ci, args);
1964#if LJ_ABI_WIN
1965 if ((ci->flags & CCI_VARARG)) nfpr = 0;
1966#endif
1967 for (i = 0; i < nargs; i++) {
1968 int al = spalign;
1969 if (!args[i]) {
1970#if LJ_TARGET_OSX
1971	/* Marker for start of varargs. */
1972 nfpr = 0;
1973 ngpr = 0;
1974 spalign = 7;
1975#endif
1976 } else if (irt_isfp(IR(args[i])->t)) {
1977 if (nfpr > 0) { nfpr--; continue; }
1978#if LJ_ABI_WIN
1979 if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
1980#elif LJ_TARGET_OSX
1981 al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
1982#endif
1983 } else {
1984 if (ngpr > 0) { ngpr--; continue; }
1985#if LJ_TARGET_OSX
1986 al |= irt_size(IR(args[i])->t) - 1;
1987#endif
1988 }
1989 spofs = (spofs + 2*al+1) & ~al; /* Align and bump stack pointer. */
1990 }
1991 nslots = (spofs + 3) >> 2;
1992 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
1993 as->evenspill = nslots;
1994 }
1995#endif
1996 return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
1997}
1998
1999static void asm_setup_target(ASMState *as)
2000{
2001 /* May need extra exit for asm_stack_check on side traces. */
2002 asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
2003}
2004
2005#if LJ_BE
2006/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
2007static void asm_mcode_fixup(MCode *mcode, MSize size)
2008{
2009 MCode *pe = (MCode *)((char *)mcode + size);
2010 while (mcode < pe) {
2011 MCode ins = *mcode;
2012 *mcode++ = lj_bswap(ins);
2013 }
2014}
2015#define LJ_TARGET_MCODE_FIXUP 1
2016#endif
2017
2018/* -- Trace patching ------------------------------------------------------ */
2019
2020/* Patch exit jumps of existing machine code to a new target. */
2021void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2022{
2023 MCode *p = T->mcode;
2024 MCode *pe = (MCode *)((char *)p + T->szmcode);
2025 MCode *cstart = NULL;
2026 MCode *mcarea = lj_mcode_patch(J, p, 0);
2027 MCode *px = exitstub_trace_addr(T, exitno);
2028 int patchlong = 1;
2029 /* Note: this assumes a trace exit is only ever patched once. */
2030 for (; p < pe; p++) {
2031 /* Look for exitstub branch, replace with branch to target. */
2032 ptrdiff_t delta = target - p;
2033 MCode ins = A64I_LE(*p);
2034 if ((ins & 0xff000000u) == 0x54000000u &&
2035 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
2036 /* Patch bcc, if within range. */
2037 if (A64F_S_OK(delta, 19)) {
2038 *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
2039 if (!cstart) cstart = p;
2040 }
2041 } else if ((ins & 0xfc000000u) == 0x14000000u &&
2042 ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
2043 /* Patch b. */
2044 lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
2045 *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
2046 if (!cstart) cstart = p;
2047 } else if ((ins & 0x7e000000u) == 0x34000000u &&
2048 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
2049 /* Patch cbz/cbnz, if within range. */
2050 if (p[-1] == ARM64_NOPATCH_GC_CHECK) {
2051 patchlong = 0;
2052 } else if (A64F_S_OK(delta, 19)) {
2053 *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
2054 if (!cstart) cstart = p;
2055 }
2056 } else if ((ins & 0x7e000000u) == 0x36000000u &&
2057 ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
2058 /* Patch tbz/tbnz, if within range. */
2059 if (A64F_S_OK(delta, 14)) {
2060 *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
2061 if (!cstart) cstart = p;
2062 }
2063 }
2064 }
2065 /* Always patch long-range branch in exit stub itself, except if we can't. */
2066 if (patchlong) {
2067 ptrdiff_t delta = target - px;
2068 lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
2069 *px = A64I_B | A64F_S26(delta);
2070 if (!cstart) cstart = px;
2071 }
2072 if (cstart) lj_mcode_sync(cstart, px+1);
2073 lj_mcode_patch(J, mcarea, 1);
2074}
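/* The opcode masks above match the four ARM64 branch forms that can target
** an exit stub: 0x54xxxxxx is B.cond (19-bit offset), top bits 000101 are B
** (26-bit offset), 0x34000000/0xB4000000 are CBZ/CBNZ (19-bit offset) and
** 0x36000000/0xB6000000 are TBZ/TBNZ (14-bit offset). In-trace branches are
** only redirected when the new target is in range; the unconditional B in
** the exit stub itself is always patched (unless the GC check marker
** suppresses it) and is asserted to reach any target.
*/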
2075
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 3adb62f4..2f64f491 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
23{ 23{
24 Reg r = IR(ref)->r; 24 Reg r = IR(ref)->r;
25 if (ra_noreg(r)) { 25 if (ra_noreg(r)) {
26 if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) 26 if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
27 return RID_ZERO; 27 return RID_ZERO;
28 r = ra_allocref(as, ref, allow); 28 r = ra_allocref(as, ref, allow);
29 } else { 29 } else {
@@ -64,17 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
64/* Setup spare long-range jump slots per mcarea. */ 64/* Setup spare long-range jump slots per mcarea. */
65static void asm_sparejump_setup(ASMState *as) 65static void asm_sparejump_setup(ASMState *as)
66{ 66{
67 MCode *mxp = as->mcbot; 67 MCode *mxp = as->mctop;
68 if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { 68 if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
69 lua_assert(MIPSI_NOP == 0); 69 mxp -= MIPS_SPAREJUMP*2;
70 lj_assertA(MIPSI_NOP == 0, "bad NOP");
70 memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); 71 memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
71 mxp += MIPS_SPAREJUMP*2; 72 as->mctop = mxp;
72 lua_assert(mxp < as->mctop); 73 }
73 lj_mcode_sync(as->mcbot, mxp); 74}
74 lj_mcode_commitbot(as->J, mxp); 75
75 as->mcbot = mxp; 76static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
76 as->mclim = as->mcbot + MCLIM_REDZONE; 77{
78 MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
79 int slot = MIPS_SPAREJUMP;
80 while (slot--) {
81 mxp -= 2;
82 if (*mxp == tjump) {
83 return mxp;
84 } else if (*mxp == MIPSI_NOP) {
85 *mxp = tjump;
86 return mxp;
87 }
77 } 88 }
89 return NULL;
78} 90}
79 91
80/* Setup exit stub after the end of each trace. */ 92/* Setup exit stub after the end of each trace. */
@@ -84,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as)
84 /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ 96 /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
85 *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; 97 *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
86 *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); 98 *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
87 lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); 99 lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0,
100 "branch target out of range");
88 *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; 101 *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
89 as->mctop = mxp; 102 as->mctop = mxp;
90} 103}
@@ -101,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
101 as->invmcp = NULL; 114 as->invmcp = NULL;
102 as->loopinv = 1; 115 as->loopinv = 1;
103 as->mcp = p+1; 116 as->mcp = p+1;
117#if !LJ_TARGET_MIPSR6
104 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ 118 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */
119#else
120 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u :
121 (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */
122#endif
105 target = p; /* Patch target later in asm_loop_fixup. */ 123 target = p; /* Patch target later in asm_loop_fixup. */
106 } 124 }
107 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); 125 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
@@ -165,9 +183,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
165 } else if (ir->o == IR_UREFC) { 183 } else if (ir->o == IR_UREFC) {
166 if (irref_isk(ir->op1)) { 184 if (irref_isk(ir->op1)) {
167 GCfunc *fn = ir_kfunc(IR(ir->op1)); 185 GCfunc *fn = ir_kfunc(IR(ir->op1));
168 int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); 186 intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv;
169 int32_t jgl = (intptr_t)J2G(as->J); 187 intptr_t jgl = (intptr_t)J2G(as->J);
170 if ((uint32_t)(ofs-jgl) < 65536) { 188 if ((uintptr_t)(ofs-jgl) < 65536) {
171 *ofsp = ofs-jgl-32768; 189 *ofsp = ofs-jgl-32768;
172 return RID_JGL; 190 return RID_JGL;
173 } else { 191 } else {
@@ -175,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
175 return ra_allock(as, ofs-(int16_t)ofs, allow); 193 return ra_allock(as, ofs-(int16_t)ofs, allow);
176 } 194 }
177 } 195 }
196 } else if (ir->o == IR_TMPREF) {
197 *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
198 return RID_JGL;
178 } 199 }
179 } 200 }
180 *ofsp = 0; 201 *ofsp = 0;
@@ -189,20 +210,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
189 Reg base; 210 Reg base;
190 if (ra_noreg(ir->r) && canfuse(as, ir)) { 211 if (ra_noreg(ir->r) && canfuse(as, ir)) {
191 if (ir->o == IR_ADD) { 212 if (ir->o == IR_ADD) {
192 int32_t ofs2; 213 intptr_t ofs2;
193 if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { 214 if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
215 checki16(ofs2))) {
194 ref = ir->op1; 216 ref = ir->op1;
195 ofs = ofs2; 217 ofs = (int32_t)ofs2;
196 } 218 }
197 } else if (ir->o == IR_STRREF) { 219 } else if (ir->o == IR_STRREF) {
198 int32_t ofs2 = 65536; 220 intptr_t ofs2 = 65536;
199 lua_assert(ofs == 0); 221 lj_assertA(ofs == 0, "bad usage");
200 ofs = (int32_t)sizeof(GCstr); 222 ofs = (int32_t)sizeof(GCstr);
201 if (irref_isk(ir->op2)) { 223 if (irref_isk(ir->op2)) {
202 ofs2 = ofs + IR(ir->op2)->i; 224 ofs2 = ofs + get_kval(as, ir->op2);
203 ref = ir->op1; 225 ref = ir->op1;
204 } else if (irref_isk(ir->op1)) { 226 } else if (irref_isk(ir->op1)) {
205 ofs2 = ofs + IR(ir->op1)->i; 227 ofs2 = ofs + get_kval(as, ir->op1);
206 ref = ir->op2; 228 ref = ir->op2;
207 } 229 }
208 if (!checki16(ofs2)) { 230 if (!checki16(ofs2)) {
@@ -210,7 +232,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
210 Reg right, left = ra_alloc2(as, ir, allow); 232 Reg right, left = ra_alloc2(as, ir, allow);
211 right = (left >> 8); left &= 255; 233 right = (left >> 8); left &= 255;
212 emit_hsi(as, mi, rt, RID_TMP, ofs); 234 emit_hsi(as, mi, rt, RID_TMP, ofs);
213 emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); 235 emit_dst(as, MIPSI_AADDU, RID_TMP, left, right);
214 return; 236 return;
215 } 237 }
216 ofs = ofs2; 238 ofs = ofs2;
@@ -225,29 +247,43 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
225/* Generate a call to a C function. */ 247/* Generate a call to a C function. */
226static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 248static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
227{ 249{
228 uint32_t n, nargs = CCI_NARGS(ci); 250 uint32_t n, nargs = CCI_XNARGS(ci);
229 int32_t ofs = 16; 251 int32_t ofs = LJ_32 ? 16 : 0;
252#if LJ_SOFTFP
253 Reg gpr = REGARG_FIRSTGPR;
254#else
230 Reg gpr, fpr = REGARG_FIRSTFPR; 255 Reg gpr, fpr = REGARG_FIRSTFPR;
256#endif
231 if ((void *)ci->func) 257 if ((void *)ci->func)
232 emit_call(as, (void *)ci->func); 258 emit_call(as, (void *)ci->func, 1);
259#if !LJ_SOFTFP
233 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) 260 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
234 as->cost[gpr] = REGCOST(~0u, ASMREF_L); 261 as->cost[gpr] = REGCOST(~0u, ASMREF_L);
235 gpr = REGARG_FIRSTGPR; 262 gpr = REGARG_FIRSTGPR;
263#endif
236 for (n = 0; n < nargs; n++) { /* Setup args. */ 264 for (n = 0; n < nargs; n++) { /* Setup args. */
237 IRRef ref = args[n]; 265 IRRef ref = args[n];
238 if (ref) { 266 if (ref) {
239 IRIns *ir = IR(ref); 267 IRIns *ir = IR(ref);
268#if !LJ_SOFTFP
240 if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && 269 if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
241 !(ci->flags & CCI_VARARG)) { 270 !(ci->flags & CCI_VARARG)) {
242 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ 271 lj_assertA(rset_test(as->freeset, fpr),
272 "reg %d not free", fpr); /* Already evicted. */
243 ra_leftov(as, fpr, ref); 273 ra_leftov(as, fpr, ref);
244 fpr += 2; 274 fpr += LJ_32 ? 2 : 1;
245 gpr += irt_isnum(ir->t) ? 2 : 1; 275 gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1;
246 } else { 276 } else
277#endif
278 {
279#if LJ_32 && !LJ_SOFTFP
247 fpr = REGARG_LASTFPR+1; 280 fpr = REGARG_LASTFPR+1;
248 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; 281#endif
282 if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
249 if (gpr <= REGARG_LASTGPR) { 283 if (gpr <= REGARG_LASTGPR) {
250 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ 284 lj_assertA(rset_test(as->freeset, gpr),
285 "reg %d not free", gpr); /* Already evicted. */
286#if !LJ_SOFTFP
251 if (irt_isfp(ir->t)) { 287 if (irt_isfp(ir->t)) {
252 RegSet of = as->freeset; 288 RegSet of = as->freeset;
253 Reg r; 289 Reg r;
@@ -256,31 +292,56 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
256 r = ra_alloc1(as, ref, RSET_FPR); 292 r = ra_alloc1(as, ref, RSET_FPR);
257 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 293 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
258 if (irt_isnum(ir->t)) { 294 if (irt_isnum(ir->t)) {
295#if LJ_32
259 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); 296 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
260 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); 297 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
261 lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ 298 lj_assertA(rset_test(as->freeset, gpr+1),
299 "reg %d not free", gpr+1); /* Already evicted. */
262 gpr += 2; 300 gpr += 2;
301#else
302 emit_tg(as, MIPSI_DMFC1, gpr, r);
303 gpr++; fpr++;
304#endif
263 } else if (irt_isfloat(ir->t)) { 305 } else if (irt_isfloat(ir->t)) {
264 emit_tg(as, MIPSI_MFC1, gpr, r); 306 emit_tg(as, MIPSI_MFC1, gpr, r);
265 gpr++; 307 gpr++;
308#if LJ_64
309 fpr++;
310#endif
266 } 311 }
267 } else { 312 } else
313#endif
314 {
268 ra_leftov(as, gpr, ref); 315 ra_leftov(as, gpr, ref);
269 gpr++; 316 gpr++;
317#if LJ_64 && !LJ_SOFTFP
318 fpr++;
319#endif
270 } 320 }
271 } else { 321 } else {
272 Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 322 Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
323#if LJ_32
273 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; 324 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
274 emit_spstore(as, ir, r, ofs); 325 emit_spstore(as, ir, r, ofs);
275 ofs += irt_isnum(ir->t) ? 8 : 4; 326 ofs += irt_isnum(ir->t) ? 8 : 4;
327#else
328 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0));
329 ofs += 8;
330#endif
276 } 331 }
277 } 332 }
278 } else { 333 } else {
334#if !LJ_SOFTFP
279 fpr = REGARG_LASTFPR+1; 335 fpr = REGARG_LASTFPR+1;
280 if (gpr <= REGARG_LASTGPR) 336#endif
337 if (gpr <= REGARG_LASTGPR) {
281 gpr++; 338 gpr++;
282 else 339#if LJ_64 && !LJ_SOFTFP
283 ofs += 4; 340 fpr++;
341#endif
342 } else {
343 ofs += LJ_32 ? 4 : 8;
344 }
284 } 345 }
285 checkmclim(as); 346 checkmclim(as);
286 } 347 }
@@ -291,28 +352,38 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
291{ 352{
292 RegSet drop = RSET_SCRATCH; 353 RegSet drop = RSET_SCRATCH;
293 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 354 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
355#if !LJ_SOFTFP
294 if ((ci->flags & CCI_NOFPRCLOBBER)) 356 if ((ci->flags & CCI_NOFPRCLOBBER))
295 drop &= ~RSET_FPR; 357 drop &= ~RSET_FPR;
358#endif
296 if (ra_hasreg(ir->r)) 359 if (ra_hasreg(ir->r))
297 rset_clear(drop, ir->r); /* Dest reg handled below. */ 360 rset_clear(drop, ir->r); /* Dest reg handled below. */
298 if (hiop && ra_hasreg((ir+1)->r)) 361 if (hiop && ra_hasreg((ir+1)->r))
299 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 362 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
300 ra_evictset(as, drop); /* Evictions must be performed first. */ 363 ra_evictset(as, drop); /* Evictions must be performed first. */
301 if (ra_used(ir)) { 364 if (ra_used(ir)) {
302 lua_assert(!irt_ispri(ir->t)); 365 lj_assertA(!irt_ispri(ir->t), "PRI dest");
303 if (irt_isfp(ir->t)) { 366 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
304 if ((ci->flags & CCI_CASTU64)) { 367 if ((ci->flags & CCI_CASTU64)) {
305 int32_t ofs = sps_scale(ir->s); 368 int32_t ofs = sps_scale(ir->s);
306 Reg dest = ir->r; 369 Reg dest = ir->r;
307 if (ra_hasreg(dest)) { 370 if (ra_hasreg(dest)) {
308 ra_free(as, dest); 371 ra_free(as, dest);
309 ra_modified(as, dest); 372 ra_modified(as, dest);
373#if LJ_32
310 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); 374 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
311 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); 375 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
376#else
377 emit_tg(as, MIPSI_DMTC1, RID_RET, dest);
378#endif
312 } 379 }
313 if (ofs) { 380 if (ofs) {
381#if LJ_32
314 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); 382 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
315 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); 383 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
384#else
385 emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs);
386#endif
316 } 387 }
317 } else { 388 } else {
318 ra_destreg(as, ir, RID_FPRET); 389 ra_destreg(as, ir, RID_FPRET);
@@ -325,15 +396,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
325 } 396 }
326} 397}
327 398
328static void asm_call(ASMState *as, IRIns *ir)
329{
330 IRRef args[CCI_NARGS_MAX];
331 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
332 asm_collectargs(as, ir, ci, args);
333 asm_setupresult(as, ir, ci);
334 asm_gencall(as, ci, args);
335}
336
337static void asm_callx(ASMState *as, IRIns *ir) 399static void asm_callx(ASMState *as, IRIns *ir)
338{ 400{
339 IRRef args[CCI_NARGS_MAX*2]; 401 IRRef args[CCI_NARGS_MAX*2];
@@ -346,7 +408,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
346 func = ir->op2; irf = IR(func); 408 func = ir->op2; irf = IR(func);
347 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 409 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
348 if (irref_isk(func)) { /* Call to constant address. */ 410 if (irref_isk(func)) { /* Call to constant address. */
349 ci.func = (ASMFunction)(void *)(irf->i); 411 ci.func = (ASMFunction)(void *)get_kval(as, func);
350 } else { /* Need specific register for indirect calls. */ 412 } else { /* Need specific register for indirect calls. */
351 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); 413 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
352 MCode *p = as->mcp; 414 MCode *p = as->mcp;
@@ -361,27 +423,23 @@ static void asm_callx(ASMState *as, IRIns *ir)
361 asm_gencall(as, &ci, args); 423 asm_gencall(as, &ci, args);
362} 424}
363 425
364static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 426#if !LJ_SOFTFP
365{
366 const CCallInfo *ci = &lj_ir_callinfo[id];
367 IRRef args[2];
368 args[0] = ir->op1;
369 args[1] = ir->op2;
370 asm_setupresult(as, ir, ci);
371 asm_gencall(as, ci, args);
372}
373
374static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) 427static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
375{ 428{
376 /* The modified regs must match with the *.dasc implementation. */ 429 /* The modified regs must match with the *.dasc implementation. */
377 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| 430 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
378 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); 431 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
432#if LJ_TARGET_MIPSR6
433 |RID2RSET(RID_F21)
434#endif
435 ;
379 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); 436 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
380 ra_evictset(as, drop); 437 ra_evictset(as, drop);
381 ra_destreg(as, ir, RID_FPRET); 438 ra_destreg(as, ir, RID_FPRET);
382 emit_call(as, (void *)lj_ir_callinfo[id].func); 439 emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
383 ra_leftov(as, REGARG_FIRSTFPR, ir->op1); 440 ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
384} 441}
442#endif
385 443
386/* -- Returns ------------------------------------------------------------- */ 444/* -- Returns ------------------------------------------------------------- */
387 445
@@ -390,25 +448,52 @@ static void asm_retf(ASMState *as, IRIns *ir)
390{ 448{
391 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 449 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
392 void *pc = ir_kptr(IR(ir->op2)); 450 void *pc = ir_kptr(IR(ir->op2));
393 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 451 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
394 as->topslot -= (BCReg)delta; 452 as->topslot -= (BCReg)delta;
395 if ((int32_t)as->topslot < 0) as->topslot = 0; 453 if ((int32_t)as->topslot < 0) as->topslot = 0;
396 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 454 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
397 emit_setgl(as, base, jit_base); 455 emit_setgl(as, base, jit_base);
398 emit_addptr(as, base, -8*delta); 456 emit_addptr(as, base, -8*delta);
399 asm_guard(as, MIPSI_BNE, RID_TMP, 457 asm_guard(as, MIPSI_BNE, RID_TMP,
400 ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); 458 ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
401 emit_tsi(as, MIPSI_LW, RID_TMP, base, LJ_BE ? -8 : -4); 459 emit_tsi(as, MIPSI_AL, RID_TMP, base, (LJ_BE || LJ_FR2) ? -8 : -4);
460}
461
462/* -- Buffer operations --------------------------------------------------- */
463
464#if LJ_HASBUFFER
465static void asm_bufhdr_write(ASMState *as, Reg sb)
466{
467 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
468 IRIns irgc;
469 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
470 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
471 if ((as->flags & JIT_F_MIPSXXR2)) {
472 emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp,
473 lj_fls(SBUF_MASK_FLAG), 0);
474 } else {
475 emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
476 emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
477 }
478 emit_getgl(as, RID_TMP, cur_L);
479 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
402} 480}
481#endif
403 482
404/* -- Type conversions ---------------------------------------------------- */ 483/* -- Type conversions ---------------------------------------------------- */
405 484
485#if !LJ_SOFTFP
406static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 486static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
407{ 487{
408 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 488 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
409 Reg dest = ra_dest(as, ir, RSET_GPR); 489 Reg dest = ra_dest(as, ir, RSET_GPR);
490#if !LJ_TARGET_MIPSR6
410 asm_guard(as, MIPSI_BC1F, 0, 0); 491 asm_guard(as, MIPSI_BC1F, 0, 0);
411 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); 492 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
493#else
494 asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31));
495 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left);
496#endif
412 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); 497 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
413 emit_tg(as, MIPSI_MFC1, dest, tmp); 498 emit_tg(as, MIPSI_MFC1, dest, tmp);
414 emit_fg(as, MIPSI_CVT_W_D, tmp, left); 499 emit_fg(as, MIPSI_CVT_W_D, tmp, left);
@@ -424,15 +509,57 @@ static void asm_tobit(ASMState *as, IRIns *ir)
424 emit_tg(as, MIPSI_MFC1, dest, tmp); 509 emit_tg(as, MIPSI_MFC1, dest, tmp);
425 emit_fgh(as, MIPSI_ADD_D, tmp, left, right); 510 emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
426} 511}
512#elif LJ_64 /* && LJ_SOFTFP */
513static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
514{
515 /* The modified regs must match with the *.dasc implementation. */
516 RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
517 RID2RSET(RID_R1)|RID2RSET(RID_R12);
518 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
519 ra_evictset(as, drop);
520 /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
521 ra_destreg(as, ir, RID_RET);
522 asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO);
523 emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0);
524 if (r == RID_NONE)
525 ra_leftov(as, REGARG_FIRSTGPR, ir->op1);
526 else if (r != REGARG_FIRSTGPR)
527 emit_move(as, REGARG_FIRSTGPR, r);
528}
529
530static void asm_tobit(ASMState *as, IRIns *ir)
531{
532 Reg dest = ra_dest(as, ir, RSET_GPR);
533 emit_dta(as, MIPSI_SLL, dest, dest, 0);
534 asm_callid(as, ir, IRCALL_lj_vm_tobit);
535}
536#endif
427 537
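The asm_tointg path above guards the conversion by converting number to int, converting back, and taking the side exit unless the round trip compares equal; NaNs and fractional inputs fail the compare. A hedged scalar model of that guard, not from the patch, with lrint() standing in for CVT.W.D under the FPU's default rounding mode:

#include <stdint.h>
#include <math.h>
#include <stdio.h>

static int checked_tointg(double x, int32_t *out)
{
  int32_t i = (int32_t)lrint(x);   /* CVT.W.D under the current mode */
  if ((double)i != x)              /* C.EQ.D / CMP.EQ.D fails -> exit */
    return 0;
  *out = i;
  return 1;
}

int main(void)
{
  int32_t v;
  printf("%d\n", checked_tointg(42.0, &v));  /* 1, v == 42 */
  printf("%d\n", checked_tointg(2.5, &v));   /* 0: round trip not exact */
  return 0;
}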
428static void asm_conv(ASMState *as, IRIns *ir) 538static void asm_conv(ASMState *as, IRIns *ir)
429{ 539{
430 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 540 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
541#if !LJ_SOFTFP32
431 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 542 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
543#endif
544#if LJ_64
545 int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
546#endif
432 IRRef lref = ir->op1; 547 IRRef lref = ir->op1;
433 lua_assert(irt_type(ir->t) != st); 548#if LJ_32
434 lua_assert(!(irt_isint64(ir->t) || 549 /* 64 bit integer conversions are handled by SPLIT. */
435 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ 550 lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
551 "IR %04d has unsplit 64 bit type",
552 (int)(ir - as->ir) - REF_BIAS);
553#endif
554#if LJ_SOFTFP32
555 /* FP conversions are handled by SPLIT. */
556 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
557 "IR %04d has FP type",
558 (int)(ir - as->ir) - REF_BIAS);
559 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
560#else
561 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
562#if !LJ_SOFTFP
436 if (irt_isfp(ir->t)) { 563 if (irt_isfp(ir->t)) {
437 Reg dest = ra_dest(as, ir, RSET_FPR); 564 Reg dest = ra_dest(as, ir, RSET_FPR);
438 if (stfp) { /* FP to FP conversion. */ 565 if (stfp) { /* FP to FP conversion. */
@@ -448,27 +575,56 @@ static void asm_conv(ASMState *as, IRIns *ir)
448 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); 575 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
449 emit_fg(as, MIPSI_CVT_D_W, dest, dest); 576 emit_fg(as, MIPSI_CVT_D_W, dest, dest);
450 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 577 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
451 (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), 578 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
452 RSET_GPR);
453 emit_tg(as, MIPSI_MTC1, RID_TMP, dest); 579 emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
454 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); 580 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
455 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 581 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
582#if LJ_64
583 } else if(st == IRT_U64) { /* U64 to FP conversion. */
584 /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
585 Reg left = ra_alloc1(as, lref, RSET_GPR);
586 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
587 MCLabel l_end = emit_label(as);
588 if (irt_isfloat(ir->t)) {
589 emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp);
590 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63],
591 rset_exclude(RSET_GPR, left));
592 emit_fg(as, MIPSI_CVT_S_L, dest, dest);
593 } else {
594 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
595 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63],
596 rset_exclude(RSET_GPR, left));
597 emit_fg(as, MIPSI_CVT_D_L, dest, dest);
598 }
599 emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end);
600 emit_tg(as, MIPSI_DMTC1, RID_TMP, dest);
601 emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0);
602#endif
456 } else { /* Integer to FP conversion. */ 603 } else { /* Integer to FP conversion. */
457 Reg left = ra_alloc1(as, lref, RSET_GPR); 604 Reg left = ra_alloc1(as, lref, RSET_GPR);
605#if LJ_32
458 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, 606 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
459 dest, dest); 607 dest, dest);
460 emit_tg(as, MIPSI_MTC1, left, dest); 608 emit_tg(as, MIPSI_MTC1, left, dest);
609#else
610 MIPSIns mi = irt_isfloat(ir->t) ?
611 (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) :
612 (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W);
613 emit_fg(as, mi, dest, dest);
614 emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest);
615#endif
461 } 616 }
462 } else if (stfp) { /* FP to integer conversion. */ 617 } else if (stfp) { /* FP to integer conversion. */
463 if (irt_isguard(ir->t)) { 618 if (irt_isguard(ir->t)) {
464 /* Checked conversions are only supported from number to int. */ 619 /* Checked conversions are only supported from number to int. */
465 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 620 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
621 "bad type for checked CONV");
466 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 622 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
467 } else { 623 } else {
468 Reg dest = ra_dest(as, ir, RSET_GPR); 624 Reg dest = ra_dest(as, ir, RSET_GPR);
469 Reg left = ra_alloc1(as, lref, RSET_FPR); 625 Reg left = ra_alloc1(as, lref, RSET_FPR);
470 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 626 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
471 if (irt_isu32(ir->t)) { 627 if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
472 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ 628 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
473 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); 629 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
474 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 630 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
@@ -479,25 +635,112 @@ static void asm_conv(ASMState *as, IRIns *ir)
479 tmp, left, tmp); 635 tmp, left, tmp);
480 if (st == IRT_FLOAT) 636 if (st == IRT_FLOAT)
481 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), 637 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
482 (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), 638 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
483 RSET_GPR);
484 else 639 else
485 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 640 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
486 (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), 641 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
487 RSET_GPR); 642#if LJ_64
643 } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
644 MCLabel l_end;
645 emit_tg(as, MIPSI_DMFC1, dest, tmp);
646 l_end = emit_label(as);
647 /* For inputs >= 2^63 add -2^64 and convert again. */
648 if (st == IRT_NUM) {
649 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
650 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
651 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
652 (void *)&as->J->k64[LJ_K64_M2P64],
653 rset_exclude(RSET_GPR, dest));
654 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
655#if !LJ_TARGET_MIPSR6
656 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
657 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
658#else
659 emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
660 emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
661#endif
662 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
663 (void *)&as->J->k64[LJ_K64_2P63],
664 rset_exclude(RSET_GPR, dest));
665 } else {
666 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
667 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
668 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
669 (void *)&as->J->k32[LJ_K32_M2P64],
670 rset_exclude(RSET_GPR, dest));
671 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
672#if !LJ_TARGET_MIPSR6
673 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
674 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
675#else
676 emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
677 emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
678#endif
679 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
680 (void *)&as->J->k32[LJ_K32_2P63],
681 rset_exclude(RSET_GPR, dest));
682 }
683#endif
488 } else { 684 } else {
685#if LJ_32
489 emit_tg(as, MIPSI_MFC1, dest, tmp); 686 emit_tg(as, MIPSI_MFC1, dest, tmp);
490 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, 687 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
491 tmp, left); 688 tmp, left);
689#else
690 MIPSIns mi = irt_is64(ir->t) ?
691 (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
692 (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
693 emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, tmp);
694 emit_fg(as, mi, tmp, left);
695#endif
492 } 696 }
493 } 697 }
494 } else { 698 } else
699#else
700 if (irt_isfp(ir->t)) {
701#if LJ_64 && LJ_HASFFI
702 if (stfp) { /* FP to FP conversion. */
703 asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d :
704 IRCALL_softfp_d2f);
705 } else { /* Integer to FP conversion. */
706 IRCallID cid = ((IRT_IS64 >> st) & 1) ?
707 (irt_isnum(ir->t) ?
708 (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) :
709 (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) :
710 (irt_isnum(ir->t) ?
711 (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) :
712 (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f));
713 asm_callid(as, ir, cid);
714 }
715#else
716 asm_callid(as, ir, IRCALL_softfp_i2d);
717#endif
718 } else if (stfp) { /* FP to integer conversion. */
719 if (irt_isguard(ir->t)) {
720 /* Checked conversions are only supported from number to int. */
721 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
722 "bad type for checked CONV");
723 asm_tointg(as, ir, RID_NONE);
724 } else {
725 IRCallID cid = irt_is64(ir->t) ?
726 ((st == IRT_NUM) ?
727 (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
728 (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
729 ((st == IRT_NUM) ?
730 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
731 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
732 asm_callid(as, ir, cid);
733 }
734 } else
735#endif
736#endif
737 {
495 Reg dest = ra_dest(as, ir, RSET_GPR); 738 Reg dest = ra_dest(as, ir, RSET_GPR);
496 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 739 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
497 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 740 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
498 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 741 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
499 if ((ir->op2 & IRCONV_SEXT)) { 742 if ((ir->op2 & IRCONV_SEXT)) {
500 if ((as->flags & JIT_F_MIPS32R2)) { 743 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
501 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); 744 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
502 } else { 745 } else {
503 uint32_t shift = st == IRT_I8 ? 24 : 16; 746 uint32_t shift = st == IRT_I8 ? 24 : 16;
@@ -509,94 +752,171 @@ static void asm_conv(ASMState *as, IRIns *ir)
509 (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); 752 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
510 } 753 }
511 } else { /* 32/64 bit integer conversions. */ 754 } else { /* 32/64 bit integer conversions. */
755#if LJ_32
512 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ 756 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
513 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ 757 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
758#else
759 if (irt_is64(ir->t)) {
760 if (st64) {
761 /* 64/64 bit no-op (cast)*/
762 ra_leftov(as, dest, lref);
763 } else {
764 Reg left = ra_alloc1(as, lref, RSET_GPR);
765 if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
766 emit_dta(as, MIPSI_SLL, dest, left, 0);
767 } else { /* 32 to 64 bit zero extension. */
768 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
769 }
770 }
771 } else {
772 if (st64 && !(ir->op2 & IRCONV_NONE)) {
773 /* This is either a 32 bit reg/reg mov which zeroes the hiword
774 ** or a load of the loword from a 64 bit address.
775 */
776 Reg left = ra_alloc1(as, lref, RSET_GPR);
777 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
778 } else { /* 32/32 bit no-op (cast). */
779 /* Do nothing, but may need to move regs. */
780 ra_leftov(as, dest, lref);
781 }
782 }
783#endif
514 } 784 }
515 } 785 }
516} 786}
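The MIPS64 branches above implement the unsigned 64-bit conversions with the 2^63/2^64 bias tricks spelled out in their comments; the 32-bit unsigned case uses the analogous x - 2^31 then XOR 0x80000000 trick. A plain C rendering of the 64-bit arithmetic, an illustrative sketch rather than patch code, which ignores sub-ULP rounding differences and assumes the constants named LJ_K64_2P63 and LJ_K64_M2P64 hold 2^63 and -2^64:

#include <stdint.h>
#include <assert.h>

#define P63 9223372036854775808.0    /* 2^63, cf. LJ_K64_2P63 */
#define P64 18446744073709551616.0   /* 2^64, negated as LJ_K64_M2P64 */

static double u64_to_num(uint64_t x)
{
  if ((int64_t)x >= 0)
    return (double)(int64_t)x;                   /* DMTC1 + CVT.D.L */
  /* Clear bit 63, convert, then add 2^63 back in. */
  return (double)(int64_t)(x & ~(1ull << 63)) + P63;
}

static uint64_t num_to_u64(double x)
{
  if (x < P63)
    return (uint64_t)(int64_t)x;                 /* TRUNC.L.D */
  /* Shift into int64 range with -2^64, truncate, wrap back around. */
  return (uint64_t)(int64_t)(x - P64);
}

int main(void)
{
  assert(u64_to_num(1ull << 63) == P63);
  assert(num_to_u64(P63) == (1ull << 63));
  assert(num_to_u64(18446744073709549568.0) == 18446744073709549568ull);
  return 0;
}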
517 787
518#if LJ_HASFFI
519static void asm_conv64(ASMState *as, IRIns *ir)
520{
521 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
522 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
523 IRCallID id;
524 const CCallInfo *ci;
525 IRRef args[2];
526 args[LJ_BE?0:1] = ir->op1;
527 args[LJ_BE?1:0] = (ir-1)->op1;
528 if (st == IRT_NUM || st == IRT_FLOAT) {
529 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
530 ir--;
531 } else {
532 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
533 }
534 ci = &lj_ir_callinfo[id];
535 asm_setupresult(as, ir, ci);
536 asm_gencall(as, ci, args);
537}
538#endif
539
540static void asm_strto(ASMState *as, IRIns *ir) 788static void asm_strto(ASMState *as, IRIns *ir)
541{ 789{
542 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 790 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
543 IRRef args[2]; 791 IRRef args[2];
792 int32_t ofs = 0;
793#if LJ_SOFTFP32
794 ra_evictset(as, RSET_SCRATCH);
795 if (ra_used(ir)) {
796 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
797 (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
798 int i;
799 for (i = 0; i < 2; i++) {
800 Reg r = (ir+i)->r;
801 if (ra_hasreg(r)) {
802 ra_free(as, r);
803 ra_modified(as, r);
804 emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
805 }
806 }
807 ofs = sps_scale(ir->s & ~1);
808 } else {
809 Reg rhi = ra_dest(as, ir+1, RSET_GPR);
810 Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
811 emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4));
812 emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0));
813 }
814 }
815#else
544 RegSet drop = RSET_SCRATCH; 816 RegSet drop = RSET_SCRATCH;
545 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ 817 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
546 ra_evictset(as, drop); 818 ra_evictset(as, drop);
819 ofs = sps_scale(ir->s);
820#endif
547 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ 821 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */
548 args[0] = ir->op1; /* GCstr *str */ 822 args[0] = ir->op1; /* GCstr *str */
549 args[1] = ASMREF_TMP1; /* TValue *n */ 823 args[1] = ASMREF_TMP1; /* TValue *n */
550 asm_gencall(as, ci, args); 824 asm_gencall(as, ci, args);
551 /* Store the result to the spill slot or temp slots. */ 825 /* Store the result to the spill slot or temp slots. */
552 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), 826 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1),
553 RID_SP, sps_scale(ir->s)); 827 RID_SP, ofs);
554} 828}
555 829
556/* Get pointer to TValue. */ 830/* -- Memory references --------------------------------------------------- */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 831
832#if LJ_64
833/* Store tagged value for ref at base+ofs. */
834static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
558{ 835{
836 RegSet allow = rset_exclude(RSET_GPR, base);
559 IRIns *ir = IR(ref); 837 IRIns *ir = IR(ref);
560 if (irt_isnum(ir->t)) { 838 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
561 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ 839 "store of IR type %d", irt_type(ir->t));
562 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 840 if (irref_isk(ref)) {
563 else /* Otherwise force a spill and use the spill slot. */ 841 TValue k;
564 emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); 842 lj_ir_kvalue(as->J->L, &k, ir);
843 emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
565 } else { 844 } else {
566 /* Otherwise use g->tmptv to hold the TValue. */ 845 Reg src = ra_alloc1(as, ref, allow);
567 RegSet allow = rset_exclude(RSET_GPR, dest); 846 Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47,
568 Reg type; 847 rset_exclude(allow, src));
569 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 848 emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs);
570 if (!irt_ispri(ir->t)) { 849 if (irt_isinteger(ir->t)) {
571 Reg src = ra_alloc1(as, ref, allow); 850 emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type);
572 emit_setgl(as, src, tmptv.gcr); 851 emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0);
852 } else {
853 emit_dst(as, MIPSI_DADDU, RID_TMP, src, type);
573 } 854 }
574 type = ra_allock(as, irt_toitype(ir->t), allow);
575 emit_setgl(as, type, tmptv.it);
576 } 855 }
577} 856}
857#endif
578 858
579static void asm_tostr(ASMState *as, IRIns *ir) 859/* Get pointer to TValue. */
860static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
580{ 861{
581 IRRef args[2]; 862 int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
582 args[0] = ASMREF_L; 863 if ((mode & IRTMPREF_IN1)) {
583 as->gcsteps++; 864 IRIns *ir = IR(ref);
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { 865 if (irt_isnum(ir->t)) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 866 if ((mode & IRTMPREF_OUT1)) {
586 args[1] = ASMREF_TMP1; /* const lua_Number * */ 867#if LJ_SOFTFP
587 asm_setupresult(as, ir, ci); /* GCstr * */ 868 emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
588 asm_gencall(as, ci, args); 869#if LJ_64
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); 870 emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64);
871#else
872 lj_assertA(irref_isk(ref), "unsplit FP op");
873 emit_setgl(as,
874 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
875 tmptv.u32.lo);
876 emit_setgl(as,
877 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
878 tmptv.u32.hi);
879#endif
880#else
881 Reg src = ra_alloc1(as, ref, RSET_FPR);
882 emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
883 emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs);
884#endif
885 } else if (irref_isk(ref)) {
886 /* Use the number constant itself as a TValue. */
887 ra_allockreg(as, igcptr(ir_knum(ir)), dest);
888 } else {
889#if LJ_SOFTFP32
890 lj_assertA(0, "unsplit FP op");
891#else
892 /* Otherwise force a spill and use the spill slot. */
893 emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
894#endif
895 }
896 } else {
897 /* Otherwise use g->tmptv to hold the TValue. */
898#if LJ_32
899 Reg type;
900 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs);
901 if (!irt_ispri(ir->t)) {
902 Reg src = ra_alloc1(as, ref, RSET_GPR);
903 emit_setgl(as, src, tmptv.gcr);
904 }
905 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
906 type = ra_alloc1(as, ref+1, RSET_GPR);
907 else
908 type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR);
909 emit_setgl(as, type, tmptv.it);
910#else
911 asm_tvstore64(as, dest, 0, ref);
912 emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs);
913#endif
914 }
590 } else { 915 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 916 emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 } 917 }
596} 918}
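Several offsets above carry a bare -32768: tmpofs here, the ofs-jgl-32768 in asm_fuseahuref, and the GG_State load in asm_fload further down. The pattern works because RID_JGL is kept pointing 32768 bytes past the global_State, so every field in its first 64KB is reachable with a signed 16-bit displacement. A tiny self-check of that arithmetic, an illustration with a made-up field offset:

#include <assert.h>

int main(void)
{
  long g = 0x10000;                /* stand-in address of global_State */
  long jgl = g + 32768;            /* value assumed to live in RID_JGL */
  long field = 1234;               /* hypothetical offsetof(global_State, tmptv) */
  long disp = field - 32768;       /* what the code passes to emit_tsi() */
  assert(disp >= -32768 && disp <= 32767);  /* fits a 16-bit immediate */
  assert(jgl + disp == g + field);          /* addresses g->field */
  return 0;
}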
597 919
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 920static void asm_aref(ASMState *as, IRIns *ir)
601{ 921{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 922 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -608,14 +928,18 @@ static void asm_aref(ASMState *as, IRIns *ir)
608 ofs += 8*IR(ir->op2)->i; 928 ofs += 8*IR(ir->op2)->i;
609 if (checki16(ofs)) { 929 if (checki16(ofs)) {
610 base = ra_alloc1(as, refa, RSET_GPR); 930 base = ra_alloc1(as, refa, RSET_GPR);
611 emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); 931 emit_tsi(as, MIPSI_AADDIU, dest, base, ofs);
612 return; 932 return;
613 } 933 }
614 } 934 }
615 base = ra_alloc1(as, ir->op1, RSET_GPR); 935 base = ra_alloc1(as, ir->op1, RSET_GPR);
616 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); 936 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
617 emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); 937#if !LJ_TARGET_MIPSR6
938 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base);
618 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); 939 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
940#else
941 emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base);
942#endif
619} 943}
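asm_aref computes the element address base + idx*8: pre-R6 that is an SLL by 3 plus an add, while R6 folds both into a single LSA whose 2-bit shift-amount field stores shift-1 (hence MIPSF_A(3-1)). The address arithmetic itself, as a trivial sketch outside the patch:

#include <stdint.h>
#include <assert.h>

/* TValue array slots are 8 bytes on this target. */
static uintptr_t aref(uintptr_t array_base, uint32_t idx)
{
  return array_base + ((uintptr_t)idx << 3);  /* SLL+ADDU, or one LSA on R6 */
}

int main(void)
{
  assert(aref(0x1000, 5) == 0x1000 + 5*8);
  return 0;
}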
620 944
621/* Inlined hash lookup. Specialized for key type and for const keys. 945/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -626,21 +950,25 @@ static void asm_aref(ASMState *as, IRIns *ir)
626** } while ((n = nextnode(n))); 950** } while ((n = nextnode(n)));
627** return niltv(L); 951** return niltv(L);
628*/ 952*/
629static void asm_href(ASMState *as, IRIns *ir) 953static void asm_href(ASMState *as, IRIns *ir, IROp merge)
630{ 954{
631 RegSet allow = RSET_GPR; 955 RegSet allow = RSET_GPR;
632 int destused = ra_used(ir); 956 int destused = ra_used(ir);
633 Reg dest = ra_dest(as, ir, allow); 957 Reg dest = ra_dest(as, ir, allow);
634 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 958 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
635 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; 959 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
960#if LJ_64
961 Reg cmp64 = RID_NONE;
962#endif
636 IRRef refkey = ir->op2; 963 IRRef refkey = ir->op2;
637 IRIns *irkey = IR(refkey); 964 IRIns *irkey = IR(refkey);
965 int isk = irref_isk(refkey);
638 IRType1 kt = irkey->t; 966 IRType1 kt = irkey->t;
639 uint32_t khash; 967 uint32_t khash;
640 MCLabel l_end, l_loop, l_next; 968 MCLabel l_end, l_loop, l_next;
641 969
642 rset_clear(allow, tab); 970 rset_clear(allow, tab);
643 if (irt_isnum(kt)) { 971 if (!LJ_SOFTFP && irt_isnum(kt)) {
644 key = ra_alloc1(as, refkey, RSET_FPR); 972 key = ra_alloc1(as, refkey, RSET_FPR);
645 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); 973 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
646 } else { 974 } else {
@@ -648,31 +976,76 @@ static void asm_href(ASMState *as, IRIns *ir)
648 key = ra_alloc1(as, refkey, allow); 976 key = ra_alloc1(as, refkey, allow);
649 rset_clear(allow, key); 977 rset_clear(allow, key);
650 } 978 }
651 type = ra_allock(as, irt_toitype(irkey->t), allow); 979#if LJ_32
652 rset_clear(allow, type); 980 if (LJ_SOFTFP && irkey[1].o == IR_HIOP) {
981 if (ra_hasreg((irkey+1)->r)) {
982 type = tmpnum = (irkey+1)->r;
983 tmp1 = ra_scratch(as, allow);
984 rset_clear(allow, tmp1);
985 ra_noweak(as, tmpnum);
986 } else {
987 type = tmpnum = ra_allocref(as, refkey+1, allow);
988 }
989 rset_clear(allow, tmpnum);
990 } else {
991 type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
992 rset_clear(allow, type);
993 }
994#endif
653 } 995 }
654 tmp2 = ra_scratch(as, allow); 996 tmp2 = ra_scratch(as, allow);
655 rset_clear(allow, tmp2); 997 rset_clear(allow, tmp2);
998#if LJ_64
999 if (LJ_SOFTFP || !irt_isnum(kt)) {
1000 /* Allocate cmp64 register used for 64-bit comparisons */
1001 if (LJ_SOFTFP && irt_isnum(kt)) {
1002 cmp64 = key;
1003 } else if (!isk && irt_isaddr(kt)) {
1004 cmp64 = tmp2;
1005 } else {
1006 int64_t k;
1007 if (isk && irt_isaddr(kt)) {
1008 k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
1009 } else {
1010 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
1011 k = ~((int64_t)~irt_toitype(kt) << 47);
1012 }
1013 cmp64 = ra_allock(as, k, allow);
1014 rset_clear(allow, cmp64);
1015 }
1016 }
1017#endif
656 1018
657 /* Key not found in chain: load niltv. */ 1019 /* Key not found in chain: jump to exit (if merged) or load niltv. */
658 l_end = emit_label(as); 1020 l_end = emit_label(as);
659 if (destused) 1021 as->invmcp = NULL;
1022 if (merge == IR_NE)
1023 asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
1024 else if (destused)
660 emit_loada(as, dest, niltvg(J2G(as->J))); 1025 emit_loada(as, dest, niltvg(J2G(as->J)));
661 else
662 *--as->mcp = MIPSI_NOP;
663 /* Follow hash chain until the end. */ 1026 /* Follow hash chain until the end. */
664 emit_move(as, dest, tmp1); 1027 emit_move(as, dest, tmp1);
665 l_loop = --as->mcp; 1028 l_loop = --as->mcp;
666 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); 1029 emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next));
667 l_next = emit_label(as); 1030 l_next = emit_label(as);
668 1031
669 /* Type and value comparison. */ 1032 /* Type and value comparison. */
670 if (irt_isnum(kt)) { 1033 if (merge == IR_EQ) { /* Must match asm_guard(). */
1034 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
1035 l_end = asm_exitstub_addr(as);
1036 }
1037 if (!LJ_SOFTFP && irt_isnum(kt)) {
1038#if !LJ_TARGET_MIPSR6
671 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 1039 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
672 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 1040 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
673 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 1041#else
1042 emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end);
1043 emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key);
1044#endif
1045 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
674 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); 1046 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
675 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); 1047 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
1048#if LJ_32
676 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); 1049 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
677 } else { 1050 } else {
678 if (irt_ispri(kt)) { 1051 if (irt_ispri(kt)) {
@@ -685,36 +1058,52 @@ static void asm_href(ASMState *as, IRIns *ir)
685 } 1058 }
686 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); 1059 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
687 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); 1060 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
1061#else
1062 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
1063 emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
1064 emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
1065 } else {
1066 emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end);
1067 emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
1068 }
1069 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
1070 if (!isk && irt_isaddr(kt)) {
1071 type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
1072 emit_dst(as, MIPSI_DADDU, tmp2, key, type);
1073 rset_clear(allow, type);
1074 }
1075#endif
688 1076
689 /* Load main position relative to tab->node into dest. */ 1077 /* Load main position relative to tab->node into dest. */
690 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 1078 khash = isk ? ir_khash(as, irkey) : 1;
691 if (khash == 0) { 1079 if (khash == 0) {
692 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 1080 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
693 } else { 1081 } else {
694 Reg tmphash = tmp1; 1082 Reg tmphash = tmp1;
695 if (irref_isk(refkey)) 1083 if (isk)
696 tmphash = ra_allock(as, khash, allow); 1084 tmphash = ra_allock(as, khash, allow);
697 emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); 1085 emit_dst(as, MIPSI_AADDU, dest, dest, tmp1);
698 lua_assert(sizeof(Node) == 24); 1086 lj_assertA(sizeof(Node) == 24, "bad Node size");
699 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); 1087 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
700 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); 1088 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
701 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); 1089 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
702 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); 1090 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
703 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 1091 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
704 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); 1092 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
705 if (irref_isk(refkey)) { 1093 if (isk) {
706 /* Nothing to do. */ 1094 /* Nothing to do. */
707 } else if (irt_isstr(kt)) { 1095 } else if (irt_isstr(kt)) {
708 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); 1096 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid));
709 } else { /* Must match with hash*() in lj_tab.c. */ 1097 } else { /* Must match with hash*() in lj_tab.c. */
710 emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); 1098 emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
711 emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); 1099 emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
712 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); 1100 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
713 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); 1101 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
714 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); 1102 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
715 if (irt_isnum(kt)) { 1103#if LJ_32
1104 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
716 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); 1105 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
717 if ((as->flags & JIT_F_MIPS32R2)) { 1106 if ((as->flags & JIT_F_MIPSXXR2)) {
718 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); 1107 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
719 } else { 1108 } else {
720 emit_dst(as, MIPSI_OR, dest, dest, tmp1); 1109 emit_dst(as, MIPSI_OR, dest, dest, tmp1);
@@ -722,13 +1111,35 @@ static void asm_href(ASMState *as, IRIns *ir)
722 emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); 1111 emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
723 } 1112 }
724 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); 1113 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
1114#if LJ_SOFTFP
1115 emit_ds(as, MIPSI_MOVE, tmp1, type);
1116 emit_ds(as, MIPSI_MOVE, tmp2, key);
1117#else
725 emit_tg(as, MIPSI_MFC1, tmp2, key); 1118 emit_tg(as, MIPSI_MFC1, tmp2, key);
726 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 1119 emit_tg(as, MIPSI_MFC1, tmp1, key+1);
1120#endif
727 } else { 1121 } else {
728 emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); 1122 emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
729 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); 1123 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
730 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); 1124 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
731 } 1125 }
1126#else
1127 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
1128 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
1129 if (irt_isnum(kt)) {
1130 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
1131 emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0);
1132 emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
1133#if !LJ_SOFTFP
1134 emit_tg(as, MIPSI_DMFC1, tmp1, key);
1135#endif
1136 } else {
1137 checkmclim(as);
1138 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
1139 emit_dta(as, MIPSI_SLL, tmp2, key, 0);
1140 emit_dst(as, MIPSI_DADDU, tmp1, key, type);
1141 }
1142#endif
732 } 1143 }
733 } 1144 }
734} 1145}
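The main-position computation above avoids a multiply: with sizeof(Node) == 24, the byte offset of node[hash & hmask] is built as (idx << 5) - (idx << 3) before being added to tab->node. A standalone check of that identity, with hypothetical hash and mask values:

#include <stdint.h>
#include <assert.h>

int main(void)
{
  uint32_t hash = 0xdeadbeef, hmask = 0x3f;   /* hypothetical values */
  uint32_t idx = hash & hmask;
  /* idx*32 - idx*8 == idx*24 == idx*sizeof(Node) */
  assert(((idx << 5) - (idx << 3)) == idx * 24);
  return 0;
}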
@@ -741,17 +1152,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
741 int32_t kofs = ofs + (int32_t)offsetof(Node, key); 1152 int32_t kofs = ofs + (int32_t)offsetof(Node, key);
742 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; 1153 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
743 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 1154 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
744 Reg key = RID_NONE, type = RID_TMP, idx = node;
745 RegSet allow = rset_exclude(RSET_GPR, node); 1155 RegSet allow = rset_exclude(RSET_GPR, node);
1156 Reg idx = node;
1157#if LJ_32
1158 Reg key = RID_NONE, type = RID_TMP;
746 int32_t lo, hi; 1159 int32_t lo, hi;
747 lua_assert(ofs % sizeof(Node) == 0); 1160#else
1161 Reg key = ra_scratch(as, allow);
1162 int64_t k;
1163#endif
1164 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
748 if (ofs > 32736) { 1165 if (ofs > 32736) {
749 idx = dest; 1166 idx = dest;
750 rset_clear(allow, dest); 1167 rset_clear(allow, dest);
751 kofs = (int32_t)offsetof(Node, key); 1168 kofs = (int32_t)offsetof(Node, key);
752 } else if (ra_hasreg(dest)) { 1169 } else if (ra_hasreg(dest)) {
753 emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); 1170 emit_tsi(as, MIPSI_AADDIU, dest, node, ofs);
754 } 1171 }
1172#if LJ_32
755 if (!irt_ispri(irkey->t)) { 1173 if (!irt_ispri(irkey->t)) {
756 key = ra_scratch(as, allow); 1174 key = ra_scratch(as, allow);
757 rset_clear(allow, key); 1175 rset_clear(allow, key);
@@ -770,54 +1188,60 @@ nolo:
770 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); 1188 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
771 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); 1189 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
772 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); 1190 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
773 if (ofs > 32736) 1191#else
774 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); 1192 if (irt_ispri(irkey->t)) {
775} 1193 lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
776 1194 k = ~((int64_t)~irt_toitype(irkey->t) << 47);
777static void asm_newref(ASMState *as, IRIns *ir) 1195 } else if (irt_isnum(irkey->t)) {
778{ 1196 k = (int64_t)ir_knum(irkey)->u64;
779 if (ir->r != RID_SINK) { 1197 } else {
780 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; 1198 k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
781 IRRef args[3];
782 args[0] = ASMREF_L; /* lua_State *L */
783 args[1] = ir->op1; /* GCtab *t */
784 args[2] = ASMREF_TMP1; /* cTValue *key */
785 asm_setupresult(as, ir, ci); /* TValue * */
786 asm_gencall(as, ci, args);
787 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
788 } 1199 }
1200 asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow));
1201 emit_tsi(as, MIPSI_LD, key, idx, kofs);
1202#endif
1203 if (ofs > 32736)
1204 emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow));
789} 1205}
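On 64-bit targets the HREFK guard above compares the whole 64-bit key word against one constant: GC keys get the type code shifted into the top 17 bits plus the object pointer, numbers use their raw bits, and primitives use the ~((int64_t)~itype << 47) form. A hedged sketch of how such constants would be assembled; the tag values and helper names below are stand-ins, not LuaJIT's actual definitions:

#include <stdint.h>
#include <stdio.h>

enum { TAG_TRUE = -3, TAG_STR = -5 };   /* stand-in type codes */

static uint64_t key_gc(int32_t itype, uint64_t gcptr)
{
  return ((uint64_t)(int64_t)itype << 47) + gcptr;   /* tagged GC key */
}

static uint64_t key_pri(int32_t itype)
{
  return (uint64_t)~((int64_t)~itype << 47);         /* primitive key */
}

static uint64_t key_num(double n)
{
  union { double d; uint64_t u; } u; u.d = n;        /* raw number bits */
  return u.u;
}

int main(void)
{
  printf("%llx %llx %llx\n",
         (unsigned long long)key_gc(TAG_STR, 0x1234),
         (unsigned long long)key_pri(TAG_TRUE),
         (unsigned long long)key_num(1.5));
  return 0;
}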
790 1206
791static void asm_uref(ASMState *as, IRIns *ir) 1207static void asm_uref(ASMState *as, IRIns *ir)
792{ 1208{
793 Reg dest = ra_dest(as, ir, RSET_GPR); 1209 Reg dest = ra_dest(as, ir, RSET_GPR);
794 if (irref_isk(ir->op1)) { 1210 int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
1211 if (irref_isk(ir->op1) && !guarded) {
795 GCfunc *fn = ir_kfunc(IR(ir->op1)); 1212 GCfunc *fn = ir_kfunc(IR(ir->op1));
796 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 1213 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
797 emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); 1214 emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
798 } else { 1215 } else {
799 Reg uv = ra_scratch(as, RSET_GPR); 1216 if (guarded)
800 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 1217 asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
801 if (ir->o == IR_UREFC) { 1218 if (ir->o == IR_UREFC)
802 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1219 emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
803 emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); 1220 else
804 emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); 1221 emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
1222 if (guarded)
1223 emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
1224 if (irref_isk(ir->op1)) {
1225 GCfunc *fn = ir_kfunc(IR(ir->op1));
1226 GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
1227 emit_loada(as, dest, o);
805 } else { 1228 } else {
806 emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); 1229 emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
1230 (int32_t)offsetof(GCfuncL, uvptr) +
1231 (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
807 } 1232 }
808 emit_tsi(as, MIPSI_LW, uv, func,
809 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
810 } 1233 }
811} 1234}
812 1235
813static void asm_fref(ASMState *as, IRIns *ir) 1236static void asm_fref(ASMState *as, IRIns *ir)
814{ 1237{
815 UNUSED(as); UNUSED(ir); 1238 UNUSED(as); UNUSED(ir);
816 lua_assert(!ra_used(ir)); 1239 lj_assertA(!ra_used(ir), "unfused FREF");
817} 1240}
818 1241
819static void asm_strref(ASMState *as, IRIns *ir) 1242static void asm_strref(ASMState *as, IRIns *ir)
820{ 1243{
1244#if LJ_32
821 Reg dest = ra_dest(as, ir, RSET_GPR); 1245 Reg dest = ra_dest(as, ir, RSET_GPR);
822 IRRef ref = ir->op2, refk = ir->op1; 1246 IRRef ref = ir->op2, refk = ir->op1;
823 int32_t ofs = (int32_t)sizeof(GCstr); 1247 int32_t ofs = (int32_t)sizeof(GCstr);
@@ -849,49 +1273,79 @@ static void asm_strref(ASMState *as, IRIns *ir)
849 else 1273 else
850 emit_dst(as, MIPSI_ADDU, dest, r, 1274 emit_dst(as, MIPSI_ADDU, dest, r,
851 ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); 1275 ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
1276#else
1277 RegSet allow = RSET_GPR;
1278 Reg dest = ra_dest(as, ir, allow);
1279 Reg base = ra_alloc1(as, ir->op1, allow);
1280 IRIns *irr = IR(ir->op2);
1281 int32_t ofs = sizeof(GCstr);
1282 rset_clear(allow, base);
1283 if (irref_isk(ir->op2) && checki16(ofs + irr->i)) {
1284 emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i);
1285 } else {
1286 emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs);
1287 emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow));
1288 }
1289#endif
852} 1290}
853 1291
854/* -- Loads and stores ---------------------------------------------------- */ 1292/* -- Loads and stores ---------------------------------------------------- */
855 1293
856static MIPSIns asm_fxloadins(IRIns *ir) 1294static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir)
857{ 1295{
1296 UNUSED(as);
858 switch (irt_type(ir->t)) { 1297 switch (irt_type(ir->t)) {
859 case IRT_I8: return MIPSI_LB; 1298 case IRT_I8: return MIPSI_LB;
860 case IRT_U8: return MIPSI_LBU; 1299 case IRT_U8: return MIPSI_LBU;
861 case IRT_I16: return MIPSI_LH; 1300 case IRT_I16: return MIPSI_LH;
862 case IRT_U16: return MIPSI_LHU; 1301 case IRT_U16: return MIPSI_LHU;
863 case IRT_NUM: return MIPSI_LDC1; 1302 case IRT_NUM:
864 case IRT_FLOAT: return MIPSI_LWC1; 1303 lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
865 default: return MIPSI_LW; 1304 if (!LJ_SOFTFP) return MIPSI_LDC1;
1305 /* fallthrough */
1306 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
1307 /* fallthrough */
1308 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
866 } 1309 }
867} 1310}
868 1311
869static MIPSIns asm_fxstoreins(IRIns *ir) 1312static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir)
870{ 1313{
1314 UNUSED(as);
871 switch (irt_type(ir->t)) { 1315 switch (irt_type(ir->t)) {
872 case IRT_I8: case IRT_U8: return MIPSI_SB; 1316 case IRT_I8: case IRT_U8: return MIPSI_SB;
873 case IRT_I16: case IRT_U16: return MIPSI_SH; 1317 case IRT_I16: case IRT_U16: return MIPSI_SH;
874 case IRT_NUM: return MIPSI_SDC1; 1318 case IRT_NUM:
875 case IRT_FLOAT: return MIPSI_SWC1; 1319 lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
876 default: return MIPSI_SW; 1320 if (!LJ_SOFTFP) return MIPSI_SDC1;
1321 /* fallthrough */
1322 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
1323 /* fallthrough */
1324 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
877 } 1325 }
878} 1326}
879 1327
880static void asm_fload(ASMState *as, IRIns *ir) 1328static void asm_fload(ASMState *as, IRIns *ir)
881{ 1329{
882 Reg dest = ra_dest(as, ir, RSET_GPR); 1330 Reg dest = ra_dest(as, ir, RSET_GPR);
883 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1331 MIPSIns mi = asm_fxloadins(as, ir);
884 MIPSIns mi = asm_fxloadins(ir); 1332 Reg idx;
885 int32_t ofs; 1333 int32_t ofs;
886 if (ir->op2 == IRFL_TAB_ARRAY) { 1334 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
887 ofs = asm_fuseabase(as, ir->op1); 1335 idx = RID_JGL;
888 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1336 ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
889 emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); 1337 } else {
890 return; 1338 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1339 if (ir->op2 == IRFL_TAB_ARRAY) {
1340 ofs = asm_fuseabase(as, ir->op1);
1341 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1342 emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs);
1343 return;
1344 }
891 } 1345 }
1346 ofs = field_ofs[ir->op2];
1347 lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
892 } 1348 }
893 ofs = field_ofs[ir->op2];
894 lua_assert(!irt_isfp(ir->t));
895 emit_tsi(as, mi, dest, idx, ofs); 1349 emit_tsi(as, mi, dest, idx, ofs);
896} 1350}
897 1351
@@ -902,51 +1356,90 @@ static void asm_fstore(ASMState *as, IRIns *ir)
902 IRIns *irf = IR(ir->op1); 1356 IRIns *irf = IR(ir->op1);
903 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 1357 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
904 int32_t ofs = field_ofs[irf->op2]; 1358 int32_t ofs = field_ofs[irf->op2];
905 MIPSIns mi = asm_fxstoreins(ir); 1359 MIPSIns mi = asm_fxstoreins(as, ir);
906 lua_assert(!irt_isfp(ir->t)); 1360 lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
907 emit_tsi(as, mi, src, idx, ofs); 1361 emit_tsi(as, mi, src, idx, ofs);
908 } 1362 }
909} 1363}
910 1364
911static void asm_xload(ASMState *as, IRIns *ir) 1365static void asm_xload(ASMState *as, IRIns *ir)
912{ 1366{
913 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 1367 Reg dest = ra_dest(as, ir,
914 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1368 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
915 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1369 lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
1370 "unaligned XLOAD");
1371 asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
916} 1372}
917 1373
918static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1374static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
919{ 1375{
920 if (ir->r != RID_SINK) { 1376 if (ir->r != RID_SINK) {
921 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 1377 Reg src = ra_alloc1z(as, ir->op2,
922 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 1378 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1379 asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
923 rset_exclude(RSET_GPR, src), ofs); 1380 rset_exclude(RSET_GPR, src), ofs);
924 } 1381 }
925} 1382}
926 1383
1384#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1385
927static void asm_ahuvload(ASMState *as, IRIns *ir) 1386static void asm_ahuvload(ASMState *as, IRIns *ir)
928{ 1387{
929 IRType1 t = ir->t; 1388 int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
930 Reg dest = RID_NONE, type = RID_TMP, idx; 1389 Reg dest = RID_NONE, type = RID_TMP, idx;
931 RegSet allow = RSET_GPR; 1390 RegSet allow = RSET_GPR;
932 int32_t ofs = 0; 1391 int32_t ofs = 0;
1392 IRType1 t = ir->t;
1393 if (hiop) {
1394 t.irt = IRT_NUM;
1395 if (ra_used(ir+1)) {
1396 type = ra_dest(as, ir+1, allow);
1397 rset_clear(allow, type);
1398 }
1399 }
933 if (ra_used(ir)) { 1400 if (ra_used(ir)) {
934 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1401 lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
935 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1402 irt_isint(ir->t) || irt_isaddr(ir->t),
1403 "bad load type %d", irt_type(ir->t));
1404 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
936 rset_clear(allow, dest); 1405 rset_clear(allow, dest);
1406#if LJ_64
1407 if (irt_isaddr(t))
1408 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1409 else if (irt_isint(t))
1410 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1411#endif
937 } 1412 }
938 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1413 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1414 if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
939 rset_clear(allow, idx); 1415 rset_clear(allow, idx);
940 if (irt_isnum(t)) { 1416 if (irt_isnum(t)) {
941 asm_guard(as, MIPSI_BEQ, type, RID_ZERO); 1417 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
942 emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM); 1418 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
943 if (ra_hasreg(dest))
944 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
945 } else { 1419 } else {
946 asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow)); 1420 asm_guard(as, MIPSI_BNE, type,
947 if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); 1421 ra_allock(as, (int32_t)irt_toitype(t), allow));
1422 }
1423#if LJ_32
1424 if (ra_hasreg(dest)) {
1425 if (!LJ_SOFTFP && irt_isnum(t))
1426 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
1427 else
1428 emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
948 } 1429 }
949 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); 1430 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
1431#else
1432 if (ra_hasreg(dest)) {
1433 if (!LJ_SOFTFP && irt_isnum(t)) {
1434 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
1435 dest = type;
1436 }
1437 } else {
1438 dest = type;
1439 }
1440 emit_dta(as, MIPSI_DSRA32, type, dest, 15);
1441 emit_tsi(as, MIPSI_LD, dest, idx, ofs);
1442#endif
950} 1443}
951 1444
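For 64-bit targets, asm_ahuvload above now loads the whole tagged value with LD, derives the type-check operand with DSRA32 by 15 (an arithmetic shift by 47), clears the tag from pointers with DEXTM (keep the low 47 bits), and sign-extends integers with SLL by 0. A minimal C sketch of that decode, assuming LuaJIT's 64-bit layout with a 17-bit type field above a 47-bit payload:

#include <stdint.h>
/* Not part of the patch; mirrors the LD + DSRA32 + DEXTM + SLL sequence. */
static int32_t  tv_itype(uint64_t tv) { return (int32_t)((int64_t)tv >> 47); }
static uint64_t tv_gcptr(uint64_t tv) { return tv & (((uint64_t)1 << 47) - 1); }
static int32_t  tv_int(uint64_t tv)   { return (int32_t)tv; }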
952static void asm_ahustore(ASMState *as, IRIns *ir) 1445static void asm_ahustore(ASMState *as, IRIns *ir)
@@ -956,81 +1449,184 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
956 int32_t ofs = 0; 1449 int32_t ofs = 0;
957 if (ir->r == RID_SINK) 1450 if (ir->r == RID_SINK)
958 return; 1451 return;
959 if (irt_isnum(ir->t)) { 1452 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
960 src = ra_alloc1(as, ir->op2, RSET_FPR); 1453 src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
1454 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1455 emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs);
961 } else { 1456 } else {
1457#if LJ_32
962 if (!irt_ispri(ir->t)) { 1458 if (!irt_ispri(ir->t)) {
963 src = ra_alloc1(as, ir->op2, allow); 1459 src = ra_alloc1(as, ir->op2, allow);
964 rset_clear(allow, src); 1460 rset_clear(allow, src);
965 } 1461 }
966 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1462 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
1463 type = ra_alloc1(as, (ir+1)->op2, allow);
1464 else
1465 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
967 rset_clear(allow, type); 1466 rset_clear(allow, type);
968 } 1467 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
969 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
970 if (irt_isnum(ir->t)) {
971 emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
972 } else {
973 if (ra_hasreg(src)) 1468 if (ra_hasreg(src))
974 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); 1469 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
975 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); 1470 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
1471#else
1472 Reg tmp = RID_TMP;
1473 if (irt_ispri(ir->t)) {
1474 tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
1475 rset_clear(allow, tmp);
1476 } else {
1477 src = ra_alloc1(as, ir->op2, allow);
1478 rset_clear(allow, src);
1479 type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
1480 rset_clear(allow, type);
1481 }
1482 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1483 emit_tsi(as, MIPSI_SD, tmp, idx, ofs);
1484 if (ra_hasreg(src)) {
1485 if (irt_isinteger(ir->t)) {
1486 emit_dst(as, MIPSI_DADDU, tmp, tmp, type);
1487 emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0);
1488 } else {
1489 emit_dst(as, MIPSI_DADDU, tmp, src, type);
1490 }
1491 }
1492#endif
976 } 1493 }
977} 1494}
978 1495
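The 64-bit store path in asm_ahustore is the inverse: the type constant, preshifted into bits 47 and up, is added on top of the payload with DADDU, after DEXT has zero-extended integer payloads to their low 32 bits. A small sketch under the same layout assumption (names are illustrative):

#include <stdint.h>
static uint64_t tv_box(uint64_t payload47, int32_t itype)
{
  return payload47 + ((uint64_t)(int64_t)itype << 47);  /* DADDU src, type */
}
static uint64_t tv_box_int(int32_t i, int32_t itype)
{
  /* DEXT keeps bits 0..31 of the source before the type is added. */
  return (uint64_t)(uint32_t)i + ((uint64_t)(int64_t)itype << 47);
}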
979static void asm_sload(ASMState *as, IRIns *ir) 1496static void asm_sload(ASMState *as, IRIns *ir)
980{ 1497{
981 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
982 IRType1 t = ir->t;
983 Reg dest = RID_NONE, type = RID_NONE, base; 1498 Reg dest = RID_NONE, type = RID_NONE, base;
984 RegSet allow = RSET_GPR; 1499 RegSet allow = RSET_GPR;
985 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1500 IRType1 t = ir->t;
986 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1501#if LJ_32
987 lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1502 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1503 int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
1504 if (hiop)
1505 t.irt = IRT_NUM;
1506#else
1507 int32_t ofs = 8*((int32_t)ir->op1-2);
1508#endif
1509 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1510 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1511 lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1512 "inconsistent SLOAD variant");
1513#if LJ_SOFTFP32
1514 lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
1515 "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
1516 if (hiop && ra_used(ir+1)) {
1517 type = ra_dest(as, ir+1, allow);
1518 rset_clear(allow, type);
1519 }
1520#else
988 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1521 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
989 dest = ra_scratch(as, RSET_FPR); 1522 dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR);
990 asm_tointg(as, ir, dest); 1523 asm_tointg(as, ir, dest);
991 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1524 t.irt = IRT_NUM; /* Continue with a regular number type check. */
992 } else if (ra_used(ir)) { 1525 } else
993 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1526#endif
994 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1527 if (ra_used(ir)) {
1528 lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
1529 irt_isint(ir->t) || irt_isaddr(ir->t),
1530 "bad SLOAD type %d", irt_type(ir->t));
1531 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
995 rset_clear(allow, dest); 1532 rset_clear(allow, dest);
996 base = ra_alloc1(as, REF_BASE, allow); 1533 base = ra_alloc1(as, REF_BASE, allow);
997 rset_clear(allow, base); 1534 rset_clear(allow, base);
998 if ((ir->op2 & IRSLOAD_CONVERT)) { 1535 if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) {
999 if (irt_isint(t)) { 1536 if (irt_isint(t)) {
1000 Reg tmp = ra_scratch(as, RSET_FPR); 1537 Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
1538#if LJ_SOFTFP
1539 ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
1540 ra_destreg(as, ir, RID_RET);
1541 emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0);
1542 if (tmp != REGARG_FIRSTGPR)
1543 emit_move(as, REGARG_FIRSTGPR, tmp);
1544#else
1001 emit_tg(as, MIPSI_MFC1, dest, tmp); 1545 emit_tg(as, MIPSI_MFC1, dest, tmp);
1002 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 1546 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1547#endif
1003 dest = tmp; 1548 dest = tmp;
1004 t.irt = IRT_NUM; /* Check for original type. */ 1549 t.irt = IRT_NUM; /* Check for original type. */
1005 } else { 1550 } else {
1006 Reg tmp = ra_scratch(as, RSET_GPR); 1551 Reg tmp = ra_scratch(as, RSET_GPR);
1552#if LJ_SOFTFP
1553 ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
1554 ra_destreg(as, ir, RID_RET);
1555 emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0);
1556 emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0);
1557#else
1007 emit_fg(as, MIPSI_CVT_D_W, dest, dest); 1558 emit_fg(as, MIPSI_CVT_D_W, dest, dest);
1008 emit_tg(as, MIPSI_MTC1, tmp, dest); 1559 emit_tg(as, MIPSI_MTC1, tmp, dest);
1560#endif
1009 dest = tmp; 1561 dest = tmp;
1010 t.irt = IRT_INT; /* Check for original type. */ 1562 t.irt = IRT_INT; /* Check for original type. */
1011 } 1563 }
1012 } 1564 }
1565#if LJ_64
1566 else if (irt_isaddr(t)) {
1567 /* Clear type from pointers. */
1568 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1569 } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
1570 /* Sign-extend integers. */
1571 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1572 }
1573#endif
1013 goto dotypecheck; 1574 goto dotypecheck;
1014 } 1575 }
1015 base = ra_alloc1(as, REF_BASE, allow); 1576 base = ra_alloc1(as, REF_BASE, allow);
1016 rset_clear(allow, base); 1577 rset_clear(allow, base);
1017dotypecheck: 1578dotypecheck:
1018 if (irt_isnum(t)) { 1579#if LJ_32
1019 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1580 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1020 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1581 if (ra_noreg(type))
1021 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
1022 type = RID_TMP; 1582 type = RID_TMP;
1583 if (irt_isnum(t)) {
1584 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1585 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
1586 } else {
1587 Reg ktype = ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : irt_toitype(t), allow);
1588 asm_guard(as, MIPSI_BNE, type, ktype);
1023 } 1589 }
1024 if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); 1590 }
1025 } else { 1591 if (ra_hasreg(dest)) {
1026 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1592 if (!LJ_SOFTFP && irt_isnum(t))
1027 Reg ktype = ra_allock(as, irt_toitype(t), allow); 1593 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1028 asm_guard(as, MIPSI_BNE, RID_TMP, ktype); 1594 else
1029 type = RID_TMP; 1595 emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
1596 }
1597 if (ra_hasreg(type))
1598 emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
1599#else
1600 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1601 type = dest < RID_MAX_GPR ? dest : RID_TMP;
1602 if (irt_ispri(t)) {
1603 asm_guard(as, MIPSI_BNE, type,
1604 ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
1605 } else if ((ir->op2 & IRSLOAD_KEYINDEX)) {
1606 asm_guard(as, MIPSI_BNE, RID_TMP,
1607 ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
1608 emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 0);
1609 } else {
1610 if (irt_isnum(t)) {
1611 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1612 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
1613 if (!LJ_SOFTFP && ra_hasreg(dest))
1614 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1615 } else {
1616 asm_guard(as, MIPSI_BNE, RID_TMP,
1617 ra_allock(as, (int32_t)irt_toitype(t), allow));
1618 }
1619 emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15);
1030 } 1620 }
1031 if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); 1621 emit_tsi(as, MIPSI_LD, type, base, ofs);
1622 } else if (ra_hasreg(dest)) {
1623 if (!LJ_SOFTFP && irt_isnum(t))
1624 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1625 else
1626 emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
1627 ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0));
1032 } 1628 }
1033 if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); 1629#endif
1034} 1630}
1035 1631
1036/* -- Allocations --------------------------------------------------------- */ 1632/* -- Allocations --------------------------------------------------------- */
@@ -1039,19 +1635,16 @@ dotypecheck:
1039static void asm_cnew(ASMState *as, IRIns *ir) 1635static void asm_cnew(ASMState *as, IRIns *ir)
1040{ 1636{
1041 CTState *cts = ctype_ctsG(J2G(as->J)); 1637 CTState *cts = ctype_ctsG(J2G(as->J));
1042 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1638 CTypeID id = (CTypeID)IR(ir->op1)->i;
1043 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1639 CTSize sz;
1044 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1640 CTInfo info = lj_ctype_info(cts, id, &sz);
1045 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1641 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1046 IRRef args[2]; 1642 IRRef args[4];
1047 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1048 RegSet drop = RSET_SCRATCH; 1643 RegSet drop = RSET_SCRATCH;
1049 lua_assert(sz != CTSIZE_INVALID); 1644 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1645 "bad CNEW/CNEWI operands");
1050 1646
1051 args[0] = ASMREF_L; /* lua_State *L */
1052 args[1] = ASMREF_TMP1; /* MSize size */
1053 as->gcsteps++; 1647 as->gcsteps++;
1054
1055 if (ra_hasreg(ir->r)) 1648 if (ra_hasreg(ir->r))
1056 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1649 rset_clear(drop, ir->r); /* Dest reg handled below. */
1057 ra_evictset(as, drop); 1650 ra_evictset(as, drop);
@@ -1060,11 +1653,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1060 1653
1061 /* Initialize immutable cdata object. */ 1654 /* Initialize immutable cdata object. */
1062 if (ir->o == IR_CNEWI) { 1655 if (ir->o == IR_CNEWI) {
1656 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1657#if LJ_32
1063 int32_t ofs = sizeof(GCcdata); 1658 int32_t ofs = sizeof(GCcdata);
1064 lua_assert(sz == 4 || sz == 8);
1065 if (sz == 8) { 1659 if (sz == 8) {
1066 ofs += 4; 1660 ofs += 4;
1067 lua_assert((ir+1)->o == IR_HIOP); 1661 lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
1068 if (LJ_LE) ir++; 1662 if (LJ_LE) ir++;
1069 } 1663 }
1070 for (;;) { 1664 for (;;) {
@@ -1074,18 +1668,33 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1074 if (ofs == sizeof(GCcdata)) break; 1668 if (ofs == sizeof(GCcdata)) break;
1075 ofs -= 4; if (LJ_BE) ir++; else ir--; 1669 ofs -= 4; if (LJ_BE) ir++; else ir--;
1076 } 1670 }
1671#else
1672 emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow),
1673 RID_RET, sizeof(GCcdata));
1674#endif
1675 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1676 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1677 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1678 args[0] = ASMREF_L; /* lua_State *L */
1679 args[1] = ir->op1; /* CTypeID id */
1680 args[2] = ir->op2; /* CTSize sz */
1681 args[3] = ASMREF_TMP1; /* CTSize align */
1682 asm_gencall(as, ci, args);
1683 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1684 return;
1077 } 1685 }
1686
1078 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1687 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1079 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1688 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1080 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1689 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1081 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); 1690 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
1082 emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1691 emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1692 args[0] = ASMREF_L; /* lua_State *L */
1693 args[1] = ASMREF_TMP1; /* MSize size */
1083 asm_gencall(as, ci, args); 1694 asm_gencall(as, ci, args);
1084 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1695 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1085 ra_releasetmp(as, ASMREF_TMP1)); 1696 ra_releasetmp(as, ASMREF_TMP1));
1086} 1697}
1087#else
1088#define asm_cnew(as, ir) ((void)0)
1089#endif 1698#endif
1090 1699
1091/* -- Write barriers ------------------------------------------------------ */ 1700/* -- Write barriers ------------------------------------------------------ */
@@ -1096,7 +1705,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1096 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1705 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1097 Reg link = RID_TMP; 1706 Reg link = RID_TMP;
1098 MCLabel l_end = emit_label(as); 1707 MCLabel l_end = emit_label(as);
1099 emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); 1708 emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist));
1100 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1709 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
1101 emit_setgl(as, tab, gc.grayagain); 1710 emit_setgl(as, tab, gc.grayagain);
1102 emit_getgl(as, link, gc.grayagain); 1711 emit_getgl(as, link, gc.grayagain);
@@ -1113,13 +1722,13 @@ static void asm_obar(ASMState *as, IRIns *ir)
1113 MCLabel l_end; 1722 MCLabel l_end;
1114 Reg obj, val, tmp; 1723 Reg obj, val, tmp;
1115 /* No need for other object barriers (yet). */ 1724 /* No need for other object barriers (yet). */
1116 lua_assert(IR(ir->op1)->o == IR_UREFC); 1725 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1117 ra_evictset(as, RSET_SCRATCH); 1726 ra_evictset(as, RSET_SCRATCH);
1118 l_end = emit_label(as); 1727 l_end = emit_label(as);
1119 args[0] = ASMREF_TMP1; /* global_State *g */ 1728 args[0] = ASMREF_TMP1; /* global_State *g */
1120 args[1] = ir->op1; /* TValue *tv */ 1729 args[1] = ir->op1; /* TValue *tv */
1121 asm_gencall(as, ci, args); 1730 asm_gencall(as, ci, args);
1122 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 1731 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1123 obj = IR(ir->op1)->r; 1732 obj = IR(ir->op1)->r;
1124 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); 1733 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
1125 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); 1734 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
@@ -1134,6 +1743,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1134 1743
1135/* -- Arithmetic and logic operations ------------------------------------- */ 1744/* -- Arithmetic and logic operations ------------------------------------- */
1136 1745
1746#if !LJ_SOFTFP
1137static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) 1747static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
1138{ 1748{
1139 Reg dest = ra_dest(as, ir, RSET_FPR); 1749 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1148,83 +1758,147 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1148 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); 1758 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1149 emit_fg(as, mi, dest, left); 1759 emit_fg(as, mi, dest, left);
1150} 1760}
1761#endif
1151 1762
1152static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1763#if !LJ_SOFTFP32
1153{ 1764static void asm_fpmath(ASMState *as, IRIns *ir)
1154 IRIns *irp = IR(ir->op1); 1765{
1155 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1766#if !LJ_SOFTFP
1156 IRIns *irpp = IR(irp->op1); 1767 if (ir->op2 <= IRFPM_TRUNC)
1157 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1768 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1158 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1769 else if (ir->op2 == IRFPM_SQRT)
1159 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1770 asm_fpunary(as, ir, MIPSI_SQRT_D);
1160 IRRef args[2]; 1771 else
1161 args[0] = irpp->op1; 1772#endif
1162 args[1] = irp->op2; 1773 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1163 asm_setupresult(as, ir, ci);
1164 asm_gencall(as, ci, args);
1165 return 1;
1166 }
1167 }
1168 return 0;
1169} 1774}
1775#endif
1776
1777#if !LJ_SOFTFP
1778#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D)
1779#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D)
1780#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D)
1781#elif LJ_64 /* && LJ_SOFTFP */
1782#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add)
1783#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub)
1784#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul)
1785#endif
1170 1786
1171static void asm_add(ASMState *as, IRIns *ir) 1787static void asm_add(ASMState *as, IRIns *ir)
1172{ 1788{
1173 if (irt_isnum(ir->t)) { 1789 IRType1 t = ir->t;
1174 asm_fparith(as, ir, MIPSI_ADD_D); 1790#if !LJ_SOFTFP32
1175 } else { 1791 if (irt_isnum(t)) {
1792 asm_fpadd(as, ir);
1793 } else
1794#endif
1795 {
1796 /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */
1176 Reg dest = ra_dest(as, ir, RSET_GPR); 1797 Reg dest = ra_dest(as, ir, RSET_GPR);
1177 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1798 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1178 if (irref_isk(ir->op2)) { 1799 if (irref_isk(ir->op2)) {
1179 int32_t k = IR(ir->op2)->i; 1800 intptr_t k = get_kval(as, ir->op2);
1180 if (checki16(k)) { 1801 if (checki16(k)) {
1181 emit_tsi(as, MIPSI_ADDIU, dest, left, k); 1802 emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest,
1803 left, k);
1182 return; 1804 return;
1183 } 1805 }
1184 } 1806 }
1185 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); 1807 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1186 emit_dst(as, MIPSI_ADDU, dest, left, right); 1808 emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest,
1809 left, right);
1187 } 1810 }
1188} 1811}
1189 1812
1190static void asm_sub(ASMState *as, IRIns *ir) 1813static void asm_sub(ASMState *as, IRIns *ir)
1191{ 1814{
1815#if !LJ_SOFTFP32
1192 if (irt_isnum(ir->t)) { 1816 if (irt_isnum(ir->t)) {
1193 asm_fparith(as, ir, MIPSI_SUB_D); 1817 asm_fpsub(as, ir);
1194 } else { 1818 } else
1819#endif
1820 {
1195 Reg dest = ra_dest(as, ir, RSET_GPR); 1821 Reg dest = ra_dest(as, ir, RSET_GPR);
1196 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1822 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1197 right = (left >> 8); left &= 255; 1823 right = (left >> 8); left &= 255;
1198 emit_dst(as, MIPSI_SUBU, dest, left, right); 1824 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1825 left, right);
1199 } 1826 }
1200} 1827}
1201 1828
1202static void asm_mul(ASMState *as, IRIns *ir) 1829static void asm_mul(ASMState *as, IRIns *ir)
1203{ 1830{
1831#if !LJ_SOFTFP32
1204 if (irt_isnum(ir->t)) { 1832 if (irt_isnum(ir->t)) {
1205 asm_fparith(as, ir, MIPSI_MUL_D); 1833 asm_fpmul(as, ir);
1206 } else { 1834 } else
1835#endif
1836 {
1207 Reg dest = ra_dest(as, ir, RSET_GPR); 1837 Reg dest = ra_dest(as, ir, RSET_GPR);
1208 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1838 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1209 right = (left >> 8); left &= 255; 1839 right = (left >> 8); left &= 255;
1210 emit_dst(as, MIPSI_MUL, dest, left, right); 1840 if (LJ_64 && irt_is64(ir->t)) {
1841#if !LJ_TARGET_MIPSR6
1842 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1843 emit_dst(as, MIPSI_DMULT, 0, left, right);
1844#else
1845 emit_dst(as, MIPSI_DMUL, dest, left, right);
1846#endif
1847 } else {
1848 emit_dst(as, MIPSI_MUL, dest, left, right);
1849 }
1211 } 1850 }
1212} 1851}
1213 1852
1853#if !LJ_SOFTFP32
1854static void asm_fpdiv(ASMState *as, IRIns *ir)
1855{
1856#if !LJ_SOFTFP
1857 asm_fparith(as, ir, MIPSI_DIV_D);
1858#else
1859 asm_callid(as, ir, IRCALL_softfp_div);
1860#endif
1861}
1862#endif
1863
1214static void asm_neg(ASMState *as, IRIns *ir) 1864static void asm_neg(ASMState *as, IRIns *ir)
1215{ 1865{
1866#if !LJ_SOFTFP
1216 if (irt_isnum(ir->t)) { 1867 if (irt_isnum(ir->t)) {
1217 asm_fpunary(as, ir, MIPSI_NEG_D); 1868 asm_fpunary(as, ir, MIPSI_NEG_D);
1218 } else { 1869 } else
1870#elif LJ_64 /* && LJ_SOFTFP */
1871 if (irt_isnum(ir->t)) {
1872 Reg dest = ra_dest(as, ir, RSET_GPR);
1873 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1874 emit_dst(as, MIPSI_XOR, dest, left,
1875 ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest)));
1876 } else
1877#endif
1878 {
1219 Reg dest = ra_dest(as, ir, RSET_GPR); 1879 Reg dest = ra_dest(as, ir, RSET_GPR);
1220 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1880 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1221 emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); 1881 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1882 RID_ZERO, left);
1222 } 1883 }
1223} 1884}
1224 1885
1886#if !LJ_SOFTFP
1887#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
1888#elif LJ_64 /* && LJ_SOFTFP */
1889static void asm_abs(ASMState *as, IRIns *ir)
1890{
1891 Reg dest = ra_dest(as, ir, RSET_GPR);
1892 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1893 emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0);
1894}
1895#endif
1896
1225static void asm_arithov(ASMState *as, IRIns *ir) 1897static void asm_arithov(ASMState *as, IRIns *ir)
1226{ 1898{
1899 /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
1227 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1900 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
1901 lj_assertA(!irt_is64(ir->t), "bad usage");
1228 if (irref_isk(ir->op2)) { 1902 if (irref_isk(ir->op2)) {
1229 int k = IR(ir->op2)->i; 1903 int k = IR(ir->op2)->i;
1230 if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); 1904 if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u);
@@ -1255,16 +1929,29 @@ static void asm_arithov(ASMState *as, IRIns *ir)
1255 emit_move(as, RID_TMP, dest == left ? left : right); 1929 emit_move(as, RID_TMP, dest == left ? left : right);
1256} 1930}
1257 1931
1932#define asm_addov(as, ir) asm_arithov(as, ir)
1933#define asm_subov(as, ir) asm_arithov(as, ir)
1934
1258static void asm_mulov(ASMState *as, IRIns *ir) 1935static void asm_mulov(ASMState *as, IRIns *ir)
1259{ 1936{
1260#if LJ_DUALNUM 1937 Reg dest = ra_dest(as, ir, RSET_GPR);
1261#error "NYI: MULOV" 1938 Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
1939 right = (left >> 8); left &= 255;
1940 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
1941 right), dest));
1942 asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
1943 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
1944#if !LJ_TARGET_MIPSR6
1945 emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
1946 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1947 emit_dst(as, MIPSI_MULT, 0, left, right);
1262#else 1948#else
1263 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ 1949 emit_dst(as, MIPSI_MUL, dest, left, right);
1950 emit_dst(as, MIPSI_MUH, tmp, left, right);
1264#endif 1951#endif
1265} 1952}
1266 1953
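asm_mulov detects signed 32-bit overflow from the full 64-bit product: after MULT (or MUL/MUH on R6) it compares the high word against the arithmetic sign-extension of the low word and exits on mismatch. A standalone sketch of the same predicate:

#include <stdint.h>
/* Not from the patch: C equivalent of the MFHI/MFLO (or MUH/MUL) + SRA + BNE check. */
static int mul32_overflows(int32_t a, int32_t b)
{
  int64_t p = (int64_t)a * (int64_t)b;
  return (int32_t)(p >> 32) != ((int32_t)p >> 31);
}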
1267#if LJ_HASFFI 1954#if LJ_32 && LJ_HASFFI
1268static void asm_add64(ASMState *as, IRIns *ir) 1955static void asm_add64(ASMState *as, IRIns *ir)
1269{ 1956{
1270 Reg dest = ra_dest(as, ir, RSET_GPR); 1957 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1348,7 +2035,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1348} 2035}
1349#endif 2036#endif
1350 2037
1351static void asm_bitnot(ASMState *as, IRIns *ir) 2038static void asm_bnot(ASMState *as, IRIns *ir)
1352{ 2039{
1353 Reg left, right, dest = ra_dest(as, ir, RSET_GPR); 2040 Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
1354 IRIns *irl = IR(ir->op1); 2041 IRIns *irl = IR(ir->op1);
@@ -1362,11 +2049,12 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
1362 emit_dst(as, MIPSI_NOR, dest, left, right); 2049 emit_dst(as, MIPSI_NOR, dest, left, right);
1363} 2050}
1364 2051
1365static void asm_bitswap(ASMState *as, IRIns *ir) 2052static void asm_bswap(ASMState *as, IRIns *ir)
1366{ 2053{
1367 Reg dest = ra_dest(as, ir, RSET_GPR); 2054 Reg dest = ra_dest(as, ir, RSET_GPR);
1368 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 2055 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1369 if ((as->flags & JIT_F_MIPS32R2)) { 2056#if LJ_32
2057 if ((as->flags & JIT_F_MIPSXXR2)) {
1370 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); 2058 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
1371 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); 2059 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
1372 } else { 2060 } else {
@@ -1381,6 +2069,15 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1381 emit_dta(as, MIPSI_SRL, tmp, left, 24); 2069 emit_dta(as, MIPSI_SRL, tmp, left, 24);
1382 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); 2070 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
1383 } 2071 }
2072#else
2073 if (irt_is64(ir->t)) {
2074 emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP);
2075 emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left);
2076 } else {
2077 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
2078 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
2079 }
2080#endif
1384} 2081}
1385 2082
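On CPUs with the R2 instruction set extensions (and always in the 64-bit path), asm_bswap composes the 32-bit byte swap from WSBH followed by a 16-bit rotate: swapping the bytes inside each halfword and then swapping the halfwords reverses all four bytes. A minimal sketch of that pair in execution order:

#include <stdint.h>
static uint32_t bswap32_wsbh(uint32_t x)
{
  uint32_t h = ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);  /* WSBH */
  return (h << 16) | (h >> 16);                                      /* ROTR 16 */
}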
1386static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 2083static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
@@ -1388,7 +2085,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1388 Reg dest = ra_dest(as, ir, RSET_GPR); 2085 Reg dest = ra_dest(as, ir, RSET_GPR);
1389 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 2086 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1390 if (irref_isk(ir->op2)) { 2087 if (irref_isk(ir->op2)) {
1391 int32_t k = IR(ir->op2)->i; 2088 intptr_t k = get_kval(as, ir->op2);
1392 if (checku16(k)) { 2089 if (checku16(k)) {
1393 emit_tsi(as, mik, dest, left, k); 2090 emit_tsi(as, mik, dest, left, k);
1394 return; 2091 return;
@@ -1398,22 +2095,34 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1398 emit_dst(as, mi, dest, left, right); 2095 emit_dst(as, mi, dest, left, right);
1399} 2096}
1400 2097
2098#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
2099#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
2100#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
2101
1401static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 2102static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1402{ 2103{
1403 Reg dest = ra_dest(as, ir, RSET_GPR); 2104 Reg dest = ra_dest(as, ir, RSET_GPR);
1404 if (irref_isk(ir->op2)) { /* Constant shifts. */ 2105 if (irref_isk(ir->op2)) { /* Constant shifts. */
1405 uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); 2106 uint32_t shift = (uint32_t)IR(ir->op2)->i;
1406 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); 2107 if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D;
2108 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR),
2109 (shift & 31));
1407 } else { 2110 } else {
1408 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 2111 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1409 right = (left >> 8); left &= 255; 2112 right = (left >> 8); left &= 255;
2113 if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV;
1410 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ 2114 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */
1411 } 2115 }
1412} 2116}
1413 2117
1414static void asm_bitror(ASMState *as, IRIns *ir) 2118#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
2119#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
2120#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
2121#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
2122
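The constant-shift case above also shows how MIPS64 encodes 64-bit shifts: the 5-bit sa field only covers amounts 0-31, so shifts of 32-63 select the separate *32 opcode variant, which is what the `(shift & 32) ? MIPSI_D32 : MIPSI_D` adjustment picks. A sketch of the equivalent selection in C:

#include <stdint.h>
/* Illustrative only: a constant 64-bit left shift split the way DSLL/DSLL32 are. */
static uint64_t dsll_const(uint64_t x, unsigned shift)  /* 0 <= shift <= 63 */
{
  return (shift & 32) ? ((x << 32) << (shift & 31))  /* DSLL32 */
                      : (x << (shift & 31));         /* DSLL   */
}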
2123static void asm_bror(ASMState *as, IRIns *ir)
1415{ 2124{
1416 if ((as->flags & JIT_F_MIPS32R2)) { 2125 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
1417 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 2126 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
1418 } else { 2127 } else {
1419 Reg dest = ra_dest(as, ir, RSET_GPR); 2128 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1432,55 +2141,182 @@ static void asm_bitror(ASMState *as, IRIns *ir)
1432 } 2141 }
1433} 2142}
1434 2143
2144#if LJ_SOFTFP
2145static void asm_sfpmin_max(ASMState *as, IRIns *ir)
2146{
2147 CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
2148#if LJ_64
2149 IRRef args[2];
2150 args[0] = ir->op1;
2151 args[1] = ir->op2;
2152#else
2153 IRRef args[4];
2154 args[0^LJ_BE] = ir->op1;
2155 args[1^LJ_BE] = (ir+1)->op1;
2156 args[2^LJ_BE] = ir->op2;
2157 args[3^LJ_BE] = (ir+1)->op2;
2158#endif
2159 asm_setupresult(as, ir, &ci);
2160 emit_call(as, (void *)ci.func, 0);
2161 ci.func = NULL;
2162 asm_gencall(as, &ci, args);
2163}
2164#endif
2165
1435static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 2166static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1436{ 2167{
1437 if (irt_isnum(ir->t)) { 2168 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2169#if LJ_SOFTFP
2170 asm_sfpmin_max(as, ir);
2171#else
1438 Reg dest = ra_dest(as, ir, RSET_FPR); 2172 Reg dest = ra_dest(as, ir, RSET_FPR);
1439 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2173 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1440 right = (left >> 8); left &= 255; 2174 right = (left >> 8); left &= 255;
2175#if !LJ_TARGET_MIPSR6
1441 if (dest == left) { 2176 if (dest == left) {
1442 emit_fg(as, MIPSI_MOVT_D, dest, right); 2177 emit_fg(as, MIPSI_MOVF_D, dest, right);
1443 } else { 2178 } else {
1444 emit_fg(as, MIPSI_MOVF_D, dest, left); 2179 emit_fg(as, MIPSI_MOVT_D, dest, left);
1445 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); 2180 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
1446 } 2181 }
1447 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); 2182 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right);
2183#else
2184 emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right);
2185#endif
2186#endif
1448 } else { 2187 } else {
1449 Reg dest = ra_dest(as, ir, RSET_GPR); 2188 Reg dest = ra_dest(as, ir, RSET_GPR);
1450 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 2189 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1451 right = (left >> 8); left &= 255; 2190 right = (left >> 8); left &= 255;
1452 if (dest == left) { 2191 if (left == right) {
1453 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); 2192 if (dest != left) emit_move(as, dest, left);
1454 } else { 2193 } else {
1455 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); 2194#if !LJ_TARGET_MIPSR6
1456 if (dest != right) emit_move(as, dest, right); 2195 if (dest == left) {
2196 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
2197 } else {
2198 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
2199 if (dest != right) emit_move(as, dest, right);
2200 }
2201#else
2202 emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
2203 if (dest != right) {
2204 emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP);
2205 emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP);
2206 } else {
2207 emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP);
2208 emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP);
2209 }
2210#endif
2211 emit_dst(as, MIPSI_SLT, RID_TMP,
2212 ismax ? left : right, ismax ? right : left);
1457 } 2213 }
1458 emit_dst(as, MIPSI_SLT, RID_TMP,
1459 ismax ? left : right, ismax ? right : left);
1460 } 2214 }
1461} 2215}
1462 2216
2217#define asm_min(as, ir) asm_min_max(as, ir, 0)
2218#define asm_max(as, ir) asm_min_max(as, ir, 1)
2219
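The integer half of asm_min_max stays branchless: SLT computes the predicate, then pre-R6 code uses MOVN/MOVZ to conditionally move one operand into the destination, while R6 rebuilds the select from SELNEZ/SELEQZ plus an OR. A sketch of the value selected for the max case:

/* Not from the patch: the result computed by SLT + MOVN/MOVZ (or SELNEZ/SELEQZ + OR). */
static int imax_select(int left, int right)
{
  int p = (left < right);   /* SLT  */
  return p ? right : left;  /* conditional move / select */
}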
1463/* -- Comparisons --------------------------------------------------------- */ 2220/* -- Comparisons --------------------------------------------------------- */
1464 2221
2222#if LJ_SOFTFP
2223/* SFP comparisons. */
2224static void asm_sfpcomp(ASMState *as, IRIns *ir)
2225{
2226 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
2227 RegSet drop = RSET_SCRATCH;
2228 Reg r;
2229#if LJ_64
2230 IRRef args[2];
2231 args[0] = ir->op1;
2232 args[1] = ir->op2;
2233#else
2234 IRRef args[4];
2235 args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
2236 args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
2237#endif
2238
2239 for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) {
2240 if (!rset_test(as->freeset, r) &&
2241 regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
2242 rset_clear(drop, r);
2243 }
2244 ra_evictset(as, drop);
2245
2246 asm_setupresult(as, ir, ci);
2247
2248 switch ((IROp)ir->o) {
2249 case IR_LT:
2250 asm_guard(as, MIPSI_BGEZ, RID_RET, 0);
2251 break;
2252 case IR_ULT:
2253 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2254 emit_loadi(as, RID_TMP, 1);
2255 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);
2256 break;
2257 case IR_GE:
2258 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2259 emit_loadi(as, RID_TMP, 2);
2260 asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
2261 break;
2262 case IR_LE:
2263 asm_guard(as, MIPSI_BGTZ, RID_RET, 0);
2264 break;
2265 case IR_GT:
2266 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2267 emit_loadi(as, RID_TMP, 2);
2268 asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
2269 break;
2270 case IR_UGE:
2271 asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
2272 break;
2273 case IR_ULE:
2274 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2275 emit_loadi(as, RID_TMP, 1);
2276 break;
2277 case IR_UGT: case IR_ABC:
2278 asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
2279 break;
2280 case IR_EQ: case IR_NE:
2281 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO);
2282 default:
2283 break;
2284 }
2285 asm_gencall(as, ci, args);
2286}
2287#endif
2288
1465static void asm_comp(ASMState *as, IRIns *ir) 2289static void asm_comp(ASMState *as, IRIns *ir)
1466{ 2290{
1467 /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ 2291 /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
1468 IROp op = ir->o; 2292 IROp op = ir->o;
1469 if (irt_isnum(ir->t)) { 2293 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2294#if LJ_SOFTFP
2295 asm_sfpcomp(as, ir);
2296#else
2297#if !LJ_TARGET_MIPSR6
1470 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2298 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1471 right = (left >> 8); left &= 255; 2299 right = (left >> 8); left &= 255;
1472 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2300 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1473 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); 2301 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
2302#else
2303 Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR);
2304 right = (left >> 8); left &= 255;
2305 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2306 asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2307 emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right);
2308#endif
2309#endif
1474 } else { 2310 } else {
1475 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); 2311 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
1476 if (op == IR_ABC) op = IR_UGT; 2312 if (op == IR_ABC) op = IR_UGT;
1477 if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { 2313 if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) {
1478 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : 2314 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
1479 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); 2315 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
1480 asm_guard(as, mi, left, 0); 2316 asm_guard(as, mi, left, 0);
1481 } else { 2317 } else {
1482 if (irref_isk(ir->op2)) { 2318 if (irref_isk(ir->op2)) {
1483 int32_t k = IR(ir->op2)->i; 2319 intptr_t k = get_kval(as, ir->op2);
1484 if ((op&2)) k++; 2320 if ((op&2)) k++;
1485 if (checki16(k)) { 2321 if (checki16(k)) {
1486 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); 2322 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -1497,19 +2333,28 @@ static void asm_comp(ASMState *as, IRIns *ir)
1497 } 2333 }
1498} 2334}
1499 2335
1500static void asm_compeq(ASMState *as, IRIns *ir) 2336static void asm_equal(ASMState *as, IRIns *ir)
1501{ 2337{
1502 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); 2338 Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
2339 RSET_FPR : RSET_GPR);
1503 right = (left >> 8); left &= 255; 2340 right = (left >> 8); left &= 255;
1504 if (irt_isnum(ir->t)) { 2341 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2342#if LJ_SOFTFP
2343 asm_sfpcomp(as, ir);
2344#elif !LJ_TARGET_MIPSR6
1505 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2345 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1506 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); 2346 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
2347#else
2348 Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2349 asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2350 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right);
2351#endif
1507 } else { 2352 } else {
1508 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); 2353 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
1509 } 2354 }
1510} 2355}
1511 2356
1512#if LJ_HASFFI 2357#if LJ_32 && LJ_HASFFI
1513/* 64 bit integer comparisons. */ 2358/* 64 bit integer comparisons. */
1514static void asm_comp64(ASMState *as, IRIns *ir) 2359static void asm_comp64(ASMState *as, IRIns *ir)
1515{ 2360{
@@ -1546,54 +2391,99 @@ static void asm_comp64eq(ASMState *as, IRIns *ir)
1546} 2391}
1547#endif 2392#endif
1548 2393
1549/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ 2394/* -- Split register ops -------------------------------------------------- */
1550 2395
1551/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 2396/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
1552static void asm_hiop(ASMState *as, IRIns *ir) 2397static void asm_hiop(ASMState *as, IRIns *ir)
1553{ 2398{
1554#if LJ_HASFFI
1555 /* HIOP is marked as a store because it needs its own DCE logic. */ 2399 /* HIOP is marked as a store because it needs its own DCE logic. */
1556 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2400 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1557 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2401 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2402#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
1558 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2403 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
1559 as->curins--; /* Always skip the CONV. */ 2404 as->curins--; /* Always skip the CONV. */
2405#if LJ_HASFFI && !LJ_SOFTFP
1560 if (usehi || uselo) 2406 if (usehi || uselo)
1561 asm_conv64(as, ir); 2407 asm_conv64(as, ir);
1562 return; 2408 return;
2409#endif
1563 } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ 2410 } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */
1564 as->curins--; /* Always skip the loword comparison. */ 2411 as->curins--; /* Always skip the loword comparison. */
2412#if LJ_SOFTFP
2413 if (!irt_isint(ir->t)) {
2414 asm_sfpcomp(as, ir-1);
2415 return;
2416 }
2417#endif
2418#if LJ_HASFFI
1565 asm_comp64(as, ir); 2419 asm_comp64(as, ir);
2420#endif
1566 return; 2421 return;
1567 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2422 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
1568 as->curins--; /* Always skip the loword comparison. */ 2423 as->curins--; /* Always skip the loword comparison. */
2424#if LJ_SOFTFP
2425 if (!irt_isint(ir->t)) {
2426 asm_sfpcomp(as, ir-1);
2427 return;
2428 }
2429#endif
2430#if LJ_HASFFI
1569 asm_comp64eq(as, ir); 2431 asm_comp64eq(as, ir);
2432#endif
1570 return; 2433 return;
2434#if LJ_SOFTFP
2435 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
2436 as->curins--; /* Always skip the loword min/max. */
2437 if (uselo || usehi)
2438 asm_sfpmin_max(as, ir-1);
2439 return;
2440#endif
1571 } else if ((ir-1)->o == IR_XSTORE) { 2441 } else if ((ir-1)->o == IR_XSTORE) {
1572 as->curins--; /* Handle both stores here. */ 2442 as->curins--; /* Handle both stores here. */
1573 if ((ir-1)->r != RID_SINK) { 2443 if ((ir-1)->r != RID_SINK) {
1574 asm_xstore(as, ir, LJ_LE ? 4 : 0); 2444 asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1575 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 2445 asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1576 } 2446 }
1577 return; 2447 return;
1578 } 2448 }
2449#endif
1579 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 2450 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1580 switch ((ir-1)->o) { 2451 switch ((ir-1)->o) {
2452#if LJ_32 && LJ_HASFFI
1581 case IR_ADD: as->curins--; asm_add64(as, ir); break; 2453 case IR_ADD: as->curins--; asm_add64(as, ir); break;
1582 case IR_SUB: as->curins--; asm_sub64(as, ir); break; 2454 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1583 case IR_NEG: as->curins--; asm_neg64(as, ir); break; 2455 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
1584 case IR_CALLN: 2456 case IR_CNEWI:
1585 case IR_CALLXS: 2457 /* Nothing to do here. Handled by lo op itself. */
2458 break;
2459#endif
2460#if LJ_32 && LJ_SOFTFP
2461 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2462 case IR_STRTO:
1586 if (!uselo) 2463 if (!uselo)
1587 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 2464 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
1588 break; 2465 break;
1589 case IR_CNEWI: 2466 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
1590 /* Nothing to do here. Handled by lo op itself. */ 2467 /* Nothing to do here. Handled by lo op itself. */
1591 break; 2468 break;
1592 default: lua_assert(0); break;
1593 }
1594#else
1595 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */
1596#endif 2469#endif
2470 case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
2471 if (!uselo)
2472 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
2473 break;
2474 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
2475 }
2476}
2477
2478/* -- Profiling ----------------------------------------------------------- */
2479
2480static void asm_prof(ASMState *as, IRIns *ir)
2481{
2482 UNUSED(ir);
2483 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
2484 emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
2485 emit_lsglptr(as, MIPSI_LBU, RID_TMP,
2486 (int32_t)offsetof(global_State, hookmask));
1597} 2487}
1598 2488
1599/* -- Stack handling ------------------------------------------------------ */ 2489/* -- Stack handling ------------------------------------------------------ */
@@ -1606,47 +2496,70 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1606 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; 2496 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
1607 ExitNo oldsnap = as->snapno; 2497 ExitNo oldsnap = as->snapno;
1608 rset_clear(allow, pbase); 2498 rset_clear(allow, pbase);
2499#if LJ_32
1609 tmp = allow ? rset_pickbot(allow) : 2500 tmp = allow ? rset_pickbot(allow) :
1610 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); 2501 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
2502#else
2503 tmp = allow ? rset_pickbot(allow) : RID_RET;
2504#endif
1611 as->snapno = exitno; 2505 as->snapno = exitno;
1612 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); 2506 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1613 as->snapno = oldsnap; 2507 as->snapno = oldsnap;
1614 if (allow == RSET_EMPTY) /* Restore temp. register. */ 2508 if (allow == RSET_EMPTY) /* Restore temp. register. */
1615 emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); 2509 emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0);
1616 else 2510 else
1617 ra_modified(as, tmp); 2511 ra_modified(as, tmp);
1618 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); 2512 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
1619 emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); 2513 emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase);
1620 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); 2514 emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack));
1621 if (pbase == RID_TMP) 2515 if (pbase == RID_TMP)
1622 emit_getgl(as, RID_TMP, jit_base); 2516 emit_getgl(as, RID_TMP, jit_base);
1623 emit_getgl(as, tmp, jit_L); 2517 emit_getgl(as, tmp, cur_L);
1624 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2518 if (allow == RSET_EMPTY) /* Spill temp. register. */
1625 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); 2519 emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0);
1626} 2520}
1627 2521
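asm_stack_check compares the bytes left on the Lua stack against the trace's requirement using only unsigned arithmetic: ASUBU forms maxstack - base, SLTIU tests it against 8*topslot, and the BNE guard exits when the test is true. A compact sketch of the exit predicate (8 bytes per stack slot):

#include <stdint.h>
/* Illustrative only: the condition under which the assembled check leaves the trace. */
static int stack_check_fails(const char *maxstack, const char *base, uint32_t topslot)
{
  return (uintptr_t)(maxstack - base) < 8u * topslot;
}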
1628/* Restore Lua stack from on-trace state. */ 2522/* Restore Lua stack from on-trace state. */
1629static void asm_stack_restore(ASMState *as, SnapShot *snap) 2523static void asm_stack_restore(ASMState *as, SnapShot *snap)
1630{ 2524{
1631 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2525 SnapEntry *map = &as->T->snapmap[snap->mapofs];
1632 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2526#if LJ_32 || defined(LUA_USE_ASSERT)
2527 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2528#endif
1633 MSize n, nent = snap->nent; 2529 MSize n, nent = snap->nent;
1634 /* Store the value of all modified slots to the Lua stack. */ 2530 /* Store the value of all modified slots to the Lua stack. */
1635 for (n = 0; n < nent; n++) { 2531 for (n = 0; n < nent; n++) {
1636 SnapEntry sn = map[n]; 2532 SnapEntry sn = map[n];
1637 BCReg s = snap_slot(sn); 2533 BCReg s = snap_slot(sn);
1638 int32_t ofs = 8*((int32_t)s-1); 2534 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1639 IRRef ref = snap_ref(sn); 2535 IRRef ref = snap_ref(sn);
1640 IRIns *ir = IR(ref); 2536 IRIns *ir = IR(ref);
1641 if ((sn & SNAP_NORESTORE)) 2537 if ((sn & SNAP_NORESTORE))
1642 continue; 2538 continue;
1643 if (irt_isnum(ir->t)) { 2539 if (irt_isnum(ir->t)) {
2540#if LJ_SOFTFP32
2541 Reg tmp;
2542 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2543 /* LJ_SOFTFP: must be a number constant. */
2544 lj_assertA(irref_isk(ref), "unsplit FP op");
2545 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2546 emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2547 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2548 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2549 emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2550#elif LJ_SOFTFP /* && LJ_64 */
2551 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2552 emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs);
2553#else
1644 Reg src = ra_alloc1(as, ref, RSET_FPR); 2554 Reg src = ra_alloc1(as, ref, RSET_FPR);
1645 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); 2555 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
2556#endif
1646 } else { 2557 } else {
1647 Reg type; 2558#if LJ_32
1648 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2559 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1649 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2560 Reg type;
2561 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2562 "restore of IR type %d", irt_type(ir->t));
1650 if (!irt_ispri(ir->t)) { 2563 if (!irt_ispri(ir->t)) {
1651 Reg src = ra_alloc1(as, ref, allow); 2564 Reg src = ra_alloc1(as, ref, allow);
1652 rset_clear(allow, src); 2565 rset_clear(allow, src);
@@ -1655,14 +2568,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1655 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2568 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1656 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2569 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1657 type = ra_allock(as, (int32_t)(*flinks--), allow); 2570 type = ra_allock(as, (int32_t)(*flinks--), allow);
2571#if LJ_SOFTFP
2572 } else if ((sn & SNAP_SOFTFPNUM)) {
2573 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2574#endif
2575 } else if ((sn & SNAP_KEYINDEX)) {
2576 type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
1658 } else { 2577 } else {
1659 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2578 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1660 } 2579 }
1661 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); 2580 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
2581#else
2582 if ((sn & SNAP_KEYINDEX)) {
2583 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2584 int64_t kki = (int64_t)LJ_KEYINDEX << 32;
2585 if (irref_isk(ref)) {
2586 emit_tsi(as, MIPSI_SD,
2587 ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
2588 RID_BASE, ofs);
2589 } else {
2590 Reg src = ra_alloc1(as, ref, allow);
2591 Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
2592 emit_tsi(as, MIPSI_SD, RID_TMP, RID_BASE, ofs);
2593 emit_dst(as, MIPSI_DADDU, RID_TMP, src, rki);
2594 }
2595 } else {
2596 asm_tvstore64(as, RID_BASE, ofs, ref);
2597 }
2598#endif
1662 } 2599 }
1663 checkmclim(as); 2600 checkmclim(as);
1664 } 2601 }
1665 lua_assert(map + nent == flinks); 2602 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
1666} 2603}
1667 2604
1668/* -- GC handling --------------------------------------------------------- */ 2605/* -- GC handling --------------------------------------------------------- */
@@ -1686,7 +2623,7 @@ static void asm_gc_check(ASMState *as)
1686 args[1] = ASMREF_TMP2; /* MSize steps */ 2623 args[1] = ASMREF_TMP2; /* MSize steps */
1687 asm_gencall(as, ci, args); 2624 asm_gencall(as, ci, args);
1688 l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ 2625 l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */
1689 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 2626 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1690 tmp = ra_releasetmp(as, ASMREF_TMP2); 2627 tmp = ra_releasetmp(as, ASMREF_TMP2);
1691 emit_loadi(as, tmp, as->gcsteps); 2628 emit_loadi(as, tmp, as->gcsteps);
1692 /* Jump around GC step if GC total < GC threshold. */ 2629 /* Jump around GC step if GC total < GC threshold. */
@@ -1714,6 +2651,12 @@ static void asm_loop_fixup(ASMState *as)
1714 } 2651 }
1715} 2652}
1716 2653
2654/* Fixup the tail of the loop. */
2655static void asm_loop_tail_fixup(ASMState *as)
2656{
2657 if (as->loopinv) as->mctop--;
2658}
2659
1717/* -- Head of trace ------------------------------------------------------- */ 2660/* -- Head of trace ------------------------------------------------------- */
1718 2661
1719/* Coalesce BASE register for a root trace. */ 2662/* Coalesce BASE register for a root trace. */
@@ -1721,7 +2664,6 @@ static void asm_head_root_base(ASMState *as)
1721{ 2664{
1722 IRIns *ir = IR(REF_BASE); 2665 IRIns *ir = IR(REF_BASE);
1723 Reg r = ir->r; 2666 Reg r = ir->r;
1724 if (as->loopinv) as->mctop--;
1725 if (ra_hasreg(r)) { 2667 if (ra_hasreg(r)) {
1726 ra_free(as, r); 2668 ra_free(as, r);
1727 if (rset_test(as->modset, r) || irt_ismarked(ir->t)) 2669 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
@@ -1736,7 +2678,6 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
1736{ 2678{
1737 IRIns *ir = IR(REF_BASE); 2679 IRIns *ir = IR(REF_BASE);
1738 Reg r = ir->r; 2680 Reg r = ir->r;
1739 if (as->loopinv) as->mctop--;
1740 if (ra_hasreg(r)) { 2681 if (ra_hasreg(r)) {
1741 ra_free(as, r); 2682 ra_free(as, r);
1742 if (rset_test(as->modset, r) || irt_ismarked(ir->t)) 2683 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
@@ -1761,7 +2702,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1761 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; 2702 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
1762 int32_t spadj = as->T->spadjust; 2703 int32_t spadj = as->T->spadjust;
1763 MCode *p = as->mctop-1; 2704 MCode *p = as->mctop-1;
1764 *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; 2705 *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
1765 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); 2706 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
1766} 2707}
1767 2708
@@ -1772,139 +2713,26 @@ static void asm_tail_prep(ASMState *as)
1772 as->invmcp = as->loopref ? as->mcp : NULL; 2713 as->invmcp = as->loopref ? as->mcp : NULL;
1773} 2714}
1774 2715
1775/* -- Instruction dispatch ------------------------------------------------ */
1776
1777/* Assemble a single instruction. */
1778static void asm_ir(ASMState *as, IRIns *ir)
1779{
1780 switch ((IROp)ir->o) {
1781 /* Miscellaneous ops. */
1782 case IR_LOOP: asm_loop(as); break;
1783 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1784 case IR_USE:
1785 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1786 case IR_PHI: asm_phi(as, ir); break;
1787 case IR_HIOP: asm_hiop(as, ir); break;
1788 case IR_GCSTEP: asm_gcstep(as, ir); break;
1789
1790 /* Guarded assertions. */
1791 case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
1792 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1793 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1794 case IR_ABC:
1795 asm_comp(as, ir);
1796 break;
1797
1798 case IR_RETF: asm_retf(as, ir); break;
1799
1800 /* Bit ops. */
1801 case IR_BNOT: asm_bitnot(as, ir); break;
1802 case IR_BSWAP: asm_bitswap(as, ir); break;
1803
1804 case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
1805 case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
1806 case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
1807
1808 case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
1809 case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
1810 case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
1811 case IR_BROL: lua_assert(0); break;
1812 case IR_BROR: asm_bitror(as, ir); break;
1813
1814 /* Arithmetic ops. */
1815 case IR_ADD: asm_add(as, ir); break;
1816 case IR_SUB: asm_sub(as, ir); break;
1817 case IR_MUL: asm_mul(as, ir); break;
1818 case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
1819 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1820 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1821 case IR_NEG: asm_neg(as, ir); break;
1822
1823 case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
1824 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1825 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1826 case IR_MIN: asm_min_max(as, ir, 0); break;
1827 case IR_MAX: asm_min_max(as, ir, 1); break;
1828 case IR_FPMATH:
1829 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1830 break;
1831 if (ir->op2 <= IRFPM_TRUNC)
1832 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1833 else if (ir->op2 == IRFPM_SQRT)
1834 asm_fpunary(as, ir, MIPSI_SQRT_D);
1835 else
1836 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1837 break;
1838
1839 /* Overflow-checking arithmetic ops. */
1840 case IR_ADDOV: asm_arithov(as, ir); break;
1841 case IR_SUBOV: asm_arithov(as, ir); break;
1842 case IR_MULOV: asm_mulov(as, ir); break;
1843
1844 /* Memory references. */
1845 case IR_AREF: asm_aref(as, ir); break;
1846 case IR_HREF: asm_href(as, ir); break;
1847 case IR_HREFK: asm_hrefk(as, ir); break;
1848 case IR_NEWREF: asm_newref(as, ir); break;
1849 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1850 case IR_FREF: asm_fref(as, ir); break;
1851 case IR_STRREF: asm_strref(as, ir); break;
1852
1853 /* Loads and stores. */
1854 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1855 asm_ahuvload(as, ir);
1856 break;
1857 case IR_FLOAD: asm_fload(as, ir); break;
1858 case IR_XLOAD: asm_xload(as, ir); break;
1859 case IR_SLOAD: asm_sload(as, ir); break;
1860
1861 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1862 case IR_FSTORE: asm_fstore(as, ir); break;
1863 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1864
1865 /* Allocations. */
1866 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1867 case IR_TNEW: asm_tnew(as, ir); break;
1868 case IR_TDUP: asm_tdup(as, ir); break;
1869 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1870
1871 /* Write barriers. */
1872 case IR_TBAR: asm_tbar(as, ir); break;
1873 case IR_OBAR: asm_obar(as, ir); break;
1874
1875 /* Type conversions. */
1876 case IR_CONV: asm_conv(as, ir); break;
1877 case IR_TOBIT: asm_tobit(as, ir); break;
1878 case IR_TOSTR: asm_tostr(as, ir); break;
1879 case IR_STRTO: asm_strto(as, ir); break;
1880
1881 /* Calls. */
1882 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1883 case IR_CALLXS: asm_callx(as, ir); break;
1884 case IR_CARG: break;
1885
1886 default:
1887 setintV(&as->J->errinfo, ir->o);
1888 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1889 break;
1890 }
1891}
1892
1893/* -- Trace setup --------------------------------------------------------- */ 2716/* -- Trace setup --------------------------------------------------------- */
1894 2717
1895/* Ensure there are enough stack slots for call arguments. */ 2718/* Ensure there are enough stack slots for call arguments. */
1896static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2719static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1897{ 2720{
1898 IRRef args[CCI_NARGS_MAX*2]; 2721 IRRef args[CCI_NARGS_MAX*2];
1899 uint32_t i, nargs = (int)CCI_NARGS(ci); 2722 uint32_t i, nargs = CCI_XNARGS(ci);
2723#if LJ_32
1900 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2724 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2725#else
2726 int nslots = 0, ngpr = REGARG_NUMGPR;
2727#endif
1901 asm_collectargs(as, ir, ci, args); 2728 asm_collectargs(as, ir, ci, args);
1902 for (i = 0; i < nargs; i++) { 2729 for (i = 0; i < nargs; i++) {
1903 if (args[i] && irt_isfp(IR(args[i])->t) && 2730#if LJ_32
2731 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) &&
1904 nfpr > 0 && !(ci->flags & CCI_VARARG)) { 2732 nfpr > 0 && !(ci->flags & CCI_VARARG)) {
1905 nfpr--; 2733 nfpr--;
1906 ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; 2734 ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
1907 } else if (args[i] && irt_isnum(IR(args[i])->t)) { 2735 } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) {
1908 nfpr = 0; 2736 nfpr = 0;
1909 ngpr = ngpr & ~1; 2737 ngpr = ngpr & ~1;
1910 if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; 2738 if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
@@ -1912,6 +2740,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1912 nfpr = 0; 2740 nfpr = 0;
1913 if (ngpr > 0) ngpr--; else nslots++; 2741 if (ngpr > 0) ngpr--; else nslots++;
1914 } 2742 }
2743#else
2744 if (ngpr > 0) ngpr--; else nslots += 2;
2745#endif
1915 } 2746 }
1916 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2747 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
1917 as->evenspill = nslots; 2748 as->evenspill = nslots;
@@ -1942,35 +2773,35 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1942 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && 2773 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
1943 ((p[-1] & 0xf0000000u) == MIPSI_BEQ || 2774 ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
1944 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || 2775 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
1945 (p[-1] & 0xffe00000u) == MIPSI_BC1F) && 2776#if !LJ_TARGET_MIPSR6
1946 p[-2] != MIPS_NOPATCH_GC_CHECK) { 2777 (p[-1] & 0xffe00000u) == MIPSI_BC1F
2778#else
2779 (p[-1] & 0xff600000u) == MIPSI_BC1EQZ
2780#endif
2781 ) && p[-2] != MIPS_NOPATCH_GC_CHECK) {
1947 ptrdiff_t delta = target - p; 2782 ptrdiff_t delta = target - p;
1948 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ 2783 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
1949 patchbranch: 2784 patchbranch:
1950 p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); 2785 p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
1951 *p = MIPSI_NOP; /* Replace the load of the exit number. */ 2786 *p = MIPSI_NOP; /* Replace the load of the exit number. */
1952 cstop = p; 2787 cstop = p+1;
1953 if (!cstart) cstart = p-1; 2788 if (!cstart) cstart = p-1;
1954 } else { /* Branch out of range. Use spare jump slot in mcarea. */ 2789 } else { /* Branch out of range. Use spare jump slot in mcarea. */
1955 int i; 2790 MCode *mcjump = asm_sparejump_use(mcarea, tjump);
1956 for (i = (int)(sizeof(MCLink)/sizeof(MCode)); 2791 if (mcjump) {
1957 i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2); 2792 lj_mcode_sync(mcjump, mcjump+1);
1958 i += 2) { 2793 delta = mcjump - p;
1959 if (mcarea[i] == tjump) { 2794 if (((delta + 0x8000) >> 16) == 0) {
1960 delta = mcarea+i - p;
1961 goto patchbranch;
1962 } else if (mcarea[i] == MIPSI_NOP) {
1963 mcarea[i] = tjump;
1964 cstart = mcarea+i;
1965 delta = mcarea+i - p;
1966 goto patchbranch; 2795 goto patchbranch;
2796 } else {
2797 lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
1967 } 2798 }
1968 } 2799 }
1969 /* Ignore jump slot overflow. Child trace is simply not attached. */ 2800 /* Ignore jump slot overflow. Child trace is simply not attached. */
1970 } 2801 }
1971 } else if (p+1 == pe) { 2802 } else if (p+1 == pe) {
1972 /* Patch NOP after code for inverted loop branch. Use of J is ok. */ 2803 /* Patch NOP after code for inverted loop branch. Use of J is ok. */
1973 lua_assert(p[1] == MIPSI_NOP); 2804 lj_assertJ(p[1] == MIPSI_NOP, "expected NOP");
1974 p[1] = tjump; 2805 p[1] = tjump;
1975 *p = MIPSI_NOP; /* Replace the load of the exit number. */ 2806 *p = MIPSI_NOP; /* Replace the load of the exit number. */
1976 cstop = p+2; 2807 cstop = p+2;
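
The out-of-range branch case above now goes through asm_sparejump_use() instead of scanning the spare-jump slots inline. A minimal sketch of what such a helper can look like, reconstructed from the removed loop (same assumed slot layout: an MCLink header followed by MIPS_SPAREJUMP two-instruction J/NOP slots; the actual helper added by this patch may differ):

  static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
  {
    MCode *mcj = mcarea + sizeof(MCLink)/sizeof(MCode);
    int slot;
    for (slot = 0; slot < MIPS_SPAREJUMP; slot++, mcj += 2) {
      if (mcj[0] == tjump) {
        return mcj;  /* A slot already jumps to this target: reuse it. */
      } else if (mcj[0] == MIPSI_NOP) {
        mcj[0] = tjump;  /* Claim a free slot; the caller syncs the icache. */
        return mcj;
      }
    }
    return NULL;  /* All slots taken: the child trace stays unattached. */
  }
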
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index bd9374cc..8e9a92a4 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
156 return ra_allock(as, ofs-(int16_t)ofs, allow); 156 return ra_allock(as, ofs-(int16_t)ofs, allow);
157 } 157 }
158 } 158 }
159 } else if (ir->o == IR_TMPREF) {
160 *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
161 return RID_JGL;
159 } 162 }
160 } 163 }
161 *ofsp = 0; 164 *ofsp = 0;
@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
181 return; 184 return;
182 } 185 }
183 } else if (ir->o == IR_STRREF) { 186 } else if (ir->o == IR_STRREF) {
184 lua_assert(ofs == 0); 187 lj_assertA(ofs == 0, "bad usage");
185 ofs = (int32_t)sizeof(GCstr); 188 ofs = (int32_t)sizeof(GCstr);
186 if (irref_isk(ir->op2)) { 189 if (irref_isk(ir->op2)) {
187 ofs += IR(ir->op2)->i; 190 ofs += IR(ir->op2)->i;
@@ -226,12 +229,14 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
226 emit_tab(as, pi, rt, left, right); 229 emit_tab(as, pi, rt, left, right);
227} 230}
228 231
232#if !LJ_SOFTFP
229/* Fuse to multiply-add/sub instruction. */ 233/* Fuse to multiply-add/sub instruction. */
230static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) 234static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
231{ 235{
232 IRRef lref = ir->op1, rref = ir->op2; 236 IRRef lref = ir->op1, rref = ir->op2;
233 IRIns *irm; 237 IRIns *irm;
234 if (lref != rref && 238 if ((as->flags & JIT_F_OPT_FMA) &&
239 lref != rref &&
235 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && 240 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
236 ra_noreg(irm->r)) || 241 ra_noreg(irm->r)) ||
237 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && 242 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
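
The JIT_F_OPT_FMA test added just above makes multiply-add fusion opt-in, presumably because a fused FMADD rounds only once and can give a different result than the separate MUL and ADD in the IR. A small standalone illustration of that difference (not part of the patch; compile with -lm):

  #include <math.h>
  #include <stdio.h>

  int main(void)
  {
    double a = 1.0 + 0x1p-27, b = 1.0 - 0x1p-27, c = -1.0;
    double separate = a*b + c;    /* a*b rounds to 1.0, so this is 0.0. */
    double fused = fma(a, b, c);  /* Single rounding: exactly -0x1p-54. */
    printf("mul+add: %g  fma: %g\n", separate, fused);
    return 0;
  }
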
@@ -245,24 +250,30 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
245 } 250 }
246 return 0; 251 return 0;
247} 252}
253#endif
248 254
249/* -- Calls --------------------------------------------------------------- */ 255/* -- Calls --------------------------------------------------------------- */
250 256
251/* Generate a call to a C function. */ 257/* Generate a call to a C function. */
252static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 258static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
253{ 259{
254 uint32_t n, nargs = CCI_NARGS(ci); 260 uint32_t n, nargs = CCI_XNARGS(ci);
255 int32_t ofs = 8; 261 int32_t ofs = 8;
256 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; 262 Reg gpr = REGARG_FIRSTGPR;
263#if !LJ_SOFTFP
264 Reg fpr = REGARG_FIRSTFPR;
265#endif
257 if ((void *)ci->func) 266 if ((void *)ci->func)
258 emit_call(as, (void *)ci->func); 267 emit_call(as, (void *)ci->func);
259 for (n = 0; n < nargs; n++) { /* Setup args. */ 268 for (n = 0; n < nargs; n++) { /* Setup args. */
260 IRRef ref = args[n]; 269 IRRef ref = args[n];
261 if (ref) { 270 if (ref) {
262 IRIns *ir = IR(ref); 271 IRIns *ir = IR(ref);
272#if !LJ_SOFTFP
263 if (irt_isfp(ir->t)) { 273 if (irt_isfp(ir->t)) {
264 if (fpr <= REGARG_LASTFPR) { 274 if (fpr <= REGARG_LASTFPR) {
265 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ 275 lj_assertA(rset_test(as->freeset, fpr),
276 "reg %d not free", fpr); /* Already evicted. */
266 ra_leftov(as, fpr, ref); 277 ra_leftov(as, fpr, ref);
267 fpr++; 278 fpr++;
268 } else { 279 } else {
@@ -271,9 +282,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
271 emit_spstore(as, ir, r, ofs); 282 emit_spstore(as, ir, r, ofs);
272 ofs += irt_isnum(ir->t) ? 8 : 4; 283 ofs += irt_isnum(ir->t) ? 8 : 4;
273 } 284 }
274 } else { 285 } else
286#endif
287 {
275 if (gpr <= REGARG_LASTGPR) { 288 if (gpr <= REGARG_LASTGPR) {
276 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ 289 lj_assertA(rset_test(as->freeset, gpr),
290 "reg %d not free", gpr); /* Already evicted. */
277 ra_leftov(as, gpr, ref); 291 ra_leftov(as, gpr, ref);
278 gpr++; 292 gpr++;
279 } else { 293 } else {
@@ -290,8 +304,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
290 } 304 }
291 checkmclim(as); 305 checkmclim(as);
292 } 306 }
307#if !LJ_SOFTFP
293 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ 308 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
294 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); 309 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
310#endif
295} 311}
296 312
297/* Setup result reg/sp for call. Evict scratch regs. */ 313/* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,16 +315,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
299{ 315{
300 RegSet drop = RSET_SCRATCH; 316 RegSet drop = RSET_SCRATCH;
301 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 317 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
318#if !LJ_SOFTFP
302 if ((ci->flags & CCI_NOFPRCLOBBER)) 319 if ((ci->flags & CCI_NOFPRCLOBBER))
303 drop &= ~RSET_FPR; 320 drop &= ~RSET_FPR;
321#endif
304 if (ra_hasreg(ir->r)) 322 if (ra_hasreg(ir->r))
305 rset_clear(drop, ir->r); /* Dest reg handled below. */ 323 rset_clear(drop, ir->r); /* Dest reg handled below. */
306 if (hiop && ra_hasreg((ir+1)->r)) 324 if (hiop && ra_hasreg((ir+1)->r))
307 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 325 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
308 ra_evictset(as, drop); /* Evictions must be performed first. */ 326 ra_evictset(as, drop); /* Evictions must be performed first. */
309 if (ra_used(ir)) { 327 if (ra_used(ir)) {
310 lua_assert(!irt_ispri(ir->t)); 328 lj_assertA(!irt_ispri(ir->t), "PRI dest");
311 if (irt_isfp(ir->t)) { 329 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
312 if ((ci->flags & CCI_CASTU64)) { 330 if ((ci->flags & CCI_CASTU64)) {
313 /* Use spill slot or temp slots. */ 331 /* Use spill slot or temp slots. */
314 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; 332 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -331,15 +349,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
331 } 349 }
332} 350}
333 351
334static void asm_call(ASMState *as, IRIns *ir)
335{
336 IRRef args[CCI_NARGS_MAX];
337 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
338 asm_collectargs(as, ir, ci, args);
339 asm_setupresult(as, ir, ci);
340 asm_gencall(as, ci, args);
341}
342
343static void asm_callx(ASMState *as, IRIns *ir) 352static void asm_callx(ASMState *as, IRIns *ir)
344{ 353{
345 IRRef args[CCI_NARGS_MAX*2]; 354 IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +361,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
352 func = ir->op2; irf = IR(func); 361 func = ir->op2; irf = IR(func);
353 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 362 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
354 if (irref_isk(func)) { /* Call to constant address. */ 363 if (irref_isk(func)) { /* Call to constant address. */
355 ci.func = (ASMFunction)(void *)(irf->i); 364 ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
356 } else { /* Need a non-argument register for indirect calls. */ 365 } else { /* Need a non-argument register for indirect calls. */
357 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); 366 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
358 Reg freg = ra_alloc1(as, func, allow); 367 Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +372,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
363 asm_gencall(as, &ci, args); 372 asm_gencall(as, &ci, args);
364} 373}
365 374
366static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
367{
368 const CCallInfo *ci = &lj_ir_callinfo[id];
369 IRRef args[2];
370 args[0] = ir->op1;
371 args[1] = ir->op2;
372 asm_setupresult(as, ir, ci);
373 asm_gencall(as, ci, args);
374}
375
376/* -- Returns ------------------------------------------------------------- */ 375/* -- Returns ------------------------------------------------------------- */
377 376
378/* Return to lower frame. Guard that it goes to the right spot. */ 377/* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +379,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
380{ 379{
381 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 380 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
382 void *pc = ir_kptr(IR(ir->op2)); 381 void *pc = ir_kptr(IR(ir->op2));
383 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 382 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
384 as->topslot -= (BCReg)delta; 383 as->topslot -= (BCReg)delta;
385 if ((int32_t)as->topslot < 0) as->topslot = 0; 384 if ((int32_t)as->topslot < 0) as->topslot = 0;
386 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 385 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -392,8 +391,24 @@ static void asm_retf(ASMState *as, IRIns *ir)
392 emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); 391 emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
393} 392}
394 393
394/* -- Buffer operations --------------------------------------------------- */
395
396#if LJ_HASBUFFER
397static void asm_bufhdr_write(ASMState *as, Reg sb)
398{
399 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
400 IRIns irgc;
401 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
402 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
403 emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
404 emit_getgl(as, RID_TMP, cur_L);
405 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
406}
407#endif
408
395/* -- Type conversions ---------------------------------------------------- */ 409/* -- Type conversions ---------------------------------------------------- */
396 410
411#if !LJ_SOFTFP
397static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 412static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
398{ 413{
399 RegSet allow = RSET_FPR; 414 RegSet allow = RSET_FPR;
@@ -410,8 +425,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
410 emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); 425 emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
411 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 426 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
412 emit_lsptr(as, PPCI_LFS, (fbias & 31), 427 emit_lsptr(as, PPCI_LFS, (fbias & 31),
413 (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), 428 (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
414 RSET_GPR);
415 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 429 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
416 emit_fb(as, PPCI_FCTIWZ, tmp, left); 430 emit_fb(as, PPCI_FCTIWZ, tmp, left);
417} 431}
@@ -427,15 +441,27 @@ static void asm_tobit(ASMState *as, IRIns *ir)
427 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 441 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
428 emit_fab(as, PPCI_FADD, tmp, left, right); 442 emit_fab(as, PPCI_FADD, tmp, left, right);
429} 443}
444#endif
430 445
431static void asm_conv(ASMState *as, IRIns *ir) 446static void asm_conv(ASMState *as, IRIns *ir)
432{ 447{
433 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 448 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
449#if !LJ_SOFTFP
434 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 450 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
451#endif
435 IRRef lref = ir->op1; 452 IRRef lref = ir->op1;
436 lua_assert(irt_type(ir->t) != st); 453 /* 64 bit integer conversions are handled by SPLIT. */
437 lua_assert(!(irt_isint64(ir->t) || 454 lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
438 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ 455 "IR %04d has unsplit 64 bit type",
456 (int)(ir - as->ir) - REF_BIAS);
457#if LJ_SOFTFP
458 /* FP conversions are handled by SPLIT. */
459 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
460 "IR %04d has FP type",
461 (int)(ir - as->ir) - REF_BIAS);
462 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
463#else
464 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
439 if (irt_isfp(ir->t)) { 465 if (irt_isfp(ir->t)) {
440 Reg dest = ra_dest(as, ir, RSET_FPR); 466 Reg dest = ra_dest(as, ir, RSET_FPR);
441 if (stfp) { /* FP to FP conversion. */ 467 if (stfp) { /* FP to FP conversion. */
@@ -450,13 +476,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
450 Reg left = ra_alloc1(as, lref, allow); 476 Reg left = ra_alloc1(as, lref, allow);
451 Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); 477 Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
452 Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); 478 Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
453 const float *kbias;
454 if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); 479 if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
455 emit_fab(as, PPCI_FSUB, dest, dest, fbias); 480 emit_fab(as, PPCI_FSUB, dest, dest, fbias);
456 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); 481 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
457 kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); 482 emit_lsptr(as, PPCI_LFS, (fbias & 31),
458 if (st == IRT_U32) kbias++; 483 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
459 emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
460 rset_clear(allow, hibias)); 484 rset_clear(allow, hibias));
461 emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, 485 emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
462 RID_SP, SPOFS_TMPLO); 486 RID_SP, SPOFS_TMPLO);
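
The integer-to-double path above keeps the usual PPC bias trick; only the bias constants now come from the shared J->k32 table (LJ_K32_2P52 = 2^52 for the unsigned case, LJ_K32_2P52_2P31 = 2^52+2^31 for the signed case) instead of lj_ir_k64_find(). A plain-C sketch of the signed case, for illustration only:

  #include <stdint.h>

  static double int32_to_double(int32_t i)
  {
    union { double d; uint64_t u; } v, bias;
    bias.u = 0x4330000080000000ULL;  /* 2^52 + 2^31 as a double. */
    /* High word 0x43300000, low word = i with its sign bit flipped. */
    v.u = 0x4330000000000000ULL | ((uint32_t)i ^ 0x80000000u);
    return v.d - bias.d;             /* Exact subtraction yields (double)i. */
  }
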
@@ -466,7 +490,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
466 } else if (stfp) { /* FP to integer conversion. */ 490 } else if (stfp) { /* FP to integer conversion. */
467 if (irt_isguard(ir->t)) { 491 if (irt_isguard(ir->t)) {
468 /* Checked conversions are only supported from number to int. */ 492 /* Checked conversions are only supported from number to int. */
469 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 493 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
494 "bad type for checked CONV");
470 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 495 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
471 } else { 496 } else {
472 Reg dest = ra_dest(as, ir, RSET_GPR); 497 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -489,19 +514,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
489 emit_fb(as, PPCI_FCTIWZ, tmp, tmp); 514 emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
490 emit_fab(as, PPCI_FSUB, tmp, left, tmp); 515 emit_fab(as, PPCI_FSUB, tmp, left, tmp);
491 emit_lsptr(as, PPCI_LFS, (tmp & 31), 516 emit_lsptr(as, PPCI_LFS, (tmp & 31),
492 (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), 517 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
493 RSET_GPR);
494 } else { 518 } else {
495 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 519 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
496 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 520 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
497 emit_fb(as, PPCI_FCTIWZ, tmp, left); 521 emit_fb(as, PPCI_FCTIWZ, tmp, left);
498 } 522 }
499 } 523 }
500 } else { 524 } else
525#endif
526 {
501 Reg dest = ra_dest(as, ir, RSET_GPR); 527 Reg dest = ra_dest(as, ir, RSET_GPR);
502 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 528 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
503 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 529 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
504 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 530 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
505 if ((ir->op2 & IRCONV_SEXT)) 531 if ((ir->op2 & IRCONV_SEXT))
506 emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); 532 emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
507 else 533 else
@@ -513,90 +539,102 @@ static void asm_conv(ASMState *as, IRIns *ir)
513 } 539 }
514} 540}
515 541
516#if LJ_HASFFI
517static void asm_conv64(ASMState *as, IRIns *ir)
518{
519 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
520 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
521 IRCallID id;
522 const CCallInfo *ci;
523 IRRef args[2];
524 args[0] = ir->op1;
525 args[1] = (ir-1)->op1;
526 if (st == IRT_NUM || st == IRT_FLOAT) {
527 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
528 ir--;
529 } else {
530 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
531 }
532 ci = &lj_ir_callinfo[id];
533 asm_setupresult(as, ir, ci);
534 asm_gencall(as, ci, args);
535}
536#endif
537
538static void asm_strto(ASMState *as, IRIns *ir) 542static void asm_strto(ASMState *as, IRIns *ir)
539{ 543{
540 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 544 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
541 IRRef args[2]; 545 IRRef args[2];
542 int32_t ofs; 546 int32_t ofs = SPOFS_TMP;
547#if LJ_SOFTFP
548 ra_evictset(as, RSET_SCRATCH);
549 if (ra_used(ir)) {
550 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
551 (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
552 int i;
553 for (i = 0; i < 2; i++) {
554 Reg r = (ir+i)->r;
555 if (ra_hasreg(r)) {
556 ra_free(as, r);
557 ra_modified(as, r);
558 emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
559 }
560 }
561 ofs = sps_scale(ir->s & ~1);
562 } else {
563 Reg rhi = ra_dest(as, ir+1, RSET_GPR);
564 Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
565 emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
566 emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
567 }
568 }
569#else
543 RegSet drop = RSET_SCRATCH; 570 RegSet drop = RSET_SCRATCH;
544 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ 571 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
545 ra_evictset(as, drop); 572 ra_evictset(as, drop);
573 if (ir->s) ofs = sps_scale(ir->s);
574#endif
546 asm_guardcc(as, CC_EQ); 575 asm_guardcc(as, CC_EQ);
547 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ 576 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
548 args[0] = ir->op1; /* GCstr *str */ 577 args[0] = ir->op1; /* GCstr *str */
549 args[1] = ASMREF_TMP1; /* TValue *n */ 578 args[1] = ASMREF_TMP1; /* TValue *n */
550 asm_gencall(as, ci, args); 579 asm_gencall(as, ci, args);
551 /* Store the result to the spill slot or temp slots. */ 580 /* Store the result to the spill slot or temp slots. */
552 ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
553 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); 581 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
554} 582}
555 583
584/* -- Memory references --------------------------------------------------- */
585
556/* Get pointer to TValue. */ 586/* Get pointer to TValue. */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 587static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
558{ 588{
559 IRIns *ir = IR(ref); 589 int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
560 if (irt_isnum(ir->t)) { 590 if ((mode & IRTMPREF_IN1)) {
561 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ 591 IRIns *ir = IR(ref);
562 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 592 if (irt_isnum(ir->t)) {
563 else /* Otherwise force a spill and use the spill slot. */ 593 if ((mode & IRTMPREF_OUT1)) {
564 emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); 594#if LJ_SOFTFP
565 } else { 595 lj_assertA(irref_isk(ref), "unsplit FP op");
566 /* Otherwise use g->tmptv to hold the TValue. */ 596 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
567 RegSet allow = rset_exclude(RSET_GPR, dest); 597 emit_setgl(as,
568 Reg type; 598 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
569 emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 599 tmptv.u32.lo);
570 if (!irt_ispri(ir->t)) { 600 emit_setgl(as,
571 Reg src = ra_alloc1(as, ref, allow); 601 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
572 emit_setgl(as, src, tmptv.gcr); 602 tmptv.u32.hi);
603#else
604 Reg src = ra_alloc1(as, ref, RSET_FPR);
605 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
606 emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
607#endif
608 } else if (irref_isk(ref)) {
609 /* Use the number constant itself as a TValue. */
610 ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
611 } else {
612#if LJ_SOFTFP
613 lj_assertA(0, "unsplit FP op");
614#else
615 /* Otherwise force a spill and use the spill slot. */
616 emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
617#endif
618 }
619 } else {
620 /* Otherwise use g->tmptv to hold the TValue. */
621 Reg type;
622 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
623 if (!irt_ispri(ir->t)) {
624 Reg src = ra_alloc1(as, ref, RSET_GPR);
625 emit_setgl(as, src, tmptv.gcr);
626 }
627 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
628 type = ra_alloc1(as, ref+1, RSET_GPR);
629 else
630 type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
631 emit_setgl(as, type, tmptv.it);
573 } 632 }
574 type = ra_allock(as, irt_toitype(ir->t), allow);
575 emit_setgl(as, type, tmptv.it);
576 }
577}
578
579static void asm_tostr(ASMState *as, IRIns *ir)
580{
581 IRRef args[2];
582 args[0] = ASMREF_L;
583 as->gcsteps++;
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
586 args[1] = ASMREF_TMP1; /* const lua_Number * */
587 asm_setupresult(as, ir, ci); /* GCstr * */
588 asm_gencall(as, ci, args);
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
590 } else { 633 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 634 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 } 635 }
596} 636}
597 637
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 638static void asm_aref(ASMState *as, IRIns *ir)
601{ 639{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 640 Reg dest = ra_dest(as, ir, RSET_GPR);
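
A note on the recurring tmpofs value: offsetof(global_State, tmptv)-32768 works because RID_JGL is assumed to hold the global_State pointer biased by +32768, so every field of global_State stays within the signed 16-bit displacement of a single PPC load/store. A hypothetical macro spelling out the rule (not from the tree):

  #define GL_JGL_OFS(field) \
    ((int32_t)offsetof(global_State, field) - 32768)
  /* e.g. emit_tai(as, PPCI_ADDI, dest, RID_JGL, GL_JGL_OFS(tmptv)); */
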
@@ -636,11 +674,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
636 Reg tisnum = RID_NONE, tmpnum = RID_NONE; 674 Reg tisnum = RID_NONE, tmpnum = RID_NONE;
637 IRRef refkey = ir->op2; 675 IRRef refkey = ir->op2;
638 IRIns *irkey = IR(refkey); 676 IRIns *irkey = IR(refkey);
677 int isk = irref_isk(refkey);
639 IRType1 kt = irkey->t; 678 IRType1 kt = irkey->t;
640 uint32_t khash; 679 uint32_t khash;
641 MCLabel l_end, l_loop, l_next; 680 MCLabel l_end, l_loop, l_next;
642 681
643 rset_clear(allow, tab); 682 rset_clear(allow, tab);
683#if LJ_SOFTFP
684 if (!isk) {
685 key = ra_alloc1(as, refkey, allow);
686 rset_clear(allow, key);
687 if (irkey[1].o == IR_HIOP) {
688 if (ra_hasreg((irkey+1)->r)) {
689 tmpnum = (irkey+1)->r;
690 ra_noweak(as, tmpnum);
691 } else {
692 tmpnum = ra_allocref(as, refkey+1, allow);
693 }
694 rset_clear(allow, tmpnum);
695 }
696 }
697#else
644 if (irt_isnum(kt)) { 698 if (irt_isnum(kt)) {
645 key = ra_alloc1(as, refkey, RSET_FPR); 699 key = ra_alloc1(as, refkey, RSET_FPR);
646 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); 700 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -650,6 +704,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
650 key = ra_alloc1(as, refkey, allow); 704 key = ra_alloc1(as, refkey, allow);
651 rset_clear(allow, key); 705 rset_clear(allow, key);
652 } 706 }
707#endif
653 tmp2 = ra_scratch(as, allow); 708 tmp2 = ra_scratch(as, allow);
654 rset_clear(allow, tmp2); 709 rset_clear(allow, tmp2);
655 710
@@ -672,7 +727,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
672 asm_guardcc(as, CC_EQ); 727 asm_guardcc(as, CC_EQ);
673 else 728 else
674 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); 729 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
675 if (irt_isnum(kt)) { 730 if (!LJ_SOFTFP && irt_isnum(kt)) {
676 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); 731 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
677 emit_condbranch(as, PPCI_BC, CC_GE, l_next); 732 emit_condbranch(as, PPCI_BC, CC_GE, l_next);
678 emit_ab(as, PPCI_CMPLW, tmp1, tisnum); 733 emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -682,7 +737,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
682 emit_ab(as, PPCI_CMPW, tmp2, key); 737 emit_ab(as, PPCI_CMPW, tmp2, key);
683 emit_condbranch(as, PPCI_BC, CC_NE, l_next); 738 emit_condbranch(as, PPCI_BC, CC_NE, l_next);
684 } 739 }
685 emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); 740 if (LJ_SOFTFP && ra_hasreg(tmpnum))
741 emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
742 else
743 emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
686 if (!irt_ispri(kt)) 744 if (!irt_ispri(kt))
687 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); 745 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
688 } 746 }
@@ -691,35 +749,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
691 (((char *)as->mcp-(char *)l_loop) & 0xffffu); 749 (((char *)as->mcp-(char *)l_loop) & 0xffffu);
692 750
693 /* Load main position relative to tab->node into dest. */ 751 /* Load main position relative to tab->node into dest. */
694 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 752 khash = isk ? ir_khash(as, irkey) : 1;
695 if (khash == 0) { 753 if (khash == 0) {
696 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); 754 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
697 } else { 755 } else {
698 Reg tmphash = tmp1; 756 Reg tmphash = tmp1;
699 if (irref_isk(refkey)) 757 if (isk)
700 tmphash = ra_allock(as, khash, allow); 758 tmphash = ra_allock(as, khash, allow);
701 emit_tab(as, PPCI_ADD, dest, dest, tmp1); 759 emit_tab(as, PPCI_ADD, dest, dest, tmp1);
702 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); 760 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
703 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); 761 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
704 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); 762 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
705 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); 763 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
706 if (irref_isk(refkey)) { 764 if (isk) {
707 /* Nothing to do. */ 765 /* Nothing to do. */
708 } else if (irt_isstr(kt)) { 766 } else if (irt_isstr(kt)) {
709 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); 767 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid));
710 } else { /* Must match with hash*() in lj_tab.c. */ 768 } else { /* Must match with hash*() in lj_tab.c. */
711 emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); 769 emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
712 emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); 770 emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
713 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); 771 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
714 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); 772 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
715 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); 773 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
716 if (irt_isnum(kt)) { 774 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
775#if LJ_SOFTFP
776 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
777 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
778 emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
779#else
717 int32_t ofs = ra_spill(as, irkey); 780 int32_t ofs = ra_spill(as, irkey);
718 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); 781 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
719 emit_rotlwi(as, dest, tmp1, HASH_ROT1); 782 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
720 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); 783 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
721 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); 784 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
722 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); 785 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
786#endif
723 } else { 787 } else {
724 emit_asb(as, PPCI_XOR, tmp2, key, tmp1); 788 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
725 emit_rotlwi(as, dest, tmp1, HASH_ROT1); 789 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -740,7 +804,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
740 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 804 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
741 Reg key = RID_NONE, type = RID_TMP, idx = node; 805 Reg key = RID_NONE, type = RID_TMP, idx = node;
742 RegSet allow = rset_exclude(RSET_GPR, node); 806 RegSet allow = rset_exclude(RSET_GPR, node);
743 lua_assert(ofs % sizeof(Node) == 0); 807 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
744 if (ofs > 32736) { 808 if (ofs > 32736) {
745 idx = dest; 809 idx = dest;
746 rset_clear(allow, dest); 810 rset_clear(allow, dest);
@@ -773,47 +837,40 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
773 } 837 }
774} 838}
775 839
776static void asm_newref(ASMState *as, IRIns *ir)
777{
778 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
779 IRRef args[3];
780 if (ir->r == RID_SINK)
781 return;
782 args[0] = ASMREF_L; /* lua_State *L */
783 args[1] = ir->op1; /* GCtab *t */
784 args[2] = ASMREF_TMP1; /* cTValue *key */
785 asm_setupresult(as, ir, ci); /* TValue * */
786 asm_gencall(as, ci, args);
787 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
788}
789
790static void asm_uref(ASMState *as, IRIns *ir) 840static void asm_uref(ASMState *as, IRIns *ir)
791{ 841{
792 Reg dest = ra_dest(as, ir, RSET_GPR); 842 Reg dest = ra_dest(as, ir, RSET_GPR);
793 if (irref_isk(ir->op1)) { 843 int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
844 if (irref_isk(ir->op1) && !guarded) {
794 GCfunc *fn = ir_kfunc(IR(ir->op1)); 845 GCfunc *fn = ir_kfunc(IR(ir->op1));
795 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 846 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
796 emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); 847 emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
797 } else { 848 } else {
798 Reg uv = ra_scratch(as, RSET_GPR); 849 if (guarded) {
799 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 850 asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
800 if (ir->o == IR_UREFC) {
801 asm_guardcc(as, CC_NE);
802 emit_ai(as, PPCI_CMPWI, RID_TMP, 1); 851 emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
803 emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); 852 }
804 emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); 853 if (ir->o == IR_UREFC)
854 emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
855 else
856 emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
857 if (guarded)
858 emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
859 if (irref_isk(ir->op1)) {
860 GCfunc *fn = ir_kfunc(IR(ir->op1));
861 int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
862 emit_loadi(as, dest, k);
805 } else { 863 } else {
806 emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v)); 864 emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
865 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
807 } 866 }
808 emit_tai(as, PPCI_LWZ, uv, func,
809 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
810 } 867 }
811} 868}
812 869
813static void asm_fref(ASMState *as, IRIns *ir) 870static void asm_fref(ASMState *as, IRIns *ir)
814{ 871{
815 UNUSED(as); UNUSED(ir); 872 UNUSED(as); UNUSED(ir);
816 lua_assert(!ra_used(ir)); 873 lj_assertA(!ra_used(ir), "unfused FREF");
817} 874}
818 875
819static void asm_strref(ASMState *as, IRIns *ir) 876static void asm_strref(ASMState *as, IRIns *ir)
@@ -853,26 +910,28 @@ static void asm_strref(ASMState *as, IRIns *ir)
853 910
854/* -- Loads and stores ---------------------------------------------------- */ 911/* -- Loads and stores ---------------------------------------------------- */
855 912
856static PPCIns asm_fxloadins(IRIns *ir) 913static PPCIns asm_fxloadins(ASMState *as, IRIns *ir)
857{ 914{
915 UNUSED(as);
858 switch (irt_type(ir->t)) { 916 switch (irt_type(ir->t)) {
859 case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ 917 case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */
860 case IRT_U8: return PPCI_LBZ; 918 case IRT_U8: return PPCI_LBZ;
861 case IRT_I16: return PPCI_LHA; 919 case IRT_I16: return PPCI_LHA;
862 case IRT_U16: return PPCI_LHZ; 920 case IRT_U16: return PPCI_LHZ;
863 case IRT_NUM: return PPCI_LFD; 921 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD;
864 case IRT_FLOAT: return PPCI_LFS; 922 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
865 default: return PPCI_LWZ; 923 default: return PPCI_LWZ;
866 } 924 }
867} 925}
868 926
869static PPCIns asm_fxstoreins(IRIns *ir) 927static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir)
870{ 928{
929 UNUSED(as);
871 switch (irt_type(ir->t)) { 930 switch (irt_type(ir->t)) {
872 case IRT_I8: case IRT_U8: return PPCI_STB; 931 case IRT_I8: case IRT_U8: return PPCI_STB;
873 case IRT_I16: case IRT_U16: return PPCI_STH; 932 case IRT_I16: case IRT_U16: return PPCI_STH;
874 case IRT_NUM: return PPCI_STFD; 933 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD;
875 case IRT_FLOAT: return PPCI_STFS; 934 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
876 default: return PPCI_STW; 935 default: return PPCI_STW;
877 } 936 }
878} 937}
@@ -880,18 +939,24 @@ static PPCIns asm_fxstoreins(IRIns *ir)
880static void asm_fload(ASMState *as, IRIns *ir) 939static void asm_fload(ASMState *as, IRIns *ir)
881{ 940{
882 Reg dest = ra_dest(as, ir, RSET_GPR); 941 Reg dest = ra_dest(as, ir, RSET_GPR);
883 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 942 PPCIns pi = asm_fxloadins(as, ir);
884 PPCIns pi = asm_fxloadins(ir); 943 Reg idx;
885 int32_t ofs; 944 int32_t ofs;
886 if (ir->op2 == IRFL_TAB_ARRAY) { 945 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
887 ofs = asm_fuseabase(as, ir->op1); 946 idx = RID_JGL;
888 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 947 ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
889 emit_tai(as, PPCI_ADDI, dest, idx, ofs); 948 } else {
890 return; 949 idx = ra_alloc1(as, ir->op1, RSET_GPR);
950 if (ir->op2 == IRFL_TAB_ARRAY) {
951 ofs = asm_fuseabase(as, ir->op1);
952 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
953 emit_tai(as, PPCI_ADDI, dest, idx, ofs);
954 return;
955 }
891 } 956 }
957 ofs = field_ofs[ir->op2];
892 } 958 }
893 ofs = field_ofs[ir->op2]; 959 lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8");
894 lua_assert(!irt_isi8(ir->t));
895 emit_tai(as, pi, dest, idx, ofs); 960 emit_tai(as, pi, dest, idx, ofs);
896} 961}
897 962
@@ -902,21 +967,22 @@ static void asm_fstore(ASMState *as, IRIns *ir)
902 IRIns *irf = IR(ir->op1); 967 IRIns *irf = IR(ir->op1);
903 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 968 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
904 int32_t ofs = field_ofs[irf->op2]; 969 int32_t ofs = field_ofs[irf->op2];
905 PPCIns pi = asm_fxstoreins(ir); 970 PPCIns pi = asm_fxstoreins(as, ir);
906 emit_tai(as, pi, src, idx, ofs); 971 emit_tai(as, pi, src, idx, ofs);
907 } 972 }
908} 973}
909 974
910static void asm_xload(ASMState *as, IRIns *ir) 975static void asm_xload(ASMState *as, IRIns *ir)
911{ 976{
912 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 977 Reg dest = ra_dest(as, ir,
913 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 978 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
979 lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
914 if (irt_isi8(ir->t)) 980 if (irt_isi8(ir->t))
915 emit_as(as, PPCI_EXTSB, dest, dest); 981 emit_as(as, PPCI_EXTSB, dest, dest);
916 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 982 asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
917} 983}
918 984
919static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 985static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
920{ 986{
921 IRIns *irb; 987 IRIns *irb;
922 if (ir->r == RID_SINK) 988 if (ir->r == RID_SINK)
@@ -927,36 +993,54 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
927 Reg src = ra_alloc1(as, irb->op1, RSET_GPR); 993 Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
928 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); 994 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
929 } else { 995 } else {
930 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 996 Reg src = ra_alloc1(as, ir->op2,
931 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 997 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
998 asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
932 rset_exclude(RSET_GPR, src), ofs); 999 rset_exclude(RSET_GPR, src), ofs);
933 } 1000 }
934} 1001}
935 1002
1003#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1004
936static void asm_ahuvload(ASMState *as, IRIns *ir) 1005static void asm_ahuvload(ASMState *as, IRIns *ir)
937{ 1006{
938 IRType1 t = ir->t; 1007 IRType1 t = ir->t;
939 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; 1008 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
940 RegSet allow = RSET_GPR; 1009 RegSet allow = RSET_GPR;
941 int32_t ofs = AHUREF_LSX; 1010 int32_t ofs = AHUREF_LSX;
1011 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
1012 t.irt = IRT_NUM;
1013 if (ra_used(ir+1)) {
1014 type = ra_dest(as, ir+1, allow);
1015 rset_clear(allow, type);
1016 }
1017 ofs = 0;
1018 }
942 if (ra_used(ir)) { 1019 if (ra_used(ir)) {
943 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1020 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
944 if (!irt_isnum(t)) ofs = 0; 1021 irt_isint(ir->t) || irt_isaddr(ir->t),
945 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1022 "bad load type %d", irt_type(ir->t));
1023 if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
1024 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
946 rset_clear(allow, dest); 1025 rset_clear(allow, dest);
947 } 1026 }
948 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1027 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1028 if (ir->o == IR_VLOAD) {
1029 ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
1030 ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
1031 }
949 if (irt_isnum(t)) { 1032 if (irt_isnum(t)) {
950 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); 1033 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
951 asm_guardcc(as, CC_GE); 1034 asm_guardcc(as, CC_GE);
952 emit_ab(as, PPCI_CMPLW, type, tisnum); 1035 emit_ab(as, PPCI_CMPLW, type, tisnum);
953 if (ra_hasreg(dest)) { 1036 if (ra_hasreg(dest)) {
954 if (ofs == AHUREF_LSX) { 1037 if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
955 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, 1038 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
956 (idx&255)), (idx>>8))); 1039 (idx&255)), (idx>>8)));
957 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); 1040 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
958 } else { 1041 } else {
959 emit_fai(as, PPCI_LFD, dest, idx, ofs); 1042 emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
1043 ofs+4*LJ_SOFTFP);
960 } 1044 }
961 } 1045 }
962 } else { 1046 } else {
@@ -979,7 +1063,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
979 int32_t ofs = AHUREF_LSX; 1063 int32_t ofs = AHUREF_LSX;
980 if (ir->r == RID_SINK) 1064 if (ir->r == RID_SINK)
981 return; 1065 return;
982 if (irt_isnum(ir->t)) { 1066 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
983 src = ra_alloc1(as, ir->op2, RSET_FPR); 1067 src = ra_alloc1(as, ir->op2, RSET_FPR);
984 } else { 1068 } else {
985 if (!irt_ispri(ir->t)) { 1069 if (!irt_ispri(ir->t)) {
@@ -987,11 +1071,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
987 rset_clear(allow, src); 1071 rset_clear(allow, src);
988 ofs = 0; 1072 ofs = 0;
989 } 1073 }
990 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1074 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
1075 type = ra_alloc1(as, (ir+1)->op2, allow);
1076 else
1077 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
991 rset_clear(allow, type); 1078 rset_clear(allow, type);
992 } 1079 }
993 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1080 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
994 if (irt_isnum(ir->t)) { 1081 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
995 if (ofs == AHUREF_LSX) { 1082 if (ofs == AHUREF_LSX) {
996 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); 1083 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
997 emit_slwi(as, RID_TMP, (idx>>8), 3); 1084 emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -1016,21 +1103,39 @@ static void asm_sload(ASMState *as, IRIns *ir)
1016 IRType1 t = ir->t; 1103 IRType1 t = ir->t;
1017 Reg dest = RID_NONE, type = RID_NONE, base; 1104 Reg dest = RID_NONE, type = RID_NONE, base;
1018 RegSet allow = RSET_GPR; 1105 RegSet allow = RSET_GPR;
1019 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1106 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
1020 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1107 if (hiop)
1021 lua_assert(LJ_DUALNUM || 1108 t.irt = IRT_NUM;
1022 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1109 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1110 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1111 lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1112 "inconsistent SLOAD variant");
1113 lj_assertA(LJ_DUALNUM ||
1114 !irt_isint(t) ||
1115 (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
1116 "bad SLOAD type");
1117#if LJ_SOFTFP
1118 lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
1119 "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
1120 if (hiop && ra_used(ir+1)) {
1121 type = ra_dest(as, ir+1, allow);
1122 rset_clear(allow, type);
1123 }
1124#else
1023 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1125 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1024 dest = ra_scratch(as, RSET_FPR); 1126 dest = ra_scratch(as, RSET_FPR);
1025 asm_tointg(as, ir, dest); 1127 asm_tointg(as, ir, dest);
1026 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1128 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1027 } else if (ra_used(ir)) { 1129 } else
1028 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1130#endif
1029 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1131 if (ra_used(ir)) {
1132 lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
1133 "bad SLOAD type %d", irt_type(ir->t));
1134 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
1030 rset_clear(allow, dest); 1135 rset_clear(allow, dest);
1031 base = ra_alloc1(as, REF_BASE, allow); 1136 base = ra_alloc1(as, REF_BASE, allow);
1032 rset_clear(allow, base); 1137 rset_clear(allow, base);
1033 if ((ir->op2 & IRSLOAD_CONVERT)) { 1138 if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
1034 if (irt_isint(t)) { 1139 if (irt_isint(t)) {
1035 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 1140 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
1036 dest = ra_scratch(as, RSET_FPR); 1141 dest = ra_scratch(as, RSET_FPR);
@@ -1044,7 +1149,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1044 emit_fab(as, PPCI_FSUB, dest, dest, fbias); 1149 emit_fab(as, PPCI_FSUB, dest, dest, fbias);
1045 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); 1150 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
1046 emit_lsptr(as, PPCI_LFS, (fbias & 31), 1151 emit_lsptr(as, PPCI_LFS, (fbias & 31),
1047 (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), 1152 (void *)&as->J->k32[LJ_K32_2P52_2P31],
1048 rset_clear(allow, hibias)); 1153 rset_clear(allow, hibias));
1049 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); 1154 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
1050 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); 1155 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -1062,14 +1167,22 @@ dotypecheck:
1062 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1167 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1063 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); 1168 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
1064 asm_guardcc(as, CC_GE); 1169 asm_guardcc(as, CC_GE);
1065 emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); 1170#if !LJ_SOFTFP
1066 type = RID_TMP; 1171 type = RID_TMP;
1172#endif
1173 emit_ab(as, PPCI_CMPLW, type, tisnum);
1067 } 1174 }
1068 if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); 1175 if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
1176 base, ofs-(LJ_SOFTFP?0:4));
1069 } else { 1177 } else {
1070 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1178 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1071 asm_guardcc(as, CC_NE); 1179 asm_guardcc(as, CC_NE);
1072 emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); 1180 if ((ir->op2 & IRSLOAD_KEYINDEX)) {
1181 emit_ai(as, PPCI_CMPWI, RID_TMP, (LJ_KEYINDEX & 0xffff));
1182 emit_asi(as, PPCI_XORIS, RID_TMP, RID_TMP, (LJ_KEYINDEX >> 16));
1183 } else {
1184 emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
1185 }
1073 type = RID_TMP; 1186 type = RID_TMP;
1074 } 1187 }
1075 if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs); 1188 if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
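
The new IRSLOAD_KEYINDEX check above compares a full 32-bit constant using two 16-bit immediates; since the assembler emits machine code backwards, at run time the XORIS executes before the CMPWI. In C terms the test amounts to the following (illustration only; LJ_KEYINDEX itself is defined elsewhere):

  static int is_keyindex(uint32_t tag, uint32_t keyindex)
  {
    tag ^= keyindex & 0xffff0000u;       /* XORIS: cancel the high halfword. */
    return tag == (keyindex & 0xffffu);  /* CMPWI: match the low halfword. */
  }
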
@@ -1083,19 +1196,16 @@ dotypecheck:
1083static void asm_cnew(ASMState *as, IRIns *ir) 1196static void asm_cnew(ASMState *as, IRIns *ir)
1084{ 1197{
1085 CTState *cts = ctype_ctsG(J2G(as->J)); 1198 CTState *cts = ctype_ctsG(J2G(as->J));
1086 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1199 CTypeID id = (CTypeID)IR(ir->op1)->i;
1087 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1200 CTSize sz;
1088 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1201 CTInfo info = lj_ctype_info(cts, id, &sz);
1089 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1202 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1090 IRRef args[2]; 1203 IRRef args[4];
1091 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1092 RegSet drop = RSET_SCRATCH; 1204 RegSet drop = RSET_SCRATCH;
1093 lua_assert(sz != CTSIZE_INVALID); 1205 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1206 "bad CNEW/CNEWI operands");
1094 1207
1095 args[0] = ASMREF_L; /* lua_State *L */
1096 args[1] = ASMREF_TMP1; /* MSize size */
1097 as->gcsteps++; 1208 as->gcsteps++;
1098
1099 if (ra_hasreg(ir->r)) 1209 if (ra_hasreg(ir->r))
1100 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1210 rset_clear(drop, ir->r); /* Dest reg handled below. */
1101 ra_evictset(as, drop); 1211 ra_evictset(as, drop);
@@ -1104,11 +1214,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1104 1214
1105 /* Initialize immutable cdata object. */ 1215 /* Initialize immutable cdata object. */
1106 if (ir->o == IR_CNEWI) { 1216 if (ir->o == IR_CNEWI) {
1217 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1107 int32_t ofs = sizeof(GCcdata); 1218 int32_t ofs = sizeof(GCcdata);
1108 lua_assert(sz == 4 || sz == 8); 1219 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1109 if (sz == 8) { 1220 if (sz == 8) {
1110 ofs += 4; 1221 ofs += 4;
1111 lua_assert((ir+1)->o == IR_HIOP); 1222 lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
1112 } 1223 }
1113 for (;;) { 1224 for (;;) {
1114 Reg r = ra_alloc1(as, ir->op2, allow); 1225 Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1117,18 +1228,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1117 if (ofs == sizeof(GCcdata)) break; 1228 if (ofs == sizeof(GCcdata)) break;
1118 ofs -= 4; ir++; 1229 ofs -= 4; ir++;
1119 } 1230 }
1231 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1232 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1233 args[0] = ASMREF_L; /* lua_State *L */
1234 args[1] = ir->op1; /* CTypeID id */
1235 args[2] = ir->op2; /* CTSize sz */
1236 args[3] = ASMREF_TMP1; /* CTSize align */
1237 asm_gencall(as, ci, args);
1238 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1239 return;
1120 } 1240 }
1241
1121 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1242 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1122 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1243 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1123 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1244 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1124 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); 1245 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
1125 emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1246 emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1247 args[0] = ASMREF_L; /* lua_State *L */
1248 args[1] = ASMREF_TMP1; /* MSize size */
1126 asm_gencall(as, ci, args); 1249 asm_gencall(as, ci, args);
1127 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1250 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1128 ra_releasetmp(as, ASMREF_TMP1)); 1251 ra_releasetmp(as, ASMREF_TMP1));
1129} 1252}
1130#else
1131#define asm_cnew(as, ir) ((void)0)
1132#endif 1253#endif
1133 1254
1134/* -- Write barriers ------------------------------------------------------ */ 1255/* -- Write barriers ------------------------------------------------------ */
@@ -1142,7 +1263,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1142 emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); 1263 emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
1143 emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1264 emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
1144 emit_setgl(as, tab, gc.grayagain); 1265 emit_setgl(as, tab, gc.grayagain);
1145 lua_assert(LJ_GC_BLACK == 0x04); 1266 lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK");
1146 emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ 1267 emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */
1147 emit_getgl(as, link, gc.grayagain); 1268 emit_getgl(as, link, gc.grayagain);
1148 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); 1269 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
@@ -1157,7 +1278,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1157 MCLabel l_end; 1278 MCLabel l_end;
1158 Reg obj, val, tmp; 1279 Reg obj, val, tmp;
1159 /* No need for other object barriers (yet). */ 1280 /* No need for other object barriers (yet). */
1160 lua_assert(IR(ir->op1)->o == IR_UREFC); 1281 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1161 ra_evictset(as, RSET_SCRATCH); 1282 ra_evictset(as, RSET_SCRATCH);
1162 l_end = emit_label(as); 1283 l_end = emit_label(as);
1163 args[0] = ASMREF_TMP1; /* global_State *g */ 1284 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1178,6 +1299,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1178 1299
1179/* -- Arithmetic and logic operations ------------------------------------- */ 1300/* -- Arithmetic and logic operations ------------------------------------- */
1180 1301
1302#if !LJ_SOFTFP
1181static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) 1303static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
1182{ 1304{
1183 Reg dest = ra_dest(as, ir, RSET_FPR); 1305 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1196,31 +1318,24 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1196 emit_fb(as, pi, dest, left); 1318 emit_fb(as, pi, dest, left);
1197} 1319}
1198 1320
1199static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1321static void asm_fpmath(ASMState *as, IRIns *ir)
1200{ 1322{
1201 IRIns *irp = IR(ir->op1); 1323 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1202 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1324 asm_fpunary(as, ir, PPCI_FSQRT);
1203 IRIns *irpp = IR(irp->op1); 1325 else
1204 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1326 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1205 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1206 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1207 IRRef args[2];
1208 args[0] = irpp->op1;
1209 args[1] = irp->op2;
1210 asm_setupresult(as, ir, ci);
1211 asm_gencall(as, ci, args);
1212 return 1;
1213 }
1214 }
1215 return 0;
1216} 1327}
1328#endif
1217 1329
1218static void asm_add(ASMState *as, IRIns *ir) 1330static void asm_add(ASMState *as, IRIns *ir)
1219{ 1331{
1332#if !LJ_SOFTFP
1220 if (irt_isnum(ir->t)) { 1333 if (irt_isnum(ir->t)) {
1221 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) 1334 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
1222 asm_fparith(as, ir, PPCI_FADD); 1335 asm_fparith(as, ir, PPCI_FADD);
1223 } else { 1336 } else
1337#endif
1338 {
1224 Reg dest = ra_dest(as, ir, RSET_GPR); 1339 Reg dest = ra_dest(as, ir, RSET_GPR);
1225 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1340 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1226 PPCIns pi; 1341 PPCIns pi;
@@ -1259,10 +1374,13 @@ static void asm_add(ASMState *as, IRIns *ir)
1259 1374
1260static void asm_sub(ASMState *as, IRIns *ir) 1375static void asm_sub(ASMState *as, IRIns *ir)
1261{ 1376{
1377#if !LJ_SOFTFP
1262 if (irt_isnum(ir->t)) { 1378 if (irt_isnum(ir->t)) {
1263 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) 1379 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
1264 asm_fparith(as, ir, PPCI_FSUB); 1380 asm_fparith(as, ir, PPCI_FSUB);
1265 } else { 1381 } else
1382#endif
1383 {
1266 PPCIns pi = PPCI_SUBF; 1384 PPCIns pi = PPCI_SUBF;
1267 Reg dest = ra_dest(as, ir, RSET_GPR); 1385 Reg dest = ra_dest(as, ir, RSET_GPR);
1268 Reg left, right; 1386 Reg left, right;
@@ -1288,9 +1406,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
1288 1406
1289static void asm_mul(ASMState *as, IRIns *ir) 1407static void asm_mul(ASMState *as, IRIns *ir)
1290{ 1408{
1409#if !LJ_SOFTFP
1291 if (irt_isnum(ir->t)) { 1410 if (irt_isnum(ir->t)) {
1292 asm_fparith(as, ir, PPCI_FMUL); 1411 asm_fparith(as, ir, PPCI_FMUL);
1293 } else { 1412 } else
1413#endif
1414 {
1294 PPCIns pi = PPCI_MULLW; 1415 PPCIns pi = PPCI_MULLW;
1295 Reg dest = ra_dest(as, ir, RSET_GPR); 1416 Reg dest = ra_dest(as, ir, RSET_GPR);
1296 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1417 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1312,11 +1433,16 @@ static void asm_mul(ASMState *as, IRIns *ir)
1312 } 1433 }
1313} 1434}
1314 1435
1436#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1437
1315static void asm_neg(ASMState *as, IRIns *ir) 1438static void asm_neg(ASMState *as, IRIns *ir)
1316{ 1439{
1440#if !LJ_SOFTFP
1317 if (irt_isnum(ir->t)) { 1441 if (irt_isnum(ir->t)) {
1318 asm_fpunary(as, ir, PPCI_FNEG); 1442 asm_fpunary(as, ir, PPCI_FNEG);
1319 } else { 1443 } else
1444#endif
1445 {
1320 Reg dest, left; 1446 Reg dest, left;
1321 PPCIns pi = PPCI_NEG; 1447 PPCIns pi = PPCI_NEG;
1322 if (as->flagmcp == as->mcp) { 1448 if (as->flagmcp == as->mcp) {
@@ -1330,6 +1456,8 @@ static void asm_neg(ASMState *as, IRIns *ir)
1330 } 1456 }
1331} 1457}
1332 1458
1459#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1460
1333static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1461static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1334{ 1462{
1335 Reg dest, left, right; 1463 Reg dest, left, right;
@@ -1345,6 +1473,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1345 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1473 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1346} 1474}
1347 1475
1476#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1477#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1478#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1479
1348#if LJ_HASFFI 1480#if LJ_HASFFI
1349static void asm_add64(ASMState *as, IRIns *ir) 1481static void asm_add64(ASMState *as, IRIns *ir)
1350{ 1482{
@@ -1424,7 +1556,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1424} 1556}
1425#endif 1557#endif
1426 1558
1427static void asm_bitnot(ASMState *as, IRIns *ir) 1559static void asm_bnot(ASMState *as, IRIns *ir)
1428{ 1560{
1429 Reg dest, left, right; 1561 Reg dest, left, right;
1430 PPCIns pi = PPCI_NOR; 1562 PPCIns pi = PPCI_NOR;
@@ -1451,7 +1583,7 @@ nofuse:
1451 emit_asb(as, pi, dest, left, right); 1583 emit_asb(as, pi, dest, left, right);
1452} 1584}
1453 1585
1454static void asm_bitswap(ASMState *as, IRIns *ir) 1586static void asm_bswap(ASMState *as, IRIns *ir)
1455{ 1587{
1456 Reg dest = ra_dest(as, ir, RSET_GPR); 1588 Reg dest = ra_dest(as, ir, RSET_GPR);
1457 IRIns *irx; 1589 IRIns *irx;
@@ -1472,32 +1604,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1472 } 1604 }
1473} 1605}
1474 1606
1475static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1476{
1477 Reg dest = ra_dest(as, ir, RSET_GPR);
1478 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1479 if (irref_isk(ir->op2)) {
1480 int32_t k = IR(ir->op2)->i;
1481 Reg tmp = left;
1482 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1483 if (!checku16(k)) {
1484 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1485 if ((k & 0xffff) == 0) return;
1486 }
1487 emit_asi(as, pik, dest, left, k);
1488 return;
1489 }
1490 }
1491 /* May fail due to spills/restores above, but simplifies the logic. */
1492 if (as->flagmcp == as->mcp) {
1493 as->flagmcp = NULL;
1494 as->mcp++;
1495 pi |= PPCF_DOT;
1496 }
1497 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1498 emit_asb(as, pi, dest, left, right);
1499}
1500
1501/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1607/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1502static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1608static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1503{ 1609{
@@ -1528,7 +1634,7 @@ nofuse:
1528 *--as->mcp = pi | PPCF_T(left); 1634 *--as->mcp = pi | PPCF_T(left);
1529} 1635}
1530 1636
1531static void asm_bitand(ASMState *as, IRIns *ir) 1637static void asm_band(ASMState *as, IRIns *ir)
1532{ 1638{
1533 Reg dest, left, right; 1639 Reg dest, left, right;
1534 IRRef lref = ir->op1; 1640 IRRef lref = ir->op1;
@@ -1583,6 +1689,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1583 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1689 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1584} 1690}
1585 1691
1692static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1693{
1694 Reg dest = ra_dest(as, ir, RSET_GPR);
1695 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1696 if (irref_isk(ir->op2)) {
1697 int32_t k = IR(ir->op2)->i;
1698 Reg tmp = left;
1699 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1700 if (!checku16(k)) {
1701 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1702 if ((k & 0xffff) == 0) return;
1703 }
1704 emit_asi(as, pik, dest, left, k);
1705 return;
1706 }
1707 }
1708 /* May fail due to spills/restores above, but simplifies the logic. */
1709 if (as->flagmcp == as->mcp) {
1710 as->flagmcp = NULL;
1711 as->mcp++;
1712 pi |= PPCF_DOT;
1713 }
1714 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1715 emit_asb(as, pi, dest, left, right);
1716}
1717
1718#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1719#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1720
1586static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1721static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1587{ 1722{
1588 Reg dest, left; 1723 Reg dest, left;
@@ -1608,9 +1743,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1608 } 1743 }
1609} 1744}
1610 1745
1746#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1747#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1748#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1749#define asm_brol(as, ir) \
1750 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1751 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1752#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR")
1753
1754#if LJ_SOFTFP
1755static void asm_sfpmin_max(ASMState *as, IRIns *ir)
1756{
1757 CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
1758 IRRef args[4];
1759 MCLabel l_right, l_end;
1760 Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
1761 Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
1762 Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
1763 PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
1764 righthi = (lefthi >> 8); lefthi &= 255;
1765 rightlo = (leftlo >> 8); leftlo &= 255;
1766 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1767 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1768 l_end = emit_label(as);
1769 if (desthi != righthi) emit_mr(as, desthi, righthi);
1770 if (destlo != rightlo) emit_mr(as, destlo, rightlo);
1771 l_right = emit_label(as);
1772 if (l_end != l_right) emit_jmp(as, l_end);
1773 if (desthi != lefthi) emit_mr(as, desthi, lefthi);
1774 if (destlo != leftlo) emit_mr(as, destlo, leftlo);
1775 if (l_right == as->mcp+1) {
1776 cond ^= 4; l_right = l_end; ++as->mcp;
1777 }
1778 emit_condbranch(as, PPCI_BC, cond, l_right);
1779 ra_evictset(as, RSET_SCRATCH);
1780 emit_cmpi(as, RID_RET, 1);
1781 asm_gencall(as, &ci, args);
1782}
1783#endif
1784
1611static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1785static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1612{ 1786{
1613 if (irt_isnum(ir->t)) { 1787 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1614 Reg dest = ra_dest(as, ir, RSET_FPR); 1788 Reg dest = ra_dest(as, ir, RSET_FPR);
1615 Reg tmp = dest; 1789 Reg tmp = dest;
1616 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1790 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1618,9 +1792,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1618 if (tmp == left || tmp == right) 1792 if (tmp == left || tmp == right)
1619 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, 1793 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
1620 dest), left), right)); 1794 dest), left), right));
1621 emit_facb(as, PPCI_FSEL, dest, tmp, 1795 emit_facb(as, PPCI_FSEL, dest, tmp, left, right);
1622 ismax ? left : right, ismax ? right : left); 1796 emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left);
1623 emit_fab(as, PPCI_FSUB, tmp, left, right);
1624 } else { 1797 } else {
1625 Reg dest = ra_dest(as, ir, RSET_GPR); 1798 Reg dest = ra_dest(as, ir, RSET_GPR);
1626 Reg tmp1 = RID_TMP, tmp2 = dest; 1799 Reg tmp1 = RID_TMP, tmp2 = dest;
@@ -1638,6 +1811,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1638 } 1811 }
1639} 1812}
1640 1813
1814#define asm_min(as, ir) asm_min_max(as, ir, 0)
1815#define asm_max(as, ir) asm_min_max(as, ir, 1)
1816
1641/* -- Comparisons --------------------------------------------------------- */ 1817/* -- Comparisons --------------------------------------------------------- */
1642 1818
1643#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1819#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1695,7 +1871,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
1695static void asm_comp(ASMState *as, IRIns *ir) 1871static void asm_comp(ASMState *as, IRIns *ir)
1696{ 1872{
1697 PPCCC cc = asm_compmap[ir->o]; 1873 PPCCC cc = asm_compmap[ir->o];
1698 if (irt_isnum(ir->t)) { 1874 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1699 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1875 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1700 right = (left >> 8); left &= 255; 1876 right = (left >> 8); left &= 255;
1701 asm_guardcc(as, (cc >> 4)); 1877 asm_guardcc(as, (cc >> 4));
@@ -1714,6 +1890,46 @@ static void asm_comp(ASMState *as, IRIns *ir)
1714 } 1890 }
1715} 1891}
1716 1892
1893#define asm_equal(as, ir) asm_comp(as, ir)
1894
1895#if LJ_SOFTFP
1896/* SFP comparisons. */
1897static void asm_sfpcomp(ASMState *as, IRIns *ir)
1898{
1899 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
1900 RegSet drop = RSET_SCRATCH;
1901 Reg r;
1902 IRRef args[4];
1903 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1904 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1905
1906 for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
1907 if (!rset_test(as->freeset, r) &&
1908 regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
1909 rset_clear(drop, r);
1910 }
1911 ra_evictset(as, drop);
1912 asm_setupresult(as, ir, ci);
1913 switch ((IROp)ir->o) {
1914 case IR_ULT:
1915 asm_guardcc(as, CC_EQ);
1916 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1917 case IR_ULE:
1918 asm_guardcc(as, CC_EQ);
1919 emit_ai(as, PPCI_CMPWI, RID_RET, 1);
1920 break;
1921 case IR_GE: case IR_GT:
1922 asm_guardcc(as, CC_EQ);
1923 emit_ai(as, PPCI_CMPWI, RID_RET, 2);
1924 default:
1925 asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
1926 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1927 break;
1928 }
1929 asm_gencall(as, ci, args);
1930}
1931#endif
1932
1717#if LJ_HASFFI 1933#if LJ_HASFFI
1718/* 64 bit integer comparisons. */ 1934/* 64 bit integer comparisons. */
1719static void asm_comp64(ASMState *as, IRIns *ir) 1935static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1738,50 +1954,87 @@ static void asm_comp64(ASMState *as, IRIns *ir)
1738} 1954}
1739#endif 1955#endif
1740 1956
1741/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ 1957/* -- Split register ops -------------------------------------------------- */
1742 1958
1743 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 1959 /* Hiword op of a split 32/32 bit op. Previous op must be the loword op. */
1744static void asm_hiop(ASMState *as, IRIns *ir) 1960static void asm_hiop(ASMState *as, IRIns *ir)
1745{ 1961{
1746#if LJ_HASFFI
1747 /* HIOP is marked as a store because it needs its own DCE logic. */ 1962 /* HIOP is marked as a store because it needs its own DCE logic. */
1748 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 1963 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1749 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 1964 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1965#if LJ_HASFFI || LJ_SOFTFP
1750 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 1966 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
1751 as->curins--; /* Always skip the CONV. */ 1967 as->curins--; /* Always skip the CONV. */
1968#if LJ_HASFFI && !LJ_SOFTFP
1752 if (usehi || uselo) 1969 if (usehi || uselo)
1753 asm_conv64(as, ir); 1970 asm_conv64(as, ir);
1754 return; 1971 return;
1972#endif
1755 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 1973 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
1756 as->curins--; /* Always skip the loword comparison. */ 1974 as->curins--; /* Always skip the loword comparison. */
1975#if LJ_SOFTFP
1976 if (!irt_isint(ir->t)) {
1977 asm_sfpcomp(as, ir-1);
1978 return;
1979 }
1980#endif
1981#if LJ_HASFFI
1757 asm_comp64(as, ir); 1982 asm_comp64(as, ir);
1983#endif
1758 return; 1984 return;
1985#if LJ_SOFTFP
1986 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1987 as->curins--; /* Always skip the loword min/max. */
1988 if (uselo || usehi)
1989 asm_sfpmin_max(as, ir-1);
1990 return;
1991#endif
1759 } else if ((ir-1)->o == IR_XSTORE) { 1992 } else if ((ir-1)->o == IR_XSTORE) {
1760 as->curins--; /* Handle both stores here. */ 1993 as->curins--; /* Handle both stores here. */
1761 if ((ir-1)->r != RID_SINK) { 1994 if ((ir-1)->r != RID_SINK) {
1762 asm_xstore(as, ir, 0); 1995 asm_xstore_(as, ir, 0);
1763 asm_xstore(as, ir-1, 4); 1996 asm_xstore_(as, ir-1, 4);
1764 } 1997 }
1765 return; 1998 return;
1766 } 1999 }
2000#endif
1767 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 2001 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1768 switch ((ir-1)->o) { 2002 switch ((ir-1)->o) {
2003#if LJ_HASFFI
1769 case IR_ADD: as->curins--; asm_add64(as, ir); break; 2004 case IR_ADD: as->curins--; asm_add64(as, ir); break;
1770 case IR_SUB: as->curins--; asm_sub64(as, ir); break; 2005 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1771 case IR_NEG: as->curins--; asm_neg64(as, ir); break; 2006 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
1772 case IR_CALLN: 2007 case IR_CNEWI:
1773 case IR_CALLXS: 2008 /* Nothing to do here. Handled by lo op itself. */
2009 break;
2010#endif
2011#if LJ_SOFTFP
2012 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2013 case IR_STRTO:
1774 if (!uselo) 2014 if (!uselo)
1775 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 2015 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
1776 break; 2016 break;
1777 case IR_CNEWI: 2017 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
1778 /* Nothing to do here. Handled by lo op itself. */ 2018 /* Nothing to do here. Handled by lo op itself. */
1779 break; 2019 break;
1780 default: lua_assert(0); break;
1781 }
1782#else
1783 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */
1784#endif 2020#endif
2021 case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
2022 if (!uselo)
2023 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
2024 break;
2025 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
2026 }
2027}
2028
2029/* -- Profiling ----------------------------------------------------------- */
2030
2031static void asm_prof(ASMState *as, IRIns *ir)
2032{
2033 UNUSED(ir);
2034 asm_guardcc(as, CC_NE);
2035 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
2036 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
2037 (int32_t)offsetof(global_State, hookmask));
1785} 2038}
1786 2039
1787/* -- Stack handling ------------------------------------------------------ */ 2040/* -- Stack handling ------------------------------------------------------ */
@@ -1805,7 +2058,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1805 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 2058 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1806 if (pbase == RID_TMP) 2059 if (pbase == RID_TMP)
1807 emit_getgl(as, RID_TMP, jit_base); 2060 emit_getgl(as, RID_TMP, jit_base);
1808 emit_getgl(as, tmp, jit_L); 2061 emit_getgl(as, tmp, cur_L);
1809 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2062 if (allow == RSET_EMPTY) /* Spill temp. register. */
1810 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 2063 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1811} 2064}
@@ -1826,12 +2079,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1826 if ((sn & SNAP_NORESTORE)) 2079 if ((sn & SNAP_NORESTORE))
1827 continue; 2080 continue;
1828 if (irt_isnum(ir->t)) { 2081 if (irt_isnum(ir->t)) {
2082#if LJ_SOFTFP
2083 Reg tmp;
2084 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2085 /* LJ_SOFTFP: must be a number constant. */
2086 lj_assertA(irref_isk(ref), "unsplit FP op");
2087 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2088 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2089 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2090 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2091 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2092#else
1829 Reg src = ra_alloc1(as, ref, RSET_FPR); 2093 Reg src = ra_alloc1(as, ref, RSET_FPR);
1830 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); 2094 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
2095#endif
1831 } else { 2096 } else {
1832 Reg type; 2097 Reg type;
1833 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2098 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1834 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2099 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2100 "restore of IR type %d", irt_type(ir->t));
1835 if (!irt_ispri(ir->t)) { 2101 if (!irt_ispri(ir->t)) {
1836 Reg src = ra_alloc1(as, ref, allow); 2102 Reg src = ra_alloc1(as, ref, allow);
1837 rset_clear(allow, src); 2103 rset_clear(allow, src);
@@ -1840,6 +2106,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1840 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2106 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1841 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2107 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1842 type = ra_allock(as, (int32_t)(*flinks--), allow); 2108 type = ra_allock(as, (int32_t)(*flinks--), allow);
2109#if LJ_SOFTFP
2110 } else if ((sn & SNAP_SOFTFPNUM)) {
2111 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2112#endif
2113 } else if ((sn & SNAP_KEYINDEX)) {
2114 type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
1843 } else { 2115 } else {
1844 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2116 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1845 } 2117 }
@@ -1847,7 +2119,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1847 } 2119 }
1848 checkmclim(as); 2120 checkmclim(as);
1849 } 2121 }
1850 lua_assert(map + nent == flinks); 2122 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
1851} 2123}
1852 2124
1853/* -- GC handling --------------------------------------------------------- */ 2125/* -- GC handling --------------------------------------------------------- */
@@ -1898,6 +2170,12 @@ static void asm_loop_fixup(ASMState *as)
1898 } 2170 }
1899} 2171}
1900 2172
2173/* Fixup the tail of the loop. */
2174static void asm_loop_tail_fixup(ASMState *as)
2175{
2176 UNUSED(as); /* Nothing to do. */
2177}
2178
1901/* -- Head of trace ------------------------------------------------------- */ 2179/* -- Head of trace ------------------------------------------------------- */
1902 2180
1903/* Coalesce BASE register for a root trace. */ 2181/* Coalesce BASE register for a root trace. */
@@ -1949,7 +2227,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1949 as->mctop = p; 2227 as->mctop = p;
1950 } else { 2228 } else {
1951 /* Patch stack adjustment. */ 2229 /* Patch stack adjustment. */
1952 lua_assert(checki16(CFRAME_SIZE+spadj)); 2230 lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
1953 p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); 2231 p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
1954 p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; 2232 p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
1955 } 2233 }
@@ -1970,147 +2248,25 @@ static void asm_tail_prep(ASMState *as)
1970 } 2248 }
1971} 2249}
1972 2250
1973/* -- Instruction dispatch ------------------------------------------------ */
1974
1975/* Assemble a single instruction. */
1976static void asm_ir(ASMState *as, IRIns *ir)
1977{
1978 switch ((IROp)ir->o) {
1979 /* Miscellaneous ops. */
1980 case IR_LOOP: asm_loop(as); break;
1981 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1982 case IR_USE:
1983 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1984 case IR_PHI: asm_phi(as, ir); break;
1985 case IR_HIOP: asm_hiop(as, ir); break;
1986 case IR_GCSTEP: asm_gcstep(as, ir); break;
1987
1988 /* Guarded assertions. */
1989 case IR_EQ: case IR_NE:
1990 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1991 as->curins--;
1992 asm_href(as, ir-1, (IROp)ir->o);
1993 break;
1994 }
1995 /* fallthrough */
1996 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1997 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1998 case IR_ABC:
1999 asm_comp(as, ir);
2000 break;
2001
2002 case IR_RETF: asm_retf(as, ir); break;
2003
2004 /* Bit ops. */
2005 case IR_BNOT: asm_bitnot(as, ir); break;
2006 case IR_BSWAP: asm_bitswap(as, ir); break;
2007
2008 case IR_BAND: asm_bitand(as, ir); break;
2009 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2010 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2011
2012 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2013 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2014 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2015 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2016 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2017 case IR_BROR: lua_assert(0); break;
2018
2019 /* Arithmetic ops. */
2020 case IR_ADD: asm_add(as, ir); break;
2021 case IR_SUB: asm_sub(as, ir); break;
2022 case IR_MUL: asm_mul(as, ir); break;
2023 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2024 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2025 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2026 case IR_NEG: asm_neg(as, ir); break;
2027
2028 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2029 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2030 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2031 case IR_MIN: asm_min_max(as, ir, 0); break;
2032 case IR_MAX: asm_min_max(as, ir, 1); break;
2033 case IR_FPMATH:
2034 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2035 break;
2036 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2037 asm_fpunary(as, ir, PPCI_FSQRT);
2038 else
2039 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2040 break;
2041
2042 /* Overflow-checking arithmetic ops. */
2043 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2044 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2045 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2046
2047 /* Memory references. */
2048 case IR_AREF: asm_aref(as, ir); break;
2049 case IR_HREF: asm_href(as, ir, 0); break;
2050 case IR_HREFK: asm_hrefk(as, ir); break;
2051 case IR_NEWREF: asm_newref(as, ir); break;
2052 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2053 case IR_FREF: asm_fref(as, ir); break;
2054 case IR_STRREF: asm_strref(as, ir); break;
2055
2056 /* Loads and stores. */
2057 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2058 asm_ahuvload(as, ir);
2059 break;
2060 case IR_FLOAD: asm_fload(as, ir); break;
2061 case IR_XLOAD: asm_xload(as, ir); break;
2062 case IR_SLOAD: asm_sload(as, ir); break;
2063
2064 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2065 case IR_FSTORE: asm_fstore(as, ir); break;
2066 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2067
2068 /* Allocations. */
2069 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2070 case IR_TNEW: asm_tnew(as, ir); break;
2071 case IR_TDUP: asm_tdup(as, ir); break;
2072 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2073
2074 /* Write barriers. */
2075 case IR_TBAR: asm_tbar(as, ir); break;
2076 case IR_OBAR: asm_obar(as, ir); break;
2077
2078 /* Type conversions. */
2079 case IR_CONV: asm_conv(as, ir); break;
2080 case IR_TOBIT: asm_tobit(as, ir); break;
2081 case IR_TOSTR: asm_tostr(as, ir); break;
2082 case IR_STRTO: asm_strto(as, ir); break;
2083
2084 /* Calls. */
2085 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2086 case IR_CALLXS: asm_callx(as, ir); break;
2087 case IR_CARG: break;
2088
2089 default:
2090 setintV(&as->J->errinfo, ir->o);
2091 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2092 break;
2093 }
2094}
2095
2096/* -- Trace setup --------------------------------------------------------- */ 2251/* -- Trace setup --------------------------------------------------------- */
2097 2252
2098/* Ensure there are enough stack slots for call arguments. */ 2253/* Ensure there are enough stack slots for call arguments. */
2099static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2254static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2100{ 2255{
2101 IRRef args[CCI_NARGS_MAX*2]; 2256 IRRef args[CCI_NARGS_MAX*2];
2102 uint32_t i, nargs = (int)CCI_NARGS(ci); 2257 uint32_t i, nargs = CCI_XNARGS(ci);
2103 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2258 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2104 asm_collectargs(as, ir, ci, args); 2259 asm_collectargs(as, ir, ci, args);
2105 for (i = 0; i < nargs; i++) 2260 for (i = 0; i < nargs; i++)
2106 if (args[i] && irt_isfp(IR(args[i])->t)) { 2261 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
2107 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; 2262 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
2108 } else { 2263 } else {
2109 if (ngpr > 0) ngpr--; else nslots++; 2264 if (ngpr > 0) ngpr--; else nslots++;
2110 } 2265 }
2111 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2266 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2112 as->evenspill = nslots; 2267 as->evenspill = nslots;
2113 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); 2268 return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
2269 REGSP_HINT(RID_RET);
2114} 2270}
2115 2271
2116static void asm_setup_target(ASMState *as) 2272static void asm_setup_target(ASMState *as)
@@ -2150,7 +2306,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2150 } else if ((ins & 0xfc000000u) == PPCI_B && 2306 } else if ((ins & 0xfc000000u) == PPCI_B &&
2151 ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { 2307 ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
2152 ptrdiff_t delta = (char *)target - (char *)p; 2308 ptrdiff_t delta = (char *)target - (char *)p;
2153 lua_assert(((delta + 0x02000000) >> 26) == 0); 2309 lj_assertJ(((delta + 0x02000000) >> 26) == 0,
2310 "branch target out of range");
2154 *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); 2311 *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
2155 if (!cstart) cstart = p; 2312 if (!cstart) cstart = p;
2156 } 2313 }
@@ -2158,7 +2315,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2158 /* Always patch long-range branch in exit stub itself. Except, if we can't. */ 2315 /* Always patch long-range branch in exit stub itself. Except, if we can't. */
2159 if (patchlong) { 2316 if (patchlong) {
2160 ptrdiff_t delta = (char *)target - (char *)px - clearso; 2317 ptrdiff_t delta = (char *)target - (char *)px - clearso;
2161 lua_assert(((delta + 0x02000000) >> 26) == 0); 2318 lj_assertJ(((delta + 0x02000000) >> 26) == 0,
2319 "branch target out of range");
2162 *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); 2320 *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
2163 } 2321 }
2164 if (!cstart) cstart = px; 2322 if (!cstart) cstart = px;
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index ddbe9c55..aee33716 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -21,15 +21,17 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
21 } 21 }
22 /* Push the high byte of the exitno for each exit stub group. */ 22 /* Push the high byte of the exitno for each exit stub group. */
23 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); 23 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
24#if !LJ_GC64
24 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ 25 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
25 *mxp++ = XI_MOVmi; 26 *mxp++ = XI_MOVmi;
26 *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); 27 *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
27 *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); 28 *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
28 *mxp++ = 2*sizeof(void *); 29 *mxp++ = 2*sizeof(void *);
29 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; 30 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
31#endif
30 /* Jump to exit handler which fills in the ExitState. */ 32 /* Jump to exit handler which fills in the ExitState. */
31 *mxp++ = XI_JMP; mxp += 4; 33 *mxp++ = XI_JMP; mxp += 4;
32 *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); 34 *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler);
33 /* Commit the code for this group (even if assembly fails later on). */ 35 /* Commit the code for this group (even if assembly fails later on). */
34 lj_mcode_commitbot(as->J, mxp); 36 lj_mcode_commitbot(as->J, mxp);
35 as->mcbot = mxp; 37 as->mcbot = mxp;
@@ -58,14 +60,18 @@ static void asm_guardcc(ASMState *as, int cc)
58 MCode *p = as->mcp; 60 MCode *p = as->mcp;
59 if (LJ_UNLIKELY(p == as->invmcp)) { 61 if (LJ_UNLIKELY(p == as->invmcp)) {
60 as->loopinv = 1; 62 as->loopinv = 1;
61 *(int32_t *)(p+1) = jmprel(p+5, target); 63 *(int32_t *)(p+1) = jmprel(as->J, p+5, target);
62 target = p; 64 target = p;
63 cc ^= 1; 65 cc ^= 1;
64 if (as->realign) { 66 if (as->realign) {
67 if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
68 as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */
65 emit_sjcc(as, cc, target); 69 emit_sjcc(as, cc, target);
66 return; 70 return;
67 } 71 }
68 } 72 }
73 if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
74 as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */
69 emit_jcc(as, cc, target); 75 emit_jcc(as, cc, target);
70} 76}
71 77
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
79{ 85{
80 if (irref_isk(ref)) { 86 if (irref_isk(ref)) {
81 IRIns *ir = IR(ref); 87 IRIns *ir = IR(ref);
88#if LJ_GC64
89 if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
90 *k = ir->i;
91 return 1;
92 } else if (checki32((int64_t)ir_k64(ir)->u64)) {
93 *k = (int32_t)ir_k64(ir)->u64;
94 return 1;
95 }
96#else
82 if (ir->o != IR_KINT64) { 97 if (ir->o != IR_KINT64) {
83 *k = ir->i; 98 *k = ir->i;
84 return 1; 99 return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
86 *k = (int32_t)ir_kint64(ir)->u64; 101 *k = (int32_t)ir_kint64(ir)->u64;
87 return 1; 102 return 1;
88 } 103 }
104#endif
89 } 105 }
90 return 0; 106 return 0;
91} 107}
@@ -102,7 +118,7 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
102 while (--i > ref) { 118 while (--i > ref) {
103 if (ir[i].o == conflict) 119 if (ir[i].o == conflict)
104 return 0; /* Conflict found. */ 120 return 0; /* Conflict found. */
105 else if ((check & 1) && ir[i].o == IR_NEWREF) 121 else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
106 return 0; 122 return 0;
107 else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref)) 123 else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
108 return 0; 124 return 0;
@@ -117,7 +133,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
117 as->mrm.ofs = 0; 133 as->mrm.ofs = 0;
118 if (irb->o == IR_FLOAD) { 134 if (irb->o == IR_FLOAD) {
119 IRIns *ira = IR(irb->op1); 135 IRIns *ira = IR(irb->op1);
120 lua_assert(irb->op2 == IRFL_TAB_ARRAY); 136 lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
121 /* We can avoid the FLOAD of t->array for colocated arrays. */ 137 /* We can avoid the FLOAD of t->array for colocated arrays. */
122 if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && 138 if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
123 !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) { 139 !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
@@ -126,7 +142,8 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
126 } 142 }
127 } else if (irb->o == IR_ADD && irref_isk(irb->op2)) { 143 } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
128 /* Fuse base offset (vararg load). */ 144 /* Fuse base offset (vararg load). */
129 as->mrm.ofs = IR(irb->op2)->i; 145 IRIns *irk = IR(irb->op2);
146 as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
130 return irb->op1; 147 return irb->op1;
131 } 148 }
132 return ref; /* Otherwise use the given array base. */ 149 return ref; /* Otherwise use the given array base. */
@@ -136,7 +153,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
136static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) 153static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
137{ 154{
138 IRIns *irx; 155 IRIns *irx;
139 lua_assert(ir->o == IR_AREF); 156 lj_assertA(ir->o == IR_AREF, "expected AREF");
140 as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); 157 as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow);
141 irx = IR(ir->op2); 158 irx = IR(ir->op2);
142 if (irref_isk(ir->op2)) { 159 if (irref_isk(ir->op2)) {
@@ -187,14 +204,32 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
187 if (irref_isk(ir->op1)) { 204 if (irref_isk(ir->op1)) {
188 GCfunc *fn = ir_kfunc(IR(ir->op1)); 205 GCfunc *fn = ir_kfunc(IR(ir->op1));
189 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; 206 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
207#if LJ_GC64
208 int64_t ofs = dispofs(as, &uv->tv);
209 if (checki32(ofs) && checki32(ofs+4)) {
210 as->mrm.ofs = (int32_t)ofs;
211 as->mrm.base = RID_DISPATCH;
212 as->mrm.idx = RID_NONE;
213 return;
214 }
215#else
190 as->mrm.ofs = ptr2addr(&uv->tv); 216 as->mrm.ofs = ptr2addr(&uv->tv);
191 as->mrm.base = as->mrm.idx = RID_NONE; 217 as->mrm.base = as->mrm.idx = RID_NONE;
192 return; 218 return;
219#endif
193 } 220 }
194 break; 221 break;
222 case IR_TMPREF:
223#if LJ_GC64
224 as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv);
225 as->mrm.base = RID_DISPATCH;
226 as->mrm.idx = RID_NONE;
227#else
228 as->mrm.ofs = igcptr(&J2G(as->J)->tmptv);
229 as->mrm.base = as->mrm.idx = RID_NONE;
230#endif
231 return;
195 default: 232 default:
196 lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
197 ir->o == IR_KKPTR);
198 break; 233 break;
199 } 234 }
200 } 235 }
@@ -206,26 +241,53 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
206/* Fuse FLOAD/FREF reference into memory operand. */ 241/* Fuse FLOAD/FREF reference into memory operand. */
207static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) 242static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
208{ 243{
209 lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); 244 lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF,
210 as->mrm.ofs = field_ofs[ir->op2]; 245 "bad IR op %d", ir->o);
211 as->mrm.idx = RID_NONE; 246 as->mrm.idx = RID_NONE;
247 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
248#if LJ_GC64
249 as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch);
250 as->mrm.base = RID_DISPATCH;
251#else
252 as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J));
253 as->mrm.base = RID_NONE;
254#endif
255 return;
256 }
257 as->mrm.ofs = field_ofs[ir->op2];
212 if (irref_isk(ir->op1)) { 258 if (irref_isk(ir->op1)) {
213 as->mrm.ofs += IR(ir->op1)->i; 259 IRIns *op1 = IR(ir->op1);
260#if LJ_GC64
261 if (ir->op1 == REF_NIL) {
262 as->mrm.ofs -= GG_OFS(dispatch);
263 as->mrm.base = RID_DISPATCH;
264 return;
265 } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
266 intptr_t ofs = dispofs(as, ir_kptr(op1));
267 if (checki32(as->mrm.ofs + ofs)) {
268 as->mrm.ofs += (int32_t)ofs;
269 as->mrm.base = RID_DISPATCH;
270 return;
271 }
272 }
273#else
274 as->mrm.ofs += op1->i;
214 as->mrm.base = RID_NONE; 275 as->mrm.base = RID_NONE;
215 } else { 276 return;
216 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); 277#endif
217 } 278 }
279 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
218} 280}
219 281
220/* Fuse string reference into memory operand. */ 282/* Fuse string reference into memory operand. */
221static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) 283static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
222{ 284{
223 IRIns *irr; 285 IRIns *irr;
224 lua_assert(ir->o == IR_STRREF); 286 lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o);
225 as->mrm.base = as->mrm.idx = RID_NONE; 287 as->mrm.base = as->mrm.idx = RID_NONE;
226 as->mrm.scale = XM_SCALE1; 288 as->mrm.scale = XM_SCALE1;
227 as->mrm.ofs = sizeof(GCstr); 289 as->mrm.ofs = sizeof(GCstr);
228 if (irref_isk(ir->op1)) { 290 if (!LJ_GC64 && irref_isk(ir->op1)) {
229 as->mrm.ofs += IR(ir->op1)->i; 291 as->mrm.ofs += IR(ir->op1)->i;
230 } else { 292 } else {
231 Reg r = ra_alloc1(as, ir->op1, allow); 293 Reg r = ra_alloc1(as, ir->op1, allow);
@@ -257,10 +319,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
257 IRIns *ir = IR(ref); 319 IRIns *ir = IR(ref);
258 as->mrm.idx = RID_NONE; 320 as->mrm.idx = RID_NONE;
259 if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { 321 if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
322#if LJ_GC64
323 intptr_t ofs = dispofs(as, ir_kptr(ir));
324 if (checki32(ofs)) {
325 as->mrm.ofs = (int32_t)ofs;
326 as->mrm.base = RID_DISPATCH;
327 return;
328 }
329 } if (0) {
330#else
260 as->mrm.ofs = ir->i; 331 as->mrm.ofs = ir->i;
261 as->mrm.base = RID_NONE; 332 as->mrm.base = RID_NONE;
262 } else if (ir->o == IR_STRREF) { 333 } else if (ir->o == IR_STRREF) {
263 asm_fusestrref(as, ir, allow); 334 asm_fusestrref(as, ir, allow);
335#endif
264 } else { 336 } else {
265 as->mrm.ofs = 0; 337 as->mrm.ofs = 0;
266 if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { 338 if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -303,7 +375,47 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
303 } 375 }
304} 376}
305 377
306/* Fuse load into memory operand. */ 378/* Fuse load of 64 bit IR constant into memory operand. */
379static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
380{
381 const uint64_t *k = &ir_k64(ir)->u64;
382 if (!LJ_GC64 || checki32((intptr_t)k)) {
383 as->mrm.ofs = ptr2addr(k);
384 as->mrm.base = RID_NONE;
385#if LJ_GC64
386 } else if (checki32(dispofs(as, k))) {
387 as->mrm.ofs = (int32_t)dispofs(as, k);
388 as->mrm.base = RID_DISPATCH;
389 } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
390 checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
391 as->mrm.ofs = (int32_t)mcpofs(as, k);
392 as->mrm.base = RID_RIP;
393 } else { /* Intern 64 bit constant at bottom of mcode. */
394 if (ir->i) {
395 lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
396 "bad interned 64 bit constant");
397 } else {
398 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
399 *(uint64_t*)as->mcbot = *k;
400 ir->i = (int32_t)(as->mctop - as->mcbot);
401 as->mcbot += 8;
402 as->mclim = as->mcbot + MCLIM_REDZONE;
403 lj_mcode_commitbot(as->J, as->mcbot);
404 }
405 as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
406 as->mrm.base = RID_RIP;
407#endif
408 }
409 as->mrm.idx = RID_NONE;
410 return RID_MRM;
411}
412
413/* Fuse load into memory operand.
414**
415** Important caveat: this may emit RIP-relative loads! So don't place any
416** code emitters between this function and the use of its result.
417** The only permitted exception is asm_guardcc().
418*/
307static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) 419static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
308{ 420{
309 IRIns *ir = IR(ref); 421 IRIns *ir = IR(ref);
@@ -321,27 +433,36 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
321 } 433 }
322 if (ir->o == IR_KNUM) { 434 if (ir->o == IR_KNUM) {
323 RegSet avail = as->freeset & ~as->modset & RSET_FPR; 435 RegSet avail = as->freeset & ~as->modset & RSET_FPR;
324 lua_assert(allow != RSET_EMPTY); 436 lj_assertA(allow != RSET_EMPTY, "no register allowed");
325 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ 437 if (!(avail & (avail-1))) /* Fuse if less than two regs available. */
326 as->mrm.ofs = ptr2addr(ir_knum(ir)); 438 return asm_fuseloadk64(as, ir);
327 as->mrm.base = as->mrm.idx = RID_NONE;
328 return RID_MRM;
329 }
330 } else if (ref == REF_BASE || ir->o == IR_KINT64) { 439 } else if (ref == REF_BASE || ir->o == IR_KINT64) {
331 RegSet avail = as->freeset & ~as->modset & RSET_GPR; 440 RegSet avail = as->freeset & ~as->modset & RSET_GPR;
332 lua_assert(allow != RSET_EMPTY); 441 lj_assertA(allow != RSET_EMPTY, "no register allowed");
333 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ 442 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
334 as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir)); 443 if (ref == REF_BASE) {
335 as->mrm.base = as->mrm.idx = RID_NONE; 444#if LJ_GC64
336 return RID_MRM; 445 as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base);
446 as->mrm.base = RID_DISPATCH;
447#else
448 as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base);
449 as->mrm.base = RID_NONE;
450#endif
451 as->mrm.idx = RID_NONE;
452 return RID_MRM;
453 } else {
454 return asm_fuseloadk64(as, ir);
455 }
337 } 456 }
338 } else if (mayfuse(as, ref)) { 457 } else if (mayfuse(as, ref)) {
339 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; 458 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
340 if (ir->o == IR_SLOAD) { 459 if (ir->o == IR_SLOAD) {
341 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && 460 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
342 noconflict(as, ref, IR_RETF, 2)) { 461 noconflict(as, ref, IR_RETF, 2) &&
462 !(LJ_GC64 && irt_isaddr(ir->t))) {
343 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); 463 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
344 as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); 464 as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
465 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
345 as->mrm.idx = RID_NONE; 466 as->mrm.idx = RID_NONE;
346 return RID_MRM; 467 return RID_MRM;
347 } 468 }
@@ -353,7 +474,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
353 return RID_MRM; 474 return RID_MRM;
354 } 475 }
355 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { 476 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
356 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD))) { 477 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
478 !(LJ_GC64 && irt_isaddr(ir->t))) {
357 asm_fuseahuref(as, ir->op1, xallow); 479 asm_fuseahuref(as, ir->op1, xallow);
358 return RID_MRM; 480 return RID_MRM;
359 } 481 }
@@ -366,11 +488,17 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
366 asm_fusexref(as, ir->op1, xallow); 488 asm_fusexref(as, ir->op1, xallow);
367 return RID_MRM; 489 return RID_MRM;
368 } 490 }
369 } else if (ir->o == IR_VLOAD) { 491 } else if (ir->o == IR_VLOAD && IR(ir->op1)->o == IR_AREF &&
492 !(LJ_GC64 && irt_isaddr(ir->t))) {
370 asm_fuseahuref(as, ir->op1, xallow); 493 asm_fuseahuref(as, ir->op1, xallow);
494 as->mrm.ofs += 8 * ir->op2;
371 return RID_MRM; 495 return RID_MRM;
372 } 496 }
373 } 497 }
498 if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) {
499 asm_fusefref(as, ir, RSET_EMPTY);
500 return RID_MRM;
501 }
374 if (!(as->freeset & allow) && !emit_canremat(ref) && 502 if (!(as->freeset & allow) && !emit_canremat(ref) &&
375 (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) 503 (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
376 goto fusespill; 504 goto fusespill;
@@ -394,7 +522,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
394/* Count the required number of stack slots for a call. */ 522/* Count the required number of stack slots for a call. */
395static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) 523static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
396{ 524{
397 uint32_t i, nargs = CCI_NARGS(ci); 525 uint32_t i, nargs = CCI_XNARGS(ci);
398 int nslots = 0; 526 int nslots = 0;
399#if LJ_64 527#if LJ_64
400 if (LJ_ABI_WIN) { 528 if (LJ_ABI_WIN) {
@@ -427,7 +555,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
427/* Generate a call to a C function. */ 555/* Generate a call to a C function. */
428static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 556static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
429{ 557{
430 uint32_t n, nargs = CCI_NARGS(ci); 558 uint32_t n, nargs = CCI_XNARGS(ci);
431 int32_t ofs = STACKARG_OFS; 559 int32_t ofs = STACKARG_OFS;
432#if LJ_64 560#if LJ_64
433 uint32_t gprs = REGARG_GPRS; 561 uint32_t gprs = REGARG_GPRS;
@@ -487,13 +615,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
487 if (r) { /* Argument is in a register. */ 615 if (r) { /* Argument is in a register. */
488 if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { 616 if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
489#if LJ_64 617#if LJ_64
490 if (ir->o == IR_KINT64) 618 if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64)
491 emit_loadu64(as, r, ir_kint64(ir)->u64); 619 emit_loadu64(as, r, ir_k64(ir)->u64);
492 else 620 else
493#endif 621#endif
494 emit_loadi(as, r, ir->i); 622 emit_loadi(as, r, ir->i);
495 } else { 623 } else {
496 lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ 624 /* Must have been evicted. */
625 lj_assertA(rset_test(as->freeset, r), "reg %d not free", r);
497 if (ra_hasreg(ir->r)) { 626 if (ra_hasreg(ir->r)) {
498 ra_noweak(as, ir->r); 627 ra_noweak(as, ir->r);
499 emit_movrr(as, ir, r, ir->r); 628 emit_movrr(as, ir, r, ir->r);
@@ -502,7 +631,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
502 } 631 }
503 } 632 }
504 } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ 633 } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */
505 lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ 634 lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)),
635 "unexpected float constant");
506 if (LJ_32 && (ofs & 4) && irref_isk(ref)) { 636 if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
507 /* Split stores for unaligned FP consts. */ 637 /* Split stores for unaligned FP consts. */
508 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); 638 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
@@ -533,7 +663,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
533static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) 663static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
534{ 664{
535 RegSet drop = RSET_SCRATCH; 665 RegSet drop = RSET_SCRATCH;
536 int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 666 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
537 if ((ci->flags & CCI_NOFPRCLOBBER)) 667 if ((ci->flags & CCI_NOFPRCLOBBER))
538 drop &= ~RSET_FPR; 668 drop &= ~RSET_FPR;
539 if (ra_hasreg(ir->r)) 669 if (ra_hasreg(ir->r))
@@ -562,7 +692,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
562 if (ra_hasreg(dest)) { 692 if (ra_hasreg(dest)) {
563 ra_free(as, dest); 693 ra_free(as, dest);
564 ra_modified(as, dest); 694 ra_modified(as, dest);
565 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 695 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
566 dest, RID_ESP, ofs); 696 dest, RID_ESP, ofs);
567 } 697 }
568 if ((ci->flags & CCI_CASTU64)) { 698 if ((ci->flags & CCI_CASTU64)) {
@@ -573,12 +703,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
573 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 703 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
574 } 704 }
575#endif 705#endif
576#if LJ_32
577 } else if (hiop) { 706 } else if (hiop) {
578 ra_destpair(as, ir); 707 ra_destpair(as, ir);
579#endif
580 } else { 708 } else {
581 lua_assert(!irt_ispri(ir->t)); 709 lj_assertA(!irt_ispri(ir->t), "PRI dest");
582 ra_destreg(as, ir, RID_RET); 710 ra_destreg(as, ir, RID_RET);
583 } 711 }
584 } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { 712 } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
@@ -586,15 +714,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
586 } 714 }
587} 715}
588 716
589static void asm_call(ASMState *as, IRIns *ir)
590{
591 IRRef args[CCI_NARGS_MAX];
592 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
593 asm_collectargs(as, ir, ci, args);
594 asm_setupresult(as, ir, ci);
595 asm_gencall(as, ci, args);
596}
597
598/* Return a constant function pointer or NULL for indirect calls. */ 717/* Return a constant function pointer or NULL for indirect calls. */
599static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) 718static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
600{ 719{
@@ -653,16 +772,39 @@ static void asm_callx(ASMState *as, IRIns *ir)
653static void asm_retf(ASMState *as, IRIns *ir) 772static void asm_retf(ASMState *as, IRIns *ir)
654{ 773{
655 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 774 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
775#if LJ_FR2
776 Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
777#endif
656 void *pc = ir_kptr(IR(ir->op2)); 778 void *pc = ir_kptr(IR(ir->op2));
657 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 779 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
658 as->topslot -= (BCReg)delta; 780 as->topslot -= (BCReg)delta;
659 if ((int32_t)as->topslot < 0) as->topslot = 0; 781 if ((int32_t)as->topslot < 0) as->topslot = 0;
660 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 782 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
661 emit_setgl(as, base, jit_base); 783 emit_setgl(as, base, jit_base);
662 emit_addptr(as, base, -8*delta); 784 emit_addptr(as, base, -8*delta);
663 asm_guardcc(as, CC_NE); 785 asm_guardcc(as, CC_NE);
786#if LJ_FR2
787 emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
788 emit_loadu64(as, rpc, u64ptr(pc));
789#else
664 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); 790 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
791#endif
792}
793
794/* -- Buffer operations --------------------------------------------------- */
795
796#if LJ_HASBUFFER
797static void asm_bufhdr_write(ASMState *as, Reg sb)
798{
799 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
800 IRIns irgc;
801 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
802 emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
803 emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L);
804 emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG);
805 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
665} 806}
807#endif
666 808
667/* -- Type conversions ---------------------------------------------------- */ 809/* -- Type conversions ---------------------------------------------------- */
668 810
@@ -674,8 +816,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
674 asm_guardcc(as, CC_NE); 816 asm_guardcc(as, CC_NE);
675 emit_rr(as, XO_UCOMISD, left, tmp); 817 emit_rr(as, XO_UCOMISD, left, tmp);
676 emit_rr(as, XO_CVTSI2SD, tmp, dest); 818 emit_rr(as, XO_CVTSI2SD, tmp, dest);
677 if (!(as->flags & JIT_F_SPLIT_XMM)) 819 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
678 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
679 checkmclim(as); 820 checkmclim(as);
680 emit_rr(as, XO_CVTTSD2SI, dest, left); 821 emit_rr(as, XO_CVTTSD2SI, dest, left);
681 /* Can't fuse since left is needed twice. */ 822 /* Can't fuse since left is needed twice. */
@@ -687,8 +828,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
687 Reg tmp = ra_noreg(IR(ir->op1)->r) ? 828 Reg tmp = ra_noreg(IR(ir->op1)->r) ?
688 ra_alloc1(as, ir->op1, RSET_FPR) : 829 ra_alloc1(as, ir->op1, RSET_FPR) :
689 ra_scratch(as, RSET_FPR); 830 ra_scratch(as, RSET_FPR);
690 Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); 831 Reg right;
691 emit_rr(as, XO_MOVDto, tmp, dest); 832 emit_rr(as, XO_MOVDto, tmp, dest);
833 right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
692 emit_mrm(as, XO_ADDSD, tmp, right); 834 emit_mrm(as, XO_ADDSD, tmp, right);
693 ra_left(as, tmp, ir->op1); 835 ra_left(as, tmp, ir->op1);
694} 836}
@@ -699,8 +841,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
699 int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); 841 int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64));
700 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 842 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
701 IRRef lref = ir->op1; 843 IRRef lref = ir->op1;
702 lua_assert(irt_type(ir->t) != st); 844 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
703 lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ 845 lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)),
846 "IR %04d has unsplit 64 bit type",
847 (int)(ir - as->ir) - REF_BIAS);
704 if (irt_isfp(ir->t)) { 848 if (irt_isfp(ir->t)) {
705 Reg dest = ra_dest(as, ir, RSET_FPR); 849 Reg dest = ra_dest(as, ir, RSET_FPR);
706 if (stfp) { /* FP to FP conversion. */ 850 if (stfp) { /* FP to FP conversion. */
@@ -709,13 +853,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
709 if (left == dest) return; /* Avoid the XO_XORPS. */ 853 if (left == dest) return; /* Avoid the XO_XORPS. */
710 } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ 854 } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
711 /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ 855 /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
712 cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); 856 cTValue *k = &as->J->k64[LJ_K64_TOBIT];
713 Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); 857 Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
714 if (irt_isfloat(ir->t)) 858 if (irt_isfloat(ir->t))
715 emit_rr(as, XO_CVTSD2SS, dest, dest); 859 emit_rr(as, XO_CVTSD2SS, dest, dest);
716 emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ 860 emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
717 emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ 861 emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
718 emit_loadn(as, bias, k); 862 emit_rma(as, XO_MOVSD, bias, k);
719 checkmclim(as); 863 checkmclim(as);
720 emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); 864 emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
721 return; 865 return;
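[Editor's note] The "number = (2^52+2^51 .. u32) - (2^52+2^51)" comment above is the classic bias trick: the LJ_K64_TOBIT constant (bit pattern 0x4338000000000000, visible in the deleted lj_ir_k64_find line) is the double 2^52+2^51, whose low 32 mantissa bits are free, so MOVD+XORPS splice the u32 into the mantissa and SUBSD removes the bias, giving an exact u32-to-double conversion without a 64-bit CVTSI2SD. A minimal C sketch of the same idea (illustrative only, not LuaJIT code):

#include <stdint.h>
#include <string.h>

static double u32_to_double_biased(uint32_t u)
{
  uint64_t bits = 0x4338000000000000ull | u;  /* xorps: merge bias and integer */
  double d;
  memcpy(&d, &bits, sizeof(d));               /* bit pattern == 2^52+2^51 + u  */
  return d - 6755399441055744.0;              /* subsd: subtract 2^52+2^51     */
}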
@@ -725,7 +869,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
725 asm_fuseloadm(as, lref, RSET_GPR, st64); 869 asm_fuseloadm(as, lref, RSET_GPR, st64);
726 if (LJ_64 && st == IRT_U64) { 870 if (LJ_64 && st == IRT_U64) {
727 MCLabel l_end = emit_label(as); 871 MCLabel l_end = emit_label(as);
728 const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); 872 cTValue *k = &as->J->k64[LJ_K64_2P64];
729 emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ 873 emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
730 emit_sjcc(as, CC_NS, l_end); 874 emit_sjcc(as, CC_NS, l_end);
731 emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ 875 emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
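[Editor's note] The U64-to-FP path above leans on CVTSI2SD's signed interpretation: a u64 with the top bit set converts to u - 2^64, so the code tests the sign of the source and, only in that case, adds the LJ_K64_2P64 constant (2^64) back in. A rough C equivalent (illustrative only):

#include <stdint.h>

static double u64_to_double_compensated(uint64_t u)
{
  double d = (double)(int64_t)u;        /* cvtsi2sd: signed interpretation     */
  if ((int64_t)u < 0)                   /* test/jns: skip unless u >= 2^63     */
    d += 18446744073709551616.0;        /* addsd: add 2^64 to compensate       */
  return d;
}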
@@ -733,18 +877,16 @@ static void asm_conv(ASMState *as, IRIns *ir)
733 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 877 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
734 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 878 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
735 } 879 }
736 if (!(as->flags & JIT_F_SPLIT_XMM)) 880 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
737 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
738 } else if (stfp) { /* FP to integer conversion. */ 881 } else if (stfp) { /* FP to integer conversion. */
739 if (irt_isguard(ir->t)) { 882 if (irt_isguard(ir->t)) {
740 /* Checked conversions are only supported from number to int. */ 883 /* Checked conversions are only supported from number to int. */
741 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 884 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
885 "bad type for checked CONV");
742 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 886 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
743 } else { 887 } else {
744 Reg dest = ra_dest(as, ir, RSET_GPR); 888 Reg dest = ra_dest(as, ir, RSET_GPR);
745 x86Op op = st == IRT_NUM ? 889 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
746 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
747 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
748 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 890 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
749 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 891 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
750 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 892 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -755,30 +897,27 @@ static void asm_conv(ASMState *as, IRIns *ir)
755 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); 897 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
756 emit_rr(as, op, dest|REX_64, tmp); 898 emit_rr(as, op, dest|REX_64, tmp);
757 if (st == IRT_NUM) 899 if (st == IRT_NUM)
758 emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, 900 emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
759 LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
760 else 901 else
761 emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, 902 emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
762 LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
763 emit_sjcc(as, CC_NS, l_end); 903 emit_sjcc(as, CC_NS, l_end);
764 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ 904 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
765 emit_rr(as, op, dest|REX_64, tmp); 905 emit_rr(as, op, dest|REX_64, tmp);
766 ra_left(as, tmp, lref); 906 ra_left(as, tmp, lref);
767 } else { 907 } else {
768 Reg left = asm_fuseload(as, lref, RSET_FPR);
769 if (LJ_64 && irt_isu32(ir->t)) 908 if (LJ_64 && irt_isu32(ir->t))
770 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ 909 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
771 emit_mrm(as, op, 910 emit_mrm(as, op,
772 dest|((LJ_64 && 911 dest|((LJ_64 &&
773 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), 912 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
774 left); 913 asm_fuseload(as, lref, RSET_FPR));
775 } 914 }
776 } 915 }
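[Editor's note] The unsigned FP-to-integer branch closed above works the other way around, as its comments describe: convert once, and if the signed result comes out negative, add the LJ_K64_M2P64_31 constant (-2^64 on 64 bit, -2^31 on 32 bit, per the deleted U64x literals) to the input and convert again, so the wrapped bits equal the unsigned value. A rough equivalent for the 64-bit case (illustrative only; the generated code tests the sign of the first conversion rather than comparing the input):

#include <stdint.h>

static uint64_t double_to_u64_compensated(double x)
{
  if (x >= 9223372036854775808.0)       /* x >= 2^63: out of signed range      */
    /* addsd -2^64, convert again: the wrapped bits equal the u64 value. */
    return (uint64_t)(int64_t)(x - 18446744073709551616.0);
  return (uint64_t)(int64_t)x;          /* cvttsd2si                           */
}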
777 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 916 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
778 Reg left, dest = ra_dest(as, ir, RSET_GPR); 917 Reg left, dest = ra_dest(as, ir, RSET_GPR);
779 RegSet allow = RSET_GPR; 918 RegSet allow = RSET_GPR;
780 x86Op op; 919 x86Op op;
781 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 920 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
782 if (st == IRT_I8) { 921 if (st == IRT_I8) {
783 op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; 922 op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX;
784 } else if (st == IRT_U8) { 923 } else if (st == IRT_U8) {
@@ -812,7 +951,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
812 } 951 }
813 } else { 952 } else {
814 Reg dest = ra_dest(as, ir, RSET_GPR); 953 Reg dest = ra_dest(as, ir, RSET_GPR);
815 if (st64) { 954 if (st64 && !(ir->op2 & IRCONV_NONE)) {
816 Reg left = asm_fuseload(as, lref, RSET_GPR); 955 Reg left = asm_fuseload(as, lref, RSET_GPR);
817 /* This is either a 32 bit reg/reg mov which zeroes the hiword 956 /* This is either a 32 bit reg/reg mov which zeroes the hiword
818 ** or a load of the loword from a 64 bit address. 957 ** or a load of the loword from a 64 bit address.
@@ -838,20 +977,18 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
838 if (ra_hasreg(dest)) { 977 if (ra_hasreg(dest)) {
839 ra_free(as, dest); 978 ra_free(as, dest);
840 ra_modified(as, dest); 979 ra_modified(as, dest);
841 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 980 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
842 dest, RID_ESP, ofs);
843 } 981 }
844 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 982 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
845 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 983 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
846 if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { 984 if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
847 /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ 985 /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
848 MCLabel l_end = emit_label(as); 986 MCLabel l_end = emit_label(as);
849 emit_rma(as, XO_FADDq, XOg_FADDq, 987 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]);
850 lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
851 emit_sjcc(as, CC_NS, l_end); 988 emit_sjcc(as, CC_NS, l_end);
852 emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ 989 emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
853 } else { 990 } else {
854 lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); 991 lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV");
855 } 992 }
856 emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); 993 emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
857 /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ 994 /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
@@ -865,9 +1002,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
865 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); 1002 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
866 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); 1003 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
867 Reg lo, hi; 1004 Reg lo, hi;
868 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 1005 lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
869 lua_assert(dt == IRT_I64 || dt == IRT_U64); 1006 lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
870 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
871 hi = ra_dest(as, ir, RSET_GPR); 1007 hi = ra_dest(as, ir, RSET_GPR);
872 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 1008 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
873 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 1009 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -888,8 +1024,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
888 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); 1024 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
889 else 1025 else
890 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); 1026 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
891 emit_rma(as, XO_FADDq, XOg_FADDq, 1027 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
892 lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
893 emit_sjcc(as, CC_NS, l_pop); 1028 emit_sjcc(as, CC_NS, l_pop);
894 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ 1029 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
895 } 1030 }
@@ -910,6 +1045,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
910 st == IRT_NUM ? XOg_FLDq: XOg_FLDd, 1045 st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
911 asm_fuseload(as, ir->op1, RSET_EMPTY)); 1046 asm_fuseload(as, ir->op1, RSET_EMPTY));
912} 1047}
1048
1049static void asm_conv64(ASMState *as, IRIns *ir)
1050{
1051 if (irt_isfp(ir->t))
1052 asm_conv_fp_int64(as, ir);
1053 else
1054 asm_conv_int64_fp(as, ir);
1055}
913#endif 1056#endif
914 1057
915static void asm_strto(ASMState *as, IRIns *ir) 1058static void asm_strto(ASMState *as, IRIns *ir)
@@ -931,54 +1074,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
931 RID_ESP, sps_scale(ir->s)); 1074 RID_ESP, sps_scale(ir->s));
932} 1075}
933 1076
934static void asm_tostr(ASMState *as, IRIns *ir) 1077/* -- Memory references --------------------------------------------------- */
1078
1079/* Get pointer to TValue. */
1080static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
935{ 1081{
936 IRIns *irl = IR(ir->op1); 1082 if ((mode & IRTMPREF_IN1)) {
937 IRRef args[2]; 1083 IRIns *ir = IR(ref);
938 args[0] = ASMREF_L; 1084 if (irt_isnum(ir->t)) {
939 as->gcsteps++; 1085 if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
940 if (irt_isnum(irl->t)) { 1086 /* Use the number constant itself as a TValue. */
941 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 1087 emit_loada(as, dest, ir_knum(ir));
942 args[1] = ASMREF_TMP1; /* const lua_Number * */ 1088 return;
943 asm_setupresult(as, ir, ci); /* GCstr * */ 1089 }
944 asm_gencall(as, ci, args); 1090 emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0);
945 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64, 1091 } else {
946 RID_ESP, ra_spill(as, irl)); 1092#if LJ_GC64
947 } else { 1093 if (irref_isk(ref)) {
948 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 1094 TValue k;
949 args[1] = ir->op1; /* int32_t k */ 1095 lj_ir_kvalue(as->J->L, &k, ir);
950 asm_setupresult(as, ir, ci); /* GCstr * */ 1096 emit_movmroi(as, dest, 4, k.u32.hi);
951 asm_gencall(as, ci, args); 1097 emit_movmroi(as, dest, 0, k.u32.lo);
1098 } else {
1099 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1100 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
1101 if (irt_is64(ir->t)) {
1102 emit_u32(as, irt_toitype(ir->t) << 15);
1103 emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
1104 } else {
1105 emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
1106 }
1107 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
1108 }
1109#else
1110 if (!irref_isk(ref)) {
1111 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
1112 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
1113 } else if (!irt_ispri(ir->t)) {
1114 emit_movmroi(as, dest, 0, ir->i);
1115 }
1116 if (!(LJ_64 && irt_islightud(ir->t)))
1117 emit_movmroi(as, dest, 4, irt_toitype(ir->t));
1118#endif
1119 }
952 } 1120 }
1121 emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */
953} 1122}
954 1123
955/* -- Memory references --------------------------------------------------- */
956
957static void asm_aref(ASMState *as, IRIns *ir) 1124static void asm_aref(ASMState *as, IRIns *ir)
958{ 1125{
959 Reg dest = ra_dest(as, ir, RSET_GPR); 1126 Reg dest = ra_dest(as, ir, RSET_GPR);
960 asm_fusearef(as, ir, RSET_GPR); 1127 asm_fusearef(as, ir, RSET_GPR);
961 if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) 1128 if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
962 emit_mrm(as, XO_LEA, dest, RID_MRM); 1129 emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
963 else if (as->mrm.base != dest) 1130 else if (as->mrm.base != dest)
964 emit_rr(as, XO_MOV, dest, as->mrm.base); 1131 emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
965}
966
967/* Merge NE(HREF, niltv) check. */
968static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
969{
970 /* Assumes nothing else generates NE of HREF. */
971 if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
972 ra_hasreg(ir->r)) {
973 MCode *p = as->mcp;
974 p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
975 /* Ensure no loop branch inversion happened. */
976 if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
977 as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */
978 return p + *(int32_t *)(p-4); /* Return exit address. */
979 }
980 }
981 return NULL;
982} 1132}
983 1133
984/* Inlined hash lookup. Specialized for key type and for const keys. 1134/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -989,10 +1139,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
989** } while ((n = nextnode(n))); 1139** } while ((n = nextnode(n)));
990** return niltv(L); 1140** return niltv(L);
991*/ 1141*/
992static void asm_href(ASMState *as, IRIns *ir) 1142static void asm_href(ASMState *as, IRIns *ir, IROp merge)
993{ 1143{
994 MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
995 RegSet allow = RSET_GPR; 1144 RegSet allow = RSET_GPR;
1145 int destused = ra_used(ir);
996 Reg dest = ra_dest(as, ir, allow); 1146 Reg dest = ra_dest(as, ir, allow);
997 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 1147 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
998 Reg key = RID_NONE, tmp = RID_NONE; 1148 Reg key = RID_NONE, tmp = RID_NONE;
@@ -1005,28 +1155,26 @@ static void asm_href(ASMState *as, IRIns *ir)
1005 if (!isk) { 1155 if (!isk) {
1006 rset_clear(allow, tab); 1156 rset_clear(allow, tab);
1007 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); 1157 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
1008 if (!irt_isstr(kt)) 1158 if (LJ_GC64 || !irt_isstr(kt))
1009 tmp = ra_scratch(as, rset_exclude(allow, key)); 1159 tmp = ra_scratch(as, rset_exclude(allow, key));
1010 } 1160 }
1011 1161
1012 /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ 1162 /* Key not found in chain: jump to exit (if merged) or load niltv. */
1013 l_end = emit_label(as); 1163 l_end = emit_label(as);
1014 if (nilexit && ir[1].o == IR_NE) { 1164 if (merge == IR_NE)
1015 emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ 1165 asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
1016 nilexit = NULL; 1166 else if (destused)
1017 } else {
1018 emit_loada(as, dest, niltvg(J2G(as->J))); 1167 emit_loada(as, dest, niltvg(J2G(as->J)));
1019 }
1020 1168
1021 /* Follow hash chain until the end. */ 1169 /* Follow hash chain until the end. */
1022 l_loop = emit_sjcc_label(as, CC_NZ); 1170 l_loop = emit_sjcc_label(as, CC_NZ);
1023 emit_rr(as, XO_TEST, dest, dest); 1171 emit_rr(as, XO_TEST, dest|REX_GC64, dest);
1024 emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); 1172 emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
1025 l_next = emit_label(as); 1173 l_next = emit_label(as);
1026 1174
1027 /* Type and value comparison. */ 1175 /* Type and value comparison. */
1028 if (nilexit) 1176 if (merge == IR_EQ)
1029 emit_jcc(as, CC_E, nilexit); 1177 asm_guardcc(as, CC_E);
1030 else 1178 else
1031 emit_sjcc(as, CC_E, l_end); 1179 emit_sjcc(as, CC_E, l_end);
1032 checkmclim(as); 1180 checkmclim(as);
@@ -1043,7 +1191,7 @@ static void asm_href(ASMState *as, IRIns *ir)
1043 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); 1191 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
1044 emit_sjcc(as, CC_AE, l_next); 1192 emit_sjcc(as, CC_AE, l_next);
1045 /* The type check avoids NaN penalties and complaints from Valgrind. */ 1193 /* The type check avoids NaN penalties and complaints from Valgrind. */
1046#if LJ_64 1194#if LJ_64 && !LJ_GC64
1047 emit_u32(as, LJ_TISNUM); 1195 emit_u32(as, LJ_TISNUM);
1048 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); 1196 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1049#else 1197#else
@@ -1051,13 +1199,31 @@ static void asm_href(ASMState *as, IRIns *ir)
1051 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); 1199 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1052#endif 1200#endif
1053 } 1201 }
1054#if LJ_64 1202#if LJ_64 && !LJ_GC64
1055 } else if (irt_islightud(kt)) { 1203 } else if (irt_islightud(kt)) {
1056 emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); 1204 emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
1057#endif 1205#endif
1206#if LJ_GC64
1207 } else if (irt_isaddr(kt)) {
1208 if (isk) {
1209 TValue k;
1210 k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
1211 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
1212 k.u32.lo);
1213 emit_sjcc(as, CC_NE, l_next);
1214 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
1215 k.u32.hi);
1216 } else {
1217 emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
1218 }
1219 } else {
1220 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
1221 emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
1222 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1223#else
1058 } else { 1224 } else {
1059 if (!irt_ispri(kt)) { 1225 if (!irt_ispri(kt)) {
1060 lua_assert(irt_isaddr(kt)); 1226 lj_assertA(irt_isaddr(kt), "bad HREF key type");
1061 if (isk) 1227 if (isk)
1062 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), 1228 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
1063 ptr2addr(ir_kgc(irkey))); 1229 ptr2addr(ir_kgc(irkey)));
@@ -1065,30 +1231,32 @@ static void asm_href(ASMState *as, IRIns *ir)
1065 emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); 1231 emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
1066 emit_sjcc(as, CC_NE, l_next); 1232 emit_sjcc(as, CC_NE, l_next);
1067 } 1233 }
1068 lua_assert(!irt_isnil(kt)); 1234 lj_assertA(!irt_isnil(kt), "bad HREF key type");
1069 emit_i8(as, irt_toitype(kt)); 1235 emit_i8(as, irt_toitype(kt));
1070 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); 1236 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1237#endif
1071 } 1238 }
1072 emit_sfixup(as, l_loop); 1239 emit_sfixup(as, l_loop);
1240#if LJ_GC64
1241 if (!isk && irt_isaddr(kt)) {
1242 emit_rr(as, XO_OR, tmp|REX_64, key);
1243 emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
1244 }
1245#endif
1073 1246
1074 /* Load main position relative to tab->node into dest. */ 1247 /* Load main position relative to tab->node into dest. */
1075 khash = isk ? ir_khash(irkey) : 1; 1248 khash = isk ? ir_khash(as, irkey) : 1;
1076 if (khash == 0) { 1249 if (khash == 0) {
1077 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); 1250 emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
1078 } else { 1251 } else {
1079 emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); 1252 emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
1080 if ((as->flags & JIT_F_PREFER_IMUL)) { 1253 emit_shifti(as, XOg_SHL, dest, 3);
1081 emit_i8(as, sizeof(Node)); 1254 emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1082 emit_rr(as, XO_IMULi8, dest, dest);
1083 } else {
1084 emit_shifti(as, XOg_SHL, dest, 3);
1085 emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1086 }
1087 if (isk) { 1255 if (isk) {
1088 emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); 1256 emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
1089 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); 1257 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1090 } else if (irt_isstr(kt)) { 1258 } else if (irt_isstr(kt)) {
1091 emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); 1259 emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid));
1092 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); 1260 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1093 } else { /* Must match with hashrot() in lj_tab.c. */ 1261 } else { /* Must match with hashrot() in lj_tab.c. */
1094 emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); 1262 emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
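[Editor's note] One detail of the hash lookup worth spelling out: the assembler emits machine code backwards, so reading the emit_* calls above bottom-up gives the execution order — load t->hmask, AND with the constant or computed hash, scale by the Node size and add t->node. The new code always uses the LEA+SHL pair for the scaling (the JIT_F_PREFER_IMUL alternative is dropped), computing idx*3*8 == idx*24. A small sketch of the resulting address computation, assuming sizeof(Node) is 24 bytes on x64 (illustrative only, hypothetical names):

#include <stdint.h>

typedef struct NodeSketch { uint64_t val, key, next; } NodeSketch;  /* 24 bytes */

static NodeSketch *main_position(NodeSketch *node, uint32_t hash, uint32_t hmask)
{
  uint32_t idx = hash & hmask;          /* and  dest, hmask (or khash)          */
  uint32_t ofs = (idx + idx * 2) << 3;  /* lea  dest, [dest+dest*2]             */
                                        /* shl  dest, 3   => idx * 24           */
  return (NodeSketch *)((char *)node + ofs);  /* add dest, [tab+GCtab.node]     */
}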
@@ -1112,7 +1280,18 @@ static void asm_href(ASMState *as, IRIns *ir)
1112#endif 1280#endif
1113 } else { 1281 } else {
1114 emit_rr(as, XO_MOV, tmp, key); 1282 emit_rr(as, XO_MOV, tmp, key);
1283#if LJ_GC64
1284 emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
1285 if ((as->flags & JIT_F_BMI2)) {
1286 emit_i8(as, 32);
1287 emit_mrm(as, XV_RORX|VEX_64, dest, key);
1288 } else {
1289 emit_shifti(as, XOg_SHR|REX_64, dest, 32);
1290 emit_rr(as, XO_MOV, dest|REX_64, key|REX_64);
1291 }
1292#else
1115 emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); 1293 emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
1294#endif
1116 } 1295 }
1117 } 1296 }
1118 } 1297 }
@@ -1128,15 +1307,15 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1128#if !LJ_64 1307#if !LJ_64
1129 MCLabel l_exit; 1308 MCLabel l_exit;
1130#endif 1309#endif
1131 lua_assert(ofs % sizeof(Node) == 0); 1310 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
1132 if (ra_hasreg(dest)) { 1311 if (ra_hasreg(dest)) {
1133 if (ofs != 0) { 1312 if (ofs != 0) {
1134 if (dest == node && !(as->flags & JIT_F_LEA_AGU)) 1313 if (dest == node)
1135 emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); 1314 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
1136 else 1315 else
1137 emit_rmro(as, XO_LEA, dest, node, ofs); 1316 emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
1138 } else if (dest != node) { 1317 } else if (dest != node) {
1139 emit_rr(as, XO_MOV, dest, node); 1318 emit_rr(as, XO_MOV, dest|REX_GC64, node);
1140 } 1319 }
1141 } 1320 }
1142 asm_guardcc(as, CC_NE); 1321 asm_guardcc(as, CC_NE);
@@ -1145,16 +1324,28 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1145 Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); 1324 Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
1146 emit_rmro(as, XO_CMP, key|REX_64, node, 1325 emit_rmro(as, XO_CMP, key|REX_64, node,
1147 ofs + (int32_t)offsetof(Node, key.u64)); 1326 ofs + (int32_t)offsetof(Node, key.u64));
1148 lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); 1327 lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t),
1328 "bad HREFK key type");
1149 /* Assumes -0.0 is already canonicalized to +0.0. */ 1329 /* Assumes -0.0 is already canonicalized to +0.0. */
1150 emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : 1330 emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
1331#if LJ_GC64
1332 ((uint64_t)irt_toitype(irkey->t) << 47) |
1333 (uint64_t)ir_kgc(irkey));
1334#else
1151 ((uint64_t)irt_toitype(irkey->t) << 32) | 1335 ((uint64_t)irt_toitype(irkey->t) << 32) |
1152 (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); 1336 (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
1337#endif
1153 } else { 1338 } else {
1154 lua_assert(!irt_isnil(irkey->t)); 1339 lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
1340#if LJ_GC64
1341 emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
1342 emit_rmro(as, XO_ARITHi, XOg_CMP, node,
1343 ofs + (int32_t)offsetof(Node, key.it));
1344#else
1155 emit_i8(as, irt_toitype(irkey->t)); 1345 emit_i8(as, irt_toitype(irkey->t));
1156 emit_rmro(as, XO_ARITHi8, XOg_CMP, node, 1346 emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1157 ofs + (int32_t)offsetof(Node, key.it)); 1347 ofs + (int32_t)offsetof(Node, key.it));
1348#endif
1158 } 1349 }
1159#else 1350#else
1160 l_exit = emit_label(as); 1351 l_exit = emit_label(as);
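[Editor's note] The GC64 key constants built in the two hunks above — ((uint64_t)irt_toitype(...) << 47) | ptr for GC objects and (itype << 15) | 0x7fff for the high dword of primitives — follow the GC64 value layout: a 47-bit payload with a 17-bit type tag above it, which is why the tag also appears as itype << 15 when only the upper 32 bits are inspected. A minimal boxing/unboxing sketch under that assumption (illustrative only, not the lj_obj.h macros):

#include <stdint.h>

#define PAYLOAD_BITS 47   /* 47-bit pointer/payload, 17-bit tag above it */

static uint64_t box_gcptr(uint32_t itype, uint64_t ptr)
{
  return ((uint64_t)itype << PAYLOAD_BITS) | ptr;     /* tag | payload */
}

static uint64_t unbox_gcptr(uint64_t tv)
{
  return tv & (((uint64_t)1 << PAYLOAD_BITS) - 1);    /* strip the tag */
}

static uint32_t tv_tag(uint64_t tv)
{
  return (uint32_t)(tv >> PAYLOAD_BITS);              /* 17-bit tag    */
}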
@@ -1169,13 +1360,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1169 (int32_t)ir_knum(irkey)->u32.hi); 1360 (int32_t)ir_knum(irkey)->u32.hi);
1170 } else { 1361 } else {
1171 if (!irt_ispri(irkey->t)) { 1362 if (!irt_ispri(irkey->t)) {
1172 lua_assert(irt_isgcv(irkey->t)); 1363 lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type");
1173 emit_gmroi(as, XG_ARITHi(XOg_CMP), node, 1364 emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1174 ofs + (int32_t)offsetof(Node, key.gcr), 1365 ofs + (int32_t)offsetof(Node, key.gcr),
1175 ptr2addr(ir_kgc(irkey))); 1366 ptr2addr(ir_kgc(irkey)));
1176 emit_sjcc(as, CC_NE, l_exit); 1367 emit_sjcc(as, CC_NE, l_exit);
1177 } 1368 }
1178 lua_assert(!irt_isnil(irkey->t)); 1369 lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
1179 emit_i8(as, irt_toitype(irkey->t)); 1370 emit_i8(as, irt_toitype(irkey->t));
1180 emit_rmro(as, XO_ARITHi8, XOg_CMP, node, 1371 emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1181 ofs + (int32_t)offsetof(Node, key.it)); 1372 ofs + (int32_t)offsetof(Node, key.it));
@@ -1183,61 +1374,34 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1183#endif 1374#endif
1184} 1375}
1185 1376
1186static void asm_newref(ASMState *as, IRIns *ir)
1187{
1188 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1189 IRRef args[3];
1190 IRIns *irkey;
1191 Reg tmp;
1192 if (ir->r == RID_SINK)
1193 return;
1194 args[0] = ASMREF_L; /* lua_State *L */
1195 args[1] = ir->op1; /* GCtab *t */
1196 args[2] = ASMREF_TMP1; /* cTValue *key */
1197 asm_setupresult(as, ir, ci); /* TValue * */
1198 asm_gencall(as, ci, args);
1199 tmp = ra_releasetmp(as, ASMREF_TMP1);
1200 irkey = IR(ir->op2);
1201 if (irt_isnum(irkey->t)) {
1202 /* For numbers use the constant itself or a spill slot as a TValue. */
1203 if (irref_isk(ir->op2))
1204 emit_loada(as, tmp, ir_knum(irkey));
1205 else
1206 emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
1207 } else {
1208 /* Otherwise use g->tmptv to hold the TValue. */
1209 if (!irref_isk(ir->op2)) {
1210 Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
1211 emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
1212 } else if (!irt_ispri(irkey->t)) {
1213 emit_movmroi(as, tmp, 0, irkey->i);
1214 }
1215 if (!(LJ_64 && irt_islightud(irkey->t)))
1216 emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
1217 emit_loada(as, tmp, &J2G(as->J)->tmptv);
1218 }
1219}
1220
1221static void asm_uref(ASMState *as, IRIns *ir) 1377static void asm_uref(ASMState *as, IRIns *ir)
1222{ 1378{
1223 Reg dest = ra_dest(as, ir, RSET_GPR); 1379 Reg dest = ra_dest(as, ir, RSET_GPR);
1224 if (irref_isk(ir->op1)) { 1380 int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
1381 if (irref_isk(ir->op1) && !guarded) {
1225 GCfunc *fn = ir_kfunc(IR(ir->op1)); 1382 GCfunc *fn = ir_kfunc(IR(ir->op1));
1226 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 1383 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
1227 emit_rma(as, XO_MOV, dest, v); 1384 emit_rma(as, XO_MOV, dest|REX_GC64, v);
1228 } else { 1385 } else {
1229 Reg uv = ra_scratch(as, RSET_GPR); 1386 Reg uv = ra_scratch(as, RSET_GPR);
1230 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 1387 if (ir->o == IR_UREFC)
1231 if (ir->o == IR_UREFC) { 1388 emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
1232 emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); 1389 else
1233 asm_guardcc(as, CC_NE); 1390 emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
1234 emit_i8(as, 1); 1391 if (guarded) {
1392 asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
1393 emit_i8(as, 0);
1235 emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); 1394 emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
1395 }
1396 if (irref_isk(ir->op1)) {
1397 GCfunc *fn = ir_kfunc(IR(ir->op1));
1398 GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
1399 emit_loada(as, uv, o);
1236 } else { 1400 } else {
1237 emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); 1401 emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
1402 (int32_t)offsetof(GCfuncL, uvptr) +
1403 (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
1238 } 1404 }
1239 emit_rmro(as, XO_MOV, uv, func,
1240 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
1241 } 1405 }
1242} 1406}
1243 1407
@@ -1255,9 +1419,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
1255 if (as->mrm.base == RID_NONE) 1419 if (as->mrm.base == RID_NONE)
1256 emit_loadi(as, dest, as->mrm.ofs); 1420 emit_loadi(as, dest, as->mrm.ofs);
1257 else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) 1421 else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
1258 emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); 1422 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
1259 else 1423 else
1260 emit_mrm(as, XO_LEA, dest, RID_MRM); 1424 emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
1261} 1425}
1262 1426
1263/* -- Loads and stores ---------------------------------------------------- */ 1427/* -- Loads and stores ---------------------------------------------------- */
@@ -1276,19 +1440,23 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1276 case IRT_U8: xo = XO_MOVZXb; break; 1440 case IRT_U8: xo = XO_MOVZXb; break;
1277 case IRT_I16: xo = XO_MOVSXw; break; 1441 case IRT_I16: xo = XO_MOVSXw; break;
1278 case IRT_U16: xo = XO_MOVZXw; break; 1442 case IRT_U16: xo = XO_MOVZXw; break;
1279 case IRT_NUM: xo = XMM_MOVRM(as); break; 1443 case IRT_NUM: xo = XO_MOVSD; break;
1280 case IRT_FLOAT: xo = XO_MOVSS; break; 1444 case IRT_FLOAT: xo = XO_MOVSS; break;
1281 default: 1445 default:
1282 if (LJ_64 && irt_is64(ir->t)) 1446 if (LJ_64 && irt_is64(ir->t))
1283 dest |= REX_64; 1447 dest |= REX_64;
1284 else 1448 else
1285 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); 1449 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
1450 "unsplit 64 bit load");
1286 xo = XO_MOV; 1451 xo = XO_MOV;
1287 break; 1452 break;
1288 } 1453 }
1289 emit_mrm(as, xo, dest, RID_MRM); 1454 emit_mrm(as, xo, dest, RID_MRM);
1290} 1455}
1291 1456
1457#define asm_fload(as, ir) asm_fxload(as, ir)
1458#define asm_xload(as, ir) asm_fxload(as, ir)
1459
1292static void asm_fxstore(ASMState *as, IRIns *ir) 1460static void asm_fxstore(ASMState *as, IRIns *ir)
1293{ 1461{
1294 RegSet allow = RSET_GPR; 1462 RegSet allow = RSET_GPR;
@@ -1323,14 +1491,17 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1323 case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; 1491 case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
1324 case IRT_NUM: xo = XO_MOVSDto; break; 1492 case IRT_NUM: xo = XO_MOVSDto; break;
1325 case IRT_FLOAT: xo = XO_MOVSSto; break; 1493 case IRT_FLOAT: xo = XO_MOVSSto; break;
1326#if LJ_64 1494#if LJ_64 && !LJ_GC64
1327 case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ 1495 case IRT_LIGHTUD:
1496 /* NYI: mask 64 bit lightuserdata. */
1497 lj_assertA(0, "store of lightuserdata");
1328#endif 1498#endif
1329 default: 1499 default:
1330 if (LJ_64 && irt_is64(ir->t)) 1500 if (LJ_64 && irt_is64(ir->t))
1331 src |= REX_64; 1501 src |= REX_64;
1332 else 1502 else
1333 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); 1503 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
1504 "unsplit 64 bit store");
1334 xo = XO_MOVto; 1505 xo = XO_MOVto;
1335 break; 1506 break;
1336 } 1507 }
@@ -1344,15 +1515,18 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1344 emit_i8(as, k); 1515 emit_i8(as, k);
1345 emit_mrm(as, XO_MOVmib, 0, RID_MRM); 1516 emit_mrm(as, XO_MOVmib, 0, RID_MRM);
1346 } else { 1517 } else {
1347 lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || 1518 lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
1348 irt_isaddr(ir->t)); 1519 irt_isaddr(ir->t), "bad store type");
1349 emit_i32(as, k); 1520 emit_i32(as, k);
1350 emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); 1521 emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
1351 } 1522 }
1352 } 1523 }
1353} 1524}
1354 1525
1355#if LJ_64 1526#define asm_fstore(as, ir) asm_fxstore(as, ir)
1527#define asm_xstore(as, ir) asm_fxstore(as, ir)
1528
1529#if LJ_64 && !LJ_GC64
1356static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) 1530static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1357{ 1531{
1358 if (ra_used(ir) || typecheck) { 1532 if (ra_used(ir) || typecheck) {
@@ -1374,14 +1548,19 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1374 1548
1375static void asm_ahuvload(ASMState *as, IRIns *ir) 1549static void asm_ahuvload(ASMState *as, IRIns *ir)
1376{ 1550{
1377 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || 1551#if LJ_GC64
1378 (LJ_DUALNUM && irt_isint(ir->t))); 1552 Reg tmp = RID_NONE;
1379#if LJ_64 1553#endif
1554 lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1555 (LJ_DUALNUM && irt_isint(ir->t)),
1556 "bad load type %d", irt_type(ir->t));
1557#if LJ_64 && !LJ_GC64
1380 if (irt_islightud(ir->t)) { 1558 if (irt_islightud(ir->t)) {
1381 Reg dest = asm_load_lightud64(as, ir, 1); 1559 Reg dest = asm_load_lightud64(as, ir, 1);
1382 if (ra_hasreg(dest)) { 1560 if (ra_hasreg(dest)) {
1383 checkmclim(as); 1561 checkmclim(as);
1384 asm_fuseahuref(as, ir->op1, RSET_GPR); 1562 asm_fuseahuref(as, ir->op1, RSET_GPR);
1563 if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
1385 emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); 1564 emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
1386 } 1565 }
1387 return; 1566 return;
@@ -1391,21 +1570,68 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1391 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1570 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1392 Reg dest = ra_dest(as, ir, allow); 1571 Reg dest = ra_dest(as, ir, allow);
1393 asm_fuseahuref(as, ir->op1, RSET_GPR); 1572 asm_fuseahuref(as, ir->op1, RSET_GPR);
1394 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1573 if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
1574#if LJ_GC64
1575 if (irt_isaddr(ir->t)) {
1576 emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1577 asm_guardcc(as, CC_NE);
1578 emit_i8(as, irt_toitype(ir->t));
1579 emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1580 emit_i8(as, XI_O16);
1581 if ((as->flags & JIT_F_BMI2)) {
1582 emit_i8(as, 47);
1583 emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
1584 } else {
1585 emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1586 emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
1587 }
1588 return;
1589 } else
1590#endif
1591 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1395 } else { 1592 } else {
1396 asm_fuseahuref(as, ir->op1, RSET_GPR); 1593 RegSet gpr = RSET_GPR;
1594#if LJ_GC64
1595 if (irt_isaddr(ir->t)) {
1596 tmp = ra_scratch(as, RSET_GPR);
1597 gpr = rset_exclude(gpr, tmp);
1598 }
1599#endif
1600 asm_fuseahuref(as, ir->op1, gpr);
1601 if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
1397 } 1602 }
1398 /* Always do the type check, even if the load result is unused. */ 1603 /* Always do the type check, even if the load result is unused. */
1399 as->mrm.ofs += 4; 1604 as->mrm.ofs += 4;
1400 asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); 1605 asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
1401 if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { 1606 if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
1402 lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); 1607 lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
1608 "bad load type %d", irt_type(ir->t));
1403 checkmclim(as); 1609 checkmclim(as);
1610#if LJ_GC64
1611 emit_u32(as, LJ_TISNUM << 15);
1612#else
1404 emit_u32(as, LJ_TISNUM); 1613 emit_u32(as, LJ_TISNUM);
1614#endif
1405 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); 1615 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1616#if LJ_GC64
1617 } else if (irt_isaddr(ir->t)) {
1618 as->mrm.ofs -= 4;
1619 emit_i8(as, irt_toitype(ir->t));
1620 emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
1621 emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1622 emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
1623 } else if (irt_isnil(ir->t)) {
1624 as->mrm.ofs -= 4;
1625 emit_i8(as, -1);
1626 emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
1627 } else {
1628 emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
1629 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1630#else
1406 } else { 1631 } else {
1407 emit_i8(as, irt_toitype(ir->t)); 1632 emit_i8(as, irt_toitype(ir->t));
1408 emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); 1633 emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
1634#endif
1409 } 1635 }
1410} 1636}
1411 1637
@@ -1417,12 +1643,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1417 Reg src = ra_alloc1(as, ir->op2, RSET_FPR); 1643 Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
1418 asm_fuseahuref(as, ir->op1, RSET_GPR); 1644 asm_fuseahuref(as, ir->op1, RSET_GPR);
1419 emit_mrm(as, XO_MOVSDto, src, RID_MRM); 1645 emit_mrm(as, XO_MOVSDto, src, RID_MRM);
1420#if LJ_64 1646#if LJ_64 && !LJ_GC64
1421 } else if (irt_islightud(ir->t)) { 1647 } else if (irt_islightud(ir->t)) {
1422 Reg src = ra_alloc1(as, ir->op2, RSET_GPR); 1648 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1423 asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); 1649 asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
1424 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); 1650 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1425#endif 1651#endif
1652#if LJ_GC64
1653 } else if (irref_isk(ir->op2)) {
1654 TValue k;
1655 lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
1656 asm_fuseahuref(as, ir->op1, RSET_GPR);
1657 if (tvisnil(&k)) {
1658 emit_i32(as, -1);
1659 emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
1660 } else {
1661 emit_u32(as, k.u32.lo);
1662 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1663 as->mrm.ofs += 4;
1664 emit_u32(as, k.u32.hi);
1665 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1666 }
1667#endif
1426 } else { 1668 } else {
1427 IRIns *irr = IR(ir->op2); 1669 IRIns *irr = IR(ir->op2);
1428 RegSet allow = RSET_GPR; 1670 RegSet allow = RSET_GPR;
@@ -1433,34 +1675,56 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1433 } 1675 }
1434 asm_fuseahuref(as, ir->op1, allow); 1676 asm_fuseahuref(as, ir->op1, allow);
1435 if (ra_hasreg(src)) { 1677 if (ra_hasreg(src)) {
1678#if LJ_GC64
1679 if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
1680 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1681 as->mrm.ofs += 4;
1682 emit_u32(as, irt_toitype(ir->t) << 15);
1683 emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
1684 as->mrm.ofs -= 4;
1685 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1686 return;
1687 }
1688#endif
1436 emit_mrm(as, XO_MOVto, src, RID_MRM); 1689 emit_mrm(as, XO_MOVto, src, RID_MRM);
1437 } else if (!irt_ispri(irr->t)) { 1690 } else if (!irt_ispri(irr->t)) {
1438 lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); 1691 lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)),
1692 "bad store type");
1439 emit_i32(as, irr->i); 1693 emit_i32(as, irr->i);
1440 emit_mrm(as, XO_MOVmi, 0, RID_MRM); 1694 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1441 } 1695 }
1442 as->mrm.ofs += 4; 1696 as->mrm.ofs += 4;
1697#if LJ_GC64
1698 lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type");
1699 emit_i32(as, LJ_TNUMX << 15);
1700#else
1443 emit_i32(as, (int32_t)irt_toitype(ir->t)); 1701 emit_i32(as, (int32_t)irt_toitype(ir->t));
1702#endif
1444 emit_mrm(as, XO_MOVmi, 0, RID_MRM); 1703 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1445 } 1704 }
1446} 1705}
1447 1706
1448static void asm_sload(ASMState *as, IRIns *ir) 1707static void asm_sload(ASMState *as, IRIns *ir)
1449{ 1708{
1450 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); 1709 int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
1710 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1451 IRType1 t = ir->t; 1711 IRType1 t = ir->t;
1452 Reg base; 1712 Reg base;
1453 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1713 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1454 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1714 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1455 lua_assert(LJ_DUALNUM || 1715 lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1456 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1716 "inconsistent SLOAD variant");
1717 lj_assertA(LJ_DUALNUM ||
1718 !irt_isint(t) ||
1719 (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
1720 "bad SLOAD type");
1457 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1721 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1458 Reg left = ra_scratch(as, RSET_FPR); 1722 Reg left = ra_scratch(as, RSET_FPR);
1459 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1723 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1460 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1724 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1461 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1725 emit_rmro(as, XO_MOVSD, left, base, ofs);
1462 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1726 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1463#if LJ_64 1727#if LJ_64 && !LJ_GC64
1464 } else if (irt_islightud(t)) { 1728 } else if (irt_islightud(t)) {
1465 Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); 1729 Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
1466 if (ra_hasreg(dest)) { 1730 if (ra_hasreg(dest)) {
@@ -1473,14 +1737,43 @@ static void asm_sload(ASMState *as, IRIns *ir)
1473 RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; 1737 RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR;
1474 Reg dest = ra_dest(as, ir, allow); 1738 Reg dest = ra_dest(as, ir, allow);
1475 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1739 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1476 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1740 lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
1741 "bad SLOAD type %d", irt_type(t));
1477 if ((ir->op2 & IRSLOAD_CONVERT)) { 1742 if ((ir->op2 & IRSLOAD_CONVERT)) {
1478 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1743 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1479 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1744 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1480 } else if (irt_isnum(t)) {
1481 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1482 } else { 1745 } else {
1483 emit_rmro(as, XO_MOV, dest, base, ofs); 1746#if LJ_GC64
1747 if (irt_isaddr(t)) {
1748 /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
1749 **
1750 ** mov r64, [addr] rorx r64, [addr], 47
1751 ** ror r64, 47
1752 ** cmp r16, itype cmp r16, itype
1753 ** jne ->exit jne ->exit
1754 ** shr r64, 16 shr r64, 16
1755 */
1756 emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1757 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1758 asm_guardcc(as, CC_NE);
1759 emit_i8(as, irt_toitype(t));
1760 emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1761 emit_i8(as, XI_O16);
1762 }
1763 if ((as->flags & JIT_F_BMI2)) {
1764 emit_i8(as, 47);
1765 emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
1766 } else {
1767 if ((ir->op2 & IRSLOAD_TYPECHECK))
1768 emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1769 else
1770 emit_shifti(as, XOg_SHL|REX_64, dest, 17);
1771 emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
1772 }
1773 return;
1774 } else
1775#endif
1776 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1484 } 1777 }
1485 } else { 1778 } else {
1486 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1779 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1490,13 +1783,42 @@ static void asm_sload(ASMState *as, IRIns *ir)
1490 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1783 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1491 /* Need type check, even if the load result is unused. */ 1784 /* Need type check, even if the load result is unused. */
1492 asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); 1785 asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
1493 if (LJ_64 && irt_type(t) >= IRT_NUM) { 1786 if ((LJ_64 && irt_type(t) >= IRT_NUM) || (ir->op2 & IRSLOAD_KEYINDEX)) {
1494 lua_assert(irt_isinteger(t) || irt_isnum(t)); 1787 lj_assertA(irt_isinteger(t) || irt_isnum(t),
1495 emit_u32(as, LJ_TISNUM); 1788 "bad SLOAD type %d", irt_type(t));
1789 emit_u32(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX :
1790 LJ_GC64 ? (LJ_TISNUM << 15) : LJ_TISNUM);
1791 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1792#if LJ_GC64
1793 } else if (irt_isnil(t)) {
1794 /* LJ_GC64 type check for nil:
1795 **
1796 ** cmp qword [addr], -1
1797 ** jne ->exit
1798 */
1799 emit_i8(as, -1);
1800 emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
1801 } else if (irt_ispri(t)) {
1802 emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
1496 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); 1803 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1497 } else { 1804 } else {
1805 /* LJ_GC64 type check only:
1806 **
1807 ** mov r64, [addr]
1808 ** sar r64, 47
1809 ** cmp r32, itype
1810 ** jne ->exit
1811 */
1812 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
1813 emit_i8(as, irt_toitype(t));
1814 emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
1815 emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1816 emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs);
1817#else
1818 } else {
1498 emit_i8(as, irt_toitype(t)); 1819 emit_i8(as, irt_toitype(t));
1499 emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); 1820 emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
1821#endif
1500 } 1822 }
1501 } 1823 }
1502} 1824}
@@ -1507,15 +1829,14 @@ static void asm_sload(ASMState *as, IRIns *ir)
1507static void asm_cnew(ASMState *as, IRIns *ir) 1829static void asm_cnew(ASMState *as, IRIns *ir)
1508{ 1830{
1509 CTState *cts = ctype_ctsG(J2G(as->J)); 1831 CTState *cts = ctype_ctsG(J2G(as->J));
1510 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1832 CTypeID id = (CTypeID)IR(ir->op1)->i;
1511 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1833 CTSize sz;
1512 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1834 CTInfo info = lj_ctype_info(cts, id, &sz);
1513 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1835 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1514 IRRef args[2]; 1836 IRRef args[4];
1515 lua_assert(sz != CTSIZE_INVALID); 1837 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1838 "bad CNEW/CNEWI operands");
1516 1839
1517 args[0] = ASMREF_L; /* lua_State *L */
1518 args[1] = ASMREF_TMP1; /* MSize size */
1519 as->gcsteps++; 1840 as->gcsteps++;
1520 asm_setupresult(as, ir, ci); /* GCcdata * */ 1841 asm_setupresult(as, ir, ci); /* GCcdata * */
1521 1842
@@ -1526,8 +1847,9 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1526 Reg r64 = sz == 8 ? REX_64 : 0; 1847 Reg r64 = sz == 8 ? REX_64 : 0;
1527 if (irref_isk(ir->op2)) { 1848 if (irref_isk(ir->op2)) {
1528 IRIns *irk = IR(ir->op2); 1849 IRIns *irk = IR(ir->op2);
1529 uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : 1850 uint64_t k = (irk->o == IR_KINT64 ||
1530 (uint64_t)(uint32_t)irk->i; 1851 (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ?
1852 ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i;
1531 if (sz == 4 || checki32((int64_t)k)) { 1853 if (sz == 4 || checki32((int64_t)k)) {
1532 emit_i32(as, (int32_t)k); 1854 emit_i32(as, (int32_t)k);
1533 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); 1855 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
@@ -1543,7 +1865,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1543 int32_t ofs = sizeof(GCcdata); 1865 int32_t ofs = sizeof(GCcdata);
1544 if (sz == 8) { 1866 if (sz == 8) {
1545 ofs += 4; ir++; 1867 ofs += 4; ir++;
1546 lua_assert(ir->o == IR_HIOP); 1868 lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP");
1547 } 1869 }
1548 do { 1870 do {
1549 if (irref_isk(ir->op2)) { 1871 if (irref_isk(ir->op2)) {
@@ -1557,21 +1879,30 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1557 ofs -= 4; ir--; 1879 ofs -= 4; ir--;
1558 } while (1); 1880 } while (1);
1559#endif 1881#endif
1560 lua_assert(sz == 4 || sz == 8); 1882 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1883 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1884 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1885 args[0] = ASMREF_L; /* lua_State *L */
1886 args[1] = ir->op1; /* CTypeID id */
1887 args[2] = ir->op2; /* CTSize sz */
1888 args[3] = ASMREF_TMP1; /* CTSize align */
1889 asm_gencall(as, ci, args);
1890 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1891 return;
1561 } 1892 }
1562 1893
1563 /* Combine initialization of marked, gct and ctypeid. */ 1894 /* Combine initialization of marked, gct and ctypeid. */
1564 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); 1895 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
1565 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, 1896 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
1566 (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); 1897 (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
1567 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); 1898 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
1568 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); 1899 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
1569 1900
1901 args[0] = ASMREF_L; /* lua_State *L */
1902 args[1] = ASMREF_TMP1; /* MSize size */
1570 asm_gencall(as, ci, args); 1903 asm_gencall(as, ci, args);
1571 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); 1904 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1572} 1905}
1573#else
1574#define asm_cnew(as, ir) ((void)0)
1575#endif 1906#endif
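[Editor's note] The "combine initialization of marked, gct and ctypeid" trick above works because those fields occupy four consecutive bytes of the GCcdata header, so a single dword store at offsetof(GCcdata, marked) fills all three; the register is assembled from the current GC white bits, the cdata GC type byte and the ctypeid halfword. A sketch of how that dword is built (illustrative only; the concrete constant values are assumptions, only the macro names come from the code above):

#include <stdint.h>

static uint32_t cdata_header_word(uint8_t currentwhite, uint16_t ctypeid)
{
  uint8_t marked = currentwhite & 0x03;      /* and ecx, LJ_GC_WHITES          */
  uint8_t gct = 0x0a;                        /* ~LJ_TCDATA: cdata GC type byte */
  return (uint32_t)marked |                  /* or ecx, (~LJ_TCDATA<<8)+(id<<16) */
         ((uint32_t)gct << 8) |
         ((uint32_t)ctypeid << 16);
}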
1576 1907
1577/* -- Write barriers ------------------------------------------------------ */ 1908/* -- Write barriers ------------------------------------------------------ */
@@ -1581,7 +1912,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1581 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); 1912 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1582 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1913 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1583 MCLabel l_end = emit_label(as); 1914 MCLabel l_end = emit_label(as);
1584 emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); 1915 emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
1585 emit_setgl(as, tab, gc.grayagain); 1916 emit_setgl(as, tab, gc.grayagain);
1586 emit_getgl(as, tmp, gc.grayagain); 1917 emit_getgl(as, tmp, gc.grayagain);
1587 emit_i8(as, ~LJ_GC_BLACK); 1918 emit_i8(as, ~LJ_GC_BLACK);
@@ -1598,7 +1929,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1598 MCLabel l_end; 1929 MCLabel l_end;
1599 Reg obj; 1930 Reg obj;
1600 /* No need for other object barriers (yet). */ 1931 /* No need for other object barriers (yet). */
1601 lua_assert(IR(ir->op1)->o == IR_UREFC); 1932 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1602 ra_evictset(as, RSET_SCRATCH); 1933 ra_evictset(as, RSET_SCRATCH);
1603 l_end = emit_label(as); 1934 l_end = emit_label(as);
1604 args[0] = ASMREF_TMP1; /* global_State *g */ 1935 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1644,36 +1975,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
1644 } 1975 }
1645} 1976}
1646 1977
1647/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
1648static int fpmjoin_pow(ASMState *as, IRIns *ir)
1649{
1650 IRIns *irp = IR(ir->op1);
1651 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1652 IRIns *irpp = IR(irp->op1);
1653 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1654 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1655 /* The modified regs must match with the *.dasc implementation. */
1656 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1657 IRIns *irx;
1658 if (ra_hasreg(ir->r))
1659 rset_clear(drop, ir->r); /* Dest reg handled below. */
1660 ra_evictset(as, drop);
1661 ra_destreg(as, ir, RID_XMM0);
1662 emit_call(as, lj_vm_pow_sse);
1663 irx = IR(irpp->op1);
1664 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1665 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1666 ra_left(as, RID_XMM0, irpp->op1);
1667 ra_left(as, RID_XMM1, irp->op2);
1668 return 1;
1669 }
1670 }
1671 return 0;
1672}
1673
1674static void asm_fpmath(ASMState *as, IRIns *ir) 1978static void asm_fpmath(ASMState *as, IRIns *ir)
1675{ 1979{
1676 IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; 1980 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1677 if (fpm == IRFPM_SQRT) { 1981 if (fpm == IRFPM_SQRT) {
1678 Reg dest = ra_dest(as, ir, RSET_FPR); 1982 Reg dest = ra_dest(as, ir, RSET_FPR);
1679 Reg left = asm_fuseload(as, ir->op1, RSET_FPR); 1983 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1704,93 +2008,32 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1704 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 2008 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1705 ra_left(as, RID_XMM0, ir->op1); 2009 ra_left(as, RID_XMM0, ir->op1);
1706 } 2010 }
1707 } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { 2011 } else {
1708 /* Rejoined to pow(). */ 2012 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1709 } else { /* Handle x87 ops. */
1710 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
1711 Reg dest = ir->r;
1712 if (ra_hasreg(dest)) {
1713 ra_free(as, dest);
1714 ra_modified(as, dest);
1715 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
1716 }
1717 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1718 switch (fpm) { /* st0 = lj_vm_*(st0) */
1719 case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
1720 case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
1721 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
1722 case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
1723 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
1724 case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
1725 /* Note: the use of fyl2xp1 would be pointless here. When computing
1726 ** log(1.0+eps) the precision is already lost after 1.0 is added.
1727 ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
1728 */
1729 emit_x87op(as, XI_FYL2X); break;
1730 case IRFPM_OTHER:
1731 switch (ir->o) {
1732 case IR_ATAN2:
1733 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
1734 case IR_LDEXP:
1735 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
1736 default: lua_assert(0); break;
1737 }
1738 break;
1739 default: lua_assert(0); break;
1740 }
1741 asm_x87load(as, ir->op1);
1742 switch (fpm) {
1743 case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
1744 case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
1745 case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
1746 case IRFPM_OTHER:
1747 if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
1748 break;
1749 default: break;
1750 }
1751 } 2013 }
1752} 2014}
1753 2015
1754static void asm_fppowi(ASMState *as, IRIns *ir) 2016static void asm_ldexp(ASMState *as, IRIns *ir)
1755{
1756 /* The modified regs must match with the *.dasc implementation. */
1757 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
1758 if (ra_hasreg(ir->r))
1759 rset_clear(drop, ir->r); /* Dest reg handled below. */
1760 ra_evictset(as, drop);
1761 ra_destreg(as, ir, RID_XMM0);
1762 emit_call(as, lj_vm_powi_sse);
1763 ra_left(as, RID_XMM0, ir->op1);
1764 ra_left(as, RID_EAX, ir->op2);
1765}
1766
1767#if LJ_64 && LJ_HASFFI
1768static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
1769{ 2017{
1770 const CCallInfo *ci = &lj_ir_callinfo[id]; 2018 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
1771 IRRef args[2]; 2019 Reg dest = ir->r;
1772 args[0] = ir->op1; 2020 if (ra_hasreg(dest)) {
1773 args[1] = ir->op2; 2021 ra_free(as, dest);
1774 asm_setupresult(as, ir, ci); 2022 ra_modified(as, dest);
1775 asm_gencall(as, ci, args); 2023 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1776} 2024 }
1777#endif 2025 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1778 2026 emit_x87op(as, XI_FPOP1);
1779static void asm_intmod(ASMState *as, IRIns *ir) 2027 emit_x87op(as, XI_FSCALE);
1780{ 2028 asm_x87load(as, ir->op1);
1781 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi]; 2029 asm_x87load(as, ir->op2);
1782 IRRef args[2];
1783 args[0] = ir->op1;
1784 args[1] = ir->op2;
1785 asm_setupresult(as, ir, ci);
1786 asm_gencall(as, ci, args);
1787} 2030}
1788 2031
1789static int asm_swapops(ASMState *as, IRIns *ir) 2032static int asm_swapops(ASMState *as, IRIns *ir)
1790{ 2033{
1791 IRIns *irl = IR(ir->op1); 2034 IRIns *irl = IR(ir->op1);
1792 IRIns *irr = IR(ir->op2); 2035 IRIns *irr = IR(ir->op2);
1793 lua_assert(ra_noreg(irr->r)); 2036 lj_assertA(ra_noreg(irr->r), "bad usage");
1794 if (!irm_iscomm(lj_ir_mode[ir->o])) 2037 if (!irm_iscomm(lj_ir_mode[ir->o]))
1795 return 0; /* Can't swap non-commutative operations. */ 2038 return 0; /* Can't swap non-commutative operations. */
1796 if (irref_isk(ir->op2)) 2039 if (irref_isk(ir->op2))
@@ -1962,11 +2205,28 @@ static void asm_add(ASMState *as, IRIns *ir)
1962{ 2205{
1963 if (irt_isnum(ir->t)) 2206 if (irt_isnum(ir->t))
1964 asm_fparith(as, ir, XO_ADDSD); 2207 asm_fparith(as, ir, XO_ADDSD);
1965 else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || 2208 else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir))
1966 irt_is64(ir->t) || !asm_lea(as, ir))
1967 asm_intarith(as, ir, XOg_ADD); 2209 asm_intarith(as, ir, XOg_ADD);
1968} 2210}
1969 2211
2212static void asm_sub(ASMState *as, IRIns *ir)
2213{
2214 if (irt_isnum(ir->t))
2215 asm_fparith(as, ir, XO_SUBSD);
2216 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2217 asm_intarith(as, ir, XOg_SUB);
2218}
2219
2220static void asm_mul(ASMState *as, IRIns *ir)
2221{
2222 if (irt_isnum(ir->t))
2223 asm_fparith(as, ir, XO_MULSD);
2224 else
2225 asm_intarith(as, ir, XOg_X_IMUL);
2226}
2227
2228#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD)
2229
1970static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 2230static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1971{ 2231{
1972 Reg dest = ra_dest(as, ir, RSET_GPR); 2232 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1974,7 +2234,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1974 ra_left(as, dest, ir->op1); 2234 ra_left(as, dest, ir->op1);
1975} 2235}
1976 2236
1977static void asm_min_max(ASMState *as, IRIns *ir, int cc) 2237static void asm_neg(ASMState *as, IRIns *ir)
2238{
2239 if (irt_isnum(ir->t))
2240 asm_fparith(as, ir, XO_XORPS);
2241 else
2242 asm_neg_not(as, ir, XOg_NEG);
2243}
2244
2245#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
2246
2247static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1978{ 2248{
1979 Reg right, dest = ra_dest(as, ir, RSET_GPR); 2249 Reg right, dest = ra_dest(as, ir, RSET_GPR);
1980 IRRef lref = ir->op1, rref = ir->op2; 2250 IRRef lref = ir->op1, rref = ir->op2;
@@ -1985,7 +2255,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
1985 ra_left(as, dest, lref); 2255 ra_left(as, dest, lref);
1986} 2256}
1987 2257
1988static void asm_bitswap(ASMState *as, IRIns *ir) 2258static void asm_min(ASMState *as, IRIns *ir)
2259{
2260 if (irt_isnum(ir->t))
2261 asm_fparith(as, ir, XO_MINSD);
2262 else
2263 asm_intmin_max(as, ir, CC_G);
2264}
2265
2266static void asm_max(ASMState *as, IRIns *ir)
2267{
2268 if (irt_isnum(ir->t))
2269 asm_fparith(as, ir, XO_MAXSD);
2270 else
2271 asm_intmin_max(as, ir, CC_L);
2272}
2273
2274/* Note: don't use LEA for overflow-checking arithmetic! */
2275#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
2276#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
2277#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
2278
2279#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
2280
2281static void asm_bswap(ASMState *as, IRIns *ir)
1989{ 2282{
1990 Reg dest = ra_dest(as, ir, RSET_GPR); 2283 Reg dest = ra_dest(as, ir, RSET_GPR);
1991 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), 2284 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1993,7 +2286,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1993 ra_left(as, dest, ir->op1); 2286 ra_left(as, dest, ir->op1);
1994} 2287}
1995 2288
1996static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 2289#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
2290#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
2291#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
2292
2293static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
1997{ 2294{
1998 IRRef rref = ir->op2; 2295 IRRef rref = ir->op2;
1999 IRIns *irr = IR(rref); 2296 IRIns *irr = IR(rref);
@@ -2002,17 +2299,33 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2002 int shift; 2299 int shift;
2003 dest = ra_dest(as, ir, RSET_GPR); 2300 dest = ra_dest(as, ir, RSET_GPR);
2004 shift = irr->i & (irt_is64(ir->t) ? 63 : 31); 2301 shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
2302 if (!xv && shift && (as->flags & JIT_F_BMI2)) {
2303 Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
2304 if (left != dest) { /* BMI2 rotate right by constant. */
2305 emit_i8(as, xs == XOg_ROL ? -shift : shift);
2306 emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
2307 return;
2308 }
2309 }
2005 switch (shift) { 2310 switch (shift) {
2006 case 0: break; 2311 case 0: break;
2007 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; 2312 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
2008 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; 2313 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
2009 } 2314 }
2315 } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
2316 Reg left, right;
2317 dest = ra_dest(as, ir, RSET_GPR);
2318 right = ra_alloc1(as, rref, RSET_GPR);
2319 left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
2320 irt_is64(ir->t));
2321 emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
2322 return;
2010 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ 2323 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
2011 Reg right; 2324 Reg right;
2012 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); 2325 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
2013 if (dest == RID_ECX) { 2326 if (dest == RID_ECX) {
2014 dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX)); 2327 dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
2015 emit_rr(as, XO_MOV, RID_ECX, dest); 2328 emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest);
2016 } 2329 }
2017 right = irr->r; 2330 right = irr->r;
2018 if (ra_noreg(right)) 2331 if (ra_noreg(right))
@@ -2032,6 +2345,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2032 */ 2345 */
2033} 2346}
2034 2347
2348#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
2349#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
2350#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
2351#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
2352#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
2353
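
The BMI2 path in asm_bitshift() above encodes both rotate directions with RORX, which only rotates right: for a constant left rotate it emits the negated count (emit_i8(as, xs == XOg_ROL ? -shift : shift)), relying on the rotate count being reduced modulo the operand width so that -shift acts as width-shift. A standalone sketch of that identity in plain C (the function names below are illustrative, not part of LuaJIT):

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Rotate right by n; the count is reduced mod 32, as the hardware does. */
static uint32_t ror32(uint32_t x, unsigned n)
{
  n &= 31;
  return n ? (x >> n) | (x << (32 - n)) : x;
}

/* Rotate left by n, expressed as a right rotate by the negated count. */
static uint32_t rol32_via_ror(uint32_t x, unsigned n)
{
  return ror32(x, 0u - n);  /* (-n) & 31 == (32 - n) & 31 */
}

int main(void)
{
  uint32_t x = 0x12345678u;
  unsigned n;
  for (n = 0; n < 32; n++) {
    uint32_t expect = n ? (x << n) | (x >> (32 - n)) : x;
    assert(rol32_via_ror(x, n) == expect);
  }
  printf("rol(0x%08x, 4) = 0x%08x\n", x, rol32_via_ror(x, 4));
  return 0;
}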
2035/* -- Comparisons --------------------------------------------------------- */ 2354/* -- Comparisons --------------------------------------------------------- */
2036 2355
2037/* Virtual flags for unordered FP comparisons. */ 2356/* Virtual flags for unordered FP comparisons. */
@@ -2058,8 +2377,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
2058}; 2377};
2059 2378
2060/* FP and integer comparisons. */ 2379/* FP and integer comparisons. */
2061static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) 2380static void asm_comp(ASMState *as, IRIns *ir)
2062{ 2381{
2382 uint32_t cc = asm_compmap[ir->o];
2063 if (irt_isnum(ir->t)) { 2383 if (irt_isnum(ir->t)) {
2064 IRRef lref = ir->op1; 2384 IRRef lref = ir->op1;
2065 IRRef rref = ir->op2; 2385 IRRef rref = ir->op2;
@@ -2080,7 +2400,6 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2080 cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ 2400 cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
2081 } 2401 }
2082 left = ra_alloc1(as, lref, RSET_FPR); 2402 left = ra_alloc1(as, lref, RSET_FPR);
2083 right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2084 l_around = emit_label(as); 2403 l_around = emit_label(as);
2085 asm_guardcc(as, cc >> 4); 2404 asm_guardcc(as, cc >> 4);
2086 if (cc & VCC_P) { /* Extra CC_P branch required? */ 2405 if (cc & VCC_P) { /* Extra CC_P branch required? */
@@ -2097,14 +2416,16 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2097 emit_jcc(as, CC_P, as->mcp); 2416 emit_jcc(as, CC_P, as->mcp);
2098 } 2417 }
2099 } 2418 }
2419 right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2100 emit_mrm(as, XO_UCOMISD, left, right); 2420 emit_mrm(as, XO_UCOMISD, left, right);
2101 } else { 2421 } else {
2102 IRRef lref = ir->op1, rref = ir->op2; 2422 IRRef lref = ir->op1, rref = ir->op2;
2103 IROp leftop = (IROp)(IR(lref)->o); 2423 IROp leftop = (IROp)(IR(lref)->o);
2104 Reg r64 = REX_64IR(ir, 0); 2424 Reg r64 = REX_64IR(ir, 0);
2105 int32_t imm = 0; 2425 int32_t imm = 0;
2106 lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || 2426 lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
2107 irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); 2427 irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
2428 "bad comparison data type %d", irt_type(ir->t));
2108 /* Swap constants (only for ABC) and fusable loads to the right. */ 2429 /* Swap constants (only for ABC) and fusable loads to the right. */
2109 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { 2430 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
2110 if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ 2431 if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */
@@ -2186,7 +2507,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2186 /* Use test r,r instead of cmp r,0. */ 2507 /* Use test r,r instead of cmp r,0. */
2187 x86Op xo = XO_TEST; 2508 x86Op xo = XO_TEST;
2188 if (irt_isu8(ir->t)) { 2509 if (irt_isu8(ir->t)) {
2189 lua_assert(ir->o == IR_EQ || ir->o == IR_NE); 2510 lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage");
2190 xo = XO_TESTb; 2511 xo = XO_TESTb;
2191 if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { 2512 if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) {
2192 if (LJ_64) { 2513 if (LJ_64) {
@@ -2214,6 +2535,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2214 } 2535 }
2215} 2536}
2216 2537
2538#define asm_equal(as, ir) asm_comp(as, ir)
2539
2217#if LJ_32 && LJ_HASFFI 2540#if LJ_32 && LJ_HASFFI
2218/* 64 bit integer comparisons in 32 bit mode. */ 2541/* 64 bit integer comparisons in 32 bit mode. */
2219static void asm_comp_int64(ASMState *as, IRIns *ir) 2542static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2286,23 +2609,19 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
2286} 2609}
2287#endif 2610#endif
2288 2611
2289/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ 2612/* -- Split register ops -------------------------------------------------- */
2290 2613
2291/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 2614/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
2292static void asm_hiop(ASMState *as, IRIns *ir) 2615static void asm_hiop(ASMState *as, IRIns *ir)
2293{ 2616{
2294#if LJ_32 && LJ_HASFFI
2295 /* HIOP is marked as a store because it needs its own DCE logic. */ 2617 /* HIOP is marked as a store because it needs its own DCE logic. */
2296 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2618 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
2297 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2619 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2620#if LJ_32 && LJ_HASFFI
2298 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2621 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
2299 if (usehi || uselo) {
2300 if (irt_isfp(ir->t))
2301 asm_conv_fp_int64(as, ir);
2302 else
2303 asm_conv_int64_fp(as, ir);
2304 }
2305 as->curins--; /* Always skip the CONV. */ 2622 as->curins--; /* Always skip the CONV. */
2623 if (usehi || uselo)
2624 asm_conv64(as, ir);
2306 return; 2625 return;
2307 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2626 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
2308 asm_comp_int64(as, ir); 2627 asm_comp_int64(as, ir);
@@ -2312,8 +2631,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2312 asm_fxstore(as, ir); 2631 asm_fxstore(as, ir);
2313 return; 2632 return;
2314 } 2633 }
2634#endif
2315 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 2635 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
2316 switch ((ir-1)->o) { 2636 switch ((ir-1)->o) {
2637#if LJ_32 && LJ_HASFFI
2317 case IR_ADD: 2638 case IR_ADD:
2318 as->flagmcp = NULL; 2639 as->flagmcp = NULL;
2319 as->curins--; 2640 as->curins--;
@@ -2336,19 +2657,26 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2336 asm_neg_not(as, ir-1, XOg_NEG); 2657 asm_neg_not(as, ir-1, XOg_NEG);
2337 break; 2658 break;
2338 } 2659 }
2339 case IR_CALLN:
2340 case IR_CALLXS:
2341 if (!uselo)
2342 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
2343 break;
2344 case IR_CNEWI: 2660 case IR_CNEWI:
2345 /* Nothing to do here. Handled by CNEWI itself. */ 2661 /* Nothing to do here. Handled by CNEWI itself. */
2346 break; 2662 break;
2347 default: lua_assert(0); break;
2348 }
2349#else
2350 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */
2351#endif 2663#endif
2664 case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
2665 if (!uselo)
2666 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
2667 break;
2668 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
2669 }
2670}
2671
2672/* -- Profiling ----------------------------------------------------------- */
2673
2674static void asm_prof(ASMState *as, IRIns *ir)
2675{
2676 UNUSED(ir);
2677 asm_guardcc(as, CC_NE);
2678 emit_i8(as, HOOK_PROFILE);
2679 emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
2352} 2680}
2353 2681
2354/* -- Stack handling ------------------------------------------------------ */ 2682/* -- Stack handling ------------------------------------------------------ */
@@ -2365,14 +2693,19 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2365 emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); 2693 emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
2366 else 2694 else
2367 ra_modified(as, r); 2695 ra_modified(as, r);
2368 emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); 2696 emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
2369 if (ra_hasreg(pbase) && pbase != r) 2697 if (ra_hasreg(pbase) && pbase != r)
2370 emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); 2698 emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
2371 else 2699 else
2700#if LJ_GC64
2701 emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
2702 (int32_t)dispofs(as, &J2G(as->J)->jit_base));
2703#else
2372 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, 2704 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2373 ptr2addr(&J2G(as->J)->jit_base)); 2705 ptr2addr(&J2G(as->J)->jit_base));
2374 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); 2706#endif
2375 emit_getgl(as, r, jit_L); 2707 emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
2708 emit_getgl(as, r, cur_L);
2376 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2709 if (allow == RSET_EMPTY) /* Spill temp. register. */
2377 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); 2710 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
2378} 2711}
@@ -2381,40 +2714,79 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2381static void asm_stack_restore(ASMState *as, SnapShot *snap) 2714static void asm_stack_restore(ASMState *as, SnapShot *snap)
2382{ 2715{
2383 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2716 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2384 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2717#if !LJ_FR2 || defined(LUA_USE_ASSERT)
2718 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2719#endif
2385 MSize n, nent = snap->nent; 2720 MSize n, nent = snap->nent;
2386 /* Store the value of all modified slots to the Lua stack. */ 2721 /* Store the value of all modified slots to the Lua stack. */
2387 for (n = 0; n < nent; n++) { 2722 for (n = 0; n < nent; n++) {
2388 SnapEntry sn = map[n]; 2723 SnapEntry sn = map[n];
2389 BCReg s = snap_slot(sn); 2724 BCReg s = snap_slot(sn);
2390 int32_t ofs = 8*((int32_t)s-1); 2725 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
2391 IRRef ref = snap_ref(sn); 2726 IRRef ref = snap_ref(sn);
2392 IRIns *ir = IR(ref); 2727 IRIns *ir = IR(ref);
2393 if ((sn & SNAP_NORESTORE)) 2728 if ((sn & SNAP_NORESTORE))
2394 continue; 2729 continue;
2395 if (irt_isnum(ir->t)) { 2730 if ((sn & SNAP_KEYINDEX)) {
2731 emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX);
2732 if (irref_isk(ref)) {
2733 emit_movmroi(as, RID_BASE, ofs, ir->i);
2734 } else {
2735 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2736 emit_movtomro(as, src, RID_BASE, ofs);
2737 }
2738 } else if (irt_isnum(ir->t)) {
2396 Reg src = ra_alloc1(as, ref, RSET_FPR); 2739 Reg src = ra_alloc1(as, ref, RSET_FPR);
2397 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); 2740 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
2398 } else { 2741 } else {
2399 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || 2742 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
2400 (LJ_DUALNUM && irt_isinteger(ir->t))); 2743 (LJ_DUALNUM && irt_isinteger(ir->t)),
2744 "restore of IR type %d", irt_type(ir->t));
2401 if (!irref_isk(ref)) { 2745 if (!irref_isk(ref)) {
2402 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); 2746 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2747#if LJ_GC64
2748 if (irt_is64(ir->t)) {
2749 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
2750 emit_u32(as, irt_toitype(ir->t) << 15);
2751 emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
2752 } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
2753 emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
2754 } else {
2755 emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
2756 }
2757#endif
2403 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); 2758 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
2759#if LJ_GC64
2760 } else {
2761 TValue k;
2762 lj_ir_kvalue(as->J->L, &k, ir);
2763 if (tvisnil(&k)) {
2764 emit_i32(as, -1);
2765 emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
2766 } else {
2767 emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
2768 emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
2769 }
2770#else
2404 } else if (!irt_ispri(ir->t)) { 2771 } else if (!irt_ispri(ir->t)) {
2405 emit_movmroi(as, RID_BASE, ofs, ir->i); 2772 emit_movmroi(as, RID_BASE, ofs, ir->i);
2773#endif
2406 } 2774 }
2407 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2775 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
2776#if !LJ_FR2
2408 if (s != 0) /* Do not overwrite link to previous frame. */ 2777 if (s != 0) /* Do not overwrite link to previous frame. */
2409 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); 2778 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
2779#endif
2780#if !LJ_GC64
2410 } else { 2781 } else {
2411 if (!(LJ_64 && irt_islightud(ir->t))) 2782 if (!(LJ_64 && irt_islightud(ir->t)))
2412 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); 2783 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
2784#endif
2413 } 2785 }
2414 } 2786 }
2415 checkmclim(as); 2787 checkmclim(as);
2416 } 2788 }
2417 lua_assert(map + nent == flinks); 2789 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
2418} 2790}
2419 2791
2420/* -- GC handling --------------------------------------------------------- */ 2792/* -- GC handling --------------------------------------------------------- */
@@ -2435,11 +2807,15 @@ static void asm_gc_check(ASMState *as)
2435 args[1] = ASMREF_TMP2; /* MSize steps */ 2807 args[1] = ASMREF_TMP2; /* MSize steps */
2436 asm_gencall(as, ci, args); 2808 asm_gencall(as, ci, args);
2437 tmp = ra_releasetmp(as, ASMREF_TMP1); 2809 tmp = ra_releasetmp(as, ASMREF_TMP1);
2810#if LJ_GC64
2811 emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
2812#else
2438 emit_loada(as, tmp, J2G(as->J)); 2813 emit_loada(as, tmp, J2G(as->J));
2814#endif
2439 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); 2815 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
2440 /* Jump around GC step if GC total < GC threshold. */ 2816 /* Jump around GC step if GC total < GC threshold. */
2441 emit_sjcc(as, CC_B, l_end); 2817 emit_sjcc(as, CC_B, l_end);
2442 emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); 2818 emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
2443 emit_getgl(as, tmp, gc.total); 2819 emit_getgl(as, tmp, gc.total);
2444 as->gcsteps = 0; 2820 as->gcsteps = 0;
2445 checkmclim(as); 2821 checkmclim(as);
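
The emitted sequence is the usual inverted check: compare gc.total against gc.threshold and jump around the call to the GC step helper while the total is still below the threshold. In plain C the shape is roughly the following (a standalone sketch with made-up types; the real trace code calls the JIT-specific GC step through the CCallInfo set up above):

#include <stdio.h>
#include <stddef.h>

typedef struct GCState { size_t total, threshold; } GCState;

static void gc_step(GCState *gc, int steps)
{
  /* Stand-in for the real incremental GC step. */
  printf("GC step x%d at total=%zu\n", steps, gc->total);
  gc->threshold = gc->total + 1024;
}

static void gc_check(GCState *gc, int steps)
{
  if (gc->total >= gc->threshold)  /* Emitted as CMP + JB around the call. */
    gc_step(gc, steps);
}

int main(void)
{
  GCState gc = { 2048, 1024 };
  gc_check(&gc, 3);  /* Above the threshold: triggers a step. */
  gc_check(&gc, 3);  /* Below the new threshold: skipped. */
  return 0;
}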
@@ -2454,16 +2830,16 @@ static void asm_loop_fixup(ASMState *as)
2454 MCode *target = as->mcp; 2830 MCode *target = as->mcp;
2455 if (as->realign) { /* Realigned loops use short jumps. */ 2831 if (as->realign) { /* Realigned loops use short jumps. */
2456 as->realign = NULL; /* Stop another retry. */ 2832 as->realign = NULL; /* Stop another retry. */
2457 lua_assert(((intptr_t)target & 15) == 0); 2833 lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed");
2458 if (as->loopinv) { /* Inverted loop branch? */ 2834 if (as->loopinv) { /* Inverted loop branch? */
2459 p -= 5; 2835 p -= 5;
2460 p[0] = XI_JMP; 2836 p[0] = XI_JMP;
2461 lua_assert(target - p >= -128); 2837 lj_assertA(target - p >= -128, "loop realign failed");
2462 p[-1] = (MCode)(target - p); /* Patch sjcc. */ 2838 p[-1] = (MCode)(target - p); /* Patch sjcc. */
2463 if (as->loopinv == 2) 2839 if (as->loopinv == 2)
2464 p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ 2840 p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */
2465 } else { 2841 } else {
2466 lua_assert(target - p >= -128); 2842 lj_assertA(target - p >= -128, "loop realign failed");
2467 p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ 2843 p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */
2468 p[-2] = XI_JMPs; 2844 p[-2] = XI_JMPs;
2469 } 2845 }
@@ -2492,6 +2868,12 @@ static void asm_loop_fixup(ASMState *as)
2492 } 2868 }
2493} 2869}
2494 2870
2871/* Fixup the tail of the loop. */
2872static void asm_loop_tail_fixup(ASMState *as)
2873{
2874 UNUSED(as); /* Nothing to do. */
2875}
2876
2495/* -- Head of trace ------------------------------------------------------- */ 2877/* -- Head of trace ------------------------------------------------------- */
2496 2878
2497/* Coalesce BASE register for a root trace. */ 2879/* Coalesce BASE register for a root trace. */
@@ -2504,7 +2886,7 @@ static void asm_head_root_base(ASMState *as)
2504 if (rset_test(as->modset, r) || irt_ismarked(ir->t)) 2886 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
2505 ir->r = RID_INIT; /* No inheritance for modified BASE register. */ 2887 ir->r = RID_INIT; /* No inheritance for modified BASE register. */
2506 if (r != RID_BASE) 2888 if (r != RID_BASE)
2507 emit_rr(as, XO_MOV, r, RID_BASE); 2889 emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
2508 } 2890 }
2509} 2891}
2510 2892
@@ -2520,7 +2902,8 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
2520 if (irp->r == r) { 2902 if (irp->r == r) {
2521 return r; /* Same BASE register already coalesced. */ 2903 return r; /* Same BASE register already coalesced. */
2522 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { 2904 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
2523 emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ 2905 /* Move from coalesced parent reg. */
2906 emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
2524 return irp->r; 2907 return irp->r;
2525 } else { 2908 } else {
2526 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ 2909 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
@@ -2539,7 +2922,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2539 MCode *target, *q; 2922 MCode *target, *q;
2540 int32_t spadj = as->T->spadjust; 2923 int32_t spadj = as->T->spadjust;
2541 if (spadj == 0) { 2924 if (spadj == 0) {
2542 p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); 2925 p -= LJ_64 ? 7 : 6;
2543 } else { 2926 } else {
2544 MCode *p1; 2927 MCode *p1;
2545 /* Patch stack adjustment. */ 2928 /* Patch stack adjustment. */
@@ -2551,24 +2934,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2551 p1 = p-9; 2934 p1 = p-9;
2552 *(int32_t *)p1 = spadj; 2935 *(int32_t *)p1 = spadj;
2553 } 2936 }
2554 if ((as->flags & JIT_F_LEA_AGU)) {
2555#if LJ_64
2556 p1[-4] = 0x48;
2557#endif
2558 p1[-3] = (MCode)XI_LEA;
2559 p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
2560 p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
2561 } else {
2562#if LJ_64 2937#if LJ_64
2563 p1[-3] = 0x48; 2938 p1[-3] = 0x48;
2564#endif 2939#endif
2565 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); 2940 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2566 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); 2941 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
2567 }
2568 } 2942 }
2569 /* Patch exit branch. */ 2943 /* Patch exit branch. */
2570 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; 2944 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
2571 *(int32_t *)(p-4) = jmprel(p, target); 2945 *(int32_t *)(p-4) = jmprel(as->J, p, target);
2572 p[-5] = XI_JMP; 2946 p[-5] = XI_JMP;
2573 /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ 2947 /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
2574 for (q = as->mctop-1; q >= p; q--) 2948 for (q = as->mctop-1; q >= p; q--)
@@ -2595,168 +2969,11 @@ static void asm_tail_prep(ASMState *as)
2595 as->invmcp = as->mcp = p; 2969 as->invmcp = as->mcp = p;
2596 } else { 2970 } else {
2597 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ 2971 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
2598 as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); 2972 as->mcp = p - (LJ_64 ? 7 : 6);
2599 as->invmcp = NULL; 2973 as->invmcp = NULL;
2600 } 2974 }
2601} 2975}
2602 2976
2603/* -- Instruction dispatch ------------------------------------------------ */
2604
2605/* Assemble a single instruction. */
2606static void asm_ir(ASMState *as, IRIns *ir)
2607{
2608 switch ((IROp)ir->o) {
2609 /* Miscellaneous ops. */
2610 case IR_LOOP: asm_loop(as); break;
2611 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2612 case IR_USE:
2613 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2614 case IR_PHI: asm_phi(as, ir); break;
2615 case IR_HIOP: asm_hiop(as, ir); break;
2616 case IR_GCSTEP: asm_gcstep(as, ir); break;
2617
2618 /* Guarded assertions. */
2619 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2620 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2621 case IR_EQ: case IR_NE: case IR_ABC:
2622 asm_comp(as, ir, asm_compmap[ir->o]);
2623 break;
2624
2625 case IR_RETF: asm_retf(as, ir); break;
2626
2627 /* Bit ops. */
2628 case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
2629 case IR_BSWAP: asm_bitswap(as, ir); break;
2630
2631 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
2632 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
2633 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
2634
2635 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
2636 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
2637 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
2638 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
2639 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
2640
2641 /* Arithmetic ops. */
2642 case IR_ADD: asm_add(as, ir); break;
2643 case IR_SUB:
2644 if (irt_isnum(ir->t))
2645 asm_fparith(as, ir, XO_SUBSD);
2646 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2647 asm_intarith(as, ir, XOg_SUB);
2648 break;
2649 case IR_MUL:
2650 if (irt_isnum(ir->t))
2651 asm_fparith(as, ir, XO_MULSD);
2652 else
2653 asm_intarith(as, ir, XOg_X_IMUL);
2654 break;
2655 case IR_DIV:
2656#if LJ_64 && LJ_HASFFI
2657 if (!irt_isnum(ir->t))
2658 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2659 IRCALL_lj_carith_divu64);
2660 else
2661#endif
2662 asm_fparith(as, ir, XO_DIVSD);
2663 break;
2664 case IR_MOD:
2665#if LJ_64 && LJ_HASFFI
2666 if (!irt_isint(ir->t))
2667 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2668 IRCALL_lj_carith_modu64);
2669 else
2670#endif
2671 asm_intmod(as, ir);
2672 break;
2673
2674 case IR_NEG:
2675 if (irt_isnum(ir->t))
2676 asm_fparith(as, ir, XO_XORPS);
2677 else
2678 asm_neg_not(as, ir, XOg_NEG);
2679 break;
2680 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
2681
2682 case IR_MIN:
2683 if (irt_isnum(ir->t))
2684 asm_fparith(as, ir, XO_MINSD);
2685 else
2686 asm_min_max(as, ir, CC_G);
2687 break;
2688 case IR_MAX:
2689 if (irt_isnum(ir->t))
2690 asm_fparith(as, ir, XO_MAXSD);
2691 else
2692 asm_min_max(as, ir, CC_L);
2693 break;
2694
2695 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
2696 asm_fpmath(as, ir);
2697 break;
2698 case IR_POW:
2699#if LJ_64 && LJ_HASFFI
2700 if (!irt_isnum(ir->t))
2701 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
2702 IRCALL_lj_carith_powu64);
2703 else
2704#endif
2705 asm_fppowi(as, ir);
2706 break;
2707
2708 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
2709 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
2710 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
2711 case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
2712
2713 /* Memory references. */
2714 case IR_AREF: asm_aref(as, ir); break;
2715 case IR_HREF: asm_href(as, ir); break;
2716 case IR_HREFK: asm_hrefk(as, ir); break;
2717 case IR_NEWREF: asm_newref(as, ir); break;
2718 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2719 case IR_FREF: asm_fref(as, ir); break;
2720 case IR_STRREF: asm_strref(as, ir); break;
2721
2722 /* Loads and stores. */
2723 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2724 asm_ahuvload(as, ir);
2725 break;
2726 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
2727 case IR_SLOAD: asm_sload(as, ir); break;
2728
2729 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2730 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
2731
2732 /* Allocations. */
2733 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2734 case IR_TNEW: asm_tnew(as, ir); break;
2735 case IR_TDUP: asm_tdup(as, ir); break;
2736 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2737
2738 /* Write barriers. */
2739 case IR_TBAR: asm_tbar(as, ir); break;
2740 case IR_OBAR: asm_obar(as, ir); break;
2741
2742 /* Type conversions. */
2743 case IR_TOBIT: asm_tobit(as, ir); break;
2744 case IR_CONV: asm_conv(as, ir); break;
2745 case IR_TOSTR: asm_tostr(as, ir); break;
2746 case IR_STRTO: asm_strto(as, ir); break;
2747
2748 /* Calls. */
2749 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2750 case IR_CALLXS: asm_callx(as, ir); break;
2751 case IR_CARG: break;
2752
2753 default:
2754 setintV(&as->J->errinfo, ir->o);
2755 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2756 break;
2757 }
2758}
2759
2760/* -- Trace setup --------------------------------------------------------- */ 2977/* -- Trace setup --------------------------------------------------------- */
2761 2978
2762/* Ensure there are enough stack slots for call arguments. */ 2979/* Ensure there are enough stack slots for call arguments. */
@@ -2779,6 +2996,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2779static void asm_setup_target(ASMState *as) 2996static void asm_setup_target(ASMState *as)
2780{ 2997{
2781 asm_exitstub_setup(as, as->T->nsnap); 2998 asm_exitstub_setup(as, as->T->nsnap);
2999 as->mrm.base = 0;
2782} 3000}
2783 3001
2784/* -- Trace patching ------------------------------------------------------ */ 3002/* -- Trace patching ------------------------------------------------------ */
@@ -2892,18 +3110,24 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2892 MCode *px = exitstub_addr(J, exitno) - 6; 3110 MCode *px = exitstub_addr(J, exitno) - 6;
2893 MCode *pe = p+len-6; 3111 MCode *pe = p+len-6;
2894 MCode *pgc = NULL; 3112 MCode *pgc = NULL;
2895 uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); 3113#if LJ_GC64
3114 uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
3115#else
3116 uint32_t statei = u32ptr(&J2G(J)->vmstate);
3117#endif
2896 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) 3118 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
2897 *(int32_t *)(p+len-4) = jmprel(p+len, target); 3119 *(int32_t *)(p+len-4) = jmprel(J, p+len, target);
2898 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ 3120 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
2899 for (; p < pe; p += asm_x86_inslen(p)) 3121 for (; p < pe; p += asm_x86_inslen(p)) {
2900 if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) 3122 intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
3123 if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
2901 break; 3124 break;
2902 lua_assert(p < pe); 3125 }
3126 lj_assertJ(p < pe, "instruction length decoder failed");
2903 for (; p < pe; p += asm_x86_inslen(p)) { 3127 for (; p < pe; p += asm_x86_inslen(p)) {
2904 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px && 3128 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px &&
2905 p != pgc) { 3129 p != pgc) {
2906 *(int32_t *)(p+2) = jmprel(p+6, target); 3130 *(int32_t *)(p+2) = jmprel(J, p+6, target);
2907 } else if (*p == XI_CALL && 3131 } else if (*p == XI_CALL &&
2908 (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { 3132 (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
2909 pgc = p+7; /* Do not patch GC check exit. */ 3133 pgc = p+7; /* Do not patch GC check exit. */
diff --git a/src/lj_assert.c b/src/lj_assert.c
new file mode 100644
index 00000000..5c948b41
--- /dev/null
+++ b/src/lj_assert.c
@@ -0,0 +1,28 @@
1/*
2** Internal assertions.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_assert_c
7#define LUA_CORE
8
9#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
10
11#include <stdio.h>
12
13#include "lj_obj.h"
14
15void lj_assert_fail(global_State *g, const char *file, int line,
16 const char *func, const char *fmt, ...)
17{
18 va_list argp;
19 va_start(argp, fmt);
20 fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func);
21 vfprintf(stderr, fmt, argp);
22 fputc('\n', stderr);
23 va_end(argp);
24 UNUSED(g); /* May be NULL. TODO: optionally dump state. */
25 abort();
26}
27
28#endif
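
The lj_assertA/lj_assertLS/lj_assertJ calls introduced throughout this patch replace plain lua_assert() with formatted assertions that funnel into lj_assert_fail() above when LUA_USE_ASSERT (or LUA_USE_APICHECK) is defined. The actual macro layer is defined elsewhere in the tree; the following is only a minimal stand-in showing the pattern (my_assert and assert_fail are invented names):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static void assert_fail(const char *file, int line, const char *func,
                        const char *fmt, ...)
{
  va_list argp;
  va_start(argp, fmt);
  fprintf(stderr, "ASSERT %s:%d: %s: ", file, line, func);
  vfprintf(stderr, fmt, argp);
  fputc('\n', stderr);
  va_end(argp);
  abort();
}

/* Formatted assertion: evaluates to nothing when the condition holds. */
#define my_assert(c, ...) \
  ((c) ? (void)0 : assert_fail(__FILE__, __LINE__, __func__, __VA_ARGS__))

int main(void)
{
  int op = 42;
  my_assert(op >= 0, "bad HIOP for op %d", op);  /* Passes silently. */
  my_assert(op < 10, "bad HIOP for op %d", op);  /* Prints and aborts. */
  return 0;
}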
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 5f6146b6..97e19a1b 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index 5cca36a9..3e56e39c 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,14 +36,17 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
43#define BCDUMP_F_STRIP 0x02 43#define BCDUMP_F_STRIP 0x02
44#define BCDUMP_F_FFI 0x04 44#define BCDUMP_F_FFI 0x04
45#define BCDUMP_F_FR2 0x08
45 46
46#define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1) 47#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
48
49#define BCDUMP_F_DETERMINISTIC 0x80000000
47 50
48/* Type codes for the GC constants of a prototype. Plus length for strings. */ 51/* Type codes for the GC constants of a prototype. Plus length for strings. */
49enum { 52enum {
@@ -60,7 +63,8 @@ enum {
60/* -- Bytecode reader/writer ---------------------------------------------- */ 63/* -- Bytecode reader/writer ---------------------------------------------- */
61 64
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 65LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 66 void *data, uint32_t flags);
67LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 68LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 69
66#endif 70#endif
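
BCDUMP_F_KNOWN is derived from the highest flag that is part of the dump format: BCDUMP_F_FR2*2-1 = 0x0f, a mask covering BE, STRIP, FFI and FR2, and bcread_header() below rejects a dump if any bit outside that mask is set. The new BCDUMP_F_DETERMINISTIC bit lies far outside the mask, which suggests it is a writer-side option rather than an on-disk flag. A small standalone illustration of the mask check (flag values copied from the header above; the 0x10 example stands for a hypothetical unknown flag):

#include <stdio.h>
#include <stdint.h>

#define BCDUMP_F_BE     0x01
#define BCDUMP_F_STRIP  0x02
#define BCDUMP_F_FFI    0x04
#define BCDUMP_F_FR2    0x08
#define BCDUMP_F_KNOWN  (BCDUMP_F_FR2*2-1)  /* 0x0f: every flag up to FR2. */

/* Header flags are acceptable only if no unknown bit is set. */
static int flags_ok(uint32_t flags)
{
  return (flags & ~(uint32_t)BCDUMP_F_KNOWN) == 0;
}

int main(void)
{
  printf("KNOWN mask = 0x%02x\n", BCDUMP_F_KNOWN);
  printf("STRIP|FR2  -> %s\n",
         flags_ok(BCDUMP_F_STRIP|BCDUMP_F_FR2) ? "ok" : "reject");
  printf("0x10       -> %s\n", flags_ok(0x10) ? "ok" : "reject");
  return 0;
}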
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index ad6b2a5c..637ef067 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_bc.h" 15#include "lj_bc.h"
@@ -20,6 +21,7 @@
20#include "lj_lex.h" 21#include "lj_lex.h"
21#include "lj_bcdump.h" 22#include "lj_bcdump.h"
22#include "lj_state.h" 23#include "lj_state.h"
24#include "lj_strfmt.h"
23 25
24/* Reuse some lexer fields for our own purposes. */ 26/* Reuse some lexer fields for our own purposes. */
25#define bcread_flags(ls) ls->level 27#define bcread_flags(ls) ls->level
@@ -38,85 +40,74 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38 const char *name = ls->chunkarg; 40 const char *name = ls->chunkarg;
39 if (*name == BCDUMP_HEAD1) name = "(binary)"; 41 if (*name == BCDUMP_HEAD1) name = "(binary)";
40 else if (*name == '@' || *name == '=') name++; 42 else if (*name == '@' || *name == '=') name++;
41 lj_str_pushf(L, "%s: %s", name, err2msg(em)); 43 lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
42 lj_err_throw(L, LUA_ERRSYNTAX); 44 lj_err_throw(L, LUA_ERRSYNTAX);
43} 45}
44 46
45/* Resize input buffer. */ 47/* Refill buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) 48static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{ 49{
59 lua_assert(len != 0); 50 lj_assertLS(len != 0, "empty refill");
60 if (len > LJ_MAX_MEM || ls->current < 0) 51 if (len > LJ_MAX_BUF || ls->c < 0)
61 bcread_error(ls, LJ_ERR_BCBAD); 52 bcread_error(ls, LJ_ERR_BCBAD);
62 do { 53 do {
63 const char *buf; 54 const char *buf;
64 size_t size; 55 size_t sz;
65 if (ls->n) { /* Copy remainder to buffer. */ 56 char *p = ls->sb.b;
66 if (ls->sb.n) { /* Move down in buffer. */ 57 MSize n = (MSize)(ls->pe - ls->p);
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); 58 if (n) { /* Copy remainder to buffer. */
68 if (ls->n != ls->sb.n) 59 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
69 memmove(ls->sb.buf, ls->p, ls->n); 60 lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
61 if (ls->p != p) memmove(p, ls->p, n);
70 } else { /* Copy from buffer provided by reader. */ 62 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len); 63 p = lj_buf_need(&ls->sb, len);
72 memcpy(ls->sb.buf, ls->p, ls->n); 64 memcpy(p, ls->p, n);
73 } 65 }
74 ls->p = ls->sb.buf; 66 ls->p = p;
67 ls->pe = p + n;
75 } 68 }
76 ls->sb.n = ls->n; 69 ls->sb.w = p + n;
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ 70 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */ 71 if (buf == NULL || sz == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD); 72 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */ 73 ls->c = -1; /* Only bad if we get called again. */
81 break; 74 break;
82 } 75 }
83 if (size >= LJ_MAX_MEM - ls->sb.n) lj_err_mem(ls->L); 76 if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
84 if (ls->sb.n) { /* Append to buffer. */ 77 if (n) { /* Append to buffer. */
85 MSize n = ls->sb.n + (MSize)size; 78 n += (MSize)sz;
86 bcread_resize(ls, n < len ? len : n); 79 p = lj_buf_need(&ls->sb, n < len ? len : n);
87 memcpy(ls->sb.buf + ls->sb.n, buf, size); 80 memcpy(ls->sb.w, buf, sz);
88 ls->n = ls->sb.n = n; 81 ls->sb.w = p + n;
89 ls->p = ls->sb.buf; 82 ls->p = p;
83 ls->pe = p + n;
90 } else { /* Return buffer provided by reader. */ 84 } else { /* Return buffer provided by reader. */
91 ls->n = (MSize)size;
92 ls->p = buf; 85 ls->p = buf;
86 ls->pe = buf + sz;
93 } 87 }
94 } while (ls->n < len); 88 } while ((MSize)(ls->pe - ls->p) < len);
95} 89}
96 90
97/* Need a certain number of bytes. */ 91/* Need a certain number of bytes. */
98static LJ_AINLINE void bcread_need(LexState *ls, MSize len) 92static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
99{ 93{
100 if (LJ_UNLIKELY(ls->n < len)) 94 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
101 bcread_fill(ls, len, 1); 95 bcread_fill(ls, len, 1);
102} 96}
103 97
104/* Want to read up to a certain number of bytes, but may need less. */ 98/* Want to read up to a certain number of bytes, but may need less. */
105static LJ_AINLINE void bcread_want(LexState *ls, MSize len) 99static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
106{ 100{
107 if (LJ_UNLIKELY(ls->n < len)) 101 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
108 bcread_fill(ls, len, 0); 102 bcread_fill(ls, len, 0);
109} 103}
110 104
111#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
112#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
113
114/* Return memory block from buffer. */ 105/* Return memory block from buffer. */
115static uint8_t *bcread_mem(LexState *ls, MSize len) 106static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
116{ 107{
117 uint8_t *p = (uint8_t *)ls->p; 108 uint8_t *p = (uint8_t *)ls->p;
118 bcread_consume(ls, len); 109 ls->p += len;
119 ls->p = (char *)p + len; 110 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
120 return p; 111 return p;
121} 112}
122 113
@@ -129,25 +120,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
129/* Read byte from buffer. */ 120/* Read byte from buffer. */
130static LJ_AINLINE uint32_t bcread_byte(LexState *ls) 121static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
131{ 122{
132 bcread_dec(ls); 123 lj_assertLS(ls->p < ls->pe, "buffer read overflow");
133 return (uint32_t)(uint8_t)*ls->p++; 124 return (uint32_t)(uint8_t)*ls->p++;
134} 125}
135 126
136/* Read ULEB128 value from buffer. */ 127/* Read ULEB128 value from buffer. */
137static uint32_t bcread_uleb128(LexState *ls) 128static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
138{ 129{
139 const uint8_t *p = (const uint8_t *)ls->p; 130 uint32_t v = lj_buf_ruleb128(&ls->p);
140 uint32_t v = *p++; 131 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
141 if (LJ_UNLIKELY(v >= 0x80)) {
142 int sh = 0;
143 v &= 0x7f;
144 do {
145 v |= ((*p & 0x7f) << (sh += 7));
146 bcread_dec(ls);
147 } while (*p++ >= 0x80);
148 }
149 bcread_dec(ls);
150 ls->p = (char *)p;
151 return v; 132 return v;
152} 133}
153 134
@@ -161,11 +142,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
161 v &= 0x3f; 142 v &= 0x3f;
162 do { 143 do {
163 v |= ((*p & 0x7f) << (sh += 7)); 144 v |= ((*p & 0x7f) << (sh += 7));
164 bcread_dec(ls);
165 } while (*p++ >= 0x80); 145 } while (*p++ >= 0x80);
166 } 146 }
167 bcread_dec(ls);
168 ls->p = (char *)p; 147 ls->p = (char *)p;
148 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
169 return v; 149 return v;
170} 150}
171 151
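
Both helpers above read ULEB128: seven payload bits per byte, least-significant group first, with the top bit of each byte marking a continuation (the _33 variant additionally folds one extra low bit of a 33-bit value into the first byte). The plain 32-bit decode, equivalent to what lj_buf_ruleb128() is expected to do, can be sketched standalone like this (illustrative only, not the library routine):

#include <stdint.h>
#include <stdio.h>

/* Decode one ULEB128 value and advance *pp past it. */
static uint32_t uleb128_read(const uint8_t **pp)
{
  const uint8_t *p = *pp;
  uint32_t v = *p++;
  if (v >= 0x80) {              /* Multi-byte value. */
    int sh = 0;
    v &= 0x7f;
    do {
      v |= ((uint32_t)(*p & 0x7f) << (sh += 7));
    } while (*p++ >= 0x80);
  }
  *pp = p;
  return v;
}

int main(void)
{
  static const uint8_t buf[] = { 0x05, 0xe5, 0x8e, 0x26 };  /* 5, then 624485. */
  const uint8_t *p = buf;
  uint32_t a = uleb128_read(&p);
  uint32_t b = uleb128_read(&p);
  printf("%u %u\n", (unsigned)a, (unsigned)b);  /* -> 5 624485 */
  return 0;
}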
@@ -212,8 +192,8 @@ static void bcread_ktabk(LexState *ls, TValue *o)
212 o->u32.lo = bcread_uleb128(ls); 192 o->u32.lo = bcread_uleb128(ls);
213 o->u32.hi = bcread_uleb128(ls); 193 o->u32.hi = bcread_uleb128(ls);
214 } else { 194 } else {
215 lua_assert(tp <= BCDUMP_KTAB_TRUE); 195 lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
216 setitype(o, ~tp); 196 setpriV(o, ~tp);
217 } 197 }
218} 198}
219 199
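
The switch from setitype() to setpriV() keeps the underlying trick intact: the dump's primitive constant codes are the bitwise complement of the VM's primitive tags, so the reader can use ~tp directly, and the writer side (later in this patch) emits BCDUMP_KTAB_NIL+~itype(o). A tiny standalone restatement of that mapping (the ~0u/~1u/~2u tag values are quoted from lj_obj.h; treat them as context, not as something this diff defines):

#include <stdio.h>
#include <stdint.h>

/* VM primitive tags (values as in lj_obj.h, restated for illustration). */
#define LJ_TNIL    (~0u)
#define LJ_TFALSE  (~1u)
#define LJ_TTRUE   (~2u)

/* Dump-side constant codes (BCDUMP_KTAB_NIL..TRUE are 0, 1, 2). */
enum { KTAB_NIL, KTAB_FALSE, KTAB_TRUE };

int main(void)
{
  uint32_t tags[3] = { LJ_TNIL, LJ_TFALSE, LJ_TTRUE };
  int i;
  for (i = 0; i < 3; i++) {
    uint32_t code = KTAB_NIL + ~tags[i];  /* Writer: code = ~itype. */
    uint32_t back = ~code;                /* Reader: setpriV(o, ~tp). */
    printf("tag 0x%08x -> code %u -> tag 0x%08x\n",
           (unsigned)tags[i], (unsigned)code, (unsigned)back);
  }
  return 0;
}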
@@ -234,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls)
234 for (i = 0; i < nhash; i++) { 214 for (i = 0; i < nhash; i++) {
235 TValue key; 215 TValue key;
236 bcread_ktabk(ls, &key); 216 bcread_ktabk(ls, &key);
237 lua_assert(!tvisnil(&key)); 217 lj_assertLS(!tvisnil(&key), "nil key");
238 bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); 218 bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
239 } 219 }
240 } 220 }
@@ -271,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
271#endif 251#endif
272 } else { 252 } else {
273 lua_State *L = ls->L; 253 lua_State *L = ls->L;
274 lua_assert(tp == BCDUMP_KGC_CHILD); 254 lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp);
275 if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ 255 if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
276 bcread_error(ls, LJ_ERR_BCBAD); 256 bcread_error(ls, LJ_ERR_BCBAD);
277 L->top--; 257 L->top--;
@@ -301,8 +281,11 @@ static void bcread_knum(LexState *ls, GCproto *pt, MSize sizekn)
301static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc) 281static void bcread_bytecode(LexState *ls, GCproto *pt, MSize sizebc)
302{ 282{
303 BCIns *bc = proto_bc(pt); 283 BCIns *bc = proto_bc(pt);
304 bc[0] = BCINS_AD((pt->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, 284 BCIns op;
305 pt->framesize, 0); 285 if (ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
286 else if ((pt->flags & PROTO_VARARG)) op = BC_FUNCV;
287 else op = BC_FUNCF;
288 bc[0] = BCINS_AD(op, pt->framesize, 0);
306 bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns)); 289 bcread_block(ls, bc+1, (sizebc-1)*(MSize)sizeof(BCIns));
307 /* Swap bytecode instructions if the endianness differs. */ 290 /* Swap bytecode instructions if the endianness differs. */
308 if (bcread_swap(ls)) { 291 if (bcread_swap(ls)) {
@@ -327,25 +310,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
327} 310}
328 311
329/* Read a prototype. */ 312/* Read a prototype. */
330static GCproto *bcread_proto(LexState *ls) 313GCproto *lj_bcread_proto(LexState *ls)
331{ 314{
332 GCproto *pt; 315 GCproto *pt;
333 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 316 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
334 MSize ofsk, ofsuv, ofsdbg; 317 MSize ofsk, ofsuv, ofsdbg;
335 MSize sizedbg = 0; 318 MSize sizedbg = 0;
336 BCLine firstline = 0, numline = 0; 319 BCLine firstline = 0, numline = 0;
337 MSize len, startn;
338
339 /* Read length. */
340 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
341 ls->n--; ls->p++;
342 return NULL;
343 }
344 bcread_want(ls, 5);
345 len = bcread_uleb128(ls);
346 if (!len) return NULL; /* EOF */
347 bcread_need(ls, len);
348 startn = ls->n;
349 320
350 /* Read prototype header. */ 321 /* Read prototype header. */
351 flags = bcread_byte(ls); 322 flags = bcread_byte(ls);
@@ -414,9 +385,6 @@ static GCproto *bcread_proto(LexState *ls)
414 setmref(pt->uvinfo, NULL); 385 setmref(pt->uvinfo, NULL);
415 setmref(pt->varinfo, NULL); 386 setmref(pt->varinfo, NULL);
416 } 387 }
417
418 if (len != startn - ls->n)
419 bcread_error(ls, LJ_ERR_BCBAD);
420 return pt; 388 return pt;
421} 389}
422 390
@@ -430,14 +398,11 @@ static int bcread_header(LexState *ls)
430 bcread_byte(ls) != BCDUMP_VERSION) return 0; 398 bcread_byte(ls) != BCDUMP_VERSION) return 0;
431 bcread_flags(ls) = flags = bcread_uleb128(ls); 399 bcread_flags(ls) = flags = bcread_uleb128(ls);
432 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; 400 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
401 if ((flags & BCDUMP_F_FR2) != (uint32_t)ls->fr2*BCDUMP_F_FR2) return 0;
433 if ((flags & BCDUMP_F_FFI)) { 402 if ((flags & BCDUMP_F_FFI)) {
434#if LJ_HASFFI 403#if LJ_HASFFI
435 lua_State *L = ls->L; 404 lua_State *L = ls->L;
436 if (!ctype_ctsG(G(L))) { 405 ctype_loadffi(L);
437 ptrdiff_t oldtop = savestack(L, L->top);
438 luaopen_ffi(L); /* Load FFI library on-demand. */
439 L->top = restorestack(L, oldtop);
440 }
441#else 406#else
442 return 0; 407 return 0;
443#endif 408#endif
@@ -456,19 +421,33 @@ static int bcread_header(LexState *ls)
456GCproto *lj_bcread(LexState *ls) 421GCproto *lj_bcread(LexState *ls)
457{ 422{
458 lua_State *L = ls->L; 423 lua_State *L = ls->L;
459 lua_assert(ls->current == BCDUMP_HEAD1); 424 lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header");
460 bcread_savetop(L, ls, L->top); 425 bcread_savetop(L, ls, L->top);
461 lj_str_resetbuf(&ls->sb); 426 lj_buf_reset(&ls->sb);
462 /* Check for a valid bytecode dump header. */ 427 /* Check for a valid bytecode dump header. */
463 if (!bcread_header(ls)) 428 if (!bcread_header(ls))
464 bcread_error(ls, LJ_ERR_BCFMT); 429 bcread_error(ls, LJ_ERR_BCFMT);
465 for (;;) { /* Process all prototypes in the bytecode dump. */ 430 for (;;) { /* Process all prototypes in the bytecode dump. */
466 GCproto *pt = bcread_proto(ls); 431 GCproto *pt;
467 if (!pt) break; 432 MSize len;
433 const char *startp;
434 /* Read length. */
435 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
436 ls->p++;
437 break;
438 }
439 bcread_want(ls, 5);
440 len = bcread_uleb128(ls);
441 if (!len) break; /* EOF */
442 bcread_need(ls, len);
443 startp = ls->p;
444 pt = lj_bcread_proto(ls);
445 if (ls->p != startp + len)
446 bcread_error(ls, LJ_ERR_BCBAD);
468 setprotoV(L, L->top, pt); 447 setprotoV(L, L->top, pt);
469 incr_top(L); 448 incr_top(L);
470 } 449 }
471 if ((ls->n && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) 450 if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
472 bcread_error(ls, LJ_ERR_BCBAD); 451 bcread_error(ls, LJ_ERR_BCBAD);
473 /* Pop off last prototype. */ 452 /* Pop off last prototype. */
474 L->top--; 453 L->top--;
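
The rewritten loop makes the dump framing explicit: after the header comes a sequence of prototypes, each prefixed with its ULEB128-encoded length and terminated by a zero length, and the reader now also checks that parsing consumed exactly the declared length. A standalone walk over that framing (the byte values are fabricated; a real prototype body is structured, not opaque):

#include <stdint.h>
#include <stdio.h>

/* Minimal ULEB128 decoder (same scheme as in the reader sketch above). */
static uint32_t uleb128(const uint8_t **pp)
{
  const uint8_t *p = *pp;
  uint32_t v = *p++;
  if (v >= 0x80) {
    int sh = 0;
    v &= 0x7f;
    do { v |= ((uint32_t)(*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
  }
  *pp = p;
  return v;
}

int main(void)
{
  /* Fabricated dump body: two "prototypes" of 3 and 2 bytes, then a 0 byte. */
  static const uint8_t dump[] = { 3, 0xAA, 0xBB, 0xCC, 2, 0xDD, 0xEE, 0 };
  const uint8_t *p = dump, *pe = dump + sizeof(dump);
  int n = 0;
  while (p < pe) {
    uint32_t len = uleb128(&p);
    if (!len) break;                              /* Zero length ends the dump. */
    if (len > (uint32_t)(pe - p)) { puts("truncated dump"); return 1; }
    p += len;  /* A real reader parses the prototype here and verifies that it
               ** consumed exactly len bytes. */
    printf("prototype %d: %u bytes\n", ++n, (unsigned)len);
  }
  return 0;
}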
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index 9820ad12..ddfa46c5 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_str.h" 11#include "lj_buf.h"
12#include "lj_bc.h" 12#include "lj_bc.h"
13#if LJ_HASFFI 13#if LJ_HASFFI
14#include "lj_ctype.h" 14#include "lj_ctype.h"
@@ -17,99 +17,138 @@
17#include "lj_dispatch.h" 17#include "lj_dispatch.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#endif 19#endif
20#include "lj_strfmt.h"
20#include "lj_bcdump.h" 21#include "lj_bcdump.h"
21#include "lj_vm.h" 22#include "lj_vm.h"
22 23
23/* Context for bytecode writer. */ 24/* Context for bytecode writer. */
24typedef struct BCWriteCtx { 25typedef struct BCWriteCtx {
25 SBuf sb; /* Output buffer. */ 26 SBuf sb; /* Output buffer. */
26 lua_State *L; /* Lua state. */
27 GCproto *pt; /* Root prototype. */ 27 GCproto *pt; /* Root prototype. */
28 lua_Writer wfunc; /* Writer callback. */ 28 lua_Writer wfunc; /* Writer callback. */
29 void *wdata; /* Writer callback data. */ 29 void *wdata; /* Writer callback data. */
30 int strip; /* Strip debug info. */ 30 TValue **heap; /* Heap used for deterministic sorting. */
31 uint32_t heapsz; /* Size of heap. */
32 uint32_t flags; /* BCDUMP_F_* flags. */
31 int status; /* Status from writer callback. */ 33 int status; /* Status from writer callback. */
34#ifdef LUA_USE_ASSERT
35 global_State *g;
36#endif
32} BCWriteCtx; 37} BCWriteCtx;
33 38
34/* -- Output buffer handling ---------------------------------------------- */ 39#ifdef LUA_USE_ASSERT
35 40#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__)
36/* Resize buffer if needed. */ 41#else
37static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len) 42#define lj_assertBCW(c, ...) ((void)ctx)
38{ 43#endif
39 MSize sz = ctx->sb.sz * 2;
40 while (ctx->sb.n + len > sz) sz = sz * 2;
41 lj_str_resizebuf(ctx->L, &ctx->sb, sz);
42}
43
44/* Need a certain amount of buffer space. */
45static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
46{
47 if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
48 bcwrite_resize(ctx, len);
49}
50
51/* Add memory block to buffer. */
52static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
53{
54 uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
55 MSize i;
56 ctx->sb.n += len;
57 for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
58}
59
60/* Add byte to buffer. */
61static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
62{
63 ctx->sb.buf[ctx->sb.n++] = b;
64}
65
66/* Add ULEB128 value to buffer. */
67static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
68{
69 MSize n = ctx->sb.n;
70 uint8_t *p = (uint8_t *)ctx->sb.buf;
71 for (; v >= 0x80; v >>= 7)
72 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
73 p[n++] = (uint8_t)v;
74 ctx->sb.n = n;
75}
76 44
77/* -- Bytecode writer ----------------------------------------------------- */ 45/* -- Bytecode writer ----------------------------------------------------- */
78 46
79/* Write a single constant key/value of a template table. */ 47/* Write a single constant key/value of a template table. */
80static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) 48static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
81{ 49{
82 bcwrite_need(ctx, 1+10); 50 char *p = lj_buf_more(&ctx->sb, 1+10);
83 if (tvisstr(o)) { 51 if (tvisstr(o)) {
84 const GCstr *str = strV(o); 52 const GCstr *str = strV(o);
85 MSize len = str->len; 53 MSize len = str->len;
86 bcwrite_need(ctx, 5+len); 54 p = lj_buf_more(&ctx->sb, 5+len);
87 bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); 55 p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
88 bcwrite_block(ctx, strdata(str), len); 56 p = lj_buf_wmem(p, strdata(str), len);
89 } else if (tvisint(o)) { 57 } else if (tvisint(o)) {
90 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 58 *p++ = BCDUMP_KTAB_INT;
91 bcwrite_uleb128(ctx, intV(o)); 59 p = lj_strfmt_wuleb128(p, intV(o));
92 } else if (tvisnum(o)) { 60 } else if (tvisnum(o)) {
93 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 61 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
94 lua_Number num = numV(o); 62 lua_Number num = numV(o);
95 int32_t k = lj_num2int(num); 63 int32_t k = lj_num2int(num);
96 if (num == (lua_Number)k) { /* -0 is never a constant. */ 64 if (num == (lua_Number)k) { /* -0 is never a constant. */
97 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 65 *p++ = BCDUMP_KTAB_INT;
98 bcwrite_uleb128(ctx, k); 66 p = lj_strfmt_wuleb128(p, k);
67 ctx->sb.w = p;
99 return; 68 return;
100 } 69 }
101 } 70 }
102 bcwrite_byte(ctx, BCDUMP_KTAB_NUM); 71 *p++ = BCDUMP_KTAB_NUM;
103 bcwrite_uleb128(ctx, o->u32.lo); 72 p = lj_strfmt_wuleb128(p, o->u32.lo);
104 bcwrite_uleb128(ctx, o->u32.hi); 73 p = lj_strfmt_wuleb128(p, o->u32.hi);
74 } else {
75 lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
76 *p++ = BCDUMP_KTAB_NIL+~itype(o);
77 }
78 ctx->sb.w = p;
79}
80
81/* Compare two template table keys. */
82static LJ_AINLINE int bcwrite_ktabk_lt(TValue *a, TValue *b)
83{
84 uint32_t at = itype(a), bt = itype(b);
85 if (at != bt) { /* This also handles false and true keys. */
86 return at < bt;
87 } else if (at == LJ_TSTR) {
88 return lj_str_cmp(strV(a), strV(b)) < 0;
105 } else { 89 } else {
106 lua_assert(tvispri(o)); 90 return a->u64 < b->u64; /* This works for numbers and integers. */
107 bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o));
108 } 91 }
109} 92}
110 93
94/* Insert key into a sorted heap. */
95static void bcwrite_ktabk_heap_insert(TValue **heap, MSize idx, MSize end,
96 TValue *key)
97{
98 MSize child;
99 while ((child = idx * 2 + 1) < end) {
100 /* Find lower of the two children. */
101 TValue *c0 = heap[child];
102 if (child + 1 < end) {
103 TValue *c1 = heap[child + 1];
104 if (bcwrite_ktabk_lt(c1, c0)) {
105 c0 = c1;
106 child++;
107 }
108 }
109 if (bcwrite_ktabk_lt(key, c0)) break; /* Key lower? Found our position. */
110 heap[idx] = c0; /* Move lower child up. */
111 idx = child; /* Descend. */
112 }
113 heap[idx] = key; /* Insert key here. */
114}
115
116/* Resize heap, dropping content. */
117static void bcwrite_heap_resize(BCWriteCtx *ctx, uint32_t nsz)
118{
119 lua_State *L = sbufL(&ctx->sb);
120 if (ctx->heapsz) {
121 lj_mem_freevec(G(L), ctx->heap, ctx->heapsz, TValue *);
122 ctx->heapsz = 0;
123 }
124 if (nsz) {
125 ctx->heap = lj_mem_newvec(L, nsz, TValue *);
126 ctx->heapsz = nsz;
127 }
128}
129
130/* Write hash part of template table in sorted order. */
131static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash)
132{
133 TValue **heap = ctx->heap;
134 MSize i = nhash;
135 for (;; node--) { /* Build heap. */
136 if (!tvisnil(&node->key)) {
137 bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key);
138 if (i == 0) break;
139 }
140 }
141 do { /* Drain heap. */
142 TValue *key = heap[0]; /* Output lowest key from top. */
143 bcwrite_ktabk(ctx, key, 0);
144 bcwrite_ktabk(ctx, (TValue *)((char *)key - offsetof(Node, key)), 1);
145 key = heap[--nhash]; /* Remove last key. */
146 bcwrite_ktabk_heap_insert(heap, 0, nhash, key); /* Re-insert. */
147 } while (nhash);
148}
149
111/* Write a template table. */ 150/* Write a template table. */
112static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) 151static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
113{ 152{
114 MSize narray = 0, nhash = 0; 153 MSize narray = 0, nhash = 0;
115 if (t->asize > 0) { /* Determine max. length of array part. */ 154 if (t->asize > 0) { /* Determine max. length of array part. */
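The hunk above also adds the BCDUMP_F_DETERMINISTIC path: hash keys are emitted in a defined order by building a binary min-heap with sift-down insertion and then repeatedly extracting the smallest key, instead of dumping nodes in raw table order. A minimal sketch of the same build/drain shape on plain integers, with a plain less-than comparison standing in for bcwrite_ktabk_lt() (all names here are illustrative, not LuaJIT code):

#include <stdio.h>

/* Sift key down from idx until both children are >= key (min-heap). */
static void heap_insert(int *heap, int idx, int end, int key)
{
  int child;
  while ((child = idx*2 + 1) < end) {
    if (child + 1 < end && heap[child+1] < heap[child]) child++;
    if (key < heap[child]) break;          /* Key lower? This is its slot. */
    heap[idx] = heap[child];               /* Move lower child up. */
    idx = child;                           /* Descend. */
  }
  heap[idx] = key;
}

int main(void)
{
  int heap[] = { 42, 7, 19, 3, 23 };
  int n = 5, i;
  for (i = n-1; i >= 0; i--)               /* Build heap (heapify). */
    heap_insert(heap, i, n, heap[i]);
  while (n > 0) {                          /* Drain heap in ascending order. */
    int min = heap[0], last;
    printf("%d\n", min);                   /* Emit lowest key, like bcwrite_ktabk(). */
    last = heap[--n];                      /* Remove last key. */
    heap_insert(heap, 0, n, last);         /* Re-insert it at the root. */
  }
  return 0;
}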
@@ -127,8 +166,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
127 nhash += !tvisnil(&node[i].key); 166 nhash += !tvisnil(&node[i].key);
128 } 167 }
129 /* Write number of array slots and hash slots. */ 168 /* Write number of array slots and hash slots. */
130 bcwrite_uleb128(ctx, narray); 169 p = lj_strfmt_wuleb128(p, narray);
131 bcwrite_uleb128(ctx, nhash); 170 p = lj_strfmt_wuleb128(p, nhash);
171 ctx->sb.w = p;
132 if (narray) { /* Write array entries (may contain nil). */ 172 if (narray) { /* Write array entries (may contain nil). */
133 MSize i; 173 MSize i;
134 TValue *o = tvref(t->array); 174 TValue *o = tvref(t->array);
@@ -136,14 +176,20 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
136 bcwrite_ktabk(ctx, o, 1); 176 bcwrite_ktabk(ctx, o, 1);
137 } 177 }
138 if (nhash) { /* Write hash entries. */ 178 if (nhash) { /* Write hash entries. */
139 MSize i = nhash;
140 Node *node = noderef(t->node) + t->hmask; 179 Node *node = noderef(t->node) + t->hmask;
141 for (;; node--) 180 if ((ctx->flags & BCDUMP_F_DETERMINISTIC) && nhash > 1) {
142 if (!tvisnil(&node->key)) { 181 if (ctx->heapsz < nhash)
143 bcwrite_ktabk(ctx, &node->key, 0); 182 bcwrite_heap_resize(ctx, t->hmask + 1);
144 bcwrite_ktabk(ctx, &node->val, 1); 183 bcwrite_ktab_sorted_hash(ctx, node, nhash);
145 if (--i == 0) break; 184 } else {
146 } 185 MSize i = nhash;
186 for (;; node--)
187 if (!tvisnil(&node->key)) {
188 bcwrite_ktabk(ctx, &node->key, 0);
189 bcwrite_ktabk(ctx, &node->val, 1);
190 if (--i == 0) break;
191 }
192 }
147 } 193 }
148} 194}
149 195
@@ -155,12 +201,13 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
155 for (i = 0; i < sizekgc; i++, kr++) { 201 for (i = 0; i < sizekgc; i++, kr++) {
156 GCobj *o = gcref(*kr); 202 GCobj *o = gcref(*kr);
157 MSize tp, need = 1; 203 MSize tp, need = 1;
204 char *p;
158 /* Determine constant type and needed size. */ 205 /* Determine constant type and needed size. */
159 if (o->gch.gct == ~LJ_TSTR) { 206 if (o->gch.gct == ~LJ_TSTR) {
160 tp = BCDUMP_KGC_STR + gco2str(o)->len; 207 tp = BCDUMP_KGC_STR + gco2str(o)->len;
161 need = 5+gco2str(o)->len; 208 need = 5+gco2str(o)->len;
162 } else if (o->gch.gct == ~LJ_TPROTO) { 209 } else if (o->gch.gct == ~LJ_TPROTO) {
163 lua_assert((pt->flags & PROTO_CHILD)); 210 lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child");
164 tp = BCDUMP_KGC_CHILD; 211 tp = BCDUMP_KGC_CHILD;
165#if LJ_HASFFI 212#if LJ_HASFFI
166 } else if (o->gch.gct == ~LJ_TCDATA) { 213 } else if (o->gch.gct == ~LJ_TCDATA) {
@@ -171,34 +218,38 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
171 } else if (id == CTID_UINT64) { 218 } else if (id == CTID_UINT64) {
172 tp = BCDUMP_KGC_U64; 219 tp = BCDUMP_KGC_U64;
173 } else { 220 } else {
174 lua_assert(id == CTID_COMPLEX_DOUBLE); 221 lj_assertBCW(id == CTID_COMPLEX_DOUBLE,
222 "bad cdata constant CTID %d", id);
175 tp = BCDUMP_KGC_COMPLEX; 223 tp = BCDUMP_KGC_COMPLEX;
176 } 224 }
177#endif 225#endif
178 } else { 226 } else {
179 lua_assert(o->gch.gct == ~LJ_TTAB); 227 lj_assertBCW(o->gch.gct == ~LJ_TTAB,
228 "bad constant GC type %d", o->gch.gct);
180 tp = BCDUMP_KGC_TAB; 229 tp = BCDUMP_KGC_TAB;
181 need = 1+2*5; 230 need = 1+2*5;
182 } 231 }
183 /* Write constant type. */ 232 /* Write constant type. */
184 bcwrite_need(ctx, need); 233 p = lj_buf_more(&ctx->sb, need);
185 bcwrite_uleb128(ctx, tp); 234 p = lj_strfmt_wuleb128(p, tp);
186 /* Write constant data (if any). */ 235 /* Write constant data (if any). */
187 if (tp >= BCDUMP_KGC_STR) { 236 if (tp >= BCDUMP_KGC_STR) {
188 bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); 237 p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
189 } else if (tp == BCDUMP_KGC_TAB) { 238 } else if (tp == BCDUMP_KGC_TAB) {
190 bcwrite_ktab(ctx, gco2tab(o)); 239 bcwrite_ktab(ctx, p, gco2tab(o));
240 continue;
191#if LJ_HASFFI 241#if LJ_HASFFI
192 } else if (tp != BCDUMP_KGC_CHILD) { 242 } else if (tp != BCDUMP_KGC_CHILD) {
193 cTValue *p = (TValue *)cdataptr(gco2cd(o)); 243 cTValue *q = (TValue *)cdataptr(gco2cd(o));
194 bcwrite_uleb128(ctx, p[0].u32.lo); 244 p = lj_strfmt_wuleb128(p, q[0].u32.lo);
195 bcwrite_uleb128(ctx, p[0].u32.hi); 245 p = lj_strfmt_wuleb128(p, q[0].u32.hi);
196 if (tp == BCDUMP_KGC_COMPLEX) { 246 if (tp == BCDUMP_KGC_COMPLEX) {
197 bcwrite_uleb128(ctx, p[1].u32.lo); 247 p = lj_strfmt_wuleb128(p, q[1].u32.lo);
198 bcwrite_uleb128(ctx, p[1].u32.hi); 248 p = lj_strfmt_wuleb128(p, q[1].u32.hi);
199 } 249 }
200#endif 250#endif
201 } 251 }
252 ctx->sb.w = p;
202 } 253 }
203} 254}
204 255
@@ -207,7 +258,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
207{ 258{
208 MSize i, sizekn = pt->sizekn; 259 MSize i, sizekn = pt->sizekn;
209 cTValue *o = mref(pt->k, TValue); 260 cTValue *o = mref(pt->k, TValue);
210 bcwrite_need(ctx, 10*sizekn); 261 char *p = lj_buf_more(&ctx->sb, 10*sizekn);
211 for (i = 0; i < sizekn; i++, o++) { 262 for (i = 0; i < sizekn; i++, o++) {
212 int32_t k; 263 int32_t k;
213 if (tvisint(o)) { 264 if (tvisint(o)) {
@@ -215,60 +266,61 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
215 goto save_int; 266 goto save_int;
216 } else { 267 } else {
217 /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ 268 /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
218 if (!LJ_DUALNUM) { /* Narrow number constants to integers. */ 269 if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
270 /* Narrow number constants to integers. */
219 lua_Number num = numV(o); 271 lua_Number num = numV(o);
220 k = lj_num2int(num); 272 k = lj_num2int(num);
221 if (num == (lua_Number)k) { /* -0 is never a constant. */ 273 if (num == (lua_Number)k) { /* -0 is never a constant. */
222 save_int: 274 save_int:
223 bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); 275 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
224 if (k < 0) { 276 if (k < 0)
225 char *p = &ctx->sb.buf[ctx->sb.n-1]; 277 p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
226 *p = (*p & 7) | ((k>>27) & 0x18);
227 }
228 continue; 278 continue;
229 } 279 }
230 } 280 }
231 bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); 281 p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
232 if (o->u32.lo >= 0x80000000u) { 282 if (o->u32.lo >= 0x80000000u)
233 char *p = &ctx->sb.buf[ctx->sb.n-1]; 283 p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
234 *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); 284 p = lj_strfmt_wuleb128(p, o->u32.hi);
235 }
236 bcwrite_uleb128(ctx, o->u32.hi);
237 } 285 }
238 } 286 }
287 ctx->sb.w = p;
239} 288}
240 289
241/* Write bytecode instructions. */ 290/* Write bytecode instructions. */
242static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) 291static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
243{ 292{
244 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ 293 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
245#if LJ_HASJIT 294#if LJ_HASJIT
246 uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; 295 uint8_t *q = (uint8_t *)p;
247#endif 296#endif
248 bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); 297 p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
298 UNUSED(ctx);
249#if LJ_HASJIT 299#if LJ_HASJIT
250 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ 300 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
251 if ((pt->flags & PROTO_ILOOP) || pt->trace) { 301 if ((pt->flags & PROTO_ILOOP) || pt->trace) {
252 jit_State *J = L2J(ctx->L); 302 jit_State *J = L2J(sbufL(&ctx->sb));
253 MSize i; 303 MSize i;
254 for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { 304 for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
255 BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; 305 BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
256 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || 306 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
257 op == BC_JFORI) { 307 op == BC_JFORI) {
258 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); 308 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
259 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { 309 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
260 BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); 310 BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
261 memcpy(p, &traceref(J, rd)->startins, 4); 311 memcpy(q, &traceref(J, rd)->startins, 4);
262 } 312 }
263 } 313 }
264 } 314 }
265#endif 315#endif
316 return p;
266} 317}
267 318
268/* Write prototype. */ 319/* Write prototype. */
269static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) 320static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
270{ 321{
271 MSize sizedbg = 0; 322 MSize sizedbg = 0;
323 char *p;
272 324
273 /* Recursively write children of prototype. */ 325 /* Recursively write children of prototype. */
274 if ((pt->flags & PROTO_CHILD)) { 326 if ((pt->flags & PROTO_CHILD)) {
@@ -282,31 +334,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
282 } 334 }
283 335
284 /* Start writing the prototype info to a buffer. */ 336 /* Start writing the prototype info to a buffer. */
285 lj_str_resetbuf(&ctx->sb); 337 p = lj_buf_need(&ctx->sb,
286 ctx->sb.n = 5; /* Leave room for final size. */ 338 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
287 bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); 339 p += 5; /* Leave room for final size. */
288 340
289 /* Write prototype header. */ 341 /* Write prototype header. */
290 bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); 342 *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
291 bcwrite_byte(ctx, pt->numparams); 343 *p++ = pt->numparams;
292 bcwrite_byte(ctx, pt->framesize); 344 *p++ = pt->framesize;
293 bcwrite_byte(ctx, pt->sizeuv); 345 *p++ = pt->sizeuv;
294 bcwrite_uleb128(ctx, pt->sizekgc); 346 p = lj_strfmt_wuleb128(p, pt->sizekgc);
295 bcwrite_uleb128(ctx, pt->sizekn); 347 p = lj_strfmt_wuleb128(p, pt->sizekn);
296 bcwrite_uleb128(ctx, pt->sizebc-1); 348 p = lj_strfmt_wuleb128(p, pt->sizebc-1);
297 if (!ctx->strip) { 349 if (!(ctx->flags & BCDUMP_F_STRIP)) {
298 if (proto_lineinfo(pt)) 350 if (proto_lineinfo(pt))
299 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); 351 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
300 bcwrite_uleb128(ctx, sizedbg); 352 p = lj_strfmt_wuleb128(p, sizedbg);
301 if (sizedbg) { 353 if (sizedbg) {
302 bcwrite_uleb128(ctx, pt->firstline); 354 p = lj_strfmt_wuleb128(p, pt->firstline);
303 bcwrite_uleb128(ctx, pt->numline); 355 p = lj_strfmt_wuleb128(p, pt->numline);
304 } 356 }
305 } 357 }
306 358
307 /* Write bytecode instructions and upvalue refs. */ 359 /* Write bytecode instructions and upvalue refs. */
308 bcwrite_bytecode(ctx, pt); 360 p = bcwrite_bytecode(ctx, p, pt);
309 bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); 361 p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
362 ctx->sb.w = p;
310 363
311 /* Write constants. */ 364 /* Write constants. */
312 bcwrite_kgc(ctx, pt); 365 bcwrite_kgc(ctx, pt);
@@ -314,18 +367,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
314 367
315 /* Write debug info, if not stripped. */ 368 /* Write debug info, if not stripped. */
316 if (sizedbg) { 369 if (sizedbg) {
317 bcwrite_need(ctx, sizedbg); 370 p = lj_buf_more(&ctx->sb, sizedbg);
318 bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); 371 p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
372 ctx->sb.w = p;
319 } 373 }
320 374
321 /* Pass buffer to writer function. */ 375 /* Pass buffer to writer function. */
322 if (ctx->status == 0) { 376 if (ctx->status == 0) {
323 MSize n = ctx->sb.n - 5; 377 MSize n = sbuflen(&ctx->sb) - 5;
324 MSize nn = (lj_fls(n)+8)*9 >> 6; 378 MSize nn = (lj_fls(n)+8)*9 >> 6;
325 ctx->sb.n = 5 - nn; 379 char *q = ctx->sb.b + (5 - nn);
326 bcwrite_uleb128(ctx, n); /* Fill in final size. */ 380 p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
327 lua_assert(ctx->sb.n == 5); 381 lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write");
328 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); 382 ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
329 } 383 }
330} 384}
331 385
@@ -335,20 +389,20 @@ static void bcwrite_header(BCWriteCtx *ctx)
335 GCstr *chunkname = proto_chunkname(ctx->pt); 389 GCstr *chunkname = proto_chunkname(ctx->pt);
336 const char *name = strdata(chunkname); 390 const char *name = strdata(chunkname);
337 MSize len = chunkname->len; 391 MSize len = chunkname->len;
338 lj_str_resetbuf(&ctx->sb); 392 char *p = lj_buf_need(&ctx->sb, 5+5+len);
339 bcwrite_need(ctx, 5+5+len); 393 *p++ = BCDUMP_HEAD1;
340 bcwrite_byte(ctx, BCDUMP_HEAD1); 394 *p++ = BCDUMP_HEAD2;
341 bcwrite_byte(ctx, BCDUMP_HEAD2); 395 *p++ = BCDUMP_HEAD3;
342 bcwrite_byte(ctx, BCDUMP_HEAD3); 396 *p++ = BCDUMP_VERSION;
343 bcwrite_byte(ctx, BCDUMP_VERSION); 397 *p++ = (ctx->flags & (BCDUMP_F_STRIP | BCDUMP_F_FR2)) +
344 bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + 398 LJ_BE*BCDUMP_F_BE +
345 (LJ_BE ? BCDUMP_F_BE : 0) + 399 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0);
346 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0)); 400 if (!(ctx->flags & BCDUMP_F_STRIP)) {
347 if (!ctx->strip) { 401 p = lj_strfmt_wuleb128(p, len);
348 bcwrite_uleb128(ctx, len); 402 p = lj_buf_wmem(p, name, len);
349 bcwrite_block(ctx, name, len);
350 } 403 }
351 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); 404 ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b,
405 (MSize)(p - ctx->sb.b), ctx->wdata);
352} 406}
353 407
354/* Write footer of bytecode dump. */ 408/* Write footer of bytecode dump. */
@@ -356,7 +410,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
356{ 410{
357 if (ctx->status == 0) { 411 if (ctx->status == 0) {
358 uint8_t zero = 0; 412 uint8_t zero = 0;
359 ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); 413 ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
360 } 414 }
361} 415}
362 416
@@ -364,8 +418,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
364static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) 418static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
365{ 419{
366 BCWriteCtx *ctx = (BCWriteCtx *)ud; 420 BCWriteCtx *ctx = (BCWriteCtx *)ud;
367 UNUSED(dummy); 421 UNUSED(L); UNUSED(dummy);
368 lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ 422 lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
369 bcwrite_header(ctx); 423 bcwrite_header(ctx);
370 bcwrite_proto(ctx, ctx->pt); 424 bcwrite_proto(ctx, ctx->pt);
371 bcwrite_footer(ctx); 425 bcwrite_footer(ctx);
@@ -374,20 +428,25 @@ static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
374 428
375/* Write bytecode for a prototype. */ 429/* Write bytecode for a prototype. */
376int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, 430int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
377 int strip) 431 uint32_t flags)
378{ 432{
379 BCWriteCtx ctx; 433 BCWriteCtx ctx;
380 int status; 434 int status;
381 ctx.L = L;
382 ctx.pt = pt; 435 ctx.pt = pt;
383 ctx.wfunc = writer; 436 ctx.wfunc = writer;
384 ctx.wdata = data; 437 ctx.wdata = data;
385 ctx.strip = strip; 438 ctx.heapsz = 0;
439 if ((bc_op(proto_bc(pt)[0]) != BC_NOT) == LJ_FR2) flags |= BCDUMP_F_FR2;
440 ctx.flags = flags;
386 ctx.status = 0; 441 ctx.status = 0;
387 lj_str_initbuf(&ctx.sb); 442#ifdef LUA_USE_ASSERT
443 ctx.g = G(L);
444#endif
445 lj_buf_init(L, &ctx.sb);
388 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); 446 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
389 if (status == 0) status = ctx.status; 447 if (status == 0) status = ctx.status;
390 lj_str_freebuf(G(ctx.L), &ctx.sb); 448 lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
449 bcwrite_heap_resize(&ctx, 0);
391 return status; 450 return status;
392} 451}
393 452
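A detail worth noting in bcwrite_proto() above: the writer reserves 5 bytes at the start of the per-prototype buffer, writes the body, and then backpatches the record length n as a ULEB128 ending exactly at offset 5. The expression nn = (lj_fls(n)+8)*9 >> 6 is the byte count of that ULEB128: with b = lj_fls(n) the 0-based index of the highest set bit, the encoding needs b/7 + 1 bytes, and (b+8)*9 >> 6 computes the same value for every 32-bit n. Since both sides depend only on b, checking each bit position suffices; a small standalone check, using a local fls32() in place of lj_fls() (illustrative only):

#include <stdio.h>
#include <stdint.h>

static int fls32(uint32_t n)               /* Highest set bit, 0-based; n != 0. */
{
  int b = 0;
  while (n >>= 1) b++;
  return b;
}

int main(void)
{
  uint32_t n;
  for (n = 1; n != 0; n <<= 1) {            /* One value per bit position. */
    int b = fls32(n);
    int bytes = b/7 + 1;                    /* Exact ULEB128 length. */
    int formula = ((b + 8) * 9) >> 6;       /* The writer's shortcut. */
    if (bytes != formula)
      printf("mismatch at bit %d\n", b);
  }
  printf("formula matches for all 32 bit values\n");
  return 0;
}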
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..702c5a40
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,303 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_buf_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_err.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_tab.h"
15#include "lj_strfmt.h"
16
17/* -- Buffer management --------------------------------------------------- */
18
19static void buf_grow(SBuf *sb, MSize sz)
20{
21 MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
22 char *b;
23 GCSize flag;
24 if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
25 while (nsz < sz) nsz += nsz;
26 flag = sbufflag(sb);
27 if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */
28 lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW");
29 b = (char *)lj_mem_new(sbufL(sb), nsz);
30 setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW);
31 setgcrefnull(sbufX(sb)->cowref);
32 memcpy(b, sb->b, osz);
33 } else {
34 b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz);
35 }
36 if ((flag & SBUF_FLAG_EXT)) {
37 sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */
38 }
39 /* Adjust buffer pointers. */
40 sb->b = b;
41 sb->w = b + len;
42 sb->e = b + nsz;
43 if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */
44 SBuf *bsb = mref(sbufX(sb)->bsb, SBuf);
45 bsb->b = b;
46 bsb->w = b + len;
47 bsb->e = b + nsz;
48 }
49}
50
51LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
52{
53 lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow");
54 if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
55 lj_err_mem(sbufL(sb));
56 buf_grow(sb, sz);
57 return sb->b;
58}
59
60LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
61{
62 if (sbufisext(sb)) {
63 SBufExt *sbx = (SBufExt *)sb;
64 MSize len = sbufxlen(sbx);
65 if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
66 lj_err_mem(sbufL(sbx));
67 if (len + sz > sbufsz(sbx)) { /* Must grow. */
68 buf_grow((SBuf *)sbx, len + sz);
69 } else if (sbufiscow(sb) || sbufxslack(sbx) < (sbufsz(sbx) >> 3)) {
70 /* Also grow to avoid excessive compactions, if slack < size/8. */
71 buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */
72 return sbx->w;
73 }
74 if (sbx->r != sbx->b) { /* Compact by moving down. */
75 memmove(sbx->b, sbx->r, len);
76 sbx->r = sbx->b;
77 sbx->w = sbx->b + len;
78 lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact");
79 }
80 } else {
81 MSize len = sbuflen(sb);
82 lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
83 if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
84 lj_err_mem(sbufL(sb));
85 buf_grow(sb, len + sz);
86 }
87 return sb->w;
88}
89
90void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
91{
92 char *b = sb->b;
93 MSize osz = (MSize)(sb->e - b);
94 if (osz > 2*LJ_MIN_SBUF) {
95 b = lj_mem_realloc(L, b, osz, (osz >> 1));
96 sb->w = sb->b = b; /* Not supposed to keep data across shrinks. */
97 sb->e = b + (osz >> 1);
98 }
99 lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");
100}
101
102char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
103{
104 SBuf *sb = &G(L)->tmpbuf;
105 setsbufL(sb, L);
106 return lj_buf_need(sb, sz);
107}
108
109#if LJ_HASBUFFER && LJ_HASJIT
110void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref)
111{
112 lua_State *L = sbufL(sbx);
113 lj_bufx_free(L, sbx);
114 lj_bufx_set_cow(L, sbx, p, len);
115 setgcref(sbx->cowref, ref);
116 lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref);
117}
118
119#if LJ_HASFFI
120MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz)
121{
122 lj_buf_more((SBuf *)sbx, sz);
123 return sbufleft(sbx);
124}
125#endif
126#endif
127
128/* -- Low-level buffer put operations ------------------------------------- */
129
130SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
131{
132 char *w = lj_buf_more(sb, len);
133 w = lj_buf_wmem(w, q, len);
134 sb->w = w;
135 return sb;
136}
137
138#if LJ_HASJIT || LJ_HASFFI
139static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c)
140{
141 char *w = lj_buf_more2(sb, 1);
142 *w++ = (char)c;
143 sb->w = w;
144 return sb;
145}
146
147SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
148{
149 char *w = sb->w;
150 if (LJ_LIKELY(w < sb->e)) {
151 *w++ = (char)c;
152 sb->w = w;
153 return sb;
154 }
155 return lj_buf_putchar2(sb, c);
156}
157#endif
158
159SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
160{
161 MSize len = s->len;
162 char *w = lj_buf_more(sb, len);
163 w = lj_buf_wmem(w, strdata(s), len);
164 sb->w = w;
165 return sb;
166}
167
168/* -- High-level buffer put operations ------------------------------------ */
169
170SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
171{
172 MSize len = s->len;
173 char *w = lj_buf_more(sb, len), *e = w+len;
174 const char *q = strdata(s)+len-1;
175 while (w < e)
176 *w++ = *q--;
177 sb->w = w;
178 return sb;
179}
180
181SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
182{
183 MSize len = s->len;
184 char *w = lj_buf_more(sb, len), *e = w+len;
185 const char *q = strdata(s);
186 for (; w < e; w++, q++) {
187 uint32_t c = *(unsigned char *)q;
188#if LJ_TARGET_PPC
189 *w = c + ((c >= 'A' && c <= 'Z') << 5);
190#else
191 if (c >= 'A' && c <= 'Z') c += 0x20;
192 *w = c;
193#endif
194 }
195 sb->w = w;
196 return sb;
197}
198
199SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
200{
201 MSize len = s->len;
202 char *w = lj_buf_more(sb, len), *e = w+len;
203 const char *q = strdata(s);
204 for (; w < e; w++, q++) {
205 uint32_t c = *(unsigned char *)q;
206#if LJ_TARGET_PPC
207 *w = c - ((c >= 'a' && c <= 'z') << 5);
208#else
209 if (c >= 'a' && c <= 'z') c -= 0x20;
210 *w = c;
211#endif
212 }
213 sb->w = w;
214 return sb;
215}
216
217SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
218{
219 MSize len = s->len;
220 if (rep > 0 && len) {
221 uint64_t tlen = (uint64_t)rep * len;
222 char *w;
223 if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
224 lj_err_mem(sbufL(sb));
225 w = lj_buf_more(sb, (MSize)tlen);
226 if (len == 1) { /* Optimize a common case. */
227 uint32_t c = strdata(s)[0];
228 do { *w++ = c; } while (--rep > 0);
229 } else {
230 const char *e = strdata(s) + len;
231 do {
232 const char *q = strdata(s);
233 do { *w++ = *q++; } while (q < e);
234 } while (--rep > 0);
235 }
236 sb->w = w;
237 }
238 return sb;
239}
240
241SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
242{
243 MSize seplen = sep ? sep->len : 0;
244 if (i <= e) {
245 for (;;) {
246 cTValue *o = lj_tab_getint(t, i);
247 char *w;
248 if (!o) {
249 badtype: /* Error: bad element type. */
250 sb->w = (char *)(intptr_t)i; /* Store failing index. */
251 return NULL;
252 } else if (tvisstr(o)) {
253 MSize len = strV(o)->len;
254 w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
255 } else if (tvisint(o)) {
256 w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
257 } else if (tvisnum(o)) {
258 w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
259 } else {
260 goto badtype;
261 }
262 if (i++ == e) {
263 sb->w = w;
264 break;
265 }
266 if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen);
267 sb->w = w;
268 }
269 }
270 return sb;
271}
272
273/* -- Miscellaneous buffer operations ------------------------------------- */
274
275GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
276{
277 return lj_str_new(sbufL(sb), sb->b, sbuflen(sb));
278}
279
280/* Concatenate two strings. */
281GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
282{
283 MSize len1 = s1->len, len2 = s2->len;
284 char *buf = lj_buf_tmp(L, len1 + len2);
285 memcpy(buf, strdata(s1), len1);
286 memcpy(buf+len1, strdata(s2), len2);
287 return lj_str_new(L, buf, len1 + len2);
288}
289
290/* Read ULEB128 from buffer. */
291uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
292{
293 const uint8_t *w = (const uint8_t *)*pp;
294 uint32_t v = *w++;
295 if (LJ_UNLIKELY(v >= 0x80)) {
296 int sh = 0;
297 v &= 0x7f;
298 do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80);
299 }
300 *pp = (const char *)w;
301 return v;
302}
303
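The new SBuf API above follows a consistent write-pointer protocol: callers reserve space with lj_buf_more()/lj_buf_need(), write through the returned pointer, and commit by storing the advanced pointer back into sb->w. A minimal in-tree sketch of that pattern, assuming lj_strfmt_wuleb128() is declared in lj_strfmt.h as used in the bytecode writer above; emit_u32_pair() is a hypothetical helper, not a LuaJIT function:

#include "lj_buf.h"
#include "lj_strfmt.h"

static void emit_u32_pair(SBuf *sb, uint32_t lo, uint32_t hi)
{
  char *p = lj_buf_more(sb, 2*5);          /* Reserve worst case: two ULEB128s. */
  p = lj_strfmt_wuleb128(p, lo);           /* Advance p past each value... */
  p = lj_strfmt_wuleb128(p, hi);
  sb->w = p;                               /* ...and commit the new write position. */
}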
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..744e5747
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,198 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BUF_H
7#define _LJ_BUF_H
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_str.h"
12
13/* Resizable string buffers. */
14
15/* The SBuf struct definition is in lj_obj.h:
16** char *w; Write pointer.
17** char *e; End pointer.
18** char *b; Base pointer.
19** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB.
20*/
21
22/* Extended string buffer. */
23typedef struct SBufExt {
24 SBufHeader;
25 union {
26 GCRef cowref; /* Copy-on-write object reference. */
27 MRef bsb; /* Borrowed string buffer. */
28 };
29 char *r; /* Read pointer. */
30 GCRef dict_str; /* Serialization string dictionary table. */
31 GCRef dict_mt; /* Serialization metatable dictionary table. */
32 int depth; /* Remaining recursion depth. */
33} SBufExt;
34
35#define sbufsz(sb) ((MSize)((sb)->e - (sb)->b))
36#define sbuflen(sb) ((MSize)((sb)->w - (sb)->b))
37#define sbufleft(sb) ((MSize)((sb)->e - (sb)->w))
38#define sbufxlen(sbx) ((MSize)((sbx)->w - (sbx)->r))
39#define sbufxslack(sbx) ((MSize)((sbx)->r - (sbx)->b))
40
41#define SBUF_MASK_FLAG (7)
42#define SBUF_MASK_L (~(GCSize)SBUF_MASK_FLAG)
43#define SBUF_FLAG_EXT 1 /* Extended string buffer. */
44#define SBUF_FLAG_COW 2 /* Copy-on-write buffer. */
45#define SBUF_FLAG_BORROW 4 /* Borrowed string buffer. */
46
47#define sbufL(sb) \
48 ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L))
49#define setsbufL(sb, l) (setmref((sb)->L, (l)))
50#define setsbufXL(sb, l, flag) \
51 (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag)))
52#define setsbufXL_(sb, l) \
53 (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG)))
54
55#define sbufflag(sb) (mrefu((sb)->L))
56#define sbufisext(sb) (sbufflag((sb)) & SBUF_FLAG_EXT)
57#define sbufiscow(sb) (sbufflag((sb)) & SBUF_FLAG_COW)
58#define sbufisborrow(sb) (sbufflag((sb)) & SBUF_FLAG_BORROW)
59#define sbufiscoworborrow(sb) (sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
60#define sbufX(sb) \
61 (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
62#define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag)))
63
64#define tvisbuf(o) \
65 (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER)
66#define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o))))
67
68/* Buffer management */
69LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
70LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
71LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
72LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
73
74static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
75{
76 setsbufL(sb, L);
77 sb->w = sb->e = sb->b = NULL;
78}
79
80static LJ_AINLINE void lj_buf_reset(SBuf *sb)
81{
82 sb->w = sb->b;
83}
84
85static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
86{
87 SBuf *sb = &G(L)->tmpbuf;
88 setsbufL(sb, L);
89 lj_buf_reset(sb);
90 return sb;
91}
92
93static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
94{
95 lj_assertG(!sbufisext(sb), "bad free of SBufExt");
96 lj_mem_free(g, sb->b, sbufsz(sb));
97}
98
99static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
100{
101 if (LJ_UNLIKELY(sz > sbufsz(sb)))
102 return lj_buf_need2(sb, sz);
103 return sb->b;
104}
105
106static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
107{
108 if (LJ_UNLIKELY(sz > sbufleft(sb)))
109 return lj_buf_more2(sb, sz);
110 return sb->w;
111}
112
113/* Extended buffer management */
114static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx)
115{
116 memset(sbx, 0, sizeof(SBufExt));
117 setsbufXL(sbx, L, SBUF_FLAG_EXT);
118}
119
120static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
121{
122 setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW);
123 setmref(sbx->bsb, sb);
124 sbx->r = sbx->w = sbx->b = sb->b;
125 sbx->e = sb->e;
126}
127
128static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx,
129 const char *p, MSize len)
130{
131 setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW);
132 sbx->r = sbx->b = (char *)p;
133 sbx->w = sbx->e = (char *)p + len;
134}
135
136static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
137{
138 if (sbufiscow(sbx)) {
139 setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW));
140 setgcrefnull(sbx->cowref);
141 sbx->b = sbx->e = NULL;
142 }
143 sbx->r = sbx->w = sbx->b;
144}
145
146static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
147{
148 if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
149 setsbufXL(sbx, L, SBUF_FLAG_EXT);
150 setgcrefnull(sbx->cowref);
151 sbx->r = sbx->w = sbx->b = sbx->e = NULL;
152}
153
154#if LJ_HASBUFFER && LJ_HASJIT
155LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o);
156#if LJ_HASFFI
157LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz);
158#endif
159#endif
160
161/* Low-level buffer put operations */
162LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
163#if LJ_HASJIT || LJ_HASFFI
164LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
165#endif
166LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
167
168static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
169{
170 return (char *)memcpy(p, q, len) + len;
171}
172
173static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
174{
175 char *w = lj_buf_more(sb, 1);
176 *w++ = (char)c;
177 sb->w = w;
178}
179
180/* High-level buffer put operations */
181LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
182LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
183LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
184LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
185LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
186 int32_t i, int32_t e);
187
188/* Miscellaneous buffer operations */
189LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
190LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
191LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
192
193static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
194{
195 return lj_str_new(L, sb->b, sbuflen(sb));
196}
197
198#endif
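The SBUF_MASK_FLAG machinery above packs the EXT/COW/BORROW flags into the three least significant bits of the stored lua_State reference, which assumes lua_State allocations are at least 8-byte aligned so those bits are known to be zero. A plain C model of that pointer tagging (a sketch of the idea, not the setsbufXL()/sbufL()/sbufflag() macros themselves):

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

#define FLAG_MASK ((uintptr_t)7)            /* Mirrors SBUF_MASK_FLAG. */

static uintptr_t tag(void *L, uintptr_t flags)
{
  assert(((uintptr_t)L & FLAG_MASK) == 0);  /* Needs 3 zero low bits. */
  return (uintptr_t)L | flags;
}

static void *untag_ptr(uintptr_t v) { return (void *)(v & ~FLAG_MASK); }
static uintptr_t untag_flags(uintptr_t v) { return v & FLAG_MASK; }

int main(void)
{
  void *L = malloc(64);                     /* Stand-in for a lua_State. */
  uintptr_t v = tag(L, 1 | 4);              /* e.g. EXT and BORROW bits set. */
  printf("ptr ok: %d  flags: %lu\n",
         untag_ptr(v) == L, (unsigned long)untag_flags(v));
  free(L);
  return 0;
}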
diff --git a/src/lj_carith.c b/src/lj_carith.c
index bad5fe66..ffda626e 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_tab.h" 12#include "lj_tab.h"
13#include "lj_meta.h" 13#include "lj_meta.h"
14#include "lj_ir.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_cconv.h" 16#include "lj_cconv.h"
16#include "lj_cdata.h" 17#include "lj_cdata.h"
17#include "lj_carith.h" 18#include "lj_carith.h"
19#include "lj_strscan.h"
18 20
19/* -- C data arithmetic --------------------------------------------------- */ 21/* -- C data arithmetic --------------------------------------------------- */
20 22
@@ -124,7 +126,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
124 setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); 126 setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2));
125 return 1; 127 return 1;
126 } else { 128 } else {
127 lua_assert(mm == MM_le); 129 lj_assertL(mm == MM_le, "bad metamethod %d", mm);
128 setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); 130 setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2));
129 return 1; 131 return 1;
130 } 132 }
@@ -210,7 +212,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
210 *up = lj_carith_powu64(u0, u1); 212 *up = lj_carith_powu64(u0, u1);
211 break; 213 break;
212 case MM_unm: *up = ~u0+1u; break; 214 case MM_unm: *up = ~u0+1u; break;
213 default: lua_assert(0); break; 215 default:
216 lj_assertL(0, "bad metamethod %d", mm);
217 break;
214 } 218 }
215 lj_gc_check(L); 219 lj_gc_check(L);
216 return 1; 220 return 1;
@@ -276,6 +280,79 @@ int lj_carith_op(lua_State *L, MMS mm)
276 return lj_carith_meta(L, cts, &ca, mm); 280 return lj_carith_meta(L, cts, &ca, mm);
277} 281}
278 282
283/* -- 64 bit bit operations helpers --------------------------------------- */
284
285#if LJ_64
286#define B64DEF(name) \
287 static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
288#else
289/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
290#define B64DEF(name) \
291 uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
292#endif
293
294B64DEF(shl64) { return x << (sh&63); }
295B64DEF(shr64) { return x >> (sh&63); }
296B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }
297B64DEF(rol64) { return lj_rol(x, (sh&63)); }
298B64DEF(ror64) { return lj_ror(x, (sh&63)); }
299
300#undef B64DEF
301
302uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
303{
304 switch (op) {
305 case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
306 case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
307 case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
308 case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
309 case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
310 default:
311 lj_assertX(0, "bad shift op %d", op);
312 break;
313 }
314 return x;
315}
316
317/* Equivalent to lj_lib_checkbit(), but handles cdata. */
318uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
319{
320 TValue *o = L->base + narg-1;
321 if (o >= L->top) {
322 err:
323 lj_err_argt(L, narg, LUA_TNUMBER);
324 } else if (LJ_LIKELY(tvisnumber(o))) {
325 /* Handled below. */
326 } else if (tviscdata(o)) {
327 CTState *cts = ctype_cts(L);
328 uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
329 CTypeID sid = cdataV(o)->ctypeid;
330 CType *s = ctype_get(cts, sid);
331 uint64_t x;
332 if (ctype_isref(s->info)) {
333 sp = *(void **)sp;
334 sid = ctype_cid(s->info);
335 }
336 s = ctype_raw(cts, sid);
337 if (ctype_isenum(s->info)) s = ctype_child(cts, s);
338 if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
339 CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
340 *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
341 else if (!*id)
342 *id = CTID_INT64; /* Use int64_t, unless already set. */
343 lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
344 (uint8_t *)&x, sp, CCF_ARG(narg));
345 return x;
346 } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
347 goto err;
348 }
349 if (LJ_LIKELY(tvisint(o))) {
350 return (uint32_t)intV(o);
351 } else {
352 return (uint32_t)lj_num2bit(numV(o));
353 }
354}
355
279/* -- 64 bit integer arithmetic helpers ----------------------------------- */ 356/* -- 64 bit integer arithmetic helpers ----------------------------------- */
280 357
281#if LJ_32 && LJ_HASJIT 358#if LJ_32 && LJ_HASJIT
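The lj_carith_*64() helpers added above all reduce the shift count mod 64 (sh & 63), matching Lua BitOp semantics, and lj_carith_shift64() dispatches on the IR opcode offset from IR_BSHL. A standalone sketch of those semantics, assuming lj_rol()/lj_ror() behave like ordinary rotates; rol64()/sar64() below are models, not the LuaJIT macros:

#include <stdint.h>
#include <stdio.h>

static uint64_t rol64(uint64_t x, int32_t sh)
{
  sh &= 63;
  return (x << sh) | (x >> ((64 - sh) & 63));  /* (64-0)&63 == 0 avoids UB for sh=0. */
}

static uint64_t sar64(uint64_t x, int32_t sh)
{
  /* Right shift of a negative signed value is implementation-defined in ISO C;
  ** arithmetic shift is assumed here, as on the compilers LuaJIT targets. */
  return (uint64_t)((int64_t)x >> (sh & 63));
}

int main(void)
{
  uint64_t x = 0x8000000000000001ULL;
  printf("%llx %llx %llx\n",
         (unsigned long long)rol64(x, 4),
         (unsigned long long)rol64(x, 68),      /* Same as 4: count is mod 64. */
         (unsigned long long)sar64(x, 1));      /* Sign bit is replicated. */
  return 0;
}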
diff --git a/src/lj_carith.h b/src/lj_carith.h
index d87c2d07..f124c27c 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,16 @@
12 12
13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); 13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
14 14
15#if LJ_32
16LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
17LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
18LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
19LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
20LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
21#endif
22LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
23LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
24
15#if LJ_32 && LJ_HASJIT 25#if LJ_32 && LJ_HASJIT
16LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); 26LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
17#endif 27#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 79e726c6..5f95f5d8 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -21,12 +20,15 @@
21#if LJ_TARGET_X86 20#if LJ_TARGET_X86
22/* -- x86 calling conventions --------------------------------------------- */ 21/* -- x86 calling conventions --------------------------------------------- */
23 22
23#define CCALL_PUSH(arg) \
24 *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
25
24#if LJ_ABI_WIN 26#if LJ_ABI_WIN
25 27
26#define CCALL_HANDLE_STRUCTRET \ 28#define CCALL_HANDLE_STRUCTRET \
27 /* Return structs bigger than 8 by reference (on stack only). */ \ 29 /* Return structs bigger than 8 by reference (on stack only). */ \
28 cc->retref = (sz > 8); \ 30 cc->retref = (sz > 8); \
29 if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; 31 if (cc->retref) CCALL_PUSH(dp);
30 32
31#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET 33#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
32 34
@@ -41,7 +43,7 @@
41 if (ngpr < maxgpr) \ 43 if (ngpr < maxgpr) \
42 cc->gpr[ngpr++] = (GPRArg)dp; \ 44 cc->gpr[ngpr++] = (GPRArg)dp; \
43 else \ 45 else \
44 cc->stack[nsp++] = (GPRArg)dp; \ 46 CCALL_PUSH(dp); \
45 } else { /* Struct with single FP field ends up in FPR. */ \ 47 } else { /* Struct with single FP field ends up in FPR. */ \
46 cc->resx87 = ccall_classify_struct(cts, ctr); \ 48 cc->resx87 = ccall_classify_struct(cts, ctr); \
47 } 49 }
@@ -57,7 +59,7 @@
57 if (ngpr < maxgpr) \ 59 if (ngpr < maxgpr) \
58 cc->gpr[ngpr++] = (GPRArg)dp; \ 60 cc->gpr[ngpr++] = (GPRArg)dp; \
59 else \ 61 else \
60 cc->stack[nsp++] = (GPRArg)dp; 62 CCALL_PUSH(dp);
61 63
62#endif 64#endif
63 65
@@ -68,7 +70,7 @@
68 if (ngpr < maxgpr) \ 70 if (ngpr < maxgpr) \
69 cc->gpr[ngpr++] = (GPRArg)dp; \ 71 cc->gpr[ngpr++] = (GPRArg)dp; \
70 else \ 72 else \
71 cc->stack[nsp++] = (GPRArg)dp; \ 73 CCALL_PUSH(dp); \
72 } 74 }
73 75
74#endif 76#endif
@@ -279,8 +281,8 @@
279 if (ngpr < maxgpr) { \ 281 if (ngpr < maxgpr) { \
280 dp = &cc->gpr[ngpr]; \ 282 dp = &cc->gpr[ngpr]; \
281 if (ngpr + n > maxgpr) { \ 283 if (ngpr + n > maxgpr) { \
282 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ 284 nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
283 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ 285 if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
284 ngpr = maxgpr; \ 286 ngpr = maxgpr; \
285 } else { \ 287 } else { \
286 ngpr += n; \ 288 ngpr += n; \
@@ -291,56 +293,83 @@
291#define CCALL_HANDLE_RET \ 293#define CCALL_HANDLE_RET \
292 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; 294 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
293 295
294#elif LJ_TARGET_PPC 296#elif LJ_TARGET_ARM64
295/* -- PPC calling conventions --------------------------------------------- */ 297/* -- ARM64 calling conventions ------------------------------------------- */
296 298
297#define CCALL_HANDLE_STRUCTRET \ 299#define CCALL_HANDLE_STRUCTRET \
298 cc->retref = 1; /* Return all structs by reference. */ \ 300 cc->retref = !ccall_classify_struct(cts, ctr); \
299 cc->gpr[ngpr++] = (GPRArg)dp; 301 if (cc->retref) cc->retp = dp;
302
303#define CCALL_HANDLE_STRUCTRET2 \
304 unsigned int cl = ccall_classify_struct(cts, ctr); \
305 if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
306 CTSize i = (cl >> 8) - 1; \
307 do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
308 } else { \
309 if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
310 memcpy(dp, sp, ctr->size); \
311 }
300 312
301#define CCALL_HANDLE_COMPLEXRET \ 313#define CCALL_HANDLE_COMPLEXRET \
302 /* Complex values are returned in 2 or 4 GPRs. */ \ 314 /* Complex values are returned in one or two FPRs. */ \
303 cc->retref = 0; 315 cc->retref = 0;
304 316
305#define CCALL_HANDLE_COMPLEXRET2 \ 317#define CCALL_HANDLE_COMPLEXRET2 \
306 memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ 318 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
319 ((float *)dp)[0] = cc->fpr[0].f; \
320 ((float *)dp)[1] = cc->fpr[1].f; \
321 } else { /* Copy complex double from FPRs. */ \
322 ((double *)dp)[0] = cc->fpr[0].d; \
323 ((double *)dp)[1] = cc->fpr[1].d; \
324 }
307 325
308#define CCALL_HANDLE_STRUCTARG \ 326#define CCALL_HANDLE_STRUCTARG \
309 rp = cdataptr(lj_cdata_new(cts, did, sz)); \ 327 unsigned int cl = ccall_classify_struct(cts, d); \
310 sz = CTSIZE_PTR; /* Pass all structs by reference. */ 328 if (cl == 0) { /* Pass struct by reference. */ \
329 rp = cdataptr(lj_cdata_new(cts, did, sz)); \
330 sz = CTSIZE_PTR; \
331 } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \
332 isfp = (cl & 4) ? 2 : 1; \
333 } /* else: Pass struct in GPRs or on stack. */
311 334
312#define CCALL_HANDLE_COMPLEXARG \ 335#define CCALL_HANDLE_COMPLEXARG \
313 /* Pass complex by value in 2 or 4 GPRs. */ 336 /* Pass complex by value in separate (!) FPRs or on stack. */ \
337 isfp = sz == 2*sizeof(float) ? 2 : 1;
314 338
315#define CCALL_HANDLE_REGARG \ 339#define CCALL_HANDLE_REGARG \
316 if (isfp) { /* Try to pass argument in FPRs. */ \ 340 if (LJ_TARGET_OSX && isva) { \
317 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 341 /* IOS: All variadic arguments are on the stack. */ \
342 } else if (isfp) { /* Try to pass argument in FPRs. */ \
343 int n2 = ctype_isvector(d->info) ? 1 : \
344 isfp == 1 ? n : (d->size >> (4-isfp)); \
345 if (nfpr + n2 <= CCALL_NARG_FPR) { \
318 dp = &cc->fpr[nfpr]; \ 346 dp = &cc->fpr[nfpr]; \
319 nfpr += 1; \ 347 nfpr += n2; \
320 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
321 goto done; \ 348 goto done; \
349 } else { \
350 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
322 } \ 351 } \
323 } else { /* Try to pass argument in GPRs. */ \ 352 } else { /* Try to pass argument in GPRs. */ \
324 if (n > 1) { \ 353 if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
325 lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ 354 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
326 if (ctype_isinteger(d->info)) \
327 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
328 else if (ngpr + n > maxgpr) \
329 ngpr = maxgpr; /* Prevent reordering. */ \
330 } \
331 if (ngpr + n <= maxgpr) { \ 355 if (ngpr + n <= maxgpr) { \
332 dp = &cc->gpr[ngpr]; \ 356 dp = &cc->gpr[ngpr]; \
333 ngpr += n; \ 357 ngpr += n; \
334 goto done; \ 358 goto done; \
359 } else { \
360 ngpr = maxgpr; /* Prevent reordering. */ \
335 } \ 361 } \
336 } 362 }
337 363
364#if LJ_BE
338#define CCALL_HANDLE_RET \ 365#define CCALL_HANDLE_RET \
339 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 366 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
340 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ 367 sp = (uint8_t *)&cc->fpr[0].f;
368#endif
341 369
342#elif LJ_TARGET_PPCSPE 370
343/* -- PPC/SPE calling conventions ----------------------------------------- */ 371#elif LJ_TARGET_PPC
372/* -- PPC calling conventions --------------------------------------------- */
344 373
345#define CCALL_HANDLE_STRUCTRET \ 374#define CCALL_HANDLE_STRUCTRET \
346 cc->retref = 1; /* Return all structs by reference. */ \ 375 cc->retref = 1; /* Return all structs by reference. */ \
@@ -360,12 +389,13 @@
360#define CCALL_HANDLE_COMPLEXARG \ 389#define CCALL_HANDLE_COMPLEXARG \
361 /* Pass complex by value in 2 or 4 GPRs. */ 390 /* Pass complex by value in 2 or 4 GPRs. */
362 391
363/* PPC/SPE has a softfp ABI. */ 392#define CCALL_HANDLE_GPR \
364#define CCALL_HANDLE_REGARG \ 393 /* Try to pass argument in GPRs. */ \
365 if (n > 1) { /* Doesn't fit in a single GPR? */ \ 394 if (n > 1) { \
366 lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \ 395 /* int64_t or complex (float). */ \
367 if (n == 2) \ 396 lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \
368 ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \ 397 if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
398 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
369 else if (ngpr + n > maxgpr) \ 399 else if (ngpr + n > maxgpr) \
370 ngpr = maxgpr; /* Prevent reordering. */ \ 400 ngpr = maxgpr; /* Prevent reordering. */ \
371 } \ 401 } \
@@ -373,10 +403,32 @@
373 dp = &cc->gpr[ngpr]; \ 403 dp = &cc->gpr[ngpr]; \
374 ngpr += n; \ 404 ngpr += n; \
375 goto done; \ 405 goto done; \
406 } \
407
408#if LJ_ABI_SOFTFP
409#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
410#else
411#define CCALL_HANDLE_REGARG \
412 if (isfp) { /* Try to pass argument in FPRs. */ \
413 if (nfpr + 1 <= CCALL_NARG_FPR) { \
414 dp = &cc->fpr[nfpr]; \
415 nfpr += 1; \
416 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
417 goto done; \
418 } \
419 } else { \
420 CCALL_HANDLE_GPR \
376 } 421 }
422#endif
377 423
378#elif LJ_TARGET_MIPS 424#if !LJ_ABI_SOFTFP
379/* -- MIPS calling conventions -------------------------------------------- */ 425#define CCALL_HANDLE_RET \
426 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
427 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
428#endif
429
430#elif LJ_TARGET_MIPS32
431/* -- MIPS o32 calling conventions ---------------------------------------- */
380 432
381#define CCALL_HANDLE_STRUCTRET \ 433#define CCALL_HANDLE_STRUCTRET \
382 cc->retref = 1; /* Return all structs by reference. */ \ 434 cc->retref = 1; /* Return all structs by reference. */ \
@@ -386,6 +438,18 @@
386 /* Complex values are returned in 1 or 2 FPRs. */ \ 438 /* Complex values are returned in 1 or 2 FPRs. */ \
387 cc->retref = 0; 439 cc->retref = 0;
388 440
441#if LJ_ABI_SOFTFP
442#define CCALL_HANDLE_COMPLEXRET2 \
443 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
444 ((intptr_t *)dp)[0] = cc->gpr[0]; \
445 ((intptr_t *)dp)[1] = cc->gpr[1]; \
446 } else { /* Copy complex double from GPRs. */ \
447 ((intptr_t *)dp)[0] = cc->gpr[0]; \
448 ((intptr_t *)dp)[1] = cc->gpr[1]; \
449 ((intptr_t *)dp)[2] = cc->gpr[2]; \
450 ((intptr_t *)dp)[3] = cc->gpr[3]; \
451 }
452#else
389#define CCALL_HANDLE_COMPLEXRET2 \ 453#define CCALL_HANDLE_COMPLEXRET2 \
390 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ 454 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
391 ((float *)dp)[0] = cc->fpr[0].f; \ 455 ((float *)dp)[0] = cc->fpr[0].f; \
@@ -394,6 +458,7 @@
394 ((double *)dp)[0] = cc->fpr[0].d; \ 458 ((double *)dp)[0] = cc->fpr[0].d; \
395 ((double *)dp)[1] = cc->fpr[1].d; \ 459 ((double *)dp)[1] = cc->fpr[1].d; \
396 } 460 }
461#endif
397 462
398#define CCALL_HANDLE_STRUCTARG \ 463#define CCALL_HANDLE_STRUCTARG \
399 /* Pass all structs by value in registers and/or on the stack. */ 464 /* Pass all structs by value in registers and/or on the stack. */
@@ -401,6 +466,22 @@
401#define CCALL_HANDLE_COMPLEXARG \ 466#define CCALL_HANDLE_COMPLEXARG \
402 /* Pass complex by value in 2 or 4 GPRs. */ 467 /* Pass complex by value in 2 or 4 GPRs. */
403 468
469#define CCALL_HANDLE_GPR \
470 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
471 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
472 if (ngpr < maxgpr) { \
473 dp = &cc->gpr[ngpr]; \
474 if (ngpr + n > maxgpr) { \
475 nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
476 if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
477 ngpr = maxgpr; \
478 } else { \
479 ngpr += n; \
480 } \
481 goto done; \
482 }
483
484#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
404#define CCALL_HANDLE_REGARG \ 485#define CCALL_HANDLE_REGARG \
405 if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ 486 if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
406 /* Try to pass argument in FPRs. */ \ 487 /* Try to pass argument in FPRs. */ \
@@ -409,25 +490,91 @@
409 goto done; \ 490 goto done; \
410 } else { /* Try to pass argument in GPRs. */ \ 491 } else { /* Try to pass argument in GPRs. */ \
411 nfpr = CCALL_NARG_FPR; \ 492 nfpr = CCALL_NARG_FPR; \
412 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ 493 CCALL_HANDLE_GPR \
413 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 494 }
414 if (ngpr < maxgpr) { \ 495#else /* MIPS32 soft-float */
415 dp = &cc->gpr[ngpr]; \ 496#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
416 if (ngpr + n > maxgpr) { \ 497#endif
417 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ 498
418 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ 499#if !LJ_ABI_SOFTFP
419 ngpr = maxgpr; \ 500/* On MIPS64 soft-float, position of float return values is endian-dependant. */
420 } else { \ 501#define CCALL_HANDLE_RET \
421 ngpr += n; \ 502 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
422 } \ 503 sp = (uint8_t *)&cc->fpr[0].f;
423 goto done; \ 504#endif
424 } \ 505
506#elif LJ_TARGET_MIPS64
507/* -- MIPS n64 calling conventions ---------------------------------------- */
508
509#define CCALL_HANDLE_STRUCTRET \
510 cc->retref = !(sz <= 16); \
511 if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
512
513#define CCALL_HANDLE_STRUCTRET2 \
514 ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
515
516#define CCALL_HANDLE_COMPLEXRET \
517 /* Complex values are returned in 1 or 2 FPRs. */ \
518 cc->retref = 0;
519
520#if LJ_ABI_SOFTFP /* MIPS64 soft-float */
521
522#define CCALL_HANDLE_COMPLEXRET2 \
523 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
524 ((intptr_t *)dp)[0] = cc->gpr[0]; \
525 } else { /* Copy complex double from GPRs. */ \
526 ((intptr_t *)dp)[0] = cc->gpr[0]; \
527 ((intptr_t *)dp)[1] = cc->gpr[1]; \
528 }
529
530#define CCALL_HANDLE_COMPLEXARG \
531 /* Pass complex by value in 2 or 4 GPRs. */
532
533/* Position of soft-float 'float' return value depends on endianness. */
534#define CCALL_HANDLE_RET \
535 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
536 sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
537
538#else /* MIPS64 hard-float */
539
540#define CCALL_HANDLE_COMPLEXRET2 \
541 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
542 ((float *)dp)[0] = cc->fpr[0].f; \
543 ((float *)dp)[1] = cc->fpr[1].f; \
544 } else { /* Copy complex double from FPRs. */ \
545 ((double *)dp)[0] = cc->fpr[0].d; \
546 ((double *)dp)[1] = cc->fpr[1].d; \
547 }
548
549#define CCALL_HANDLE_COMPLEXARG \
550 if (sz == 2*sizeof(float)) { \
551 isfp = 2; \
552 if (ngpr < maxgpr) \
553 sz *= 2; \
425 } 554 }
426 555
427#define CCALL_HANDLE_RET \ 556#define CCALL_HANDLE_RET \
428 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 557 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
429 sp = (uint8_t *)&cc->fpr[0].f; 558 sp = (uint8_t *)&cc->fpr[0].f;
430 559
560#endif
561
562#define CCALL_HANDLE_STRUCTARG \
563 /* Pass all structs by value in registers and/or on the stack. */
564
565#define CCALL_HANDLE_REGARG \
566 if (ngpr < maxgpr) { \
567 dp = &cc->gpr[ngpr]; \
568 if (ngpr + n > maxgpr) { \
569 nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \
570 if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \
571 ngpr = maxgpr; \
572 } else { \
573 ngpr += n; \
574 } \
575 goto done; \
576 }
577
431#else 578#else
432#error "Missing calling convention definitions for this architecture" 579#error "Missing calling convention definitions for this architecture"
433#endif 580#endif
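
The MIPS64 n64 register-argument macro above now counts the overflow area in bytes rather than GPR-sized slots. A minimal standalone sketch of that spill arithmetic, assuming the n64 values of 8 argument GPRs, 8-byte pointers and a 31-slot stack area (the helper name and the simplified control flow are illustrative, not LuaJIT's):

#include <stdio.h>

#define MAXGPR  8                      /* CCALL_NARG_GPR on MIPS64 n64. */
#define PTRSZ   8                      /* CTSIZE_PTR on a 64-bit target. */
#define STACKSZ (31 * PTRSZ)           /* CCALL_SIZE_STACK. */

/* Place one argument of 'sz' bytes: GPRs first, overflow spills to the stack. */
static int place_arg(unsigned sz, unsigned *ngpr, unsigned *nsp)
{
  unsigned n = (sz + PTRSZ-1) / PTRSZ; /* GPRs or stack slots needed. */
  if (*ngpr < MAXGPR) {
    if (*ngpr + n > MAXGPR) {          /* Argument straddles GPRs and stack. */
      *nsp += (*ngpr + n - MAXGPR) * PTRSZ;
      *ngpr = MAXGPR;
    } else {
      *ngpr += n;
    }
  } else {
    *nsp += n * PTRSZ;                 /* Entirely on the stack, in bytes. */
  }
  return *nsp > STACKSZ;               /* Too many arguments -> err_nyi. */
}

int main(void)
{
  unsigned ngpr = 0, nsp = 0, i;
  for (i = 0; i < 10; i++)             /* Ten 8-byte arguments. */
    place_arg(8, &ngpr, &nsp);
  printf("ngpr=%u nsp=%u bytes\n", ngpr, nsp);  /* ngpr=8 nsp=16 */
  return 0;
}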
@@ -497,7 +644,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
497 ccall_classify_struct(cts, ct, rcl, ofs); 644 ccall_classify_struct(cts, ct, rcl, ofs);
498 } else { 645 } else {
499 int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; 646 int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT;
500 lua_assert(ctype_hassize(ct->info)); 647 lj_assertCTS(ctype_hassize(ct->info),
648 "classify ctype %08x without size", ct->info);
501 if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ 649 if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */
502 rcl[(ofs >= 8)] |= cl; 650 rcl[(ofs >= 8)] |= cl;
503 } 651 }
@@ -522,12 +670,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
522} 670}
523 671
524/* Try to split up a small struct into registers. */ 672/* Try to split up a small struct into registers. */
525static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl) 673static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl)
526{ 674{
527 MSize ngpr = cc->ngpr, nfpr = cc->nfpr; 675 MSize ngpr = cc->ngpr, nfpr = cc->nfpr;
528 uint32_t i; 676 uint32_t i;
677 UNUSED(cts);
529 for (i = 0; i < 2; i++) { 678 for (i = 0; i < 2; i++) {
530 lua_assert(!(rcl[i] & CCALL_RCL_MEM)); 679 lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg");
531 if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ 680 if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */
532 if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */ 681 if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */
533 cc->gpr[ngpr++] = dp[i]; 682 cc->gpr[ngpr++] = dp[i];
@@ -548,11 +697,13 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
548 dp[0] = dp[1] = 0; 697 dp[0] = dp[1] = 0;
549 /* Convert to temp. struct. */ 698 /* Convert to temp. struct. */
550 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); 699 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
551 if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */ 700 if (ccall_struct_reg(cc, cts, dp, rcl)) {
552 MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; 701 /* Register overflow? Pass on stack. */
553 if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ 702 MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
554 cc->nsp = nsp + n; 703 if (nsp + sz > CCALL_SIZE_STACK)
555 memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR); 704 return 1; /* Too many arguments. */
705 cc->nsp = nsp + sz;
706 memcpy((uint8_t *)cc->stack + nsp, dp, sz);
556 } 707 }
557 return 0; /* Ok. */ 708 return 0; /* Ok. */
558} 709}
@@ -621,6 +772,125 @@ noth: /* Not a homogeneous float/double aggregate. */
621 772
622#endif 773#endif
623 774
775/* -- ARM64 ABI struct classification ------------------------------------- */
776
777#if LJ_TARGET_ARM64
778
779/* Classify a struct based on its fields. */
780static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
781{
782 CTSize sz = ct->size;
783 unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
784 while (ct->sib) {
785 CType *sct;
786 ct = ctype_get(cts, ct->sib);
787 if (ctype_isfield(ct->info)) {
788 sct = ctype_rawchild(cts, ct);
789 if (ctype_isfp(sct->info)) {
790 r |= sct->size;
791 if (!isu) n++; else if (n == 0) n = 1;
792 } else if (ctype_iscomplex(sct->info)) {
793 r |= (sct->size >> 1);
794 if (!isu) n += 2; else if (n < 2) n = 2;
795 } else if (ctype_isstruct(sct->info)) {
796 goto substruct;
797 } else {
798 goto noth;
799 }
800 } else if (ctype_isbitfield(ct->info)) {
801 goto noth;
802 } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
803 sct = ctype_rawchild(cts, ct);
804 substruct:
805 if (sct->size > 0) {
806 unsigned int s = ccall_classify_struct(cts, sct);
807 if (s <= 1) goto noth;
808 r |= (s & 255);
809 if (!isu) n += (s >> 8); else if (n < (s >> 8)) n = (s >> 8);
810 }
811 }
812 }
813 if ((r == 4 || r == 8) && n <= 4)
814 return r + (n << 8);
815noth: /* Not a homogeneous float/double aggregate. */
816 return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
817}
818
819#endif
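
The ARM64 classifier above reports a homogeneous float/double aggregate as its element size in the low byte plus the element count shifted into the high byte; anything else collapses to 1 (small struct handled in GPRs) or 0 (handled by reference). A standalone sketch of just that encoding, with the field walk omitted and the example structs assumed for illustration:

#include <stdio.h>

/* Low byte: element size (4 = float HFA, 8 = double HFA).
** High byte: element count (at most 4).
** 1 = non-HFA struct of <= 16 bytes, 0 = too large, by reference. */
static unsigned classify_hfa(unsigned elemsz, unsigned nelem, unsigned totalsz)
{
  unsigned r = elemsz, n = nelem;
  if ((r == 4 || r == 8) && n <= 4)
    return r + (n << 8);
  return totalsz <= 16;   /* noth: not a homogeneous FP aggregate. */
}

int main(void)
{
  printf("0x%x\n", classify_hfa(4, 3, 12));  /* struct {float x,y,z;}  -> 0x304 */
  printf("0x%x\n", classify_hfa(8, 2, 16));  /* struct {double a,b;}   -> 0x208 */
  printf("0x%x\n", classify_hfa(8, 5, 40));  /* struct {double a[5];}  -> 0x0   */
  return 0;
}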
820
821/* -- MIPS64 ABI struct classification ---------------------------- */
822
823#if LJ_TARGET_MIPS64
824
825#define FTYPE_FLOAT 1
826#define FTYPE_DOUBLE 2
827
828/* Classify FP fields (max. 2) and their types. */
829static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
830{
831 int n = 0, ft = 0;
832 if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
833 goto noth;
834 while (ct->sib) {
835 CType *sct;
836 ct = ctype_get(cts, ct->sib);
837 if (n == 2) {
838 goto noth;
839 } else if (ctype_isfield(ct->info)) {
840 sct = ctype_rawchild(cts, ct);
841 if (ctype_isfp(sct->info)) {
842 ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
843 n++;
844 } else {
845 goto noth;
846 }
847 } else if (ctype_isbitfield(ct->info) ||
848 ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
849 goto noth;
850 }
851 }
852 if (n <= 2)
853 return ft;
854noth: /* Not a homogeneous float/double aggregate. */
855 return 0; /* Struct is in GPRs. */
856}
857
858static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
859 int ft)
860{
861 if (LJ_ABI_SOFTFP ? ft :
862 ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
863 int i, ofs = 0;
864 for (i = 0; ft != 0; i++, ft >>= 2) {
865 if ((ft & 3) == FTYPE_FLOAT) {
866#if LJ_ABI_SOFTFP
867 /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
868 memcpy((uint8_t *)dp + ofs,
869 (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
870#else
871 *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
872#endif
873 ofs += 4;
874 } else {
875 ofs = (ofs + 7) & ~7; /* 64 bit alignment. */
876#if LJ_ABI_SOFTFP
877 *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
878#else
879 *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
880#endif
881 ofs += 8;
882 }
883 }
884 } else {
885#if !LJ_ABI_SOFTFP
886 if (ft) sp = (uint8_t *)&cc->fpr[0];
887#endif
888 memcpy(dp, sp, ctr->size);
889 }
890}
891
892#endif
893
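
ccall_classify_struct() for MIPS64 records at most two FP fields with two bits each (1 = float, 2 = double), and ccall_copy_struct() then walks that code to place each field. A short sketch of the encoding and of the hard-float copy offsets, using an assumed struct { float f; double d; } as the example:

#include <stdio.h>

#define FTYPE_FLOAT  1
#define FTYPE_DOUBLE 2

int main(void)
{
  /* First field float, second field double: ft = 1 | (2 << 2) = 9. */
  unsigned ft = (FTYPE_FLOAT << 0) | (FTYPE_DOUBLE << 2);
  int i, ofs = 0;
  for (i = 0; ft != 0; i++, ft >>= 2) {
    if ((ft & 3) == FTYPE_FLOAT) {
      printf("field %d: float  from FPR%d to offset %d\n", i, i, ofs);
      ofs += 4;
    } else {
      ofs = (ofs + 7) & ~7;            /* 64 bit alignment, as in the copy loop. */
      printf("field %d: double from FPR%d to offset %d\n", i, i, ofs);
      ofs += 8;
    }
  }
  return 0;
}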
624/* -- Common C call handling ---------------------------------------------- */ 894/* -- Common C call handling ---------------------------------------------- */
625 895
626/* Infer the destination CTypeID for a vararg argument. */ 896/* Infer the destination CTypeID for a vararg argument. */
@@ -715,6 +985,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
715 fid = ctf->sib; 985 fid = ctf->sib;
716 } 986 }
717 987
988#if LJ_TARGET_ARM64 && LJ_ABI_WIN
989 if ((ct->info & CTF_VARARG)) {
990 nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
991 ngpr = maxgpr;
992 nfpr = CCALL_NARG_FPR;
993 }
994#endif
995
718 /* Walk through all passed arguments. */ 996 /* Walk through all passed arguments. */
719 for (o = L->base+1, narg = 1; o < top; o++, narg++) { 997 for (o = L->base+1, narg = 1; o < top; o++, narg++) {
720 CTypeID did; 998 CTypeID did;
@@ -726,7 +1004,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
726 if (fid) { /* Get argument type from field. */ 1004 if (fid) { /* Get argument type from field. */
727 CType *ctf = ctype_get(cts, fid); 1005 CType *ctf = ctype_get(cts, fid);
728 fid = ctf->sib; 1006 fid = ctf->sib;
729 lua_assert(ctype_isfield(ctf->info)); 1007 lj_assertL(ctype_isfield(ctf->info), "field expected");
730 did = ctype_cid(ctf->info); 1008 did = ctype_cid(ctf->info);
731 } else { 1009 } else {
732 if (!(ct->info & CTF_VARARG)) 1010 if (!(ct->info & CTF_VARARG))
@@ -751,25 +1029,31 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
751 CCALL_HANDLE_STRUCTARG 1029 CCALL_HANDLE_STRUCTARG
752 } else if (ctype_iscomplex(d->info)) { 1030 } else if (ctype_iscomplex(d->info)) {
753 CCALL_HANDLE_COMPLEXARG 1031 CCALL_HANDLE_COMPLEXARG
754 } else { 1032 } else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) {
755 sz = CTSIZE_PTR; 1033 sz = CTSIZE_PTR;
756 } 1034 }
757 sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); 1035 n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
758 n = sz / CTSIZE_PTR; /* Number of GPRs or stack slots needed. */
759 1036
760 CCALL_HANDLE_REGARG /* Handle register arguments. */ 1037 CCALL_HANDLE_REGARG /* Handle register arguments. */
761 1038
762 /* Otherwise pass argument on stack. */ 1039 /* Otherwise pass argument on stack. */
763 if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) { 1040 if (CCALL_ALIGN_STACKARG) { /* Align argument on stack. */
764 MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1; 1041 MSize align = (1u << ctype_align(d->info)) - 1;
765 nsp = (nsp + align) & ~align; /* Align argument on stack. */ 1042 if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1))
1043 align = CTSIZE_PTR-1;
1044 nsp = (nsp + align) & ~align;
766 } 1045 }
767 if (nsp + n > CCALL_MAXSTACK) { /* Too many arguments. */ 1046#if LJ_TARGET_ARM64 && LJ_ABI_WIN
1047 /* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
1048 dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
1049#else
1050 dp = ((uint8_t *)cc->stack) + nsp;
1051#endif
1052 nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
1053 if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
768 err_nyi: 1054 err_nyi:
769 lj_err_caller(L, LJ_ERR_FFI_NYICALL); 1055 lj_err_caller(L, LJ_ERR_FFI_NYICALL);
770 } 1056 }
771 dp = &cc->stack[nsp];
772 nsp += n;
773 isva = 0; 1057 isva = 0;
774 1058
775 done: 1059 done:
@@ -780,7 +1064,8 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
780 } 1064 }
781 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); 1065 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
782 /* Extend passed integers to 32 bits at least. */ 1066 /* Extend passed integers to 32 bits at least. */
783 if (ctype_isinteger_or_bool(d->info) && d->size < 4) { 1067 if (ctype_isinteger_or_bool(d->info) && d->size < 4 &&
1068 (!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) { /* Assumes LJ_LE. */
784 if (d->info & CTF_UNSIGNED) 1069 if (d->info & CTF_UNSIGNED)
785 *(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp : 1070 *(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp :
786 (uint32_t)*(uint16_t *)dp; 1071 (uint32_t)*(uint16_t *)dp;
@@ -788,6 +1073,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
788 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : 1073 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
789 (int32_t)*(int16_t *)dp; 1074 (int32_t)*(int16_t *)dp;
790 } 1075 }
1076#if LJ_TARGET_ARM64 && LJ_BE
1077 if (isfp && d->size == sizeof(float))
1078 ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
1079#endif
1080#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
1081 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
1082#if LJ_TARGET_MIPS64
1083 || (isfp && nsp == 0)
1084#endif
1085 ) && d->size <= 4) {
1086 *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
1087 }
1088#endif
791#if LJ_TARGET_X64 && LJ_ABI_WIN 1089#if LJ_TARGET_X64 && LJ_ABI_WIN
792 if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ 1090 if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
793 if (nfpr == ngpr) 1091 if (nfpr == ngpr)
@@ -803,19 +1101,28 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
803 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ 1101 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
804 cc->fpr[nfpr-2].d[1] = 0; 1102 cc->fpr[nfpr-2].d[1] = 0;
805 } 1103 }
1104#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
1105 if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
1106 /* Split float HFA or complex float into separate registers. */
1107 CTSize i = (sz >> 2) - 1;
1108 do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
1109 }
806#else 1110#else
807 UNUSED(isfp); 1111 UNUSED(isfp);
808#endif 1112#endif
809 } 1113 }
810 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ 1114 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
1115#if LJ_TARGET_ARM64 && LJ_ABI_WIN
1116 if ((int32_t)nsp < 0) nsp = 0;
1117#endif
811 1118
812#if LJ_TARGET_X64 || LJ_TARGET_PPC 1119#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
813 cc->nfpr = nfpr; /* Required for vararg functions. */ 1120 cc->nfpr = nfpr; /* Required for vararg functions. */
814#endif 1121#endif
815 cc->nsp = nsp; 1122 cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
816 cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; 1123 cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
817 if (nsp > CCALL_SPS_FREE) 1124 if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
818 cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u); 1125 cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
819 return gcsteps; 1126 return gcsteps;
820} 1127}
821 1128
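
With the stack area now tracked in bytes, the final stack-pointer adjustment rounds nsp up to a pointer-sized slot, reserves the fixed free/extra area, and 16-byte aligns whatever spills past the free slots. A worked sketch with the MIPS64 constants from the header change below (nsp = 24, three stack arguments, is an assumed example):

#include <stdio.h>

#define CTSIZE_PTR      8
#define CCALL_SPS_EXTRA 3              /* MIPS64 value from lj_ccall.h below. */
#define CCALL_SPS_FREE  1

int main(void)
{
  unsigned nsp = 24;                   /* Three 8-byte stack arguments, in bytes. */
  unsigned spadj;
  nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);        /* Round up to a slot. */
  spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
  if (nsp > CCALL_SPS_FREE * CTSIZE_PTR)               /* Beyond the free area? */
    spadj += ((nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u;
  printf("nsp=%u spadj=%u\n", nsp, spadj);             /* nsp=24 spadj=48 */
  return 0;
}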
@@ -844,7 +1151,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
844 CCALL_HANDLE_COMPLEXRET2 1151 CCALL_HANDLE_COMPLEXRET2
845 return 1; /* One GC step. */ 1152 return 1; /* One GC step. */
846 } 1153 }
847 if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) 1154 if (LJ_BE && ctr->size < CTSIZE_PTR &&
1155 (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
848 sp += (CTSIZE_PTR - ctr->size); 1156 sp += (CTSIZE_PTR - ctr->size);
849#if CCALL_NUM_FPR 1157#if CCALL_NUM_FPR
850 if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) 1158 if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))
@@ -854,7 +1162,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
854 CCALL_HANDLE_RET 1162 CCALL_HANDLE_RET
855#endif 1163#endif
856 /* No reference types end up here, so there's no need for the CTypeID. */ 1164 /* No reference types end up here, so there's no need for the CTypeID. */
857 lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info))); 1165 lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)),
1166 "unexpected reference ctype");
858 return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); 1167 return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp);
859} 1168}
860 1169
@@ -878,7 +1187,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd)
878 lj_vm_ffi_call(&cc); 1187 lj_vm_ffi_call(&cc);
879 if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ 1188 if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */
880 TValue tv; 1189 TValue tv;
881 setlightudV(&tv, (void *)cc.func); 1190 tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000);
882 setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); 1191 setboolV(lj_tab_set(L, cts->miscmap, &tv), 1);
883 } 1192 }
884 ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ 1193 ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 2f4fa7a6..24646d90 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -68,35 +68,59 @@ typedef union FPRArg {
68 float f[2]; 68 float f[2];
69} FPRArg; 69} FPRArg;
70 70
71#elif LJ_TARGET_PPC 71#elif LJ_TARGET_ARM64
72 72
73#define CCALL_NARG_GPR 8 73#define CCALL_NARG_GPR 8
74#define CCALL_NRET_GPR 2
74#define CCALL_NARG_FPR 8 75#define CCALL_NARG_FPR 8
76#define CCALL_NRET_FPR 4
77#define CCALL_SPS_FREE 0
78#if LJ_TARGET_OSX
79#define CCALL_PACK_STACKARG 1
80#endif
81
82typedef intptr_t GPRArg;
83typedef union FPRArg {
84 double d;
85 struct { LJ_ENDIAN_LOHI(float f; , float g;) };
86 struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
87} FPRArg;
88
89#elif LJ_TARGET_PPC
90
91#define CCALL_NARG_GPR 8
92#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
75#define CCALL_NRET_GPR 4 /* For complex double. */ 93#define CCALL_NRET_GPR 4 /* For complex double. */
76#define CCALL_NRET_FPR 1 94#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
77#define CCALL_SPS_EXTRA 4 95#define CCALL_SPS_EXTRA 4
78#define CCALL_SPS_FREE 0 96#define CCALL_SPS_FREE 0
79 97
80typedef intptr_t GPRArg; 98typedef intptr_t GPRArg;
81typedef double FPRArg; 99typedef double FPRArg;
82 100
83#elif LJ_TARGET_PPCSPE 101#elif LJ_TARGET_MIPS32
84 102
85#define CCALL_NARG_GPR 8 103#define CCALL_NARG_GPR 4
86#define CCALL_NARG_FPR 0 104#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
87#define CCALL_NRET_GPR 4 /* For softfp complex double. */ 105#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2)
88#define CCALL_NRET_FPR 0 106#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
89#define CCALL_SPS_FREE 0 /* NYI */ 107#define CCALL_SPS_EXTRA 7
108#define CCALL_SPS_FREE 1
90 109
91typedef intptr_t GPRArg; 110typedef intptr_t GPRArg;
111typedef union FPRArg {
112 double d;
113 struct { LJ_ENDIAN_LOHI(float f; , float g;) };
114} FPRArg;
92 115
93#elif LJ_TARGET_MIPS 116#elif LJ_TARGET_MIPS64
94 117
95#define CCALL_NARG_GPR 4 118/* FP args are positional and overlay the GPR array. */
96#define CCALL_NARG_FPR 2 119#define CCALL_NARG_GPR 8
120#define CCALL_NARG_FPR 0
97#define CCALL_NRET_GPR 2 121#define CCALL_NRET_GPR 2
98#define CCALL_NRET_FPR 2 122#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
99#define CCALL_SPS_EXTRA 7 123#define CCALL_SPS_EXTRA 3
100#define CCALL_SPS_FREE 1 124#define CCALL_SPS_FREE 1
101 125
102typedef intptr_t GPRArg; 126typedef intptr_t GPRArg;
@@ -118,6 +142,9 @@ typedef union FPRArg {
118#ifndef CCALL_ALIGN_STACKARG 142#ifndef CCALL_ALIGN_STACKARG
119#define CCALL_ALIGN_STACKARG 1 143#define CCALL_ALIGN_STACKARG 1
120#endif 144#endif
145#ifndef CCALL_PACK_STACKARG
146#define CCALL_PACK_STACKARG 0
147#endif
121#ifndef CCALL_ALIGN_CALLSTATE 148#ifndef CCALL_ALIGN_CALLSTATE
122#define CCALL_ALIGN_CALLSTATE 8 149#define CCALL_ALIGN_CALLSTATE 8
123#endif 150#endif
@@ -131,20 +158,23 @@ typedef union FPRArg {
131LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR); 158LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
132LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR); 159LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
133 160
134#define CCALL_MAXSTACK 32 161#define CCALL_NUM_STACK 31
162#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)
135 163
136/* -- C call state -------------------------------------------------------- */ 164/* -- C call state -------------------------------------------------------- */
137 165
138typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { 166typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
139 void (*func)(void); /* Pointer to called function. */ 167 void (*func)(void); /* Pointer to called function. */
140 uint32_t spadj; /* Stack pointer adjustment. */ 168 uint32_t spadj; /* Stack pointer adjustment. */
141 uint8_t nsp; /* Number of stack slots. */ 169 uint8_t nsp; /* Number of bytes on stack. */
142 uint8_t retref; /* Return value by reference. */ 170 uint8_t retref; /* Return value by reference. */
143#if LJ_TARGET_X64 171#if LJ_TARGET_X64
144 uint8_t ngpr; /* Number of arguments in GPRs. */ 172 uint8_t ngpr; /* Number of arguments in GPRs. */
145 uint8_t nfpr; /* Number of arguments in FPRs. */ 173 uint8_t nfpr; /* Number of arguments in FPRs. */
146#elif LJ_TARGET_X86 174#elif LJ_TARGET_X86
147 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ 175 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
176#elif LJ_TARGET_ARM64
177 void *retp; /* Aggregate return pointer in x8. */
148#elif LJ_TARGET_PPC 178#elif LJ_TARGET_PPC
149 uint8_t nfpr; /* Number of arguments in FPRs. */ 179 uint8_t nfpr; /* Number of arguments in FPRs. */
150#endif 180#endif
@@ -155,7 +185,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
155 FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ 185 FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */
156#endif 186#endif
157 GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */ 187 GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */
158 GPRArg stack[CCALL_MAXSTACK]; /* Stack slots. */ 188 GPRArg stack[CCALL_NUM_STACK]; /* Stack slots. */
159} CCallState; 189} CCallState;
160 190
161/* -- C call handling ----------------------------------------------------- */ 191/* -- C call handling ----------------------------------------------------- */
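
The stack area shrinks from 32 to 31 slots while nsp switches from a slot count to a byte count; 31 slots on a 64-bit target are 248 bytes, which still fits the uint8_t nsp field, whereas 32 slots (256 bytes) would not. That motivation is an inference from the field width, not stated in the patch; a trivial check:

#include <stdio.h>

#define CTSIZE_PTR       8             /* 64-bit target. */
#define CCALL_NUM_STACK  31
#define CCALL_SIZE_STACK (CCALL_NUM_STACK * CTSIZE_PTR)

int main(void)
{
  printf("CCALL_SIZE_STACK = %d, fits uint8_t: %d\n",
         CCALL_SIZE_STACK, CCALL_SIZE_STACK <= 255);        /* 248, 1 */
  printf("old 32-slot area  = %d, fits uint8_t: %d\n",
         32 * CTSIZE_PTR, 32 * CTSIZE_PTR <= 255);          /* 256, 0 */
  return 0;
}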
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 363fef45..17d26b52 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -27,7 +27,7 @@
27 27
28#if LJ_OS_NOJIT 28#if LJ_OS_NOJIT
29 29
30/* Disabled callback support. */ 30/* Callbacks disabled. */
31#define CALLBACK_SLOT2OFS(slot) (0*(slot)) 31#define CALLBACK_SLOT2OFS(slot) (0*(slot))
32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) 32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs))
33#define CALLBACK_MAX_SLOT 0 33#define CALLBACK_MAX_SLOT 0
@@ -35,7 +35,7 @@
35#elif LJ_TARGET_X86ORX64 35#elif LJ_TARGET_X86ORX64
36 36
37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) 37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
38#define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5)) 38#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
39 39
40#define CALLBACK_SLOT2OFS(slot) \ 40#define CALLBACK_SLOT2OFS(slot) \
41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) 41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
54#elif LJ_TARGET_ARM 54#elif LJ_TARGET_ARM
55 55
56#define CALLBACK_MCODE_HEAD 32 56#define CALLBACK_MCODE_HEAD 32
57#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 57
58#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 58#elif LJ_TARGET_ARM64
59#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 59
60#define CALLBACK_MCODE_HEAD 32
60 61
61#elif LJ_TARGET_PPC 62#elif LJ_TARGET_PPC
62 63
63#define CALLBACK_MCODE_HEAD 24 64#define CALLBACK_MCODE_HEAD 24
64#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
65#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
66#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
67 65
68#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS32
69 67
70#define CALLBACK_MCODE_HEAD 24 68#define CALLBACK_MCODE_HEAD 20
71#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 69
72#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 70#elif LJ_TARGET_MIPS64
73#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 71
72#define CALLBACK_MCODE_HEAD 52
74 73
75#else 74#else
76 75
@@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
81 80
82#endif 81#endif
83 82
83#ifndef CALLBACK_SLOT2OFS
84#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
85#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
86#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
87#endif
88
84/* Convert callback slot number to callback function pointer. */ 89/* Convert callback slot number to callback function pointer. */
85static void *callback_slot2ptr(CTState *cts, MSize slot) 90static void *callback_slot2ptr(CTState *cts, MSize slot)
86{ 91{
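
The fallback macros above give every non-x86 port the same mapping: a fixed head of trampoline code followed by 8 bytes of machine code per callback slot. A standalone round-trip check, assuming the ARM/ARM64 head of 32 bytes and a 4 KiB CALLBACK_MCODE_SIZE (that size is defined elsewhere in the sources and is an assumption here):

#include <assert.h>
#include <stdio.h>

#define CALLBACK_MCODE_HEAD 32
#define CALLBACK_MCODE_SIZE 4096
#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
#define CALLBACK_OFS2SLOT(ofs)  (((ofs)-CALLBACK_MCODE_HEAD)/8)
#define CALLBACK_MAX_SLOT       (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))

int main(void)
{
  unsigned slot;
  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++)
    assert(CALLBACK_OFS2SLOT(CALLBACK_SLOT2OFS(slot)) == slot);
  printf("slots per page: %u\n", (unsigned)CALLBACK_MAX_SLOT);  /* 508 */
  return 0;
}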
@@ -102,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p)
102/* Initialize machine code for callback function pointers. */ 107/* Initialize machine code for callback function pointers. */
103#if LJ_OS_NOJIT 108#if LJ_OS_NOJIT
104/* Disabled callback support. */ 109/* Disabled callback support. */
105#define callback_mcode_init(g, p) UNUSED(p) 110#define callback_mcode_init(g, p) (p)
106#elif LJ_TARGET_X86ORX64 111#elif LJ_TARGET_X86ORX64
107static void callback_mcode_init(global_State *g, uint8_t *page) 112static void *callback_mcode_init(global_State *g, uint8_t *page)
108{ 113{
109 uint8_t *p = page; 114 uint8_t *p = page;
110 uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; 115 uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback;
@@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
119 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ 124 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
120 *p++ = XI_PUSH + RID_EBP; 125 *p++ = XI_PUSH + RID_EBP;
121 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); 126 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
127#if LJ_GC64
128 *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
129 *(uint64_t *)p = (uint64_t)(g); p += 8;
130#else
122 *p++ = XI_MOVri | RID_EBP; 131 *p++ = XI_MOVri | RID_EBP;
123 *(int32_t *)p = i32ptr(g); p += 4; 132 *(int32_t *)p = i32ptr(g); p += 4;
133#endif
124#if LJ_64 134#if LJ_64
125 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ 135 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
126 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; 136 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
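
Under LJ_GC64 the per-group tail loads the 64-bit global_State pointer with a 10-byte movabs (REX.W prefix, opcode, 8-byte immediate) instead of the 5-byte mov reg, imm32, which is why CALLBACK_MCODE_GROUP grows by 5 in the earlier hunk. A sketch that just evaluates the byte budget (the other terms mirror the remaining instructions of the tail):

#include <stdio.h>

static int group_bytes(int gc64, int x64)
{
  /* -2 + push + mov ah,imm8 + mov (r)bp,&g + jmp, as in CALLBACK_MCODE_GROUP. */
  return -2 + 1 + 2 + (gc64 ? 10 : 5) + (x64 ? 6 : 5);
}

int main(void)
{
  printf("x86          : %d\n", group_bytes(0, 0));  /* 11 */
  printf("x64          : %d\n", group_bytes(0, 1));  /* 12 */
  printf("x64 + LJ_GC64: %d\n", group_bytes(1, 1));  /* 17 */
  return 0;
}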
@@ -133,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
133 *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); 143 *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
134 } 144 }
135 } 145 }
136 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 146 return p;
137} 147}
138#elif LJ_TARGET_ARM 148#elif LJ_TARGET_ARM
139static void callback_mcode_init(global_State *g, uint32_t *page) 149static void *callback_mcode_init(global_State *g, uint32_t *page)
140{ 150{
141 uint32_t *p = page; 151 uint32_t *p = page;
142 void *target = (void *)lj_vm_ffi_callback; 152 void *target = (void *)lj_vm_ffi_callback;
@@ -155,10 +165,30 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
155 *p = ARMI_B | ((page-p-2) & 0x00ffffffu); 165 *p = ARMI_B | ((page-p-2) & 0x00ffffffu);
156 p++; 166 p++;
157 } 167 }
158 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 168 return p;
169}
170#elif LJ_TARGET_ARM64
171static void *callback_mcode_init(global_State *g, uint32_t *page)
172{
173 uint32_t *p = page;
174 ASMFunction target = lj_vm_ffi_callback;
175 MSize slot;
176 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
177 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
178 *p++ = A64I_LE(A64I_BR_AUTH | A64F_N(RID_X11));
179 *p++ = A64I_LE(A64I_NOP);
180 ((ASMFunction *)p)[0] = target;
181 ((void **)p)[1] = g;
182 p += 4;
183 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
184 *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
185 *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
186 p++;
187 }
188 return p;
159} 189}
160#elif LJ_TARGET_PPC 190#elif LJ_TARGET_PPC
161static void callback_mcode_init(global_State *g, uint32_t *page) 191static void *callback_mcode_init(global_State *g, uint32_t *page)
162{ 192{
163 uint32_t *p = page; 193 uint32_t *p = page;
164 void *target = (void *)lj_vm_ffi_callback; 194 void *target = (void *)lj_vm_ffi_callback;
@@ -174,30 +204,43 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
174 *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); 204 *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
175 p++; 205 p++;
176 } 206 }
177 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 207 return p;
178} 208}
179#elif LJ_TARGET_MIPS 209#elif LJ_TARGET_MIPS
180static void callback_mcode_init(global_State *g, uint32_t *page) 210static void *callback_mcode_init(global_State *g, uint32_t *page)
181{ 211{
182 uint32_t *p = page; 212 uint32_t *p = page;
183 void *target = (void *)lj_vm_ffi_callback; 213 uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
214 uintptr_t ug = (uintptr_t)(void *)g;
184 MSize slot; 215 MSize slot;
185 *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; 216#if LJ_TARGET_MIPS32
186 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); 217 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
187 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); 218 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
188 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); 219#else
220 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48);
221 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48);
222 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
223 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
224 *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
225 *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
226 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
227 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
228 *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
229 *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
230#endif
231 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
189 *p++ = MIPSI_JR | MIPSF_S(RID_R3); 232 *p++ = MIPSI_JR | MIPSF_S(RID_R3);
190 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); 233 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
191 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { 234 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
192 *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); 235 *p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
193 p++; 236 p++;
194 *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; 237 *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot;
195 } 238 }
196 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 239 return p;
197} 240}
198#else 241#else
199/* Missing support for this architecture. */ 242/* Missing support for this architecture. */
200#define callback_mcode_init(g, p) UNUSED(p) 243#define callback_mcode_init(g, p) (p)
201#endif 244#endif
202 245
203/* -- Machine code management --------------------------------------------- */ 246/* -- Machine code management --------------------------------------------- */
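
The MIPS64 trampoline head builds the two 64-bit pointers (callback target and global_State) from four 16-bit chunks each, via lui/ori/dsll/ori/dsll/ori. A host-side sketch of the same reassembly, with an arbitrary example address:

#include <stdio.h>
#include <stdint.h>

static uint64_t materialize(uint64_t addr)
{
  uint64_t r;
  r  = (addr >> 48) << 16;         /* lui  r, hi16        */
  r |= (addr >> 32) & 0xffff;      /* ori  r, r, mid-hi16 */
  r <<= 16;                        /* dsll r, r, 16       */
  r |= (addr >> 16) & 0xffff;      /* ori  r, r, mid-lo16 */
  r <<= 16;                        /* dsll r, r, 16       */
  r |= addr & 0xffff;              /* ori  r, r, lo16     */
  return r;
}

int main(void)
{
  uint64_t addr = 0x123456789abcdef0ULL;       /* Hypothetical target address. */
  printf("%d\n", materialize(addr) == addr);   /* 1 */
  return 0;
}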
@@ -213,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
213#ifndef MAP_ANONYMOUS 256#ifndef MAP_ANONYMOUS
214#define MAP_ANONYMOUS MAP_ANON 257#define MAP_ANONYMOUS MAP_ANON
215#endif 258#endif
259#ifdef PROT_MPROTECT
260#define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC))
261#else
262#define CCPROT_CREATE 0
263#endif
216 264
217#endif 265#endif
218 266
@@ -220,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
220static void callback_mcode_new(CTState *cts) 268static void callback_mcode_new(CTState *cts)
221{ 269{
222 size_t sz = (size_t)CALLBACK_MCODE_SIZE; 270 size_t sz = (size_t)CALLBACK_MCODE_SIZE;
223 void *p; 271 void *p, *pe;
224 if (CALLBACK_MAX_SLOT == 0) 272 if (CALLBACK_MAX_SLOT == 0)
225 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 273 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
226#if LJ_TARGET_WINDOWS 274#if LJ_TARGET_WINDOWS
227 p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); 275 p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
228 if (!p) 276 if (!p)
229 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 277 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
230#elif LJ_TARGET_POSIX 278#elif LJ_TARGET_POSIX
231 p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, 279 p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS,
232 -1, 0); 280 -1, 0);
233 if (p == MAP_FAILED) 281 if (p == MAP_FAILED)
234 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 282 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
@@ -237,12 +285,15 @@ static void callback_mcode_new(CTState *cts)
237 p = lj_mem_new(cts->L, sz); 285 p = lj_mem_new(cts->L, sz);
238#endif 286#endif
239 cts->cb.mcode = p; 287 cts->cb.mcode = p;
240 callback_mcode_init(cts->g, p); 288 pe = callback_mcode_init(cts->g, p);
289 UNUSED(pe);
290 lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz,
291 "miscalculated CALLBACK_MAX_SLOT");
241 lj_mcode_sync(p, (char *)p + sz); 292 lj_mcode_sync(p, (char *)p + sz);
242#if LJ_TARGET_WINDOWS 293#if LJ_TARGET_WINDOWS
243 { 294 {
244 DWORD oprot; 295 DWORD oprot;
245 VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); 296 LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
246 } 297 }
247#elif LJ_TARGET_POSIX 298#elif LJ_TARGET_POSIX
248 mprotect(p, sz, (PROT_READ|PROT_EXEC)); 299 mprotect(p, sz, (PROT_READ|PROT_EXEC));
@@ -351,33 +402,78 @@ void lj_ccallback_mcode_free(CTState *cts)
351 goto done; \ 402 goto done; \
352 } CALLBACK_HANDLE_REGARG_FP2 403 } CALLBACK_HANDLE_REGARG_FP2
353 404
354#elif LJ_TARGET_PPC 405#elif LJ_TARGET_ARM64
355 406
356#define CALLBACK_HANDLE_REGARG \ 407#define CALLBACK_HANDLE_REGARG \
357 if (isfp) { \ 408 if (isfp) { \
358 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 409 if (nfpr + n <= CCALL_NARG_FPR) { \
359 sp = &cts->cb.fpr[nfpr++]; \ 410 sp = &cts->cb.fpr[nfpr]; \
360 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ 411 nfpr += n; \
361 goto done; \ 412 goto done; \
413 } else { \
414 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
362 } \ 415 } \
363 } else { /* Try to pass argument in GPRs. */ \ 416 } else { \
364 if (n > 1) { \ 417 if (!LJ_TARGET_OSX && n > 1) \
365 lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ 418 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
366 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
367 } \
368 if (ngpr + n <= maxgpr) { \ 419 if (ngpr + n <= maxgpr) { \
369 sp = &cts->cb.gpr[ngpr]; \ 420 sp = &cts->cb.gpr[ngpr]; \
370 ngpr += n; \ 421 ngpr += n; \
371 goto done; \ 422 goto done; \
423 } else { \
424 ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \
372 } \ 425 } \
373 } 426 }
374 427
428#elif LJ_TARGET_PPC
429
430#define CALLBACK_HANDLE_GPR \
431 if (n > 1) { \
432 lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
433 ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \
434 "bad GPR type"); \
435 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
436 } \
437 if (ngpr + n <= maxgpr) { \
438 sp = &cts->cb.gpr[ngpr]; \
439 ngpr += n; \
440 goto done; \
441 }
442
443#if LJ_ABI_SOFTFP
444#define CALLBACK_HANDLE_REGARG \
445 CALLBACK_HANDLE_GPR \
446 UNUSED(isfp);
447#else
448#define CALLBACK_HANDLE_REGARG \
449 if (isfp) { \
450 if (nfpr + 1 <= CCALL_NARG_FPR) { \
451 sp = &cts->cb.fpr[nfpr++]; \
452 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
453 goto done; \
454 } \
455 } else { /* Try to pass argument in GPRs. */ \
456 CALLBACK_HANDLE_GPR \
457 }
458#endif
459
460#if !LJ_ABI_SOFTFP
375#define CALLBACK_HANDLE_RET \ 461#define CALLBACK_HANDLE_RET \
376 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 462 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
377 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ 463 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
464#endif
378 465
379#elif LJ_TARGET_MIPS 466#elif LJ_TARGET_MIPS32
380 467
468#define CALLBACK_HANDLE_GPR \
469 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
470 if (ngpr + n <= maxgpr) { \
471 sp = &cts->cb.gpr[ngpr]; \
472 ngpr += n; \
473 goto done; \
474 }
475
476#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
381#define CALLBACK_HANDLE_REGARG \ 477#define CALLBACK_HANDLE_REGARG \
382 if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ 478 if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \
383 sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ 479 sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
@@ -385,13 +481,36 @@ void lj_ccallback_mcode_free(CTState *cts)
385 goto done; \ 481 goto done; \
386 } else { /* Try to pass argument in GPRs. */ \ 482 } else { /* Try to pass argument in GPRs. */ \
387 nfpr = CCALL_NARG_FPR; \ 483 nfpr = CCALL_NARG_FPR; \
388 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 484 CALLBACK_HANDLE_GPR \
389 if (ngpr + n <= maxgpr) { \
390 sp = &cts->cb.gpr[ngpr]; \
391 ngpr += n; \
392 goto done; \
393 } \
394 } 485 }
486#else /* MIPS32 soft-float */
487#define CALLBACK_HANDLE_REGARG \
488 CALLBACK_HANDLE_GPR \
489 UNUSED(isfp);
490#endif
491
492#define CALLBACK_HANDLE_RET \
493 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
494 ((float *)dp)[1] = *(float *)dp;
495
496#elif LJ_TARGET_MIPS64
497
498#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */
499#define CALLBACK_HANDLE_REGARG \
500 if (ngpr + n <= maxgpr) { \
501 sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
502 ngpr += n; \
503 goto done; \
504 }
505#else /* MIPS64 soft-float */
506#define CALLBACK_HANDLE_REGARG \
507 if (ngpr + n <= maxgpr) { \
508 UNUSED(isfp); \
509 sp = (void*) &cts->cb.gpr[ngpr]; \
510 ngpr += n; \
511 goto done; \
512 }
513#endif
395 514
396#define CALLBACK_HANDLE_RET \ 515#define CALLBACK_HANDLE_RET \
397 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 516 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@@ -411,6 +530,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
411 int gcsteps = 0; 530 int gcsteps = 0;
412 CType *ct; 531 CType *ct;
413 GCfunc *fn; 532 GCfunc *fn;
533 int fntp;
414 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; 534 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
415#if CCALL_NARG_FPR 535#if CCALL_NARG_FPR
416 MSize nfpr = 0; 536 MSize nfpr = 0;
@@ -421,18 +541,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
421 541
422 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { 542 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
423 ct = ctype_get(cts, id); 543 ct = ctype_get(cts, id);
424 rid = ctype_cid(ct->info); 544 rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */
425 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); 545 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
546 fntp = LJ_TFUNC;
426 } else { /* Must set up frame first, before throwing the error. */ 547 } else { /* Must set up frame first, before throwing the error. */
427 ct = NULL; 548 ct = NULL;
428 rid = 0; 549 rid = 0;
429 fn = (GCfunc *)L; 550 fn = (GCfunc *)L;
551 fntp = LJ_TTHREAD;
552 }
553 /* Continuation returns from callback. */
554 if (LJ_FR2) {
555 (o++)->u64 = LJ_CONT_FFI_CALLBACK;
556 (o++)->u64 = rid;
557 } else {
558 o->u32.lo = LJ_CONT_FFI_CALLBACK;
559 o->u32.hi = rid;
560 o++;
430 } 561 }
431 o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */ 562 setframe_gc(o, obj2gco(fn), fntp);
432 o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */ 563 if (LJ_FR2) o++;
433 o++; 564 setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
434 setframe_gc(o, obj2gco(fn));
435 setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
436 L->top = L->base = ++o; 565 L->top = L->base = ++o;
437 if (!ct) 566 if (!ct)
438 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); 567 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
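
With LJ_FR2 the continuation and the frame each take two slots, so the return-type ID ends up three slots below the new base instead of in the high half of base[-2]; that is why callback_conv_result() and the x86 spadj fixup later in this file read (L->base-3)->u64 in FR2 builds. A minimal model of the two layouts using raw 64-bit words in place of TValues (all constants are placeholders):

#include <stdio.h>
#include <stdint.h>

#define CONT_FFI_CALLBACK 0x1111u      /* Placeholder continuation value. */
#define RID               0x2222u      /* Placeholder return ctype ID. */

int main(void)
{
  uint64_t slots[8] = {0}, *o = slots;
  int fr2 = 1;                         /* Set to 0 for the legacy layout. */
  if (fr2) {                           /* Two-slot continuation + two-slot frame. */
    *o++ = CONT_FFI_CALLBACK;
    *o++ = RID;
    *o++ = 0xF00;                      /* Frame: callback function reference. */
    *o++ = 0xBAA;                      /* Frame: type/size bits. */
  } else {                             /* Packed continuation + one frame slot. */
    *o++ = CONT_FFI_CALLBACK | ((uint64_t)RID << 32);
    *o++ = 0xF00;                      /* Frame: func reference + type/size. */
  }
  {
    uint64_t *base = o;                /* L->base = ++o in the real code. */
    unsigned rid = fr2 ? (unsigned)base[-3] : (unsigned)(base[-2] >> 32);
    printf("rid = %x\n", rid);         /* 2222 either way. */
  }
  return 0;
}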
@@ -459,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
459 CTSize sz; 588 CTSize sz;
460 int isfp; 589 int isfp;
461 MSize n; 590 MSize n;
462 lua_assert(ctype_isfield(ctf->info)); 591 lj_assertCTS(ctype_isfield(ctf->info), "field expected");
463 cta = ctype_rawchild(cts, ctf); 592 cta = ctype_rawchild(cts, ctf);
464 isfp = ctype_isfp(cta->info); 593 isfp = ctype_isfp(cta->info);
465 sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); 594 sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
@@ -474,7 +603,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
474 nsp += n; 603 nsp += n;
475 604
476 done: 605 done:
477 if (LJ_BE && cta->size < CTSIZE_PTR) 606 if (LJ_BE && cta->size < CTSIZE_PTR
607#if LJ_TARGET_MIPS64
608 && !(isfp && nsp)
609#endif
610 )
478 sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); 611 sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
479 gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); 612 gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
480 } 613 }
@@ -483,9 +616,14 @@ static void callback_conv_args(CTState *cts, lua_State *L)
483 L->top = o; 616 L->top = o;
484#if LJ_TARGET_X86 617#if LJ_TARGET_X86
485 /* Store stack adjustment for returns from non-cdecl callbacks. */ 618 /* Store stack adjustment for returns from non-cdecl callbacks. */
486 if (ctype_cconv(ct->info) != CTCC_CDECL) 619 if (ctype_cconv(ct->info) != CTCC_CDECL) {
620#if LJ_FR2
621 (L->base-3)->u64 |= (nsp << (16+2));
622#else
487 (L->base-2)->u32.hi |= (nsp << (16+2)); 623 (L->base-2)->u32.hi |= (nsp << (16+2));
488#endif 624#endif
625 }
626#endif
489 while (gcsteps-- > 0) 627 while (gcsteps-- > 0)
490 lj_gc_check(L); 628 lj_gc_check(L);
491} 629}
@@ -493,7 +631,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
493/* Convert Lua object to callback result. */ 631/* Convert Lua object to callback result. */
494static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) 632static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
495{ 633{
634#if LJ_FR2
635 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
636#else
496 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); 637 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
638#endif
497#if LJ_TARGET_X86 639#if LJ_TARGET_X86
498 cts->cb.gpr[2] = 0; 640 cts->cb.gpr[2] = 0;
499#endif 641#endif
@@ -503,6 +645,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
503 if (ctype_isfp(ctr->info)) 645 if (ctype_isfp(ctr->info))
504 dp = (uint8_t *)&cts->cb.fpr[0]; 646 dp = (uint8_t *)&cts->cb.fpr[0];
505#endif 647#endif
648#if LJ_TARGET_ARM64 && LJ_BE
649 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
650 dp = (uint8_t *)&cts->cb.fpr[0].f[1];
651#endif
506 lj_cconv_ct_tv(cts, ctr, dp, o, 0); 652 lj_cconv_ct_tv(cts, ctr, dp, o, 0);
507#ifdef CALLBACK_HANDLE_RET 653#ifdef CALLBACK_HANDLE_RET
508 CALLBACK_HANDLE_RET 654 CALLBACK_HANDLE_RET
@@ -516,6 +662,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
516 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : 662 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
517 (int32_t)*(int16_t *)dp; 663 (int32_t)*(int16_t *)dp;
518 } 664 }
665#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
666 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
667 if (ctr->size <= 4 &&
668 (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
669 *(int64_t *)dp = (int64_t)*(int32_t *)dp;
670#endif
519#if LJ_TARGET_X86 671#if LJ_TARGET_X86
520 if (ctype_isfp(ctr->info)) 672 if (ctype_isfp(ctr->info))
521 cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; 673 cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
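
On MIPS64 (and big-endian ARM64) integer results and soft-float 'float' results narrower than a register must come back sign-extended to the full 64 bits, which is what the added widening store does. A two-line illustration of the difference it makes for a negative 32-bit result:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
  int32_t ret32 = -1;                  /* 32-bit result from the callback. */
  uint64_t reg = (uint32_t)ret32;      /* Zero-extended: not what the ABI wants. */
  printf("zero-extended: 0x%016llx\n", (unsigned long long)reg);
  reg = (uint64_t)(int64_t)ret32;      /* Sign-extended, as in the patch. */
  printf("sign-extended: 0x%016llx\n", (unsigned long long)reg);
  return 0;
}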
@@ -528,8 +680,8 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
528{ 680{
529 lua_State *L = cts->L; 681 lua_State *L = cts->L;
530 global_State *g = cts->g; 682 global_State *g = cts->g;
531 lua_assert(L != NULL); 683 lj_assertG(L != NULL, "uninitialized cts->L in callback");
532 if (gcref(g->jit_L)) { 684 if (tvref(g->jit_base)) {
533 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); 685 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
534 if (g->panic) g->panic(L); 686 if (g->panic) g->panic(L);
535 exit(EXIT_FAILURE); 687 exit(EXIT_FAILURE);
@@ -562,9 +714,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
562 } 714 }
563 callback_conv_result(cts, L, o); 715 callback_conv_result(cts, L, o);
564 /* Finally drop C frame and continuation frame. */ 716 /* Finally drop C frame and continuation frame. */
565 L->cframe = cframe_prev(L->cframe); 717 L->top -= 2+2*LJ_FR2;
566 L->top -= 2;
567 L->base = obase; 718 L->base = obase;
719 L->cframe = cframe_prev(L->cframe);
568 cts->cb.slot = 0; /* Blacklist C function that called the callback. */ 720 cts->cb.slot = 0; /* Blacklist C function that called the callback. */
569} 721}
570 722
@@ -613,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct)
613 CType *ctf = ctype_get(cts, fid); 765 CType *ctf = ctype_get(cts, fid);
614 if (!ctype_isattrib(ctf->info)) { 766 if (!ctype_isattrib(ctf->info)) {
615 CType *cta; 767 CType *cta;
616 lua_assert(ctype_isfield(ctf->info)); 768 lj_assertCTS(ctype_isfield(ctf->info), "field expected");
617 cta = ctype_rawchild(cts, ctf); 769 cta = ctype_rawchild(cts, ctf);
618 if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || 770 if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) ||
619 (ctype_isnum(cta->info) && cta->size <= 8)) || 771 (ctype_isnum(cta->info) && cta->size <= 8)) ||
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index a5a4e69b..419a8f45 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -8,6 +8,7 @@
8#if LJ_HASFFI 8#if LJ_HASFFI
9 9
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_tab.h" 12#include "lj_tab.h"
12#include "lj_ctype.h" 13#include "lj_ctype.h"
13#include "lj_cdata.h" 14#include "lj_cdata.h"
@@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
122 CTInfo dinfo = d->info, sinfo = s->info; 123 CTInfo dinfo = d->info, sinfo = s->info;
123 void *tmpptr; 124 void *tmpptr;
124 125
125 lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo)); 126 lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo),
126 lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo)); 127 "unresolved enum");
128 lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo),
129 "unstripped attribute");
127 130
128 if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) 131 if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT)
129 goto err_conv; 132 goto err_conv;
130 133
131 /* Some basic sanity checks. */ 134 /* Some basic sanity checks. */
132 lua_assert(!ctype_isnum(dinfo) || dsize > 0); 135 lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type");
133 lua_assert(!ctype_isnum(sinfo) || ssize > 0); 136 lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type");
134 lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4); 137 lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4,
135 lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4); 138 "bad size for bool type");
136 lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize); 139 lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4,
137 lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize); 140 "bad size for bool type");
141 lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize,
142 "bad size for integer type");
143 lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize,
144 "bad size for integer type");
138 145
139 switch (cconv_idx2(dinfo, sinfo)) { 146 switch (cconv_idx2(dinfo, sinfo)) {
140 /* Destination is a bool. */ 147 /* Destination is a bool. */
@@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
357 if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s) 364 if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s)
358 goto err_conv; /* Must be exact same type. */ 365 goto err_conv; /* Must be exact same type. */
359copyval: /* Copy value. */ 366copyval: /* Copy value. */
360 lua_assert(dsize == ssize); 367 lj_assertCTS(dsize == ssize, "value copy with different sizes");
361 memcpy(dp, sp, dsize); 368 memcpy(dp, sp, dsize);
362 break; 369 break;
363 370
@@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
389 lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, 396 lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s,
390 (uint8_t *)&o->n, sp, 0); 397 (uint8_t *)&o->n, sp, 0);
391 /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ 398 /* Numbers are NOT canonicalized here! Beware of uninitialized data. */
392 lua_assert(tvisnum(o)); 399 lj_assertCTS(tvisnum(o), "non-canonical NaN passed");
393 } 400 }
394 } else { 401 } else {
395 uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); 402 uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0);
@@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
406 CTSize sz; 413 CTSize sz;
407 copyval: /* Copy value. */ 414 copyval: /* Copy value. */
408 sz = s->size; 415 sz = s->size;
409 lua_assert(sz != CTSIZE_INVALID); 416 lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size");
410 /* Attributes are stripped, qualifiers are kept (but mostly ignored). */ 417 /* Attributes are stripped, qualifiers are kept (but mostly ignored). */
411 cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); 418 cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz);
412 setcdataV(cts->L, o, cd); 419 setcdataV(cts->L, o, cd);
@@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
421 CTInfo info = s->info; 428 CTInfo info = s->info;
422 CTSize pos, bsz; 429 CTSize pos, bsz;
423 uint32_t val; 430 uint32_t val;
424 lua_assert(ctype_isbitfield(info)); 431 lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
425 /* NYI: packed bitfields may cause misaligned reads. */ 432 /* NYI: packed bitfields may cause misaligned reads. */
426 switch (ctype_bitcsz(info)) { 433 switch (ctype_bitcsz(info)) {
427 case 4: val = *(uint32_t *)sp; break; 434 case 4: val = *(uint32_t *)sp; break;
428 case 2: val = *(uint16_t *)sp; break; 435 case 2: val = *(uint16_t *)sp; break;
429 case 1: val = *(uint8_t *)sp; break; 436 case 1: val = *(uint8_t *)sp; break;
430 default: lua_assert(0); val = 0; break; 437 default:
438 lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
439 val = 0;
440 break;
431 } 441 }
432 /* Check if a packed bitfield crosses a container boundary. */ 442 /* Check if a packed bitfield crosses a container boundary. */
433 pos = ctype_bitpos(info); 443 pos = ctype_bitpos(info);
434 bsz = ctype_bitbsz(info); 444 bsz = ctype_bitbsz(info);
435 lua_assert(pos < 8*ctype_bitcsz(info)); 445 lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
436 lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); 446 lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
437 if (pos + bsz > 8*ctype_bitcsz(info)) 447 if (pos + bsz > 8*ctype_bitcsz(info))
438 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); 448 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
439 if (!(info & CTF_BOOL)) { 449 if (!(info & CTF_BOOL)) {
@@ -448,8 +458,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
448 setintV(o, (int32_t)val); 458 setintV(o, (int32_t)val);
449 } 459 }
450 } else { 460 } else {
451 lua_assert(bsz == 1); 461 uint32_t b = (val >> pos) & 1;
452 setboolV(o, (val >> pos) & 1); 462 lj_assertCTS(bsz == 1, "bad bool bitfield size");
463 setboolV(o, b);
464 setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */
453 } 465 }
454 return 0; /* No GC step needed. */ 466 return 0; /* No GC step needed. */
455} 467}
@@ -551,7 +563,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
551 sid = cdataV(o)->ctypeid; 563 sid = cdataV(o)->ctypeid;
552 s = ctype_get(cts, sid); 564 s = ctype_get(cts, sid);
553 if (ctype_isref(s->info)) { /* Resolve reference for value. */ 565 if (ctype_isref(s->info)) { /* Resolve reference for value. */
554 lua_assert(s->size == CTSIZE_PTR); 566 lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
555 sp = *(void **)sp; 567 sp = *(void **)sp;
556 sid = ctype_cid(s->info); 568 sid = ctype_cid(s->info);
557 } 569 }
@@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
571 CType *cct = lj_ctype_getfield(cts, d, str, &ofs); 583 CType *cct = lj_ctype_getfield(cts, d, str, &ofs);
572 if (!cct || !ctype_isconstval(cct->info)) 584 if (!cct || !ctype_isconstval(cct->info))
573 goto err_conv; 585 goto err_conv;
574 lua_assert(d->size == 4); 586 lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */
575 sp = (uint8_t *)&cct->size; 587 sp = (uint8_t *)&cct->size;
576 sid = ctype_cid(cct->info); 588 sid = ctype_cid(cct->info);
577 } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */ 589 } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */
@@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
610 tmpptr = uddata(ud); 622 tmpptr = uddata(ud);
611 if (ud->udtype == UDTYPE_IO_FILE) 623 if (ud->udtype == UDTYPE_IO_FILE)
612 tmpptr = *(void **)tmpptr; 624 tmpptr = *(void **)tmpptr;
625 else if (ud->udtype == UDTYPE_BUFFER)
626 tmpptr = ((SBufExt *)tmpptr)->r;
613 } else if (tvislightud(o)) { 627 } else if (tvislightud(o)) {
614 tmpptr = lightudV(o); 628 tmpptr = lightudV(cts->g, o);
615 } else if (tvisfunc(o)) { 629 } else if (tvisfunc(o)) {
616 void *p = lj_ccallback_new(cts, d, funcV(o)); 630 void *p = lj_ccallback_new(cts, d, funcV(o));
617 if (p) { 631 if (p) {
@@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
635 CTInfo info = d->info; 649 CTInfo info = d->info;
636 CTSize pos, bsz; 650 CTSize pos, bsz;
637 uint32_t val, mask; 651 uint32_t val, mask;
638 lua_assert(ctype_isbitfield(info)); 652 lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
639 if ((info & CTF_BOOL)) { 653 if ((info & CTF_BOOL)) {
640 uint8_t tmpbool; 654 uint8_t tmpbool;
641 lua_assert(ctype_bitbsz(info) == 1); 655 lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size");
642 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); 656 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0);
643 val = tmpbool; 657 val = tmpbool;
644 } else { 658 } else {
@@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
647 } 661 }
648 pos = ctype_bitpos(info); 662 pos = ctype_bitpos(info);
649 bsz = ctype_bitbsz(info); 663 bsz = ctype_bitbsz(info);
650 lua_assert(pos < 8*ctype_bitcsz(info)); 664 lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
651 lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); 665 lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
652 /* Check if a packed bitfield crosses a container boundary. */ 666 /* Check if a packed bitfield crosses a container boundary. */
653 if (pos + bsz > 8*ctype_bitcsz(info)) 667 if (pos + bsz > 8*ctype_bitcsz(info))
654 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); 668 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
@@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
659 case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; 673 case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break;
660 case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; 674 case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break;
661 case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; 675 case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break;
662 default: lua_assert(0); break; 676 default:
677 lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
678 break;
663 } 679 }
664} 680}
665 681
diff --git a/src/lj_cconv.h b/src/lj_cconv.h
index 6199448d..9da2c33c 100644
--- a/src/lj_cconv.h
+++ b/src/lj_cconv.h
@@ -27,13 +27,14 @@ enum {
27static LJ_AINLINE uint32_t cconv_idx(CTInfo info) 27static LJ_AINLINE uint32_t cconv_idx(CTInfo info)
28{ 28{
29 uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ 29 uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */
30 lua_assert(ctype_type(info) <= CT_MAYCONVERT); 30 lj_assertX(ctype_type(info) <= CT_MAYCONVERT,
31 "cannot convert ctype %08x", info);
31#if LJ_64 32#if LJ_64
32 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); 33 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u);
33#else 34#else
34 idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); 35 idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u);
35#endif 36#endif
36 lua_assert(idx < 8); 37 lj_assertX(idx < 8, "cannot convert ctype %08x", info);
37 return idx; 38 return idx;
38} 39}
39 40
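
cconv_idx() packs a 16-entry, 4-bit-per-entry dispatch table into the single constant U64x(f436fff5,fff7f021) and indexes it with the ctype's top dispatch bits. A sketch that just unpacks the table (the meaning of each class value comes from the enum at the top of lj_cconv.h and is not spelled out here):

#include <stdio.h>
#include <stdint.h>

static const uint64_t tab = 0xf436fff5fff7f021ULL;  /* Same constant as above. */

int main(void)
{
  unsigned i;
  for (i = 0; i < 16; i++)             /* i plays the role of (info >> 26) & 15. */
    printf("dispatch %2u -> class %u\n", i, (unsigned)(tab >> 4*i) & 15u);
  return 0;
}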
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 32c69829..2879e2a8 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -27,20 +26,20 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
27} 26}
28 27
29/* Allocate variable-sized or specially aligned C data object. */ 28/* Allocate variable-sized or specially aligned C data object. */
30GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) 29GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
31{ 30{
32 global_State *g; 31 global_State *g;
33 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + 32 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
34 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); 33 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
35 char *p = lj_mem_newt(cts->L, extra + sz, char); 34 char *p = lj_mem_newt(L, extra + sz, char);
36 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); 35 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
37 uintptr_t almask = (1u << align) - 1u; 36 uintptr_t almask = (1u << align) - 1u;
38 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); 37 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
39 lua_assert((char *)cd - p < 65536); 38 lj_assertL((char *)cd - p < 65536, "excessive cdata alignment");
40 cdatav(cd)->offset = (uint16_t)((char *)cd - p); 39 cdatav(cd)->offset = (uint16_t)((char *)cd - p);
41 cdatav(cd)->extra = extra; 40 cdatav(cd)->extra = extra;
42 cdatav(cd)->len = sz; 41 cdatav(cd)->len = sz;
43 g = cts->g; 42 g = G(L);
44 setgcrefr(cd->nextgc, g->gc.root); 43 setgcrefr(cd->nextgc, g->gc.root);
45 setgcref(g->gc.root, obj2gco(cd)); 44 setgcref(g->gc.root, obj2gco(cd));
46 newwhite(g, obj2gco(cd)); 45 newwhite(g, obj2gco(cd));
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
50 return cd; 49 return cd;
51} 50}
52 51
52/* Allocate arbitrary C data object. */
53GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info)
54{
55 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
56 return lj_cdata_new(cts, id, sz);
57 else
58 return lj_cdata_newv(cts->L, id, sz, ctype_align(info));
59}
60
53/* Free a C data object. */ 61/* Free a C data object. */
54void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) 62void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
55{ 63{
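Note: the new lj_cdata_newx() is a single allocation entry point that picks the allocator: fixed-size, default-aligned types keep the fast lj_cdata_new() path, while VLAs and over-aligned types go through lj_cdata_newv(), which now takes a lua_State directly. An illustrative dispatch table (the example ctypes are assumptions, not part of the patch):

/* struct { int x; }                   -> lj_cdata_new   (fixed size, align <= CT_MEMALIGN) */
/* struct { int n; double v[?]; }      -> lj_cdata_newv  (CTF_VLA set)                      */
/* 16-byte aligned vector type         -> lj_cdata_newv  (ctype_align > CT_MEMALIGN)        */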
@@ -68,29 +76,30 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
68 } else if (LJ_LIKELY(!cdataisv(cd))) { 76 } else if (LJ_LIKELY(!cdataisv(cd))) {
69 CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); 77 CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid);
70 CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; 78 CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR;
71 lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || 79 lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) ||
72 ctype_isextern(ct->info)); 80 ctype_isextern(ct->info), "free of ctype without a size");
73 lj_mem_free(g, cd, sizeof(GCcdata) + sz); 81 lj_mem_free(g, cd, sizeof(GCcdata) + sz);
74 } else { 82 } else {
75 lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); 83 lj_mem_free(g, memcdatav(cd), sizecdatav(cd));
76 } 84 }
77} 85}
78 86
79TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) 87void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
80{ 88{
81 global_State *g = G(L); 89 GCtab *t = tabref(G(L)->gcroot[GCROOT_FFI_FIN]);
82 GCtab *t = ctype_ctsG(g)->finalizer;
83 if (gcref(t->metatable)) { 90 if (gcref(t->metatable)) {
84 /* Add cdata to finalizer table, if still enabled. */ 91 /* Add cdata to finalizer table, if still enabled. */
85 TValue *tv, tmp; 92 TValue *tv, tmp;
86 setcdataV(L, &tmp, cd); 93 setcdataV(L, &tmp, cd);
87 lj_gc_anybarriert(L, t); 94 lj_gc_anybarriert(L, t);
88 tv = lj_tab_set(L, t, &tmp); 95 tv = lj_tab_set(L, t, &tmp);
89 cd->marked |= LJ_GC_CDATA_FIN; 96 if (it == LJ_TNIL) {
90 return tv; 97 setnilV(tv);
91 } else { 98 cd->marked &= ~LJ_GC_CDATA_FIN;
92 /* Otherwise return dummy TValue. */ 99 } else {
93 return &g->tmptv; 100 setgcV(L, tv, obj, it);
101 cd->marked |= LJ_GC_CDATA_FIN;
102 }
94 } 103 }
95} 104}
96 105
@@ -106,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp,
106 115
107 /* Resolve reference for cdata object. */ 116 /* Resolve reference for cdata object. */
108 if (ctype_isref(ct->info)) { 117 if (ctype_isref(ct->info)) {
109 lua_assert(ct->size == CTSIZE_PTR); 118 lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized");
110 p = *(uint8_t **)p; 119 p = *(uint8_t **)p;
111 ct = ctype_child(cts, ct); 120 ct = ctype_child(cts, ct);
112 } 121 }
@@ -117,13 +126,19 @@ collect_attrib:
117 if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; 126 if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size;
118 ct = ctype_child(cts, ct); 127 ct = ctype_child(cts, ct);
119 } 128 }
120 lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ 129 /* Interning rejects refs to refs. */
130 lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref");
121 131
122 if (tvisint(key)) { 132 if (tvisint(key)) {
123 idx = (ptrdiff_t)intV(key); 133 idx = (ptrdiff_t)intV(key);
124 goto integer_key; 134 goto integer_key;
125 } else if (tvisnum(key)) { /* Numeric key. */ 135 } else if (tvisnum(key)) { /* Numeric key. */
126 idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key)); 136#ifdef _MSC_VER
137 /* Workaround for MSVC bug. */
138 volatile
139#endif
140 lua_Number n = numV(key);
141 idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
127 integer_key: 142 integer_key:
128 if (ctype_ispointer(ct->info)) { 143 if (ctype_ispointer(ct->info)) {
129 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ 144 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
@@ -198,7 +213,8 @@ collect_attrib:
198static void cdata_getconst(CTState *cts, TValue *o, CType *ct) 213static void cdata_getconst(CTState *cts, TValue *o, CType *ct)
199{ 214{
200 CType *ctt = ctype_child(cts, ct); 215 CType *ctt = ctype_child(cts, ct);
201 lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); 216 lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
217 "only 32 bit const supported"); /* NYI */
202 /* Constants are already zero-extended/sign-extended to 32 bits. */ 218 /* Constants are already zero-extended/sign-extended to 32 bits. */
203 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) 219 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
204 setnumV(o, (lua_Number)(uint32_t)ct->size); 220 setnumV(o, (lua_Number)(uint32_t)ct->size);
@@ -219,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp)
219 } 235 }
220 236
221 /* Get child type of pointer/array/field. */ 237 /* Get child type of pointer/array/field. */
222 lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info)); 238 lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info),
239 "pointer or field expected");
223 sid = ctype_cid(s->info); 240 sid = ctype_cid(s->info);
224 s = ctype_get(cts, sid); 241 s = ctype_get(cts, sid);
225 242
226 /* Resolve reference for field. */ 243 /* Resolve reference for field. */
227 if (ctype_isref(s->info)) { 244 if (ctype_isref(s->info)) {
228 lua_assert(s->size == CTSIZE_PTR); 245 lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
229 sp = *(uint8_t **)sp; 246 sp = *(uint8_t **)sp;
230 sid = ctype_cid(s->info); 247 sid = ctype_cid(s->info);
231 s = ctype_get(cts, sid); 248 s = ctype_get(cts, sid);
@@ -252,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
252 } 269 }
253 270
254 /* Get child type of pointer/array/field. */ 271 /* Get child type of pointer/array/field. */
255 lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info)); 272 lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info),
273 "pointer or field expected");
256 d = ctype_child(cts, d); 274 d = ctype_child(cts, d);
257 275
258 /* Resolve reference for field. */ 276 /* Resolve reference for field. */
259 if (ctype_isref(d->info)) { 277 if (ctype_isref(d->info)) {
260 lua_assert(d->size == CTSIZE_PTR); 278 lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized");
261 dp = *(uint8_t **)dp; 279 dp = *(uint8_t **)dp;
262 d = ctype_child(cts, d); 280 d = ctype_child(cts, d);
263 } 281 }
@@ -272,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
272 d = ctype_child(cts, d); 290 d = ctype_child(cts, d);
273 } 291 }
274 292
275 lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info)); 293 lj_assertCTS(ctype_hassize(d->info), "store to ctype without size");
294 lj_assertCTS(!ctype_isvoid(d->info), "store to void type");
276 295
277 if (((d->info|qual) & CTF_CONST)) { 296 if (((d->info|qual) & CTF_CONST)) {
278 err_const: 297 err_const:
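Note: lj_cdata_setfin() changes shape in this file. Instead of handing back a TValue slot for the caller to fill, it now receives the finalizer object and its type tag, writes the GCROOT_FFI_FIN table entry itself, and sets or clears the LJ_GC_CDATA_FIN mark accordingly. A hedged sketch of the new call pattern (the call sites shown are illustrative; the real ones are in lib_ffi.c and in the recorder changes below):

lj_cdata_setfin(L, cd, gcval(fin), itype(fin));  /* install a function or cdata finalizer */
lj_cdata_setfin(L, cd, NULL, LJ_TNIL);           /* clear it again; obj is ignored for nil */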
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 4de5969a..e6ab48ca 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz)
18 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ 18 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
19 return ((void *)(uintptr_t)*(uint32_t *)p); 19 return ((void *)(uintptr_t)*(uint32_t *)p);
20 } else { 20 } else {
21 lua_assert(sz == CTSIZE_PTR); 21 lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
22 return *(void **)p; 22 return *(void **)p;
23 } 23 }
24} 24}
@@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v)
29 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ 29 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
30 *(uint32_t *)p = (uint32_t)(uintptr_t)v; 30 *(uint32_t *)p = (uint32_t)(uintptr_t)v;
31 } else { 31 } else {
32 lua_assert(sz == CTSIZE_PTR); 32 lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
33 *(void **)p = (void *)v; 33 *(void **)p = (void *)v;
34 } 34 }
35} 35}
@@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz)
40 GCcdata *cd; 40 GCcdata *cd;
41#ifdef LUA_USE_ASSERT 41#ifdef LUA_USE_ASSERT
42 CType *ct = ctype_raw(cts, id); 42 CType *ct = ctype_raw(cts, id);
43 lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz); 43 lj_assertCTS((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz,
44 "inconsistent size of fixed-size cdata alloc");
44#endif 45#endif
45 cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); 46 cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz);
46 cd->gct = ~LJ_TCDATA; 47 cd->gct = ~LJ_TCDATA;
@@ -58,11 +59,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
58} 59}
59 60
60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); 61LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
61LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, 62LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
62 CTSize align); 63 CTSize align);
64LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz,
65 CTInfo info);
63 66
64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); 67LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
65LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); 68LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
69 uint32_t it);
66 70
67LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, 71LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
68 uint8_t **pp, CTInfo *qual); 72 uint8_t **pp, CTInfo *qual);
diff --git a/src/lj_clib.c b/src/lj_clib.c
index d8d879a0..513528ce 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -16,6 +16,7 @@
16#include "lj_cconv.h" 16#include "lj_cconv.h"
17#include "lj_cdata.h" 17#include "lj_cdata.h"
18#include "lj_clib.h" 18#include "lj_clib.h"
19#include "lj_strfmt.h"
19 20
20/* -- OS-specific functions ----------------------------------------------- */ 21/* -- OS-specific functions ----------------------------------------------- */
21 22
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
61#endif 62#endif
62 ) { 63 ) {
63 if (!strchr(name, '.')) { 64 if (!strchr(name, '.')) {
64 name = lj_str_pushf(L, CLIB_SOEXT, name); 65 name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
65 L->top--; 66 L->top--;
66#if LJ_TARGET_CYGWIN 67#if LJ_TARGET_CYGWIN
67 } else { 68 } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
70 } 71 }
71 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && 72 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
72 name[2] == CLIB_SOPREFIX[2])) { 73 name[2] == CLIB_SOPREFIX[2])) {
73 name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); 74 name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
74 L->top--; 75 L->top--;
75 } 76 }
76 } 77 }
@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
158/* Default libraries. */ 159/* Default libraries. */
159enum { 160enum {
160 CLIB_HANDLE_EXE, 161 CLIB_HANDLE_EXE,
162#if !LJ_TARGET_UWP
161 CLIB_HANDLE_DLL, 163 CLIB_HANDLE_DLL,
162 CLIB_HANDLE_CRT, 164 CLIB_HANDLE_CRT,
163 CLIB_HANDLE_KERNEL32, 165 CLIB_HANDLE_KERNEL32,
164 CLIB_HANDLE_USER32, 166 CLIB_HANDLE_USER32,
165 CLIB_HANDLE_GDI32, 167 CLIB_HANDLE_GDI32,
168#endif
166 CLIB_HANDLE_MAX 169 CLIB_HANDLE_MAX
167}; 170};
168 171
@@ -172,11 +175,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
172 const char *name) 175 const char *name)
173{ 176{
174 DWORD err = GetLastError(); 177 DWORD err = GetLastError();
178#if LJ_TARGET_XBOXONE
179 wchar_t wbuf[128];
180 char buf[128*2];
181 if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
182 NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) ||
183 !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL))
184#else
175 char buf[128]; 185 char buf[128];
176 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, 186 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
177 NULL, err, 0, buf, sizeof(buf), NULL)) 187 NULL, err, 0, buf, sizeof(buf), NULL))
188#endif
178 buf[0] = '\0'; 189 buf[0] = '\0';
179 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); 190 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
180} 191}
181 192
182static int clib_needext(const char *s) 193static int clib_needext(const char *s)
@@ -191,7 +202,7 @@ static int clib_needext(const char *s)
191static const char *clib_extname(lua_State *L, const char *name) 202static const char *clib_extname(lua_State *L, const char *name)
192{ 203{
193 if (clib_needext(name)) { 204 if (clib_needext(name)) {
194 name = lj_str_pushf(L, "%s.dll", name); 205 name = lj_strfmt_pushf(L, "%s.dll", name);
195 L->top--; 206 L->top--;
196 } 207 }
197 return name; 208 return name;
@@ -200,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name)
200static void *clib_loadlib(lua_State *L, const char *name, int global) 211static void *clib_loadlib(lua_State *L, const char *name, int global)
201{ 212{
202 DWORD oldwerr = GetLastError(); 213 DWORD oldwerr = GetLastError();
203 void *h = (void *)LoadLibraryA(clib_extname(L, name)); 214 void *h = LJ_WIN_LOADLIBA(clib_extname(L, name));
204 if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); 215 if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
205 SetLastError(oldwerr); 216 SetLastError(oldwerr);
206 UNUSED(global); 217 UNUSED(global);
@@ -210,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global)
210static void clib_unloadlib(CLibrary *cl) 221static void clib_unloadlib(CLibrary *cl)
211{ 222{
212 if (cl->handle == CLIB_DEFHANDLE) { 223 if (cl->handle == CLIB_DEFHANDLE) {
224#if !LJ_TARGET_UWP
213 MSize i; 225 MSize i;
214 for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { 226 for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) {
215 void *h = clib_def_handle[i]; 227 void *h = clib_def_handle[i];
@@ -218,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl)
218 FreeLibrary((HINSTANCE)h); 230 FreeLibrary((HINSTANCE)h);
219 } 231 }
220 } 232 }
233#endif
221 } else if (cl->handle) { 234 } else if (cl->handle) {
222 FreeLibrary((HINSTANCE)cl->handle); 235 FreeLibrary((HINSTANCE)cl->handle);
223 } 236 }
224} 237}
225 238
239#if LJ_TARGET_UWP
240EXTERN_C IMAGE_DOS_HEADER __ImageBase;
241#endif
242
226static void *clib_getsym(CLibrary *cl, const char *name) 243static void *clib_getsym(CLibrary *cl, const char *name)
227{ 244{
228 void *p = NULL; 245 void *p = NULL;
@@ -231,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
231 for (i = 0; i < CLIB_HANDLE_MAX; i++) { 248 for (i = 0; i < CLIB_HANDLE_MAX; i++) {
232 HINSTANCE h = (HINSTANCE)clib_def_handle[i]; 249 HINSTANCE h = (HINSTANCE)clib_def_handle[i];
233 if (!(void *)h) { /* Resolve default library handles (once). */ 250 if (!(void *)h) { /* Resolve default library handles (once). */
251#if LJ_TARGET_UWP
252 h = (HINSTANCE)&__ImageBase;
253#else
234 switch (i) { 254 switch (i) {
235 case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; 255 case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break;
236 case CLIB_HANDLE_DLL: 256 case CLIB_HANDLE_DLL:
@@ -241,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name)
241 GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, 261 GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
242 (const char *)&_fmode, &h); 262 (const char *)&_fmode, &h);
243 break; 263 break;
244 case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break; 264 case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break;
245 case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break; 265 case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break;
246 case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break; 266 case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break;
247 } 267 }
248 if (!h) continue; 268 if (!h) continue;
269#endif
249 clib_def_handle[i] = (void *)h; 270 clib_def_handle[i] = (void *)h;
250 } 271 }
251 p = (void *)GetProcAddress(h, name); 272 p = (void *)GetProcAddress(h, name);
@@ -264,7 +285,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
264LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, 285LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
265 const char *name) 286 const char *name)
266{ 287{
267 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); 288 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
268} 289}
269 290
270static void *clib_loadlib(lua_State *L, const char *name, int global) 291static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -329,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
329 lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); 350 lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name));
330 if (ctype_isconstval(ct->info)) { 351 if (ctype_isconstval(ct->info)) {
331 CType *ctt = ctype_child(cts, ct); 352 CType *ctt = ctype_child(cts, ct);
332 lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); 353 lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
354 "only 32 bit const supported"); /* NYI */
333 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) 355 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
334 setnumV(tv, (lua_Number)(uint32_t)ct->size); 356 setnumV(tv, (lua_Number)(uint32_t)ct->size);
335 else 357 else
@@ -341,14 +363,15 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
341#endif 363#endif
342 void *p = clib_getsym(cl, sym); 364 void *p = clib_getsym(cl, sym);
343 GCcdata *cd; 365 GCcdata *cd;
344 lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); 366 lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info),
367 "unexpected ctype %08x in clib", ct->info);
345#if LJ_TARGET_X86 && LJ_ABI_WIN 368#if LJ_TARGET_X86 && LJ_ABI_WIN
346 /* Retry with decorated name for fastcall/stdcall functions. */ 369 /* Retry with decorated name for fastcall/stdcall functions. */
347 if (!p && ctype_isfunc(ct->info)) { 370 if (!p && ctype_isfunc(ct->info)) {
348 CTInfo cconv = ctype_cconv(ct->info); 371 CTInfo cconv = ctype_cconv(ct->info);
349 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { 372 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
350 CTSize sz = clib_func_argsize(cts, ct); 373 CTSize sz = clib_func_argsize(cts, ct);
351 const char *symd = lj_str_pushf(L, 374 const char *symd = lj_strfmt_pushf(L,
352 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", 375 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
353 sym, sz); 376 sym, sz);
354 L->top--; 377 L->top--;
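Note: besides switching all lj_str_pushf() formatting to lj_strfmt_pushf(), the Windows loader gains UWP support (a single __ImageBase handle replaces the per-library defaults) and uses LJ_WIN_LOADLIBA instead of LoadLibraryA. The x86 decorated-name retry is unchanged in substance; as a worked example of the names it generates (the declarations are assumptions, not part of the patch):

/* int __stdcall  f(int, int);  -> if plain "f" is not found, retry "_f@8" */
/* int __fastcall f(int, int);  -> if plain "f" is not found, retry "@f@8" */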
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 2ef7dbe1..9774f3a5 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,13 +9,14 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_ctype.h" 13#include "lj_ctype.h"
14#include "lj_cparse.h" 14#include "lj_cparse.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_vm.h" 16#include "lj_vm.h"
17#include "lj_char.h" 17#include "lj_char.h"
18#include "lj_strscan.h" 18#include "lj_strscan.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** Important note: this is NOT a validating C parser! This is a minimal 22** Important note: this is NOT a validating C parser! This is a minimal
@@ -27,6 +28,30 @@
27** If in doubt, please check the input against your favorite C compiler. 28** If in doubt, please check the input against your favorite C compiler.
28*/ 29*/
29 30
31#ifdef LUA_USE_ASSERT
32#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__))
33#else
34#define lj_assertCP(c, ...) ((void)cp)
35#endif
36
37/* -- Miscellaneous ------------------------------------------------------- */
38
39/* Match string against a C literal. */
40#define cp_str_is(str, k) \
41 ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1))
42
43/* Check string against a linear list of matches. */
44int lj_cparse_case(GCstr *str, const char *match)
45{
46 MSize len;
47 int n;
48 for (n = 0; (len = (MSize)*match++); n++, match += len) {
49 if (str->len == len && !memcmp(match, strdata(str), len))
50 return n;
51 }
52 return -1;
53}
54
30/* -- C lexer ------------------------------------------------------------- */ 55/* -- C lexer ------------------------------------------------------------- */
31 56
32/* C lexer token names. */ 57/* C lexer token names. */
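Note: lj_cparse_case() replaces the old hash-based attribute dispatch. Its match argument is a concatenation of length-prefixed literals (the prefix is an octal escape giving the byte length of the following literal), and the return value is the ordinal of the first hit, or -1 for no match. A short usage sketch with a hypothetical caller, mirroring how the GCC attribute parser below uses it:

switch (lj_cparse_case(attrstr, "\007aligned" "\013__aligned__")) {
case 0: case 1:  /* "aligned" (7 bytes) or "__aligned__" (013 octal = 11 bytes) */
  break;
default:  /* -1: no match */
  break;
}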
@@ -42,13 +67,13 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em);
42 67
43static const char *cp_tok2str(CPState *cp, CPToken tok) 68static const char *cp_tok2str(CPState *cp, CPToken tok)
44{ 69{
45 lua_assert(tok < CTOK_FIRSTDECL); 70 lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok);
46 if (tok > CTOK_OFS) 71 if (tok > CTOK_OFS)
47 return ctoknames[tok-CTOK_OFS-1]; 72 return ctoknames[tok-CTOK_OFS-1];
48 else if (!lj_char_iscntrl(tok)) 73 else if (!lj_char_iscntrl(tok))
49 return lj_str_pushf(cp->L, "%c", tok); 74 return lj_strfmt_pushf(cp->L, "%c", tok);
50 else 75 else
51 return lj_str_pushf(cp->L, "char(%d)", tok); 76 return lj_strfmt_pushf(cp->L, "char(%d)", tok);
52} 77}
53 78
54/* End-of-line? */ 79/* End-of-line? */
@@ -85,24 +110,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
85 return cp_get(cp); 110 return cp_get(cp);
86} 111}
87 112
88/* Grow save buffer. */
89static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
90{
91 MSize newsize;
92 if (cp->sb.sz >= CPARSE_MAX_BUF/2)
93 cp_err(cp, LJ_ERR_XELEM);
94 newsize = cp->sb.sz * 2;
95 lj_str_resizebuf(cp->L, &cp->sb, newsize);
96 cp->sb.buf[cp->sb.n++] = (char)c;
97}
98
99/* Save character in buffer. */ 113/* Save character in buffer. */
100static LJ_AINLINE void cp_save(CPState *cp, CPChar c) 114static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
101{ 115{
102 if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) 116 lj_buf_putb(&cp->sb, c);
103 cp_save_grow(cp, c);
104 else
105 cp->sb.buf[cp->sb.n++] = (char)c;
106} 117}
107 118
108/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ 119/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +133,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
122 tokstr = NULL; 133 tokstr = NULL;
123 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || 134 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
124 tok >= CTOK_FIRSTDECL) { 135 tok >= CTOK_FIRSTDECL) {
125 if (cp->sb.n == 0) cp_save(cp, '$'); 136 if (cp->sb.w == cp->sb.b) cp_save(cp, '$');
126 cp_save(cp, '\0'); 137 cp_save(cp, '\0');
127 tokstr = cp->sb.buf; 138 tokstr = cp->sb.b;
128 } else { 139 } else {
129 tokstr = cp_tok2str(cp, tok); 140 tokstr = cp_tok2str(cp, tok);
130 } 141 }
131 L = cp->L; 142 L = cp->L;
132 va_start(argp, em); 143 va_start(argp, em);
133 msg = lj_str_pushvf(L, err2msg(em), argp); 144 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
134 va_end(argp); 145 va_end(argp);
135 if (tokstr) 146 if (tokstr)
136 msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); 147 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
137 if (cp->linenumber > 1) 148 if (cp->linenumber > 1)
138 msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); 149 msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
139 lj_err_callermsg(L, msg); 150 lj_err_callermsg(L, msg);
140} 151}
141 152
@@ -164,7 +175,8 @@ static CPToken cp_number(CPState *cp)
164 TValue o; 175 TValue o;
165 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 176 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
166 cp_save(cp, '\0'); 177 cp_save(cp, '\0');
167 fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); 178 fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1,
179 &o, STRSCAN_OPT_C);
168 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; 180 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
169 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; 181 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
170 else if (!(cp->mode & CPARSE_MODE_SKIP)) 182 else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +189,7 @@ static CPToken cp_number(CPState *cp)
177static CPToken cp_ident(CPState *cp) 189static CPToken cp_ident(CPState *cp)
178{ 190{
179 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 191 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
180 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 192 cp->str = lj_buf_str(cp->L, &cp->sb);
181 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); 193 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
182 if (ctype_type(cp->ct->info) == CT_KW) 194 if (ctype_type(cp->ct->info) == CT_KW)
183 return ctype_cid(cp->ct->info); 195 return ctype_cid(cp->ct->info);
@@ -263,11 +275,11 @@ static CPToken cp_string(CPState *cp)
263 } 275 }
264 cp_get(cp); 276 cp_get(cp);
265 if (delim == '"') { 277 if (delim == '"') {
266 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 278 cp->str = lj_buf_str(cp->L, &cp->sb);
267 return CTOK_STRING; 279 return CTOK_STRING;
268 } else { 280 } else {
269 if (cp->sb.n != 1) cp_err_token(cp, '\''); 281 if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
270 cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; 282 cp->val.i32 = (int32_t)(char)*cp->sb.b;
271 cp->val.id = CTID_INT32; 283 cp->val.id = CTID_INT32;
272 return CTOK_INTEGER; 284 return CTOK_INTEGER;
273 } 285 }
@@ -296,7 +308,7 @@ static void cp_comment_cpp(CPState *cp)
296/* Lexical scanner for C. Only a minimal subset is implemented. */ 308/* Lexical scanner for C. Only a minimal subset is implemented. */
297static CPToken cp_next_(CPState *cp) 309static CPToken cp_next_(CPState *cp)
298{ 310{
299 lj_str_resetbuf(&cp->sb); 311 lj_buf_reset(&cp->sb);
300 for (;;) { 312 for (;;) {
301 if (lj_char_isident(cp->c)) 313 if (lj_char_isident(cp->c))
302 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); 314 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -385,9 +397,8 @@ static void cp_init(CPState *cp)
385 cp->depth = 0; 397 cp->depth = 0;
386 cp->curpack = 0; 398 cp->curpack = 0;
387 cp->packstack[0] = 255; 399 cp->packstack[0] = 255;
388 lj_str_initbuf(&cp->sb); 400 lj_buf_init(cp->L, &cp->sb);
389 lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF); 401 lj_assertCP(cp->p != NULL, "uninitialized cp->p");
390 lua_assert(cp->p != NULL);
391 cp_get(cp); /* Read-ahead first char. */ 402 cp_get(cp); /* Read-ahead first char. */
392 cp->tok = 0; 403 cp->tok = 0;
393 cp->tmask = CPNS_DEFAULT; 404 cp->tmask = CPNS_DEFAULT;
@@ -398,7 +409,7 @@ static void cp_init(CPState *cp)
398static void cp_cleanup(CPState *cp) 409static void cp_cleanup(CPState *cp)
399{ 410{
400 global_State *g = G(cp->L); 411 global_State *g = G(cp->L);
401 lj_str_freebuf(g, &cp->sb); 412 lj_buf_free(g, &cp->sb);
402} 413}
403 414
404/* Check and consume optional token. */ 415/* Check and consume optional token. */
@@ -848,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
848 /* The cid is already part of info for copies of pointers/functions. */ 859 /* The cid is already part of info for copies of pointers/functions. */
849 idx = ct->next; 860 idx = ct->next;
850 if (ctype_istypedef(info)) { 861 if (ctype_istypedef(info)) {
851 lua_assert(id == 0); 862 lj_assertCP(id == 0, "typedef not at toplevel");
852 id = ctype_cid(info); 863 id = ctype_cid(info);
853 /* Always refetch info/size, since struct/enum may have been completed. */ 864 /* Always refetch info/size, since struct/enum may have been completed. */
854 cinfo = ctype_get(cp->cts, id)->info; 865 cinfo = ctype_get(cp->cts, id)->info;
855 csize = ctype_get(cp->cts, id)->size; 866 csize = ctype_get(cp->cts, id)->size;
856 lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo)); 867 lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo),
868 "typedef of bad type");
857 } else if (ctype_isfunc(info)) { /* Intern function. */ 869 } else if (ctype_isfunc(info)) { /* Intern function. */
858 CType *fct; 870 CType *fct;
859 CTypeID fid; 871 CTypeID fid;
@@ -886,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
886 /* Inherit csize/cinfo from original type. */ 898 /* Inherit csize/cinfo from original type. */
887 } else { 899 } else {
888 if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ 900 if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */
889 lua_assert(id == 0); 901 lj_assertCP(id == 0, "number not at toplevel");
890 if (!(info & CTF_BOOL)) { 902 if (!(info & CTF_BOOL)) {
891 CTSize msize = ctype_msizeP(decl->attr); 903 CTSize msize = ctype_msizeP(decl->attr);
892 CTSize vsize = ctype_vsizeP(decl->attr); 904 CTSize vsize = ctype_vsizeP(decl->attr);
@@ -941,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
941 info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); 953 info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN);
942 info |= (cinfo & CTF_QUAL); /* Inherit qual. */ 954 info |= (cinfo & CTF_QUAL); /* Inherit qual. */
943 } else { 955 } else {
944 lua_assert(ctype_isvoid(info)); 956 lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info);
945 } 957 }
946 csize = size; 958 csize = size;
947 cinfo = info+id; 959 cinfo = info+id;
@@ -953,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
953 965
954/* -- C declaration parser ------------------------------------------------ */ 966/* -- C declaration parser ------------------------------------------------ */
955 967
956#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
957
958/* Reset declaration state to declaration specifier. */ 968/* Reset declaration state to declaration specifier. */
959static void cp_decl_reset(CPDecl *decl) 969static void cp_decl_reset(CPDecl *decl)
960{ 970{
@@ -1031,7 +1041,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
1031 if (cp->tok == CTOK_STRING) { 1041 if (cp->tok == CTOK_STRING) {
1032 GCstr *str = cp->str; 1042 GCstr *str = cp->str;
1033 while (cp_next(cp) == CTOK_STRING) { 1043 while (cp_next(cp) == CTOK_STRING) {
1034 lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); 1044 lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
1035 cp->L->top--; 1045 cp->L->top--;
1036 str = strV(cp->L->top); 1046 str = strV(cp->L->top);
1037 } 1047 }
@@ -1083,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
1083 if (cp->tok == CTOK_IDENT) { 1093 if (cp->tok == CTOK_IDENT) {
1084 GCstr *attrstr = cp->str; 1094 GCstr *attrstr = cp->str;
1085 cp_next(cp); 1095 cp_next(cp);
1086 switch (attrstr->hash) { 1096 switch (lj_cparse_case(attrstr,
1087 case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ 1097 "\007aligned" "\013__aligned__"
1098 "\006packed" "\012__packed__"
1099 "\004mode" "\010__mode__"
1100 "\013vector_size" "\017__vector_size__"
1101#if LJ_TARGET_X86
1102 "\007regparm" "\013__regparm__"
1103 "\005cdecl" "\011__cdecl__"
1104 "\010thiscall" "\014__thiscall__"
1105 "\010fastcall" "\014__fastcall__"
1106 "\007stdcall" "\013__stdcall__"
1107 "\012sseregparm" "\016__sseregparm__"
1108#endif
1109 )) {
1110 case 0: case 1: /* aligned */
1088 cp_decl_align(cp, decl); 1111 cp_decl_align(cp, decl);
1089 break; 1112 break;
1090 case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ 1113 case 2: case 3: /* packed */
1091 decl->attr |= CTFP_PACKED; 1114 decl->attr |= CTFP_PACKED;
1092 break; 1115 break;
1093 case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ 1116 case 4: case 5: /* mode */
1094 cp_decl_mode(cp, decl); 1117 cp_decl_mode(cp, decl);
1095 break; 1118 break;
1096 case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ 1119 case 6: case 7: /* vector_size */
1097 { 1120 {
1098 CTSize vsize = cp_decl_sizeattr(cp); 1121 CTSize vsize = cp_decl_sizeattr(cp);
1099 if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); 1122 if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize));
1100 } 1123 }
1101 break; 1124 break;
1102#if LJ_TARGET_X86 1125#if LJ_TARGET_X86
1103 case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ 1126 case 8: case 9: /* regparm */
1104 CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); 1127 CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp));
1105 decl->fattr |= CTFP_CCONV; 1128 decl->fattr |= CTFP_CCONV;
1106 break; 1129 break;
1107 case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ 1130 case 10: case 11: /* cdecl */
1108 CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); 1131 CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL);
1109 decl->fattr |= CTFP_CCONV; 1132 decl->fattr |= CTFP_CCONV;
1110 break; 1133 break;
1111 case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ 1134 case 12: case 13: /* thiscall */
1112 CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); 1135 CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL);
1113 decl->fattr |= CTFP_CCONV; 1136 decl->fattr |= CTFP_CCONV;
1114 break; 1137 break;
1115 case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ 1138 case 14: case 15: /* fastcall */
1116 CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); 1139 CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL);
1117 decl->fattr |= CTFP_CCONV; 1140 decl->fattr |= CTFP_CCONV;
1118 break; 1141 break;
1119 case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ 1142 case 16: case 17: /* stdcall */
1120 CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); 1143 CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL);
1121 decl->fattr |= CTFP_CCONV; 1144 decl->fattr |= CTFP_CCONV;
1122 break; 1145 break;
1123 case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ 1146 case 18: case 19: /* sseregparm */
1124 decl->fattr |= CTF_SSEREGPARM; 1147 decl->fattr |= CTF_SSEREGPARM;
1125 decl->fattr |= CTFP_CCONV; 1148 decl->fattr |= CTFP_CCONV;
1126 break; 1149 break;
@@ -1152,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl)
1152 while (cp->tok == CTOK_IDENT) { 1175 while (cp->tok == CTOK_IDENT) {
1153 GCstr *attrstr = cp->str; 1176 GCstr *attrstr = cp->str;
1154 cp_next(cp); 1177 cp_next(cp);
1155 switch (attrstr->hash) { 1178 if (cp_str_is(attrstr, "align")) {
1156 case H_(bc2395fa,98f267f8): /* align */
1157 cp_decl_align(cp, decl); 1179 cp_decl_align(cp, decl);
1158 break; 1180 } else { /* Ignore all other attributes. */
1159 default: /* Ignore all other attributes. */
1160 if (cp_opt(cp, '(')) { 1181 if (cp_opt(cp, '(')) {
1161 while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); 1182 while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp);
1162 cp_check(cp, ')'); 1183 cp_check(cp, ')');
1163 } 1184 }
1164 break;
1165 } 1185 }
1166 } 1186 }
1167 cp_check(cp, ')'); 1187 cp_check(cp, ')');
@@ -1572,7 +1592,7 @@ end_decl:
1572 cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); 1592 cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC);
1573 sz = sizeof(int); 1593 sz = sizeof(int);
1574 } 1594 }
1575 lua_assert(sz != 0); 1595 lj_assertCP(sz != 0, "basic ctype with zero size");
1576 info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ 1596 info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */
1577 info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */ 1597 info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */
1578 cp_push(decl, info, sz); 1598 cp_push(decl, info, sz);
@@ -1741,19 +1761,18 @@ static CTypeID cp_decl_abstract(CPState *cp)
1741static void cp_pragma(CPState *cp, BCLine pragmaline) 1761static void cp_pragma(CPState *cp, BCLine pragmaline)
1742{ 1762{
1743 cp_next(cp); 1763 cp_next(cp);
1744 if (cp->tok == CTOK_IDENT && 1764 if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) {
1745 cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */
1746 cp_next(cp); 1765 cp_next(cp);
1747 cp_check(cp, '('); 1766 cp_check(cp, '(');
1748 if (cp->tok == CTOK_IDENT) { 1767 if (cp->tok == CTOK_IDENT) {
1749 if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ 1768 if (cp_str_is(cp->str, "push")) {
1750 if (cp->curpack < CPARSE_MAX_PACKSTACK-1) { 1769 if (cp->curpack < CPARSE_MAX_PACKSTACK-1) {
1751 cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; 1770 cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
1752 cp->curpack++; 1771 cp->curpack++;
1753 } else { 1772 } else {
1754 cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS); 1773 cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS);
1755 } 1774 }
1756 } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ 1775 } else if (cp_str_is(cp->str, "pop")) {
1757 if (cp->curpack > 0) cp->curpack--; 1776 if (cp->curpack > 0) cp->curpack--;
1758 } else { 1777 } else {
1759 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); 1778 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
@@ -1775,6 +1794,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
1775 } 1794 }
1776} 1795}
1777 1796
1797/* Handle line number. */
1798static void cp_line(CPState *cp, BCLine hashline)
1799{
1800 BCLine newline = cp->val.u32;
1801 /* TODO: Handle file name and include it in error messages. */
1802 while (cp->tok != CTOK_EOF && cp->linenumber == hashline)
1803 cp_next(cp);
1804 cp->linenumber = newline;
1805}
1806
1778/* Parse multiple C declarations of types or extern identifiers. */ 1807/* Parse multiple C declarations of types or extern identifiers. */
1779static void cp_decl_multi(CPState *cp) 1808static void cp_decl_multi(CPState *cp)
1780{ 1809{
@@ -1787,12 +1816,21 @@ static void cp_decl_multi(CPState *cp)
1787 continue; 1816 continue;
1788 } 1817 }
1789 if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ 1818 if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */
1790 BCLine pragmaline = cp->linenumber; 1819 BCLine hashline = cp->linenumber;
1791 if (!(cp_next(cp) == CTOK_IDENT && 1820 CPToken tok = cp_next(cp);
1792 cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */ 1821 if (tok == CTOK_INTEGER) {
1822 cp_line(cp, hashline);
1823 continue;
1824 } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) {
1825 if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok);
1826 cp_line(cp, hashline);
1827 continue;
1828 } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) {
1829 cp_pragma(cp, hashline);
1830 continue;
1831 } else {
1793 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); 1832 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
1794 cp_pragma(cp, pragmaline); 1833 }
1795 continue;
1796 } 1834 }
1797 scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); 1835 scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC);
1798 if ((cp->tok == ';' || cp->tok == CTOK_EOF) && 1836 if ((cp->tok == ';' || cp->tok == CTOK_EOF) &&
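Note: the '#' workaround now recognizes line markers as well as pragmas. Both "# 42" and "#line 42" reset the parser's line counter via cp_line(), and anything else on that physical line (such as a file name) is skipped, per the TODO. An illustrative declaration block that the parser now accepts, shown as plain C input:

# 42 "myheader.h"
int foo(void);
#line 100
int bar(void);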
@@ -1816,7 +1854,7 @@ static void cp_decl_multi(CPState *cp)
1816 /* Treat both static and extern function declarations as extern. */ 1854 /* Treat both static and extern function declarations as extern. */
1817 ct = ctype_get(cp->cts, ctypeid); 1855 ct = ctype_get(cp->cts, ctypeid);
1818 /* We always get new anonymous functions (typedefs are copied). */ 1856 /* We always get new anonymous functions (typedefs are copied). */
1819 lua_assert(gcref(ct->name) == NULL); 1857 lj_assertCP(gcref(ct->name) == NULL, "unexpected named function");
1820 id = ctypeid; /* Just name it. */ 1858 id = ctypeid; /* Just name it. */
1821 } else if ((scl & CDF_STATIC)) { /* Accept static constants. */ 1859 } else if ((scl & CDF_STATIC)) { /* Accept static constants. */
1822 id = cp_decl_constinit(cp, &ct, ctypeid); 1860 id = cp_decl_constinit(cp, &ct, ctypeid);
@@ -1858,8 +1896,6 @@ static void cp_decl_single(CPState *cp)
1858 if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); 1896 if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF);
1859} 1897}
1860 1898
1861#undef H_
1862
1863/* ------------------------------------------------------------------------ */ 1899/* ------------------------------------------------------------------------ */
1864 1900
1865/* Protected callback for C parser. */ 1901/* Protected callback for C parser. */
@@ -1875,7 +1911,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud)
1875 cp_decl_single(cp); 1911 cp_decl_single(cp);
1876 if (cp->param && cp->param != cp->L->top) 1912 if (cp->param && cp->param != cp->L->top)
1877 cp_err(cp, LJ_ERR_FFI_NUMPARAM); 1913 cp_err(cp, LJ_ERR_FFI_NUMPARAM);
1878 lua_assert(cp->depth == 0); 1914 lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth");
1879 return NULL; 1915 return NULL;
1880} 1916}
1881 1917
diff --git a/src/lj_cparse.h b/src/lj_cparse.h
index f5268693..42f827a0 100644
--- a/src/lj_cparse.h
+++ b/src/lj_cparse.h
@@ -60,6 +60,8 @@ typedef struct CPState {
60 60
61LJ_FUNC int lj_cparse(CPState *cp); 61LJ_FUNC int lj_cparse(CPState *cp);
62 62
63LJ_FUNC int lj_cparse_case(GCstr *str, const char *match);
64
63#endif 65#endif
64 66
65#endif 67#endif
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 1f6396f6..e8ae426d 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -11,13 +11,13 @@
11#if LJ_HASJIT && LJ_HASFFI 11#if LJ_HASJIT && LJ_HASFFI
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h" 14#include "lj_tab.h"
16#include "lj_frame.h" 15#include "lj_frame.h"
17#include "lj_ctype.h" 16#include "lj_ctype.h"
18#include "lj_cdata.h" 17#include "lj_cdata.h"
19#include "lj_cparse.h" 18#include "lj_cparse.h"
20#include "lj_cconv.h" 19#include "lj_cconv.h"
20#include "lj_carith.h"
21#include "lj_clib.h" 21#include "lj_clib.h"
22#include "lj_ccall.h" 22#include "lj_ccall.h"
23#include "lj_ff.h" 23#include "lj_ff.h"
@@ -31,6 +31,8 @@
31#include "lj_snap.h" 31#include "lj_snap.h"
32#include "lj_crecord.h" 32#include "lj_crecord.h"
33#include "lj_dispatch.h" 33#include "lj_dispatch.h"
34#include "lj_strfmt.h"
35#include "lj_strscan.h"
34 36
35/* Some local macros to save typing. Undef'd at the end. */ 37/* Some local macros to save typing. Undef'd at the end. */
36#define IR(ref) (&J->cur.ir[(ref)]) 38#define IR(ref) (&J->cur.ir[(ref)])
@@ -60,7 +62,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o)
60static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) 62static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr)
61{ 63{
62 CTypeID id; 64 CTypeID id;
63 lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID); 65 lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID,
66 "expected CTypeID cdata");
64 id = *(CTypeID *)cdataptr(cd); 67 id = *(CTypeID *)cdataptr(cd);
65 tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); 68 tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT);
66 emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); 69 emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id));
@@ -211,7 +214,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp,
211 ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); 214 ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0);
212 ml[i].trofs = trofs; 215 ml[i].trofs = trofs;
213 i++; 216 i++;
214 rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; 217 rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1;
215 if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ 218 if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */
216 rwin = 0; 219 rwin = 0;
217 for ( ; j < i; j++) { 220 for ( ; j < i; j++) {
@@ -236,13 +239,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen,
236 if (len > CREC_COPY_MAXLEN) goto fallback; 239 if (len > CREC_COPY_MAXLEN) goto fallback;
237 if (ct) { 240 if (ct) {
238 CTState *cts = ctype_ctsG(J2G(J)); 241 CTState *cts = ctype_ctsG(J2G(J));
239 lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info)); 242 lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info),
243 "copy of non-aggregate");
240 if (ctype_isarray(ct->info)) { 244 if (ctype_isarray(ct->info)) {
241 CType *cct = ctype_rawchild(cts, ct); 245 CType *cct = ctype_rawchild(cts, ct);
242 tp = crec_ct2irt(cts, cct); 246 tp = crec_ct2irt(cts, cct);
243 if (tp == IRT_CDATA) goto rawcopy; 247 if (tp == IRT_CDATA) goto rawcopy;
244 step = lj_ir_type_size[tp]; 248 step = lj_ir_type_size[tp];
245 lua_assert((len & (step-1)) == 0); 249 lj_assertJ((len & (step-1)) == 0, "copy of fractional size");
246 } else if ((ct->info & CTF_UNION)) { 250 } else if ((ct->info & CTF_UNION)) {
247 step = (1u << ctype_align(ct->info)); 251 step = (1u << ctype_align(ct->info));
248 goto rawcopy; 252 goto rawcopy;
@@ -441,7 +445,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
441 /* fallthrough */ 445 /* fallthrough */
442 case CCX(I, F): 446 case CCX(I, F):
443 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 447 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
444 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 448 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
445 goto xstore; 449 goto xstore;
446 case CCX(I, P): 450 case CCX(I, P):
447 case CCX(I, A): 451 case CCX(I, A):
@@ -521,7 +525,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
521 if (st == IRT_CDATA) goto err_nyi; 525 if (st == IRT_CDATA) goto err_nyi;
522 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 526 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
523 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 527 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
524 st, IRCONV_TRUNC|IRCONV_ANY); 528 st, IRCONV_ANY);
525 goto xstore; 529 goto xstore;
526 530
527 /* Destination is an array. */ 531 /* Destination is an array. */
@@ -613,10 +617,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
613 sp = lj_ir_kptr(J, NULL); 617 sp = lj_ir_kptr(J, NULL);
614 } else if (tref_isudata(sp)) { 618 } else if (tref_isudata(sp)) {
615 GCudata *ud = udataV(sval); 619 GCudata *ud = udataV(sval);
616 if (ud->udtype == UDTYPE_IO_FILE) { 620 if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) {
617 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); 621 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE);
618 emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); 622 emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype));
619 sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE); 623 sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp,
624 ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE :
625 IRFL_SBUF_R);
620 } else { 626 } else {
621 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); 627 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata)));
622 } 628 }
@@ -628,7 +634,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
628 /* Specialize to the name of the enum constant. */ 634 /* Specialize to the name of the enum constant. */
629 emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); 635 emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str));
630 if (cct && ctype_isconstval(cct->info)) { 636 if (cct && ctype_isconstval(cct->info)) {
631 lua_assert(ctype_child(cts, cct)->size == 4); 637 lj_assertJ(ctype_child(cts, cct)->size == 4,
638 "only 32 bit const supported"); /* NYI */
632 svisnz = (void *)(intptr_t)(ofs != 0); 639 svisnz = (void *)(intptr_t)(ofs != 0);
633 sp = lj_ir_kint(J, (int32_t)ofs); 640 sp = lj_ir_kint(J, (int32_t)ofs);
634 sid = ctype_cid(cct->info); 641 sid = ctype_cid(cct->info);
@@ -640,12 +647,22 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
640 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); 647 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
641 sid = CTID_A_CCHAR; 648 sid = CTID_A_CCHAR;
642 } 649 }
643 } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ 650 } else if (tref_islightud(sp)) {
651#if LJ_64
652 lj_trace_err(J, LJ_TRERR_NYICONV);
653#endif
654 } else { /* NYI: tref_istab(sp). */
644 IRType t; 655 IRType t;
645 sid = argv2cdata(J, sp, sval)->ctypeid; 656 sid = argv2cdata(J, sp, sval)->ctypeid;
646 s = ctype_raw(cts, sid); 657 s = ctype_raw(cts, sid);
647 svisnz = cdataptr(cdataV(sval)); 658 svisnz = cdataptr(cdataV(sval));
648 t = crec_ct2irt(cts, s); 659 if (ctype_isfunc(s->info)) {
660 sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
661 s = ctype_get(cts, sid);
662 t = IRT_PTR;
663 } else {
664 t = crec_ct2irt(cts, s);
665 }
649 if (ctype_isptr(s->info)) { 666 if (ctype_isptr(s->info)) {
650 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); 667 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
651 if (ctype_isref(s->info)) { 668 if (ctype_isref(s->info)) {
@@ -700,6 +717,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
700 return tr; 717 return tr;
701} 718}
702 719
720/* Tailcall to function. */
721static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
722{
723 TRef kfunc = lj_ir_kfunc(J, funcV(tv));
724#if LJ_FR2
725 J->base[-2] = kfunc;
726 J->base[-1] = TREF_FRAME;
727#else
728 J->base[-1] = kfunc | TREF_FRAME;
729#endif
730 rd->nres = -1; /* Pending tailcall. */
731}
732
703/* Record ctype __index/__newindex metamethods. */ 733/* Record ctype __index/__newindex metamethods. */
704static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, 734static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
705 RecordFFData *rd) 735 RecordFFData *rd)
@@ -709,8 +739,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
709 if (!tv) 739 if (!tv)
710 lj_trace_err(J, LJ_TRERR_BADTYPE); 740 lj_trace_err(J, LJ_TRERR_BADTYPE);
711 if (tvisfunc(tv)) { 741 if (tvisfunc(tv)) {
712 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 742 crec_tailcall(J, rd, tv);
713 rd->nres = -1; /* Pending tailcall. */
714 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { 743 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
715 /* Specialize to result of __index lookup. */ 744 /* Specialize to result of __index lookup. */
716 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); 745 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
@@ -727,6 +756,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
727 } 756 }
728} 757}
729 758
759/* Record bitfield load/store. */
760static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info)
761{
762 IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0);
763 TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0);
764 CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz;
765 lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */
766 if (rd->data == 0) { /* __index metamethod. */
767 if ((info & CTF_BOOL)) {
768 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos))));
769 /* Assume not equal to zero. Fixup and emit pending guard later. */
770 lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0));
771 J->postproc = LJ_POST_FIXGUARD;
772 tr = TREF_TRUE;
773 } else if (!(info & CTF_UNSIGNED)) {
774 tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos));
775 tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift));
776 } else {
777 lj_assertJ(bsz < 32, "unexpected full bitfield index");
778 tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos));
779 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1)));
780 /* We can omit the U32 to NUM conversion, since bsz < 32. */
781 }
782 J->base[0] = tr;
783 } else { /* __newindex metamethod. */
784 CTState *cts = ctype_ctsG(J2G(J));
785 CType *ct = ctype_get(cts,
786 (info & CTF_BOOL) ? CTID_BOOL :
787 (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32);
788 int32_t mask = (int32_t)(((1u << bsz)-1) << pos);
789 TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]);
790 sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos));
791 /* Use of the target type avoids forwarding conversions. */
792 sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask));
793 tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask));
794 tr = emitir(IRT(IR_BOR, t), tr, sp);
795 emitir(IRT(IR_XSTORE, t), ptr, tr);
796 rd->nres = 0;
797 J->needsnap = 1;
798 }
799}
800
730void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) 801void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
731{ 802{
732 TRef idx, ptr = J->base[0]; 803 TRef idx, ptr = J->base[0];
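Note: crec_index_bf() above records plain shift/mask arithmetic for bitfield access instead of bailing out to the interpreter. A worked example of the read side, not part of the patch, for a field f:5 at bit position 3 in a 32-bit container value v (so bsz = 5, pos = 3, shift = 32 - bsz = 27):

int32_t  f = ((int32_t)(v << (27 - 3))) >> 27;  /* signed: shift up, then sign-extending shift down */
uint32_t u = (v >> 3) & ((1u << 5) - 1);        /* unsigned: shift down, then mask to bsz bits */

Stores build the complementary mask ((1u << bsz) - 1) << pos, clear those bits in the loaded container, and OR in the shifted new value, exactly as the __newindex branch shows.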
@@ -801,6 +872,7 @@ again:
801 CType *fct; 872 CType *fct;
802 fct = lj_ctype_getfield(cts, ct, name, &fofs); 873 fct = lj_ctype_getfield(cts, ct, name, &fofs);
803 if (fct) { 874 if (fct) {
875 ofs += (ptrdiff_t)fofs;
804 /* Always specialize to the field name. */ 876 /* Always specialize to the field name. */
805 emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); 877 emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name));
806 if (ctype_isconstval(fct->info)) { 878 if (ctype_isconstval(fct->info)) {
@@ -812,12 +884,14 @@ again:
812 J->base[0] = lj_ir_kint(J, (int32_t)fct->size); 884 J->base[0] = lj_ir_kint(J, (int32_t)fct->size);
813 return; /* Interpreter will throw for newindex. */ 885 return; /* Interpreter will throw for newindex. */
814 } else if (ctype_isbitfield(fct->info)) { 886 } else if (ctype_isbitfield(fct->info)) {
815 lj_trace_err(J, LJ_TRERR_NYICONV); 887 if (ofs)
888 ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
889 crec_index_bf(J, rd, ptr, fct->info);
890 return;
816 } else { 891 } else {
817 lua_assert(ctype_isfield(fct->info)); 892 lj_assertJ(ctype_isfield(fct->info), "field expected");
818 sid = ctype_cid(fct->info); 893 sid = ctype_cid(fct->info);
819 } 894 }
820 ofs += (ptrdiff_t)fofs;
821 } 895 }
822 } else if (ctype_iscomplex(ct->info)) { 896 } else if (ctype_iscomplex(ct->info)) {
823 if (name->len == 2 && 897 if (name->len == 2 &&
@@ -867,21 +941,17 @@ again:
867} 941}
868 942
869/* Record setting a finalizer. */ 943/* Record setting a finalizer. */
870static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) 944static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
871{ 945{
872 TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); 946 if (tvisgcv(fin)) {
873 TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); 947 if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
874 if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } 948 } else if (tvisnil(fin)) {
875 if (tvisfunc(fin)) { 949 trfin = lj_ir_kptr(J, NULL);
876 emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
877 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
878 } else if (tviscdata(fin)) {
879 emitir(IRT(IR_XSTORE, IRT_P32), trlo,
880 lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
881 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
882 } else { 950 } else {
883 lj_trace_err(J, LJ_TRERR_BADTYPE); 951 lj_trace_err(J, LJ_TRERR_BADTYPE);
884 } 952 }
953 lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
954 trfin, lj_ir_kint(J, (int32_t)itype(fin)));
885 J->needsnap = 1; 955 J->needsnap = 1;
886} 956}
887 957
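[Editor's note] crec_finalizer now records ffi.gc() through a single lj_cdata_setfin call instead of storing the two finalizer slots by hand, and recff_ffi_gc requires the finalizer argument to be present on trace. A hedged usage sketch, with malloc/free as the illustrative resource:

  local ffi = require("ffi")
  ffi.cdef[[ void *malloc(size_t); void free(void *); ]]
  local function alloc(n)
    return ffi.gc(ffi.C.malloc(n), ffi.C.free)  -- attach finalizer, recordable
  end
  for i = 1, 100 do
    local p = alloc(64)
    ffi.gc(p, nil)   -- detach the finalizer again before freeing manually
    ffi.C.free(p)
  end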
@@ -892,10 +962,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
892 CTSize sz; 962 CTSize sz;
893 CTInfo info = lj_ctype_info(cts, id, &sz); 963 CTInfo info = lj_ctype_info(cts, id, &sz);
894 CType *d = ctype_raw(cts, id); 964 CType *d = ctype_raw(cts, id);
895 TRef trid; 965 TRef trcd, trid = lj_ir_kint(J, id);
896 if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 966 cTValue *fin;
897 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
898 trid = lj_ir_kint(J, id);
899 /* Use special instruction to box pointer or 32/64 bit integer. */ 967 /* Use special instruction to box pointer or 32/64 bit integer. */
900 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { 968 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
901 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : 969 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -903,11 +971,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
903 sz == 4 ? lj_ir_kint(J, 0) : 971 sz == 4 ? lj_ir_kint(J, 0) :
904 (lj_needsplit(J), lj_ir_kint64(J, 0)); 972 (lj_needsplit(J), lj_ir_kint64(J, 0));
905 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); 973 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
974 return;
906 } else { 975 } else {
907 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 976 TRef trsz = TREF_NIL;
908 cTValue *fin; 977 if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */
909 J->base[0] = trcd; 978 CTSize sz0, sz1;
910 if (J->base[1] && !J->base[2] && 979 if (!J->base[1] || J->base[2])
980 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */
981 trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
982 J->base[1], &rd->argv[1]);
983 sz0 = lj_ctype_vlsize(cts, d, 0);
984 sz1 = lj_ctype_vlsize(cts, d, 1);
985 trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
986 trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
987 J->base[1] = 0; /* Simplify logic below. */
988 } else if (ctype_align(info) > CT_MEMALIGN) {
989 trsz = lj_ir_kint(J, sz);
990 }
991 trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
992 if (sz > 128 || (info & CTF_VLA)) {
993 TRef dp;
994 CTSize align;
995 special: /* Only handle bulk zero-fill for large/VLA/VLS types. */
996 if (J->base[1])
997 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */
998 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
999 if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
1000 align = ctype_align(info);
1001 if (align < CT_MEMALIGN) align = CT_MEMALIGN;
1002 crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
1003 } else if (J->base[1] && !J->base[2] &&
911 !lj_cconv_multi_init(cts, d, &rd->argv[1])) { 1004 !lj_cconv_multi_init(cts, d, &rd->argv[1])) {
912 goto single_init; 1005 goto single_init;
913 } else if (ctype_isarray(d->info)) { 1006 } else if (ctype_isarray(d->info)) {
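[Editor's note] This hunk lets crec_alloc compute VLA/VLS sizes on trace (MULOV/ADDOV guard against overflow) and bulk zero-fill large or variable-length allocations via crec_fill, instead of bailing out with NYICONV. A sketch of the now-compilable pattern; uninitialized VLAs only, since VLA initializers remain NYI per the code above:

  local ffi = require("ffi")
  local function newbuf(n)
    return ffi.new("int32_t[?]", n)  -- size computed and zero-filled on trace
  end
  for i = 1, 100 do
    local t = newbuf(i + 8)
    t[0] = i
  end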
@@ -918,8 +1011,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
918 TValue *sval = &tv; 1011 TValue *sval = &tv;
919 MSize i; 1012 MSize i;
920 tv.u64 = 0; 1013 tv.u64 = 0;
921 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 1014 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
922 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 1015 esize * CREC_FILL_MAXUNROLL < sz)
1016 goto special;
923 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 1017 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
924 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, 1018 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
925 lj_ir_kintp(J, ofs + sizeof(GCcdata))); 1019 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -933,8 +1027,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
933 crec_ct_tv(J, dc, dp, sp, sval); 1027 crec_ct_tv(J, dc, dp, sp, sval);
934 } 1028 }
935 } else if (ctype_isstruct(d->info)) { 1029 } else if (ctype_isstruct(d->info)) {
936 CTypeID fid = d->sib; 1030 CTypeID fid;
937 MSize i = 1; 1031 MSize i = 1;
1032 if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */
1033 fid = d->sib;
1034 while (fid) {
1035 CType *df = ctype_get(cts, fid);
1036 fid = df->sib;
1037 if (ctype_isfield(df->info)) {
1038 CType *dc;
1039 if (!gcref(df->name)) continue; /* Ignore unnamed fields. */
1040 dc = ctype_rawchild(cts, df); /* Field type. */
1041 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) ||
1042 ctype_isenum(dc->info)))
1043 goto special;
1044 } else if (!ctype_isconstval(df->info)) {
1045 goto special;
1046 }
1047 }
1048 }
1049 fid = d->sib;
938 while (fid) { 1050 while (fid) {
939 CType *df = ctype_get(cts, fid); 1051 CType *df = ctype_get(cts, fid);
940 fid = df->sib; 1052 fid = df->sib;
@@ -981,11 +1093,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
981 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); 1093 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
982 } 1094 }
983 } 1095 }
984 /* Handle __gc metamethod. */
985 fin = lj_ctype_meta(cts, id, MM_gc);
986 if (fin)
987 crec_finalizer(J, trcd, fin);
988 } 1096 }
1097 J->base[0] = trcd;
1098 /* Handle __gc metamethod. */
1099 fin = lj_ctype_meta(cts, id, MM_gc);
1100 if (fin)
1101 crec_finalizer(J, trcd, 0, fin);
989} 1102}
990 1103
991/* Record argument conversions. */ 1104/* Record argument conversions. */
@@ -1006,6 +1119,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
1006 ngpr = 1; 1119 ngpr = 1;
1007 else if (ctype_cconv(ct->info) == CTCC_FASTCALL) 1120 else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
1008 ngpr = 2; 1121 ngpr = 2;
1122#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
1123 int ngpr = CCALL_NARG_GPR;
1009#endif 1124#endif
1010 1125
1011 /* Skip initial attributes. */ 1126 /* Skip initial attributes. */
@@ -1026,11 +1141,19 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
1026 if (fid) { /* Get argument type from field. */ 1141 if (fid) { /* Get argument type from field. */
1027 CType *ctf = ctype_get(cts, fid); 1142 CType *ctf = ctype_get(cts, fid);
1028 fid = ctf->sib; 1143 fid = ctf->sib;
1029 lua_assert(ctype_isfield(ctf->info)); 1144 lj_assertJ(ctype_isfield(ctf->info), "field expected");
1030 did = ctype_cid(ctf->info); 1145 did = ctype_cid(ctf->info);
1031 } else { 1146 } else {
1032 if (!(ct->info & CTF_VARARG)) 1147 if (!(ct->info & CTF_VARARG))
1033 lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */ 1148 lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */
1149#if LJ_TARGET_ARM64 && LJ_TARGET_OSX
1150 if (ngpr >= 0) {
1151 ngpr = -1;
1152 args[n++] = TREF_NIL; /* Marker for start of varargs. */
1153 if (n >= CCI_NARGS_MAX)
1154 lj_trace_err(J, LJ_TRERR_NYICALL);
1155 }
1156#endif
1034 did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */ 1157 did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */
1035 } 1158 }
1036 d = ctype_raw(cts, did); 1159 d = ctype_raw(cts, did);
@@ -1039,13 +1162,22 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
1039 lj_trace_err(J, LJ_TRERR_NYICALL); 1162 lj_trace_err(J, LJ_TRERR_NYICALL);
1040 tr = crec_ct_tv(J, d, 0, *base, o); 1163 tr = crec_ct_tv(J, d, 0, *base, o);
1041 if (ctype_isinteger_or_bool(d->info)) { 1164 if (ctype_isinteger_or_bool(d->info)) {
1165#if LJ_TARGET_ARM64 && LJ_TARGET_OSX
1166 if (!ngpr) {
1167 /* Fixed args passed on the stack use their unpromoted size. */
1168 if (d->size != lj_ir_type_size[tref_type(tr)]) {
1169 lj_assertJ(d->size == 1 || d->size==2, "unexpected size %d", d->size);
1170 tr = emitconv(tr, d->size==1 ? IRT_U8 : IRT_U16, tref_type(tr), 0);
1171 }
1172 } else
1173#endif
1042 if (d->size < 4) { 1174 if (d->size < 4) {
1043 if ((d->info & CTF_UNSIGNED)) 1175 if ((d->info & CTF_UNSIGNED))
1044 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0); 1176 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0);
1045 else 1177 else
1046 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); 1178 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
1047 } 1179 }
1048 } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { 1180 } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) {
1049 lj_needsplit(J); 1181 lj_needsplit(J);
1050 } 1182 }
1051#if LJ_TARGET_X86 1183#if LJ_TARGET_X86
@@ -1076,6 +1208,10 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
1076 } 1208 }
1077 } 1209 }
1078#endif 1210#endif
1211#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
1212 if (!ctype_isfp(d->info) && ngpr) {
1213 ngpr--;
1214 }
1079#endif 1215#endif
1080 args[n] = tr; 1216 args[n] = tr;
1081 } 1217 }
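[Editor's note] The LJ_TARGET_ARM64 && LJ_TARGET_OSX blocks handle Apple's calling-convention deviation: once the fixed arguments are consumed, a TREF_NIL marker separates the varargs, and fixed sub-32-bit integers that spill to the stack keep their unpromoted size. A hedged sketch of a variadic FFI call this affects; plain libc printf is used for illustration:

  local ffi = require("ffi")
  ffi.cdef[[ int printf(const char *fmt, ...); ]]
  for i = 1, 100 do
    -- the int cdata and the double are varargs, passed on the stack on arm64 macOS
    ffi.C.printf("%d %g\n", ffi.new("int", i), i * 0.5)
  end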
@@ -1091,20 +1227,20 @@ static void crec_snap_caller(jit_State *J)
1091 lua_State *L = J->L; 1227 lua_State *L = J->L;
1092 TValue *base = L->base, *top = L->top; 1228 TValue *base = L->base, *top = L->top;
1093 const BCIns *pc = J->pc; 1229 const BCIns *pc = J->pc;
1094 TRef ftr = J->base[-1]; 1230 TRef ftr = J->base[-1-LJ_FR2];
1095 ptrdiff_t delta; 1231 ptrdiff_t delta;
1096 if (!frame_islua(base-1) || J->framedepth <= 0) 1232 if (!frame_islua(base-1) || J->framedepth <= 0)
1097 lj_trace_err(J, LJ_TRERR_NYICALL); 1233 lj_trace_err(J, LJ_TRERR_NYICALL);
1098 J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]); 1234 J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
1099 L->top = base; L->base = base - delta; 1235 L->top = base; L->base = base - delta;
1100 J->base[-1] = TREF_FALSE; 1236 J->base[-1-LJ_FR2] = TREF_FALSE;
1101 J->base -= delta; J->baseslot -= (BCReg)delta; 1237 J->base -= delta; J->baseslot -= (BCReg)delta;
1102 J->maxslot = (BCReg)delta; J->framedepth--; 1238 J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
1103 lj_snap_add(J); 1239 lj_snap_add(J);
1104 L->base = base; L->top = top; 1240 L->base = base; L->top = top;
1105 J->framedepth++; J->maxslot = 1; 1241 J->framedepth++; J->maxslot = 1;
1106 J->base += delta; J->baseslot += (BCReg)delta; 1242 J->base += delta; J->baseslot += (BCReg)delta;
1107 J->base[-1] = ftr; J->pc = pc; 1243 J->base[-1-LJ_FR2] = ftr; J->pc = pc;
1108} 1244}
1109 1245
1110/* Record function call. */ 1246/* Record function call. */
@@ -1124,8 +1260,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
1124 TRef tr; 1260 TRef tr;
1125 TValue tv; 1261 TValue tv;
1126 /* Check for blacklisted C functions that might call a callback. */ 1262 /* Check for blacklisted C functions that might call a callback. */
1127 setlightudV(&tv, 1263 tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000);
1128 cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4));
1129 if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) 1264 if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv)))
1130 lj_trace_err(J, LJ_TRERR_BLACKL); 1265 lj_trace_err(J, LJ_TRERR_BLACKL);
1131 if (ctype_isvoid(ctr->info)) { 1266 if (ctype_isvoid(ctr->info)) {
@@ -1196,8 +1331,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
1196 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); 1331 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
1197 if (tv) { 1332 if (tv) {
1198 if (tvisfunc(tv)) { 1333 if (tvisfunc(tv)) {
1199 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1334 crec_tailcall(J, rd, tv);
1200 rd->nres = -1; /* Pending tailcall. */
1201 return; 1335 return;
1202 } 1336 }
1203 } else if (mm == MM_new) { 1337 } else if (mm == MM_new) {
@@ -1238,7 +1372,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1238 for (i = 0; i < 2; i++) { 1372 for (i = 0; i < 2; i++) {
1239 IRType st = tref_type(sp[i]); 1373 IRType st = tref_type(sp[i]);
1240 if (st == IRT_NUM || st == IRT_FLOAT) 1374 if (st == IRT_NUM || st == IRT_FLOAT)
1241 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1375 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1242 else if (!(st == IRT_I64 || st == IRT_U64)) 1376 else if (!(st == IRT_I64 || st == IRT_U64))
1243 sp[i] = emitconv(sp[i], dt, IRT_INT, 1377 sp[i] = emitconv(sp[i], dt, IRT_INT,
1244 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1378 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1307,15 +1441,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1307 CTypeID id; 1441 CTypeID id;
1308#if LJ_64 1442#if LJ_64
1309 if (t == IRT_NUM || t == IRT_FLOAT) 1443 if (t == IRT_NUM || t == IRT_FLOAT)
1310 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1444 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1311 else if (!(t == IRT_I64 || t == IRT_U64)) 1445 else if (!(t == IRT_I64 || t == IRT_U64))
1312 tr = emitconv(tr, IRT_INTP, IRT_INT, 1446 tr = emitconv(tr, IRT_INTP, IRT_INT,
1313 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1447 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1314#else 1448#else
1315 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1449 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1316 tr = emitconv(tr, IRT_INTP, t, 1450 tr = emitconv(tr, IRT_INTP, t,
1317 (t == IRT_NUM || t == IRT_FLOAT) ? 1451 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1318 IRCONV_TRUNC|IRCONV_ANY : 0);
1319 } 1452 }
1320#endif 1453#endif
1321 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1454 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1347,8 +1480,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
1347 } 1480 }
1348 if (tv) { 1481 if (tv) {
1349 if (tvisfunc(tv)) { 1482 if (tvisfunc(tv)) {
1350 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1483 crec_tailcall(J, rd, tv);
1351 rd->nres = -1; /* Pending tailcall. */
1352 return 0; 1484 return 0;
1353 } /* NYI: non-function metamethods. */ 1485 } /* NYI: non-function metamethods. */
1354 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ 1486 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */
@@ -1466,8 +1598,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
1466 !irt_isguard(J->guardemit)) { 1598 !irt_isguard(J->guardemit)) {
1467 const BCIns *pc = frame_contpc(J->L->base-1) - 1; 1599 const BCIns *pc = frame_contpc(J->L->base-1) - 1;
1468 if (bc_op(*pc) <= BC_ISNEP) { 1600 if (bc_op(*pc) <= BC_ISNEP) {
1469 setframe_pc(&J2G(J)->tmptv, pc); 1601 J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
1470 J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
1471 J->postproc = LJ_POST_FIXCOMP; 1602 J->postproc = LJ_POST_FIXCOMP;
1472 } 1603 }
1473 } 1604 }
@@ -1656,7 +1787,150 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
1656void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) 1787void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
1657{ 1788{
1658 argv2cdata(J, J->base[0], &rd->argv[0]); 1789 argv2cdata(J, J->base[0], &rd->argv[0]);
1659 crec_finalizer(J, J->base[0], &rd->argv[1]); 1790 if (!J->base[1])
1791 lj_trace_err(J, LJ_TRERR_BADTYPE);
1792 crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
1793}
1794
1795/* -- 64 bit bit.* library functions -------------------------------------- */
1796
1797/* Determine bit operation type from argument type. */
1798static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
1799{
1800 if (tviscdata(tv)) {
1801 CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
1802 if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
1803 if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
1804 CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
1805 return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
1806 return CTID_INT64; /* Otherwise use int64_t. */
1807 }
1808 return 0; /* Use regular 32 bit ops. */
1809}
1810
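[Editor's note] crec_bit64_type picks the operation width from the argument's ctype: unsigned 64-bit cdata selects uint64_t (highest rank), any other 64-bit integer cdata selects int64_t, and plain numbers fall back to the regular 32-bit bit ops. Sketch:

  local bit = require("bit")
  local x = bit.band(0xffffffffffffULL, 0xff00ULL)  -- uint64_t operation
  local y = bit.band(-1LL, 255)                     -- widened to int64_t
  print(x, y)   --> 65280ULL   255LL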
1811static TRef crec_bit64_arg(jit_State *J, CType *d, TRef sp, TValue *sval)
1812{
1813 if (LJ_UNLIKELY(tref_isstr(sp))) {
1814 if (lj_strscan_num(strV(sval), sval)) {
1815 sp = emitir(IRTG(IR_STRTO, IRT_NUM), sp, 0);
1816 } /* else: interpreter will throw. */
1817 }
1818 return crec_ct_tv(J, d, 0, sp, sval);
1819}
1820
1821void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
1822{
1823 CTState *cts = ctype_ctsG(J2G(J));
1824 TRef tr = crec_bit64_arg(J, ctype_get(cts, CTID_INT64),
1825 J->base[0], &rd->argv[0]);
1826 if (!tref_isinteger(tr))
1827 tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
1828 J->base[0] = tr;
1829}
1830
1831int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
1832{
1833 CTState *cts = ctype_ctsG(J2G(J));
1834 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1835 if (id) {
1836 TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]);
1837 tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
1838 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1839 return 1;
1840 }
1841 return 0;
1842}
1843
1844int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
1845{
1846 CTState *cts = ctype_ctsG(J2G(J));
1847 CTypeID id = 0;
1848 MSize i;
1849 for (i = 0; J->base[i] != 0; i++) {
1850 CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
1851 if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
1852 }
1853 if (id) {
1854 CType *ct = ctype_get(cts, id);
1855 uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
1856 TRef tr = crec_bit64_arg(J, ct, J->base[0], &rd->argv[0]);
1857 for (i = 1; J->base[i] != 0; i++) {
1858 TRef tr2 = crec_bit64_arg(J, ct, J->base[i], &rd->argv[i]);
1859 tr = emitir(ot, tr, tr2);
1860 }
1861 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1862 return 1;
1863 }
1864 return 0;
1865}
1866
1867int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1868{
1869 CTState *cts = ctype_ctsG(J2G(J));
1870 CTypeID id;
1871 TRef tsh = 0;
1872 if (J->base[0] && tref_iscdata(J->base[1])) {
1873 tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64),
1874 J->base[1], &rd->argv[1]);
1875 if (!tref_isinteger(tsh))
1876 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1877 J->base[1] = tsh;
1878 }
1879 id = crec_bit64_type(cts, &rd->argv[0]);
1880 if (id) {
1881 TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]);
1882 uint32_t op = rd->data;
1883 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1884 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1885 !tref_isk(tsh))
1886 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
1887#ifdef LJ_TARGET_UNIFYROT
1888 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1889 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1890 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
1891 }
1892#endif
1893 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1894 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1895 return 1;
1896 }
1897 return 0;
1898}
1899
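[Editor's note] For 64-bit shifts and rotates the count is masked to 63 unless the target masks it in hardware, and rotates are unified to the target's preferred direction by negating the count. Sketch:

  local bit = require("bit")
  local v = bit.lshift(1ULL, 40)         --> 1099511627776ULL
  local r = bit.ror(0x12345678ULL, 8)    -- 64-bit rotate, count taken mod 64
  print(v, r)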
1900TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
1901{
1902 CTState *cts = ctype_ctsG(J2G(J));
1903 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1904 TRef tr, trsf = J->base[1];
1905 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
1906 int32_t n;
1907 if (trsf) {
1908 CTypeID id2 = 0;
1909 n = (int32_t)lj_carith_check64(J->L, 2, &id2);
1910 if (id2)
1911 trsf = crec_bit64_arg(J, ctype_get(cts, CTID_INT32), trsf, &rd->argv[1]);
1912 else
1913 trsf = lj_opt_narrow_tobit(J, trsf);
1914 emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */
1915 } else {
1916 n = id ? 16 : 8;
1917 }
1918 if (n < 0) { n = (int32_t)(~n+1u); sf |= STRFMT_F_UPPER; }
1919 if ((uint32_t)n > 254) n = 254;
1920 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
1921 if (id) {
1922 tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]);
1923 if (n < 16)
1924 tr = emitir(IRT(IR_BAND, IRT_U64), tr,
1925 lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
1926 } else {
1927 tr = lj_opt_narrow_tobit(J, J->base[0]);
1928 if (n < 8)
1929 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
1930 tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */
1931 lj_needsplit(J);
1932 }
1933 return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
1660} 1934}
1661 1935
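[Editor's note] recff_bit64_tohex specializes to the requested digit count: 64-bit cdata arguments default to 16 digits, plain numbers to 8, and a negative count selects uppercase. Sketch:

  local bit = require("bit")
  print(bit.tohex(0x12345678ULL))   --> 0000000012345678
  print(bit.tohex(255ULL, -4))      --> 00FF
  print(bit.tohex(0xdeadbeef))      --> deadbeef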
1662/* -- Miscellaneous library functions ------------------------------------- */ 1936/* -- Miscellaneous library functions ------------------------------------- */
@@ -1680,6 +1954,30 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
1680 } 1954 }
1681} 1955}
1682 1956
1957TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o)
1958{
1959 CTypeID id = argv2cdata(J, tr, o)->ctypeid;
1960 if (!(id == CTID_INT64 || id == CTID_UINT64))
1961 lj_trace_err(J, LJ_TRERR_BADTYPE);
1962 lj_needsplit(J);
1963 return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr,
1964 IRFL_CDATA_INT64);
1965}
1966
1967#if LJ_HASBUFFER
1968TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o)
1969{
1970 CTState *cts = ctype_ctsG(J2G(J));
1971 if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE);
1972 return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o);
1973}
1974
1975TRef lj_crecord_topuint8(jit_State *J, TRef tr)
1976{
1977 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr);
1978}
1979#endif
1980
1683#undef IR 1981#undef IR
1684#undef emitir 1982#undef emitir
1685#undef emitconv 1983#undef emitconv
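[Editor's note] lj_crecord_loadiu64 and the LJ_HASBUFFER helpers lj_crecord_topcvoid/topuint8 are small recording hooks exported for the string buffer library: loading boxed 64-bit integers, converting cdata to a const void pointer for buffer writes, and boxing a uint8_t pointer for buffer reads. A hedged sketch of the buffer API they back, assuming the documented string.buffer module:

  local ffi = require("ffi")
  local buf = require("string.buffer").new()
  buf:put("hello")
  local ptr, len = buf:ref()        -- uint8_t * into the buffer data
  print(ffi.string(ptr, len))       --> hello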
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index e3ba70c1..32973cf4 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,7 +25,19 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); 25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); 26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); 27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
28
29LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
30LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
31LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
32LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
33LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
34
28LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); 35LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
36LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o);
37#if LJ_HASBUFFER
38LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o);
39LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr);
40#endif
29#endif 41#endif
30 42
31#endif 43#endif
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index 872e8cda..0f6baac9 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -11,8 +11,10 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_str.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_strfmt.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_ccallback.h" 16#include "lj_ccallback.h"
17#include "lj_buf.h"
16 18
17/* -- C type definitions -------------------------------------------------- */ 19/* -- C type definitions -------------------------------------------------- */
18 20
@@ -37,6 +39,8 @@
37 _("uint64_t", UINT64) \ 39 _("uint64_t", UINT64) \
38 _("intptr_t", INT_PSZ) \ 40 _("intptr_t", INT_PSZ) \
39 _("uintptr_t", UINT_PSZ) \ 41 _("uintptr_t", UINT_PSZ) \
42 /* From POSIX. */ \
43 _("ssize_t", INT_PSZ) \
40 /* End of typedef list. */ 44 /* End of typedef list. */
41 45
42/* Keywords (only the ones we actually care for). */ 46/* Keywords (only the ones we actually care for). */
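[Editor's note] With ssize_t predeclared as a POSIX typedef, declarations can use it without a manual typedef first. Sketch:

  local ffi = require("ffi")
  ffi.cdef[[ ssize_t read(int fd, void *buf, size_t count); ]]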
@@ -149,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp)
149{ 153{
150 CTypeID id = cts->top; 154 CTypeID id = cts->top;
151 CType *ct; 155 CType *ct;
152 lua_assert(cts->L); 156 lj_assertCTS(cts->L, "uninitialized cts->L");
153 if (LJ_UNLIKELY(id >= cts->sizetab)) { 157 if (LJ_UNLIKELY(id >= cts->sizetab)) {
154 if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); 158 if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV);
155#ifdef LUAJIT_CTYPE_CHECK_ANCHOR 159#ifdef LUAJIT_CTYPE_CHECK_ANCHOR
@@ -178,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size)
178{ 182{
179 uint32_t h = ct_hashtype(info, size); 183 uint32_t h = ct_hashtype(info, size);
180 CTypeID id = cts->hash[h]; 184 CTypeID id = cts->hash[h];
181 lua_assert(cts->L); 185 lj_assertCTS(cts->L, "uninitialized cts->L");
182 while (id) { 186 while (id) {
183 CType *ct = ctype_get(cts, id); 187 CType *ct = ctype_get(cts, id);
184 if (ct->info == info && ct->size == size) 188 if (ct->info == info && ct->size == size)
@@ -306,9 +310,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem)
306 } 310 }
307 ct = ctype_raw(cts, arrid); 311 ct = ctype_raw(cts, arrid);
308 } 312 }
309 lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */ 313 lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected");
310 ct = ctype_rawchild(cts, ct); /* Get array element. */ 314 ct = ctype_rawchild(cts, ct); /* Get array element. */
311 lua_assert(ctype_hassize(ct->info)); 315 lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size");
312 /* Calculate actual size of VLA and check for overflow. */ 316 /* Calculate actual size of VLA and check for overflow. */
313 xsz += (uint64_t)ct->size * nelem; 317 xsz += (uint64_t)ct->size * nelem;
314 return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID; 318 return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID;
@@ -331,7 +335,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp)
331 } else { 335 } else {
332 if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); 336 if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN);
333 qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); 337 qual |= (info & ~(CTF_ALIGN|CTMASK_CID));
334 lua_assert(ctype_hassize(info) || ctype_isfunc(info)); 338 lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info),
339 "ctype without size");
335 *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; 340 *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size;
336 break; 341 break;
337 } 342 }
@@ -544,7 +549,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id)
544 ctype_appc(ctr, ')'); 549 ctype_appc(ctr, ')');
545 break; 550 break;
546 default: 551 default:
547 lua_assert(0); 552 lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info);
548 break; 553 break;
549 } 554 }
550 ct = ctype_get(ctr->cts, ctype_cid(info)); 555 ct = ctype_get(ctr->cts, ctype_cid(info));
@@ -588,19 +593,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
588/* Convert complex to string with 'i' or 'I' suffix. */ 593/* Convert complex to string with 'i' or 'I' suffix. */
589GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) 594GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
590{ 595{
591 char buf[2*LJ_STR_NUMBUF+2+1]; 596 SBuf *sb = lj_buf_tmp_(L);
592 TValue re, im; 597 TValue re, im;
593 size_t len;
594 if (size == 2*sizeof(double)) { 598 if (size == 2*sizeof(double)) {
595 re.n = *(double *)sp; im.n = ((double *)sp)[1]; 599 re.n = *(double *)sp; im.n = ((double *)sp)[1];
596 } else { 600 } else {
597 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; 601 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
598 } 602 }
599 len = lj_str_bufnum(buf, &re); 603 lj_strfmt_putfnum(sb, STRFMT_G14, re.n);
600 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; 604 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+');
601 len += lj_str_bufnum(buf+len, &im); 605 lj_strfmt_putfnum(sb, STRFMT_G14, im.n);
602 buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; 606 lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i');
603 return lj_str_new(L, buf, len+1); 607 return lj_buf_str(L, sb);
604} 608}
605 609
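[Editor's note] The complex-to-string conversion is rebuilt on the temporary string buffer with STRFMT_G14; the output format stays the same: real part, signed imaginary part, and an 'i' suffix (uppercase 'I' when the imaginary text already ends in a letter, e.g. inf or nan). Sketch:

  local ffi = require("ffi")
  print(tostring(ffi.new("complex", 3, -4)))   --> 3-4i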
606/* -- C type state -------------------------------------------------------- */ 610/* -- C type state -------------------------------------------------------- */
@@ -639,6 +643,18 @@ CTState *lj_ctype_init(lua_State *L)
639 return cts; 643 return cts;
640} 644}
641 645
646/* Create special weak-keyed finalizer table. */
647void lj_ctype_initfin(lua_State *L)
648{
649 /* NOBARRIER: The table is new (marked white). */
650 GCtab *t = lj_tab_new(L, 0, 1);
651 setgcref(t->metatable, obj2gco(t));
652 setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
653 lj_str_newlit(L, "k"));
654 t->nomm = (uint8_t)(~(1u<<MM_mode));
655 setgcref(G(L)->gcroot[GCROOT_FFI_FIN], obj2gco(t));
656}
657
642/* Free C type table and state. */ 658/* Free C type table and state. */
643void lj_ctype_freestate(global_State *g) 659void lj_ctype_freestate(global_State *g)
644{ 660{
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 0787d800..d53c4ea4 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -177,7 +177,6 @@ typedef struct CTState {
177 MSize sizetab; /* Size of C type table. */ 177 MSize sizetab; /* Size of C type table. */
178 lua_State *L; /* Lua state (needed for errors and allocations). */ 178 lua_State *L; /* Lua state (needed for errors and allocations). */
179 global_State *g; /* Global state. */ 179 global_State *g; /* Global state. */
180 GCtab *finalizer; /* Map of cdata to finalizer. */
181 GCtab *miscmap; /* Map of -CTypeID to metatable and cb slot to func. */ 180 GCtab *miscmap; /* Map of -CTypeID to metatable and cb slot to func. */
182 CCallback cb; /* Temporary callback state. */ 181 CCallback cb; /* Temporary callback state. */
183 CTypeID1 hash[CTHASH_SIZE]; /* Hash anchors for C type table. */ 182 CTypeID1 hash[CTHASH_SIZE]; /* Hash anchors for C type table. */
@@ -260,16 +259,24 @@ typedef struct CTState {
260 259
261#define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ 260#define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */
262 261
262#ifdef LUA_USE_ASSERT
263#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__))
264#else
265#define lj_assertCTS(c, ...) ((void)cts)
266#endif
267
263/* -- Predefined types ---------------------------------------------------- */ 268/* -- Predefined types ---------------------------------------------------- */
264 269
265/* Target-dependent types. */ 270/* Target-dependent types. */
266#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE 271#if LJ_TARGET_PPC
267#define CTTYDEFP(_) \ 272#define CTTYDEFP(_) \
268 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) 273 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2))
269#else 274#else
270#define CTTYDEFP(_) 275#define CTTYDEFP(_)
271#endif 276#endif
272 277
278#define CTF_LONG_IF8 (CTF_LONG * (sizeof(long) == 8))
279
273/* Common types. */ 280/* Common types. */
274#define CTTYDEF(_) \ 281#define CTTYDEF(_) \
275 _(NONE, 0, CT_ATTRIB, CTATTRIB(CTA_BAD)) \ 282 _(NONE, 0, CT_ATTRIB, CTATTRIB(CTA_BAD)) \
@@ -283,8 +290,8 @@ typedef struct CTState {
283 _(UINT16, 2, CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \ 290 _(UINT16, 2, CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \
284 _(INT32, 4, CT_NUM, CTALIGN(2)) \ 291 _(INT32, 4, CT_NUM, CTALIGN(2)) \
285 _(UINT32, 4, CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \ 292 _(UINT32, 4, CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \
286 _(INT64, 8, CT_NUM, CTF_LONG|CTALIGN(3)) \ 293 _(INT64, 8, CT_NUM, CTF_LONG_IF8|CTALIGN(3)) \
287 _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG|CTALIGN(3)) \ 294 _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG_IF8|CTALIGN(3)) \
288 _(FLOAT, 4, CT_NUM, CTF_FP|CTALIGN(2)) \ 295 _(FLOAT, 4, CT_NUM, CTF_FP|CTALIGN(2)) \
289 _(DOUBLE, 8, CT_NUM, CTF_FP|CTALIGN(3)) \ 296 _(DOUBLE, 8, CT_NUM, CTF_FP|CTALIGN(3)) \
290 _(COMPLEX_FLOAT, 8, CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \ 297 _(COMPLEX_FLOAT, 8, CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \
@@ -292,6 +299,7 @@ typedef struct CTState {
292 _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ 299 _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \
293 _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ 300 _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \
294 _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ 301 _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \
302 _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \
295 _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ 303 _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \
296 _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ 304 _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \
297 CTTYDEFP(_) \ 305 CTTYDEFP(_) \
@@ -383,6 +391,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
383 return cts; 391 return cts;
384} 392}
385 393
394/* Load FFI library on-demand. */
395#define ctype_loadffi(L) \
396 do { \
397 if (!ctype_ctsG(G(L))) { \
398 ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \
399 luaopen_ffi(L); \
400 L->top = (TValue *)(mref(L->stack, char) + oldtop); \
401 } \
402 } while (0)
403
386/* Save and restore state of C type table. */ 404/* Save and restore state of C type table. */
387#define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts) 405#define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts)
388#define LJ_CTYPE_RESTORE(cts) \ 406#define LJ_CTYPE_RESTORE(cts) \
@@ -392,7 +410,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
392/* Check C type ID for validity when assertions are enabled. */ 410/* Check C type ID for validity when assertions are enabled. */
393static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) 411static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id)
394{ 412{
395 lua_assert(id > 0 && id < cts->top); UNUSED(cts); 413 UNUSED(cts);
414 lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id);
396 return id; 415 return id;
397} 416}
398 417
@@ -408,8 +427,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id)
408/* Get child C type. */ 427/* Get child C type. */
409static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) 428static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct)
410{ 429{
411 lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || 430 lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) ||
412 ctype_isbitfield(ct->info))); /* These don't have children. */ 431 ctype_isbitfield(ct->info)),
432 "ctype %08x has no children", ct->info);
413 return ctype_get(cts, ctype_cid(ct->info)); 433 return ctype_get(cts, ctype_cid(ct->info));
414} 434}
415 435
@@ -455,6 +475,7 @@ LJ_FUNC GCstr *lj_ctype_repr(lua_State *L, CTypeID id, GCstr *name);
455LJ_FUNC GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned); 475LJ_FUNC GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned);
456LJ_FUNC GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size); 476LJ_FUNC GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size);
457LJ_FUNC CTState *lj_ctype_init(lua_State *L); 477LJ_FUNC CTState *lj_ctype_init(lua_State *L);
478LJ_FUNC void lj_ctype_initfin(lua_State *L);
458LJ_FUNC void lj_ctype_freestate(global_State *g); 479LJ_FUNC void lj_ctype_freestate(global_State *g);
459 480
460#endif 481#endif
diff --git a/src/lj_debug.c b/src/lj_debug.c
index abb7572c..8d8b9eb5 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,12 +9,12 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_debug.h" 11#include "lj_debug.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_state.h" 14#include "lj_state.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_bc.h" 16#include "lj_bc.h"
17#include "lj_vm.h" 17#include "lj_strfmt.h"
18#if LJ_HASJIT 18#if LJ_HASJIT
19#include "lj_jit.h" 19#include "lj_jit.h"
20#endif 20#endif
@@ -24,11 +24,11 @@
24/* Get frame corresponding to a level. */ 24/* Get frame corresponding to a level. */
25cTValue *lj_debug_frame(lua_State *L, int level, int *size) 25cTValue *lj_debug_frame(lua_State *L, int level, int *size)
26{ 26{
27 cTValue *frame, *nextframe, *bot = tvref(L->stack); 27 cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
28 /* Traverse frames backwards. */ 28 /* Traverse frames backwards. */
29 for (nextframe = frame = L->base-1; frame > bot; ) { 29 for (nextframe = frame = L->base-1; frame > bot; ) {
30 if (frame_gc(frame) == obj2gco(L)) 30 if (frame_gc(frame) == obj2gco(L))
31 level++; /* Skip dummy frames. See lj_meta_call(). */ 31 level++; /* Skip dummy frames. See lj_err_optype_call(). */
32 if (level-- == 0) { 32 if (level-- == 0) {
33 *size = (int)(nextframe - frame); 33 *size = (int)(nextframe - frame);
34 return frame; /* Level found. */ 34 return frame; /* Level found. */
@@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
55 const BCIns *ins; 55 const BCIns *ins;
56 GCproto *pt; 56 GCproto *pt;
57 BCPos pos; 57 BCPos pos;
58 lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); 58 lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD,
59 "function or frame expected");
59 if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ 60 if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */
60 return NO_BCPOS; 61 return NO_BCPOS;
61 } else if (nextframe == NULL) { /* Lua function on top. */ 62 } else if (nextframe == NULL) { /* Lua function on top. */
@@ -88,8 +89,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
88 if (frame_islua(f)) { 89 if (frame_islua(f)) {
89 f = frame_prevl(f); 90 f = frame_prevl(f);
90 } else { 91 } else {
91 if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) && 92 if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
92 (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK))
93 cf = cframe_raw(cframe_prev(cf)); 93 cf = cframe_raw(cframe_prev(cf));
94 f = frame_prevd(f); 94 f = frame_prevd(f);
95 } 95 }
@@ -106,7 +106,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
106 GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); 106 GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
107 pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); 107 pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
108 } else { 108 } else {
109 pos = NO_BCPOS; /* Punt in case of stack overflow. */ 109 pos = NO_BCPOS; /* Punt in case of stack overflow for stitched trace. */
110 } 110 }
111 } 111 }
112#endif 112#endif
@@ -139,7 +139,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
139 BCPos pc = debug_framepc(L, fn, nextframe); 139 BCPos pc = debug_framepc(L, fn, nextframe);
140 if (pc != NO_BCPOS) { 140 if (pc != NO_BCPOS) {
141 GCproto *pt = funcproto(fn); 141 GCproto *pt = funcproto(fn);
142 lua_assert(pc <= pt->sizebc); 142 lj_assertL(pc <= pt->sizebc, "PC out of range");
143 return lj_debug_line(pt, pc); 143 return lj_debug_line(pt, pc);
144 } 144 }
145 return -1; 145 return -1;
@@ -147,38 +147,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
147 147
148/* -- Variable names ------------------------------------------------------ */ 148/* -- Variable names ------------------------------------------------------ */
149 149
150/* Read ULEB128 value. */
151static uint32_t debug_read_uleb128(const uint8_t **pp)
152{
153 const uint8_t *p = *pp;
154 uint32_t v = *p++;
155 if (LJ_UNLIKELY(v >= 0x80)) {
156 int sh = 0;
157 v &= 0x7f;
158 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
159 }
160 *pp = p;
161 return v;
162}
163
164/* Get name of a local variable from slot number and PC. */ 150/* Get name of a local variable from slot number and PC. */
165static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) 151static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
166{ 152{
167 const uint8_t *p = proto_varinfo(pt); 153 const char *p = (const char *)proto_varinfo(pt);
168 if (p) { 154 if (p) {
169 BCPos lastpc = 0; 155 BCPos lastpc = 0;
170 for (;;) { 156 for (;;) {
171 const char *name = (const char *)p; 157 const char *name = p;
172 uint32_t vn = *p++; 158 uint32_t vn = *(const uint8_t *)p;
173 BCPos startpc, endpc; 159 BCPos startpc, endpc;
174 if (vn < VARNAME__MAX) { 160 if (vn < VARNAME__MAX) {
175 if (vn == VARNAME_END) break; /* End of varinfo. */ 161 if (vn == VARNAME_END) break; /* End of varinfo. */
176 } else { 162 } else {
177 while (*p++) ; /* Skip over variable name string. */ 163 do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */
178 } 164 }
179 lastpc = startpc = lastpc + debug_read_uleb128(&p); 165 p++;
166 lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
180 if (startpc > pc) break; 167 if (startpc > pc) break;
181 endpc = startpc + debug_read_uleb128(&p); 168 endpc = startpc + lj_buf_ruleb128(&p);
182 if (pc < endpc && slot-- == 0) { 169 if (pc < endpc && slot-- == 0) {
183 if (vn < VARNAME__MAX) { 170 if (vn < VARNAME__MAX) {
184#define VARNAMESTR(name, str) str "\0" 171#define VARNAMESTR(name, str) str "\0"
@@ -203,7 +190,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
203 TValue *nextframe = size ? frame + size : NULL; 190 TValue *nextframe = size ? frame + size : NULL;
204 GCfunc *fn = frame_func(frame); 191 GCfunc *fn = frame_func(frame);
205 BCPos pc = debug_framepc(L, fn, nextframe); 192 BCPos pc = debug_framepc(L, fn, nextframe);
206 if (!nextframe) nextframe = L->top; 193 if (!nextframe) nextframe = L->top+LJ_FR2;
207 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ 194 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */
208 if (pc != NO_BCPOS) { 195 if (pc != NO_BCPOS) {
209 GCproto *pt = funcproto(fn); 196 GCproto *pt = funcproto(fn);
@@ -213,7 +200,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
213 nextframe = frame; 200 nextframe = frame;
214 frame = frame_prevd(frame); 201 frame = frame_prevd(frame);
215 } 202 }
216 if (frame + slot1 < nextframe) { 203 if (frame + slot1+LJ_FR2 < nextframe) {
217 *name = "(*vararg)"; 204 *name = "(*vararg)";
218 return frame+slot1; 205 return frame+slot1;
219 } 206 }
@@ -224,7 +211,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
224 if (pc != NO_BCPOS && 211 if (pc != NO_BCPOS &&
225 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) 212 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
226 ; 213 ;
227 else if (slot1 > 0 && frame + slot1 < nextframe) 214 else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
228 *name = "(*temporary)"; 215 *name = "(*temporary)";
229 return frame+slot1; 216 return frame+slot1;
230} 217}
@@ -233,7 +220,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
233const char *lj_debug_uvname(GCproto *pt, uint32_t idx) 220const char *lj_debug_uvname(GCproto *pt, uint32_t idx)
234{ 221{
235 const uint8_t *p = proto_uvinfo(pt); 222 const uint8_t *p = proto_uvinfo(pt);
236 lua_assert(idx < pt->sizeuv); 223 lj_assertX(idx < pt->sizeuv, "bad upvalue index");
237 if (!p) return ""; 224 if (!p) return "";
238 if (idx) while (*p++ || --idx) ; 225 if (idx) while (*p++ || --idx) ;
239 return (const char *)p; 226 return (const char *)p;
@@ -290,7 +277,7 @@ restart:
290 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); 277 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
291 if (ip > proto_bc(pt)) { 278 if (ip > proto_bc(pt)) {
292 BCIns insp = ip[-1]; 279 BCIns insp = ip[-1];
293 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && 280 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
294 bc_d(insp) == bc_b(ins)) 281 bc_d(insp) == bc_b(ins))
295 return "method"; 282 return "method";
296 } 283 }
@@ -307,12 +294,12 @@ restart:
307} 294}
308 295
309/* Deduce function name from caller of a frame. */ 296/* Deduce function name from caller of a frame. */
310const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) 297const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
311{ 298{
312 TValue *pframe; 299 cTValue *pframe;
313 GCfunc *fn; 300 GCfunc *fn;
314 BCPos pc; 301 BCPos pc;
315 if (frame <= tvref(L->stack)) 302 if (frame <= tvref(L->stack)+LJ_FR2)
316 return NULL; 303 return NULL;
317 if (frame_isvarg(frame)) 304 if (frame_isvarg(frame))
318 frame = frame_prevd(frame); 305 frame = frame_prevd(frame);
@@ -338,7 +325,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
338/* -- Source code locations ----------------------------------------------- */ 325/* -- Source code locations ----------------------------------------------- */
339 326
340/* Generate shortened source name. */ 327/* Generate shortened source name. */
341void lj_debug_shortname(char *out, GCstr *str) 328void lj_debug_shortname(char *out, GCstr *str, BCLine line)
342{ 329{
343 const char *src = strdata(str); 330 const char *src = strdata(str);
344 if (*src == '=') { 331 if (*src == '=') {
@@ -352,11 +339,11 @@ void lj_debug_shortname(char *out, GCstr *str)
352 *out++ = '.'; *out++ = '.'; *out++ = '.'; 339 *out++ = '.'; *out++ = '.'; *out++ = '.';
353 } 340 }
354 strcpy(out, src); 341 strcpy(out, src);
355 } else { /* Output [string "string"]. */ 342 } else { /* Output [string "string"] or [builtin:name]. */
356 size_t len; /* Length, up to first control char. */ 343 size_t len; /* Length, up to first control char. */
357 for (len = 0; len < LUA_IDSIZE-12; len++) 344 for (len = 0; len < LUA_IDSIZE-12; len++)
358 if (((const unsigned char *)src)[len] < ' ') break; 345 if (((const unsigned char *)src)[len] < ' ') break;
359 strcpy(out, "[string \""); out += 9; 346 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
360 if (src[len] != '\0') { /* Must truncate? */ 347 if (src[len] != '\0') { /* Must truncate? */
361 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 348 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
362 strncpy(out, src, len); out += len; 349 strncpy(out, src, len); out += len;
@@ -364,7 +351,7 @@ void lj_debug_shortname(char *out, GCstr *str)
364 } else { 351 } else {
365 strcpy(out, src); out += len; 352 strcpy(out, src); out += len;
366 } 353 }
367 strcpy(out, "\"]"); 354 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
368 } 355 }
369} 356}
370 357
@@ -377,14 +364,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
377 if (isluafunc(fn)) { 364 if (isluafunc(fn)) {
378 BCLine line = debug_frameline(L, fn, nextframe); 365 BCLine line = debug_frameline(L, fn, nextframe);
379 if (line >= 0) { 366 if (line >= 0) {
367 GCproto *pt = funcproto(fn);
380 char buf[LUA_IDSIZE]; 368 char buf[LUA_IDSIZE];
381 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 369 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
382 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 370 lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
383 return; 371 return;
384 } 372 }
385 } 373 }
386 } 374 }
387 lj_str_pushf(L, "%s", msg); 375 lj_strfmt_pushf(L, "%s", msg);
388} 376}
389 377
390/* Push location string for a bytecode position to Lua stack. */ 378/* Push location string for a bytecode position to Lua stack. */
@@ -394,20 +382,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
394 const char *s = strdata(name); 382 const char *s = strdata(name);
395 MSize i, len = name->len; 383 MSize i, len = name->len;
396 BCLine line = lj_debug_line(pt, pc); 384 BCLine line = lj_debug_line(pt, pc);
397 if (*s == '@') { 385 if (pt->firstline == ~(BCLine)0) {
386 lj_strfmt_pushf(L, "builtin:%s", s);
387 } else if (*s == '@') {
398 s++; len--; 388 s++; len--;
399 for (i = len; i > 0; i--) 389 for (i = len; i > 0; i--)
400 if (s[i] == '/' || s[i] == '\\') { 390 if (s[i] == '/' || s[i] == '\\') {
401 s += i+1; 391 s += i+1;
402 break; 392 break;
403 } 393 }
404 lj_str_pushf(L, "%s:%d", s, line); 394 lj_strfmt_pushf(L, "%s:%d", s, line);
405 } else if (len > 40) { 395 } else if (len > 40) {
406 lj_str_pushf(L, "%p:%d", pt, line); 396 lj_strfmt_pushf(L, "%p:%d", pt, line);
407 } else if (*s == '=') { 397 } else if (*s == '=') {
408 lj_str_pushf(L, "%s:%d", s+1, line); 398 lj_strfmt_pushf(L, "%s:%d", s+1, line);
409 } else { 399 } else {
410 lj_str_pushf(L, "\"%s\":%d", s, line); 400 lj_strfmt_pushf(L, "\"%s\":%d", s, line);
411 } 401 }
412} 402}
413 403
@@ -455,13 +445,14 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
455 } else { 445 } else {
456 uint32_t offset = (uint32_t)ar->i_ci & 0xffff; 446 uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
457 uint32_t size = (uint32_t)ar->i_ci >> 16; 447 uint32_t size = (uint32_t)ar->i_ci >> 16;
458 lua_assert(offset != 0); 448 lj_assertL(offset != 0, "bad frame offset");
459 frame = tvref(L->stack) + offset; 449 frame = tvref(L->stack) + offset;
460 if (size) nextframe = frame + size; 450 if (size) nextframe = frame + size;
461 lua_assert(frame <= tvref(L->maxstack) && 451 lj_assertL(frame <= tvref(L->maxstack) &&
462 (!nextframe || nextframe <= tvref(L->maxstack))); 452 (!nextframe || nextframe <= tvref(L->maxstack)),
453 "broken frame chain");
463 fn = frame_func(frame); 454 fn = frame_func(frame);
464 lua_assert(fn->c.gct == ~LJ_TFUNC); 455 lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function");
465 } 456 }
466 for (; *what; what++) { 457 for (; *what; what++) {
467 if (*what == 'S') { 458 if (*what == 'S') {
@@ -470,7 +461,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
470 BCLine firstline = pt->firstline; 461 BCLine firstline = pt->firstline;
471 GCstr *name = proto_chunkname(pt); 462 GCstr *name = proto_chunkname(pt);
472 ar->source = strdata(name); 463 ar->source = strdata(name);
473 lj_debug_shortname(ar->short_src, name); 464 lj_debug_shortname(ar->short_src, name, pt->firstline);
474 ar->linedefined = (int)firstline; 465 ar->linedefined = (int)firstline;
475 ar->lastlinedefined = (int)(firstline + pt->numline); 466 ar->lastlinedefined = (int)(firstline + pt->numline);
476 ar->what = (firstline || !pt->numline) ? "Lua" : "main"; 467 ar->what = (firstline || !pt->numline) ? "Lua" : "main";
@@ -560,6 +551,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
560 } 551 }
561} 552}
562 553
554#if LJ_HASPROFILE
555/* Put the chunkname into a buffer. */
556static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
557{
558 GCstr *name = proto_chunkname(pt);
559 const char *p = strdata(name);
560 if (pt->firstline == ~(BCLine)0) {
561 lj_buf_putmem(sb, "[builtin:", 9);
562 lj_buf_putstr(sb, name);
563 lj_buf_putb(sb, ']');
564 return 0;
565 }
566 if (*p == '=' || *p == '@') {
567 MSize len = name->len-1;
568 p++;
569 if (pathstrip) {
570 int i;
571 for (i = len-1; i >= 0; i--)
572 if (p[i] == '/' || p[i] == '\\') {
573 len -= i+1;
574 p = p+i+1;
575 break;
576 }
577 }
578 lj_buf_putmem(sb, p, len);
579 } else {
580 lj_buf_putmem(sb, "[string]", 8);
581 }
582 return 1;
583}
584
585/* Put a compact stack dump into a buffer. */
586void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
587{
588 int level = 0, dir = 1, pathstrip = 1;
589 MSize lastlen = 0;
590 if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */
591  while (level != depth) { /* Loop through all frames. */
592 int size;
593 cTValue *frame = lj_debug_frame(L, level, &size);
594 if (frame) {
595 cTValue *nextframe = size ? frame+size : NULL;
596 GCfunc *fn = frame_func(frame);
597 const uint8_t *p = (const uint8_t *)fmt;
598 int c;
599 while ((c = *p++)) {
600 switch (c) {
601 case 'p': /* Preserve full path. */
602 pathstrip = 0;
603 break;
604 case 'F': case 'f': { /* Dump function name. */
605 const char *name;
606 const char *what = lj_debug_funcname(L, frame, &name);
607 if (what) {
608 if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */
609 GCproto *pt = funcproto(fn);
610 if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */
611 debug_putchunkname(sb, pt, pathstrip);
612 lj_buf_putb(sb, ':');
613 }
614 }
615 lj_buf_putmem(sb, name, (MSize)strlen(name));
616 break;
617 } /* else: can't derive a name, dump module:line. */
618 }
619 /* fallthrough */
620 case 'l': /* Dump module:line. */
621 if (isluafunc(fn)) {
622 GCproto *pt = funcproto(fn);
623 if (debug_putchunkname(sb, pt, pathstrip)) {
624 /* Regular Lua function. */
625 BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
626 pt->firstline;
627 lj_buf_putb(sb, ':');
628 lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
629 }
630 } else if (isffunc(fn)) { /* Dump numbered builtins. */
631 lj_buf_putmem(sb, "[builtin#", 9);
632 lj_strfmt_putint(sb, fn->c.ffid);
633 lj_buf_putb(sb, ']');
634 } else { /* Dump C function address. */
635 lj_buf_putb(sb, '@');
636 lj_strfmt_putptr(sb, fn->c.f);
637 }
638 break;
639 case 'Z': /* Zap trailing separator. */
640 lastlen = sbuflen(sb);
641 break;
642 default:
643 lj_buf_putb(sb, c);
644 break;
645 }
646 }
647 } else if (dir == 1) {
648 break;
649 } else {
650 level -= size; /* Reverse frame order: quickly skip missing level. */
651 }
652 level += dir;
653 }
654 if (lastlen)
655 sb->w = sb->b + lastlen; /* Zap trailing separator. */
656}
657#endif
658
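[Editor's note] lj_debug_dumpstack renders one compact stack dump per format string: 'p' keeps full paths, 'f'/'F' emit the function name (with a module: prefix for 'F'), 'l' emits module:line, 'Z' marks where to truncate so a trailing separator is dropped, and any other character is copied literally. A hedged sketch using the bundled profiler module, which exposes this through dumpstack():

  local profile = require("jit.profile")
  local counts = {}
  profile.start("i10", function(thread, samples, vmstate)
    -- "lZ;" = module:line per frame, ';'-separated, trailing ';' zapped
    local key = profile.dumpstack(thread, "lZ;", 10)
    counts[key] = (counts[key] or 0) + samples
  end)
  -- ... run the workload, then profile.stop() and inspect counts ...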
563/* Number of frames for the leading and trailing part of a traceback. */ 659/* Number of frames for the leading and trailing part of a traceback. */
564#define TRACEBACK_LEVELS1 12 660#define TRACEBACK_LEVELS1 12
565#define TRACEBACK_LEVELS2 10 661#define TRACEBACK_LEVELS2 10
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 71f7c6a2..33ac29bc 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -33,14 +33,18 @@ LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp,
33 GCobj **op); 33 GCobj **op);
34LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, 34LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
35 BCReg slot, const char **name); 35 BCReg slot, const char **name);
36LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 36LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
37 const char **name); 37 const char **name);
38LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 38LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
39LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 39LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
40 cTValue *frame, cTValue *nextframe); 40 cTValue *frame, cTValue *nextframe);
41LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 41LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
42LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, 42LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
43 int ext); 43 int ext);
44#if LJ_HASPROFILE
45LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
46 int depth);
47#endif
44 48
45/* Fixed internal variable names. */ 49/* Fixed internal variable names. */
46#define VARNAMEDEF(_) \ 50#define VARNAMEDEF(_) \
diff --git a/src/lj_def.h b/src/lj_def.h
index a5e317cd..2a1d7b56 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
46#include <stdlib.h> 46#include <stdlib.h>
47 47
48/* Various VM limits. */ 48/* Various VM limits. */
49#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ 49#define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */
50#define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */
51/* Max. total memory allocation. */
52#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
50#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ 53#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
51#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ 54#define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */
52#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ 55#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */
56#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */
53 57
54#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ 58#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
55#define LJ_MAX_HBITS 26 /* Max. hash bits. */ 59#define LJ_MAX_HBITS 26 /* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
57#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ 61#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
58#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ 62#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
59 63
60#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ 64#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */
61#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ 65#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
62#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ 66#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
63#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ 67#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
65#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ 69#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
66 70
67#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ 71#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
68#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ 72#define LJ_STACK_EXTRA (5+3*LJ_FR2) /* Extra stack space (metamethods). */
69 73
70#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ 74#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
71 75
@@ -76,7 +80,6 @@ typedef unsigned int uintptr_t;
76#define LJ_MIN_SBUF 32 /* Min. string buffer length. */ 80#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
77#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ 81#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
78#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ 82#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
79#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
80 83
81/* JIT compiler limits. */ 84/* JIT compiler limits. */
82#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ 85#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
@@ -91,6 +94,9 @@ typedef unsigned int uintptr_t;
91#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) 94#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
92#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) 95#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
93#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) 96#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
97#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
98#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
99#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p))
94 100
95#define checki8(x) ((x) == (int32_t)(int8_t)(x)) 101#define checki8(x) ((x) == (int32_t)(int8_t)(x))
96#define checku8(x) ((x) == (int32_t)(uint8_t)(x)) 102#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
@@ -98,7 +104,10 @@ typedef unsigned int uintptr_t;
98#define checku16(x) ((x) == (int32_t)(uint16_t)(x)) 104#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
99#define checki32(x) ((x) == (int32_t)(x)) 105#define checki32(x) ((x) == (int32_t)(x))
100#define checku32(x) ((x) == (uint32_t)(x)) 106#define checku32(x) ((x) == (uint32_t)(x))
107#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0)
101#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) 108#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
109#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0)
110#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1)
102 111
103/* Every half-decent C compiler transforms this into a rotate instruction. */ 112/* Every half-decent C compiler transforms this into a rotate instruction. */
104#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) 113#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
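The new checkptr31/checkptr47 predicates spell out the address ranges the two GC modes can represent: without LJ_GC64, a 64 bit build must keep GC memory below 2GB so objects stay reachable through 31 bit addresses, while LJ_GC64 allows the full 47 bit user-space range; checkptrGC picks the right test per build. A minimal sketch of how an allocation could be validated against this limit (the wrapper function is illustrative, only the macros come from the hunk above):

/* Illustrative only: reject allocations the chosen GC mode cannot address. */
static void *gc_alloc_checked(lua_Alloc allocf, void *ud, size_t sz)
{
  void *p = allocf(ud, NULL, 0, sz);
  if (p && !checkptrGC(p)) {  /* 47 bits with LJ_GC64, 31 bits on plain 64 bit. */
    allocf(ud, p, sz, 0);     /* Out of range for the TValue encoding: free it. */
    p = NULL;
  }
  return p;
}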
@@ -111,7 +120,7 @@ typedef uintptr_t BloomFilter;
111#define bloomset(b, x) ((b) |= bloombit((x))) 120#define bloomset(b, x) ((b) |= bloombit((x)))
112#define bloomtest(b, x) ((b) & bloombit((x))) 121#define bloomtest(b, x) ((b) & bloombit((x)))
113 122
114#if defined(__GNUC__) || defined(__psp2__) 123#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__)
115 124
116#define LJ_NORET __attribute__((noreturn)) 125#define LJ_NORET __attribute__((noreturn))
117#define LJ_ALIGN(n) __attribute__((aligned(n))) 126#define LJ_ALIGN(n) __attribute__((aligned(n)))
@@ -137,15 +146,9 @@ typedef uintptr_t BloomFilter;
137#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) 146#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0)
138 147
139#define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) 148#define lj_ffs(x) ((uint32_t)__builtin_ctz(x))
140/* Don't ask ... */
141#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__))
142static LJ_AINLINE uint32_t lj_fls(uint32_t x)
143{
144 uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r;
145}
146#else
147#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) 149#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31))
148#endif 150#define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x))
151#define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63))
149 152
150#if defined(__arm__) 153#if defined(__arm__)
151static LJ_AINLINE uint32_t lj_bswap(uint32_t x) 154static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
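lj_ffs64 and lj_fls64 extend the existing 32 bit find-first-set/find-last-set helpers to 64 bits: both take a nonzero value and return the bit index (0..63) of its lowest resp. highest set bit, mapping to single ctz/clz instructions via the builtins above. A portable reference version, shown only to pin down the contract:

#include <stdint.h>

static uint32_t ffs64_ref(uint64_t x)   /* x != 0 */
{
  uint32_t n = 0;
  while (!(x & 1)) { x >>= 1; n++; }    /* Index of the lowest set bit. */
  return n;
}

static uint32_t fls64_ref(uint64_t x)   /* x != 0 */
{
  uint32_t n = 0;
  while (x >>= 1) n++;                  /* Index of the highest set bit. */
  return n;
}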
@@ -173,7 +176,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x)
173{ 176{
174 return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); 177 return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32));
175} 178}
176#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) 179#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__
177static LJ_AINLINE uint32_t lj_bswap(uint32_t x) 180static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
178{ 181{
179 return (uint32_t)__builtin_bswap32((int32_t)x); 182 return (uint32_t)__builtin_bswap32((int32_t)x);
@@ -268,6 +271,23 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
268{ 271{
269 unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; 272 unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
270} 273}
274
275#if defined(_M_X64) || defined(_M_ARM64)
276unsigned char _BitScanForward64(unsigned long *, uint64_t);
277unsigned char _BitScanReverse64(unsigned long *, uint64_t);
278#pragma intrinsic(_BitScanForward64)
279#pragma intrinsic(_BitScanReverse64)
280
281static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
282{
283 unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
284}
285
286static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
287{
288 unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
289}
290#endif
271#endif 291#endif
272 292
273unsigned long _byteswap_ulong(unsigned long); 293unsigned long _byteswap_ulong(unsigned long);
@@ -329,14 +349,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
329#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET 349#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET
330#define LJ_ASMF_NORET LJ_ASMF LJ_NORET 350#define LJ_ASMF_NORET LJ_ASMF LJ_NORET
331 351
332/* Runtime assertions. */ 352/* Internal assertions. */
333#ifdef lua_assert 353#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
334#define check_exp(c, e) (lua_assert(c), (e)) 354#define lj_assert_check(g, c, ...) \
335#define api_check(l, e) lua_assert(e) 355 ((c) ? (void)0 : \
356 (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0))
357#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
336#else 358#else
337#define lua_assert(c) ((void)0) 359#define lj_checkapi(c, ...) ((void)L)
360#endif
361
362#ifdef LUA_USE_ASSERT
363#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__)
364#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__)
365#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
366#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__)
367#define check_exp(c, e) (lj_assertX((c), #c), (e))
368#else
369#define lj_assertG_(g, c, ...) ((void)0)
370#define lj_assertG(c, ...) ((void)g)
371#define lj_assertL(c, ...) ((void)L)
372#define lj_assertX(c, ...) ((void)0)
338#define check_exp(c, e) (e) 373#define check_exp(c, e) (e)
339#define api_check luai_apicheck
340#endif 374#endif
341 375
342/* Static assertions. */ 376/* Static assertions. */
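The old lua_assert/check_exp pair is replaced by context-specific internal assertions: lj_assertG/lj_assertL/lj_assertX (and lj_assertA in the assembler backends further below) pick up a global_State, a lua_State or nothing, and forward a printf-style message to lj_assert_fail() when LUA_USE_ASSERT is defined; otherwise they compile away while still marking their context argument as used. A sketch of the intended call pattern, with a made-up helper purely for illustration:

/* Hypothetical helper: shows the assertion style used throughout this patch. */
static void stack_drop(lua_State *L, int n)
{
  lj_assertL(n >= 0 && L->top - L->base >= n,
             "bad drop count %d for %d stack slots",
             n, (int)(L->top - L->base));
  L->top -= n;  /* With asserts disabled lj_assertL() is just ((void)L). */
}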
@@ -350,4 +384,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
350 extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) 384 extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1])
351#endif 385#endif
352 386
387/* PRNG state. Need this here, details in lj_prng.h. */
388typedef struct PRNGState {
389 uint64_t u[4];
390} PRNGState;
391
353#endif 392#endif
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 63e09752..b9748bba 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_func.h" 12#include "lj_func.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
@@ -17,6 +18,7 @@
17#include "lj_frame.h" 18#include "lj_frame.h"
18#include "lj_bc.h" 19#include "lj_bc.h"
19#include "lj_ff.h" 20#include "lj_ff.h"
21#include "lj_strfmt.h"
20#if LJ_HASJIT 22#if LJ_HASJIT
21#include "lj_jit.h" 23#include "lj_jit.h"
22#endif 24#endif
@@ -25,6 +27,9 @@
25#endif 27#endif
26#include "lj_trace.h" 28#include "lj_trace.h"
27#include "lj_dispatch.h" 29#include "lj_dispatch.h"
30#if LJ_HASPROFILE
31#include "lj_profile.h"
32#endif
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "luajit.h" 34#include "luajit.h"
30 35
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
37#include <math.h> 42#include <math.h>
38LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, 43LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
39 lua_State *co); 44 lua_State *co);
45#if !LJ_HASJIT
46#define lj_dispatch_stitch lj_dispatch_ins
47#endif
48#if !LJ_HASPROFILE
49#define lj_dispatch_profile lj_dispatch_ins
50#endif
40 51
41#define GOTFUNC(name) (ASMFunction)name, 52#define GOTFUNC(name) (ASMFunction)name,
42static const ASMFunction dispatch_got[] = { 53static const ASMFunction dispatch_got[] = {
@@ -57,6 +68,8 @@ void lj_dispatch_init(GG_State *GG)
57 /* The JIT engine is off by default. luaopen_jit() turns it on. */ 68 /* The JIT engine is off by default. luaopen_jit() turns it on. */
58 disp[BC_FORL] = disp[BC_IFORL]; 69 disp[BC_FORL] = disp[BC_IFORL];
59 disp[BC_ITERL] = disp[BC_IITERL]; 70 disp[BC_ITERL] = disp[BC_IITERL];
71 /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */
72 disp[BC_ITERN] = &lj_vm_IITERN;
60 disp[BC_LOOP] = disp[BC_ILOOP]; 73 disp[BC_LOOP] = disp[BC_ILOOP];
61 disp[BC_FUNCF] = disp[BC_IFUNCF]; 74 disp[BC_FUNCF] = disp[BC_IFUNCF];
62 disp[BC_FUNCV] = disp[BC_IFUNCV]; 75 disp[BC_FUNCV] = disp[BC_IFUNCV];
@@ -64,7 +77,7 @@ void lj_dispatch_init(GG_State *GG)
64 for (i = 0; i < GG_NUM_ASMFF; i++) 77 for (i = 0; i < GG_NUM_ASMFF; i++)
65 GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); 78 GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
66#if LJ_TARGET_MIPS 79#if LJ_TARGET_MIPS
67 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); 80 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
68#endif 81#endif
69} 82}
70 83
@@ -82,11 +95,12 @@ void lj_dispatch_init_hotcount(global_State *g)
82#endif 95#endif
83 96
84/* Internal dispatch mode bits. */ 97/* Internal dispatch mode bits. */
85#define DISPMODE_JIT 0x01 /* JIT compiler on. */ 98#define DISPMODE_CALL 0x01 /* Override call dispatch. */
86#define DISPMODE_REC 0x02 /* Recording active. */ 99#define DISPMODE_RET 0x02 /* Override return dispatch. */
87#define DISPMODE_INS 0x04 /* Override instruction dispatch. */ 100#define DISPMODE_INS 0x04 /* Override instruction dispatch. */
88#define DISPMODE_CALL 0x08 /* Override call dispatch. */ 101#define DISPMODE_JIT 0x10 /* JIT compiler on. */
89#define DISPMODE_RET 0x10 /* Override return dispatch. */ 102#define DISPMODE_REC 0x20 /* Recording active. */
103#define DISPMODE_PROF 0x40 /* Profiling active. */
90 104
91/* Update dispatch table depending on various flags. */ 105/* Update dispatch table depending on various flags. */
92void lj_dispatch_update(global_State *g) 106void lj_dispatch_update(global_State *g)
@@ -98,24 +112,29 @@ void lj_dispatch_update(global_State *g)
98 mode |= G2J(g)->state != LJ_TRACE_IDLE ? 112 mode |= G2J(g)->state != LJ_TRACE_IDLE ?
99 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; 113 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
100#endif 114#endif
115#if LJ_HASPROFILE
116 mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
117#endif
101 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; 118 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
102 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; 119 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
103 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; 120 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
104 if (oldmode != mode) { /* Mode changed? */ 121 if (oldmode != mode) { /* Mode changed? */
105 ASMFunction *disp = G2GG(g)->dispatch; 122 ASMFunction *disp = G2GG(g)->dispatch;
106 ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv; 123 ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv;
107 g->dispatchmode = mode; 124 g->dispatchmode = mode;
108 125
109 /* Hotcount if JIT is on, but not while recording. */ 126 /* Hotcount if JIT is on, but not while recording. */
110 if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { 127 if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) {
111 f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); 128 f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]);
112 f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); 129 f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]);
130 f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]);
113 f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); 131 f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]);
114 f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); 132 f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]);
115 f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); 133 f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]);
116 } else { /* Otherwise use the non-hotcounting instructions. */ 134 } else { /* Otherwise use the non-hotcounting instructions. */
117 f_forl = disp[GG_LEN_DDISP+BC_IFORL]; 135 f_forl = disp[GG_LEN_DDISP+BC_IFORL];
118 f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; 136 f_iterl = disp[GG_LEN_DDISP+BC_IITERL];
137 f_itern = &lj_vm_IITERN;
119 f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; 138 f_loop = disp[GG_LEN_DDISP+BC_ILOOP];
120 f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); 139 f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]);
121 f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); 140 f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]);
@@ -123,12 +142,13 @@ void lj_dispatch_update(global_State *g)
123 /* Init static counting instruction dispatch first (may be copied below). */ 142 /* Init static counting instruction dispatch first (may be copied below). */
124 disp[GG_LEN_DDISP+BC_FORL] = f_forl; 143 disp[GG_LEN_DDISP+BC_FORL] = f_forl;
125 disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; 144 disp[GG_LEN_DDISP+BC_ITERL] = f_iterl;
145 disp[GG_LEN_DDISP+BC_ITERN] = f_itern;
126 disp[GG_LEN_DDISP+BC_LOOP] = f_loop; 146 disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
127 147
128 /* Set dynamic instruction dispatch. */ 148 /* Set dynamic instruction dispatch. */
129 if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { 149 if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
130 /* Need to update the whole table. */ 150 /* Need to update the whole table. */
131 if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ 151 if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */
132 /* Copy static dispatch table to dynamic dispatch table. */ 152 /* Copy static dispatch table to dynamic dispatch table. */
133 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); 153 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
134 /* Overwrite with dynamic return dispatch. */ 154 /* Overwrite with dynamic return dispatch. */
@@ -140,15 +160,17 @@ void lj_dispatch_update(global_State *g)
140 } 160 }
141 } else { 161 } else {
142 /* The recording dispatch also checks for hooks. */ 162 /* The recording dispatch also checks for hooks. */
143 ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; 163 ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
164 (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
144 uint32_t i; 165 uint32_t i;
145 for (i = 0; i < GG_LEN_SDISP; i++) 166 for (i = 0; i < GG_LEN_SDISP; i++)
146 disp[i] = f; 167 disp[i] = f;
147 } 168 }
148 } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { 169 } else if (!(mode & DISPMODE_INS)) {
149 /* Otherwise set dynamic counting ins. */ 170 /* Otherwise set dynamic counting ins. */
150 disp[BC_FORL] = f_forl; 171 disp[BC_FORL] = f_forl;
151 disp[BC_ITERL] = f_iterl; 172 disp[BC_ITERL] = f_iterl;
173 disp[BC_ITERN] = f_itern;
152 disp[BC_LOOP] = f_loop; 174 disp[BC_LOOP] = f_loop;
153 /* Set dynamic return dispatch. */ 175 /* Set dynamic return dispatch. */
154 if ((mode & DISPMODE_RET)) { 176 if ((mode & DISPMODE_RET)) {
@@ -236,22 +258,15 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
236 } else { 258 } else {
237 if (!(mode & LUAJIT_MODE_ON)) 259 if (!(mode & LUAJIT_MODE_ON))
238 G2J(g)->flags &= ~(uint32_t)JIT_F_ON; 260 G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
239#if LJ_TARGET_X86ORX64
240 else if ((G2J(g)->flags & JIT_F_SSE2))
241 G2J(g)->flags |= (uint32_t)JIT_F_ON;
242 else
243 return 0; /* Don't turn on JIT compiler without SSE2 support. */
244#else
245 else 261 else
246 G2J(g)->flags |= (uint32_t)JIT_F_ON; 262 G2J(g)->flags |= (uint32_t)JIT_F_ON;
247#endif
248 lj_dispatch_update(g); 263 lj_dispatch_update(g);
249 } 264 }
250 break; 265 break;
251 case LUAJIT_MODE_FUNC: 266 case LUAJIT_MODE_FUNC:
252 case LUAJIT_MODE_ALLFUNC: 267 case LUAJIT_MODE_ALLFUNC:
253 case LUAJIT_MODE_ALLSUBFUNC: { 268 case LUAJIT_MODE_ALLSUBFUNC: {
254 cTValue *tv = idx == 0 ? frame_prev(L->base-1) : 269 cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 :
255 idx > 0 ? L->base + (idx-1) : L->top + idx; 270 idx > 0 ? L->base + (idx-1) : L->top + idx;
256 GCproto *pt; 271 GCproto *pt;
257 if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) 272 if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn))
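The removed branch means the x86/x64 JIT no longer probes for SSE2 at runtime (it is now a hard build requirement), so LUAJIT_MODE_ENGINE simply toggles JIT_F_ON and refreshes the dispatch table. From embedder code the switch is driven through the public luaJIT_setmode() API in luajit.h; a minimal usage sketch (returns 1 on success, 0 on failure):

#include "luajit.h"

/* Turn the JIT compiler off or back on for the whole VM. */
static int set_jit(lua_State *L, int enable)
{
  int mode = LUAJIT_MODE_ENGINE | (enable ? LUAJIT_MODE_ON : LUAJIT_MODE_OFF);
  return luaJIT_setmode(L, 0, mode);  /* 0 = failed, e.g. JIT not compiled in. */
}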
@@ -286,7 +301,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
286 if (idx != 0) { 301 if (idx != 0) {
287 cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx; 302 cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx;
288 if (tvislightud(tv)) 303 if (tvislightud(tv))
289 g->wrapf = (lua_CFunction)lightudV(tv); 304 g->wrapf = (lua_CFunction)lightudV(g, tv);
290 else 305 else
291 return 0; /* Failed. */ 306 return 0; /* Failed. */
292 } else { 307 } else {
@@ -352,10 +367,19 @@ static void callhook(lua_State *L, int event, BCLine line)
352 /* Top frame, nextframe = NULL. */ 367 /* Top frame, nextframe = NULL. */
353 ar.i_ci = (int)((L->base-1) - tvref(L->stack)); 368 ar.i_ci = (int)((L->base-1) - tvref(L->stack));
354 lj_state_checkstack(L, 1+LUA_MINSTACK); 369 lj_state_checkstack(L, 1+LUA_MINSTACK);
370#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
371 lj_profile_hook_enter(g);
372#else
355 hook_enter(g); 373 hook_enter(g);
374#endif
356 hookf(L, &ar); 375 hookf(L, &ar);
357 lua_assert(hook_active(g)); 376 lj_assertG(hook_active(g), "active hook flag removed");
377 setgcref(g->cur_L, obj2gco(L));
378#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
379 lj_profile_hook_leave(g);
380#else
358 hook_leave(g); 381 hook_leave(g);
382#endif
359 } 383 }
360} 384}
361 385
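callhook() services the standard Lua debug hooks; the changes route the enter/leave transitions through lj_profile_hook_enter/leave when the profiler is built without SIGPROF (so hook execution is coordinated with the sampling timer) and re-point g->cur_L at L afterwards, since the hook may have run other coroutines. For reference, the kind of hook this code ends up dispatching to is installed with the plain Lua C API:

#include "lua.h"

static void line_hook(lua_State *L, lua_Debug *ar)
{
  if (ar->event == LUA_HOOKLINE) {
    /* Reached from callhook() via hookf(L, &ar). Keep this cheap. */
  }
}

void install_hook(lua_State *L)
{
  /* Fire on every executed line and on every call. */
  lua_sethook(L, line_hook, LUA_MASKLINE | LUA_MASKCALL, 0);
}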
@@ -368,7 +392,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
368 if (bc_op(ins) == BC_UCLO) 392 if (bc_op(ins) == BC_UCLO)
369 ins = pc[bc_j(ins)]; 393 ins = pc[bc_j(ins)];
370 switch (bc_op(ins)) { 394 switch (bc_op(ins)) {
371 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; 395 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
372 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; 396 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
373 case BC_TSETM: return bc_a(ins) + nres-1; 397 case BC_TSETM: return bc_a(ins) + nres-1;
374 default: return pt->framesize; 398 default: return pt->framesize;
@@ -397,7 +421,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc)
397#endif 421#endif
398 J->L = L; 422 J->L = L;
399 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ 423 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
400 lua_assert(L->top - L->base == delta); 424 lj_assertG(L->top - L->base == delta,
425 "unbalanced stack after tracing of instruction");
401 } 426 }
402 } 427 }
403#endif 428#endif
@@ -428,7 +453,7 @@ static int call_init(lua_State *L, GCfunc *fn)
428 int numparams = pt->numparams; 453 int numparams = pt->numparams;
429 int gotparams = (int)(L->top - L->base); 454 int gotparams = (int)(L->top - L->base);
430 int need = pt->framesize; 455 int need = pt->framesize;
431 if ((pt->flags & PROTO_VARARG)) need += 1+gotparams; 456 if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams;
432 lj_state_checkstack(L, (MSize)need); 457 lj_state_checkstack(L, (MSize)need);
433 numparams -= gotparams; 458 numparams -= gotparams;
434 return numparams >= 0 ? numparams : 0; 459 return numparams >= 0 ? numparams : 0;
@@ -457,7 +482,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
457#endif 482#endif
458 pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); 483 pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1);
459 lj_trace_hot(J, pc); 484 lj_trace_hot(J, pc);
460 lua_assert(L->top - L->base == delta); 485 lj_assertG(L->top - L->base == delta,
486 "unbalanced stack after hot call");
461 goto out; 487 goto out;
462 } else if (J->state != LJ_TRACE_IDLE && 488 } else if (J->state != LJ_TRACE_IDLE &&
463 !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { 489 !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
@@ -466,7 +492,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
466#endif 492#endif
467 /* Record the FUNC* bytecodes, too. */ 493 /* Record the FUNC* bytecodes, too. */
468 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ 494 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
469 lua_assert(L->top - L->base == delta); 495 lj_assertG(L->top - L->base == delta,
496 "unbalanced stack after hot instruction");
470 } 497 }
471#endif 498#endif
472 if ((g->hookmask & LUA_MASKCALL)) { 499 if ((g->hookmask & LUA_MASKCALL)) {
@@ -492,3 +519,41 @@ out:
492 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ 519 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
493} 520}
494 521
522#if LJ_HASJIT
523/* Stitch a new trace. */
524void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
525{
526 ERRNO_SAVE
527 lua_State *L = J->L;
528 void *cf = cframe_raw(L->cframe);
529 const BCIns *oldpc = cframe_pc(cf);
530 setcframe_pc(cf, pc);
531 /* Before dispatch, have to bias PC by 1. */
532 L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
533 lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
534 setcframe_pc(cf, oldpc);
535 ERRNO_RESTORE
536}
537#endif
538
539#if LJ_HASPROFILE
540/* Profile dispatch. */
541void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
542{
543 ERRNO_SAVE
544 GCfunc *fn = curr_func(L);
545 GCproto *pt = funcproto(fn);
546 void *cf = cframe_raw(L->cframe);
547 const BCIns *oldpc = cframe_pc(cf);
548 global_State *g;
549 setcframe_pc(cf, pc);
550 L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
551 lj_profile_interpreter(L);
552 setcframe_pc(cf, oldpc);
553 g = G(L);
554 setgcref(g->cur_L, obj2gco(L));
555 setvmstate(g, INTERP);
556 ERRNO_RESTORE
557}
558#endif
559
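lj_dispatch_stitch() lets the JIT continue with a new ("stitched") trace after an instruction the recorder cannot inline, typically a C function call, and lj_dispatch_profile() is the interpreter half of the new sampling profiler: when DISPMODE_PROF is set the VM dispatches through lj_vm_profhook, which lands here and calls lj_profile_interpreter(). From C the profiler is driven through the public API in luajit.h; a minimal sketch (the "i10" mode string means a 10ms sample interval and the "f" dump specifier requests function names, both per doc/ext_profiler.html and shown only as an illustration):

#include <stdio.h>
#include "luajit.h"

/* Sampling callback: receives a state that is safe to inspect. */
static void prof_cb(void *data, lua_State *L, int samples, int vmstate)
{
  size_t len;
  const char *stack = luaJIT_profile_dumpstack(L, "f", 5, &len);
  fprintf((FILE *)data, "%c %d %.*s\n", vmstate, samples, (int)len, stack);
}

void start_profiling(lua_State *L, FILE *out)
{
  luaJIT_profile_start(L, "i10", prof_cb, out);
}

void stop_profiling(lua_State *L)
{
  luaJIT_profile_stop(L);
}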
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index b26c6b94..774bc4dc 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -14,8 +14,24 @@
14 14
15#if LJ_TARGET_MIPS 15#if LJ_TARGET_MIPS
16/* Need our own global offset table for the dreaded MIPS calling conventions. */ 16/* Need our own global offset table for the dreaded MIPS calling conventions. */
17
18#ifndef _LJ_VM_H
19LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b);
20#endif
21
22#if LJ_SOFTFP
23#ifndef _LJ_IRCALL_H
24extern double __adddf3(double a, double b);
25extern double __subdf3(double a, double b);
26extern double __muldf3(double a, double b);
27extern double __divdf3(double a, double b);
28#endif
29#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3)
30#else
31#define SFGOTDEF(_)
32#endif
17#if LJ_HASJIT 33#if LJ_HASJIT
18#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) 34#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
19#else 35#else
20#define JITGOTDEF(_) 36#define JITGOTDEF(_)
21#endif 37#endif
@@ -28,16 +44,19 @@
28#define GOTDEF(_) \ 44#define GOTDEF(_) \
29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 45 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 46 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
31 _(pow) _(fmod) _(ldexp) \ 47 _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) _(lj_err_run) \ 48 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
49 _(lj_dispatch_profile) _(lj_err_throw) \
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 50 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 51 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 52 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 53 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 54 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 55 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 56 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 57 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
58 _(lj_buf_putstr_upper) _(lj_buf_tostr) \
59 JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_)
41 60
42enum { 61enum {
43#define GOTENUM(name) LJ_GOT_##name, 62#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +79,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 79#define HOTCOUNT_CALL 1
61 80
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 81/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 82#define GG_NUM_ASMFF 57
64 83
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 84#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 85#define GG_LEN_SDISP BC_FUNCF
@@ -70,7 +89,7 @@ typedef uint16_t HotCount;
70typedef struct GG_State { 89typedef struct GG_State {
71 lua_State L; /* Main thread. */ 90 lua_State L; /* Main thread. */
72 global_State g; /* Global state. */ 91 global_State g; /* Global state. */
73#if LJ_TARGET_ARM 92#if LJ_TARGET_ARM && !LJ_TARGET_NX
74 /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ 93 /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
75 uint8_t align1[(16-sizeof(global_State))&15]; 94 uint8_t align1[(16-sizeof(global_State))&15];
76#endif 95#endif
@@ -80,7 +99,7 @@ typedef struct GG_State {
80#if LJ_HASJIT 99#if LJ_HASJIT
81 jit_State J; /* JIT state. */ 100 jit_State J; /* JIT state. */
82 HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ 101 HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
83#if LJ_TARGET_ARM 102#if LJ_TARGET_ARM && !LJ_TARGET_NX
84 /* Ditto for J. */ 103 /* Ditto for J. */
85 uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; 104 uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
86#endif 105#endif
@@ -96,6 +115,7 @@ typedef struct GG_State {
96#define J2G(J) (&J2GG(J)->g) 115#define J2G(J) (&J2GG(J)->g)
97#define G2J(gl) (&G2GG(gl)->J) 116#define G2J(gl) (&G2GG(gl)->J)
98#define L2J(L) (&L2GG(L)->J) 117#define L2J(L) (&L2GG(L)->J)
118#define GG_G2J (GG_OFS(J) - GG_OFS(g))
99#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) 119#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g))
100#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) 120#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch))
101#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) 121#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch))
@@ -117,7 +137,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
117/* Instruction dispatch callback for hooks or when recording. */ 137/* Instruction dispatch callback for hooks or when recording. */
118LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); 138LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
119LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); 139LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
120LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); 140#if LJ_HASJIT
141LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
142#endif
143#if LJ_HASPROFILE
144LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
145#endif
121 146
122#if LJ_HASFFI && !defined(_BUILDVM_H) 147#if LJ_HASFFI && !defined(_BUILDVM_H)
123/* Save/restore errno and GetLastError() around hooks, exits and recording. */ 148/* Save/restore errno and GetLastError() around hooks, exits and recording. */
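GG_State bundles the main lua_State, the global_State, the dispatch table and (with LJ_HASJIT) the jit_State into a single allocation, so the interpreter can reach each of them at a fixed offset from its DISPATCH base register; GG_G2J is added as the g-to-J distance for the same reason. The GG_OFS-based macros boil down to plain pointer arithmetic; an illustrative expansion of the G2J() accessor, assuming an offsetof-style definition of GG_OFS:

#include <stddef.h>

/* Illustrative equivalent of G2J(g): step back to the enclosing GG_State,
** then take the address of its J member. GG_G2J expresses the same
** distance as a byte offset for use from assembler code.
*/
static jit_State *global_to_jit(global_State *g)
{
  GG_State *GG = (GG_State *)((char *)g - offsetof(GG_State, g));
  return &GG->J;
}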
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index d38c8a38..5dcea839 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm)
81 81
82static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) 82static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
83{ 83{
84 lua_assert(ofs >= -255 && ofs <= 255); 84 lj_assertA(ofs >= -255 && ofs <= 255,
85 "load/store offset %d out of range", ofs);
85 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; 86 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
86 *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | 87 *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) |
87 ((ofs & 0xf0) << 4) | (ofs & 0x0f); 88 ((ofs & 0xf0) << 4) | (ofs & 0x0f);
@@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
89 90
90static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) 91static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
91{ 92{
92 lua_assert(ofs >= -4095 && ofs <= 4095); 93 lj_assertA(ofs >= -4095 && ofs <= 4095,
94 "load/store offset %d out of range", ofs);
93 /* Combine LDR/STR pairs to LDRD/STRD. */ 95 /* Combine LDR/STR pairs to LDRD/STRD. */
94 if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && 96 if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) &&
95 (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && 97 (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn &&
@@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
106#if !LJ_SOFTFP 108#if !LJ_SOFTFP
107static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) 109static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
108{ 110{
109 lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); 111 lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0,
112 "load/store offset %d out of range", ofs);
110 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; 113 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
111 *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); 114 *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2);
112} 115}
@@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
124 while (work) { 127 while (work) {
125 Reg r = rset_picktop(work); 128 Reg r = rset_picktop(work);
126 IRRef ref = regcost_ref(as->cost[r]); 129 IRRef ref = regcost_ref(as->cost[r]);
127 lua_assert(r != d); 130 lj_assertA(r != d, "dest reg not free");
128 if (emit_canremat(ref)) { 131 if (emit_canremat(ref)) {
129 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 132 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
130 uint32_t k = emit_isk12(ARMI_ADD, delta); 133 uint32_t k = emit_isk12(ARMI_ADD, delta);
@@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
142} 145}
143 146
144/* Try to find a two step delta relative to another constant. */ 147/* Try to find a two step delta relative to another constant. */
145static int emit_kdelta2(ASMState *as, Reg d, int32_t i) 148static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
146{ 149{
147 RegSet work = ~as->freeset & RSET_GPR; 150 RegSet work = ~as->freeset & RSET_GPR;
148 while (work) { 151 while (work) {
149 Reg r = rset_picktop(work); 152 Reg r = rset_picktop(work);
150 IRRef ref = regcost_ref(as->cost[r]); 153 IRRef ref = regcost_ref(as->cost[r]);
151 lua_assert(r != d); 154 lj_assertA(r != rd, "dest reg %d not free", rd);
152 if (emit_canremat(ref)) { 155 if (emit_canremat(ref)) {
153 int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; 156 int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
154 if (other) { 157 if (other) {
@@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
159 k2 = emit_isk12(0, delta & (255 << sh)); 162 k2 = emit_isk12(0, delta & (255 << sh));
160 k = emit_isk12(0, delta & ~(255 << sh)); 163 k = emit_isk12(0, delta & ~(255 << sh));
161 if (k) { 164 if (k) {
162 emit_dn(as, ARMI_ADD^k2^inv, d, d); 165 emit_dn(as, ARMI_ADD^k2^inv, rd, rd);
163 emit_dn(as, ARMI_ADD^k^inv, d, r); 166 emit_dn(as, ARMI_ADD^k^inv, rd, r);
164 return 1; 167 return 1;
165 } 168 }
166 } 169 }
@@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
171} 174}
172 175
173/* Load a 32 bit constant into a GPR. */ 176/* Load a 32 bit constant into a GPR. */
174static void emit_loadi(ASMState *as, Reg r, int32_t i) 177static void emit_loadi(ASMState *as, Reg rd, int32_t i)
175{ 178{
176 uint32_t k = emit_isk12(ARMI_MOV, i); 179 uint32_t k = emit_isk12(ARMI_MOV, i);
177 lua_assert(rset_test(as->freeset, r) || r == RID_TMP); 180 lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
181 "dest reg %d not free", rd);
178 if (k) { 182 if (k) {
179 /* Standard K12 constant. */ 183 /* Standard K12 constant. */
180 emit_d(as, ARMI_MOV^k, r); 184 emit_d(as, ARMI_MOV^k, rd);
181 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { 185 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
182 /* 16 bit loword constant for ARMv6T2. */ 186 /* 16 bit loword constant for ARMv6T2. */
183 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); 187 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
184 } else if (emit_kdelta1(as, r, i)) { 188 } else if (emit_kdelta1(as, rd, i)) {
185 /* One step delta relative to another constant. */ 189 /* One step delta relative to another constant. */
186 } else if ((as->flags & JIT_F_ARMV6T2)) { 190 } else if ((as->flags & JIT_F_ARMV6T2)) {
187 /* 32 bit hiword/loword constant for ARMv6T2. */ 191 /* 32 bit hiword/loword constant for ARMv6T2. */
188 emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); 192 emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
189 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); 193 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
190 } else if (emit_kdelta2(as, r, i)) { 194 } else if (emit_kdelta2(as, rd, i)) {
191 /* Two step delta relative to another constant. */ 195 /* Two step delta relative to another constant. */
192 } else { 196 } else {
193 /* Otherwise construct the constant with up to 4 instructions. */ 197 /* Otherwise construct the constant with up to 4 instructions. */
@@ -197,17 +201,17 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
197 int32_t m = i & (255 << sh); 201 int32_t m = i & (255 << sh);
198 i &= ~(255 << sh); 202 i &= ~(255 << sh);
199 if (i == 0) { 203 if (i == 0) {
200 emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); 204 emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);
201 break; 205 break;
202 } 206 }
203 emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); 207 emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
204 } 208 }
205 } 209 }
206} 210}
207 211
208#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 212#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr)))
209 213
210static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 214static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
211 215
212/* Get/set from constant pointer. */ 216/* Get/set from constant pointer. */
213static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) 217static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
@@ -219,8 +223,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
219 223
220#if !LJ_SOFTFP 224#if !LJ_SOFTFP
221/* Load a number constant into an FPR. */ 225/* Load a number constant into an FPR. */
222static void emit_loadn(ASMState *as, Reg r, cTValue *tv) 226static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
223{ 227{
228 cTValue *tv = ir_knum(ir);
224 int32_t i; 229 int32_t i;
225 if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { 230 if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
226 uint32_t hi = tv->u32.hi; 231 uint32_t hi = tv->u32.hi;
@@ -260,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
260{ 265{
261 MCode *p = as->mcp; 266 MCode *p = as->mcp;
262 ptrdiff_t delta = (target - p) - 1; 267 ptrdiff_t delta = (target - p) - 1;
263 lua_assert(((delta + 0x00800000) >> 24) == 0); 268 lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range");
264 *--p = ai | ((uint32_t)delta & 0x00ffffffu); 269 *--p = ai | ((uint32_t)delta & 0x00ffffffu);
265 as->mcp = p; 270 as->mcp = p;
266} 271}
@@ -288,7 +293,7 @@ static void emit_call(ASMState *as, void *target)
288static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) 293static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
289{ 294{
290#if LJ_SOFTFP 295#if LJ_SOFTFP
291 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 296 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
292#else 297#else
293 if (dst >= RID_MAX_GPR) { 298 if (dst >= RID_MAX_GPR) {
294 emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, 299 emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
@@ -308,30 +313,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
308 emit_dm(as, ARMI_MOV, dst, src); 313 emit_dm(as, ARMI_MOV, dst, src);
309} 314}
310 315
311/* Generic load of register from stack slot. */ 316/* Generic load of register with base and (small) offset address. */
312static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 317static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
313{ 318{
314#if LJ_SOFTFP 319#if LJ_SOFTFP
315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 320 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
316#else 321#else
317 if (r >= RID_MAX_GPR) 322 if (r >= RID_MAX_GPR)
318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); 323 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
319 else 324 else
320#endif 325#endif
321 emit_lso(as, ARMI_LDR, r, RID_SP, ofs); 326 emit_lso(as, ARMI_LDR, r, base, ofs);
322} 327}
323 328
324/* Generic store of register to stack slot. */ 329/* Generic store of register with base and (small) offset address. */
325static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 330static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
326{ 331{
327#if LJ_SOFTFP 332#if LJ_SOFTFP
328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 333 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
329#else 334#else
330 if (r >= RID_MAX_GPR) 335 if (r >= RID_MAX_GPR)
331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); 336 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
332 else 337 else
333#endif 338#endif
334 emit_lso(as, ARMI_STR, r, RID_SP, ofs); 339 emit_lso(as, ARMI_STR, r, base, ofs);
335} 340}
336 341
337/* Emit an arithmetic/logic operation with a constant operand. */ 342/* Emit an arithmetic/logic operation with a constant operand. */
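The lj_emit_arm.h changes rename the destination register parameter to rd, add messages to the assertions, and generalize the stack-slot load/store helpers into emit_loadofs/emit_storeofs, which take an arbitrary base register. The constant-load cascade in emit_loadi() is unchanged: K12 immediate, MOVW (ARMv6T2), one-step delta from a live constant, MOVW/MOVT pair, two-step delta, and finally up to four ORRed 8 bit chunks. The K12 rule it keys on, an 8 bit value rotated right by an even amount, can be checked in isolation like this (reference predicate only, not the encoder):

#include <stdint.h>

static int is_arm_k12(uint32_t k)
{
  uint32_t rot;
  for (rot = 0; rot < 32; rot += 2) {
    /* Rotate left by rot to undo a ROR #rot encoding. */
    uint32_t v = (k << rot) | (rot ? (k >> (32 - rot)) : 0);
    if (v <= 255) return 1;
  }
  return 0;
}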
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
new file mode 100644
index 00000000..6838693e
--- /dev/null
+++ b/src/lj_emit_arm64.h
@@ -0,0 +1,473 @@
1/*
2** ARM64 instruction emitter.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4**
5** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6** Sponsored by Cisco Systems, Inc.
7*/
8
9/* -- Constant encoding --------------------------------------------------- */
10
11static uint64_t get_k64val(ASMState *as, IRRef ref)
12{
13 IRIns *ir = IR(ref);
14 if (ir->o == IR_KINT64) {
15 return ir_kint64(ir)->u64;
16 } else if (ir->o == IR_KGC) {
17 return (uint64_t)ir_kgc(ir);
18 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
19 return (uint64_t)ir_kptr(ir);
20 } else {
21 lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
22 "bad 64 bit const IR op %d", ir->o);
23 return (uint32_t)ir->i; /* Zero-extended. */
24 }
25}
26
27/* Encode constant in K12 format for data processing instructions. */
28static uint32_t emit_isk12(int64_t n)
29{
30 uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;
31 uint32_t m = n < 0 ? 0x40000000 : 0;
32 if (k < 0x1000) {
33 return (uint32_t)(A64I_K12|m|A64F_U12(k));
34 } else if ((k & 0xfff000) == k) {
35 return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12));
36 }
37 return 0;
38}
39
40#define emit_clz64(n) (lj_fls64(n)^63)
41#define emit_ctz64(n) lj_ffs64(n)
42
43/* Encode constant in K13 format for logical data processing instructions. */
44static uint32_t emit_isk13(uint64_t n, int is64)
45{
46 /* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */
47 int rot, ones, size, immr, imms;
48 if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n;
49 if ((n+1u) <= 1u) return 0; /* Neither all-zero nor all-ones are allowed. */
50 rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64;
51 n = lj_ror(n, rot & 63);
52 ones = emit_ctz64(~n);
53 size = emit_clz64(n) + ones;
54 if (lj_ror(n, size & 63) != n) return 0; /* Non-repeating? */
55 immr = -rot & (size - 1);
56 imms = (-(size << 1) | (ones - 1)) & 63;
57 return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms);
58}
59
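emit_isk13() encodes AArch64 "logical immediates": a value is encodable iff it is a repetition of a 2/4/8/16/32/64 bit element that is itself a rotation of a contiguous run of ones, with all-zero and all-one values excluded. A stand-alone reference check of that predicate, for clarity only (the function above additionally computes the immr/imms fields):

#include <stdint.h>

static int is_logical_imm64(uint64_t x)
{
  unsigned size, rot;
  if (x == 0 || ~x == 0) return 0;           /* Excluded by the encoding. */
  for (size = 2; size <= 64; size <<= 1) {   /* Candidate element sizes. */
    uint64_t mask = (size == 64) ? ~(uint64_t)0 : (((uint64_t)1 << size) - 1);
    uint64_t elem = x & mask;
    if (size < 64 && x != elem * (~(uint64_t)0 / mask))
      continue;                              /* Element does not repeat. */
    for (rot = 0; rot < size; rot++) {       /* Some rotation is 0..01..1. */
      uint64_t r = rot ? (((elem >> rot) | (elem << (size - rot))) & mask) : elem;
      if (r != 0 && ((r + 1) & r) == 0) return 1;
    }
  }
  return 0;
}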
60static uint32_t emit_isfpk64(uint64_t n)
61{
62 uint64_t etop9 = ((n >> 54) & 0x1ff);
63 if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) {
64 return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
65 }
66 return ~0u;
67}
68
69static uint32_t emit_isfpmovi(uint64_t n)
70{
71 /* Is every byte either 0x00 or 0xff? */
72 if ((n & U64x(01010101,01010101)) * 0xff != n) return 0;
73 /* Form 8-bit value by taking one bit from each byte. */
74 n &= U64x(80402010,08040201);
75 n = (n * U64x(01010101,01010101)) >> 56;
76 /* Split into the format expected by movi. */
77 return ((n & 0xe0) << 6) | 0x700 | (n & 0x1f);
78}
79
80/* -- Emit basic instructions --------------------------------------------- */
81
82static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
83{
84 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);
85}
86
87static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
88{
89 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
90}
91
92static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
93{
94 *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
95}
96
97static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
98{
99 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
100}
101
102static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
103{
104 *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
105}
106
107static void emit_d(ASMState *as, A64Ins ai, Reg rd)
108{
109 *--as->mcp = ai | A64F_D(rd);
110}
111
112static void emit_dl(ASMState *as, A64Ins ai, Reg rd, uint32_t l)
113{
114 *--as->mcp = ai | A64F_D(rd) | A64F_S19(l >> 2);
115}
116
117static void emit_n(ASMState *as, A64Ins ai, Reg rn)
118{
119 *--as->mcp = ai | A64F_N(rn);
120}
121
122static int emit_checkofs(A64Ins ai, int64_t ofs)
123{
124 int scale = (ai >> 30) & 3;
125 if (ofs < 0 || (ofs & ((1<<scale)-1))) {
126 return (ofs >= -256 && ofs <= 255) ? -1 : 0;
127 } else {
128 return (ofs < (4096<<scale)) ? 1 : 0;
129 }
130}
131
132static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc)
133{
134 if (ofs >= 0) {
135 return ai | A64F_U12(ofs>>sc); /* Subsequent lj_ror checks ofs. */
136 } else if (ofs >= -256) {
137 return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff);
138 } else {
139 return A64F_D(31); /* Will mismatch prev. */
140 }
141}
142
143static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64)
144{
145 int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64;
146 lj_assertA(ot, "load/store offset %d out of range", ofs);
147 /* Combine LDR/STR pairs to LDP/STP. */
148 if ((sc == 2 || sc == 3) &&
149 (!(ai & 0x400000) || rd != rn) &&
150 as->mcp != as->mcloop) {
151 uint32_t prev = *as->mcp & ~A64F_D(31);
152 int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
153 A64Ins aip;
154 if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) {
155 aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
156 } else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) {
157 aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
158 ofsm = ofs;
159 } else {
160 goto nopair;
161 }
162 if (lj_ror((unsigned int)ofsm + (64u<<sc), sc) <= 127u) {
163 *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) |
164 (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
165 return;
166 }
167 }
168nopair:
169 if (ot == 1)
170 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
171 else
172 *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
173}
174
175/* -- Emit loads/stores --------------------------------------------------- */
176
177/* Prefer rematerialization of BASE/L from global_State over spills. */
178#define emit_canremat(ref) ((ref) <= REF_BASE)
179
180/* Try to find a one-step delta relative to other consts. */
181static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)
182{
183 RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
184 while (work) {
185 Reg r = rset_picktop(work);
186 IRRef ref = regcost_ref(as->cost[r]);
187 lj_assertA(r != rd, "dest reg %d not free", rd);
188 if (ref < REF_TRUE) {
189 uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
190 get_k64val(as, ref);
191 int64_t delta = (int64_t)(k - kx);
192 if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */
193 if (delta == 0) {
194 emit_dm(as, is64|A64I_MOVw, rd, r);
195 return 1;
196 } else {
197 uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
198 if (k12) {
199 emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r);
200 return 1;
201 }
202 /* Do other ops or multi-step deltas pay off? Probably not.
203 ** E.g. XOR rarely helps with pointer consts.
204 */
205 }
206 }
207 rset_clear(work, r);
208 }
209 return 0; /* Failed. */
210}
211
212#define glofs(as, k) \
213 ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
214#define mcpofs(as, k) \
215 ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
216#define checkmcpofs(as, k) \
217 (A64F_S_OK(mcpofs(as, k)>>2, 19))
218
219/* Try to form a const as ADR or ADRP or ADRP + ADD. */
220static int emit_kadrp(ASMState *as, Reg rd, uint64_t k)
221{
222 A64Ins ai = A64I_ADR;
223 int64_t ofs = mcpofs(as, k);
224 if (!A64F_S_OK((uint64_t)ofs, 21)) {
225 uint64_t kpage = k & ~0xfffull;
226 MCode *adrp = as->mcp - 1 - (k != kpage);
227 ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12;
228 if (!A64F_S_OK(ofs, 21))
229 return 0; /* Failed. */
230 if (k != kpage)
231 emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd);
232 ai = A64I_ADRP;
233 }
234 emit_dl(as, ai|(((uint32_t)ofs&3)<<29), rd, ofs);
235 return 1;
236}
237
238static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
239{
240 int zeros = 0, ones = 0, neg, lshift = 0;
241 int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2;
242 /* Count non-homogeneous 16 bit fragments. */
243 while (--i >= 0) {
244 uint32_t frag = (u64 >> i*16) & 0xffff;
245 zeros += (frag != 0);
246 ones += (frag != 0xffff);
247 }
248 neg = ones < zeros; /* Use MOVN if it pays off. */
249 if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */
250 uint32_t k13 = emit_isk13(u64, is64);
251 if (k13) {
252 emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
253 return;
254 }
255 if (emit_kdelta(as, rd, u64, is64)) {
256 return;
257 }
258 if (emit_kadrp(as, rd, u64)) { /* Either 1 or 2 ins. */
259 return;
260 }
261 }
262 if (neg) {
263 u64 = ~u64;
264 if (!is64) u64 = (uint32_t)u64;
265 }
266 if (u64) {
267 /* Find first/last fragment to be filled. */
268 int shift = (63-emit_clz64(u64)) & ~15;
269 lshift = emit_ctz64(u64) & ~15;
270 for (; shift > lshift; shift -= 16) {
271 uint32_t frag = (u64 >> shift) & 0xffff;
272 if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */
273 if (neg) frag ^= 0xffff; /* MOVK requires the original value. */
274 emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd);
275 }
276 }
277 /* But MOVN needs an inverted value. */
278 emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) |
279 A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
280}
281
282/* Load a 32 bit constant into a GPR. */
283#define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i)
284
285/* Load a 64 bit constant into a GPR. */
286#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i)
287
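emit_loadk() counts how many 16 bit fragments a MOVZ- or MOVN-plus-MOVK sequence would need before trying the cheaper single-instruction forms (ORR with a logical immediate, a delta from an already-loaded constant, or ADR/ADRP). The counting idea in isolation (illustrative; the real code also handles 32 bit loads and emits the instructions backwards, since the emitter fills machine code from the end):

#include <stdint.h>

/* Number of MOVZ/MOVN+MOVK instructions a plain 64 bit constant load needs. */
static int movk_cost(uint64_t u64)
{
  int i, movz = 0, movn = 0;
  for (i = 0; i < 4; i++) {
    uint32_t frag = (uint32_t)(u64 >> (i*16)) & 0xffff;
    movz += (frag != 0);       /* Fragments a MOVZ-based sequence must set. */
    movn += (frag != 0xffff);  /* Fragments a MOVN-based sequence must set. */
  }
  i = movz < movn ? movz : movn;
  return i ? i : 1;            /* 0 and ~0 still take one MOVZ/MOVN. */
}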
288static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
289
290/* Get/set from constant pointer. */
291static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
292{
293 Reg base = RID_GL;
294 int64_t ofs = glofs(as, p);
295 if (emit_checkofs(ai, ofs)) {
296 /* GL + offset, might subsequently fuse to LDP/STP. */
297 } else if (ai == A64I_LDRx && checkmcpofs(as, p)) {
298 /* IP + offset is cheaper than allock, but address must be in range. */
299 emit_dl(as, A64I_LDRLx, r, mcpofs(as, p));
300 return;
301 } else { /* Split up into base reg + offset. */
302 int64_t i64 = i64ptr(p);
303 base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
304 ofs = i64 & 0x7fffull;
305 }
306 emit_lso(as, ai, r, base, ofs);
307}
308
309/* Load 64 bit IR constant into register. */
310static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
311{
312 const uint64_t *k = &ir_k64(ir)->u64;
313 int64_t ofs;
314 if (r >= RID_MAX_GPR) {
315 uint32_t fpk = emit_isfpk64(*k);
316 if (fpk != ~0u) {
317 emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
318 return;
319 } else if ((fpk = emit_isfpmovi(*k))) {
320 emit_d(as, A64I_MOVI_DI | (fpk << 5), (r & 31));
321 return;
322 }
323 }
324 ofs = glofs(as, k);
325 if (emit_checkofs(A64I_LDRx, ofs)) {
326 emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
327 (r & 31), RID_GL, ofs);
328 } else if (checkmcpofs(as, k)) {
329 emit_dl(as, r >= RID_MAX_GPR ? A64I_LDRLd : A64I_LDRLx,
330 (r & 31), mcpofs(as, k));
331 } else {
332 if (r >= RID_MAX_GPR) {
333 emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
334 r = RID_TMP;
335 }
336 emit_loadu64(as, r, *k);
337 }
338}
339
340/* Get/set global_State fields. */
341#define emit_getgl(as, r, field) \
342 emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
343#define emit_setgl(as, r, field) \
344 emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)
345
346/* Trace number is determined from pc of exit instruction. */
347#define emit_setvmstate(as, i) UNUSED(i)
348
349/* -- Emit control-flow instructions -------------------------------------- */
350
351/* Label for internal jumps. */
352typedef MCode *MCLabel;
353
354/* Return label pointing to current PC. */
355#define emit_label(as) ((as)->mcp)
356
357static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
358{
359 MCode *p = --as->mcp;
360 ptrdiff_t delta = target - p;
361 lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
362 *p = A64I_BCC | A64F_S19(delta) | cond;
363}
364
365static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
366{
367 MCode *p = --as->mcp;
368 ptrdiff_t delta = target - p;
369 lj_assertA(A64F_S_OK(delta, 26), "branch target out of range");
370 *p = ai | A64F_S26(delta);
371}
372
373static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
374{
375 MCode *p = --as->mcp;
376 ptrdiff_t delta = target - p;
377 lj_assertA(bit < 63, "bit number out of range");
378 lj_assertA(A64F_S_OK(delta, 14), "branch target out of range");
379 if (bit > 31) ai |= A64I_X;
380 *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
381}
382
383static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
384{
385 MCode *p = --as->mcp;
386 ptrdiff_t delta = target - p;
387 lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
388 *p = ai | A64F_S19(delta) | r;
389}
390
391#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))
392
393static void emit_call(ASMState *as, ASMFunction target)
394{
395 MCode *p = --as->mcp;
396#if LJ_ABI_PAUTH
397 char *targetp = ptrauth_auth_data((char *)target,
398 ptrauth_key_function_pointer, 0);
399#else
400 char *targetp = (char *)target;
401#endif
402 ptrdiff_t delta = targetp - (char *)p;
403 if (A64F_S_OK(delta>>2, 26)) {
404 *p = A64I_BL | A64F_S26(delta>>2);
405 } else { /* Target out of range: need indirect call. But don't use R0-R7. */
406 Reg r = ra_allock(as, i64ptr(target),
407 RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
408 *p = A64I_BLR_AUTH | A64F_N(r);
409 }
410}
411
412/* -- Emit generic operations --------------------------------------------- */
413
414/* Generic move between two regs. */
415static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
416{
417 if (dst >= RID_MAX_GPR) {
418 emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
419 (dst & 31), (src & 31));
420 return;
421 }
422 if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */
423 MCode ins = *as->mcp, swp = (src^dst);
424 if ((ins & 0xbf800000) == 0xb9000000) {
425 if (!((ins ^ (dst << 5)) & 0x000003e0))
426 *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */
427 if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
428 *as->mcp = ins ^ swp; /* Swap D in store. */
429 }
430 }
431 emit_dm(as, A64I_MOVx, dst, src);
432}
433
434/* Generic load of register with base and (small) offset address. */
435static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
436{
437 if (r >= RID_MAX_GPR)
438 emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
439 else
440 emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
441}
442
443/* Generic store of register with base and (small) offset address. */
444static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
445{
446 if (r >= RID_MAX_GPR)
447 emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
448 else
449 emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
450}
451
452/* Emit an arithmetic operation with a constant operand. */
453static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
454 int32_t i, RegSet allow)
455{
456 uint32_t k = emit_isk12(i);
457 if (k)
458 emit_dn(as, ai^k, dest, src);
459 else
460 emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
461}
462
463/* Add offset to pointer. */
464static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
465{
466 if (ofs)
467 emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
468 ofs < 0 ? (int32_t)(~(uint32_t)ofs+1u) : ofs,
469 rset_exclude(RSET_GPR, r));
470}
471
472#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
473
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index 57a7a7cd..dda9092d 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -3,6 +3,32 @@
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/ 4*/
5 5
6#if LJ_64
7static intptr_t get_k64val(ASMState *as, IRRef ref)
8{
9 IRIns *ir = IR(ref);
10 if (ir->o == IR_KINT64) {
11 return (intptr_t)ir_kint64(ir)->u64;
12 } else if (ir->o == IR_KGC) {
13 return (intptr_t)ir_kgc(ir);
14 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
15 return (intptr_t)ir_kptr(ir);
16 } else if (LJ_SOFTFP && ir->o == IR_KNUM) {
17 return (intptr_t)ir_knum(ir)->u64;
18 } else {
19 lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
20 "bad 64 bit const IR op %d", ir->o);
21 return ir->i; /* Sign-extended. */
22 }
23}
24#endif
25
26#if LJ_64
27#define get_kval(as, ref) get_k64val(as, ref)
28#else
29#define get_kval(as, ref) (IR((ref))->i)
30#endif
31
6/* -- Emit basic instructions --------------------------------------------- */ 32/* -- Emit basic instructions --------------------------------------------- */
7 33
8static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) 34static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt)
@@ -35,7 +61,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
35 61
36static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) 62static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
37{ 63{
38 if ((as->flags & JIT_F_MIPS32R2)) { 64 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
39 emit_dta(as, MIPSI_ROTR, dest, src, shift); 65 emit_dta(as, MIPSI_ROTR, dest, src, shift);
40 } else { 66 } else {
41 emit_dst(as, MIPSI_OR, dest, dest, tmp); 67 emit_dst(as, MIPSI_OR, dest, dest, tmp);
@@ -44,23 +70,32 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
44 } 70 }
45} 71}
46 72
73#if LJ_64 || LJ_HASBUFFER
74static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
75 uint32_t lsb)
76{
77 *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb);
78}
79#endif
80
47/* -- Emit loads/stores --------------------------------------------------- */ 81/* -- Emit loads/stores --------------------------------------------------- */
48 82
49/* Prefer rematerialization of BASE/L from global_State over spills. */ 83/* Prefer rematerialization of BASE/L from global_State over spills. */
50#define emit_canremat(ref) ((ref) <= REF_BASE) 84#define emit_canremat(ref) ((ref) <= REF_BASE)
51 85
52/* Try to find a one step delta relative to another constant. */ 86/* Try to find a one step delta relative to another constant. */
53static int emit_kdelta1(ASMState *as, Reg t, int32_t i) 87static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i)
54{ 88{
55 RegSet work = ~as->freeset & RSET_GPR; 89 RegSet work = ~as->freeset & RSET_GPR;
56 while (work) { 90 while (work) {
57 Reg r = rset_picktop(work); 91 Reg r = rset_picktop(work);
58 IRRef ref = regcost_ref(as->cost[r]); 92 IRRef ref = regcost_ref(as->cost[r]);
59 lua_assert(r != t); 93 lj_assertA(r != rd, "dest reg %d not free", rd);
60 if (ref < ASMREF_L) { 94 if (ref < ASMREF_L) {
61 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 95 intptr_t delta = (intptr_t)((uintptr_t)i -
96 (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref)));
62 if (checki16(delta)) { 97 if (checki16(delta)) {
63 emit_tsi(as, MIPSI_ADDIU, t, r, delta); 98 emit_tsi(as, MIPSI_AADDIU, rd, r, delta);
64 return 1; 99 return 1;
65 } 100 }
66 } 101 }
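
The rewritten emit_kdelta1 keeps the same idea on 64-bit targets: scan registers that already hold constants and, if the wanted value is within a signed 16-bit delta of one of them, synthesize it with a single (D)ADDIU instead of a full constant load. A rough sketch of that decision, with a hypothetical holds[] table standing in for the register cost data:

#include <stdint.h>

/* Hypothetical sketch: find a register whose known constant is within a
** signed 16-bit delta of 'want'. Returns the register index or -1.
*/
static int find_kdelta(const intptr_t *holds, int nregs, intptr_t want,
                       intptr_t *delta_out)
{
  int r;
  for (r = 0; r < nregs; r++) {
    intptr_t d = (intptr_t)((uintptr_t)want - (uintptr_t)holds[r]);
    if (d >= -32768 && d <= 32767) { *delta_out = d; return r; }
  }
  return -1;
}
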
@@ -76,8 +111,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
76 emit_ti(as, MIPSI_LI, r, i); 111 emit_ti(as, MIPSI_LI, r, i);
77 } else { 112 } else {
78 if ((i & 0xffff)) { 113 if ((i & 0xffff)) {
79 int32_t jgl = i32ptr(J2G(as->J)); 114 intptr_t jgl = (intptr_t)(void *)J2G(as->J);
80 if ((uint32_t)(i-jgl) < 65536) { 115 if ((uintptr_t)(i-jgl) < 65536) {
81 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); 116 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768);
82 return; 117 return;
83 } else if (emit_kdelta1(as, r, i)) { 118 } else if (emit_kdelta1(as, r, i)) {
@@ -92,16 +127,49 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
92 } 127 }
93} 128}
94 129
130#if LJ_64
131/* Load a 64 bit constant into a GPR. */
132static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
133{
134 if (checki32((int64_t)u64)) {
135 emit_loadi(as, r, (int32_t)u64);
136 } else {
137 uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J);
138 if (delta < 65536) {
139 emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768));
140 } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
141 return;
142 } else {
143 /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */
144 if ((u64 & 0xffff)) {
145 emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
146 }
147 if (((u64 >> 16) & 0xffff)) {
148 emit_dta(as, MIPSI_DSLL, r, r, 16);
149 emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff);
150 emit_dta(as, MIPSI_DSLL, r, r, 16);
151 } else {
152 emit_dta(as, MIPSI_DSLL32, r, r, 0);
153 }
154 emit_loadi(as, r, (int32_t)(u64 >> 32));
155 }
156 /* TODO: There are probably more optimization opportunities. */
157 }
158}
159
160#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
161#else
95#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 162#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
163#endif
96 164
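
For a constant that is neither a sign-extended 32-bit value nor reachable via J2G or a kdelta, emit_loadu64 above builds the value 16 bits at a time. Since the assembler emits backwards, the execution-order expansion for, say, 0x123456789abcdef0 comes out roughly as sketched below (mnemonics are illustrative, not the emitter calls):

#include <assert.h>
#include <stdint.h>

int main(void)
{
  /* Execution order produced for 0x123456789abcdef0:
  **   lui   r, 0x1234        ; upper 32 bits via emit_loadi
  **   ori   r, r, 0x5678
  **   dsll  r, r, 16
  **   ori   r, r, 0x9abc
  **   dsll  r, r, 16
  **   ori   r, r, 0xdef0
  ** The same arithmetic in C:
  */
  uint64_t r = (uint64_t)(int32_t)0x12345678;
  r = (r << 16) | 0x9abc;
  r = (r << 16) | 0xdef0;
  assert(r == 0x123456789abcdef0ULL);
  return 0;
}
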
97static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 165static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
98static void ra_allockreg(ASMState *as, int32_t k, Reg r); 166static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
99 167
100/* Get/set from constant pointer. */ 168/* Get/set from constant pointer. */
101static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) 169static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
102{ 170{
103 int32_t jgl = i32ptr(J2G(as->J)); 171 intptr_t jgl = (intptr_t)(J2G(as->J));
104 int32_t i = i32ptr(p); 172 intptr_t i = (intptr_t)(p);
105 Reg base; 173 Reg base;
106 if ((uint32_t)(i-jgl) < 65536) { 174 if ((uint32_t)(i-jgl) < 65536) {
107 i = i-jgl-32768; 175 i = i-jgl-32768;
@@ -112,8 +180,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
112 emit_tsi(as, mi, r, base, i); 180 emit_tsi(as, mi, r, base, i);
113} 181}
114 182
115#define emit_loadn(as, r, tv) \ 183#if LJ_64
116 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) 184static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
185{
186 const uint64_t *k = &ir_k64(ir)->u64;
187 Reg r64 = r;
188 if (rset_test(RSET_FPR, r)) {
189 r64 = RID_TMP;
190 emit_tg(as, MIPSI_DMTC1, r64, r);
191 }
192 if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536)
193 emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0);
194 else
195 emit_loadu64(as, r64, *k);
196}
197#else
198#define emit_loadk64(as, r, ir) \
199 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
200#endif
117 201
118/* Get/set global_State fields. */ 202/* Get/set global_State fields. */
119static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) 203static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@@ -122,9 +206,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
122} 206}
123 207
124#define emit_getgl(as, r, field) \ 208#define emit_getgl(as, r, field) \
125 emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) 209 emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field))
126#define emit_setgl(as, r, field) \ 210#define emit_setgl(as, r, field) \
127 emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) 211 emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field))
128 212
129/* Trace number is determined from per-trace exit stubs. */ 213/* Trace number is determined from per-trace exit stubs. */
130#define emit_setvmstate(as, i) UNUSED(i) 214#define emit_setvmstate(as, i) UNUSED(i)
@@ -141,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target)
141{ 225{
142 MCode *p = as->mcp; 226 MCode *p = as->mcp;
143 ptrdiff_t delta = target - p; 227 ptrdiff_t delta = target - p;
144 lua_assert(((delta + 0x8000) >> 16) == 0); 228 lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
145 *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); 229 *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu);
146 as->mcp = p; 230 as->mcp = p;
147} 231}
@@ -152,16 +236,31 @@ static void emit_jmp(ASMState *as, MCode *target)
152 emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); 236 emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target));
153} 237}
154 238
155static void emit_call(ASMState *as, void *target) 239static void emit_call(ASMState *as, void *target, int needcfa)
156{ 240{
157 MCode *p = as->mcp; 241 MCode *p = as->mcp;
158 *--p = MIPSI_NOP; 242#if LJ_TARGET_MIPSR6
159 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) 243 ptrdiff_t delta = (char *)target - (char *)p;
244 if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */
245 *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu);
246 as->mcp = p;
247 return;
248 }
249#endif
250 *--p = MIPSI_NOP; /* Delay slot. */
251 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
252#if !LJ_TARGET_MIPSR6
253 *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
254 (((uintptr_t)target >>2) & 0x03ffffffu);
255#else
160 *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); 256 *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
161 else /* Target out of range: need indirect call. */ 257#endif
258 } else { /* Target out of range: need indirect call. */
162 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); 259 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
260 needcfa = 1;
261 }
163 as->mcp = p; 262 as->mcp = p;
164 ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); 263 if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
165} 264}
166 265
167/* -- Emit generic operations --------------------------------------------- */ 266/* -- Emit generic operations --------------------------------------------- */
@@ -178,32 +277,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); 277 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
179} 278}
180 279
181/* Generic load of register from stack slot. */ 280/* Generic load of register with base and (small) offset address. */
182static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 281static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
183{ 282{
184 if (r < RID_MAX_GPR) 283 if (r < RID_MAX_GPR)
185 emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); 284 emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs);
186 else 285 else
187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, 286 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
188 (r & 31), RID_SP, ofs); 287 (r & 31), base, ofs);
189} 288}
190 289
191/* Generic store of register to stack slot. */ 290/* Generic store of register with base and (small) offset address. */
192static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 291static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
193{ 292{
194 if (r < RID_MAX_GPR) 293 if (r < RID_MAX_GPR)
195 emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); 294 emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs);
196 else 295 else
197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, 296 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
198 (r&31), RID_SP, ofs); 297 (r&31), base, ofs);
199} 298}
200 299
201/* Add offset to pointer. */ 300/* Add offset to pointer. */
202static void emit_addptr(ASMState *as, Reg r, int32_t ofs) 301static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
203{ 302{
204 if (ofs) { 303 if (ofs) {
205 lua_assert(checki16(ofs)); 304 lj_assertA(checki16(ofs), "offset %d out of range", ofs);
206 emit_tsi(as, MIPSI_ADDIU, r, r, ofs); 305 emit_tsi(as, MIPSI_AADDIU, r, r, ofs);
207 } 306 }
208} 307}
209 308
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index 62462ade..c34dcbe2 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs,
41 41
42static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) 42static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n)
43{ 43{
 44 lua_assert(n >= 0 && n < 32); 44 lj_assertA(n >= 0 && n < 32, "shift out of range");
45 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); 45 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n);
46} 46}
47 47
48static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) 48static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
49{ 49{
 50 lua_assert(n >= 0 && n < 32); 50 lj_assertA(n >= 0 && n < 32, "shift out of range");
51 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); 51 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31);
52} 52}
53 53
@@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
57#define emit_canremat(ref) ((ref) <= REF_BASE) 57#define emit_canremat(ref) ((ref) <= REF_BASE)
58 58
59/* Try to find a one step delta relative to another constant. */ 59/* Try to find a one step delta relative to another constant. */
60static int emit_kdelta1(ASMState *as, Reg t, int32_t i) 60static int emit_kdelta1(ASMState *as, Reg rd, int32_t i)
61{ 61{
62 RegSet work = ~as->freeset & RSET_GPR; 62 RegSet work = ~as->freeset & RSET_GPR;
63 while (work) { 63 while (work) {
64 Reg r = rset_picktop(work); 64 Reg r = rset_picktop(work);
65 IRRef ref = regcost_ref(as->cost[r]); 65 IRRef ref = regcost_ref(as->cost[r]);
66 lua_assert(r != t); 66 lj_assertA(r != rd, "dest reg %d not free", rd);
67 if (ref < ASMREF_L) { 67 if (ref < ASMREF_L) {
68 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 68 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
69 if (checki16(delta)) { 69 if (checki16(delta)) {
70 emit_tai(as, PPCI_ADDI, t, r, delta); 70 emit_tai(as, PPCI_ADDI, rd, r, delta);
71 return 1; 71 return 1;
72 } 72 }
73 } 73 }
@@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
98 98
99#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 99#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
100 100
101static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 101static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
102 102
103/* Get/set from constant pointer. */ 103/* Get/set from constant pointer. */
104static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) 104static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
@@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
115 emit_tai(as, pi, r, base, i); 115 emit_tai(as, pi, r, base, i);
116} 116}
117 117
118#define emit_loadn(as, r, tv) \ 118#define emit_loadk64(as, r, ir) \
119 emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) 119 emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
120 120
121/* Get/set global_State fields. */ 121/* Get/set global_State fields. */
122static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) 122static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)
@@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target)
144{ 144{
145 MCode *p = --as->mcp; 145 MCode *p = --as->mcp;
146 ptrdiff_t delta = (char *)target - (char *)p; 146 ptrdiff_t delta = (char *)target - (char *)p;
147 lua_assert(((delta + 0x8000) >> 16) == 0); 147 lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
148 pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); 148 pi ^= (delta & 0x8000) * (PPCF_Y/0x8000);
149 *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); 149 *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu);
150} 150}
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
186 emit_fb(as, PPCI_FMR, dst, src); 186 emit_fb(as, PPCI_FMR, dst, src);
187} 187}
188 188
189/* Generic load of register from stack slot. */ 189/* Generic load of register with base and (small) offset address. */
190static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 190static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
191{ 191{
192 if (r < RID_MAX_GPR) 192 if (r < RID_MAX_GPR)
193 emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); 193 emit_tai(as, PPCI_LWZ, r, base, ofs);
194 else 194 else
195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); 195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
196} 196}
197 197
198/* Generic store of register to stack slot. */ 198/* Generic store of register with base and (small) offset address. */
199static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 199static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
200{ 200{
201 if (r < RID_MAX_GPR) 201 if (r < RID_MAX_GPR)
202 emit_tai(as, PPCI_STW, r, RID_SP, ofs); 202 emit_tai(as, PPCI_STW, r, base, ofs);
203 else 203 else
204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); 204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
205} 205}
206 206
207/* Emit a compare (for equality) with a constant operand. */ 207/* Emit a compare (for equality) with a constant operand. */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 0e26ad52..d215402c 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -13,10 +13,17 @@
13 if (rex != 0x40) *--(p) = rex; } 13 if (rex != 0x40) *--(p) = rex; }
14#define FORCE_REX 0x200 14#define FORCE_REX 0x200
15#define REX_64 (FORCE_REX|0x080000) 15#define REX_64 (FORCE_REX|0x080000)
16#define VEX_64 0x800000
16#else 17#else
17#define REXRB(p, rr, rb) ((void)0) 18#define REXRB(p, rr, rb) ((void)0)
18#define FORCE_REX 0 19#define FORCE_REX 0
19#define REX_64 0 20#define REX_64 0
21#define VEX_64 0
22#endif
23#if LJ_GC64
24#define REX_GC64 REX_64
25#else
26#define REX_GC64 0
20#endif 27#endif
21 28
22#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) 29#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -31,7 +38,14 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
31 MCode *p, int delta) 38 MCode *p, int delta)
32{ 39{
33 int n = (int8_t)xo; 40 int n = (int8_t)xo;
34#if defined(__GNUC__) 41 if (n == -60) { /* VEX-encoded instruction */
42#if LJ_64
43 xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
44#endif
45 *(uint32_t *)(p+delta-5) = (uint32_t)xo;
46 return p+delta-5;
47 }
48#if defined(__GNUC__) || defined(__clang__)
35 if (__builtin_constant_p(xo) && n == -2) 49 if (__builtin_constant_p(xo) && n == -2)
36 p[delta-2] = (MCode)(xo >> 24); 50 p[delta-2] = (MCode)(xo >> 24);
37 else if (__builtin_constant_p(xo) && n == -3) 51 else if (__builtin_constant_p(xo) && n == -3)
@@ -78,33 +92,24 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
78/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */ 92/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
79static int32_t ptr2addr(const void *p) 93static int32_t ptr2addr(const void *p)
80{ 94{
81 lua_assert((uintptr_t)p < (uintptr_t)0x80000000); 95 lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range");
82 return i32ptr(p); 96 return i32ptr(p);
83} 97}
84#else 98#else
85#define ptr2addr(p) (i32ptr((p))) 99#define ptr2addr(p) (i32ptr((p)))
86#endif 100#endif
87 101
88/* op r, [addr] */
89static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
90{
91 MCode *p = as->mcp;
92 *(int32_t *)(p-4) = ptr2addr(addr);
93#if LJ_64
94 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
95 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
96#else
97 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
98#endif
99}
100
101/* op r, [base+ofs] */ 102/* op r, [base+ofs] */
102static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) 103static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
103{ 104{
104 MCode *p = as->mcp; 105 MCode *p = as->mcp;
105 x86Mode mode; 106 x86Mode mode;
106 if (ra_hasreg(rb)) { 107 if (ra_hasreg(rb)) {
107 if (ofs == 0 && (rb&7) != RID_EBP) { 108 if (LJ_GC64 && rb == RID_RIP) {
109 mode = XM_OFS0;
110 p -= 4;
111 *(int32_t *)p = ofs;
112 } else if (ofs == 0 && (rb&7) != RID_EBP) {
108 mode = XM_OFS0; 113 mode = XM_OFS0;
109 } else if (checki8(ofs)) { 114 } else if (checki8(ofs)) {
110 *--p = (MCode)ofs; 115 *--p = (MCode)ofs;
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
202 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); 207 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
203 rb = RID_ESP; 208 rb = RID_ESP;
204#endif 209#endif
210 } else if (LJ_GC64 && rb == RID_RIP) {
211 lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index");
212 mode = XM_OFS0;
213 p -= 4;
214 *(int32_t *)p = as->mrm.ofs;
205 } else { 215 } else {
206 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { 216 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
207 mode = XM_OFS0; 217 mode = XM_OFS0;
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 251
242/* -- Emit loads/stores --------------------------------------------------- */ 252/* -- Emit loads/stores --------------------------------------------------- */
243 253
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 254/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 255static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 256{
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
259/* Get/set global_State fields. */ 265/* Get/set global_State fields. */
260#define emit_opgl(as, xo, r, field) \ 266#define emit_opgl(as, xo, r, field) \
261 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) 267 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
262#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) 268#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
263#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) 269#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
264 270
265#define emit_setvmstate(as, i) \ 271#define emit_setvmstate(as, i) \
266 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) 272 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
285 } 291 }
286} 292}
287 293
294#if LJ_GC64
295#define dispofs(as, k) \
296 ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
297#define mcpofs(as, k) \
298 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
299#define mctopofs(as, k) \
300 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
301/* mov r, addr */
302#define emit_loada(as, r, addr) \
303 emit_loadu64(as, (r), (uintptr_t)(addr))
304#else
288/* mov r, addr */ 305/* mov r, addr */
289#define emit_loada(as, r, addr) \ 306#define emit_loada(as, r, addr) \
290 emit_loadi(as, (r), ptr2addr((addr))) 307 emit_loadi(as, (r), ptr2addr((addr)))
308#endif
291 309
292#if LJ_64 310#if LJ_64
293/* mov r, imm64 or shorter 32 bit extended load. */ 311/* mov r, imm64 or shorter 32 bit extended load. */
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
299 MCode *p = as->mcp; 317 MCode *p = as->mcp;
300 *(int32_t *)(p-4) = (int32_t)u64; 318 *(int32_t *)(p-4) = (int32_t)u64;
301 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); 319 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
320#if LJ_GC64
321 } else if (checki32(dispofs(as, u64))) {
322 emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
323 } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
324 /* Since as->realign assumes the code size doesn't change, check
325 ** RIP-relative addressing reachability for both as->mcp and as->mctop.
326 */
327 emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
328#endif
302 } else { /* Full-size 64 bit load. */ 329 } else { /* Full-size 64 bit load. */
303 MCode *p = as->mcp; 330 MCode *p = as->mcp;
304 *(uint64_t *)(p-8) = u64; 331 *(uint64_t *)(p-8) = u64;
@@ -310,13 +337,90 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
310} 337}
311#endif 338#endif
312 339
313/* movsd r, [&tv->n] / xorps r, r */ 340/* op r, [addr] */
314static void emit_loadn(ASMState *as, Reg r, cTValue *tv) 341static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
315{ 342{
316 if (tvispzero(tv)) /* Use xor only for +0. */ 343#if LJ_GC64
317 emit_rr(as, XO_XORPS, r, r); 344 if (checki32(dispofs(as, addr))) {
318 else 345 emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
319 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 346 } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
347 emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
348 } else if (!checki32((intptr_t)addr)) {
349 Reg ra = (rr & 15);
350 if (xo != XO_MOV) {
351 /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
352 uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
353 uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
354 ra = RID_DISPATCH;
355 if (checku32(dispaddr)) {
356 emit_loadi(as, ra, (int32_t)dispaddr);
357 } else { /* Full-size 64 bit load. */
358 MCode *p = as->mcp;
359 *(uint64_t *)(p-8) = dispaddr;
360 p[-9] = (MCode)(XI_MOVri+(ra&7));
361 p[-10] = 0x48 + ((ra>>3)&1);
362 p -= 10;
363 as->mcp = p;
364 }
365 if (xo == XO_GROUP3b) emit_i8(as, i8);
366 }
367 emit_rmro(as, xo, rr, ra, 0);
368 emit_loadu64(as, ra, (uintptr_t)addr);
369 } else
370#endif
371 {
372 MCode *p = as->mcp;
373 *(int32_t *)(p-4) = ptr2addr(addr);
374#if LJ_64
375 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
376 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
377#else
378 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
379#endif
380 }
381}
382
383/* Load 64 bit IR constant into register. */
384static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
385{
386 Reg r64;
387 x86Op xo;
388 const uint64_t *k = &ir_k64(ir)->u64;
389 if (rset_test(RSET_FPR, r)) {
390 r64 = r;
391 xo = XO_MOVSD;
392 } else {
393 r64 = r | REX_64;
394 xo = XO_MOV;
395 }
396 if (*k == 0) {
397 emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
398#if LJ_GC64
399 } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
400 (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
401 emit_rma(as, xo, r64, k);
402 } else {
403 if (ir->i) {
404 lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
405 "bad interned 64 bit constant");
406 } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
407 emit_loadu64(as, r, *k);
408 return;
409 } else {
410 /* If all else fails, add the FP constant at the MCode area bottom. */
411 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
412 *(uint64_t *)as->mcbot = *k;
413 ir->i = (int32_t)(as->mctop - as->mcbot);
414 as->mcbot += 8;
415 as->mclim = as->mcbot + MCLIM_REDZONE;
416 lj_mcode_commitbot(as->J, as->mcbot);
417 }
418 emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
419#else
420 } else {
421 emit_rma(as, xo, r64, k);
422#endif
423 }
320} 424}
321 425
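
Under LJ_GC64 the patch prefers short addressing forms: if a 64-bit address or constant is within +-2 GB of the DISPATCH register it uses a [DISPATCH+disp32] access, if it is within +-2 GB of the machine code it uses RIP-relative addressing, and only otherwise does it fall back to a full 64-bit load (with FP constants interned at the bottom of the mcode area). A rough sketch of that decision order, with dispatch/mcp/mctop taken as plain addresses rather than LuaJIT's assembler state:

#include <stdint.h>

enum k64_mode { K64_DISP32, K64_RIPREL, K64_IMM64 };

static int fits_i32(intptr_t d) { return d == (intptr_t)(int32_t)d; }

/* Sketch of the addressing-mode choice made by emit_rma/emit_loadk64 above. */
static enum k64_mode choose_k64(uintptr_t k, uintptr_t dispatch,
                                uintptr_t mcp, uintptr_t mctop)
{
  if (fits_i32((intptr_t)(k - dispatch))) return K64_DISP32;
  if (fits_i32((intptr_t)(k - mcp)) && fits_i32((intptr_t)(k - mctop)))
    return K64_RIPREL;   /* Reachable from both ends of the code area. */
  return K64_IMM64;
}
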
322/* -- Emit control-flow instructions -------------------------------------- */ 426/* -- Emit control-flow instructions -------------------------------------- */
@@ -330,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target)
330{ 434{
331 MCode *p = as->mcp; 435 MCode *p = as->mcp;
332 ptrdiff_t delta = target - p; 436 ptrdiff_t delta = target - p;
333 lua_assert(delta == (int8_t)delta); 437 lj_assertA(delta == (int8_t)delta, "short jump target out of range");
334 p[-1] = (MCode)(int8_t)delta; 438 p[-1] = (MCode)(int8_t)delta;
335 p[-2] = XI_JMPs; 439 p[-2] = XI_JMPs;
336 as->mcp = p - 2; 440 as->mcp = p - 2;
@@ -342,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target)
342{ 446{
343 MCode *p = as->mcp; 447 MCode *p = as->mcp;
344 ptrdiff_t delta = target - p; 448 ptrdiff_t delta = target - p;
345 lua_assert(delta == (int8_t)delta); 449 lj_assertA(delta == (int8_t)delta, "short jump target out of range");
346 p[-1] = (MCode)(int8_t)delta; 450 p[-1] = (MCode)(int8_t)delta;
347 p[-2] = (MCode)(XI_JCCs+(cc&15)); 451 p[-2] = (MCode)(XI_JCCs+(cc&15));
348 as->mcp = p - 2; 452 as->mcp = p - 2;
@@ -368,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source)
368#define emit_label(as) ((as)->mcp) 472#define emit_label(as) ((as)->mcp)
369 473
370/* Compute relative 32 bit offset for jump and call instructions. */ 474/* Compute relative 32 bit offset for jump and call instructions. */
371static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) 475static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
372{ 476{
373 ptrdiff_t delta = target - p; 477 ptrdiff_t delta = target - p;
374 lua_assert(delta == (int32_t)delta); 478 UNUSED(J);
479 lj_assertJ(delta == (int32_t)delta, "jump target out of range");
375 return (int32_t)delta; 480 return (int32_t)delta;
376} 481}
377 482
@@ -379,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
379static void emit_jcc(ASMState *as, int cc, MCode *target) 484static void emit_jcc(ASMState *as, int cc, MCode *target)
380{ 485{
381 MCode *p = as->mcp; 486 MCode *p = as->mcp;
382 *(int32_t *)(p-4) = jmprel(p, target); 487 *(int32_t *)(p-4) = jmprel(as->J, p, target);
383 p[-5] = (MCode)(XI_JCCn+(cc&15)); 488 p[-5] = (MCode)(XI_JCCn+(cc&15));
384 p[-6] = 0x0f; 489 p[-6] = 0x0f;
385 as->mcp = p - 6; 490 as->mcp = p - 6;
@@ -389,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target)
389static void emit_jmp(ASMState *as, MCode *target) 494static void emit_jmp(ASMState *as, MCode *target)
390{ 495{
391 MCode *p = as->mcp; 496 MCode *p = as->mcp;
392 *(int32_t *)(p-4) = jmprel(p, target); 497 *(int32_t *)(p-4) = jmprel(as->J, p, target);
393 p[-5] = XI_JMP; 498 p[-5] = XI_JMP;
394 as->mcp = p - 5; 499 as->mcp = p - 5;
395} 500}
@@ -406,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target)
406 return; 511 return;
407 } 512 }
408#endif 513#endif
409 *(int32_t *)(p-4) = jmprel(p, target); 514 *(int32_t *)(p-4) = jmprel(as->J, p, target);
410 p[-5] = XI_CALL; 515 p[-5] = XI_CALL;
411 as->mcp = p - 5; 516 as->mcp = p - 5;
412} 517}
@@ -418,8 +523,10 @@ static void emit_call_(ASMState *as, MCode *target)
418/* Use 64 bit operations to handle 64 bit IR types. */ 523/* Use 64 bit operations to handle 64 bit IR types. */
419#if LJ_64 524#if LJ_64
420#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) 525#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
526#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
421#else 527#else
422#define REX_64IR(ir, r) (r) 528#define REX_64IR(ir, r) (r)
529#define VEX_64IR(ir, r) (r)
423#endif 530#endif
424 531
425/* Generic move between two regs. */ 532/* Generic move between two regs. */
@@ -429,35 +536,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
429 if (dst < RID_MAX_GPR) 536 if (dst < RID_MAX_GPR)
430 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 537 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
431 else 538 else
432 emit_rr(as, XMM_MOVRR(as), dst, src); 539 emit_rr(as, XO_MOVAPS, dst, src);
433} 540}
434 541
435/* Generic load of register from stack slot. */ 542/* Generic load of register with base and (small) offset address. */
436static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 543static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
437{ 544{
438 if (r < RID_MAX_GPR) 545 if (r < RID_MAX_GPR)
439 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 546 emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
440 else 547 else
441 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 548 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
442} 549}
443 550
444/* Generic store of register to stack slot. */ 551/* Generic store of register with base and (small) offset address. */
445static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 552static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
446{ 553{
447 if (r < RID_MAX_GPR) 554 if (r < RID_MAX_GPR)
448 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); 555 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
449 else 556 else
450 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); 557 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
451} 558}
452 559
453/* Add offset to pointer. */ 560/* Add offset to pointer. */
454static void emit_addptr(ASMState *as, Reg r, int32_t ofs) 561static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
455{ 562{
456 if (ofs) { 563 if (ofs) {
457 if ((as->flags & JIT_F_LEA_AGU)) 564 emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
458 emit_rmro(as, XO_LEA, r, r, ofs);
459 else
460 emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
461 } 565 }
462} 566}
463 567
diff --git a/src/lj_err.c b/src/lj_err.c
index 1d1f6b9e..b0ceaa2f 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -16,6 +16,7 @@
16#include "lj_ff.h" 16#include "lj_ff.h"
17#include "lj_trace.h" 17#include "lj_trace.h"
18#include "lj_vm.h" 18#include "lj_vm.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** LuaJIT can either use internal or external frame unwinding: 22** LuaJIT can either use internal or external frame unwinding:
@@ -28,12 +29,18 @@
28** Pros and Cons: 29** Pros and Cons:
29** 30**
30** - EXT requires unwind tables for *all* functions on the C stack between 31** - EXT requires unwind tables for *all* functions on the C stack between
31** the pcall/catch and the error/throw. This is the default on x64, 32** the pcall/catch and the error/throw. C modules used by Lua code can
32** but needs to be manually enabled on x86/PPC for non-C++ code. 33** throw errors, so these need to have unwind tables, too. Transitively
34** this applies to all system libraries used by C modules -- at least
35** when they have callbacks which may throw an error.
33** 36**
34** - INT is faster when actually throwing errors (but this happens rarely). 37** - INT is faster when actually throwing errors, but this happens rarely.
35** Setting up error handlers is zero-cost in any case. 38** Setting up error handlers is zero-cost in any case.
36** 39**
40** - INT needs to save *all* callee-saved registers when entering the
41** interpreter. EXT only needs to save those actually used inside the
42** interpreter. JIT-compiled code may need to save some more.
43**
37** - EXT provides full interoperability with C++ exceptions. You can throw 44** - EXT provides full interoperability with C++ exceptions. You can throw
38** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. 45** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
39** C++ destructors are called as needed. C++ exceptions caught by pcall 46** C++ destructors are called as needed. C++ exceptions caught by pcall
@@ -45,27 +52,38 @@
45** the wrapper function feature. Lua errors thrown through C++ frames 52** the wrapper function feature. Lua errors thrown through C++ frames
46** cannot be caught by C++ code and C++ destructors are not run. 53** cannot be caught by C++ code and C++ destructors are not run.
47** 54**
48** EXT is the default on x64 systems, INT is the default on all other systems. 55** - EXT can handle errors from internal helper functions that are called
56** from JIT-compiled code (except for Windows/x86 and 32 bit ARM).
57** INT has no choice but to call the panic handler, if this happens.
58** Note: this is mainly relevant for out-of-memory errors.
59**
60** EXT is the default on all systems where the toolchain produces unwind
61** tables by default (*). This is hard-coded and/or detected in src/Makefile.
62** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
63**
64** INT is the default on all other systems.
65**
66** EXT can be manually enabled for toolchains that are able to produce
67** conforming unwind tables:
68** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
69** As explained above, *all* C code used directly or indirectly by LuaJIT
70** must be compiled with -funwind-tables (or -fexceptions). C++ code must
71** *not* be compiled with -fno-exceptions.
72**
73** If you're unsure whether error handling inside the VM works correctly,
74** try running this and check whether it prints "OK":
49** 75**
50** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack 76** luajit -e "print(select(2, load('OK')):match('OK'))"
51** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
52** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
53** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
54** and all C libraries that have callbacks which may be used to call back
55** into Lua. C++ code must *not* be compiled with -fno-exceptions.
56** 77**
57** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH. 78** (*) Originally, toolchains only generated unwind tables for C++ code. For
58** EXT is mandatory on WIN64 since the calling convention has an abundance 79** interoperability reasons, this can be manually enabled for plain C code,
59** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). 80** too (with -funwind-tables). With the introduction of the x64 architecture,
60** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). 81** the corresponding POSIX and Windows ABIs mandated unwind tables for all
82** code. Over the following years most desktop and server platforms have
83** enabled unwind tables by default on all architectures. OTOH mobile and
84** embedded platforms do not consistently mandate unwind tables.
61*/ 85*/
62 86
63#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
64#define LJ_UNWIND_EXT 1
65#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
66#define LJ_UNWIND_EXT 1
67#endif
68
69/* -- Error messages ------------------------------------------------------ */ 87/* -- Error messages ------------------------------------------------------ */
70 88
71/* Error message strings. */ 89/* Error message strings. */
@@ -98,14 +116,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
98 TValue *top = restorestack(L, -nres); 116 TValue *top = restorestack(L, -nres);
99 if (frame < top) { /* Frame reached? */ 117 if (frame < top) { /* Frame reached? */
100 if (errcode) { 118 if (errcode) {
101 L->cframe = cframe_prev(cf);
102 L->base = frame+1; 119 L->base = frame+1;
120 L->cframe = cframe_prev(cf);
103 unwindstack(L, top); 121 unwindstack(L, top);
104 } 122 }
105 return cf; 123 return cf;
106 } 124 }
107 } 125 }
108 if (frame <= tvref(L->stack)) 126 if (frame <= tvref(L->stack)+LJ_FR2)
109 break; 127 break;
110 switch (frame_typep(frame)) { 128 switch (frame_typep(frame)) {
111 case FRAME_LUA: /* Lua frame. */ 129 case FRAME_LUA: /* Lua frame. */
@@ -113,14 +131,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
113 frame = frame_prevl(frame); 131 frame = frame_prevl(frame);
114 break; 132 break;
115 case FRAME_C: /* C frame. */ 133 case FRAME_C: /* C frame. */
116#if LJ_HASFFI
117 unwind_c: 134 unwind_c:
118#endif
119#if LJ_UNWIND_EXT 135#if LJ_UNWIND_EXT
120 if (errcode) { 136 if (errcode) {
121 L->cframe = cframe_prev(cf);
122 L->base = frame_prevd(frame) + 1; 137 L->base = frame_prevd(frame) + 1;
123 unwindstack(L, frame); 138 L->cframe = cframe_prev(cf);
139 unwindstack(L, frame - LJ_FR2);
124 } else if (cf != stopcf) { 140 } else if (cf != stopcf) {
125 cf = cframe_prev(cf); 141 cf = cframe_prev(cf);
126 frame = frame_prevd(frame); 142 frame = frame_prevd(frame);
@@ -143,16 +159,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
143 return cf; 159 return cf;
144 } 160 }
145 if (errcode) { 161 if (errcode) {
146 L->cframe = cframe_prev(cf);
147 L->base = frame_prevd(frame) + 1; 162 L->base = frame_prevd(frame) + 1;
148 unwindstack(L, frame); 163 L->cframe = cframe_prev(cf);
164 unwindstack(L, frame - LJ_FR2);
149 } 165 }
150 return cf; 166 return cf;
151 case FRAME_CONT: /* Continuation frame. */ 167 case FRAME_CONT: /* Continuation frame. */
152#if LJ_HASFFI 168 if (frame_iscont_fficb(frame))
153 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
154 goto unwind_c; 169 goto unwind_c;
155#endif
156 /* fallthrough */ 170 /* fallthrough */
157 case FRAME_VARG: /* Vararg frame. */ 171 case FRAME_VARG: /* Vararg frame. */
158 frame = frame_prevd(frame); 172 frame = frame_prevd(frame);
@@ -160,14 +174,17 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
160 case FRAME_PCALL: /* FF pcall() frame. */ 174 case FRAME_PCALL: /* FF pcall() frame. */
161 case FRAME_PCALLH: /* FF pcall() frame inside hook. */ 175 case FRAME_PCALLH: /* FF pcall() frame inside hook. */
162 if (errcode) { 176 if (errcode) {
177 global_State *g;
163 if (errcode == LUA_YIELD) { 178 if (errcode == LUA_YIELD) {
164 frame = frame_prevd(frame); 179 frame = frame_prevd(frame);
165 break; 180 break;
166 } 181 }
182 g = G(L);
183 setgcref(g->cur_L, obj2gco(L));
167 if (frame_typep(frame) == FRAME_PCALL) 184 if (frame_typep(frame) == FRAME_PCALL)
168 hook_leave(G(L)); 185 hook_leave(g);
169 L->cframe = cf;
170 L->base = frame_prevd(frame) + 1; 186 L->base = frame_prevd(frame) + 1;
187 L->cframe = cf;
171 unwindstack(L, L->base); 188 unwindstack(L, L->base);
172 } 189 }
173 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); 190 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -175,8 +192,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
175 } 192 }
176 /* No C frame. */ 193 /* No C frame. */
177 if (errcode) { 194 if (errcode) {
195 L->base = tvref(L->stack)+1+LJ_FR2;
178 L->cframe = NULL; 196 L->cframe = NULL;
179 L->base = tvref(L->stack)+1;
180 unwindstack(L, L->base); 197 unwindstack(L, L->base);
181 if (G(L)->panic) 198 if (G(L)->panic)
182 G(L)->panic(L); 199 G(L)->panic(L);
@@ -187,33 +204,226 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
187 204
188/* -- External frame unwinding -------------------------------------------- */ 205/* -- External frame unwinding -------------------------------------------- */
189 206
190#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN 207#if LJ_ABI_WIN
191 208
192/* 209/*
193** We have to use our own definitions instead of the mandatory (!) unwind.h, 210** Someone in Redmond owes me several days of my life. A lot of this is
194** since various OS, distros and compilers mess up the header installation. 211** undocumented or just plain wrong on MSDN. Some of it can be gathered
212** from 3rd party docs or must be found by trial-and-error. They really
213** don't want you to write your own language-specific exception handler
214** or to interact gracefully with MSVC. :-(
195*/ 215*/
196 216
197typedef struct _Unwind_Exception 217#define WIN32_LEAN_AND_MEAN
218#include <windows.h>
219
220#if LJ_TARGET_X86
221typedef void *UndocumentedDispatcherContext; /* Unused on x86. */
222#else
223/* Taken from: http://www.nynaeve.net/?p=99 */
224typedef struct UndocumentedDispatcherContext {
225 ULONG64 ControlPc;
226 ULONG64 ImageBase;
227 PRUNTIME_FUNCTION FunctionEntry;
228 ULONG64 EstablisherFrame;
229 ULONG64 TargetIp;
230 PCONTEXT ContextRecord;
231 void (*LanguageHandler)(void);
232 PVOID HandlerData;
233 PUNWIND_HISTORY_TABLE HistoryTable;
234 ULONG ScopeIndex;
235 ULONG Fill0;
236} UndocumentedDispatcherContext;
237#endif
238
239/* Another wild guess. */
240extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
241
242#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
243#define LJ_GCC_EXCODE ((DWORD)0x20474343)
244
245#define LJ_EXCODE ((DWORD)0xe24c4a00)
246#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c))
247#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
248#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
249
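
LJ_EXCODE packs the Lua error code into the low byte of a custom SEH exception code: the high bits mark a customer-defined error status and the 0x4c4a bytes spell "LJ". For example, with LUA_ERRRUN == 2 the raised code is 0xe24c4a02 and the check/extract macros round-trip it. A quick standalone check (DWORD is stubbed so the sketch builds outside Windows):

#include <assert.h>
#include <stdint.h>

#define LUA_ERRRUN 2            /* From lua.h. */
typedef uint32_t DWORD;         /* Stand-in for <windows.h>. */

#define LJ_EXCODE             ((DWORD)0xe24c4a00)
#define LJ_EXCODE_MAKE(c)     (LJ_EXCODE | (DWORD)(c))
#define LJ_EXCODE_CHECK(cl)   (((cl) ^ LJ_EXCODE) <= 0xff)
#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))

int main(void)
{
  DWORD cl = LJ_EXCODE_MAKE(LUA_ERRRUN);     /* 0xe24c4a02 */
  assert(cl == 0xe24c4a02 && LJ_EXCODE_CHECK(cl));
  assert(LJ_EXCODE_ERRCODE(cl) == LUA_ERRRUN);
  assert(!LJ_EXCODE_CHECK(0xe06d7363));      /* MSVC C++ exceptions differ. */
  return 0;
}
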
250/* Windows exception handler for interpreter frame. */
251LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
252 void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
198{ 253{
199 uint64_t exclass; 254#if LJ_TARGET_X86
200 void (*excleanup)(int, struct _Unwind_Exception *); 255 void *cf = (char *)f - CFRAME_OFS_SEH;
201 uintptr_t p1, p2; 256#elif LJ_TARGET_ARM64
202} __attribute__((__aligned__)) _Unwind_Exception; 257 void *cf = (char *)f - CFRAME_SIZE;
258#else
259 void *cf = f;
260#endif
261 lua_State *L = cframe_L(cf);
262 int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
263 LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
264 if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
265 if (rec->ExceptionCode == STATUS_LONGJUMP &&
266 rec->ExceptionRecord &&
267 LJ_EXCODE_CHECK(rec->ExceptionRecord->ExceptionCode)) {
268 errcode = LJ_EXCODE_ERRCODE(rec->ExceptionRecord->ExceptionCode);
269 if ((rec->ExceptionFlags & 0x20)) { /* EH_TARGET_UNWIND */
270 /* Unwinding is about to finish; revert the ExceptionCode so that
271 ** RtlRestoreContext does not try to restore from a _JUMP_BUFFER.
272 */
273 rec->ExceptionCode = 0;
274 }
275 }
276 /* Unwind internal frames. */
277 err_unwind(L, cf, errcode);
278 } else {
279 void *cf2 = err_unwind(L, cf, 0);
280 if (cf2) { /* We catch it, so start unwinding the upper frames. */
281#if !LJ_TARGET_X86
282 EXCEPTION_RECORD rec2;
283#endif
284 if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
285 rec->ExceptionCode == LJ_GCC_EXCODE) {
286#if !LJ_TARGET_CYGWIN
287 __DestructExceptionObject(rec, 1);
288#endif
289 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
290 } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
291 /* Don't catch access violations etc. */
292 return 1; /* ExceptionContinueSearch */
293 }
294#if LJ_TARGET_X86
295 UNUSED(ctx);
296 UNUSED(dispatch);
297 /* Call all handlers for all lower C frames (including ourselves) again
298 ** with EH_UNWINDING set. Then call the specified function, passing cf
299 ** and errcode.
300 */
301 lj_vm_rtlunwind(cf, (void *)rec,
302 (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
303 (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
304 /* lj_vm_rtlunwind does not return. */
305#else
306 if (LJ_EXCODE_CHECK(rec->ExceptionCode)) {
307 /* For unwind purposes, wrap the EXCEPTION_RECORD in something that
308 ** looks like a longjmp, so that MSVC will execute C++ destructors in
309 ** the frames we unwind over. ExceptionInformation[0] should really
310 ** contain a _JUMP_BUFFER*, but hopefully nobody is looking too closely
311 ** at this point.
312 */
313 rec2.ExceptionCode = STATUS_LONGJUMP;
314 rec2.ExceptionRecord = rec;
315 rec2.ExceptionAddress = 0;
316 rec2.NumberParameters = 1;
317 rec2.ExceptionInformation[0] = (ULONG_PTR)ctx;
318 rec = &rec2;
319 }
320 /* Unwind the stack and call all handlers for all lower C frames
321 ** (including ourselves) again with EH_UNWINDING set. Then set
322 ** stack pointer = f, result = errcode and jump to the specified target.
323 */
324 RtlUnwindEx(f, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
325 lj_vm_unwind_ff_eh :
326 lj_vm_unwind_c_eh),
327 rec, (void *)(uintptr_t)errcode, dispatch->ContextRecord,
328 dispatch->HistoryTable);
329 /* RtlUnwindEx should never return. */
330#endif
331 }
332 }
333 return 1; /* ExceptionContinueSearch */
334}
335
336#if LJ_UNWIND_JIT
337
338#if LJ_TARGET_X64
339#define CONTEXT_REG_PC Rip
340#elif LJ_TARGET_ARM64
341#define CONTEXT_REG_PC Pc
342#else
343#error "NYI: Windows arch-specific unwinder for JIT-compiled code"
344#endif
345
346/* Windows unwinder for JIT-compiled code. */
347static void err_unwind_win_jit(global_State *g, int errcode)
348{
349 CONTEXT ctx;
350 UNWIND_HISTORY_TABLE hist;
351
352 memset(&hist, 0, sizeof(hist));
353 RtlCaptureContext(&ctx);
354 while (1) {
355 DWORD64 frame, base, addr = ctx.CONTEXT_REG_PC;
356 void *hdata;
357 PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
358 if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */
359 ExitNo exitno;
360 uintptr_t stub = lj_trace_unwind(G2J(g), (uintptr_t)(addr - sizeof(MCode)), &exitno);
361 if (stub) { /* Jump to side exit to unwind the trace. */
362 ctx.CONTEXT_REG_PC = stub;
363 G2J(g)->exitcode = errcode;
364 RtlRestoreContext(&ctx, NULL); /* Does not return. */
365 }
366 break;
367 }
368 RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func,
369 &ctx, &hdata, &frame, NULL);
370 if (!addr) break;
371 }
 372 /* Unwinding failed if we end up here. */
373}
374#endif
375
376/* Raise Windows exception. */
377static void err_raise_ext(global_State *g, int errcode)
378{
379#if LJ_UNWIND_JIT
380 if (tvref(g->jit_base)) {
381 err_unwind_win_jit(g, errcode);
382 return; /* Unwinding failed. */
383 }
384#elif LJ_HASJIT
385 /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */
386 setmref(g->jit_base, NULL);
387#endif
388 UNUSED(g);
389 RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
390}
391
392#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
393
394/*
395** We have to use our own definitions instead of the mandatory (!) unwind.h,
396** since various OS, distros and compilers mess up the header installation.
397*/
203 398
204typedef struct _Unwind_Context _Unwind_Context; 399typedef struct _Unwind_Context _Unwind_Context;
205 400
206#define _URC_OK 0 401#define _URC_OK 0
402#define _URC_FATAL_PHASE2_ERROR 2
207#define _URC_FATAL_PHASE1_ERROR 3 403#define _URC_FATAL_PHASE1_ERROR 3
208#define _URC_HANDLER_FOUND 6 404#define _URC_HANDLER_FOUND 6
209#define _URC_INSTALL_CONTEXT 7 405#define _URC_INSTALL_CONTEXT 7
210#define _URC_CONTINUE_UNWIND 8 406#define _URC_CONTINUE_UNWIND 8
211#define _URC_FAILURE 9 407#define _URC_FAILURE 9
212 408
409#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
410#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
411#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
412#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
413
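
The DWARF2 path does the same packing in the 64-bit exception class: the upper bytes spell "LUAJIT2\0" and the low byte carries the error code, so LJ_UEXCLASS_CHECK rejects foreign exceptions while LJ_UEXCLASS_ERRCODE recovers the Lua status. A tiny illustrative check (the foreign class shown is e.g. GCC's C++ "GNUCC++\0"):

#include <assert.h>
#include <stdint.h>

int main(void)
{
  uint64_t cl = 0x4c55414a49543200ULL | 4;   /* LJ_UEXCLASS_MAKE(LUA_ERRMEM) */
  assert((cl ^ 0x4c55414a49543200ULL) <= 0xff && (int)(cl & 0xff) == 4);
  assert((0x474e5543432b2b00ULL ^ 0x4c55414a49543200ULL) > 0xff);
  return 0;
}
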
213#if !LJ_TARGET_ARM 414#if !LJ_TARGET_ARM
214 415
416typedef struct _Unwind_Exception
417{
418 uint64_t exclass;
419 void (*excleanup)(int, struct _Unwind_Exception *);
420 uintptr_t p1, p2;
421} __attribute__((__aligned__)) _Unwind_Exception;
422#define UNWIND_EXCEPTION_TYPE _Unwind_Exception
423
215extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); 424extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
216extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); 425extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
426extern uintptr_t _Unwind_GetIP(_Unwind_Context *);
217extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); 427extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
218extern void _Unwind_DeleteException(_Unwind_Exception *); 428extern void _Unwind_DeleteException(_Unwind_Exception *);
219extern int _Unwind_RaiseException(_Unwind_Exception *); 429extern int _Unwind_RaiseException(_Unwind_Exception *);
@@ -223,11 +433,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *);
223#define _UA_HANDLER_FRAME 4 433#define _UA_HANDLER_FRAME 4
224#define _UA_FORCE_UNWIND 8 434#define _UA_FORCE_UNWIND 8
225 435
226#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
227#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
228#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
229#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
230
231/* DWARF2 personality handler referenced from interpreter .eh_frame. */ 436/* DWARF2 personality handler referenced from interpreter .eh_frame. */
232LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, 437LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
233 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) 438 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
@@ -236,7 +441,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
236 lua_State *L; 441 lua_State *L;
237 if (version != 1) 442 if (version != 1)
238 return _URC_FATAL_PHASE1_ERROR; 443 return _URC_FATAL_PHASE1_ERROR;
239 UNUSED(uexclass);
240 cf = (void *)_Unwind_GetCFA(ctx); 444 cf = (void *)_Unwind_GetCFA(ctx);
241 L = cframe_L(cf); 445 L = cframe_L(cf);
242 if ((actions & _UA_SEARCH_PHASE)) { 446 if ((actions & _UA_SEARCH_PHASE)) {
@@ -263,10 +467,10 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
263 if ((actions & _UA_FORCE_UNWIND)) { 467 if ((actions & _UA_FORCE_UNWIND)) {
264 return _URC_CONTINUE_UNWIND; 468 return _URC_CONTINUE_UNWIND;
265 } else if (cf) { 469 } else if (cf) {
470 ASMFunction ip;
266 _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode); 471 _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode);
267 _Unwind_SetIP(ctx, (uintptr_t)(cframe_unwind_ff(cf) ? 472 ip = cframe_unwind_ff(cf) ? lj_vm_unwind_ff_eh : lj_vm_unwind_c_eh;
268 lj_vm_unwind_ff_eh : 473 _Unwind_SetIP(ctx, (uintptr_t)lj_ptr_strip(ip));
269 lj_vm_unwind_c_eh));
270 return _URC_INSTALL_CONTEXT; 474 return _URC_INSTALL_CONTEXT;
271 } 475 }
272#if LJ_TARGET_X86ORX64 476#if LJ_TARGET_X86ORX64
@@ -284,27 +488,170 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
284 ** it on non-x64 because the interpreter restores all callee-saved regs. 488 ** it on non-x64 because the interpreter restores all callee-saved regs.
285 */ 489 */
286 lj_err_throw(L, errcode); 490 lj_err_throw(L, errcode);
491#if LJ_TARGET_X64
492#error "Broken build system -- only use the provided Makefiles!"
493#endif
287#endif 494#endif
288 } 495 }
289 return _URC_CONTINUE_UNWIND; 496 return _URC_CONTINUE_UNWIND;
290} 497}
291 498
292#if LJ_UNWIND_EXT 499#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
293static __thread _Unwind_Exception static_uex; 500struct dwarf_eh_bases { void *tbase, *dbase, *func; };
501extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases);
502
503/* Verify that external error handling actually has a chance to work. */
504void lj_err_verify(void)
505{
506#if !LJ_TARGET_OSX
507 /* Check disabled on MacOS due to brilliant software engineering at Apple. */
508 struct dwarf_eh_bases ehb;
509 lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables");
510#endif
511 /* Check disabled, because of broken Fedora/ARM64. See #722.
512 lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables");
513 */
514}
515#endif
294 516
295/* Raise DWARF2 exception. */ 517#if LJ_UNWIND_JIT
296static void err_raise_ext(int errcode) 518/* DWARF2 personality handler for JIT-compiled code. */
519static int err_unwind_jit(int version, int actions,
520 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
297{ 521{
298 static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); 522 /* NYI: FFI C++ exception interoperability. */
299 static_uex.excleanup = NULL; 523 if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass))
300 _Unwind_RaiseException(&static_uex); 524 return _URC_FATAL_PHASE1_ERROR;
525 if ((actions & _UA_SEARCH_PHASE)) {
526 return _URC_HANDLER_FOUND;
527 }
528 if ((actions & _UA_CLEANUP_PHASE)) {
529 global_State *g = *(global_State **)(uex+1);
530 ExitNo exitno;
531 uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */
532 uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
533 lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame");
534 if (stub) { /* Jump to side exit to unwind the trace. */
535 G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass);
536#ifdef LJ_TARGET_MIPS
537 _Unwind_SetGR(ctx, 4, stub);
538 _Unwind_SetGR(ctx, 5, exitno);
539 _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub);
540#else
541 _Unwind_SetIP(ctx, stub);
542#endif
543 return _URC_INSTALL_CONTEXT;
544 }
545 return _URC_FATAL_PHASE2_ERROR;
546 }
547 return _URC_FATAL_PHASE1_ERROR;
301} 548}
549
550/* DWARF2 template frame info for JIT-compiled code.
551**
552** After copying the template to the start of the mcode segment,
553** the frame handler function and the code size are patched.
554** The frame handler always installs a new context to jump to the exit,
555** so don't bother to add any unwind opcodes.
556*/
557static const uint8_t err_frame_jit_template[] = {
558#if LJ_BE
559 0,0,0,
560#endif
561 LJ_64 ? 0x1c : 0x14, /* CIE length. */
562#if LJ_LE
563 0,0,0,
564#endif
565 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */
566 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */
567#if LJ_64
568 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
569 0,0,0,0,0, /* Alignment. */
570#else
571 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
572 0, /* Alignment. */
573#endif
574#if LJ_BE
575 0,0,0,
576#endif
577 LJ_64 ? 0x14 : 0x10, /* FDE length. */
578 0,0,0,
579 LJ_64 ? 0x24 : 0x1c, /* CIE offset. */
580 0,0,0,
581 LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */
582#if LJ_LE
583 0,0,0,
584#endif
585 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */
586#if LJ_64
587 0,0,0,0, /* Alignment. */
588#endif
589 0,0,0,0 /* Final FDE. */
590};
591
592#define ERR_FRAME_JIT_OFS_HANDLER 0x12
593#define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18)
594#define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 0x2c : 0x24)
595#if LJ_TARGET_OSX
596#define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE
597#else
598#define ERR_FRAME_JIT_OFS_REGISTER 0
302#endif 599#endif
303 600
601extern void __register_frame(const void *);
602extern void __deregister_frame(const void *);
603
604uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
605{
606 ASMFunction handler = (ASMFunction)err_unwind_jit;
607 memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
608#if LJ_ABI_PAUTH
609#if LJ_TARGET_ARM64
610 handler = ptrauth_auth_and_resign(handler,
611 ptrauth_key_function_pointer, 0,
612 ptrauth_key_process_independent_code, info + ERR_FRAME_JIT_OFS_HANDLER);
304#else 613#else
614#error "missing pointer authentication support for this architecture"
615#endif
616#endif
617 memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
618 *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
619 (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
620 __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
621#ifdef LUA_USE_ASSERT
622 {
623 struct dwarf_eh_bases ehb;
624 lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb),
625 "bad JIT unwind table registration");
626 }
627#endif
628 return info + sizeof(err_frame_jit_template);
629}
305 630
306extern void _Unwind_DeleteException(void *); 631void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info)
307extern int __gnu_unwind_frame (void *, _Unwind_Context *); 632{
633 UNUSED(base); UNUSED(sz);
634 __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
635}
636#endif
637
638#else /* LJ_TARGET_ARM */
639
640#define _US_VIRTUAL_UNWIND_FRAME 0
641#define _US_UNWIND_FRAME_STARTING 1
642#define _US_ACTION_MASK 3
643#define _US_FORCE_UNWIND 8
644
645typedef struct _Unwind_Control_Block _Unwind_Control_Block;
646#define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block
647
648struct _Unwind_Control_Block {
649 uint64_t exclass;
650 uint32_t misc[20];
651};
652
653extern int _Unwind_RaiseException(_Unwind_Control_Block *);
654extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *);
308extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); 655extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *);
309extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); 656extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *);
310 657
@@ -320,120 +667,98 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v)
320 _Unwind_VRS_Set(ctx, 0, r, 0, &v); 667 _Unwind_VRS_Set(ctx, 0, r, 0, &v);
321} 668}
322 669
323#define _US_VIRTUAL_UNWIND_FRAME 0 670extern void lj_vm_unwind_ext(void);
324#define _US_UNWIND_FRAME_STARTING 1
325#define _US_ACTION_MASK 3
326#define _US_FORCE_UNWIND 8
327 671
328/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ 672/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */
329LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) 673LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
674 _Unwind_Context *ctx)
330{ 675{
331 void *cf = (void *)_Unwind_GetGR(ctx, 13); 676 void *cf = (void *)_Unwind_GetGR(ctx, 13);
332 lua_State *L = cframe_L(cf); 677 lua_State *L = cframe_L(cf);
333 if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { 678 int errcode;
334 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); 679
680 switch ((state & _US_ACTION_MASK)) {
681 case _US_VIRTUAL_UNWIND_FRAME:
682 if ((state & _US_FORCE_UNWIND)) break;
335 return _URC_HANDLER_FOUND; 683 return _URC_HANDLER_FOUND;
336 } 684 case _US_UNWIND_FRAME_STARTING:
337 if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { 685 if (LJ_UEXCLASS_CHECK(ucb->exclass)) {
338 _Unwind_DeleteException(ucb); 686 errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass);
339 _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); 687 } else {
340 _Unwind_SetGR(ctx, 0, (uint32_t)L); 688 errcode = LUA_ERRRUN;
341 _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); 689 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
690 }
691 cf = err_unwind(L, cf, errcode);
692 if ((state & _US_FORCE_UNWIND) || cf == NULL) break;
693 _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext);
694 _Unwind_SetGR(ctx, 0, (uint32_t)ucb);
695 _Unwind_SetGR(ctx, 1, (uint32_t)errcode);
696 _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ?
697 (uint32_t)lj_vm_unwind_ff_eh :
698 (uint32_t)lj_vm_unwind_c_eh);
342 return _URC_INSTALL_CONTEXT; 699 return _URC_INSTALL_CONTEXT;
700 default:
701 return _URC_FAILURE;
343 } 702 }
344 if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) 703 if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
345 return _URC_FAILURE; 704 return _URC_FAILURE;
705#ifdef LUA_USE_ASSERT
706 /* We should never get here unless this is a forced unwind aka backtrace. */
707 if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) {
708 _Unwind_SetGR(ctx, 0, 0xff33aa88);
709 }
710#endif
346 return _URC_CONTINUE_UNWIND; 711 return _URC_CONTINUE_UNWIND;
347} 712}
348 713
349#endif 714#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
715typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *);
716extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
350 717
351#elif LJ_TARGET_X64 && LJ_ABI_WIN 718static int err_verify_bt(_Unwind_Context *ctx, int *got)
719{
720 if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; }
721 else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); }
722 return _URC_OK;
723}
724
725/* Verify that external error handling actually has a chance to work. */
726void lj_err_verify(void)
727{
728 int got = 0;
729 _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got);
730 lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables");
731}
732#endif
352 733
353/* 734/*
354** Someone in Redmond owes me several days of my life. A lot of this is 735** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM.
355** undocumented or just plain wrong on MSDN. Some of it can be gathered
356** from 3rd party docs or must be found by trial-and-error. They really
357** don't want you to write your own language-specific exception handler
358** or to interact gracefully with MSVC. :-(
359** 736**
360** Apparently MSVC doesn't call C++ destructors for foreign exceptions 737** The quirky ARM unwind API doesn't have __register_frame().
361** unless you compile your C++ code with /EHa. Unfortunately this means 738** A potential workaround might involve _Unwind_Backtrace.
362** catch (...) also catches things like access violations. The use of 739** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway,
363** _set_se_translator doesn't really help, because it requires /EHa, too. 740** since they are built without unwind tables by default.
364*/ 741*/
365 742
366#define WIN32_LEAN_AND_MEAN 743#endif /* LJ_TARGET_ARM */
367#include <windows.h>
368
369/* Taken from: http://www.nynaeve.net/?p=99 */
370typedef struct UndocumentedDispatcherContext {
371 ULONG64 ControlPc;
372 ULONG64 ImageBase;
373 PRUNTIME_FUNCTION FunctionEntry;
374 ULONG64 EstablisherFrame;
375 ULONG64 TargetIp;
376 PCONTEXT ContextRecord;
377 void (*LanguageHandler)(void);
378 PVOID HandlerData;
379 PUNWIND_HISTORY_TABLE HistoryTable;
380 ULONG ScopeIndex;
381 ULONG Fill0;
382} UndocumentedDispatcherContext;
383
384/* Another wild guess. */
385extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
386 744
387#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
388#define LJ_GCC_EXCODE ((DWORD)0x20474343)
389 745
390#define LJ_EXCODE ((DWORD)0xe24c4a00) 746#if LJ_UNWIND_EXT
391#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) 747static __thread struct {
392#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) 748 UNWIND_EXCEPTION_TYPE ex;
393#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) 749 global_State *g;
750} static_uex;
394 751
395/* Win64 exception handler for interpreter frame. */ 752/* Raise external exception. */
396LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, 753static void err_raise_ext(global_State *g, int errcode)
397 void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
398{ 754{
399 lua_State *L = cframe_L(cf); 755 memset(&static_uex, 0, sizeof(static_uex));
400 int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? 756 static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode);
401 LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; 757 static_uex.g = g;
402 if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ 758 _Unwind_RaiseException(&static_uex.ex);
403 /* Unwind internal frames. */
404 err_unwind(L, cf, errcode);
405 } else {
406 void *cf2 = err_unwind(L, cf, 0);
407 if (cf2) { /* We catch it, so start unwinding the upper frames. */
408 if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
409 rec->ExceptionCode == LJ_GCC_EXCODE) {
410#if LJ_TARGET_WINDOWS
411 __DestructExceptionObject(rec, 1);
412#endif
413 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
414 } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
415 /* Don't catch access violations etc. */
416 return ExceptionContinueSearch;
417 }
418 /* Unwind the stack and call all handlers for all lower C frames
419 ** (including ourselves) again with EH_UNWINDING set. Then set
420 ** rsp = cf, rax = errcode and jump to the specified target.
421 */
422 RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
423 lj_vm_unwind_ff_eh :
424 lj_vm_unwind_c_eh),
425 rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
426 /* RtlUnwindEx should never return. */
427 }
428 }
429 return ExceptionContinueSearch;
430} 759}
431 760
432/* Raise Windows exception. */ 761#endif
433static void err_raise_ext(int errcode)
434{
435 RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
436}
437 762
438#endif 763#endif
439 764
@@ -444,22 +769,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
444{ 769{
445 global_State *g = G(L); 770 global_State *g = G(L);
446 lj_trace_abort(g); 771 lj_trace_abort(g);
447 setgcrefnull(g->jit_L); 772 L->status = LUA_OK;
448 L->status = 0;
449#if LJ_UNWIND_EXT 773#if LJ_UNWIND_EXT
450 err_raise_ext(errcode); 774 err_raise_ext(g, errcode);
451 /* 775 /*
452 ** A return from this function signals a corrupt C stack that cannot be 776 ** A return from this function signals a corrupt C stack that cannot be
453 ** unwound. We have no choice but to call the panic function and exit. 777 ** unwound. We have no choice but to call the panic function and exit.
454 ** 778 **
455 ** Usually this is caused by a C function without unwind information. 779 ** Usually this is caused by a C function without unwind information.
456 ** This should never happen on x64, but may happen if you've manually 780 ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
457 ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every* 781 ** and forgot to recompile *every* non-C++ file with -funwind-tables.
458 ** non-C++ file with -funwind-tables.
459 */ 782 */
460 if (G(L)->panic) 783 if (G(L)->panic)
461 G(L)->panic(L); 784 G(L)->panic(L);
462#else 785#else
786#if LJ_HASJIT
787 setmref(g->jit_base, NULL);
788#endif
463 { 789 {
464 void *cf = err_unwind(L, NULL, errcode); 790 void *cf = err_unwind(L, NULL, errcode);
465 if (cframe_unwind_ff(cf)) 791 if (cframe_unwind_ff(cf))
@@ -482,12 +808,16 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
482{ 808{
483 if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. */ 809 if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. */
484 lj_vm_unwind_c(L->cframe, LUA_ERRMEM); 810 lj_vm_unwind_c(L->cframe, LUA_ERRMEM);
811 if (LJ_HASJIT) {
812 TValue *base = tvref(G(L)->jit_base);
813 if (base) L->base = base;
814 }
485 if (curr_funcisL(L)) { 815 if (curr_funcisL(L)) {
486 L->top = curr_topL(L); 816 L->top = curr_topL(L);
487 if (LJ_UNLIKELY(L->top > tvref(L->maxstack))) { 817 if (LJ_UNLIKELY(L->top > tvref(L->maxstack))) {
488 /* The current Lua frame violates the stack. Replace it with a dummy. */ 818 /* The current Lua frame violates the stack. Replace it with a dummy. */
489 L->top = L->base; 819 L->top = L->base;
490 setframe_gc(L->base - 1, obj2gco(L)); 820 setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
491 } 821 }
492 } 822 }
493 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM)); 823 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM));
@@ -497,7 +827,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
497/* Find error function for runtime errors. Requires an extra stack traversal. */ 827/* Find error function for runtime errors. Requires an extra stack traversal. */
498static ptrdiff_t finderrfunc(lua_State *L) 828static ptrdiff_t finderrfunc(lua_State *L)
499{ 829{
500 cTValue *frame = L->base-1, *bot = tvref(L->stack); 830 cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
501 void *cf = L->cframe; 831 void *cf = L->cframe;
502 while (frame > bot && cf) { 832 while (frame > bot && cf) {
503 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ 833 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
@@ -521,10 +851,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
521 frame = frame_prevd(frame); 851 frame = frame_prevd(frame);
522 break; 852 break;
523 case FRAME_CONT: 853 case FRAME_CONT:
524#if LJ_HASFFI 854 if (frame_iscont_fficb(frame))
525 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
526 cf = cframe_prev(cf); 855 cf = cframe_prev(cf);
527#endif
528 frame = frame_prevd(frame); 856 frame = frame_prevd(frame);
529 break; 857 break;
530 case FRAME_CP: 858 case FRAME_CP:
@@ -536,11 +864,11 @@ static ptrdiff_t finderrfunc(lua_State *L)
536 break; 864 break;
537 case FRAME_PCALL: 865 case FRAME_PCALL:
538 case FRAME_PCALLH: 866 case FRAME_PCALLH:
539 if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ 867 if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
540 return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ 868 return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */
541 return 0; 869 return 0;
542 default: 870 default:
543 lua_assert(0); 871 lj_assertL(0, "bad frame type");
544 return 0; 872 return 0;
545 } 873 }
546 } 874 }
@@ -550,7 +878,7 @@ static ptrdiff_t finderrfunc(lua_State *L)
550/* Runtime error. */ 878/* Runtime error. */
551LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) 879LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
552{ 880{
553 ptrdiff_t ef = finderrfunc(L); 881 ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
554 if (ef) { 882 if (ef) {
555 TValue *errfunc, *top; 883 TValue *errfunc, *top;
556 lj_state_checkstack(L, LUA_MINSTACK * 2); /* Might raise new error. */ 884 lj_state_checkstack(L, LUA_MINSTACK * 2); /* Might raise new error. */
@@ -562,8 +890,9 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
562 lj_err_throw(L, LUA_ERRERR); 890 lj_err_throw(L, LUA_ERRERR);
563 } 891 }
564 L->status = LUA_ERRERR; 892 L->status = LUA_ERRERR;
565 copyTV(L, top, top-1); 893 copyTV(L, top+LJ_FR2, top-1);
566 copyTV(L, top-1, errfunc); 894 copyTV(L, top-1, errfunc);
895 if (LJ_FR2) setnilV(top++);
567 L->top = top+1; 896 L->top = top+1;
568 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ 897 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
569 } 898 }
@@ -577,14 +906,29 @@ void LJ_FASTCALL lj_err_stkov(lua_State *L)
577 lj_err_run(L); 906 lj_err_run(L);
578} 907}
579 908
909#if LJ_HASJIT
910/* Rethrow error after doing a trace exit. */
911LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
912{
913 if (errcode == LUA_ERRRUN)
914 lj_err_run(L);
915 else
916 lj_err_throw(L, errcode);
917}
918#endif
919
580/* Formatted runtime error message. */ 920/* Formatted runtime error message. */
581LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) 921LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
582{ 922{
583 const char *msg; 923 const char *msg;
584 va_list argp; 924 va_list argp;
585 va_start(argp, em); 925 va_start(argp, em);
926 if (LJ_HASJIT) {
927 TValue *base = tvref(G(L)->jit_base);
928 if (base) L->base = base;
929 }
586 if (curr_funcisL(L)) L->top = curr_topL(L); 930 if (curr_funcisL(L)) L->top = curr_topL(L);
587 msg = lj_str_pushvf(L, err2msg(em), argp); 931 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
588 va_end(argp); 932 va_end(argp);
589 lj_debug_addloc(L, msg, L->base-1, NULL); 933 lj_debug_addloc(L, msg, L->base-1, NULL);
590 lj_err_run(L); 934 lj_err_run(L);
@@ -602,11 +946,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
602{ 946{
603 char buff[LUA_IDSIZE]; 947 char buff[LUA_IDSIZE];
604 const char *msg; 948 const char *msg;
605 lj_debug_shortname(buff, src); 949 lj_debug_shortname(buff, src, line);
606 msg = lj_str_pushvf(L, err2msg(em), argp); 950 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
607 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 951 msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
608 if (tok) 952 if (tok)
609 lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); 953 lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
610 lj_err_throw(L, LUA_ERRSYNTAX); 954 lj_err_throw(L, LUA_ERRSYNTAX);
611} 955}
612 956
@@ -645,8 +989,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
645 const BCIns *pc = cframe_Lpc(L); 989 const BCIns *pc = cframe_Lpc(L);
646 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { 990 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
647 const char *tname = lj_typename(o); 991 const char *tname = lj_typename(o);
992 setframe_gc(o, obj2gco(L), LJ_TTHREAD);
993 if (LJ_FR2) o++;
648 setframe_pc(o, pc); 994 setframe_pc(o, pc);
649 setframe_gc(o, obj2gco(L));
650 L->top = L->base = o+1; 995 L->top = L->base = o+1;
651 err_msgv(L, LJ_ERR_BADCALL, tname); 996 err_msgv(L, LJ_ERR_BADCALL, tname);
652 } 997 }
@@ -656,28 +1001,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
656/* Error in context of caller. */ 1001/* Error in context of caller. */
657LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) 1002LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
658{ 1003{
659 TValue *frame = L->base-1; 1004 TValue *frame = NULL, *pframe = NULL;
660 TValue *pframe = NULL; 1005 if (!(LJ_HASJIT && tvref(G(L)->jit_base))) {
661 if (frame_islua(frame)) { 1006 frame = L->base-1;
662 pframe = frame_prevl(frame); 1007 if (frame_islua(frame)) {
663 } else if (frame_iscont(frame)) { 1008 pframe = frame_prevl(frame);
1009 } else if (frame_iscont(frame)) {
1010 if (frame_iscont_fficb(frame)) {
1011 pframe = frame;
1012 frame = NULL;
1013 } else {
1014 pframe = frame_prevd(frame);
664#if LJ_HASFFI 1015#if LJ_HASFFI
665 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) { 1016 /* Remove frame for FFI metamethods. */
666 pframe = frame; 1017 if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
667 frame = NULL; 1018 frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
668 } else 1019 L->base = pframe+1;
1020 L->top = frame;
1021 setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
1022 }
669#endif 1023#endif
670 {
671 pframe = frame_prevd(frame);
672#if LJ_HASFFI
673 /* Remove frame for FFI metamethods. */
674 if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
675 frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
676 L->base = pframe+1;
677 L->top = frame;
678 setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
679 } 1024 }
680#endif
681 } 1025 }
682 } 1026 }
683 lj_debug_addloc(L, msg, pframe, frame); 1027 lj_debug_addloc(L, msg, pframe, frame);
@@ -690,7 +1034,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
690 const char *msg; 1034 const char *msg;
691 va_list argp; 1035 va_list argp;
692 va_start(argp, em); 1036 va_start(argp, em);
693 msg = lj_str_pushvf(L, err2msg(em), argp); 1037 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
694 va_end(argp); 1038 va_end(argp);
695 lj_err_callermsg(L, msg); 1039 lj_err_callermsg(L, msg);
696} 1040}
@@ -710,9 +1054,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
710 if (narg < 0 && narg > LUA_REGISTRYINDEX) 1054 if (narg < 0 && narg > LUA_REGISTRYINDEX)
711 narg = (int)(L->top - L->base) + narg + 1; 1055 narg = (int)(L->top - L->base) + narg + 1;
712 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ 1056 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
713 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); 1057 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
714 else 1058 else
715 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); 1059 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
716 lj_err_callermsg(L, msg); 1060 lj_err_callermsg(L, msg);
717} 1061}
718 1062
@@ -722,7 +1066,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
722 const char *msg; 1066 const char *msg;
723 va_list argp; 1067 va_list argp;
724 va_start(argp, em); 1068 va_start(argp, em);
725 msg = lj_str_pushvf(L, err2msg(em), argp); 1069 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
726 va_end(argp); 1070 va_end(argp);
727 err_argmsg(L, narg, msg); 1071 err_argmsg(L, narg, msg);
728} 1072}
@@ -752,7 +1096,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
752 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; 1096 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
753 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; 1097 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
754 } 1098 }
755 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); 1099 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
756 err_argmsg(L, narg, msg); 1100 err_argmsg(L, narg, msg);
757} 1101}
758 1102
@@ -802,7 +1146,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
802 const char *msg; 1146 const char *msg;
803 va_list argp; 1147 va_list argp;
804 va_start(argp, fmt); 1148 va_start(argp, fmt);
805 msg = lj_str_pushvf(L, fmt, argp); 1149 msg = lj_strfmt_pushvf(L, fmt, argp);
806 va_end(argp); 1150 va_end(argp);
807 lj_err_callermsg(L, msg); 1151 lj_err_callermsg(L, msg);
808 return 0; /* unreachable */ 1152 return 0; /* unreachable */
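Note on the new LJ_UNWIND_EXT plumbing above: err_raise_ext() now raises the exception from a thread-local struct that stores the global_State pointer directly behind the _Unwind_Exception header, and the JIT personality handler recovers it with *(global_State **)(uex+1). Below is a minimal, self-contained sketch of that layout trick; the Fake* types and recover_g() are hypothetical stand-ins, only the technique mirrors the diff.

/* Sketch of "payload stored right after the unwind exception header".
** All names here are hypothetical; see err_raise_ext()/err_unwind_jit()
** in the diff above for the real thing.
*/
#include <stdio.h>

typedef struct FakeUnwindException {   /* Stand-in for _Unwind_Exception. */
  unsigned long long exclass;
} FakeUnwindException;

typedef struct FakeGlobalState { int dummy; } FakeGlobalState;

static struct {
  FakeUnwindException ex;   /* Header passed to the unwinder. */
  FakeGlobalState *g;       /* Extra context stored right behind it. */
} static_uex;

/* What the personality handler does with the header pointer it receives. */
static FakeGlobalState *recover_g(FakeUnwindException *uex)
{
  return *(FakeGlobalState **)(uex + 1);
}

int main(void)
{
  FakeGlobalState g;
  static_uex.ex.exclass = 0x4c4a3231ULL;  /* Arbitrary class value. */
  static_uex.g = &g;
  /* The raiser would pass &static_uex.ex to _Unwind_RaiseException();
  ** the handler gets the same pointer back and can reach the payload.
  */
  printf("recovered: %p (expected %p)\n",
         (void *)recover_g(&static_uex.ex), (void *)&g);
  return 0;
}

The sketch relies on the pointer member starting immediately after the 8-byte exception class, which holds here (and in the real struct) because both fields have pointer alignment.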
diff --git a/src/lj_err.h b/src/lj_err.h
index 15040922..67686cb7 100644
--- a/src/lj_err.h
+++ b/src/lj_err.h
@@ -24,7 +24,10 @@ LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
24LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); 24LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
25LJ_FUNC_NORET void lj_err_mem(lua_State *L); 25LJ_FUNC_NORET void lj_err_mem(lua_State *L);
26LJ_FUNC_NORET void LJ_FASTCALL lj_err_stkov(lua_State *L); 26LJ_FUNC_NORET void LJ_FASTCALL lj_err_stkov(lua_State *L);
27LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L); 27LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
28#if LJ_HASJIT
29LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
30#endif
28LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); 31LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
29LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, 32LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
30 BCLine line, ErrMsg em, va_list argp); 33 BCLine line, ErrMsg em, va_list argp);
@@ -39,4 +42,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
39LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); 42LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
40LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); 43LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
41 44
45#if LJ_UNWIND_JIT && !LJ_ABI_WIN
46LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info);
47LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info);
48#else
49#define lj_err_register_mcode(base, sz, info) (info)
50#define lj_err_deregister_mcode(base, sz, info) UNUSED(base)
51#endif
52
53#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT)
54LJ_FUNC void lj_err_verify(void);
55#else
56#define lj_err_verify() ((void)0)
57#endif
58
42#endif 59#endif
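The lj_err.h hunk above pairs the new unwind entry points with no-op macro fallbacks ((info) and UNUSED(base)), so call sites can invoke them unconditionally and the preprocessor decides whether anything actually happens. A self-contained mock of that pattern follows; DEMO_UNWIND_JIT, demo_register_mcode() and friends are hypothetical, only the macro shape is taken from the diff.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define UNUSED(x) ((void)(x))
#define DEMO_UNWIND_JIT 0   /* Flip to 1 to exercise the "real" path. */

#if DEMO_UNWIND_JIT
static uint8_t *demo_register_mcode(void *base, size_t sz, uint8_t *info)
{
  UNUSED(base); UNUSED(sz);
  memset(info, 0, 16);      /* Pretend to copy and patch an unwind template. */
  return info + 16;         /* First byte available for machine code. */
}
#define demo_deregister_mcode(base, sz, info) UNUSED(info)
#else
#define demo_register_mcode(base, sz, info)   (info)
#define demo_deregister_mcode(base, sz, info) UNUSED(base)
#endif

int main(void)
{
  uint8_t area[64];
  /* The caller is written once; the macros decide whether unwind info is
  ** emitted or the area is handed back untouched.
  */
  uint8_t *mcode = demo_register_mcode(area, sizeof(area), area);
  printf("usable mcode starts at offset %td\n", mcode - area);
  demo_deregister_mcode(area, sizeof(area), area);
  return 0;
}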
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 9c695ce5..109e909c 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable")
67ERRDEF(UNPACK, "too many results to unpack") 67ERRDEF(UNPACK, "too many results to unpack")
68ERRDEF(RDRSTR, "reader function must return a string") 68ERRDEF(RDRSTR, "reader function must return a string")
69ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) 69ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print"))
70ERRDEF(NUMRNG, "number out of range")
70ERRDEF(IDXRNG, "index out of range") 71ERRDEF(IDXRNG, "index out of range")
71ERRDEF(BASERNG, "base out of range") 72ERRDEF(BASERNG, "base out of range")
72ERRDEF(LVLRNG, "level out of range") 73ERRDEF(LVLRNG, "level out of range")
@@ -78,6 +79,7 @@ ERRDEF(SETFENV, LUA_QL("setfenv") " cannot change environment of given object")
78ERRDEF(CORUN, "cannot resume running coroutine") 79ERRDEF(CORUN, "cannot resume running coroutine")
79ERRDEF(CODEAD, "cannot resume dead coroutine") 80ERRDEF(CODEAD, "cannot resume dead coroutine")
80ERRDEF(COSUSP, "cannot resume non-suspended coroutine") 81ERRDEF(COSUSP, "cannot resume non-suspended coroutine")
82ERRDEF(PRNGSD, "PRNG seeding failed")
81ERRDEF(TABINS, "wrong number of arguments to " LUA_QL("insert")) 83ERRDEF(TABINS, "wrong number of arguments to " LUA_QL("insert"))
82ERRDEF(TABCAT, "invalid value (%s) at index %d in table for " LUA_QL("concat")) 84ERRDEF(TABCAT, "invalid value (%s) at index %d in table for " LUA_QL("concat"))
83ERRDEF(TABSORT, "invalid order function for sorting") 85ERRDEF(TABSORT, "invalid order function for sorting")
@@ -96,18 +98,12 @@ ERRDEF(STRPATX, "pattern too complex")
96ERRDEF(STRCAPI, "invalid capture index") 98ERRDEF(STRCAPI, "invalid capture index")
97ERRDEF(STRCAPN, "too many captures") 99ERRDEF(STRCAPN, "too many captures")
98ERRDEF(STRCAPU, "unfinished capture") 100ERRDEF(STRCAPU, "unfinished capture")
99ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) 101ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
100ERRDEF(STRFMTR, "invalid format (repeated flags)")
101ERRDEF(STRFMTW, "invalid format (width or precision too long)")
102ERRDEF(STRGSRV, "invalid replacement value (a %s)") 102ERRDEF(STRGSRV, "invalid replacement value (a %s)")
103ERRDEF(BADMODN, "name conflict for module " LUA_QS) 103ERRDEF(BADMODN, "name conflict for module " LUA_QS)
104#if LJ_HASJIT 104#if LJ_HASJIT
105ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") 105ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?")
106#if LJ_TARGET_X86ORX64
107ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2")
108#else
109ERRDEF(NOJIT, "JIT compiler disabled") 106ERRDEF(NOJIT, "JIT compiler disabled")
110#endif
111#elif defined(LJ_ARCH_NOJIT) 107#elif defined(LJ_ARCH_NOJIT)
112ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") 108ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
113#else 109#else
@@ -118,7 +114,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
118/* Lexer/parser errors. */ 114/* Lexer/parser errors. */
119ERRDEF(XMODE, "attempt to load chunk with wrong mode") 115ERRDEF(XMODE, "attempt to load chunk with wrong mode")
120ERRDEF(XNEAR, "%s near " LUA_QS) 116ERRDEF(XNEAR, "%s near " LUA_QS)
121ERRDEF(XELEM, "lexical element too long")
122ERRDEF(XLINES, "chunk has too many lines") 117ERRDEF(XLINES, "chunk has too many lines")
123ERRDEF(XLEVELS, "chunk has too many syntax levels") 118ERRDEF(XLEVELS, "chunk has too many syntax levels")
124ERRDEF(XNUMBER, "malformed number") 119ERRDEF(XNUMBER, "malformed number")
@@ -186,6 +181,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields")
186ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") 181ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
187#endif 182#endif
188 183
184#if LJ_HASBUFFER
185/* String buffer errors. */
186ERRDEF(BUFFER_SELF, "cannot put buffer into itself")
187ERRDEF(BUFFER_BADOPT, "bad options table")
188ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS)
189ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x")
190ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d")
191ERRDEF(BUFFER_DEPTH, "too deep to serialize")
192ERRDEF(BUFFER_DUPKEY, "duplicate table key")
193ERRDEF(BUFFER_EOB, "unexpected end of buffer")
194ERRDEF(BUFFER_LEFTOV, "left-over data in buffer")
195#endif
196
189#undef ERRDEF 197#undef ERRDEF
190 198
191/* Detecting unused error messages: 199/* Detecting unused error messages:
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 32d9e4c7..938c23b6 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_buf.h"
14#include "lj_str.h" 15#include "lj_str.h"
15#include "lj_tab.h" 16#include "lj_tab.h"
16#include "lj_frame.h" 17#include "lj_frame.h"
@@ -27,6 +28,8 @@
27#include "lj_dispatch.h" 28#include "lj_dispatch.h"
28#include "lj_vm.h" 29#include "lj_vm.h"
29#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
32#include "lj_serialize.h"
30 33
31/* Some local macros to save typing. Undef'd at the end. */ 34/* Some local macros to save typing. Undef'd at the end. */
32#define IR(ref) (&J->cur.ir[(ref)]) 35#define IR(ref) (&J->cur.ir[(ref)])
@@ -79,10 +82,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
79 GCstr *s; 82 GCstr *s;
80 if (!tvisnumber(o)) 83 if (!tvisnumber(o))
81 lj_trace_err(J, LJ_TRERR_BADTYPE); 84 lj_trace_err(J, LJ_TRERR_BADTYPE);
82 if (tvisint(o)) 85 s = lj_strfmt_number(J->L, o);
83 s = lj_str_fromint(J->L, intV(o));
84 else
85 s = lj_str_fromnum(J->L, &o->n);
86 setstrV(J->L, o, s); 86 setstrV(J->L, o, s);
87 return s; 87 return s;
88 } 88 }
@@ -98,27 +98,115 @@ static ptrdiff_t results_wanted(jit_State *J)
98 return -1; 98 return -1;
99} 99}
100 100
101/* Throw error for unsupported variant of fast function. */ 101static TValue *rec_stop_stitch_cp(lua_State *L, lua_CFunction dummy, void *ud)
102LJ_NORET static void recff_nyiu(jit_State *J)
103{ 102{
104 setfuncV(J->L, &J->errinfo, J->fn); 103 jit_State *J = (jit_State *)ud;
105 lj_trace_err_info(J, LJ_TRERR_NYIFFU); 104 lj_record_stop(J, LJ_TRLINK_STITCH, 0);
105 UNUSED(L); UNUSED(dummy);
106 return NULL;
106} 107}
107 108
108/* Fallback handler for all fast functions that are not recorded (yet). */ 109/* Trace stitching: add continuation below frame to start a new trace. */
110static void recff_stitch(jit_State *J)
111{
112 ASMFunction cont = lj_cont_stitch;
113 lua_State *L = J->L;
114 TValue *base = L->base;
115 BCReg nslot = J->maxslot + 1 + LJ_FR2;
116 TValue *nframe = base + 1 + LJ_FR2;
117 const BCIns *pc = frame_pc(base-1);
118 TValue *pframe = frame_prevl(base-1);
119 int errcode;
120
121 /* Move func + args up in Lua stack and insert continuation. */
122 memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
123 setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
124 setcont(base-LJ_FR2, cont);
125 setframe_pc(base, pc);
126 setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
127 L->base += 2 + LJ_FR2;
128 L->top += 2 + LJ_FR2;
129
130 /* Ditto for the IR. */
131 memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
132#if LJ_FR2
133 J->base[2] = TREF_FRAME;
134 J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
135 J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
136#else
137 J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
138#endif
139 J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
140 J->base += 2 + LJ_FR2;
141 J->baseslot += 2 + LJ_FR2;
142 J->framedepth++;
143
144 errcode = lj_vm_cpcall(L, NULL, J, rec_stop_stitch_cp);
145
146 /* Undo Lua stack changes. */
147 memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
148 setframe_pc(base-1, pc);
149 L->base -= 2 + LJ_FR2;
150 L->top -= 2 + LJ_FR2;
151
152 if (errcode) {
153 if (errcode == LUA_ERRRUN)
154 copyTV(L, L->top-1, L->top + (1 + LJ_FR2));
155 else
156 setintV(L->top-1, (int32_t)LJ_TRERR_RECERR);
157 lj_err_throw(L, errcode); /* Propagate errors. */
158 }
159}
160
161/* Fallback handler for fast functions that are not recorded (yet). */
109static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) 162static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
110{ 163{
111 setfuncV(J->L, &J->errinfo, J->fn); 164 if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
112 lj_trace_err_info(J, LJ_TRERR_NYIFF); 165 lj_trace_err_info(J, LJ_TRERR_TRACEUV);
113 UNUSED(rd); 166 } else {
167 /* Can only stitch from Lua call. */
168 if (J->framedepth && frame_islua(J->L->base-1)) {
169 BCOp op = bc_op(*frame_pc(J->L->base-1));
170 /* Stitched trace cannot start with *M op with variable # of args. */
171 if (!(op == BC_CALLM || op == BC_CALLMT ||
172 op == BC_RETM || op == BC_TSETM)) {
173 switch (J->fn->c.ffid) {
174 case FF_error:
175 case FF_debug_sethook:
176 case FF_jit_flush:
177 break; /* Don't stitch across special builtins. */
178 default:
179 recff_stitch(J); /* Use trace stitching. */
180 rd->nres = -1;
181 return;
182 }
183 }
184 }
185 /* Otherwise stop trace and return to interpreter. */
186 lj_record_stop(J, LJ_TRLINK_RETURN, 0);
187 rd->nres = -1;
188 }
114} 189}
115 190
116/* C functions can have arbitrary side-effects and are not recorded (yet). */ 191/* Fallback handler for unsupported variants of fast functions. */
117static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) 192#define recff_nyiu recff_nyi
193
194/* Must stop the trace for classic C functions with arbitrary side-effects. */
195#define recff_c recff_nyi
196
197/* Emit BUFHDR for the global temporary buffer. */
198static TRef recff_bufhdr(jit_State *J)
118{ 199{
119 setfuncV(J->L, &J->errinfo, J->fn); 200 return emitir(IRT(IR_BUFHDR, IRT_PGC),
120 lj_trace_err_info(J, LJ_TRERR_NYICF); 201 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
121 UNUSED(rd); 202}
203
204/* Emit TMPREF. */
205static TRef recff_tmpref(jit_State *J, TRef tr, int mode)
206{
207 if (!LJ_DUALNUM && tref_isinteger(tr))
208 tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
209 return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode);
122} 210}
123 211
124/* -- Base library fast functions ----------------------------------------- */ 212/* -- Base library fast functions ----------------------------------------- */
@@ -135,7 +223,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
135 uint32_t t; 223 uint32_t t;
136 if (tvisnumber(&rd->argv[0])) 224 if (tvisnumber(&rd->argv[0]))
137 t = ~LJ_TNUMX; 225 t = ~LJ_TNUMX;
138 else if (LJ_64 && tvislightud(&rd->argv[0])) 226 else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
139 t = ~LJ_TLIGHTUD; 227 t = ~LJ_TLIGHTUD;
140 else 228 else
141 t = ~itype(&rd->argv[0]); 229 t = ~itype(&rd->argv[0]);
@@ -167,7 +255,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd)
167 ix.tab = tr; 255 ix.tab = tr;
168 copyTV(J->L, &ix.tabv, &rd->argv[0]); 256 copyTV(J->L, &ix.tabv, &rd->argv[0]);
169 lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ 257 lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */
170 fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); 258 fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META);
171 mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; 259 mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
172 emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); 260 emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
173 if (!tref_isnil(mt)) 261 if (!tref_isnil(mt))
@@ -220,7 +308,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd)
220 if (tref_isstr(tr)) 308 if (tref_isstr(tr))
221 J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); 309 J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
222 else if (tref_istab(tr)) 310 else if (tref_istab(tr))
223 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); 311 J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL);
224 /* else: Interpreter will throw. */ 312 /* else: Interpreter will throw. */
225 UNUSED(rd); 313 UNUSED(rd);
226} 314}
@@ -233,9 +321,9 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
233 if (strV(tv)->len == 1) { 321 if (strV(tv)->len == 1) {
234 emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); 322 emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv)));
235 } else { 323 } else {
236 TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); 324 TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
237 TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); 325 TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
238 emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); 326 emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#'));
239 } 327 }
240 return 0; 328 return 0;
241 } else { /* select(n, ...) */ 329 } else { /* select(n, ...) */
@@ -263,7 +351,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
263 J->base[i] = J->base[start+i]; 351 J->base[i] = J->base[start+i];
264 } /* else: Interpreter will throw. */ 352 } /* else: Interpreter will throw. */
265 } else { 353 } else {
266 recff_nyiu(J); 354 recff_nyiu(J, rd);
355 return;
267 } 356 }
268 } /* else: Interpreter will throw. */ 357 } /* else: Interpreter will throw. */
269} 358}
@@ -274,14 +363,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
274 TRef base = J->base[1]; 363 TRef base = J->base[1];
275 if (tr && !tref_isnil(base)) { 364 if (tr && !tref_isnil(base)) {
276 base = lj_opt_narrow_toint(J, base); 365 base = lj_opt_narrow_toint(J, base);
277 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) 366 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
278 recff_nyiu(J); 367 recff_nyiu(J, rd);
368 return;
369 }
279 } 370 }
280 if (tref_isnumber_str(tr)) { 371 if (tref_isnumber_str(tr)) {
281 if (tref_isstr(tr)) { 372 if (tref_isstr(tr)) {
282 TValue tmp; 373 TValue tmp;
283 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) 374 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
284 recff_nyiu(J); /* Would need an inverted STRTO for this case. */ 375 recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
376 return;
377 }
285 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); 378 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
286 } 379 }
287#if LJ_HASFFI 380#if LJ_HASFFI
@@ -313,10 +406,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
313 int errcode; 406 int errcode;
314 TValue argv0; 407 TValue argv0;
315 /* Temporarily insert metamethod below object. */ 408 /* Temporarily insert metamethod below object. */
316 J->base[1] = J->base[0]; 409 J->base[1+LJ_FR2] = J->base[0];
317 J->base[0] = ix.mobj; 410 J->base[0] = ix.mobj;
318 copyTV(J->L, &argv0, &rd->argv[0]); 411 copyTV(J->L, &argv0, &rd->argv[0]);
319 copyTV(J->L, &rd->argv[1], &rd->argv[0]); 412 copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
320 copyTV(J->L, &rd->argv[0], &ix.mobjv); 413 copyTV(J->L, &rd->argv[0], &ix.mobjv);
321 /* Need to protect lj_record_tailcall because it may throw. */ 414 /* Need to protect lj_record_tailcall because it may throw. */
322 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); 415 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@@ -336,13 +429,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
336 if (tref_isstr(tr)) { 429 if (tref_isstr(tr)) {
337 /* Ignore __tostring in the string base metatable. */ 430 /* Ignore __tostring in the string base metatable. */
338 /* Pass on result in J->base[0]. */ 431 /* Pass on result in J->base[0]. */
339 } else if (!recff_metacall(J, rd, MM_tostring)) { 432 } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
340 if (tref_isnumber(tr)) { 433 if (tref_isnumber(tr)) {
341 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 434 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
435 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
342 } else if (tref_ispri(tr)) { 436 } else if (tref_ispri(tr)) {
343 J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); 437 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
344 } else { 438 } else {
345 recff_nyiu(J); 439 recff_nyiu(J, rd);
440 return;
346 } 441 }
347 } 442 }
348} 443}
@@ -364,15 +459,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
364 } /* else: Interpreter will throw. */ 459 } /* else: Interpreter will throw. */
365} 460}
366 461
367static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) 462static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
368{ 463{
369 TRef tr = J->base[0]; 464 TRef tr = J->base[0];
370 if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && 465 if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) &&
371 recff_metacall(J, rd, MM_ipairs))) { 466 recff_metacall(J, rd, MM_pairs + rd->data))) {
372 if (tref_istab(tr)) { 467 if (tref_istab(tr)) {
373 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); 468 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
374 J->base[1] = tr; 469 J->base[1] = tr;
375 J->base[2] = lj_ir_kint(J, 0); 470 J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
376 rd->nres = 3; 471 rd->nres = 3;
377 } /* else: Interpreter will throw. */ 472 } /* else: Interpreter will throw. */
378 } 473 }
@@ -381,8 +476,13 @@ static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
381static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) 476static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
382{ 477{
383 if (J->maxslot >= 1) { 478 if (J->maxslot >= 1) {
479#if LJ_FR2
480 /* Shift function arguments up. */
481 memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
482#endif
384 lj_record_call(J, 0, J->maxslot - 1); 483 lj_record_call(J, 0, J->maxslot - 1);
385 rd->nres = -1; /* Pending call. */ 484 rd->nres = -1; /* Pending call. */
485 J->needsnap = 1; /* Start catching on-trace errors. */
386 } /* else: Interpreter will throw. */ 486 } /* else: Interpreter will throw. */
387} 487}
388 488
@@ -406,6 +506,10 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
406 copyTV(J->L, &argv1, &rd->argv[1]); 506 copyTV(J->L, &argv1, &rd->argv[1]);
407 copyTV(J->L, &rd->argv[0], &argv1); 507 copyTV(J->L, &rd->argv[0], &argv1);
408 copyTV(J->L, &rd->argv[1], &argv0); 508 copyTV(J->L, &rd->argv[1], &argv0);
509#if LJ_FR2
510 /* Shift function arguments up. */
511 memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
512#endif
409 /* Need to protect lj_record_call because it may throw. */ 513 /* Need to protect lj_record_call because it may throw. */
410 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); 514 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
411 /* Always undo Lua stack swap to avoid confusing the interpreter. */ 515 /* Always undo Lua stack swap to avoid confusing the interpreter. */
@@ -414,7 +518,54 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
414 if (errcode) 518 if (errcode)
415 lj_err_throw(J->L, errcode); /* Propagate errors. */ 519 lj_err_throw(J->L, errcode); /* Propagate errors. */
416 rd->nres = -1; /* Pending call. */ 520 rd->nres = -1; /* Pending call. */
521 J->needsnap = 1; /* Start catching on-trace errors. */
522 } /* else: Interpreter will throw. */
523}
524
525static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
526{
527 TRef tr = J->base[0];
528 /* Only support getfenv(0) for now. */
529 if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
530 TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
531 J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
532 return;
533 }
534 recff_nyiu(J, rd);
535}
536
537static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd)
538{
539#if LJ_BE
540 /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
541 ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
542 */
543 recff_nyi(J, rd);
544#else
545 TRef tab = J->base[0];
546 if (tref_istab(tab)) {
547 RecordIndex ix;
548 cTValue *keyv;
549 ix.tab = tab;
550 if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */
551 ix.key = lj_ir_kint(J, 0);
552 keyv = niltvg(J2G(J));
553 } else {
554 TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
555 ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp);
556 keyv = &rd->argv[1];
557 }
558 copyTV(J->L, &ix.tabv, &rd->argv[0]);
559 ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv);
560 /* Omit the value, if not used by the caller. */
561 ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) &&
562 bc_b(frame_pc(J->L->base-1)[-1])-1 < 2);
563 ix.mobj = 0; /* We don't need the next index. */
564 rd->nres = lj_record_next(J, &ix);
565 J->base[0] = ix.key;
566 J->base[1] = ix.val;
417 } /* else: Interpreter will throw. */ 567 } /* else: Interpreter will throw. */
568#endif
418} 569}
419 570
420/* -- Math library fast functions ----------------------------------------- */ 571/* -- Math library fast functions ----------------------------------------- */
@@ -422,7 +573,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
422static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) 573static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
423{ 574{
424 TRef tr = lj_ir_tonum(J, J->base[0]); 575 TRef tr = lj_ir_tonum(J, J->base[0]);
425 J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); 576 J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS));
426 UNUSED(rd); 577 UNUSED(rd);
427} 578}
428 579
@@ -475,7 +626,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd)
475{ 626{
476 TRef tr = lj_ir_tonum(J, J->base[0]); 627 TRef tr = lj_ir_tonum(J, J->base[0]);
477 TRef tr2 = lj_ir_tonum(J, J->base[1]); 628 TRef tr2 = lj_ir_tonum(J, J->base[1]);
478 J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2); 629 J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2);
479 UNUSED(rd); 630 UNUSED(rd);
480} 631}
481 632
@@ -492,55 +643,16 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd)
492 UNUSED(rd); 643 UNUSED(rd);
493} 644}
494 645
495/* Record math.asin, math.acos, math.atan. */ 646static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd)
496static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd)
497{
498 TRef y = lj_ir_tonum(J, J->base[0]);
499 TRef x = lj_ir_knum_one(J);
500 uint32_t ffid = rd->data;
501 if (ffid != FF_math_atan) {
502 TRef tmp = emitir(IRTN(IR_MUL), y, y);
503 tmp = emitir(IRTN(IR_SUB), x, tmp);
504 tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT);
505 if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; }
506 }
507 J->base[0] = emitir(IRTN(IR_ATAN2), y, x);
508}
509
510static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd)
511{ 647{
512 TRef tr = lj_ir_tonum(J, J->base[0]); 648 TRef tr = lj_ir_tonum(J, J->base[0]);
513 J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); 649 J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data);
514} 650}
515 651
516static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
517{
518 TRef tr = J->base[0];
519 if (tref_isinteger(tr)) {
520 J->base[0] = tr;
521 J->base[1] = lj_ir_kint(J, 0);
522 } else {
523 TRef trt;
524 tr = lj_ir_tonum(J, tr);
525 trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC);
526 J->base[0] = trt;
527 J->base[1] = emitir(IRTN(IR_SUB), tr, trt);
528 }
529 rd->nres = 2;
530}
531
532static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
533{
534 TRef tr = lj_ir_tonum(J, J->base[0]);
535 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
536 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
537 UNUSED(rd);
538}
539
540static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 652static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
541{ 653{
542 J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], 654 J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1],
543 &rd->argv[0], &rd->argv[1]); 655 &rd->argv[0], &rd->argv[1], IR_POW);
544 UNUSED(rd); 656 UNUSED(rd);
545} 657}
546 658
@@ -567,7 +679,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
567 GCudata *ud = udataV(&J->fn->c.upvalue[0]); 679 GCudata *ud = udataV(&J->fn->c.upvalue[0]);
568 TRef tr, one; 680 TRef tr, one;
569 lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ 681 lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */
570 tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); 682 tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud)));
571 one = lj_ir_knum_one(J); 683 one = lj_ir_knum_one(J);
572 tr = emitir(IRTN(IR_SUB), tr, one); 684 tr = emitir(IRTN(IR_SUB), tr, one);
573 if (J->base[0]) { 685 if (J->base[0]) {
@@ -591,48 +703,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
591 703
592/* -- Bit library fast functions ------------------------------------------ */ 704/* -- Bit library fast functions ------------------------------------------ */
593 705
594/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 706/* Record bit.tobit. */
707static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
708{
709 TRef tr = J->base[0];
710#if LJ_HASFFI
711 if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
712#endif
713 J->base[0] = lj_opt_narrow_tobit(J, tr);
714 UNUSED(rd);
715}
716
717/* Record unary bit.bnot, bit.bswap. */
595static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 718static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
596{ 719{
597 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 720#if LJ_HASFFI
598 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 721 if (recff_bit64_unary(J, rd))
722 return;
723#endif
724 J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
599} 725}
600 726
601/* Record N-ary bit.band, bit.bor, bit.bxor. */ 727/* Record N-ary bit.band, bit.bor, bit.bxor. */
602static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 728static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
603{ 729{
604 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 730#if LJ_HASFFI
605 uint32_t op = rd->data; 731 if (recff_bit64_nary(J, rd))
606 BCReg i; 732 return;
607 for (i = 1; J->base[i] != 0; i++) 733#endif
608 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); 734 {
609 J->base[0] = tr; 735 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
736 uint32_t ot = IRTI(rd->data);
737 BCReg i;
738 for (i = 1; J->base[i] != 0; i++)
739 tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
740 J->base[0] = tr;
741 }
610} 742}
611 743
612/* Record bit shifts. */ 744/* Record bit shifts. */
613static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 745static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
614{ 746{
615 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 747#if LJ_HASFFI
616 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); 748 if (recff_bit64_shift(J, rd))
617 IROp op = (IROp)rd->data; 749 return;
618 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 750#endif
619 !tref_isk(tsh)) 751 {
620 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 752 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
753 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
754 IROp op = (IROp)rd->data;
755 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
756 !tref_isk(tsh))
757 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
621#ifdef LJ_TARGET_UNIFYROT 758#ifdef LJ_TARGET_UNIFYROT
622 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 759 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
623 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 760 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
624 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 761 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
762 }
763#endif
764 J->base[0] = emitir(IRTI(op), tr, tsh);
625 } 765 }
766}
767
768static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
769{
770#if LJ_HASFFI
771 TRef hdr = recff_bufhdr(J);
772 TRef tr = recff_bit64_tohex(J, rd, hdr);
773 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
774#else
775 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
626#endif 776#endif
627 J->base[0] = emitir(IRTI(op), tr, tsh);
628} 777}
629 778
630/* -- String library fast functions --------------------------------------- */ 779/* -- String library fast functions --------------------------------------- */
631 780
632static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) 781/* Specialize to relative starting position for string. */
782static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
783 TRef trlen, TRef tr0)
633{ 784{
634 J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); 785 int32_t start = *st;
635 UNUSED(rd); 786 if (start < 0) {
787 emitir(IRTGI(IR_LT), tr, tr0);
788 tr = emitir(IRTI(IR_ADD), trlen, tr);
789 start = start + (int32_t)s->len;
790 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
791 if (start < 0) {
792 tr = tr0;
793 start = 0;
794 }
795 } else if (start == 0) {
796 emitir(IRTGI(IR_EQ), tr, tr0);
797 tr = tr0;
798 } else {
799 tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
800 emitir(IRTGI(IR_GE), tr, tr0);
801 start--;
802 }
803 *st = start;
804 return tr;
636} 805}
637 806
638/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ 807/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -679,39 +848,21 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
679 } else if ((MSize)end <= str->len) { 848 } else if ((MSize)end <= str->len) {
680 emitir(IRTGI(IR_ULE), trend, trlen); 849 emitir(IRTGI(IR_ULE), trend, trlen);
681 } else { 850 } else {
682 emitir(IRTGI(IR_GT), trend, trlen); 851 emitir(IRTGI(IR_UGT), trend, trlen);
683 end = (int32_t)str->len; 852 end = (int32_t)str->len;
684 trend = trlen; 853 trend = trlen;
685 } 854 }
686 if (start < 0) { 855 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
687 emitir(IRTGI(IR_LT), trstart, tr0);
688 trstart = emitir(IRTI(IR_ADD), trlen, trstart);
689 start = start+(int32_t)str->len;
690 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
691 if (start < 0) {
692 trstart = tr0;
693 start = 0;
694 }
695 } else {
696 if (start == 0) {
697 emitir(IRTGI(IR_EQ), trstart, tr0);
698 trstart = tr0;
699 } else {
700 trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
701 emitir(IRTGI(IR_GE), trstart, tr0);
702 start--;
703 }
704 }
705 if (rd->data) { /* Return string.sub result. */ 856 if (rd->data) { /* Return string.sub result. */
706 if (end - start >= 0) { 857 if (end - start >= 0) {
707 /* Also handle empty range here, to avoid extra traces. */ 858 /* Also handle empty range here, to avoid extra traces. */
708 TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); 859 TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
709 emitir(IRTGI(IR_GE), trslen, tr0); 860 emitir(IRTGI(IR_GE), trslen, tr0);
710 trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); 861 trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
711 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); 862 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
712 } else { /* Range underflow: return empty string. */ 863 } else { /* Range underflow: return empty string. */
713 emitir(IRTGI(IR_LT), trend, trstart); 864 emitir(IRTGI(IR_LT), trend, trstart);
714 J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); 865 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
715 } 866 }
716 } else { /* Return string.byte result(s). */ 867 } else { /* Return string.byte result(s). */
717 ptrdiff_t i, len = end - start; 868 ptrdiff_t i, len = end - start;
@@ -723,7 +874,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
723 rd->nres = len; 874 rd->nres = len;
724 for (i = 0; i < len; i++) { 875 for (i = 0; i < len; i++) {
725 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); 876 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i));
726 tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); 877 tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp);
727 J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); 878 J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
728 } 879 }
729 } else { /* Empty range or range underflow: return no results. */ 880 } else { /* Empty range or range underflow: return no results. */
@@ -733,48 +884,542 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
733 } 884 }
734} 885}
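Illustrative Lua behaviour covered by this recorder, for string.byte multi-results and string.sub range underflow:
  local s = "ABC"
  print(s:byte(1, 3)) -- 65 66 67: one XLOAD per returned byte on the trace
  print(s:sub(3, 2))  -- "": range underflow yields the interned empty string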
735 886
 736/* -- Table library fast functions ---------------------------------------- */
 737
 738static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
 739{
 740 if (tref_istab(J->base[0]))
 741 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]);
 742 /* else: Interpreter will throw. */
 887static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
 888{
 889 TRef k255 = lj_ir_kint(J, 255);
 890 BCReg i;
 891 for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */
892 TRef tr = lj_opt_narrow_toint(J, J->base[i]);
893 emitir(IRTGI(IR_ULE), tr, k255);
894 J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
895 }
896 if (i > 1) { /* Concatenate the strings, if there's more than one. */
897 TRef hdr = recff_bufhdr(J), tr = hdr;
898 for (i = 0; J->base[i] != 0; i++)
899 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
900 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
901 } else if (i == 0) {
902 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
903 }
743 UNUSED(rd); 904 UNUSED(rd);
744} 905}
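Illustrative Lua usage for the new string.char recorder (each value is guarded to 0..255, results are concatenated via the buffer IR):
  print(string.char(104, 105)) -- "hi"
  print(string.char())         -- "": no arguments yield the empty string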
745 906
 746static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd)
 747{
 748 TRef tab = J->base[0];
 749 rd->nres = 0;
 750 if (tref_istab(tab)) {
 751 if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */
 752 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab);
 753 GCtab *t = tabV(&rd->argv[0]);
 754 MSize len = lj_tab_len(t);
 755 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
 756 if (len) {
 757 RecordIndex ix;
 758 ix.tab = tab;
 759 ix.key = trlen;
 760 settabV(J->L, &ix.tabv, t);
 761 setintV(&ix.keyv, len);
 762 ix.idxchain = 0;
 763 if (results_wanted(J) != 0) { /* Specialize load only if needed. */
 764 ix.val = 0;
 765 J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */
 766 rd->nres = 1;
 767 /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */
 907static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
 908{
 909 TRef str = lj_ir_tostr(J, J->base[0]);
 910 TRef rep = lj_opt_narrow_toint(J, J->base[1]);
 911 TRef hdr, tr, str2 = 0;
 912 if (!tref_isnil(J->base[2])) {
 913 TRef sep = lj_ir_tostr(J, J->base[2]);
 914 int32_t vrep = argv2int(J, &rd->argv[1]);
 915 emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
 916 if (vrep > 1) {
 917 TRef hdr2 = recff_bufhdr(J);
 918 TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep);
 919 tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str);
 920 str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2);
 921 }
 922 }
 923 tr = hdr = recff_bufhdr(J);
 924 if (str2) {
 925 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str);
 926 str = str2;
 927 rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
 928 }
929 tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
930 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
931}
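Illustrative Lua usage, including the separator form (a LuaJIT 2.1 extension) that takes the str2 path above:
  print(string.rep("ab", 3))      -- "ababab"
  print(string.rep("ab", 3, ",")) -- "ab,ab,ab"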
932
933static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
934{
935 TRef str = lj_ir_tostr(J, J->base[0]);
936 TRef hdr = recff_bufhdr(J);
937 TRef tr = lj_ir_call(J, rd->data, hdr, str);
938 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
939}
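Illustrative Lua usage for the generic string-op recorder (rd->data selects the buffer call, e.g. lower, upper or reverse):
  print(("MiXeD"):lower(), ("MiXeD"):upper(), ("abc"):reverse()) -- mixed  MIXED  cba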
940
941static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
942{
943 TRef trstr = lj_ir_tostr(J, J->base[0]);
944 TRef trpat = lj_ir_tostr(J, J->base[1]);
945 TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
946 TRef tr0 = lj_ir_kint(J, 0);
947 TRef trstart;
948 GCstr *str = argv2str(J, &rd->argv[0]);
949 GCstr *pat = argv2str(J, &rd->argv[1]);
950 int32_t start;
951 J->needsnap = 1;
952 if (tref_isnil(J->base[2])) {
953 trstart = lj_ir_kint(J, 1);
954 start = 1;
955 } else {
956 trstart = lj_opt_narrow_toint(J, J->base[2]);
957 start = argv2int(J, &rd->argv[2]);
958 }
959 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
960 if ((MSize)start <= str->len) {
961 emitir(IRTGI(IR_ULE), trstart, trlen);
962 } else {
963 emitir(IRTGI(IR_UGT), trstart, trlen);
964#if LJ_52
965 J->base[0] = TREF_NIL;
966 return;
967#else
968 trstart = trlen;
969 start = str->len;
970#endif
971 }
972 /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
973 if ((J->base[2] && tref_istruecond(J->base[3])) ||
974 (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
975 !lj_str_haspattern(pat))) { /* Search for fixed string. */
976 TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
977 TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0);
978 TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
979 TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
980 TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
981 TRef trp0 = lj_ir_kkptr(J, NULL);
982 if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
983 str->len-(MSize)start, pat->len)) {
984 TRef pos;
985 emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
986 /* Recompute offset. trsptr may not point into trstr after folding. */
987 pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart);
988 J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
989 J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
990 rd->nres = 2;
991 } else {
992 emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
993 J->base[0] = TREF_NIL;
994 }
995 } else { /* Search for pattern. */
996 recff_nyiu(J, rd);
997 return;
998 }
999}
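Illustrative Lua behaviour: fixed-string searches are recorded via lj_str_find, while real patterns still fall back to the interpreter (the NYI branch):
  local s = "hello world"
  print(s:find("wor", 1, true)) -- 7 9: plain search, recorded
  print(s:find("%w+"))          -- 1 5: pattern matching, not recorded (NYI)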
1000
1001static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx)
1002{
1003 ptrdiff_t arg = sbufx;
1004 TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]);
1005 GCstr *fmt = argv2str(J, &rd->argv[arg]);
1006 FormatState fs;
1007 SFormat sf;
1008 int nfmt = 0;
1009 /* Specialize to the format string. */
1010 emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
1011 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
1012 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
1013 TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg];
1014 TRef trsf = lj_ir_kint(J, (int32_t)sf);
1015 IRCallID id;
1016 switch (STRFMT_TYPE(sf)) {
1017 case STRFMT_LIT:
1018 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
1019 lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
1020 break;
1021 case STRFMT_INT:
1022 id = IRCALL_lj_strfmt_putfnum_int;
1023 handle_int:
1024 if (!tref_isinteger(tra)) {
1025#if LJ_HASFFI
1026 if (tref_iscdata(tra)) {
1027 tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]);
1028 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
1029 break;
768 } 1030 }
769 ix.val = TREF_NIL; 1031#endif
770 lj_record_idx(J, &ix); /* Remove value. */ 1032 goto handle_num;
771 } 1033 }
772 } else { /* Complex case: remove in the middle. */ 1034 if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
773 recff_nyiu(J); 1035 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
1036 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
1037 } else {
1038#if LJ_HASFFI
1039 tra = emitir(IRT(IR_CONV, IRT_U64), tra,
1040 (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
1041 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
1042 lj_needsplit(J);
1043#else
1044 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
1045 return;
1046#endif
1047 }
1048 break;
1049 case STRFMT_UINT:
1050 id = IRCALL_lj_strfmt_putfnum_uint;
1051 goto handle_int;
1052 case STRFMT_NUM:
1053 id = IRCALL_lj_strfmt_putfnum;
1054 handle_num:
1055 tra = lj_ir_tonum(J, tra);
1056 tr = lj_ir_call(J, id, tr, trsf, tra);
1057 if (LJ_SOFTFP32) lj_needsplit(J);
1058 break;
1059 case STRFMT_STR:
1060 if (!tref_isstr(tra)) {
1061 recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
1062 /* NYI: also buffers. */
1063 return;
1064 }
1065 if (sf == STRFMT_STR) /* Shortcut for plain %s. */
1066 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra);
1067 else if ((sf & STRFMT_T_QUOTED))
1068 tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
1069 else
1070 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
1071 break;
1072 case STRFMT_CHAR:
1073 tra = lj_opt_narrow_toint(J, tra);
1074 if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
1075 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
1076 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
1077 else
1078 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
1079 break;
1080 case STRFMT_PTR: /* NYI */
1081 case STRFMT_ERR:
1082 default:
1083 recff_nyiu(J, rd);
1084 return;
1085 }
1086 if (++nfmt > 100) lj_trace_err(J, LJ_TRERR_TRACEOV);
1087 }
1088 if (sbufx) {
1089 emitir(IRT(IR_USE, IRT_NIL), tr, 0);
1090 } else {
1091 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
1092 }
1093}
1094
1095static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
1096{
1097 recff_format(J, rd, recff_bufhdr(J), 0);
1098}
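Since the trace is specialized to the literal format string, hot string.format call sites should keep the format constant; a hedged Lua sketch (illustrative only, %p and the __tostring case for %s remain NYI):
  local function label(i, name)
    return string.format("item %d: %s", i, name) -- records the %d and %s shortcuts
  end
  print(label(1, "x")) -- "item 1: x"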
1099
1100/* -- Buffer library fast functions --------------------------------------- */
1101
1102#if LJ_HASBUFFER
1103
1104static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud)
1105{
1106 return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L);
1107}
1108
1109static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val)
1110{
1111 TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L);
1112 emitir(IRT(IR_FSTORE, IRT_PGC), fref, val);
1113}
1114
1115static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl)
1116{
1117 return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl);
1118}
1119
1120static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val)
1121{
1122 TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl);
1123 emitir(IRT(IR_FSTORE, IRT_PTR), fref, val);
1124}
1125
1126static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw)
1127{
1128 TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr);
1129 if (LJ_64)
1130 len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
1131 return len;
1132}
1133
1134/* Emit typecheck for string buffer. */
1135static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg)
1136{
1137 TRef trtype, ud = J->base[arg];
1138 if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE);
1139 trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
1140 emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER));
1141 J->needsnap = 1;
1142 return ud;
1143}
1144
1145/* Emit BUFHDR for write to extended string buffer. */
1146static TRef recff_sbufx_write(jit_State *J, TRef ud)
1147{
1148 TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata)));
1149 return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
1150}
1151
1152/* Check for integer in range for the buffer API. */
1153static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, ptrdiff_t arg)
1154{
1155 TRef tr = J->base[arg];
1156 TRef trlim = lj_ir_kint(J, LJ_MAX_BUF);
1157 if (tref_isinteger(tr)) {
1158 emitir(IRTGI(IR_ULE), tr, trlim);
1159 } else if (tref_isnum(tr)) {
1160 tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
1161 emitir(IRTGI(IR_ULE), tr, trlim);
1162#if LJ_HASFFI
1163 } else if (tref_iscdata(tr)) {
1164 tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]);
1165 emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF));
1166 tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE);
1167#else
1168 UNUSED(rd);
1169#endif
1170 } else {
1171 lj_trace_err(J, LJ_TRERR_BADTYPE);
1172 }
1173 return tr;
1174}
1175
1176static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd)
1177{
1178 TRef ud = recff_sbufx_check(J, rd, 0);
1179 SBufExt *sbx = bufV(&rd->argv[0]);
1180 int iscow = (int)sbufiscow(sbx);
1181 TRef trl = recff_sbufx_get_L(J, ud);
1182 TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
1183 TRef zeropgc = lj_ir_kintpgc(J, 0);
1184 emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc);
1185 if (iscow) {
1186 TRef zerop = lj_ir_kintp(J, 0);
1187 trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
1188 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop);
1189 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop);
1190 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop);
1191 recff_sbufx_set_L(J, ud, trl);
1192 emitir(IRT(IR_FSTORE, IRT_PGC),
1193 emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc);
1194 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop);
1195 } else {
1196 TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
1197 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
1198 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb);
1199 }
1200}
1201
1202static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd)
1203{
1204 TRef ud = recff_sbufx_check(J, rd, 0);
1205 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1206 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1207 TRef len = recff_sbufx_len(J, trr, trw);
1208 TRef trn = recff_sbufx_checkint(J, rd, 1);
1209 len = emitir(IRTI(IR_MIN), len, trn);
1210 trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
1211 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
1212}
1213
1214static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
1215{
1216 TRef ud = recff_sbufx_check(J, rd, 0);
1217 TRef trbuf = recff_sbufx_write(J, ud);
1218 TRef tr = J->base[1];
1219 if (tref_isstr(tr)) {
1220 TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
1221 TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
1222 IRIns *irp = IR(tref_ref(trp));
1223 /* trp must point into the anchored obj, even after folding. */
1224 if (irp->o == IR_STRREF)
1225 tr = irp->op1;
1226 else if (!tref_isk(tr))
1227 trp = emitir(IRT(IR_ADD, IRT_PGC), tr, lj_ir_kintpgc(J, sizeof(GCstr)));
1228 lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
1229#if LJ_HASFFI
1230 } else if (tref_iscdata(tr)) {
1231 TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]);
1232 TRef len = recff_sbufx_checkint(J, rd, 2);
1233 lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
1234#endif
1235 } /* else: Interpreter will throw. */
1236}
1237
1238static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd)
1239{
1240 TRef ud = recff_sbufx_check(J, rd, 0);
1241 TRef trbuf = recff_sbufx_write(J, ud);
1242 TRef tr;
1243 ptrdiff_t arg;
1244 if (!J->base[1]) return;
1245 for (arg = 1; (tr = J->base[arg]); arg++) {
1246 if (tref_isudata(tr)) {
1247 TRef ud2 = recff_sbufx_check(J, rd, arg);
1248 emitir(IRTG(IR_NE, IRT_PGC), ud, ud2);
1249 }
1250 }
1251 for (arg = 1; (tr = J->base[arg]); arg++) {
1252 if (tref_isstr(tr)) {
1253 trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr);
1254 } else if (tref_isnumber(tr)) {
1255 trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf,
1256 emitir(IRT(IR_TOSTR, IRT_STR), tr,
1257 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT));
1258 } else if (tref_isudata(tr)) {
1259 TRef trr = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_R);
1260 TRef trw = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_W);
1261 TRef len = recff_sbufx_len(J, trr, trw);
1262 trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len);
1263 } else {
1264 recff_nyiu(J, rd);
1265 }
1266 }
1267 emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
1268}
1269
1270static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd)
1271{
1272 TRef ud = recff_sbufx_check(J, rd, 0);
1273 TRef trbuf = recff_sbufx_write(J, ud);
1274 recff_format(J, rd, trbuf, 1);
1275}
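Illustrative string.buffer usage for the formatted-put path (assumes a 2.1 build with the buffer library):
  local buf = require("string.buffer").new()
  buf:putf("%d/%s", 3, "x") -- same format handling as string.format, written to the buffer
  print(tostring(buf))      -- "3/x"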
1276
1277static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd)
1278{
1279 TRef ud = recff_sbufx_check(J, rd, 0);
1280 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1281 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1282 TRef tr;
1283 ptrdiff_t arg;
1284 if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; }
1285 for (arg = 0; (tr = J->base[arg+1]); arg++) {
1286 if (!tref_isnil(tr)) {
1287 J->base[arg+1] = recff_sbufx_checkint(J, rd, arg+1);
1288 }
1289 }
1290 for (arg = 0; (tr = J->base[arg+1]); arg++) {
1291 TRef len = recff_sbufx_len(J, trr, trw);
1292 if (tref_isnil(tr)) {
1293 J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
1294 trr = trw;
1295 } else {
1296 TRef tru;
1297 len = emitir(IRTI(IR_MIN), len, tr);
1298 tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
1299 J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
1300 trr = tru; /* Doing the ADD before the SNEW generates better code. */
774 } 1301 }
1302 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
1303 }
1304 rd->nres = arg;
1305}
1306
1307static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd)
1308{
1309 TRef ud = recff_sbufx_check(J, rd, 0);
1310 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1311 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1312 J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw));
1313}
1314
1315static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd)
1316{
1317 TRef ud = recff_sbufx_check(J, rd, 0);
1318 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1319 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1320 J->base[0] = recff_sbufx_len(J, trr, trw);
1321}
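A short Lua sketch of the buffer methods recorded above (put, get, __len, __tostring); illustrative only:
  local buf = require("string.buffer").new()
  buf:put("foo", 42, "bar")    -- strings and numbers are stringified and appended
  print(#buf, tostring(buf))   -- 8  foo42bar
  print(buf:get(3), buf:get()) -- foo  42bar: reads consume from the read pointer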
1322
1323#if LJ_HASFFI
1324static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd)
1325{
1326 TRef ud = recff_sbufx_check(J, rd, 0);
1327 TRef trbuf = recff_sbufx_write(J, ud);
1328 TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]);
1329 TRef len = recff_sbufx_checkint(J, rd, 2);
1330 trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len);
1331 emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
1332}
1333
1334static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd)
1335{
1336 TRef ud = recff_sbufx_check(J, rd, 0);
1337 TRef trbuf = recff_sbufx_write(J, ud);
1338 TRef trsz = recff_sbufx_checkint(J, rd, 1);
1339 J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz);
1340 J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W));
1341 rd->nres = 2;
1342}
1343
1344static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd)
1345{
1346 TRef ud = recff_sbufx_check(J, rd, 0);
1347 TRef len = recff_sbufx_checkint(J, rd, 1);
1348 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1349 TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E);
1350 TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw);
1351 if (LJ_64)
1352 left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
1353 emitir(IRTGI(IR_ULE), len, left);
1354 trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len);
1355 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw);
1356}
1357
1358static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd)
1359{
1360 TRef ud = recff_sbufx_check(J, rd, 0);
1361 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1362 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1363 J->base[0] = lj_crecord_topuint8(J, trr);
1364 J->base[1] = recff_sbufx_len(J, trr, trw);
1365 rd->nres = 2;
1366}
1367#endif
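A hedged sketch of the FFI-only reserve/commit protocol recorded above (illustrative; assumes the FFI is available):
  local ffi = require("ffi")
  local buf = require("string.buffer").new()
  local ptr, avail = buf:reserve(16) -- writable uint8_t pointer and available space
  ffi.copy(ptr, "abc", 3)
  buf:commit(3)                      -- publish the three written bytes
  print(tostring(buf))               -- "abc"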
1368
1369static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd)
1370{
1371 TRef ud = recff_sbufx_check(J, rd, 0);
1372 TRef trbuf = recff_sbufx_write(J, ud);
1373 TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
1374 lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp);
1375 /* No IR_USE needed, since the call is a store. */
1376}
1377
1378static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd)
1379{
1380 TRef ud = recff_sbufx_check(J, rd, 0);
1381 TRef trbuf = recff_sbufx_write(J, ud);
1382 TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);
1383 TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp);
1384 IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0]));
1385 /* No IR_USE needed, since the call is a store. */
1386 J->base[0] = lj_record_vload(J, tmp, 0, t);
1387 /* The sbx->r store must be after the VLOAD type check, in case it fails. */
1388 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
1389}
1390
1391static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd)
1392{
1393 TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1);
1394 J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp);
1395 /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */
1396 emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0);
1397 UNUSED(rd);
1398}
1399
1400static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd)
1401{
1402 if (tvisstr(&rd->argv[0])) {
1403 GCstr *str = strV(&rd->argv[0]);
1404 SBufExt sbx;
1405 IRType t;
1406 TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);
1407 TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]);
1408 /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM.
1409 ** That's why IRCALL_lj_serialize_decode needs a fake INT result.
1410 */
1411 emitir(IRT(IR_USE, IRT_NIL), tr, 0);
1412 memset(&sbx, 0, sizeof(SBufExt));
1413 lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len);
1414 t = (IRType)lj_serialize_peektype(&sbx);
1415 J->base[0] = lj_record_vload(J, tmp, 0, t);
775 } /* else: Interpreter will throw. */ 1416 } /* else: Interpreter will throw. */
776} 1417}
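Illustrative round trip through the serializer recorded above, using buffer.encode/decode from the string.buffer library:
  local buffer = require("string.buffer")
  local blob = buffer.encode({ 1, 2, x = "y" })
  local t = buffer.decode(blob)
  print(t[1], t[2], t.x) -- 1  2  y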
777 1418
1419#endif
1420
1421/* -- Table library fast functions ---------------------------------------- */
1422
778static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) 1423static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
779{ 1424{
780 RecordIndex ix; 1425 RecordIndex ix;
@@ -783,7 +1428,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
783 rd->nres = 0; 1428 rd->nres = 0;
784 if (tref_istab(ix.tab) && ix.val) { 1429 if (tref_istab(ix.tab) && ix.val) {
785 if (!J->base[2]) { /* Simple push: t[#t+1] = v */ 1430 if (!J->base[2]) { /* Simple push: t[#t+1] = v */
786 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); 1431 TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL);
787 GCtab *t = tabV(&rd->argv[0]); 1432 GCtab *t = tabV(&rd->argv[0]);
788 ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); 1433 ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1));
789 settabV(J->L, &ix.tabv, t); 1434 settabV(J->L, &ix.tabv, t);
@@ -791,8 +1436,55 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
791 ix.idxchain = 0; 1436 ix.idxchain = 0;
792 lj_record_idx(J, &ix); /* Set new value. */ 1437 lj_record_idx(J, &ix); /* Set new value. */
793 } else { /* Complex case: insert in the middle. */ 1438 } else { /* Complex case: insert in the middle. */
794 recff_nyiu(J); 1439 recff_nyiu(J, rd);
1440 return;
1441 }
1442 } /* else: Interpreter will throw. */
1443}
1444
1445static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
1446{
1447 TRef tab = J->base[0];
1448 if (tref_istab(tab)) {
1449 TRef sep = !tref_isnil(J->base[1]) ?
1450 lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
1451 TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
1452 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
1453 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
1454 lj_opt_narrow_toint(J, J->base[3]) :
1455 emitir(IRTI(IR_ALEN), tab, TREF_NIL);
1456 TRef hdr = recff_bufhdr(J);
1457 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
1458 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
1459 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
1460 } /* else: Interpreter will throw. */
1461 UNUSED(rd);
1462}
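Illustrative Lua usage; the trace guards that lj_buf_puttab succeeded, so invalid elements exit to the interpreter, which raises the error:
  local t = { "a", "b", "c" }
  print(table.concat(t, "-"))       -- "a-b-c"
  print(table.concat(t, ",", 2, 3)) -- "b,c"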
1463
1464static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
1465{
1466 TRef tra = lj_opt_narrow_toint(J, J->base[0]);
1467 TRef trh = lj_opt_narrow_toint(J, J->base[1]);
1468 if (tref_isk(tra) && tref_isk(trh)) {
1469 int32_t a = IR(tref_ref(tra))->i;
1470 if (a < 0x7fff) {
1471 uint32_t hbits = hsize2hbits(IR(tref_ref(trh))->i);
1472 a = a > 0 ? a+1 : 0;
1473 J->base[0] = emitir(IRTG(IR_TNEW, IRT_TAB), (uint32_t)a, hbits);
1474 return;
795 } 1475 }
1476 }
1477 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
1478 UNUSED(rd);
1479}
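Illustrative usage of the table.new extension; constant sizes below 0x7fff record as an inline TNEW, otherwise the lj_tab_new_ah call is emitted:
  local tnew = require("table.new")
  local t = tnew(100, 0) -- preallocate 100 array slots, no hash part
  for i = 1, 100 do t[i] = i end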
1480
1481static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
1482{
1483 TRef tr = J->base[0];
1484 if (tref_istab(tr)) {
1485 rd->nres = 0;
1486 lj_ir_call(J, IRCALL_lj_tab_clear, tr);
1487 J->needsnap = 1;
796 } /* else: Interpreter will throw. */ 1488 } /* else: Interpreter will throw. */
797} 1489}
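Illustrative usage of the table.clear extension recorded above:
  local tclear = require("table.clear")
  local t = { 1, 2, 3, x = "y" }
  tclear(t)      -- empties the table in place, keeping its allocated storage
  print(next(t)) -- nil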
798 1490
@@ -805,8 +1497,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
805{ 1497{
806 TRef tr, ud, fp; 1498 TRef tr, ud, fp;
807 if (id) { /* io.func() */ 1499 if (id) { /* io.func() */
808 tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); 1500 ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
809 ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
810 } else { /* fp:method() */ 1501 } else { /* fp:method() */
811 ud = J->base[0]; 1502 ud = J->base[0];
812 if (!tref_isudata(ud)) 1503 if (!tref_isudata(ud))
@@ -828,10 +1519,13 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
828 ptrdiff_t i = rd->data == 0 ? 1 : 0; 1519 ptrdiff_t i = rd->data == 0 ? 1 : 0;
829 for (; J->base[i]; i++) { 1520 for (; J->base[i]; i++) {
830 TRef str = lj_ir_tostr(J, J->base[i]); 1521 TRef str = lj_ir_tostr(J, J->base[i]);
831 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); 1522 TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero);
832 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); 1523 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
833 if (tref_isk(len) && IR(tref_ref(len))->i == 1) { 1524 if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
834 TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); 1525 IRIns *irs = IR(tref_ref(str));
1526 TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
1527 irs->op1 :
1528 emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
835 tr = lj_ir_call(J, IRCALL_fputc, tr, fp); 1529 tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
836 if (results_wanted(J) != 0) /* Check result only if not ignored. */ 1530 if (results_wanted(J) != 0) /* Check result only if not ignored. */
837 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); 1531 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
@@ -853,6 +1547,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd)
853 J->base[0] = TREF_TRUE; 1547 J->base[0] = TREF_TRUE;
854} 1548}
855 1549
1550/* -- Debug library fast functions ---------------------------------------- */
1551
1552static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd)
1553{
1554 GCtab *mt;
1555 TRef mtref;
1556 TRef tr = J->base[0];
1557 if (tref_istab(tr)) {
1558 mt = tabref(tabV(&rd->argv[0])->metatable);
1559 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META);
1560 } else if (tref_isudata(tr)) {
1561 mt = tabref(udataV(&rd->argv[0])->metatable);
1562 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META);
1563 } else {
1564 mt = tabref(basemt_obj(J2G(J), &rd->argv[0]));
1565 J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL;
1566 return;
1567 }
1568 emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
1569 J->base[0] = mt ? mtref : TREF_NIL;
1570}
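Illustrative Lua behaviour for the new debug.getmetatable recorder (tables and userdata load the metatable field, other types use the base metatable):
  local mt = debug.getmetatable("")  -- base metatable shared by all strings
  print(mt and mt.__index == string) -- true
  print(debug.getmetatable(0))       -- nil: numbers have no metatable by default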
1571
856/* -- Record calls to fast functions -------------------------------------- */ 1572/* -- Record calls to fast functions -------------------------------------- */
857 1573
858#include "lj_recdef.h" 1574#include "lj_recdef.h"
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 35cbb4ea..bb7dda63 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -11,7 +11,16 @@
11 11
12/* -- Lua stack frame ----------------------------------------------------- */ 12/* -- Lua stack frame ----------------------------------------------------- */
13 13
14/* Frame type markers in callee function slot (callee base-1). */ 14/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned:
15**
16** PC 00 Lua frame
17** delta 001 C frame
18** delta 010 Continuation frame
19** delta 011 Lua vararg frame
20** delta 101 cpcall() frame
21** delta 110 ff pcall() frame
22** delta 111 ff pcall() frame with active hook
23*/
15enum { 24enum {
16 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, 25 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
17 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH 26 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
21#define FRAME_TYPEP (FRAME_TYPE|FRAME_P) 30#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
22 31
23/* Macros to access and modify Lua frames. */ 32/* Macros to access and modify Lua frames. */
33#if LJ_FR2
34/* Two-slot frame info, required for 64 bit PC/GCRef:
35**
36** base-2 base-1 | base base+1 ...
37** [func PC/delta/ft] | [slots ...]
38** ^-- frame | ^-- base ^-- top
39**
40** Continuation frames:
41**
42** base-4 base-3 base-2 base-1 | base base+1 ...
43** [cont PC ] [func PC/delta/ft] | [slots ...]
44** ^-- frame | ^-- base ^-- top
45*/
46#define frame_gc(f) (gcval((f)-1))
47#define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz)
48#define frame_pc(f) ((const BCIns *)frame_ftsz(f))
49#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp)))
50#define setframe_ftsz(f, sz) ((f)->ftsz = (sz))
51#define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc))
52#else
53/* One-slot frame info, sufficient for 32 bit PC/GCRef:
54**
55** base-1 | base base+1 ...
56** lo hi |
57** [func | PC/delta/ft] | [slots ...]
58** ^-- frame | ^-- base ^-- top
59**
60** Continuation frames:
61**
62** base-2 base-1 | base base+1 ...
63** lo hi lo hi |
64** [cont | PC] [func | PC/delta/ft] | [slots ...]
65** ^-- frame | ^-- base ^-- top
66*/
24#define frame_gc(f) (gcref((f)->fr.func)) 67#define frame_gc(f) (gcref((f)->fr.func))
25#define frame_func(f) (&frame_gc(f)->fn) 68#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz)
26#define frame_ftsz(f) ((f)->fr.tp.ftsz) 69#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
70#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp))
71#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz))
72#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
73#endif
27 74
28#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) 75#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
29#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) 76#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
@@ -33,33 +80,53 @@ enum {
33#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) 80#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
34#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) 81#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
35 82
36#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) 83#define frame_func(f) (&frame_gc(f)->fn)
84#define frame_delta(f) (frame_ftsz(f) >> 3)
85#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
86
87enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
88
89#if LJ_FR2
90#define frame_contpc(f) (frame_pc((f)-2))
91#define frame_contv(f) (((f)-3)->u64)
92#else
37#define frame_contpc(f) (frame_pc((f)-1)) 93#define frame_contpc(f) (frame_pc((f)-1))
38#if LJ_64 94#define frame_contv(f) (((f)-1)->u32.lo)
95#endif
96#if LJ_FR2
97#define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64)
98#elif LJ_64
39#define frame_contf(f) \ 99#define frame_contf(f) \
40 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ 100 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
41 (intptr_t)(int32_t)((f)-1)->u32.lo)) 101 (intptr_t)(int32_t)((f)-1)->u32.lo))
42#else 102#else
43#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) 103#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
44#endif 104#endif
45#define frame_delta(f) (frame_ftsz(f) >> 3) 105#define frame_iscont_fficb(f) \
46#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) 106 (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
47 107
48#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) 108#define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
49#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) 109#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
50#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) 110#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
51/* Note: this macro does not skip over FRAME_VARG. */ 111/* Note: this macro does not skip over FRAME_VARG. */
52 112
53#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
54#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz))
55#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
56
57/* -- C stack frame ------------------------------------------------------- */ 113/* -- C stack frame ------------------------------------------------------- */
58 114
59/* Macros to access and modify the C stack frame chain. */ 115/* Macros to access and modify the C stack frame chain. */
60 116
61/* These definitions must match with the arch-specific *.dasc files. */ 117/* These definitions must match with the arch-specific *.dasc files. */
62#if LJ_TARGET_X86 118#if LJ_TARGET_X86
119#if LJ_ABI_WIN
120#define CFRAME_OFS_ERRF (19*4)
121#define CFRAME_OFS_NRES (18*4)
122#define CFRAME_OFS_PREV (17*4)
123#define CFRAME_OFS_L (16*4)
124#define CFRAME_OFS_SEH (9*4)
125#define CFRAME_OFS_PC (6*4)
126#define CFRAME_OFS_MULTRES (5*4)
127#define CFRAME_SIZE (16*4)
128#define CFRAME_SHIFT_MULTRES 0
129#else
63#define CFRAME_OFS_ERRF (15*4) 130#define CFRAME_OFS_ERRF (15*4)
64#define CFRAME_OFS_NRES (14*4) 131#define CFRAME_OFS_NRES (14*4)
65#define CFRAME_OFS_PREV (13*4) 132#define CFRAME_OFS_PREV (13*4)
@@ -68,24 +135,41 @@ enum {
68#define CFRAME_OFS_MULTRES (5*4) 135#define CFRAME_OFS_MULTRES (5*4)
69#define CFRAME_SIZE (12*4) 136#define CFRAME_SIZE (12*4)
70#define CFRAME_SHIFT_MULTRES 0 137#define CFRAME_SHIFT_MULTRES 0
138#endif
71#elif LJ_TARGET_X64 139#elif LJ_TARGET_X64
72#if LJ_ABI_WIN 140#if LJ_ABI_WIN
73#define CFRAME_OFS_PREV (13*8) 141#define CFRAME_OFS_PREV (13*8)
142#if LJ_GC64
143#define CFRAME_OFS_PC (12*8)
144#define CFRAME_OFS_L (11*8)
145#define CFRAME_OFS_ERRF (21*4)
146#define CFRAME_OFS_NRES (20*4)
147#define CFRAME_OFS_MULTRES (8*4)
148#else
74#define CFRAME_OFS_PC (25*4) 149#define CFRAME_OFS_PC (25*4)
75#define CFRAME_OFS_L (24*4) 150#define CFRAME_OFS_L (24*4)
76#define CFRAME_OFS_ERRF (23*4) 151#define CFRAME_OFS_ERRF (23*4)
77#define CFRAME_OFS_NRES (22*4) 152#define CFRAME_OFS_NRES (22*4)
78#define CFRAME_OFS_MULTRES (21*4) 153#define CFRAME_OFS_MULTRES (21*4)
154#endif
79#define CFRAME_SIZE (10*8) 155#define CFRAME_SIZE (10*8)
80#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) 156#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
81#define CFRAME_SHIFT_MULTRES 0 157#define CFRAME_SHIFT_MULTRES 0
82#else 158#else
83#define CFRAME_OFS_PREV (4*8) 159#define CFRAME_OFS_PREV (4*8)
160#if LJ_GC64
161#define CFRAME_OFS_PC (3*8)
162#define CFRAME_OFS_L (2*8)
163#define CFRAME_OFS_ERRF (3*4)
164#define CFRAME_OFS_NRES (2*4)
165#define CFRAME_OFS_MULTRES (0*4)
166#else
84#define CFRAME_OFS_PC (7*4) 167#define CFRAME_OFS_PC (7*4)
85#define CFRAME_OFS_L (6*4) 168#define CFRAME_OFS_L (6*4)
86#define CFRAME_OFS_ERRF (5*4) 169#define CFRAME_OFS_ERRF (5*4)
87#define CFRAME_OFS_NRES (4*4) 170#define CFRAME_OFS_NRES (4*4)
88#define CFRAME_OFS_MULTRES (1*4) 171#define CFRAME_OFS_MULTRES (1*4)
172#endif
89#if LJ_NO_UNWIND 173#if LJ_NO_UNWIND
90#define CFRAME_SIZE (12*8) 174#define CFRAME_SIZE (12*8)
91#else 175#else
@@ -107,6 +191,15 @@ enum {
107#define CFRAME_SIZE 64 191#define CFRAME_SIZE 64
108#endif 192#endif
109#define CFRAME_SHIFT_MULTRES 3 193#define CFRAME_SHIFT_MULTRES 3
194#elif LJ_TARGET_ARM64
195#define CFRAME_OFS_ERRF 36
196#define CFRAME_OFS_NRES 40
197#define CFRAME_OFS_PREV 0
198#define CFRAME_OFS_L 16
199#define CFRAME_OFS_PC 8
200#define CFRAME_OFS_MULTRES 32
201#define CFRAME_SIZE 208
202#define CFRAME_SHIFT_MULTRES 3
110#elif LJ_TARGET_PPC 203#elif LJ_TARGET_PPC
111#if LJ_TARGET_XBOX360 204#if LJ_TARGET_XBOX360
112#define CFRAME_OFS_ERRF 424 205#define CFRAME_OFS_ERRF 424
@@ -117,7 +210,7 @@ enum {
117#define CFRAME_OFS_MULTRES 408 210#define CFRAME_OFS_MULTRES 408
118#define CFRAME_SIZE 384 211#define CFRAME_SIZE 384
119#define CFRAME_SHIFT_MULTRES 3 212#define CFRAME_SHIFT_MULTRES 3
120#elif LJ_ARCH_PPC64 213#elif LJ_ARCH_PPC32ON64
121#define CFRAME_OFS_ERRF 472 214#define CFRAME_OFS_ERRF 472
122#define CFRAME_OFS_NRES 468 215#define CFRAME_OFS_NRES 468
123#define CFRAME_OFS_PREV 448 216#define CFRAME_OFS_PREV 448
@@ -133,26 +226,43 @@ enum {
133#define CFRAME_OFS_L 36 226#define CFRAME_OFS_L 36
134#define CFRAME_OFS_PC 32 227#define CFRAME_OFS_PC 32
135#define CFRAME_OFS_MULTRES 28 228#define CFRAME_OFS_MULTRES 28
136#define CFRAME_SIZE 272 229#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
137#define CFRAME_SHIFT_MULTRES 3 230#define CFRAME_SHIFT_MULTRES 3
138#endif 231#endif
139#elif LJ_TARGET_PPCSPE 232#elif LJ_TARGET_MIPS32
140#define CFRAME_OFS_ERRF 28 233#if LJ_ARCH_HASFPU
141#define CFRAME_OFS_NRES 24
142#define CFRAME_OFS_PREV 20
143#define CFRAME_OFS_L 16
144#define CFRAME_OFS_PC 12
145#define CFRAME_OFS_MULTRES 8
146#define CFRAME_SIZE 184
147#define CFRAME_SHIFT_MULTRES 3
148#elif LJ_TARGET_MIPS
149#define CFRAME_OFS_ERRF 124 234#define CFRAME_OFS_ERRF 124
150#define CFRAME_OFS_NRES 120 235#define CFRAME_OFS_NRES 120
151#define CFRAME_OFS_PREV 116 236#define CFRAME_OFS_PREV 116
152#define CFRAME_OFS_L 112 237#define CFRAME_OFS_L 112
238#define CFRAME_SIZE 112
239#else
240#define CFRAME_OFS_ERRF 76
241#define CFRAME_OFS_NRES 72
242#define CFRAME_OFS_PREV 68
243#define CFRAME_OFS_L 64
244#define CFRAME_SIZE 64
245#endif
153#define CFRAME_OFS_PC 20 246#define CFRAME_OFS_PC 20
154#define CFRAME_OFS_MULTRES 16 247#define CFRAME_OFS_MULTRES 16
155#define CFRAME_SIZE 112 248#define CFRAME_SHIFT_MULTRES 3
249#elif LJ_TARGET_MIPS64
250#if LJ_ARCH_HASFPU
251#define CFRAME_OFS_ERRF 188
252#define CFRAME_OFS_NRES 184
253#define CFRAME_OFS_PREV 176
254#define CFRAME_OFS_L 168
255#define CFRAME_OFS_PC 160
256#define CFRAME_SIZE 192
257#else
258#define CFRAME_OFS_ERRF 124
259#define CFRAME_OFS_NRES 120
260#define CFRAME_OFS_PREV 112
261#define CFRAME_OFS_L 104
262#define CFRAME_OFS_PC 96
263#define CFRAME_SIZE 128
264#endif
265#define CFRAME_OFS_MULTRES 0
156#define CFRAME_SHIFT_MULTRES 3 266#define CFRAME_SHIFT_MULTRES 3
157#else 267#else
158#error "Missing CFRAME_* definitions for this architecture" 268#error "Missing CFRAME_* definitions for this architecture"
diff --git a/src/lj_func.c b/src/lj_func.c
index 4ac47875..6a537649 100644
--- a/src/lj_func.c
+++ b/src/lj_func.c
@@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt)
24 24
25/* -- Upvalues ------------------------------------------------------------ */ 25/* -- Upvalues ------------------------------------------------------------ */
26 26
27static void unlinkuv(GCupval *uv) 27static void unlinkuv(global_State *g, GCupval *uv)
28{ 28{
29 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); 29 UNUSED(g);
30 lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
31 "broken upvalue chain");
30 setgcrefr(uvnext(uv)->prev, uv->prev); 32 setgcrefr(uvnext(uv)->prev, uv->prev);
31 setgcrefr(uvprev(uv)->next, uv->next); 33 setgcrefr(uvprev(uv)->next, uv->next);
32} 34}
@@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
40 GCupval *uv; 42 GCupval *uv;
41 /* Search the sorted list of open upvalues. */ 43 /* Search the sorted list of open upvalues. */
42 while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { 44 while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) {
43 lua_assert(!p->closed && uvval(p) != &p->tv); 45 lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain");
44 if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ 46 if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */
45 if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ 47 if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */
46 flipwhite(obj2gco(p)); 48 flipwhite(obj2gco(p));
@@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
61 setgcrefr(uv->next, g->uvhead.next); 63 setgcrefr(uv->next, g->uvhead.next);
62 setgcref(uvnext(uv)->prev, obj2gco(uv)); 64 setgcref(uvnext(uv)->prev, obj2gco(uv));
63 setgcref(g->uvhead.next, obj2gco(uv)); 65 setgcref(g->uvhead.next, obj2gco(uv));
64 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); 66 lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
67 "broken upvalue chain");
65 return uv; 68 return uv;
66} 69}
67 70
@@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
84 while (gcref(L->openupval) != NULL && 87 while (gcref(L->openupval) != NULL &&
85 uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { 88 uvval((uv = gco2uv(gcref(L->openupval)))) >= level) {
86 GCobj *o = obj2gco(uv); 89 GCobj *o = obj2gco(uv);
87 lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv); 90 lj_assertG(!isblack(o), "bad black upvalue");
91 lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain");
88 setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ 92 setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */
89 if (isdead(g, o)) { 93 if (isdead(g, o)) {
90 lj_func_freeuv(g, uv); 94 lj_func_freeuv(g, uv);
91 } else { 95 } else {
92 unlinkuv(uv); 96 unlinkuv(g, uv);
93 lj_gc_closeuv(g, uv); 97 lj_gc_closeuv(g, uv);
94 } 98 }
95 } 99 }
@@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
98void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) 102void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv)
99{ 103{
100 if (!uv->closed) 104 if (!uv->closed)
101 unlinkuv(uv); 105 unlinkuv(g, uv);
102 lj_mem_freet(g, uv); 106 lj_mem_freet(g, uv);
103} 107}
104 108
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 25374d03..bfa4daa8 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -24,7 +25,9 @@
24#include "lj_cdata.h" 25#include "lj_cdata.h"
25#endif 26#endif
26#include "lj_trace.h" 27#include "lj_trace.h"
28#include "lj_dispatch.h"
27#include "lj_vm.h" 29#include "lj_vm.h"
30#include "lj_vmevent.h"
28 31
29#define GCSTEPSIZE 1024u 32#define GCSTEPSIZE 1024u
30#define GCSWEEPMAX 40 33#define GCSWEEPMAX 40
@@ -40,7 +43,8 @@
40 43
41/* Mark a TValue (if needed). */ 44/* Mark a TValue (if needed). */
42#define gc_marktv(g, tv) \ 45#define gc_marktv(g, tv) \
43 { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ 46 { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \
47 "TValue and GC type mismatch"); \
44 if (tviswhite(tv)) gc_mark(g, gcV(tv)); } 48 if (tviswhite(tv)) gc_mark(g, gcV(tv)); }
45 49
46/* Mark a GCobj (if needed). */ 50/* Mark a GCobj (if needed). */
@@ -54,21 +58,32 @@
54static void gc_mark(global_State *g, GCobj *o) 58static void gc_mark(global_State *g, GCobj *o)
55{ 59{
56 int gct = o->gch.gct; 60 int gct = o->gch.gct;
57 lua_assert(iswhite(o) && !isdead(g, o)); 61 lj_assertG(iswhite(o), "mark of non-white object");
62 lj_assertG(!isdead(g, o), "mark of dead object");
58 white2gray(o); 63 white2gray(o);
59 if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { 64 if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) {
60 GCtab *mt = tabref(gco2ud(o)->metatable); 65 GCtab *mt = tabref(gco2ud(o)->metatable);
61 gray2black(o); /* Userdata are never gray. */ 66 gray2black(o); /* Userdata are never gray. */
62 if (mt) gc_markobj(g, mt); 67 if (mt) gc_markobj(g, mt);
63 gc_markobj(g, tabref(gco2ud(o)->env)); 68 gc_markobj(g, tabref(gco2ud(o)->env));
69 if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) {
70 SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
71 if (sbufiscow(sbx) && gcref(sbx->cowref))
72 gc_markobj(g, gcref(sbx->cowref));
73 if (gcref(sbx->dict_str))
74 gc_markobj(g, gcref(sbx->dict_str));
75 if (gcref(sbx->dict_mt))
76 gc_markobj(g, gcref(sbx->dict_mt));
77 }
64 } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { 78 } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
65 GCupval *uv = gco2uv(o); 79 GCupval *uv = gco2uv(o);
66 gc_marktv(g, uvval(uv)); 80 gc_marktv(g, uvval(uv));
67 if (uv->closed) 81 if (uv->closed)
68 gray2black(o); /* Closed upvalues are never gray. */ 82 gray2black(o); /* Closed upvalues are never gray. */
69 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { 83 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
70 lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || 84 lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
71 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO); 85 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE,
86 "bad GC type %d", gct);
72 setgcrefr(o->gch.gclist, g->gc.gray); 87 setgcrefr(o->gch.gclist, g->gc.gray);
73 setgcref(g->gc.gray, o); 88 setgcref(g->gc.gray, o);
74 } 89 }
@@ -93,9 +108,6 @@ static void gc_mark_start(global_State *g)
93 gc_markobj(g, tabref(mainthread(g)->env)); 108 gc_markobj(g, tabref(mainthread(g)->env));
94 gc_marktv(g, &g->registrytv); 109 gc_marktv(g, &g->registrytv);
95 gc_mark_gcroot(g); 110 gc_mark_gcroot(g);
96#if LJ_HASFFI
97 if (ctype_ctsG(g)) gc_markobj(g, ctype_ctsG(g)->finalizer);
98#endif
99 g->gc.state = GCSpropagate; 111 g->gc.state = GCSpropagate;
100} 112}
101 113
@@ -104,7 +116,8 @@ static void gc_mark_uv(global_State *g)
104{ 116{
105 GCupval *uv; 117 GCupval *uv;
106 for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { 118 for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) {
107 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); 119 lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
120 "broken upvalue chain");
108 if (isgray(obj2gco(uv))) 121 if (isgray(obj2gco(uv)))
109 gc_marktv(g, uvval(uv)); 122 gc_marktv(g, uvval(uv));
110 } 123 }
@@ -174,8 +187,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t)
174 } 187 }
175 if (weak) { /* Weak tables are cleared in the atomic phase. */ 188 if (weak) { /* Weak tables are cleared in the atomic phase. */
176#if LJ_HASFFI 189#if LJ_HASFFI
177 CTState *cts = ctype_ctsG(g); 190 if (gcref(g->gcroot[GCROOT_FFI_FIN]) == obj2gco(t)) {
178 if (cts && cts->finalizer == t) {
179 weak = (int)(~0u & ~LJ_GC_WEAKVAL); 191 weak = (int)(~0u & ~LJ_GC_WEAKVAL);
180 } else 192 } else
181#endif 193#endif
@@ -199,7 +211,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t)
199 for (i = 0; i <= hmask; i++) { 211 for (i = 0; i <= hmask; i++) {
200 Node *n = &node[i]; 212 Node *n = &node[i];
201 if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ 213 if (!tvisnil(&n->val)) { /* Mark non-empty slot. */
202 lua_assert(!tvisnil(&n->key)); 214 lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot");
203 if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); 215 if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key);
204 if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); 216 if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val);
205 } 217 }
@@ -214,7 +226,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
214 gc_markobj(g, tabref(fn->c.env)); 226 gc_markobj(g, tabref(fn->c.env));
215 if (isluafunc(fn)) { 227 if (isluafunc(fn)) {
216 uint32_t i; 228 uint32_t i;
217 lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv); 229 lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv,
230 "function upvalues out of range");
218 gc_markobj(g, funcproto(fn)); 231 gc_markobj(g, funcproto(fn));
219 for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ 232 for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */
220 gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); 233 gc_markobj(g, &gcref(fn->l.uvptr[i])->uv);
@@ -230,7 +243,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
230static void gc_marktrace(global_State *g, TraceNo traceno) 243static void gc_marktrace(global_State *g, TraceNo traceno)
231{ 244{
232 GCobj *o = obj2gco(traceref(G2J(g), traceno)); 245 GCobj *o = obj2gco(traceref(G2J(g), traceno));
233 lua_assert(traceno != G2J(g)->cur.traceno); 246 lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped");
234 if (iswhite(o)) { 247 if (iswhite(o)) {
235 white2gray(o); 248 white2gray(o);
236 setgcrefr(o->gch.gclist, g->gc.gray); 249 setgcrefr(o->gch.gclist, g->gc.gray);
@@ -247,6 +260,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
247 IRIns *ir = &T->ir[ref]; 260 IRIns *ir = &T->ir[ref];
248 if (ir->o == IR_KGC) 261 if (ir->o == IR_KGC)
249 gc_markobj(g, ir_kgc(ir)); 262 gc_markobj(g, ir_kgc(ir));
263 if (irt_is64(ir->t) && ir->o != IR_KNULL)
264 ref++;
250 } 265 }
251 if (T->link) gc_marktrace(g, T->link); 266 if (T->link) gc_marktrace(g, T->link);
252 if (T->nextroot) gc_marktrace(g, T->nextroot); 267 if (T->nextroot) gc_marktrace(g, T->nextroot);
@@ -277,12 +292,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
277{ 292{
278 TValue *frame, *top = th->top-1, *bot = tvref(th->stack); 293 TValue *frame, *top = th->top-1, *bot = tvref(th->stack);
279 /* Note: extra vararg frame not skipped, marks function twice (harmless). */ 294 /* Note: extra vararg frame not skipped, marks function twice (harmless). */
280 for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) { 295 for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) {
281 GCfunc *fn = frame_func(frame); 296 GCfunc *fn = frame_func(frame);
282 TValue *ftop = frame; 297 TValue *ftop = frame;
283 if (isluafunc(fn)) ftop += funcproto(fn)->framesize; 298 if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
284 if (ftop > top) top = ftop; 299 if (ftop > top) top = ftop;
285 gc_markobj(g, fn); /* Need to mark hidden function (or L). */ 300 if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */
286 } 301 }
287 top++; /* Correct bias of -1 (frame == base-1). */ 302 top++; /* Correct bias of -1 (frame == base-1). */
288 if (top > tvref(th->maxstack)) top = tvref(th->maxstack); 303 if (top > tvref(th->maxstack)) top = tvref(th->maxstack);
@@ -293,7 +308,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
293static void gc_traverse_thread(global_State *g, lua_State *th) 308static void gc_traverse_thread(global_State *g, lua_State *th)
294{ 309{
295 TValue *o, *top = th->top; 310 TValue *o, *top = th->top;
296 for (o = tvref(th->stack)+1; o < top; o++) 311 for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++)
297 gc_marktv(g, o); 312 gc_marktv(g, o);
298 if (g->gc.state == GCSatomic) { 313 if (g->gc.state == GCSatomic) {
299 top = tvref(th->stack) + th->stacksize; 314 top = tvref(th->stack) + th->stacksize;
@@ -309,7 +324,7 @@ static size_t propagatemark(global_State *g)
309{ 324{
310 GCobj *o = gcref(g->gc.gray); 325 GCobj *o = gcref(g->gc.gray);
311 int gct = o->gch.gct; 326 int gct = o->gch.gct;
312 lua_assert(isgray(o)); 327 lj_assertG(isgray(o), "propagation of non-gray object");
313 gray2black(o); 328 gray2black(o);
314 setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ 329 setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */
315 if (LJ_LIKELY(gct == ~LJ_TTAB)) { 330 if (LJ_LIKELY(gct == ~LJ_TTAB)) {
@@ -341,7 +356,7 @@ static size_t propagatemark(global_State *g)
341 return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + 356 return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
342 T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); 357 T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry);
343#else 358#else
344 lua_assert(0); 359 lj_assertG(0, "bad GC type %d", gct);
345 return 0; 360 return 0;
346#endif 361#endif
347 } 362 }
@@ -358,15 +373,6 @@ static size_t gc_propagate_gray(global_State *g)
358 373
359/* -- Sweep phase --------------------------------------------------------- */ 374/* -- Sweep phase --------------------------------------------------------- */
360 375
361/* Try to shrink some common data structures. */
362static void gc_shrink(global_State *g, lua_State *L)
363{
364 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
365 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
366 if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
367 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
368}
369
370/* Type of GC free functions. */ 376/* Type of GC free functions. */
371typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); 377typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
372 378
@@ -392,7 +398,7 @@ static const GCFreeFunc gc_freefunc[] = {
392}; 398};
393 399
394/* Full sweep of a GC list. */ 400/* Full sweep of a GC list. */
395#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) 401#define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0)
396 402
397/* Partial sweep of a GC list. */ 403/* Partial sweep of a GC list. */
398static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) 404static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
@@ -404,11 +410,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
404 if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ 410 if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */
405 gc_fullsweep(g, &gco2th(o)->openupval); 411 gc_fullsweep(g, &gco2th(o)->openupval);
406 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ 412 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
407 lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); 413 lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
414 "sweep of undead object");
408 makewhite(g, o); /* Value is alive, change to the current white. */ 415 makewhite(g, o); /* Value is alive, change to the current white. */
409 p = &o->gch.nextgc; 416 p = &o->gch.nextgc;
410 } else { /* Otherwise value is dead, free it. */ 417 } else { /* Otherwise value is dead, free it. */
411 lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); 418 lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
419 "sweep of unlive object");
412 setgcrefr(*p, o->gch.nextgc); 420 setgcrefr(*p, o->gch.nextgc);
413 if (o == gcref(g->gc.root)) 421 if (o == gcref(g->gc.root))
414 setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ 422 setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */
@@ -418,6 +426,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
418 return p; 426 return p;
419} 427}
420 428
429/* Sweep one string interning table chain. Preserves hashalg bit. */
430static void gc_sweepstr(global_State *g, GCRef *chain)
431{
432 /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
433 int ow = otherwhite(g);
434 uintptr_t u = gcrefu(*chain);
435 GCRef q;
436 GCRef *p = &q;
437 GCobj *o;
438 setgcrefp(q, (u & ~(uintptr_t)1));
439 while ((o = gcref(*p)) != NULL) {
440 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
441 lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
442 "sweep of undead string");
443 makewhite(g, o); /* String is alive, change to the current white. */
444 p = &o->gch.nextgc;
445 } else { /* Otherwise string is dead, free it. */
446 lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
447 "sweep of unlive string");
448 setgcrefr(*p, o->gch.nextgc);
449 lj_str_free(g, gco2str(o));
450 }
451 }
452 setgcrefp(*chain, (gcrefu(q) | (u & 1)));
453}
454
421/* Check whether we can clear a key or a value slot from a table. */ 455/* Check whether we can clear a key or a value slot from a table. */
422static int gc_mayclear(cTValue *o, int val) 456static int gc_mayclear(cTValue *o, int val)
423{ 457{
@@ -435,11 +469,12 @@ static int gc_mayclear(cTValue *o, int val)
435} 469}
436 470
437/* Clear collected entries from weak tables. */ 471/* Clear collected entries from weak tables. */
438static void gc_clearweak(GCobj *o) 472static void gc_clearweak(global_State *g, GCobj *o)
439{ 473{
474 UNUSED(g);
440 while (o) { 475 while (o) {
441 GCtab *t = gco2tab(o); 476 GCtab *t = gco2tab(o);
442 lua_assert((t->marked & LJ_GC_WEAK)); 477 lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table");
443 if ((t->marked & LJ_GC_WEAKVAL)) { 478 if ((t->marked & LJ_GC_WEAKVAL)) {
444 MSize i, asize = t->asize; 479 MSize i, asize = t->asize;
445 for (i = 0; i < asize; i++) { 480 for (i = 0; i < asize; i++) {
@@ -470,21 +505,29 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
470{ 505{
471 /* Save and restore lots of state around the __gc callback. */ 506 /* Save and restore lots of state around the __gc callback. */
472 uint8_t oldh = hook_save(g); 507 uint8_t oldh = hook_save(g);
473 MSize oldt = g->gc.threshold; 508 GCSize oldt = g->gc.threshold;
474 int errcode; 509 int errcode;
475 TValue *top; 510 TValue *top;
476 lj_trace_abort(g); 511 lj_trace_abort(g);
477 top = L->top;
478 L->top = top+2;
479 hook_entergc(g); /* Disable hooks and new traces during __gc. */ 512 hook_entergc(g); /* Disable hooks and new traces during __gc. */
513 if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
480 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ 514 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
481 copyTV(L, top, mo); 515 top = L->top;
482 setgcV(L, top+1, o, ~o->gch.gct); 516 copyTV(L, top++, mo);
483 errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */ 517 if (LJ_FR2) setnilV(top++);
518 setgcV(L, top, o, ~o->gch.gct);
519 L->top = top+1;
520 errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
484 hook_restore(g, oldh); 521 hook_restore(g, oldh);
522 if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
485 g->gc.threshold = oldt; /* Restore GC threshold. */ 523 g->gc.threshold = oldt; /* Restore GC threshold. */
486 if (errcode) 524 if (errcode) {
487 lj_err_throw(L, errcode); /* Propagate errors. */ 525 ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */
526 lj_vmevent_send(L, ERRFIN,
527 copyTV(L, L->top++, restorestack(L, errobj));
528 );
529 L->top--;
530 }
488} 531}
489 532
490/* Finalize one userdata or cdata object from the mmudata list. */ 533/* Finalize one userdata or cdata object from the mmudata list. */
@@ -493,7 +536,7 @@ static void gc_finalize(lua_State *L)
493 global_State *g = G(L); 536 global_State *g = G(L);
494 GCobj *o = gcnext(gcref(g->gc.mmudata)); 537 GCobj *o = gcnext(gcref(g->gc.mmudata));
495 cTValue *mo; 538 cTValue *mo;
496 lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ 539 lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace");
497 /* Unchain from list of userdata to be finalized. */ 540 /* Unchain from list of userdata to be finalized. */
498 if (o == gcref(g->gc.mmudata)) 541 if (o == gcref(g->gc.mmudata))
499 setgcrefnull(g->gc.mmudata); 542 setgcrefnull(g->gc.mmudata);
@@ -509,9 +552,8 @@ static void gc_finalize(lua_State *L)
509 o->gch.marked &= (uint8_t)~LJ_GC_CDATA_FIN; 552 o->gch.marked &= (uint8_t)~LJ_GC_CDATA_FIN;
510 /* Resolve finalizer. */ 553 /* Resolve finalizer. */
511 setcdataV(L, &tmp, gco2cd(o)); 554 setcdataV(L, &tmp, gco2cd(o));
512 tv = lj_tab_set(L, ctype_ctsG(g)->finalizer, &tmp); 555 tv = lj_tab_set(L, tabref(g->gcroot[GCROOT_FFI_FIN]), &tmp);
513 if (!tvisnil(tv)) { 556 if (!tvisnil(tv)) {
514 g->gc.nocdatafin = 0;
515 copyTV(L, &tmp, tv); 557 copyTV(L, &tmp, tv);
516 setnilV(tv); /* Clear entry in finalizer table. */ 558 setnilV(tv); /* Clear entry in finalizer table. */
517 gc_call_finalizer(g, L, &tmp, o); 559 gc_call_finalizer(g, L, &tmp, o);
@@ -541,23 +583,20 @@ void lj_gc_finalize_udata(lua_State *L)
541void lj_gc_finalize_cdata(lua_State *L) 583void lj_gc_finalize_cdata(lua_State *L)
542{ 584{
543 global_State *g = G(L); 585 global_State *g = G(L);
544 CTState *cts = ctype_ctsG(g); 586 GCtab *t = tabref(g->gcroot[GCROOT_FFI_FIN]);
545 if (cts) { 587 Node *node = noderef(t->node);
546 GCtab *t = cts->finalizer; 588 ptrdiff_t i;
547 Node *node = noderef(t->node); 589 setgcrefnull(t->metatable); /* Mark finalizer table as disabled. */
548 ptrdiff_t i; 590 for (i = (ptrdiff_t)t->hmask; i >= 0; i--)
549 setgcrefnull(t->metatable); /* Mark finalizer table as disabled. */ 591 if (!tvisnil(&node[i].val) && tviscdata(&node[i].key)) {
550 for (i = (ptrdiff_t)t->hmask; i >= 0; i--) 592 GCobj *o = gcV(&node[i].key);
551 if (!tvisnil(&node[i].val) && tviscdata(&node[i].key)) { 593 TValue tmp;
552 GCobj *o = gcV(&node[i].key); 594 makewhite(g, o);
553 TValue tmp; 595 o->gch.marked &= (uint8_t)~LJ_GC_CDATA_FIN;
554 makewhite(g, o); 596 copyTV(L, &tmp, &node[i].val);
555 o->gch.marked &= (uint8_t)~LJ_GC_CDATA_FIN; 597 setnilV(&node[i].val);
556 copyTV(L, &tmp, &node[i].val); 598 gc_call_finalizer(g, L, &tmp, o);
557 setnilV(&node[i].val); 599 }
558 gc_call_finalizer(g, L, &tmp, o);
559 }
560 }
561} 600}
562#endif 601#endif
563 602
@@ -568,8 +607,8 @@ void lj_gc_freeall(global_State *g)
568 /* Free everything, except super-fixed objects (the main thread). */ 607 /* Free everything, except super-fixed objects (the main thread). */
569 g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; 608 g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED;
570 gc_fullsweep(g, &g->gc.root); 609 gc_fullsweep(g, &g->gc.root);
571 for (i = g->strmask; i != ~(MSize)0; i--) /* Free all string hash chains. */ 610 for (i = g->str.mask; i != ~(MSize)0; i--) /* Free all string hash chains. */
572 gc_fullsweep(g, &g->strhash[i]); 611 gc_sweepstr(g, &g->str.tab[i]);
573} 612}
574 613
575/* -- Collector ----------------------------------------------------------- */ 614/* -- Collector ----------------------------------------------------------- */
@@ -584,7 +623,7 @@ static void atomic(global_State *g, lua_State *L)
584 623
585 setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ 624 setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */
586 setgcrefnull(g->gc.weak); 625 setgcrefnull(g->gc.weak);
587 lua_assert(!iswhite(obj2gco(mainthread(g)))); 626 lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white");
588 gc_markobj(g, L); /* Mark running thread. */ 627 gc_markobj(g, L); /* Mark running thread. */
589 gc_traverse_curtrace(g); /* Traverse current trace. */ 628 gc_traverse_curtrace(g); /* Traverse current trace. */
590 gc_mark_gcroot(g); /* Mark GC roots (again). */ 629 gc_mark_gcroot(g); /* Mark GC roots (again). */
@@ -599,13 +638,15 @@ static void atomic(global_State *g, lua_State *L)
599 udsize += gc_propagate_gray(g); /* And propagate the marks. */ 638 udsize += gc_propagate_gray(g); /* And propagate the marks. */
600 639
601 /* All marking done, clear weak tables. */ 640 /* All marking done, clear weak tables. */
602 gc_clearweak(gcref(g->gc.weak)); 641 gc_clearweak(g, gcref(g->gc.weak));
642
643 lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
603 644
604 /* Prepare for sweep phase. */ 645 /* Prepare for sweep phase. */
605 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ 646 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
606 g->strempty.marked = g->gc.currentwhite; 647 g->strempty.marked = g->gc.currentwhite;
607 setmref(g->gc.sweep, &g->gc.root); 648 setmref(g->gc.sweep, &g->gc.root);
608 g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ 649 g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */
609} 650}
610 651
611/* GC state machine. Returns a cost estimate for each step performed. */ 652/* GC state machine. Returns a cost estimate for each step performed. */
@@ -622,33 +663,31 @@ static size_t gc_onestep(lua_State *L)
622 g->gc.state = GCSatomic; /* End of mark phase. */ 663 g->gc.state = GCSatomic; /* End of mark phase. */
623 return 0; 664 return 0;
624 case GCSatomic: 665 case GCSatomic:
625 if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ 666 if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */
626 return LJ_MAX_MEM; 667 return LJ_MAX_MEM;
627 atomic(g, L); 668 atomic(g, L);
628 g->gc.state = GCSsweepstring; /* Start of sweep phase. */ 669 g->gc.state = GCSsweepstring; /* Start of sweep phase. */
629 g->gc.sweepstr = 0; 670 g->gc.sweepstr = 0;
630 return 0; 671 return 0;
631 case GCSsweepstring: { 672 case GCSsweepstring: {
632 MSize old = g->gc.total; 673 GCSize old = g->gc.total;
633 gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ 674 gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */
634 if (g->gc.sweepstr > g->strmask) 675 if (g->gc.sweepstr > g->str.mask)
 635 g->gc.state = GCSsweep; /* All string hash chains swept. */ 676 g->gc.state = GCSsweep; /* All string hash chains swept. */
636 lua_assert(old >= g->gc.total); 677 lj_assertG(old >= g->gc.total, "sweep increased memory");
637 g->gc.estimate -= old - g->gc.total; 678 g->gc.estimate -= old - g->gc.total;
638 return GCSWEEPCOST; 679 return GCSWEEPCOST;
639 } 680 }
640 case GCSsweep: { 681 case GCSsweep: {
641 MSize old = g->gc.total; 682 GCSize old = g->gc.total;
642 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); 683 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
643 lua_assert(old >= g->gc.total); 684 lj_assertG(old >= g->gc.total, "sweep increased memory");
644 g->gc.estimate -= old - g->gc.total; 685 g->gc.estimate -= old - g->gc.total;
645 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { 686 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
646 gc_shrink(g, L); 687 if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1)
688 lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */
647 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ 689 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
648 g->gc.state = GCSfinalize; 690 g->gc.state = GCSfinalize;
649#if LJ_HASFFI
650 g->gc.nocdatafin = 1;
651#endif
652 } else { /* Otherwise skip this phase to help the JIT. */ 691 } else { /* Otherwise skip this phase to help the JIT. */
653 g->gc.state = GCSpause; /* End of GC cycle. */ 692 g->gc.state = GCSpause; /* End of GC cycle. */
654 g->gc.debt = 0; 693 g->gc.debt = 0;
@@ -658,21 +697,21 @@ static size_t gc_onestep(lua_State *L)
658 } 697 }
659 case GCSfinalize: 698 case GCSfinalize:
660 if (gcref(g->gc.mmudata) != NULL) { 699 if (gcref(g->gc.mmudata) != NULL) {
661 if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ 700 GCSize old = g->gc.total;
701 if (tvref(g->jit_base)) /* Don't call finalizers on trace. */
662 return LJ_MAX_MEM; 702 return LJ_MAX_MEM;
663 gc_finalize(L); /* Finalize one userdata object. */ 703 gc_finalize(L); /* Finalize one userdata object. */
704 if (old >= g->gc.total && g->gc.estimate > old - g->gc.total)
705 g->gc.estimate -= old - g->gc.total;
664 if (g->gc.estimate > GCFINALIZECOST) 706 if (g->gc.estimate > GCFINALIZECOST)
665 g->gc.estimate -= GCFINALIZECOST; 707 g->gc.estimate -= GCFINALIZECOST;
666 return GCFINALIZECOST; 708 return GCFINALIZECOST;
667 } 709 }
668#if LJ_HASFFI
669 if (!g->gc.nocdatafin) lj_tab_rehash(L, ctype_ctsG(g)->finalizer);
670#endif
671 g->gc.state = GCSpause; /* End of GC cycle. */ 710 g->gc.state = GCSpause; /* End of GC cycle. */
672 g->gc.debt = 0; 711 g->gc.debt = 0;
673 return 0; 712 return 0;
674 default: 713 default:
675 lua_assert(0); 714 lj_assertG(0, "bad GC state");
676 return 0; 715 return 0;
677 } 716 }
678} 717}
@@ -681,7 +720,7 @@ static size_t gc_onestep(lua_State *L)
681int LJ_FASTCALL lj_gc_step(lua_State *L) 720int LJ_FASTCALL lj_gc_step(lua_State *L)
682{ 721{
683 global_State *g = G(L); 722 global_State *g = G(L);
684 MSize lim; 723 GCSize lim;
685 int32_t ostate = g->vmstate; 724 int32_t ostate = g->vmstate;
686 setvmstate(g, GC); 725 setvmstate(g, GC);
687 lim = (GCSTEPSIZE/100) * g->gc.stepmul; 726 lim = (GCSTEPSIZE/100) * g->gc.stepmul;
@@ -690,13 +729,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L)
690 if (g->gc.total > g->gc.threshold) 729 if (g->gc.total > g->gc.threshold)
691 g->gc.debt += g->gc.total - g->gc.threshold; 730 g->gc.debt += g->gc.total - g->gc.threshold;
692 do { 731 do {
693 lim -= (MSize)gc_onestep(L); 732 lim -= (GCSize)gc_onestep(L);
694 if (g->gc.state == GCSpause) { 733 if (g->gc.state == GCSpause) {
695 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; 734 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
696 g->vmstate = ostate; 735 g->vmstate = ostate;
697 return 1; /* Finished a GC cycle. */ 736 return 1; /* Finished a GC cycle. */
698 } 737 }
699 } while ((int32_t)lim > 0); 738 } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0));
700 if (g->gc.debt < GCSTEPSIZE) { 739 if (g->gc.debt < GCSTEPSIZE) {
701 g->gc.threshold = g->gc.total + GCSTEPSIZE; 740 g->gc.threshold = g->gc.total + GCSTEPSIZE;
702 g->vmstate = ostate; 741 g->vmstate = ostate;
@@ -720,8 +759,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
720/* Perform multiple GC steps. Called from JIT-compiled code. */ 759/* Perform multiple GC steps. Called from JIT-compiled code. */
721int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) 760int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
722{ 761{
723 lua_State *L = gco2th(gcref(g->jit_L)); 762 lua_State *L = gco2th(gcref(g->cur_L));
724 L->base = mref(G(L)->jit_base, TValue); 763 L->base = tvref(G(L)->jit_base);
725 L->top = curr_topL(L); 764 L->top = curr_topL(L);
726 while (steps-- > 0 && lj_gc_step(L) == 0) 765 while (steps-- > 0 && lj_gc_step(L) == 0)
727 ; 766 ;
@@ -746,7 +785,8 @@ void lj_gc_fullgc(lua_State *L)
746 } 785 }
747 while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) 786 while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep)
748 gc_onestep(L); /* Finish sweep. */ 787 gc_onestep(L); /* Finish sweep. */
749 lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause); 788 lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause,
789 "bad GC state");
750 /* Now perform a full GC. */ 790 /* Now perform a full GC. */
751 g->gc.state = GCSpause; 791 g->gc.state = GCSpause;
752 do { gc_onestep(L); } while (g->gc.state != GCSpause); 792 do { gc_onestep(L); } while (g->gc.state != GCSpause);
@@ -759,9 +799,11 @@ void lj_gc_fullgc(lua_State *L)
759/* Move the GC propagation frontier forward. */ 799/* Move the GC propagation frontier forward. */
760void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) 800void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
761{ 801{
762 lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); 802 lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o),
763 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); 803 "bad object states for forward barrier");
764 lua_assert(o->gch.gct != ~LJ_TTAB); 804 lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
805 "bad GC state");
806 lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table");
765 /* Preserve invariant during propagation. Otherwise it doesn't matter. */ 807 /* Preserve invariant during propagation. Otherwise it doesn't matter. */
766 if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) 808 if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic)
767 gc_mark(g, v); /* Move frontier forward. */ 809 gc_mark(g, v); /* Move frontier forward. */
@@ -798,7 +840,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv)
798 lj_gc_barrierf(g, o, gcV(&uv->tv)); 840 lj_gc_barrierf(g, o, gcV(&uv->tv));
799 } else { 841 } else {
800 makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ 842 makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */
801 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); 843 lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
844 "bad GC state");
802 } 845 }
803 } 846 }
804} 847}
@@ -815,27 +858,29 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
815/* -- Allocator ----------------------------------------------------------- */ 858/* -- Allocator ----------------------------------------------------------- */
816 859
817/* Call pluggable memory allocator to allocate or resize a fragment. */ 860/* Call pluggable memory allocator to allocate or resize a fragment. */
818void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) 861void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
819{ 862{
820 global_State *g = G(L); 863 global_State *g = G(L);
821 lua_assert((osz == 0) == (p == NULL)); 864 lj_assertG((osz == 0) == (p == NULL), "realloc API violation");
822 p = g->allocf(g->allocd, p, osz, nsz); 865 p = g->allocf(g->allocd, p, osz, nsz);
823 if (p == NULL && nsz > 0) 866 if (p == NULL && nsz > 0)
824 lj_err_mem(L); 867 lj_err_mem(L);
825 lua_assert((nsz == 0) == (p == NULL)); 868 lj_assertG((nsz == 0) == (p == NULL), "allocf API violation");
826 lua_assert(checkptr32(p)); 869 lj_assertG(checkptrGC(p),
870 "allocated memory address %p outside required range", p);
827 g->gc.total = (g->gc.total - osz) + nsz; 871 g->gc.total = (g->gc.total - osz) + nsz;
828 return p; 872 return p;
829} 873}
830 874
831/* Allocate new GC object and link it to the root set. */ 875/* Allocate new GC object and link it to the root set. */
832void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size) 876void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
833{ 877{
834 global_State *g = G(L); 878 global_State *g = G(L);
835 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); 879 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
836 if (o == NULL) 880 if (o == NULL)
837 lj_err_mem(L); 881 lj_err_mem(L);
838 lua_assert(checkptr32(o)); 882 lj_assertG(checkptrGC(o),
883 "allocated memory address %p outside required range", o);
839 g->gc.total += size; 884 g->gc.total += size;
840 setgcrefr(o->gch.nextgc, g->gc.root); 885 setgcrefr(o->gch.nextgc, g->gc.root);
841 setgcref(g->gc.root, o); 886 setgcref(g->gc.root, o);
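
The assertions added to lj_mem_realloc() spell out the allocator contract: a zero old size must come with a NULL pointer, a zero new size must return NULL, and the caller keeps gc.total = total - osz + nsz. A minimal conforming allocator, sketched with the standard lua_Alloc-style signature (illustrative only, not part of the patch):

#include <stdlib.h>

static void *demo_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
{
  (void)ud; (void)osize;
  if (nsize == 0) {              /* Free request: must return NULL. */
    free(ptr);
    return NULL;
  }
  return realloc(ptr, nsize);    /* Alloc/resize: NULL only on failure. */
}
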
diff --git a/src/lj_gc.h b/src/lj_gc.h
index 8c8ed336..3ed790ca 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno);
81static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) 81static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
82{ 82{
83 GCobj *o = obj2gco(t); 83 GCobj *o = obj2gco(t);
84 lua_assert(isblack(o) && !isdead(g, o)); 84 lj_assertG(isblack(o) && !isdead(g, o),
85 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); 85 "bad object states for backward barrier");
86 lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
87 "bad GC state");
86 black2gray(o); 88 black2gray(o);
87 setgcrefr(t->gclist, g->gc.grayagain); 89 setgcrefr(t->gclist, g->gc.grayagain);
88 setgcref(g->gc.grayagain, o); 90 setgcref(g->gc.grayagain, o);
@@ -107,8 +109,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
107 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } 109 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
108 110
109/* Allocator. */ 111/* Allocator. */
110LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); 112LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz);
111LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size); 113LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size);
112LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, 114LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
113 MSize *szp, MSize lim, MSize esz); 115 MSize *szp, MSize lim, MSize esz);
114 116
@@ -116,13 +118,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
116 118
117static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) 119static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
118{ 120{
119 g->gc.total -= (MSize)osize; 121 g->gc.total -= (GCSize)osize;
120 g->allocf(g->allocd, p, osize, 0); 122 g->allocf(g->allocd, p, osize, 0);
121} 123}
122 124
123#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) 125#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t))))
124#define lj_mem_reallocvec(L, p, on, n, t) \ 126#define lj_mem_reallocvec(L, p, on, n, t) \
125 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) 127 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t))))
126#define lj_mem_growvec(L, p, n, m, t) \ 128#define lj_mem_growvec(L, p, n, m, t) \
127 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) 129 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
128#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) 130#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index c0d7a164..56094cf1 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_frame.h" 16#include "lj_frame.h"
17#include "lj_buf.h"
18#include "lj_strfmt.h"
17#include "lj_jit.h" 19#include "lj_jit.h"
18#include "lj_dispatch.h" 20#include "lj_dispatch.h"
19 21
@@ -294,6 +296,9 @@ enum {
294#elif LJ_TARGET_ARM 296#elif LJ_TARGET_ARM
295 DW_REG_SP = 13, 297 DW_REG_SP = 13,
296 DW_REG_RA = 14, 298 DW_REG_RA = 14,
299#elif LJ_TARGET_ARM64
300 DW_REG_SP = 31,
301 DW_REG_RA = 30,
297#elif LJ_TARGET_PPC 302#elif LJ_TARGET_PPC
298 DW_REG_SP = 1, 303 DW_REG_SP = 1,
299 DW_REG_RA = 65, 304 DW_REG_RA = 65,
@@ -358,7 +363,7 @@ static const ELFheader elfhdr_template = {
358 .eosabi = 12, 363 .eosabi = 12,
359#elif defined(__DragonFly__) 364#elif defined(__DragonFly__)
360 .eosabi = 0, 365 .eosabi = 0,
361#elif (defined(__sun__) && defined(__svr4__)) 366#elif LJ_TARGET_SOLARIS
362 .eosabi = 6, 367 .eosabi = 6,
363#else 368#else
364 .eosabi = 0, 369 .eosabi = 0,
@@ -372,6 +377,8 @@ static const ELFheader elfhdr_template = {
372 .machine = 62, 377 .machine = 62,
373#elif LJ_TARGET_ARM 378#elif LJ_TARGET_ARM
374 .machine = 40, 379 .machine = 40,
380#elif LJ_TARGET_ARM64
381 .machine = 183,
375#elif LJ_TARGET_PPC 382#elif LJ_TARGET_PPC
376 .machine = 20, 383 .machine = 20,
377#elif LJ_TARGET_MIPS 384#elif LJ_TARGET_MIPS
@@ -428,16 +435,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
428 *ctx->p++ = '0' + n; 435 *ctx->p++ = '0' + n;
429} 436}
430 437
431/* Add a ULEB128 value. */
432static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
433{
434 uint8_t *p = ctx->p;
435 for (; v >= 0x80; v >>= 7)
436 *p++ = (uint8_t)((v & 0x7f) | 0x80);
437 *p++ = (uint8_t)v;
438 ctx->p = p;
439}
440
441/* Add a SLEB128 value. */ 438/* Add a SLEB128 value. */
442static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) 439static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
443{ 440{
@@ -454,7 +451,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
454#define DU16(x) (*(uint16_t *)p = (x), p += 2) 451#define DU16(x) (*(uint16_t *)p = (x), p += 2)
455#define DU32(x) (*(uint32_t *)p = (x), p += 4) 452#define DU32(x) (*(uint32_t *)p = (x), p += 4)
456#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) 453#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
457#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) 454#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
458#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) 455#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
459#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) 456#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
460#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop 457#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
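
The DUV() macro above now delegates ULEB128 emission to lj_strfmt_wuleb128(); the encoding is the same one the removed gdbjit_uleb128() produced. A standalone sketch of that encoding (hypothetical helper name, not the LuaJIT function):

#include <stdint.h>

static uint8_t *uleb128_write(uint8_t *p, uint32_t v)
{
  while (v >= 0x80) {
    *p++ = (uint8_t)((v & 0x7f) | 0x80);  /* 7 payload bits, MSB = continue. */
    v >>= 7;
  }
  *p++ = (uint8_t)v;                      /* Last byte has MSB clear. */
  return p;
}
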
@@ -564,13 +561,20 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
564 DB(DW_CFA_offset|DW_REG_15); DUV(4); 561 DB(DW_CFA_offset|DW_REG_15); DUV(4);
565 DB(DW_CFA_offset|DW_REG_14); DUV(5); 562 DB(DW_CFA_offset|DW_REG_14); DUV(5);
566 /* Extra registers saved for JIT-compiled code. */ 563 /* Extra registers saved for JIT-compiled code. */
567 DB(DW_CFA_offset|DW_REG_13); DUV(9); 564 DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9);
568 DB(DW_CFA_offset|DW_REG_12); DUV(10); 565 DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10);
569#elif LJ_TARGET_ARM 566#elif LJ_TARGET_ARM
570 { 567 {
571 int i; 568 int i;
572 for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } 569 for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); }
573 } 570 }
571#elif LJ_TARGET_ARM64
572 {
573 int i;
574 DB(DW_CFA_offset|31); DUV(2);
575 for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); }
576 for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); }
577 }
574#elif LJ_TARGET_PPC 578#elif LJ_TARGET_PPC
575 { 579 {
576 int i; 580 int i;
@@ -720,13 +724,27 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
720 SECTALIGN(ctx->p, sizeof(uintptr_t)); 724 SECTALIGN(ctx->p, sizeof(uintptr_t));
721 gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); 725 gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe);
722 ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); 726 ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
723 lua_assert(ctx->objsize < sizeof(GDBJITobj)); 727 lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow");
724} 728}
725 729
726#undef SECTALIGN 730#undef SECTALIGN
727 731
728/* -- Interface to GDB JIT API -------------------------------------------- */ 732/* -- Interface to GDB JIT API -------------------------------------------- */
729 733
734static int gdbjit_lock;
735
736static void gdbjit_lock_acquire()
737{
738 while (__sync_lock_test_and_set(&gdbjit_lock, 1)) {
739 /* Just spin; futexes or pthreads aren't worth the portability cost. */
740 }
741}
742
743static void gdbjit_lock_release()
744{
745 __sync_lock_release(&gdbjit_lock);
746}
747
730/* Add new entry to GDB JIT symbol chain. */ 748/* Add new entry to GDB JIT symbol chain. */
731static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) 749static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
732{ 750{
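
The lock added in this hunk is a plain test-and-set spinlock built from the GCC/Clang __sync builtins, guarding updates to the global GDB JIT entry chain (protection against concurrent registration from multiple VM instances is the assumed motivation; the diff itself does not state it). A standalone sketch of the same pattern protecting a shared list insert:

typedef struct DemoEntry { struct DemoEntry *next; } DemoEntry;

static DemoEntry *demo_head;
static int demo_lock;

static void demo_insert(DemoEntry *e)
{
  while (__sync_lock_test_and_set(&demo_lock, 1))
    ;                               /* Spin until the previous holder releases. */
  e->next = demo_head;              /* Critical section: link the new entry. */
  demo_head = e;
  __sync_lock_release(&demo_lock);  /* Store 0 with release semantics. */
}
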
@@ -738,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
738 ctx->T->gdbjit_entry = (void *)eo; 756 ctx->T->gdbjit_entry = (void *)eo;
739 /* Link new entry to chain and register it. */ 757 /* Link new entry to chain and register it. */
740 eo->entry.prev_entry = NULL; 758 eo->entry.prev_entry = NULL;
759 gdbjit_lock_acquire();
741 eo->entry.next_entry = __jit_debug_descriptor.first_entry; 760 eo->entry.next_entry = __jit_debug_descriptor.first_entry;
742 if (eo->entry.next_entry) 761 if (eo->entry.next_entry)
743 eo->entry.next_entry->prev_entry = &eo->entry; 762 eo->entry.next_entry->prev_entry = &eo->entry;
@@ -747,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
747 __jit_debug_descriptor.relevant_entry = &eo->entry; 766 __jit_debug_descriptor.relevant_entry = &eo->entry;
748 __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; 767 __jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
749 __jit_debug_register_code(); 768 __jit_debug_register_code();
769 gdbjit_lock_release();
750} 770}
751 771
752/* Add debug info for newly compiled trace and notify GDB. */ 772/* Add debug info for newly compiled trace and notify GDB. */
@@ -762,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T)
762 ctx.spadjp = CFRAME_SIZE_JIT + 782 ctx.spadjp = CFRAME_SIZE_JIT +
763 (MSize)(parent ? traceref(J, parent)->spadjust : 0); 783 (MSize)(parent ? traceref(J, parent)->spadjust : 0);
764 ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; 784 ctx.spadj = CFRAME_SIZE_JIT + T->spadjust;
765 lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); 785 lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
786 "start PC out of range");
766 ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); 787 ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
767 ctx.filename = proto_chunknamestr(pt); 788 ctx.filename = proto_chunknamestr(pt);
768 if (*ctx.filename == '@' || *ctx.filename == '=') 789 if (*ctx.filename == '@' || *ctx.filename == '=')
@@ -778,6 +799,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
778{ 799{
779 GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; 800 GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
780 if (eo) { 801 if (eo) {
802 gdbjit_lock_acquire();
781 if (eo->entry.prev_entry) 803 if (eo->entry.prev_entry)
782 eo->entry.prev_entry->next_entry = eo->entry.next_entry; 804 eo->entry.prev_entry->next_entry = eo->entry.next_entry;
783 else 805 else
@@ -787,6 +809,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
787 __jit_debug_descriptor.relevant_entry = &eo->entry; 809 __jit_debug_descriptor.relevant_entry = &eo->entry;
788 __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; 810 __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
789 __jit_debug_register_code(); 811 __jit_debug_register_code();
812 gdbjit_lock_release();
790 lj_mem_free(J2G(J), eo, eo->sz); 813 lj_mem_free(J2G(J), eo, eo->sz);
791 } 814 }
792} 815}
diff --git a/src/lj_ir.c b/src/lj_ir.c
index cad40292..631b7cbc 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_ir.h" 21#include "lj_ir.h"
@@ -29,14 +30,16 @@
29#endif 30#endif
30#include "lj_vm.h" 31#include "lj_vm.h"
31#include "lj_strscan.h" 32#include "lj_strscan.h"
32#include "lj_lib.h" 33#include "lj_serialize.h"
34#include "lj_strfmt.h"
35#include "lj_prng.h"
33 36
34/* Some local macros to save typing. Undef'd at the end. */ 37/* Some local macros to save typing. Undef'd at the end. */
35#define IR(ref) (&J->cur.ir[(ref)]) 38#define IR(ref) (&J->cur.ir[(ref)])
36#define fins (&J->fold.ins) 39#define fins (&J->fold.ins)
37 40
38/* Pass IR on to next optimization in chain (FOLD). */ 41/* Pass IR on to next optimization in chain (FOLD). */
39#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) 42#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
40 43
41/* -- IR tables ----------------------------------------------------------- */ 44/* -- IR tables ----------------------------------------------------------- */
42 45
@@ -88,8 +91,9 @@ static void lj_ir_growbot(jit_State *J)
88{ 91{
89 IRIns *baseir = J->irbuf + J->irbotlim; 92 IRIns *baseir = J->irbuf + J->irbotlim;
90 MSize szins = J->irtoplim - J->irbotlim; 93 MSize szins = J->irtoplim - J->irbotlim;
91 lua_assert(szins != 0); 94 lj_assertJ(szins != 0, "zero IR size");
92 lua_assert(J->cur.nk == J->irbotlim); 95 lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim,
96 "unexpected IR growth");
93 if (J->cur.nins + (szins >> 1) < J->irtoplim) { 97 if (J->cur.nins + (szins >> 1) < J->irtoplim) {
94 /* More than half of the buffer is free on top: shift up by a quarter. */ 98 /* More than half of the buffer is free on top: shift up by a quarter. */
95 MSize ofs = szins >> 2; 99 MSize ofs = szins >> 2;
@@ -143,6 +147,17 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
143 return emitir(CCI_OPTYPE(ci), tr, id); 147 return emitir(CCI_OPTYPE(ci), tr, id);
144} 148}
145 149
150/* Load field of type t from GG_State + offset. Must be 32 bit aligned. */
151TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
152{
153 lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset");
154 ofs >>= 2;
155 lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff,
156 "GG_State field offset breaks 10 bit FOLD key limit");
157 lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
158 return lj_opt_fold(J);
159}
160
146/* -- Interning of constants ---------------------------------------------- */ 161/* -- Interning of constants ---------------------------------------------- */
147 162
148/* 163/*
@@ -163,6 +178,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
163 return ref; 178 return ref;
164} 179}
165 180
181/* Get ref of next 64 bit IR constant and optionally grow IR.
182** Note: this may invalidate all IRIns *!
183*/
184static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
185{
186 IRRef ref = J->cur.nk - 2;
187 lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state");
188 if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
189 J->cur.nk = ref;
190 return ref;
191}
192
193#if LJ_GC64
194#define ir_nextkgc ir_nextk64
195#else
196#define ir_nextkgc ir_nextk
197#endif
198
166/* Intern int32_t constant. */ 199/* Intern int32_t constant. */
167TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) 200TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
168{ 201{
@@ -182,79 +215,21 @@ found:
182 return TREF(ref, IRT_INT); 215 return TREF(ref, IRT_INT);
183} 216}
184 217
185/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the 218/* Intern 64 bit constant, given by its 64 bit pattern. */
186** 64 bit constant. The constants themselves are stored in a chained array 219TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
187** and shared across traces.
188**
189** Rationale for choosing this data structure:
190** - The address of the constants is embedded in the generated machine code
191** and must never move. A resizable array or hash table wouldn't work.
192** - Most apps need very few non-32 bit integer constants (less than a dozen).
193** - Linear search is hard to beat in terms of speed and low complexity.
194*/
195typedef struct K64Array {
196 MRef next; /* Pointer to next list. */
197 MSize numk; /* Number of used elements in this array. */
198 TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
199} K64Array;
200
201/* Free all chained arrays. */
202void lj_ir_k64_freeall(jit_State *J)
203{
204 K64Array *k;
205 for (k = mref(J->k64, K64Array); k; ) {
206 K64Array *next = mref(k->next, K64Array);
207 lj_mem_free(J2G(J), k, sizeof(K64Array));
208 k = next;
209 }
210}
211
212/* Find 64 bit constant in chained array or add it. */
213cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
214{
215 K64Array *k, *kp = NULL;
216 TValue *ntv;
217 MSize idx;
218 /* Search for the constant in the whole chain of arrays. */
219 for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
220 kp = k; /* Remember previous element in list. */
221 for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
222 TValue *tv = &k->k[idx];
223 if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
224 return tv;
225 }
226 }
227 /* Constant was not found, need to add it. */
228 if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
229 K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
230 setmref(kn->next, NULL);
231 kn->numk = 0;
232 if (kp)
233 setmref(kp->next, kn); /* Chain to the end of the list. */
234 else
235 setmref(J->k64, kn); /* Link first array. */
236 kp = kn;
237 }
238 ntv = &kp->k[kp->numk++]; /* Add to current array. */
239 ntv->u64 = u64;
240 return ntv;
241}
242
243/* Intern 64 bit constant, given by its address. */
244TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
245{ 220{
246 IRIns *ir, *cir = J->cur.ir; 221 IRIns *ir, *cir = J->cur.ir;
247 IRRef ref; 222 IRRef ref;
248 IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; 223 IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
249 for (ref = J->chain[op]; ref; ref = cir[ref].prev) 224 for (ref = J->chain[op]; ref; ref = cir[ref].prev)
250 if (ir_k64(&cir[ref]) == tv) 225 if (ir_k64(&cir[ref])->u64 == u64)
251 goto found; 226 goto found;
252 ref = ir_nextk(J); 227 ref = ir_nextk64(J);
253 ir = IR(ref); 228 ir = IR(ref);
254 lua_assert(checkptr32(tv)); 229 ir[1].tv.u64 = u64;
255 setmref(ir->ptr, tv);
256 ir->t.irt = t; 230 ir->t.irt = t;
257 ir->o = op; 231 ir->o = op;
232 ir->op12 = 0;
258 ir->prev = J->chain[op]; 233 ir->prev = J->chain[op];
259 J->chain[op] = (IRRef1)ref; 234 J->chain[op] = (IRRef1)ref;
260found: 235found:
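
With the chained K64Array gone, lj_ir_k64() above stores the 64 bit payload directly in a second IR slot (ir[1].tv.u64) obtained from ir_nextk64(), which advances the downward-growing constant area by two slots. A toy sketch of that two-slot layout, with simplified stand-in types and fields rather than the real IRIns definition:

#include <stdint.h>

typedef union ToyIns {
  struct { uint32_t op12; uint16_t ot; uint16_t prev; } i;  /* Instruction word. */
  uint64_t payload;                                         /* 64 bit constant data. */
} ToyIns;

/* Allocate a 64 bit constant: the first slot carries opcode/type bookkeeping,
** the adjacent slot holds the raw value. nk is the current constant watermark. */
static uint32_t toy_kint64(ToyIns *ir, uint32_t nk, uint64_t k)
{
  uint32_t ref = nk - 2;        /* Constants grow downwards, two slots at once. */
  ir[ref].i.op12 = 0;
  ir[ref + 1].payload = k;
  return ref;
}
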
@@ -264,13 +239,13 @@ found:
264/* Intern FP constant, given by its 64 bit pattern. */ 239/* Intern FP constant, given by its 64 bit pattern. */
265TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) 240TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
266{ 241{
267 return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); 242 return lj_ir_k64(J, IR_KNUM, u64);
268} 243}
269 244
270/* Intern 64 bit integer constant. */ 245/* Intern 64 bit integer constant. */
271TRef lj_ir_kint64(jit_State *J, uint64_t u64) 246TRef lj_ir_kint64(jit_State *J, uint64_t u64)
272{ 247{
273 return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); 248 return lj_ir_k64(J, IR_KINT64, u64);
274} 249}
275 250
276/* Check whether a number is int and return it. -0 is NOT considered an int. */ 251/* Check whether a number is int and return it. -0 is NOT considered an int. */
@@ -305,14 +280,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
305{ 280{
306 IRIns *ir, *cir = J->cur.ir; 281 IRIns *ir, *cir = J->cur.ir;
307 IRRef ref; 282 IRRef ref;
308 lua_assert(!isdead(J2G(J), o)); 283 lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object");
309 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) 284 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
310 if (ir_kgc(&cir[ref]) == o) 285 if (ir_kgc(&cir[ref]) == o)
311 goto found; 286 goto found;
312 ref = ir_nextk(J); 287 ref = ir_nextkgc(J);
313 ir = IR(ref); 288 ir = IR(ref);
314 /* NOBARRIER: Current trace is a GC root. */ 289 /* NOBARRIER: Current trace is a GC root. */
315 setgcref(ir->gcr, o); 290 ir->op12 = 0;
291 setgcref(ir[LJ_GC64].gcr, o);
316 ir->t.irt = (uint8_t)t; 292 ir->t.irt = (uint8_t)t;
317 ir->o = IR_KGC; 293 ir->o = IR_KGC;
318 ir->prev = J->chain[IR_KGC]; 294 ir->prev = J->chain[IR_KGC];
@@ -321,24 +297,44 @@ found:
321 return TREF(ref, t); 297 return TREF(ref, t);
322} 298}
323 299
324/* Intern 32 bit pointer constant. */ 300/* Allocate GCtrace constant placeholder (no interning). */
301TRef lj_ir_ktrace(jit_State *J)
302{
303 IRRef ref = ir_nextkgc(J);
304 IRIns *ir = IR(ref);
305 lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping");
306 ir->t.irt = IRT_P64;
307 ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */
308 ir->op12 = 0;
309 ir->prev = 0;
310 return TREF(ref, IRT_P64);
311}
312
313/* Intern pointer constant. */
325TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) 314TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
326{ 315{
327 IRIns *ir, *cir = J->cur.ir; 316 IRIns *ir, *cir = J->cur.ir;
328 IRRef ref; 317 IRRef ref;
329 lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); 318#if LJ_64 && !LJ_GC64
319 lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer");
320#endif
330 for (ref = J->chain[op]; ref; ref = cir[ref].prev) 321 for (ref = J->chain[op]; ref; ref = cir[ref].prev)
331 if (mref(cir[ref].ptr, void) == ptr) 322 if (ir_kptr(&cir[ref]) == ptr)
332 goto found; 323 goto found;
324#if LJ_GC64
325 ref = ir_nextk64(J);
326#else
333 ref = ir_nextk(J); 327 ref = ir_nextk(J);
328#endif
334 ir = IR(ref); 329 ir = IR(ref);
335 setmref(ir->ptr, ptr); 330 ir->op12 = 0;
336 ir->t.irt = IRT_P32; 331 setmref(ir[LJ_GC64].ptr, ptr);
332 ir->t.irt = IRT_PGC;
337 ir->o = op; 333 ir->o = op;
338 ir->prev = J->chain[op]; 334 ir->prev = J->chain[op];
339 J->chain[op] = (IRRef1)ref; 335 J->chain[op] = (IRRef1)ref;
340found: 336found:
341 return TREF(ref, IRT_P32); 337 return TREF(ref, IRT_PGC);
342} 338}
343 339
344/* Intern typed NULL constant. */ 340/* Intern typed NULL constant. */
@@ -367,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot)
367 IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); 363 IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot);
368 IRRef ref; 364 IRRef ref;
369 /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ 365 /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */
370 lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); 366 lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot,
367 "out-of-range key/slot");
371 for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) 368 for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev)
372 if (cir[ref].op12 == op12) 369 if (cir[ref].op12 == op12)
373 goto found; 370 goto found;
@@ -388,14 +385,15 @@ found:
388void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) 385void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
389{ 386{
390 UNUSED(L); 387 UNUSED(L);
391 lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ 388 lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. */
392 switch (ir->o) { 389 switch (ir->o) {
393 case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break; 390 case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
394 case IR_KINT: setintV(tv, ir->i); break; 391 case IR_KINT: setintV(tv, ir->i); break;
395 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; 392 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
396 case IR_KPTR: case IR_KKPTR: case IR_KNULL: 393 case IR_KPTR: case IR_KKPTR:
397 setlightudV(tv, mref(ir->ptr, void)); 394 setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir));
398 break; 395 break;
396 case IR_KNULL: setintV(tv, 0); break;
399 case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; 397 case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
400#if LJ_HASFFI 398#if LJ_HASFFI
401 case IR_KINT64: { 399 case IR_KINT64: {
@@ -405,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
405 break; 403 break;
406 } 404 }
407#endif 405#endif
408 default: lua_assert(0); break; 406 default: lj_assertL(0, "bad IR constant op %d", ir->o); break;
409 } 407 }
410} 408}
411 409
@@ -443,7 +441,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
443 if (!tref_isstr(tr)) { 441 if (!tref_isstr(tr)) {
444 if (!tref_isnumber(tr)) 442 if (!tref_isnumber(tr))
445 lj_trace_err(J, LJ_TRERR_BADTYPE); 443 lj_trace_err(J, LJ_TRERR_BADTYPE);
446 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 444 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
445 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
447 } 446 }
448 return tr; 447 return tr;
449} 448}
@@ -464,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op)
464 case IR_UGE: return !(a < b); 463 case IR_UGE: return !(a < b);
465 case IR_ULE: return !(a > b); 464 case IR_ULE: return !(a > b);
466 case IR_UGT: return !(a <= b); 465 case IR_UGT: return !(a <= b);
467 default: lua_assert(0); return 0; 466 default: lj_assertX(0, "bad IR op %d", op); return 0;
468 } 467 }
469} 468}
470 469
@@ -477,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op)
477 case IR_GE: return (res >= 0); 476 case IR_GE: return (res >= 0);
478 case IR_LE: return (res <= 0); 477 case IR_LE: return (res <= 0);
479 case IR_GT: return (res > 0); 478 case IR_GT: return (res > 0);
480 default: lua_assert(0); return 0; 479 default: lj_assertX(0, "bad IR op %d", op); return 0;
481 } 480 }
482} 481}
483 482
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 6d974ed2..cc73a849 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -40,6 +40,7 @@
40 _(USE, S , ref, ___) \ 40 _(USE, S , ref, ___) \
41 _(PHI, S , ref, ref) \ 41 _(PHI, S , ref, ref) \
42 _(RENAME, S , ref, lit) \ 42 _(RENAME, S , ref, lit) \
43 _(PROF, S , ___, ___) \
43 \ 44 \
44 /* Constants. */ \ 45 /* Constants. */ \
45 _(KPRI, N , ___, ___) \ 46 _(KPRI, N , ___, ___) \
@@ -74,10 +75,9 @@
74 _(NEG, N , ref, ref) \ 75 _(NEG, N , ref, ref) \
75 \ 76 \
76 _(ABS, N , ref, ref) \ 77 _(ABS, N , ref, ref) \
77 _(ATAN2, N , ref, ref) \
78 _(LDEXP, N , ref, ref) \ 78 _(LDEXP, N , ref, ref) \
79 _(MIN, C , ref, ref) \ 79 _(MIN, N , ref, ref) \
80 _(MAX, C , ref, ref) \ 80 _(MAX, N , ref, ref) \
81 _(FPMATH, N , ref, lit) \ 81 _(FPMATH, N , ref, lit) \
82 \ 82 \
83 /* Overflow-checking arithmetic ops. */ \ 83 /* Overflow-checking arithmetic ops. */ \
@@ -95,7 +95,9 @@
95 _(UREFO, LW, ref, lit) \ 95 _(UREFO, LW, ref, lit) \
96 _(UREFC, LW, ref, lit) \ 96 _(UREFC, LW, ref, lit) \
97 _(FREF, R , ref, lit) \ 97 _(FREF, R , ref, lit) \
98 _(TMPREF, S , ref, lit) \
98 _(STRREF, N , ref, ref) \ 99 _(STRREF, N , ref, ref) \
100 _(LREF, L , ___, ___) \
99 \ 101 \
100 /* Loads and Stores. These must be in the same order. */ \ 102 /* Loads and Stores. These must be in the same order. */ \
101 _(ALOAD, L , ref, ___) \ 103 _(ALOAD, L , ref, ___) \
@@ -104,7 +106,8 @@
104 _(FLOAD, L , ref, lit) \ 106 _(FLOAD, L , ref, lit) \
105 _(XLOAD, L , ref, lit) \ 107 _(XLOAD, L , ref, lit) \
106 _(SLOAD, L , lit, lit) \ 108 _(SLOAD, L , lit, lit) \
107 _(VLOAD, L , ref, ___) \ 109 _(VLOAD, L , ref, lit) \
110 _(ALEN, L , ref, ref) \
108 \ 111 \
109 _(ASTORE, S , ref, ref) \ 112 _(ASTORE, S , ref, ref) \
110 _(HSTORE, S , ref, ref) \ 113 _(HSTORE, S , ref, ref) \
@@ -120,6 +123,11 @@
120 _(CNEW, AW, ref, ref) \ 123 _(CNEW, AW, ref, ref) \
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 124 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 125 \
126 /* Buffer operations. */ \
127 _(BUFHDR, L , ref, lit) \
128 _(BUFPUT, LW, ref, ref) \
129 _(BUFSTR, AW, ref, ref) \
130 \
123 /* Barriers. */ \ 131 /* Barriers. */ \
124 _(TBAR, S , ref, ___) \ 132 _(TBAR, S , ref, ___) \
125 _(OBAR, S , ref, ref) \ 133 _(OBAR, S , ref, ref) \
@@ -128,12 +136,13 @@
128 /* Type conversions. */ \ 136 /* Type conversions. */ \
129 _(CONV, N , ref, lit) \ 137 _(CONV, N , ref, lit) \
130 _(TOBIT, N , ref, ref) \ 138 _(TOBIT, N , ref, ref) \
131 _(TOSTR, N , ref, ___) \ 139 _(TOSTR, N , ref, lit) \
132 _(STRTO, N , ref, ___) \ 140 _(STRTO, N , ref, ___) \
133 \ 141 \
134 /* Calls. */ \ 142 /* Calls. */ \
135 _(CALLN, N , ref, lit) \ 143 _(CALLN, NW, ref, lit) \
136 _(CALLL, L , ref, lit) \ 144 _(CALLA, AW, ref, lit) \
145 _(CALLL, LW, ref, lit) \
137 _(CALLS, S , ref, lit) \ 146 _(CALLS, S , ref, lit) \
138 _(CALLXS, S , ref, ref) \ 147 _(CALLXS, S , ref, ref) \
139 _(CARG, N , ref, ref) \ 148 _(CARG, N , ref, ref) \
@@ -170,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
170/* FPMATH sub-functions. ORDER FPM. */ 179/* FPMATH sub-functions. ORDER FPM. */
171#define IRFPMDEF(_) \ 180#define IRFPMDEF(_) \
172 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ 181 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
173 _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ 182 _(SQRT) _(LOG) _(LOG2) \
174 _(SIN) _(COS) _(TAN) \
175 _(OTHER) 183 _(OTHER)
176 184
177typedef enum { 185typedef enum {
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM)
186 _(STR_LEN, offsetof(GCstr, len)) \ 194 _(STR_LEN, offsetof(GCstr, len)) \
187 _(FUNC_ENV, offsetof(GCfunc, l.env)) \ 195 _(FUNC_ENV, offsetof(GCfunc, l.env)) \
188 _(FUNC_PC, offsetof(GCfunc, l.pc)) \ 196 _(FUNC_PC, offsetof(GCfunc, l.pc)) \
197 _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
198 _(THREAD_ENV, offsetof(lua_State, env)) \
189 _(TAB_META, offsetof(GCtab, metatable)) \ 199 _(TAB_META, offsetof(GCtab, metatable)) \
190 _(TAB_ARRAY, offsetof(GCtab, array)) \ 200 _(TAB_ARRAY, offsetof(GCtab, array)) \
191 _(TAB_NODE, offsetof(GCtab, node)) \ 201 _(TAB_NODE, offsetof(GCtab, node)) \
@@ -195,9 +205,15 @@ IRFPMDEF(FPMENUM)
195 _(UDATA_META, offsetof(GCudata, metatable)) \ 205 _(UDATA_META, offsetof(GCudata, metatable)) \
196 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ 206 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
197 _(UDATA_FILE, sizeof(GCudata)) \ 207 _(UDATA_FILE, sizeof(GCudata)) \
208 _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \
209 _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \
210 _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \
211 _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \
212 _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \
213 _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \
198 _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ 214 _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \
199 _(CDATA_PTR, sizeof(GCcdata)) \ 215 _(CDATA_PTR, sizeof(GCcdata)) \
200 _(CDATA_INT, sizeof(GCcdata)) \ 216 _(CDATA_INT, sizeof(GCcdata)) \
201 _(CDATA_INT64, sizeof(GCcdata)) \ 217 _(CDATA_INT64, sizeof(GCcdata)) \
202 _(CDATA_INT64_4, sizeof(GCcdata) + 4) 218 _(CDATA_INT64_4, sizeof(GCcdata) + 4)
203 219
@@ -208,18 +224,29 @@ IRFLDEF(FLENUM)
208 IRFL__MAX 224 IRFL__MAX
209} IRFieldID; 225} IRFieldID;
210 226
227/* TMPREF mode bits, stored in op2. */
228#define IRTMPREF_IN1 0x01 /* First input value. */
229#define IRTMPREF_OUT1 0x02 /* First output value. */
230#define IRTMPREF_OUT2 0x04 /* Second output value. */
231
211/* SLOAD mode bits, stored in op2. */ 232/* SLOAD mode bits, stored in op2. */
212#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ 233#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
213#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ 234#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
214#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ 235#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
215#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ 236#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
216#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ 237#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
217#define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ 238#define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */
239#define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */
218 240
219/* XLOAD mode, stored in op2. */ 241/* XLOAD mode bits, stored in op2. */
220#define IRXLOAD_READONLY 1 /* Load from read-only data. */ 242#define IRXLOAD_READONLY 0x01 /* Load from read-only data. */
221#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ 243#define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */
222#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ 244#define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. */
245
246/* BUFHDR mode, stored in op2. */
247#define IRBUFHDR_RESET 0 /* Reset buffer. */
248#define IRBUFHDR_APPEND 1 /* Append to buffer. */
249#define IRBUFHDR_WRITE 2 /* Write to string buffer. */
223 250
224/* CONV mode, stored in op2. */ 251/* CONV mode, stored in op2. */
225#define IRCONV_SRCMASK 0x001f /* Source IRType. */ 252#define IRCONV_SRCMASK 0x001f /* Source IRType. */
@@ -227,7 +254,6 @@ IRFLDEF(FLENUM)
227#define IRCONV_DSH 5 254#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 255#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 256#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 257#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 258#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 259#define IRCONV_CONVMASK 0xf000
@@ -237,6 +263,12 @@ IRFLDEF(FLENUM)
237#define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ 263#define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */
238#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ 264#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
239#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ 265#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
266#define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */
267
268/* TOSTR mode, stored in op2. */
269#define IRTOSTR_INT 0 /* Convert integer to string. */
270#define IRTOSTR_NUM 1 /* Convert number to string. */
271#define IRTOSTR_CHAR 2 /* Convert char value to string. */
240 272
241/* -- IR operands --------------------------------------------------------- */ 273/* -- IR operands --------------------------------------------------------- */
242 274
@@ -276,7 +308,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
276 308
277/* -- IR instruction types ------------------------------------------------ */ 309/* -- IR instruction types ------------------------------------------------ */
278 310
279/* Map of itypes to non-negative numbers. ORDER LJ_T. 311#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4)
312
313/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T.
280** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for 314** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for
281** IRT_P32 and IRT_P64, which never escape the IR. 315** IRT_P32 and IRT_P64, which never escape the IR.
282** The various integers are only used in the IR and can only escape to 316** The various integers are only used in the IR and can only escape to
@@ -284,12 +318,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
284** contiguous and next to IRT_NUM (see the typerange macros below). 318** contiguous and next to IRT_NUM (see the typerange macros below).
285*/ 319*/
286#define IRTDEF(_) \ 320#define IRTDEF(_) \
287 _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ 321 _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \
288 _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ 322 _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \
289 _(TAB, 4) _(UDATA, 4) \ 323 _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \
324 _(UDATA, IRTSIZE_PGC) \
290 _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ 325 _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \
291 _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ 326 _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \
292 _(SOFTFP, 4) /* There is room for 9 more types. */ 327 _(SOFTFP, 4) /* There is room for 8 more types. */
293 328
294/* IR result type and flags (8 bit). */ 329/* IR result type and flags (8 bit). */
295typedef enum { 330typedef enum {
@@ -300,6 +335,8 @@ IRTDEF(IRTENUM)
300 335
301 /* Native pointer type and the corresponding integer type. */ 336 /* Native pointer type and the corresponding integer type. */
302 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, 337 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
338 IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32,
339 IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT,
303 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, 340 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
304 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, 341 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
305 342
@@ -354,7 +391,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
354#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) 391#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
355#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) 392#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
356 393
357#if LJ_64 394#if LJ_GC64
395/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */
396#define IRT_IS64 \
397 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
398 (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
399 (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\
400 (1u<<IRT_NIL))
401#elif LJ_64
358#define IRT_IS64 \ 402#define IRT_IS64 \
359 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) 403 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
360#else 404#else
@@ -375,7 +419,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
375 return IRT_INT; 419 return IRT_INT;
376 else if (tvisnum(tv)) 420 else if (tvisnum(tv))
377 return IRT_NUM; 421 return IRT_NUM;
378#if LJ_64 422#if LJ_64 && !LJ_GC64
379 else if (tvislightud(tv)) 423 else if (tvislightud(tv))
380 return IRT_LIGHTUD; 424 return IRT_LIGHTUD;
381#endif 425#endif
@@ -385,11 +429,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
385 429
386static LJ_AINLINE uint32_t irt_toitype_(IRType t) 430static LJ_AINLINE uint32_t irt_toitype_(IRType t)
387{ 431{
388 lua_assert(!LJ_64 || t != IRT_LIGHTUD); 432 lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD,
433 "no plain type tag for lightuserdata");
389 if (LJ_DUALNUM && t > IRT_NUM) { 434 if (LJ_DUALNUM && t > IRT_NUM) {
390 return LJ_TISNUM; 435 return LJ_TISNUM;
391 } else { 436 } else {
392 lua_assert(t <= IRT_NUM); 437 lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t);
393 return ~(uint32_t)t; 438 return ~(uint32_t)t;
394 } 439 }
395} 440}
@@ -452,6 +497,7 @@ typedef uint32_t TRef;
452#define TREF_REFMASK 0x0000ffff 497#define TREF_REFMASK 0x0000ffff
453#define TREF_FRAME 0x00010000 498#define TREF_FRAME 0x00010000
454#define TREF_CONT 0x00020000 499#define TREF_CONT 0x00020000
500#define TREF_KEYINDEX 0x00100000
455 501
456#define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) 502#define TREF(ref, t) ((TRef)((ref) + ((t)<<24)))
457 503
@@ -465,6 +511,7 @@ typedef uint32_t TRef;
465#define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) 511#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
466#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) 512#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
467#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) 513#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
514#define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD))
468#define tref_isstr(tr) (tref_istype((tr), IRT_STR)) 515#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
469#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) 516#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
470#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) 517#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA))
@@ -497,7 +544,9 @@ typedef uint32_t TRef;
497** +-------+-------+---+---+---+---+ 544** +-------+-------+---+---+---+---+
498** | op1 | op2 | t | o | r | s | 545** | op1 | op2 | t | o | r | s |
499** +-------+-------+---+---+---+---+ 546** +-------+-------+---+---+---+---+
500** | op12/i/gco | ot | prev | (alternative fields in union) 547** | op12/i/gco32 | ot | prev | (alternative fields in union)
548** +-------+-------+---+---+---+---+
549** | TValue/gco64 | (2nd IR slot for 64 bit constants)
501** +---------------+-------+-------+ 550** +---------------+-------+-------+
502** 32 16 16 551** 32 16 16
503** 552**
@@ -525,21 +574,27 @@ typedef union IRIns {
525 ) 574 )
526 }; 575 };
527 int32_t i; /* 32 bit signed integer literal (overlaps op12). */ 576 int32_t i; /* 32 bit signed integer literal (overlaps op12). */
528 GCRef gcr; /* GCobj constant (overlaps op12). */ 577 GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */
529 MRef ptr; /* Pointer constant (overlaps op12). */ 578 MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */
579 TValue tv; /* TValue constant (overlaps entire slot). */
530} IRIns; 580} IRIns;
531 581
532#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) 582#define ir_isk64(ir) \
583 ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
584 (LJ_GC64 && \
585 ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)))
586
587#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
533#define ir_kstr(ir) (gco2str(ir_kgc((ir)))) 588#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
534#define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) 589#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
535#define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) 590#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
536#define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) 591#define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
537#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) 592#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
538#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) 593#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
539#define ir_k64(ir) \ 594#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv)
540 check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
541#define ir_kptr(ir) \ 595#define ir_kptr(ir) \
542 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) 596 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
597 mref((ir)[LJ_GC64].ptr, void))
543 598
544/* A store or any other op with a non-weak guard has a side-effect. */ 599/* A store or any other op with a non-weak guard has a side-effect. */
545static LJ_AINLINE int ir_sideeff(IRIns *ir) 600static LJ_AINLINE int ir_sideeff(IRIns *ir)
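With LJ_GC64 enabled, GC object and pointer constants no longer fit into the 32 bit op12 field, so the accessors above fetch the payload from the IR slot that follows the constant: ir_kgc() indexes (ir)[LJ_GC64] and ir_k64() reads &(ir)[1].tv. The standalone sketch below only illustrates that two-slot layout; the demo_* names and the simplified union are assumptions, not LuaJIT definitions.

#include <stdint.h>
#include <stdio.h>

typedef union demo_IRIns {
  struct { uint16_t op1, op2, ot, prev; } f;  /* 1st slot: opcode/operands. */
  uint64_t k64;                               /* 2nd slot: raw 64 bit payload. */
} demo_IRIns;

int main(void)
{
  demo_IRIns k[2] = {{{0, 0, 0, 0}}, {{0, 0, 0, 0}}};
  k[1].k64 = 0x4045000000000000ULL;  /* IEEE-754 bit pattern of 42.0. */
  /* Mirrors ir_k64(ir) == &(ir)[1].tv: the payload lives in the next slot. */
  printf("constant payload: 0x%016llx\n", (unsigned long long)k[1].k64);
  return 0;
}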
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 4e99b8b5..f342cdd2 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -16,22 +16,26 @@ typedef struct CCallInfo {
16 uint32_t flags; /* Number of arguments and flags. */ 16 uint32_t flags; /* Number of arguments and flags. */
17} CCallInfo; 17} CCallInfo;
18 18
19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ 19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */
20#define CCI_NARGS_MAX 32 /* Max. # of args. */ 20#define CCI_NARGS_MAX 32 /* Max. # of args. */
21 21
22#define CCI_OTSHIFT 16 22#define CCI_OTSHIFT 16
23#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ 23#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
24#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE)
24#define CCI_OPSHIFT 24 25#define CCI_OPSHIFT 24
25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ 26#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
26 27
27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) 28#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
29#define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT)
28#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) 30#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
29#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) 31#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
30#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) 32#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
33#define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL)
31#define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL) 34#define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL)
32#define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL) 35#define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL)
33 36
34/* C call info flags. */ 37/* C call info flags. */
38#define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */
35#define CCI_L 0x0100 /* Implicit L arg. */ 39#define CCI_L 0x0100 /* Implicit L arg. */
36#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ 40#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
37#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ 41#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
@@ -45,10 +49,21 @@ typedef struct CCallInfo {
45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ 49#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ 50#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
47 51
52/* Extra args for SOFTFP, SPLIT 64 bit. */
53#define CCI_XARGS_SHIFT 14
54#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
55#define CCI_XA (1u << CCI_XARGS_SHIFT)
56
57#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
58#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
59#else
60#define CCI_XNARGS(ci) CCI_NARGS((ci))
61#endif
62
48/* Helpers for conditional function definitions. */ 63/* Helpers for conditional function definitions. */
49#define IRCALLCOND_ANY(x) x 64#define IRCALLCOND_ANY(x) x
50 65
51#if LJ_TARGET_X86ORX64 66#if LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64
52#define IRCALLCOND_FPMATH(x) NULL 67#define IRCALLCOND_FPMATH(x) NULL
53#else 68#else
54#define IRCALLCOND_FPMATH(x) x 69#define IRCALLCOND_FPMATH(x) x
@@ -66,6 +81,18 @@ typedef struct CCallInfo {
66#define IRCALLCOND_SOFTFP_FFI(x) NULL 81#define IRCALLCOND_SOFTFP_FFI(x) NULL
67#endif 82#endif
68 83
84#if LJ_SOFTFP && LJ_TARGET_MIPS
85#define IRCALLCOND_SOFTFP_MIPS(x) x
86#else
87#define IRCALLCOND_SOFTFP_MIPS(x) NULL
88#endif
89
90#if LJ_SOFTFP && LJ_TARGET_MIPS64
91#define IRCALLCOND_SOFTFP_MIPS64(x) x
92#else
93#define IRCALLCOND_SOFTFP_MIPS64(x) NULL
94#endif
95
69#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) 96#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
70 97
71#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) 98#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
@@ -86,93 +113,158 @@ typedef struct CCallInfo {
86#define IRCALLCOND_FFI32(x) NULL 113#define IRCALLCOND_FFI32(x) NULL
87#endif 114#endif
88 115
116#if LJ_HASBUFFER
117#define IRCALLCOND_BUFFER(x) x
118#else
119#define IRCALLCOND_BUFFER(x) NULL
120#endif
121
122#if LJ_HASBUFFER && LJ_HASFFI
123#define IRCALLCOND_BUFFFI(x) x
124#else
125#define IRCALLCOND_BUFFFI(x) NULL
126#endif
127
89#if LJ_SOFTFP 128#if LJ_SOFTFP
90#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ 129#define XA_FP CCI_XA
130#define XA2_FP (CCI_XA+CCI_XA)
91#else 131#else
92#define ARG1_FP 1 132#define XA_FP 0
133#define XA2_FP 0
134#endif
135
136#if LJ_SOFTFP32
137#define XA_FP32 CCI_XA
138#define XA2_FP32 (CCI_XA+CCI_XA)
139#else
140#define XA_FP32 0
141#define XA2_FP32 0
93#endif 142#endif
94 143
95#if LJ_32 144#if LJ_32
96#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ 145#define XA_64 CCI_XA
146#define XA2_64 (CCI_XA+CCI_XA)
97#else 147#else
98#define ARG2_64 2 148#define XA_64 0
149#define XA2_64 0
99#endif 150#endif
100 151
101/* Function definitions for CALL* instructions. */ 152/* Function definitions for CALL* instructions. */
102#define IRCALLDEF(_) \ 153#define IRCALLDEF(_) \
103 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ 154 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
104 _(ANY, lj_str_new, 3, S, STR, CCI_L) \ 155 _(ANY, lj_str_find, 4, N, PGC, 0) \
156 _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \
105 _(ANY, lj_strscan_num, 2, FN, INT, 0) \ 157 _(ANY, lj_strscan_num, 2, FN, INT, 0) \
106 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ 158 _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \
107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 159 _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \
108 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 160 _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \
109 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 161 _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \
110 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ 162 _(ANY, lj_strfmt_putnum, 2, FL, PGC, CCI_T) \
163 _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \
164 _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \
165 _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \
166 _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \
167 _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \
168 _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \
169 _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \
170 _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \
171 _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \
172 _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \
173 _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \
174 _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \
175 _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \
176 _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \
177 _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \
178 _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \
179 _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \
180 _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \
181 _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \
182 _(BUFFER, lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \
183 _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \
184 _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \
185 _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \
186 _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \
187 _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \
188 _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
189 _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \
190 _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \
191 _(ANY, lj_vm_next, 2, FL, PTR, 0) \
111 _(ANY, lj_tab_len, 1, FL, INT, 0) \ 192 _(ANY, lj_tab_len, 1, FL, INT, 0) \
193 _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
112 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ 194 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
113 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ 195 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
114 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ 196 _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \
115 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ 197 _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \
116 _(ANY, lj_vm_modi, 2, FN, INT, 0) \ 198 _(ANY, lj_vm_modi, 2, FN, INT, 0) \
117 _(ANY, sinh, ARG1_FP, N, NUM, 0) \ 199 _(ANY, log10, 1, N, NUM, XA_FP) \
118 _(ANY, cosh, ARG1_FP, N, NUM, 0) \ 200 _(ANY, exp, 1, N, NUM, XA_FP) \
119 _(ANY, tanh, ARG1_FP, N, NUM, 0) \ 201 _(ANY, sin, 1, N, NUM, XA_FP) \
120 _(ANY, fputc, 2, S, INT, 0) \ 202 _(ANY, cos, 1, N, NUM, XA_FP) \
121 _(ANY, fwrite, 4, S, INT, 0) \ 203 _(ANY, tan, 1, N, NUM, XA_FP) \
122 _(ANY, fflush, 1, S, INT, 0) \ 204 _(ANY, asin, 1, N, NUM, XA_FP) \
205 _(ANY, acos, 1, N, NUM, XA_FP) \
206 _(ANY, atan, 1, N, NUM, XA_FP) \
207 _(ANY, sinh, 1, N, NUM, XA_FP) \
208 _(ANY, cosh, 1, N, NUM, XA_FP) \
209 _(ANY, tanh, 1, N, NUM, XA_FP) \
210 _(ANY, fputc, 2, S, INT, 0) \
211 _(ANY, fwrite, 4, S, INT, 0) \
212 _(ANY, fflush, 1, S, INT, 0) \
123 /* ORDER FPM */ \ 213 /* ORDER FPM */ \
124 _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ 214 _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \
125 _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ 215 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
126 _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ 216 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
127 _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ 217 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
128 _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ 218 _(ANY, log, 1, N, NUM, XA_FP) \
129 _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ 219 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
130 _(FPMATH, log, ARG1_FP, N, NUM, 0) \ 220 _(ANY, pow, 2, N, NUM, XA2_FP) \
131 _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ 221 _(ANY, atan2, 2, N, NUM, XA2_FP) \
132 _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ 222 _(ANY, ldexp, 2, N, NUM, XA_FP) \
133 _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ 223 _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
134 _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ 224 _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \
135 _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ 225 _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \
136 _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ 226 _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \
137 _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ 227 _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \
138 _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ 228 _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \
139 _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \
140 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
141 _(SOFTFP, softfp_add, 4, N, NUM, 0) \
142 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
143 _(SOFTFP, softfp_mul, 4, N, NUM, 0) \
144 _(SOFTFP, softfp_div, 4, N, NUM, 0) \
145 _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \
146 _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ 229 _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \
147 _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ 230 _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \
231 _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \
232 _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \
233 _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
148 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ 234 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
149 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ 235 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
150 _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ 236 _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
151 _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ 237 _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
152 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ 238 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
153 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 239 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
154 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 240 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
155 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ 241 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
156 _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ 242 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
157 _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ 243 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
158 _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ 244 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
159 _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ 245 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
160 _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ 246 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
161 _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ 247 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
162 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ 248 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
163 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ 249 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
164 _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 250 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
165 _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 251 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
166 _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 252 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
167 _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 253 _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
168 _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 254 _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
169 _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 255 _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
170 _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ 256 _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \
171 _(FFI, strlen, 1, L, INTP, 0) \ 257 _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \
172 _(FFI, memcpy, 3, S, PTR, 0) \ 258 _(FFI, strlen, 1, L, INTP, 0) \
173 _(FFI, memset, 3, S, PTR, 0) \ 259 _(FFI, memcpy, 3, S, PTR, 0) \
174 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ 260 _(FFI, memset, 3, S, PTR, 0) \
175 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) 261 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
262 _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
263 _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
264 _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
265 _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
266 _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
267 _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
176 \ 268 \
177 /* End of list. */ 269 /* End of list. */
178 270
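IRCALLDEF() is an X-macro list: each _( ... ) entry carries a build condition, the C function, its logical argument count, the call class, the result IRType and extra CCI_* flags, and the list is expanded twice, once into the IRCALL_* enum and once into the lj_ir_callinfo[] table. A minimal standalone sketch of that expansion pattern follows; the DEMO_* names are hypothetical and the real expansion also packs the op/type/flag bits shown above.

#include <stdio.h>

#define DEMO_CALLDEF(_) \
  _(lj_str_new, 3) \
  _(lj_tab_dup, 2)

/* Expansion 1: an enum of call IDs. */
enum {
#define DEMO_ENUM(name, nargs) DEMO_IRCALL_##name,
DEMO_CALLDEF(DEMO_ENUM)
#undef DEMO_ENUM
  DEMO_IRCALL__MAX
};

/* Expansion 2: a parallel info table (reduced here to the argument count). */
static const int demo_nargs[DEMO_IRCALL__MAX] = {
#define DEMO_INFO(name, nargs) nargs,
DEMO_CALLDEF(DEMO_INFO)
#undef DEMO_INFO
};

int main(void)
{
  printf("lj_str_new takes %d args\n", demo_nargs[DEMO_IRCALL_lj_str_new]);
  return 0;
}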
@@ -220,6 +312,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
220#define fp64_f2l __aeabi_f2lz 312#define fp64_f2l __aeabi_f2lz
221#define fp64_f2ul __aeabi_f2ulz 313#define fp64_f2ul __aeabi_f2ulz
222#endif 314#endif
315#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
316#define softfp_add __adddf3
317#define softfp_sub __subdf3
318#define softfp_mul __muldf3
319#define softfp_div __divdf3
320#define softfp_cmp __ledf2
321#define softfp_i2d __floatsidf
322#define softfp_d2i __fixdfsi
323#define softfp_ui2d __floatunsidf
324#define softfp_f2d __extendsfdf2
325#define softfp_d2ui __fixunsdfsi
326#define softfp_d2f __truncdfsf2
327#define softfp_i2f __floatsisf
328#define softfp_ui2f __floatunsisf
329#define softfp_f2i __fixsfsi
330#define softfp_f2ui __fixunssfsi
223#else 331#else
224#error "Missing soft-float definitions for target architecture" 332#error "Missing soft-float definitions for target architecture"
225#endif 333#endif
@@ -240,10 +348,14 @@ extern float softfp_ui2f(uint32_t a);
240extern int32_t softfp_f2i(float a); 348extern int32_t softfp_f2i(float a);
241extern uint32_t softfp_f2ui(float a); 349extern uint32_t softfp_f2ui(float a);
242#endif 350#endif
351#if LJ_TARGET_MIPS
352extern double lj_vm_sfmin(double a, double b);
353extern double lj_vm_sfmax(double a, double b);
354#endif
243#endif 355#endif
244 356
245#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) 357#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)
246#ifdef __GNUC__ 358#if defined(__GNUC__) || defined(__clang__)
247#define fp64_l2d __floatdidf 359#define fp64_l2d __floatdidf
248#define fp64_ul2d __floatundidf 360#define fp64_ul2d __floatundidf
249#define fp64_l2f __floatdisf 361#define fp64_l2f __floatdisf
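The CCI_XARGS scheme replaces the old ARG1_FP/ARG2_64 argument-count adjustments: entries now state the logical argument count, while XA_FP/XA_64 (backed by CCI_XA) record how many extra 32 bit slots a soft-float double or a 64 bit value needs, and CCI_XNARGS() only adds those back on LJ_SOFTFP32 or 32 bit FFI builds. A standalone worked example of how the bits combine, using hypothetical DEMO_* copies of the constants above:

#include <stdio.h>

#define DEMO_XARGS_SHIFT 14
#define DEMO_XA (1u << DEMO_XARGS_SHIFT)
#define DEMO_NARGS(flags) ((flags) & 0xff)
#define DEMO_XARGS(flags) (((flags) >> DEMO_XARGS_SHIFT) & 3)

int main(void)
{
  /* fp64_l2d above: 1 logical argument, flagged with XA_64. */
  unsigned int flags = 1 | DEMO_XA;
  int lj_32_target = 1;  /* Pretend LJ_32, where the 64 bit arg needs 2 slots. */
  unsigned int nslots = DEMO_NARGS(flags) +
                        (lj_32_target ? DEMO_XARGS(flags) : 0);
  printf("argument slots: %u\n", nslots);  /* 2 on LJ_32, 1 otherwise. */
  return 0;
}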
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 9aa03abc..a71a717b 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -36,11 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
36 return ref; 36 return ref;
37} 37}
38 38
39LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
40
39/* Interning of constants. */ 41/* Interning of constants. */
40LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); 42LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
41LJ_FUNC void lj_ir_k64_freeall(jit_State *J); 43LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
42LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
43LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
44LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); 44LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
45LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); 45LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
46LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); 46LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
@@ -48,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
48LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); 48LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr);
49LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); 49LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
50LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); 50LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
51LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
51 52
52#if LJ_64 53#if LJ_64
53#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) 54#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k))
@@ -55,6 +56,12 @@ LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
55#define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k)) 56#define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k))
56#endif 57#endif
57 58
59#if LJ_GC64
60#define lj_ir_kintpgc lj_ir_kintp
61#else
62#define lj_ir_kintpgc lj_ir_kint
63#endif
64
58static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) 65static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
59{ 66{
60 TValue tv; 67 TValue tv;
@@ -74,8 +81,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
74#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) 81#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
75 82
76/* Special 128 bit SIMD constants. */ 83/* Special 128 bit SIMD constants. */
77#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) 84#define lj_ir_ksimd(J, idx) \
78#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) 85 lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J))
79 86
80/* Access to constants. */ 87/* Access to constants. */
81LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); 88LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
@@ -119,10 +126,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
119LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); 126LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
120LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); 127LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
121LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); 128LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
122LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); 129LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
123LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); 130LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
124LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); 131LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
125LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); 132LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
133LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim);
126LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); 134LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
127 135
128/* Dead-store elimination. */ 136/* Dead-store elimination. */
@@ -143,13 +151,12 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
143 TValue *vb, TValue *vc, IROp op); 151 TValue *vb, TValue *vc, IROp op);
144LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); 152LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
145LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); 153LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
146LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
147LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); 154LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
148 155
149/* Optimization passes. */ 156/* Optimization passes. */
150LJ_FUNC void lj_opt_dce(jit_State *J); 157LJ_FUNC void lj_opt_dce(jit_State *J);
151LJ_FUNC int lj_opt_loop(jit_State *J); 158LJ_FUNC int lj_opt_loop(jit_State *J);
152#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 159#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
153LJ_FUNC void lj_opt_split(jit_State *J); 160LJ_FUNC void lj_opt_split(jit_State *J);
154#else 161#else
155#define lj_opt_split(J) UNUSED(J) 162#define lj_opt_split(J) UNUSED(J)
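lj_ir_knum_tobit() a few hunks above interns the bit pattern U64x(43380000,00000000), i.e. the double 2^52 + 2^51 used by the number-to-bit conversion trick. A standalone worked example of why that constant works (ordinary C, not LuaJIT code; it assumes the usual IEEE-754 double layout and default rounding):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
  uint64_t bits = 0x4338000000000000ULL;  /* 2^52 + 2^51 */
  double bias, x = -42.0, y;
  uint64_t ybits;
  memcpy(&bias, &bits, sizeof(bias));
  y = x + bias;  /* Forces a fixed exponent; the integer lands in the mantissa. */
  memcpy(&ybits, &y, sizeof(ybits));
  printf("low 32 bits: %d\n", (int32_t)ybits);  /* Prints -42. */
  return 0;
}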
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 911c899c..59f92e55 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -7,75 +7,91 @@
7#define _LJ_JIT_H 7#define _LJ_JIT_H
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#if LJ_HASJIT
10#include "lj_ir.h" 11#include "lj_ir.h"
11 12
12/* JIT engine flags. */ 13/* -- JIT engine flags ---------------------------------------------------- */
14
15/* General JIT engine flags. 4 bits. */
13#define JIT_F_ON 0x00000001 16#define JIT_F_ON 0x00000001
14 17
15/* CPU-specific JIT engine flags. */ 18/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
19#define JIT_F_CPU 0x00000010
20
16#if LJ_TARGET_X86ORX64 21#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 22
18#define JIT_F_SSE2 0x00000020 23#define JIT_F_SSE3 (JIT_F_CPU << 0)
19#define JIT_F_SSE3 0x00000040 24#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
20#define JIT_F_SSE4_1 0x00000080 25#define JIT_F_BMI2 (JIT_F_CPU << 2)
21#define JIT_F_P4 0x00000100 26
22#define JIT_F_PREFER_IMUL 0x00000200 27
23#define JIT_F_SPLIT_XMM 0x00000400 28#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
24#define JIT_F_LEA_AGU 0x00000800 29
25
26/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM"
29#elif LJ_TARGET_ARM 30#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 31
31#define JIT_F_ARMV6T2_ 0x00000020 32#define JIT_F_ARMV6_ (JIT_F_CPU << 0)
32#define JIT_F_ARMV7 0x00000040 33#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1)
33#define JIT_F_VFPV2 0x00000080 34#define JIT_F_ARMV7 (JIT_F_CPU << 2)
34#define JIT_F_VFPV3 0x00000100 35#define JIT_F_ARMV8 (JIT_F_CPU << 3)
35 36#define JIT_F_VFPV2 (JIT_F_CPU << 4)
36#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) 37#define JIT_F_VFPV3 (JIT_F_CPU << 5)
37#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) 38
39#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
40#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
38#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) 41#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3)
39 42
40/* Names for the CPU-specific flags. Must match the order above. */ 43#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"
41#define JIT_F_CPU_FIRST JIT_F_ARMV6_ 44
42#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
43#elif LJ_TARGET_PPC 45#elif LJ_TARGET_PPC
44#define JIT_F_SQRT 0x00000010
45#define JIT_F_ROUND 0x00000020
46 46
47/* Names for the CPU-specific flags. Must match the order above. */ 47#define JIT_F_SQRT (JIT_F_CPU << 0)
48#define JIT_F_CPU_FIRST JIT_F_SQRT 48#define JIT_F_ROUND (JIT_F_CPU << 1)
49
49#define JIT_F_CPUSTRING "\4SQRT\5ROUND" 50#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
51
50#elif LJ_TARGET_MIPS 52#elif LJ_TARGET_MIPS
51#define JIT_F_MIPS32R2 0x00000010
52 53
53/* Names for the CPU-specific flags. Must match the order above. */ 54#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0)
54#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 55
56#if LJ_TARGET_MIPS32
57#if LJ_TARGET_MIPSR6
58#define JIT_F_CPUSTRING "\010MIPS32R6"
59#else
55#define JIT_F_CPUSTRING "\010MIPS32R2" 60#define JIT_F_CPUSTRING "\010MIPS32R2"
61#endif
56#else 62#else
57#define JIT_F_CPU_FIRST 0 63#if LJ_TARGET_MIPSR6
64#define JIT_F_CPUSTRING "\010MIPS64R6"
65#else
66#define JIT_F_CPUSTRING "\010MIPS64R2"
67#endif
68#endif
69
70#else
71
58#define JIT_F_CPUSTRING "" 72#define JIT_F_CPUSTRING ""
73
59#endif 74#endif
60 75
61/* Optimization flags. */ 76/* Optimization flags. 12 bits. */
77#define JIT_F_OPT 0x00010000
62#define JIT_F_OPT_MASK 0x0fff0000 78#define JIT_F_OPT_MASK 0x0fff0000
63 79
64#define JIT_F_OPT_FOLD 0x00010000 80#define JIT_F_OPT_FOLD (JIT_F_OPT << 0)
65#define JIT_F_OPT_CSE 0x00020000 81#define JIT_F_OPT_CSE (JIT_F_OPT << 1)
66#define JIT_F_OPT_DCE 0x00040000 82#define JIT_F_OPT_DCE (JIT_F_OPT << 2)
67#define JIT_F_OPT_FWD 0x00080000 83#define JIT_F_OPT_FWD (JIT_F_OPT << 3)
68#define JIT_F_OPT_DSE 0x00100000 84#define JIT_F_OPT_DSE (JIT_F_OPT << 4)
69#define JIT_F_OPT_NARROW 0x00200000 85#define JIT_F_OPT_NARROW (JIT_F_OPT << 5)
70#define JIT_F_OPT_LOOP 0x00400000 86#define JIT_F_OPT_LOOP (JIT_F_OPT << 6)
71#define JIT_F_OPT_ABC 0x00800000 87#define JIT_F_OPT_ABC (JIT_F_OPT << 7)
72#define JIT_F_OPT_SINK 0x01000000 88#define JIT_F_OPT_SINK (JIT_F_OPT << 8)
73#define JIT_F_OPT_FUSE 0x02000000 89#define JIT_F_OPT_FUSE (JIT_F_OPT << 9)
90#define JIT_F_OPT_FMA (JIT_F_OPT << 10)
74 91
75/* Optimizations names for -O. Must match the order above. */ 92/* Optimizations names for -O. Must match the order above. */
76#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
77#define JIT_F_OPTSTRING \ 93#define JIT_F_OPTSTRING \
78 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" 94 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse\3fma"
79 95
80/* Optimization levels set a fixed combination of flags. */ 96/* Optimization levels set a fixed combination of flags. */
81#define JIT_F_OPT_0 0 97#define JIT_F_OPT_0 0
@@ -84,6 +100,9 @@
84#define JIT_F_OPT_3 (JIT_F_OPT_2|\ 100#define JIT_F_OPT_3 (JIT_F_OPT_2|\
85 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) 101 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
86#define JIT_F_OPT_DEFAULT JIT_F_OPT_3 102#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
103/* Note: FMA is not set by default. */
104
105/* -- JIT engine parameters ----------------------------------------------- */
87 106
88#if LJ_TARGET_WINDOWS || LJ_64 107#if LJ_TARGET_WINDOWS || LJ_64
89/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */ 108/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */
@@ -100,6 +119,7 @@
100 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 119 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
101 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 120 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
102 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 121 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
122 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
103 \ 123 \
104 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 124 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
105 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 125 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -126,11 +146,14 @@ JIT_PARAMDEF(JIT_PARAMENUM)
126#define JIT_PARAMSTR(len, name, value) #len #name 146#define JIT_PARAMSTR(len, name, value) #len #name
127#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) 147#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
128 148
149/* -- JIT engine data structures ------------------------------------------ */
150
129/* Trace compiler state. */ 151/* Trace compiler state. */
130typedef enum { 152typedef enum {
131 LJ_TRACE_IDLE, /* Trace compiler idle. */ 153 LJ_TRACE_IDLE, /* Trace compiler idle. */
132 LJ_TRACE_ACTIVE = 0x10, 154 LJ_TRACE_ACTIVE = 0x10,
133 LJ_TRACE_RECORD, /* Bytecode recording active. */ 155 LJ_TRACE_RECORD, /* Bytecode recording active. */
156 LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */
134 LJ_TRACE_START, /* New trace started. */ 157 LJ_TRACE_START, /* New trace started. */
135 LJ_TRACE_END, /* End of trace. */ 158 LJ_TRACE_END, /* End of trace. */
136 LJ_TRACE_ASM, /* Assemble trace. */ 159 LJ_TRACE_ASM, /* Assemble trace. */
@@ -165,6 +188,7 @@ typedef struct MCLink {
165typedef struct SnapShot { 188typedef struct SnapShot {
166 uint32_t mapofs; /* Offset into snapshot map. */ 189 uint32_t mapofs; /* Offset into snapshot map. */
167 IRRef1 ref; /* First IR ref for this snapshot. */ 190 IRRef1 ref; /* First IR ref for this snapshot. */
191 uint16_t mcofs; /* Offset into machine code in MCode units. */
168 uint8_t nslots; /* Number of valid slots. */ 192 uint8_t nslots; /* Number of valid slots. */
169 uint8_t topslot; /* Maximum frame extent. */ 193 uint8_t topslot; /* Maximum frame extent. */
170 uint8_t nent; /* Number of compressed entries. */ 194 uint8_t nent; /* Number of compressed entries. */
@@ -180,20 +204,35 @@ typedef uint32_t SnapEntry;
180#define SNAP_CONT 0x020000 /* Continuation slot. */ 204#define SNAP_CONT 0x020000 /* Continuation slot. */
181#define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ 205#define SNAP_NORESTORE 0x040000 /* No need to restore slot. */
182#define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ 206#define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */
207#define SNAP_KEYINDEX 0x100000 /* Traversal key index. */
183LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); 208LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);
184LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); 209LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
210LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX);
185 211
186#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) 212#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
187#define SNAP_TR(slot, tr) \ 213#define SNAP_TR(slot, tr) \
188 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) 214 (((SnapEntry)(slot) << 24) + \
215 ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK)))
216#if !LJ_FR2
189#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) 217#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
218#endif
190#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) 219#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
191#define snap_ref(sn) ((sn) & 0xffff) 220#define snap_ref(sn) ((sn) & 0xffff)
192#define snap_slot(sn) ((BCReg)((sn) >> 24)) 221#define snap_slot(sn) ((BCReg)((sn) >> 24))
193#define snap_isframe(sn) ((sn) & SNAP_FRAME) 222#define snap_isframe(sn) ((sn) & SNAP_FRAME)
194#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
195#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) 223#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
196 224
225static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
226{
227#if LJ_FR2
228 uint64_t pcbase;
229 memcpy(&pcbase, sn, sizeof(uint64_t));
230 return (const BCIns *)(pcbase >> 8);
231#else
232 return (const BCIns *)(uintptr_t)*sn;
233#endif
234}
235
197/* Snapshot and exit numbers. */ 236/* Snapshot and exit numbers. */
198typedef uint32_t SnapNo; 237typedef uint32_t SnapNo;
199typedef uint32_t ExitNo; 238typedef uint32_t ExitNo;
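Each SnapEntry above is a single 32 bit word: the slot number sits in the top byte, the SNAP_* flag bits in the middle, and a 16 bit IR reference in the low half, which is exactly what SNAP(), snap_ref() and snap_slot() pack and unpack. A standalone sketch with hypothetical DEMO_* copies of those macros:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t DemoSnapEntry;
#define DEMO_SNAP(slot, flags, ref) (((DemoSnapEntry)(slot) << 24) + (flags) + (ref))
#define DEMO_SNAP_FRAME    0x010000u
#define demo_snap_ref(sn)  ((sn) & 0xffff)
#define demo_snap_slot(sn) ((sn) >> 24)

int main(void)
{
  DemoSnapEntry sn = DEMO_SNAP(3, DEMO_SNAP_FRAME, 0x8005);
  printf("slot=%u ref=0x%x frame=%d\n",
         (unsigned)demo_snap_slot(sn), (unsigned)demo_snap_ref(sn),
         (sn & DEMO_SNAP_FRAME) != 0);
  return 0;  /* Prints: slot=3 ref=0x8005 frame=1 */
}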
@@ -211,7 +250,8 @@ typedef enum {
211 LJ_TRLINK_UPREC, /* Up-recursion. */ 250 LJ_TRLINK_UPREC, /* Up-recursion. */
212 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 251 LJ_TRLINK_DOWNREC, /* Down-recursion. */
213 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 252 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
214 LJ_TRLINK_RETURN /* Return to interpreter. */ 253 LJ_TRLINK_RETURN, /* Return to interpreter. */
254 LJ_TRLINK_STITCH /* Trace stitching. */
215} TraceLink; 255} TraceLink;
216 256
217/* Trace object. */ 257/* Trace object. */
@@ -219,6 +259,9 @@ typedef struct GCtrace {
219 GCHeader; 259 GCHeader;
220 uint16_t nsnap; /* Number of snapshots. */ 260 uint16_t nsnap; /* Number of snapshots. */
221 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ 261 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
262#if LJ_GC64
263 uint32_t unused_gc64;
264#endif
222 GCRef gclist; 265 GCRef gclist;
223 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ 266 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
224 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ 267 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
@@ -230,6 +273,9 @@ typedef struct GCtrace {
230 BCIns startins; /* Original bytecode of starting instruction. */ 273 BCIns startins; /* Original bytecode of starting instruction. */
231 MSize szmcode; /* Size of machine code. */ 274 MSize szmcode; /* Size of machine code. */
232 MCode *mcode; /* Start of machine code. */ 275 MCode *mcode; /* Start of machine code. */
276#if LJ_ABI_PAUTH
277 ASMFunction mcauth; /* Start of machine code, with ptr auth applied. */
278#endif
233 MSize mcloop; /* Offset of loop start in machine code. */ 279 MSize mcloop; /* Offset of loop start in machine code. */
234 uint16_t nchild; /* Number of child traces (root trace only). */ 280 uint16_t nchild; /* Number of child traces (root trace only). */
235 uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */ 281 uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */
@@ -294,6 +340,16 @@ typedef struct ScEvEntry {
294 uint8_t dir; /* Direction. 1: +, 0: -. */ 340 uint8_t dir; /* Direction. 1: +, 0: -. */
295} ScEvEntry; 341} ScEvEntry;
296 342
343/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
344typedef struct RBCHashEntry {
345 MRef pc; /* Bytecode PC. */
346 GCRef pt; /* Prototype. */
347 IRRef ref; /* IR reference. */
348} RBCHashEntry;
349
350/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
351#define RBCHASH_SLOTS 8
352
297/* 128 bit SIMD constants. */ 353/* 128 bit SIMD constants. */
298enum { 354enum {
299 LJ_KSIMD_ABS, 355 LJ_KSIMD_ABS,
@@ -301,12 +357,53 @@ enum {
301 LJ_KSIMD__MAX 357 LJ_KSIMD__MAX
302}; 358};
303 359
360enum {
361#if LJ_TARGET_X86ORX64
362 LJ_K64_TOBIT, /* 2^52 + 2^51 */
363 LJ_K64_2P64, /* 2^64 */
364 LJ_K64_M2P64, /* -2^64 */
365#if LJ_32
366 LJ_K64_M2P64_31, /* -2^64 or -2^31 */
367#else
368 LJ_K64_M2P64_31 = LJ_K64_M2P64,
369#endif
370#endif
371#if LJ_TARGET_MIPS
372 LJ_K64_2P31, /* 2^31 */
373#if LJ_64
374 LJ_K64_2P63, /* 2^63 */
375 LJ_K64_M2P64, /* -2^64 */
376#endif
377#endif
378 LJ_K64__MAX,
379};
380#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
381
382enum {
383#if LJ_TARGET_X86ORX64
384 LJ_K32_M2P64_31, /* -2^64 or -2^31 */
385#endif
386#if LJ_TARGET_PPC
387 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
388 LJ_K32_2P52, /* 2^52 */
389#endif
390#if LJ_TARGET_PPC || LJ_TARGET_MIPS
391 LJ_K32_2P31, /* 2^31 */
392#endif
393#if LJ_TARGET_MIPS64
394 LJ_K32_2P63, /* 2^63 */
395 LJ_K32_M2P64, /* -2^64 */
396#endif
397 LJ_K32__MAX
398};
399#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
400
304/* Get 16 byte aligned pointer to SIMD constant. */ 401/* Get 16 byte aligned pointer to SIMD constant. */
305#define LJ_KSIMD(J, n) \ 402#define LJ_KSIMD(J, n) \
306 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) 403 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
307 404
308/* Set/reset flag to activate the SPLIT pass for the current trace. */ 405/* Set/reset flag to activate the SPLIT pass for the current trace. */
309#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 406#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
310#define lj_needsplit(J) (J->needsplit = 1) 407#define lj_needsplit(J) (J->needsplit = 1)
311#define lj_resetsplit(J) (J->needsplit = 0) 408#define lj_resetsplit(J) (J->needsplit = 0)
312#else 409#else
@@ -317,13 +414,14 @@ enum {
317/* Fold state is used to fold instructions on-the-fly. */ 414/* Fold state is used to fold instructions on-the-fly. */
318typedef struct FoldState { 415typedef struct FoldState {
319 IRIns ins; /* Currently emitted instruction. */ 416 IRIns ins; /* Currently emitted instruction. */
320 IRIns left; /* Instruction referenced by left operand. */ 417 IRIns left[2]; /* Instruction referenced by left operand. */
321 IRIns right; /* Instruction referenced by right operand. */ 418 IRIns right[2]; /* Instruction referenced by right operand. */
322} FoldState; 419} FoldState;
323 420
324/* JIT compiler state. */ 421/* JIT compiler state. */
325typedef struct jit_State { 422typedef struct jit_State {
326 GCtrace cur; /* Current trace. */ 423 GCtrace cur; /* Current trace. */
424 GCtrace *curfinal; /* Final address of current trace (set during asm). */
327 425
328 lua_State *L; /* Current Lua state. */ 426 lua_State *L; /* Current Lua state. */
329 const BCIns *pc; /* Current PC. */ 427 const BCIns *pc; /* Current PC. */
@@ -353,12 +451,17 @@ typedef struct jit_State {
353 int32_t framedepth; /* Current frame depth. */ 451 int32_t framedepth; /* Current frame depth. */
354 int32_t retdepth; /* Return frame depth (count of RETF). */ 452 int32_t retdepth; /* Return frame depth (count of RETF). */
355 453
356 MRef k64; /* Pointer to chained array of 64 bit constants. */ 454#if LJ_K32__USED
455 uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
456#endif
357 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ 457 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
458#if LJ_K64__USED
459 TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */
460#endif
358 461
359 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ 462 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
360 IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ 463 IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
361 IRRef irbotlim; /* Lower limit of instuction buffer (biased). */ 464 IRRef irbotlim; /* Lower limit of instruction buffer (biased). */
362 IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */ 465 IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */
363 466
364 MSize sizesnap; /* Size of temp. snapshot buffer. */ 467 MSize sizesnap; /* Size of temp. snapshot buffer. */
@@ -367,13 +470,15 @@ typedef struct jit_State {
367 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ 470 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
368 471
369 PostProc postproc; /* Required post-processing after execution. */ 472 PostProc postproc; /* Required post-processing after execution. */
370#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 473#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
371 int needsplit; /* Need SPLIT pass. */ 474 uint8_t needsplit; /* Need SPLIT pass. */
372#endif 475#endif
476 uint8_t retryrec; /* Retry recording. */
373 477
374 GCRef *trace; /* Array of traces. */ 478 GCRef *trace; /* Array of traces. */
375 TraceNo freetrace; /* Start of scan for next free trace. */ 479 TraceNo freetrace; /* Start of scan for next free trace. */
376 MSize sizetrace; /* Size of trace array. */ 480 MSize sizetrace; /* Size of trace array. */
481 IRRef1 ktrace; /* Reference to KGC with GCtrace. */
377 482
378 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ 483 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
379 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ 484 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
@@ -384,7 +489,10 @@ typedef struct jit_State {
384 489
385 HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ 490 HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
386 uint32_t penaltyslot; /* Round-robin index into penalty slots. */ 491 uint32_t penaltyslot; /* Round-robin index into penalty slots. */
387 uint32_t prngstate; /* PRNG state. */ 492
493#ifdef LUAJIT_ENABLE_TABLE_BUMP
494 RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
495#endif
388 496
389 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ 497 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
390 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ 498 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
@@ -394,6 +502,7 @@ typedef struct jit_State {
394 const BCIns *startpc; /* Bytecode PC of starting instruction. */ 502 const BCIns *startpc; /* Bytecode PC of starting instruction. */
395 TraceNo parent; /* Parent of current side trace (0 for root traces). */ 503 TraceNo parent; /* Parent of current side trace (0 for root traces). */
396 ExitNo exitno; /* Exit number in parent of current side trace. */ 504 ExitNo exitno; /* Exit number in parent of current side trace. */
505 int exitcode; /* Exit code from unwound trace. */
397 506
398 BCIns *patchpc; /* PC for pending re-patch. */ 507 BCIns *patchpc; /* PC for pending re-patch. */
399 BCIns patchins; /* Instruction for pending re-patch. */ 508 BCIns patchins; /* Instruction for pending re-patch. */
@@ -406,14 +515,19 @@ typedef struct jit_State {
406 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 515 size_t szallmcarea; /* Total size of all allocated mcode areas. */
407 516
408 TValue errinfo; /* Additional info element for trace errors. */ 517 TValue errinfo; /* Additional info element for trace errors. */
518
519#if LJ_HASPROFILE
520 GCproto *prev_pt; /* Previous prototype. */
521 BCLine prev_line; /* Previous line. */
522 int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
523#endif
409} jit_State; 524} jit_State;
410 525
411/* Trivial PRNG e.g. used for penalty randomization. */ 526#ifdef LUA_USE_ASSERT
412static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) 527#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__)
413{ 528#else
414 /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ 529#define lj_assertJ(c, ...) ((void)J)
415 J->prngstate = J->prngstate * 1103515245 + 12345; 530#endif
416 return J->prngstate >> (32-bits); 531#endif
417}
418 532
419#endif 533#endif
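JIT_F_CPUSTRING and JIT_F_OPTSTRING are length-prefixed name lists: each entry is one length byte followed by that many characters, and the Nth name corresponds to the Nth flag bit above JIT_F_CPU or JIT_F_OPT respectively (hence the comments that flags and strings must match). A standalone decoder sketch, reusing the -O name string from above:

#include <stdio.h>

int main(void)
{
  const char *p = "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse\3fma";
  unsigned int bit = 0x00010000u;  /* JIT_F_OPT */
  while (*p) {
    int len = *p++;                /* One length byte, then the name itself. */
    printf("0x%08x %.*s\n", bit, len, p);
    p += len;
    bit <<= 1;
  }
  return 0;
}

The same walk, started at JIT_F_CPU over JIT_F_CPUSTRING, yields the CPU feature names.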
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 87601597..bd81dc40 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#if LJ_HASFFI 17#if LJ_HASFFI
17#include "lj_tab.h" 18#include "lj_tab.h"
@@ -24,6 +25,7 @@
24#include "lj_parse.h" 25#include "lj_parse.h"
25#include "lj_char.h" 26#include "lj_char.h"
26#include "lj_strscan.h" 27#include "lj_strscan.h"
28#include "lj_strfmt.h"
27 29
28/* Lua lexer token names. */ 30/* Lua lexer token names. */
29static const char *const tokennames[] = { 31static const char *const tokennames[] = {
@@ -37,54 +39,54 @@ TKDEF(TKSTR1, TKSTR2)
37 39
38/* -- Buffer handling ----------------------------------------------------- */ 40/* -- Buffer handling ----------------------------------------------------- */
39 41
40#define char2int(c) ((int)(uint8_t)(c)) 42#define LEX_EOF (-1)
41#define next(ls) \ 43#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
42 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
43#define save_and_next(ls) (save(ls, ls->current), next(ls))
44#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
45#define END_OF_STREAM (-1)
46 44
47static int fillbuf(LexState *ls) 45/* Get more input from reader. */
46static LJ_NOINLINE LexChar lex_more(LexState *ls)
48{ 47{
49 size_t sz; 48 size_t sz;
50 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); 49 const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
51 if (buf == NULL || sz == 0) return END_OF_STREAM; 50 if (p == NULL || sz == 0) return LEX_EOF;
52 if (sz >= LJ_MAX_MEM) { 51 if (sz >= LJ_MAX_BUF) {
53 if (sz != ~(size_t)0) lj_err_mem(ls->L); 52 if (sz != ~(size_t)0) lj_err_mem(ls->L);
53 sz = ~(uintptr_t)0 - (uintptr_t)p;
54 if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1;
54 ls->endmark = 1; 55 ls->endmark = 1;
55 } 56 }
56 ls->n = (MSize)sz - 1; 57 ls->pe = p + sz;
57 ls->p = buf; 58 ls->p = p + 1;
58 return char2int(*(ls->p++)); 59 return (LexChar)(uint8_t)p[0];
59} 60}
60 61
61static LJ_NOINLINE void save_grow(LexState *ls, int c) 62/* Get next character. */
63static LJ_AINLINE LexChar lex_next(LexState *ls)
62{ 64{
63 MSize newsize; 65 return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
64 if (ls->sb.sz >= LJ_MAX_STR/2)
65 lj_lex_error(ls, 0, LJ_ERR_XELEM);
66 newsize = ls->sb.sz * 2;
67 lj_str_resizebuf(ls->L, &ls->sb, newsize);
68 ls->sb.buf[ls->sb.n++] = (char)c;
69} 66}
70 67
71static LJ_AINLINE void save(LexState *ls, int c) 68/* Save character. */
69static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
72{ 70{
73 if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) 71 lj_buf_putb(&ls->sb, c);
74 save_grow(ls, c); 72}
75 else 73
76 ls->sb.buf[ls->sb.n++] = (char)c; 74/* Save previous character and get next character. */
75static LJ_AINLINE LexChar lex_savenext(LexState *ls)
76{
77 lex_save(ls, ls->c);
78 return lex_next(ls);
77} 79}
78 80
79static void inclinenumber(LexState *ls) 81/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
82static void lex_newline(LexState *ls)
80{ 83{
81 int old = ls->current; 84 LexChar old = ls->c;
82 lua_assert(currIsNewline(ls)); 85 lj_assertLS(lex_iseol(ls), "bad usage");
83 next(ls); /* skip `\n' or `\r' */ 86 lex_next(ls); /* Skip "\n" or "\r". */
84 if (currIsNewline(ls) && ls->current != old) 87 if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
85 next(ls); /* skip `\n\r' or `\r\n' */
86 if (++ls->linenumber >= LJ_MAX_LINE) 88 if (++ls->linenumber >= LJ_MAX_LINE)
87 lj_lex_error(ls, ls->token, LJ_ERR_XLINES); 89 lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
88} 90}
89 91
90/* -- Scanner for terminals ----------------------------------------------- */ 92/* -- Scanner for terminals ----------------------------------------------- */
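The rewritten buffer handling pulls input through the standard lua_Reader callback stored in ls->rfunc/ls->rdata: lex_more() asks the reader for the next chunk and resets ls->p/ls->pe, and a NULL pointer or zero size is what lex_next() eventually reports as LEX_EOF. A minimal sketch of such a reader (the DemoStringReader type and demo_reader name are assumptions; only the lua_Reader signature is the real API):

#include <string.h>
#include "lua.h"

typedef struct DemoStringReader {
  const char *src;
  int done;
} DemoStringReader;

/* Hands out the whole string as one chunk, then signals end-of-stream. */
static const char *demo_reader(lua_State *L, void *data, size_t *size)
{
  DemoStringReader *r = (DemoStringReader *)data;
  (void)L;
  if (r->done) { *size = 0; return NULL; }
  r->done = 1;
  *size = strlen(r->src);
  return r->src;
}

/* Typical use (sketch): lua_load(L, demo_reader, &rd, "=demo") feeds the
** lexer exactly this way. */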
@@ -93,19 +95,17 @@ static void inclinenumber(LexState *ls)
93static void lex_number(LexState *ls, TValue *tv) 95static void lex_number(LexState *ls, TValue *tv)
94{ 96{
95 StrScanFmt fmt; 97 StrScanFmt fmt;
96 int c, xp = 'e'; 98 LexChar c, xp = 'e';
97 lua_assert(lj_char_isdigit(ls->current)); 99 lj_assertLS(lj_char_isdigit(ls->c), "bad usage");
98 if ((c = ls->current) == '0') { 100 if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
99 save_and_next(ls); 101 xp = 'p';
100 if ((ls->current | 0x20) == 'x') xp = 'p'; 102 while (lj_char_isident(ls->c) || ls->c == '.' ||
101 } 103 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
102 while (lj_char_isident(ls->current) || ls->current == '.' || 104 c = ls->c;
103 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { 105 lex_savenext(ls);
104 c = ls->current;
105 save_and_next(ls);
106 } 106 }
107 save(ls, '\0'); 107 lex_save(ls, '\0');
108 fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, 108 fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv,
109 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | 109 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
110 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); 110 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
111 if (LJ_DUALNUM && fmt == STRSCAN_INT) { 111 if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -116,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv)
116 } else if (fmt != STRSCAN_ERROR) { 116 } else if (fmt != STRSCAN_ERROR) {
117 lua_State *L = ls->L; 117 lua_State *L = ls->L;
118 GCcdata *cd; 118 GCcdata *cd;
119 lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); 119 lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG,
120 if (!ctype_ctsG(G(L))) { 120 "unexpected number format %d", fmt);
121 ptrdiff_t oldtop = savestack(L, L->top); 121 ctype_loadffi(L);
122 luaopen_ffi(L); /* Load FFI library on-demand. */
123 L->top = restorestack(L, oldtop);
124 }
125 if (fmt == STRSCAN_IMAG) { 122 if (fmt == STRSCAN_IMAG) {
126 cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); 123 cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
127 ((double *)cdataptr(cd))[0] = 0; 124 ((double *)cdataptr(cd))[0] = 0;
@@ -133,65 +130,66 @@ static void lex_number(LexState *ls, TValue *tv)
133 lj_parse_keepcdata(ls, tv, cd); 130 lj_parse_keepcdata(ls, tv, cd);
134#endif 131#endif
135 } else { 132 } else {
136 lua_assert(fmt == STRSCAN_ERROR); 133 lj_assertLS(fmt == STRSCAN_ERROR,
134 "unexpected number format %d", fmt);
137 lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); 135 lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
138 } 136 }
139} 137}
140 138
141static int skip_sep(LexState *ls) 139/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
140static int lex_skipeq(LexState *ls)
142{ 141{
143 int count = 0; 142 int count = 0;
144 int s = ls->current; 143 LexChar s = ls->c;
145 lua_assert(s == '[' || s == ']'); 144 lj_assertLS(s == '[' || s == ']', "bad usage");
146 save_and_next(ls); 145 while (lex_savenext(ls) == '=' && count < 0x20000000)
147 while (ls->current == '=' && count < 0x20000000) {
148 save_and_next(ls);
149 count++; 146 count++;
150 } 147 return (ls->c == s) ? count : (-count) - 1;
151 return (ls->current == s) ? count : (-count) - 1;
152} 148}
153 149
154static void read_long_string(LexState *ls, TValue *tv, int sep) 150/* Parse a long string or long comment (tv set to NULL). */
151static void lex_longstring(LexState *ls, TValue *tv, int sep)
155{ 152{
156 save_and_next(ls); /* skip 2nd `[' */ 153 lex_savenext(ls); /* Skip second '['. */
157 if (currIsNewline(ls)) /* string starts with a newline? */ 154 if (lex_iseol(ls)) /* Skip initial newline. */
158 inclinenumber(ls); /* skip it */ 155 lex_newline(ls);
159 for (;;) { 156 for (;;) {
160 switch (ls->current) { 157 switch (ls->c) {
161 case END_OF_STREAM: 158 case LEX_EOF:
162 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); 159 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
163 break; 160 break;
164 case ']': 161 case ']':
165 if (skip_sep(ls) == sep) { 162 if (lex_skipeq(ls) == sep) {
166 save_and_next(ls); /* skip 2nd `]' */ 163 lex_savenext(ls); /* Skip second ']'. */
167 goto endloop; 164 goto endloop;
168 } 165 }
169 break; 166 break;
170 case '\n': 167 case '\n':
171 case '\r': 168 case '\r':
172 save(ls, '\n'); 169 lex_save(ls, '\n');
173 inclinenumber(ls); 170 lex_newline(ls);
174 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ 171 if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
175 break; 172 break;
176 default: 173 default:
177 if (tv) save_and_next(ls); 174 lex_savenext(ls);
178 else next(ls);
179 break; 175 break;
180 } 176 }
181 } endloop: 177 } endloop:
182 if (tv) { 178 if (tv) {
183 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), 179 GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep),
184 ls->sb.n - 2*(2 + (MSize)sep)); 180 sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
185 setstrV(ls->L, tv, str); 181 setstrV(ls->L, tv, str);
186 } 182 }
187} 183}
188 184
189static void read_string(LexState *ls, int delim, TValue *tv) 185/* Parse a string. */
186static void lex_string(LexState *ls, TValue *tv)
190{ 187{
191 save_and_next(ls); 188 LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
192 while (ls->current != delim) { 189 lex_savenext(ls);
193 switch (ls->current) { 190 while (ls->c != delim) {
194 case END_OF_STREAM: 191 switch (ls->c) {
192 case LEX_EOF:
195 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); 193 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
196 continue; 194 continue;
197 case '\n': 195 case '\n':
@@ -199,7 +197,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
199 lj_lex_error(ls, TK_string, LJ_ERR_XSTR); 197 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
200 continue; 198 continue;
201 case '\\': { 199 case '\\': {
202 int c = next(ls); /* Skip the '\\'. */ 200 LexChar c = lex_next(ls); /* Skip the '\\'. */
203 switch (c) { 201 switch (c) {
204 case 'a': c = '\a'; break; 202 case 'a': c = '\a'; break;
205 case 'b': c = '\b'; break; 203 case 'b': c = '\b'; break;
@@ -209,111 +207,139 @@ static void read_string(LexState *ls, int delim, TValue *tv)
209 case 't': c = '\t'; break; 207 case 't': c = '\t'; break;
210 case 'v': c = '\v'; break; 208 case 'v': c = '\v'; break;
211 case 'x': /* Hexadecimal escape '\xXX'. */ 209 case 'x': /* Hexadecimal escape '\xXX'. */
212 c = (next(ls) & 15u) << 4; 210 c = (lex_next(ls) & 15u) << 4;
213 if (!lj_char_isdigit(ls->current)) { 211 if (!lj_char_isdigit(ls->c)) {
214 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 212 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
215 c += 9 << 4; 213 c += 9 << 4;
216 } 214 }
217 c += (next(ls) & 15u); 215 c += (lex_next(ls) & 15u);
218 if (!lj_char_isdigit(ls->current)) { 216 if (!lj_char_isdigit(ls->c)) {
219 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 217 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
220 c += 9; 218 c += 9;
221 } 219 }
222 break; 220 break;
221 case 'u': /* Unicode escape '\u{XX...}'. */
222 if (lex_next(ls) != '{') goto err_xesc;
223 lex_next(ls);
224 c = 0;
225 do {
226 c = (c << 4) | (ls->c & 15u);
227 if (!lj_char_isdigit(ls->c)) {
228 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
229 c += 9;
230 }
231 if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */
232 } while (lex_next(ls) != '}');
233 if (c < 0x800) {
234 if (c < 0x80) break;
235 lex_save(ls, 0xc0 | (c >> 6));
236 } else {
237 if (c >= 0x10000) {
238 lex_save(ls, 0xf0 | (c >> 18));
239 lex_save(ls, 0x80 | ((c >> 12) & 0x3f));
240 } else {
241 if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */
242 lex_save(ls, 0xe0 | (c >> 12));
243 }
244 lex_save(ls, 0x80 | ((c >> 6) & 0x3f));
245 }
246 c = 0x80 | (c & 0x3f);
247 break;
223 case 'z': /* Skip whitespace. */ 248 case 'z': /* Skip whitespace. */
224 next(ls); 249 lex_next(ls);
225 while (lj_char_isspace(ls->current)) 250 while (lj_char_isspace(ls->c))
226 if (currIsNewline(ls)) inclinenumber(ls); else next(ls); 251 if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
227 continue; 252 continue;
228 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; 253 case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
229 case '\\': case '\"': case '\'': break; 254 case '\\': case '\"': case '\'': break;
230 case END_OF_STREAM: continue; 255 case LEX_EOF: continue;
231 default: 256 default:
232 if (!lj_char_isdigit(c)) 257 if (!lj_char_isdigit(c))
233 goto err_xesc; 258 goto err_xesc;
234 c -= '0'; /* Decimal escape '\ddd'. */ 259 c -= '0'; /* Decimal escape '\ddd'. */
235 if (lj_char_isdigit(next(ls))) { 260 if (lj_char_isdigit(lex_next(ls))) {
236 c = c*10 + (ls->current - '0'); 261 c = c*10 + (ls->c - '0');
237 if (lj_char_isdigit(next(ls))) { 262 if (lj_char_isdigit(lex_next(ls))) {
238 c = c*10 + (ls->current - '0'); 263 c = c*10 + (ls->c - '0');
239 if (c > 255) { 264 if (c > 255) {
240 err_xesc: 265 err_xesc:
241 lj_lex_error(ls, TK_string, LJ_ERR_XESC); 266 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
242 } 267 }
243 next(ls); 268 lex_next(ls);
244 } 269 }
245 } 270 }
246 save(ls, c); 271 lex_save(ls, c);
247 continue; 272 continue;
248 } 273 }
249 save(ls, c); 274 lex_save(ls, c);
250 next(ls); 275 lex_next(ls);
251 continue; 276 continue;
252 } 277 }
253 default: 278 default:
254 save_and_next(ls); 279 lex_savenext(ls);
255 break; 280 break;
256 } 281 }
257 } 282 }
258 save_and_next(ls); /* skip delimiter */ 283 lex_savenext(ls); /* Skip trailing delimiter. */
259 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); 284 setstrV(ls->L, tv,
285 lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2));
260} 286}
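The new '\u{...}' escape emits the code point as UTF-8: the leading bytes are written with lex_save() and the shared tail stores the final 0x80|(c&0x3f) byte. Both the '\x' and '\u' cases also rely on the ASCII property that (ch & 15) yields a hex digit's value once 9 is added for the letters a-f/A-F. For reference, an equivalent standalone UTF-8 encoder (an illustrative sketch, not the patch's code) that produces the same byte sequences and applies the same range and surrogate checks:

#include <stdint.h>

/* Encode a Unicode code point as UTF-8; returns the byte count or 0 if the
** code point is out of range or a surrogate.
*/
static int utf8_encode(uint32_t cp, unsigned char out[4])
{
  if (cp >= 0x110000 || (cp >= 0xd800 && cp < 0xe000))
    return 0;  /* Out of Unicode range or surrogate. */
  if (cp < 0x80) { out[0] = (unsigned char)cp; return 1; }
  if (cp < 0x800) {
    out[0] = 0xc0 | (cp >> 6); out[1] = 0x80 | (cp & 0x3f); return 2;
  }
  if (cp < 0x10000) {
    out[0] = 0xe0 | (cp >> 12); out[1] = 0x80 | ((cp >> 6) & 0x3f);
    out[2] = 0x80 | (cp & 0x3f); return 3;
  }
  out[0] = 0xf0 | (cp >> 18); out[1] = 0x80 | ((cp >> 12) & 0x3f);
  out[2] = 0x80 | ((cp >> 6) & 0x3f); out[3] = 0x80 | (cp & 0x3f);
  return 4;
}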
261 287
262/* -- Main lexical scanner ------------------------------------------------ */ 288/* -- Main lexical scanner ------------------------------------------------ */
263 289
264static int llex(LexState *ls, TValue *tv) 290/* Get next lexical token. */
291static LexToken lex_scan(LexState *ls, TValue *tv)
265{ 292{
266 lj_str_resetbuf(&ls->sb); 293 lj_buf_reset(&ls->sb);
267 for (;;) { 294 for (;;) {
268 if (lj_char_isident(ls->current)) { 295 if (lj_char_isident(ls->c)) {
269 GCstr *s; 296 GCstr *s;
270 if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ 297 if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
271 lex_number(ls, tv); 298 lex_number(ls, tv);
272 return TK_number; 299 return TK_number;
273 } 300 }
274 /* Identifier or reserved word. */ 301 /* Identifier or reserved word. */
275 do { 302 do {
276 save_and_next(ls); 303 lex_savenext(ls);
277 } while (lj_char_isident(ls->current)); 304 } while (lj_char_isident(ls->c));
278 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); 305 s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb));
279 setstrV(ls->L, tv, s); 306 setstrV(ls->L, tv, s);
280 if (s->reserved > 0) /* Reserved word? */ 307 if (s->reserved > 0) /* Reserved word? */
281 return TK_OFS + s->reserved; 308 return TK_OFS + s->reserved;
282 return TK_name; 309 return TK_name;
283 } 310 }
284 switch (ls->current) { 311 switch (ls->c) {
285 case '\n': 312 case '\n':
286 case '\r': 313 case '\r':
287 inclinenumber(ls); 314 lex_newline(ls);
288 continue; 315 continue;
289 case ' ': 316 case ' ':
290 case '\t': 317 case '\t':
291 case '\v': 318 case '\v':
292 case '\f': 319 case '\f':
293 next(ls); 320 lex_next(ls);
294 continue; 321 continue;
295 case '-': 322 case '-':
296 next(ls); 323 lex_next(ls);
297 if (ls->current != '-') return '-'; 324 if (ls->c != '-') return '-';
298 /* else is a comment */ 325 lex_next(ls);
299 next(ls); 326 if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
300 if (ls->current == '[') { 327 int sep = lex_skipeq(ls);
301 int sep = skip_sep(ls); 328 lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
302 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
303 if (sep >= 0) { 329 if (sep >= 0) {
304 read_long_string(ls, NULL, sep); /* long comment */ 330 lex_longstring(ls, NULL, sep);
305 lj_str_resetbuf(&ls->sb); 331 lj_buf_reset(&ls->sb);
306 continue; 332 continue;
307 } 333 }
308 } 334 }
309 /* else short comment */ 335 /* Short comment "--.*\n". */
310 while (!currIsNewline(ls) && ls->current != END_OF_STREAM) 336 while (!lex_iseol(ls) && ls->c != LEX_EOF)
311 next(ls); 337 lex_next(ls);
312 continue; 338 continue;
313 case '[': { 339 case '[': {
314 int sep = skip_sep(ls); 340 int sep = lex_skipeq(ls);
315 if (sep >= 0) { 341 if (sep >= 0) {
316 read_long_string(ls, tv, sep); 342 lex_longstring(ls, tv, sep);
317 return TK_string; 343 return TK_string;
318 } else if (sep == -1) { 344 } else if (sep == -1) {
319 return '['; 345 return '[';
@@ -323,44 +349,43 @@ static int llex(LexState *ls, TValue *tv)
323 } 349 }
324 } 350 }
325 case '=': 351 case '=':
326 next(ls); 352 lex_next(ls);
327 if (ls->current != '=') return '='; else { next(ls); return TK_eq; } 353 if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
328 case '<': 354 case '<':
329 next(ls); 355 lex_next(ls);
330 if (ls->current != '=') return '<'; else { next(ls); return TK_le; } 356 if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
331 case '>': 357 case '>':
332 next(ls); 358 lex_next(ls);
333 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } 359 if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
334 case '~': 360 case '~':
335 next(ls); 361 lex_next(ls);
336 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } 362 if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
337 case ':': 363 case ':':
338 next(ls); 364 lex_next(ls);
339 if (ls->current != ':') return ':'; else { next(ls); return TK_label; } 365 if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
340 case '"': 366 case '"':
341 case '\'': 367 case '\'':
342 read_string(ls, ls->current, tv); 368 lex_string(ls, tv);
343 return TK_string; 369 return TK_string;
344 case '.': 370 case '.':
345 save_and_next(ls); 371 if (lex_savenext(ls) == '.') {
346 if (ls->current == '.') { 372 lex_next(ls);
347 next(ls); 373 if (ls->c == '.') {
348 if (ls->current == '.') { 374 lex_next(ls);
349 next(ls);
350 return TK_dots; /* ... */ 375 return TK_dots; /* ... */
351 } 376 }
352 return TK_concat; /* .. */ 377 return TK_concat; /* .. */
353 } else if (!lj_char_isdigit(ls->current)) { 378 } else if (!lj_char_isdigit(ls->c)) {
354 return '.'; 379 return '.';
355 } else { 380 } else {
356 lex_number(ls, tv); 381 lex_number(ls, tv);
357 return TK_number; 382 return TK_number;
358 } 383 }
359 case END_OF_STREAM: 384 case LEX_EOF:
360 return TK_eof; 385 return TK_eof;
361 default: { 386 default: {
362 int c = ls->current; 387 LexChar c = ls->c;
363 next(ls); 388 lex_next(ls);
364 return c; /* Single-char tokens (+ - / ...). */ 389 return c; /* Single-char tokens (+ - / ...). */
365 } 390 }
366 } 391 }
@@ -375,36 +400,34 @@ int lj_lex_setup(lua_State *L, LexState *ls)
375 int header = 0; 400 int header = 0;
376 ls->L = L; 401 ls->L = L;
377 ls->fs = NULL; 402 ls->fs = NULL;
378 ls->n = 0; 403 ls->pe = ls->p = NULL;
379 ls->p = NULL;
380 ls->vstack = NULL; 404 ls->vstack = NULL;
381 ls->sizevstack = 0; 405 ls->sizevstack = 0;
382 ls->vtop = 0; 406 ls->vtop = 0;
383 ls->bcstack = NULL; 407 ls->bcstack = NULL;
384 ls->sizebcstack = 0; 408 ls->sizebcstack = 0;
385 ls->token = 0; 409 ls->tok = 0;
386 ls->lookahead = TK_eof; /* No look-ahead token. */ 410 ls->lookahead = TK_eof; /* No look-ahead token. */
387 ls->linenumber = 1; 411 ls->linenumber = 1;
388 ls->lastline = 1; 412 ls->lastline = 1;
389 ls->endmark = 0; 413 ls->endmark = 0;
390 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); 414 ls->fr2 = LJ_FR2; /* Generate native bytecode by default. */
391 next(ls); /* Read-ahead first char. */ 415 lex_next(ls); /* Read-ahead first char. */
392 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && 416 if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
393 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ 417 (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
394 ls->n -= 2;
395 ls->p += 2; 418 ls->p += 2;
396 next(ls); 419 lex_next(ls);
397 header = 1; 420 header = 1;
398 } 421 }
399 if (ls->current == '#') { /* Skip POSIX #! header line. */ 422 if (ls->c == '#') { /* Skip POSIX #! header line. */
400 do { 423 do {
401 next(ls); 424 lex_next(ls);
402 if (ls->current == END_OF_STREAM) return 0; 425 if (ls->c == LEX_EOF) return 0;
403 } while (!currIsNewline(ls)); 426 } while (!lex_iseol(ls));
404 inclinenumber(ls); 427 lex_newline(ls);
405 header = 1; 428 header = 1;
406 } 429 }
407 if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ 430 if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
408 if (header) { 431 if (header) {
409 /* 432 /*
410 ** Loading bytecode with an extra header is disabled for security 433 ** Loading bytecode with an extra header is disabled for security
@@ -426,55 +449,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
426 global_State *g = G(L); 449 global_State *g = G(L);
427 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); 450 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
428 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); 451 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
429 lj_str_freebuf(g, &ls->sb); 452 lj_buf_free(g, &ls->sb);
430} 453}
431 454
455/* Return next lexical token. */
432void lj_lex_next(LexState *ls) 456void lj_lex_next(LexState *ls)
433{ 457{
434 ls->lastline = ls->linenumber; 458 ls->lastline = ls->linenumber;
435 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ 459 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
436 ls->token = llex(ls, &ls->tokenval); /* Get next token. */ 460 ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
437 } else { /* Otherwise return lookahead token. */ 461 } else { /* Otherwise return lookahead token. */
438 ls->token = ls->lookahead; 462 ls->tok = ls->lookahead;
439 ls->lookahead = TK_eof; 463 ls->lookahead = TK_eof;
440 ls->tokenval = ls->lookaheadval; 464 ls->tokval = ls->lookaheadval;
441 } 465 }
442} 466}
443 467
468/* Look ahead for the next token. */
444LexToken lj_lex_lookahead(LexState *ls) 469LexToken lj_lex_lookahead(LexState *ls)
445{ 470{
446 lua_assert(ls->lookahead == TK_eof); 471 lj_assertLS(ls->lookahead == TK_eof, "double lookahead");
447 ls->lookahead = llex(ls, &ls->lookaheadval); 472 ls->lookahead = lex_scan(ls, &ls->lookaheadval);
448 return ls->lookahead; 473 return ls->lookahead;
449} 474}
450 475
451const char *lj_lex_token2str(LexState *ls, LexToken token) 476/* Convert token to string. */
477const char *lj_lex_token2str(LexState *ls, LexToken tok)
452{ 478{
453 if (token > TK_OFS) 479 if (tok > TK_OFS)
454 return tokennames[token-TK_OFS-1]; 480 return tokennames[tok-TK_OFS-1];
455 else if (!lj_char_iscntrl(token)) 481 else if (!lj_char_iscntrl(tok))
456 return lj_str_pushf(ls->L, "%c", token); 482 return lj_strfmt_pushf(ls->L, "%c", tok);
457 else 483 else
458 return lj_str_pushf(ls->L, "char(%d)", token); 484 return lj_strfmt_pushf(ls->L, "char(%d)", tok);
459} 485}
460 486
461void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) 487/* Lexer error. */
488void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
462{ 489{
463 const char *tok; 490 const char *tokstr;
464 va_list argp; 491 va_list argp;
465 if (token == 0) { 492 if (tok == 0) {
466 tok = NULL; 493 tokstr = NULL;
467 } else if (token == TK_name || token == TK_string || token == TK_number) { 494 } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
468 save(ls, '\0'); 495 lex_save(ls, '\0');
469 tok = ls->sb.buf; 496 tokstr = ls->sb.b;
470 } else { 497 } else {
471 tok = lj_lex_token2str(ls, token); 498 tokstr = lj_lex_token2str(ls, tok);
472 } 499 }
473 va_start(argp, em); 500 va_start(argp, em);
474 lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); 501 lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
475 va_end(argp); 502 va_end(argp);
476} 503}
477 504
505/* Initialize strings for reserved words. */
478void lj_lex_init(lua_State *L) 506void lj_lex_init(lua_State *L)
479{ 507{
480 uint32_t i; 508 uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index e244806e..2ef7fc77 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
30 TK_RESERVED = TK_while - TK_OFS 30 TK_RESERVED = TK_while - TK_OFS
31}; 31};
32 32
33typedef int LexToken; 33typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
34typedef int LexToken; /* Lexical token. */
34 35
35/* Combined bytecode ins/line. Only used during bytecode generation. */ 36/* Combined bytecode ins/line. Only used during bytecode generation. */
36typedef struct BCInsLine { 37typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
51typedef struct LexState { 52typedef struct LexState {
52 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ 53 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
53 struct lua_State *L; /* Lua state. */ 54 struct lua_State *L; /* Lua state. */
54 TValue tokenval; /* Current token value. */ 55 TValue tokval; /* Current token value. */
55 TValue lookaheadval; /* Lookahead token value. */ 56 TValue lookaheadval; /* Lookahead token value. */
56 int current; /* Current character (charint). */
57 LexToken token; /* Current token. */
58 LexToken lookahead; /* Lookahead token. */
59 MSize n; /* Bytes left in input buffer. */
60 const char *p; /* Current position in input buffer. */ 57 const char *p; /* Current position in input buffer. */
58 const char *pe; /* End of input buffer. */
59 LexChar c; /* Current character. */
60 LexToken tok; /* Current token. */
61 LexToken lookahead; /* Lookahead token. */
61 SBuf sb; /* String buffer for tokens. */ 62 SBuf sb; /* String buffer for tokens. */
62 lua_Reader rfunc; /* Reader callback. */ 63 lua_Reader rfunc; /* Reader callback. */
63 void *rdata; /* Reader callback data. */ 64 void *rdata; /* Reader callback data. */
@@ -73,14 +74,21 @@ typedef struct LexState {
73 MSize sizebcstack; /* Size of bytecode stack. */ 74 MSize sizebcstack; /* Size of bytecode stack. */
74 uint32_t level; /* Syntactical nesting level. */ 75 uint32_t level; /* Syntactical nesting level. */
75 int endmark; /* Trust bytecode end marker, even if not at EOF. */ 76 int endmark; /* Trust bytecode end marker, even if not at EOF. */
77 int fr2; /* Generate bytecode for LJ_FR2 mode. */
76} LexState; 78} LexState;
77 79
78LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); 80LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
79LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); 81LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
80LJ_FUNC void lj_lex_next(LexState *ls); 82LJ_FUNC void lj_lex_next(LexState *ls);
81LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); 83LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
82LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); 84LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
83LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); 85LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
84LJ_FUNC void lj_lex_init(lua_State *L); 86LJ_FUNC void lj_lex_init(lua_State *L);
85 87
88#ifdef LUA_USE_ASSERT
89#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__))
90#else
91#define lj_assertLS(c, ...) ((void)ls)
92#endif
93
86#endif 94#endif
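The reworked LexState drops the bytes-left counter in favour of a current/end pointer pair (p/pe): reading a character advances p, and when p reaches pe the buffer is refilled from the reader callback. A simplified standalone sketch of that scheme (Scan, Reader and scan_getc are made-up names; the real lexer layers chunk handling and an end marker on top):

#include <stddef.h>

typedef const char *(*Reader)(void *ud, size_t *sz);

typedef struct Scan {
  const char *p, *pe;  /* Current position and end of input buffer. */
  Reader rfunc;        /* Reader callback. */
  void *rdata;         /* Reader callback data. */
} Scan;

/* Return the next character, or -1 at end of input. */
static int scan_getc(Scan *s)
{
  if (s->p == s->pe) {
    size_t sz;
    const char *buf = s->rfunc(s->rdata, &sz);
    if (buf == NULL || sz == 0) return -1;
    s->p = buf; s->pe = buf + sz;
  }
  return (unsigned char)*s->p++;
}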
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 67ecb5a3..06ae4fcf 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -16,8 +16,14 @@
16#include "lj_func.h" 16#include "lj_func.h"
17#include "lj_bc.h" 17#include "lj_bc.h"
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#if LJ_HASFFI
20#include "lj_ctype.h"
21#endif
19#include "lj_vm.h" 22#include "lj_vm.h"
20#include "lj_strscan.h" 23#include "lj_strscan.h"
24#include "lj_strfmt.h"
25#include "lj_lex.h"
26#include "lj_bcdump.h"
21#include "lj_lib.h" 27#include "lj_lib.h"
22 28
23/* -- Library initialization ---------------------------------------------- */ 29/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +49,29 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 49 return tabV(L->top-1);
44} 50}
45 51
52static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
53{
54 int len = *p++;
55 GCstr *name = lj_str_new(L, (const char *)p, len);
56 LexState ls;
57 GCproto *pt;
58 GCfunc *fn;
59 memset(&ls, 0, sizeof(ls));
60 ls.L = L;
61 ls.p = (const char *)(p+len);
62 ls.pe = (const char *)~(uintptr_t)0;
63 ls.c = -1;
64 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
65 ls.fr2 = LJ_FR2;
66 ls.chunkname = name;
67 pt = lj_bcread_proto(&ls);
68 pt->firstline = ~(BCLine)0;
69 fn = lj_func_newL_empty(L, pt, tabref(L->env));
70 /* NOBARRIER: See below for common barrier. */
71 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
72 return (const uint8_t *)ls.p;
73}
74
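lib_read_lfunc() consumes one LIBINIT_LUA record of the library init stream: a one-byte name length, the name bytes, then an embedded bytecode prototype that lj_bcread_proto() parses in place. A sketch of just the length-prefixed-name layout (read_name is a made-up helper, shown only to illustrate the record format):

#include <stddef.h>
#include <string.h>

/* Copy out the record name and return a pointer to the bytecode dump that
** immediately follows it.
*/
static const unsigned char *read_name(const unsigned char *p,
                                      char *out, size_t outsz)
{
  size_t len = *p++;
  if (len >= outsz) len = outsz - 1;
  memcpy(out, p, len);
  out[len] = '\0';
  return p + len;
}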
46void lj_lib_register(lua_State *L, const char *libname, 75void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 76 const uint8_t *p, const lua_CFunction *cf)
48{ 77{
@@ -87,6 +116,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 116 ofn = fn;
88 } else { 117 } else {
89 switch (tag | len) { 118 switch (tag | len) {
119 case LIBINIT_LUA:
120 p = lib_read_lfunc(L, p, tab);
121 break;
90 case LIBINIT_SET: 122 case LIBINIT_SET:
91 L->top -= 2; 123 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 124 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -120,6 +152,37 @@ void lj_lib_register(lua_State *L, const char *libname,
120 } 152 }
121} 153}
122 154
155/* Push internal function on the stack. */
156GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n)
157{
158 GCfunc *fn;
159 lua_pushcclosure(L, f, n);
160 fn = funcV(L->top-1);
161 fn->c.ffid = (uint8_t)id;
162 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
163 return fn;
164}
165
166void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env)
167{
168 luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);
169 lua_pushcfunction(L, f);
170 /* NOBARRIER: The function is new (marked white). */
171 setgcref(funcV(L->top-1)->c.env, obj2gco(env));
172 lua_setfield(L, -2, name);
173 L->top--;
174}
175
176int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name)
177{
178 GCfunc *fn = lj_lib_pushcf(L, cf, id);
179 GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */
180 setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn);
181 lj_gc_anybarriert(L, t);
182 setfuncV(L, L->top++, fn);
183 return 1;
184}
185
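lj_lib_prereg() stores a loader under package.preload, so the library body is only materialized when require() first asks for it. A hypothetical usage sketch (luaopen_mylib_sub and "mylib.sub" are invented names; env would be the registering library's environment table):

#include "lua.h"
#include "lj_lib.h"

/* Loader invoked lazily by require("mylib.sub"). */
static int luaopen_mylib_sub(lua_State *L)
{
  lua_newtable(L);  /* The sub-library table. */
  return 1;
}

static void mylib_register_sub(lua_State *L, GCtab *env)
{
  lj_lib_prereg(L, "mylib.sub", luaopen_mylib_sub, env);
}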
123/* -- Type checks --------------------------------------------------------- */ 186/* -- Type checks --------------------------------------------------------- */
124 187
125TValue *lj_lib_checkany(lua_State *L, int narg) 188TValue *lj_lib_checkany(lua_State *L, int narg)
@@ -137,7 +200,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
137 if (LJ_LIKELY(tvisstr(o))) { 200 if (LJ_LIKELY(tvisstr(o))) {
138 return strV(o); 201 return strV(o);
139 } else if (tvisnumber(o)) { 202 } else if (tvisnumber(o)) {
140 GCstr *s = lj_str_fromnumber(L, o); 203 GCstr *s = lj_strfmt_number(L, o);
141 setstrV(L, o, s); 204 setstrV(L, o, s);
142 return s; 205 return s;
143 } 206 }
@@ -196,20 +259,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
196 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; 259 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
197} 260}
198 261
199int32_t lj_lib_checkbit(lua_State *L, int narg)
200{
201 TValue *o = L->base + narg-1;
202 if (!(o < L->top && lj_strscan_numberobj(o)))
203 lj_err_argt(L, narg, LUA_TNUMBER);
204 if (LJ_LIKELY(tvisint(o))) {
205 return intV(o);
206 } else {
207 int32_t i = lj_num2bit(numV(o));
208 if (LJ_DUALNUM) setintV(o, i);
209 return i;
210 }
211}
212
213GCfunc *lj_lib_checkfunc(lua_State *L, int narg) 262GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
214{ 263{
215 TValue *o = L->base + narg-1; 264 TValue *o = L->base + narg-1;
@@ -218,6 +267,23 @@ GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
218 return funcV(o); 267 return funcV(o);
219} 268}
220 269
270GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua)
271{
272 TValue *o = L->base + narg-1;
273 if (L->top > o) {
274 if (tvisproto(o)) {
275 return protoV(o);
276 } else if (tvisfunc(o)) {
277 if (isluafunc(funcV(o)))
278 return funcproto(funcV(o));
279 else if (nolua)
280 return NULL;
281 }
282 }
283 lj_err_argt(L, narg, LUA_TFUNCTION);
284 return NULL; /* unreachable */
285}
286
221GCtab *lj_lib_checktab(lua_State *L, int narg) 287GCtab *lj_lib_checktab(lua_State *L, int narg)
222{ 288{
223 TValue *o = L->base + narg-1; 289 TValue *o = L->base + narg-1;
@@ -256,3 +322,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
256 return def; 322 return def;
257} 323}
258 324
325/* -- Strict type checks -------------------------------------------------- */
326
327/* The following type checks do not coerce between strings and numbers.
328** And they handle plain int64_t/uint64_t FFI numbers, too.
329*/
330
331#if LJ_HASBUFFER
332GCstr *lj_lib_checkstrx(lua_State *L, int narg)
333{
334 TValue *o = L->base + narg-1;
335 if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING);
336 return strV(o);
337}
338
339int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
340{
341 TValue *o = L->base + narg-1;
342 lj_assertL(b >= 0, "expected range must be non-negative");
343 if (o < L->top) {
344 if (LJ_LIKELY(tvisint(o))) {
345 int32_t i = intV(o);
346 if (i >= a && i <= b) return i;
347 } else if (LJ_LIKELY(tvisnum(o))) {
348 /* For performance reasons, this doesn't check for integerness or
349 ** integer overflow. Overflow detection still works, since all FPUs
350 ** return either MININT or MAXINT, which is then out of range.
351 */
352 int32_t i = (int32_t)numV(o);
353 if (i >= a && i <= b) return i;
354#if LJ_HASFFI
355 } else if (tviscdata(o)) {
356 GCcdata *cd = cdataV(o);
357 if (cd->ctypeid == CTID_INT64) {
358 int64_t i = *(int64_t *)cdataptr(cd);
359 if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i;
360 } else if (cd->ctypeid == CTID_UINT64) {
361 uint64_t i = *(uint64_t *)cdataptr(cd);
362 if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i;
363 } else {
364 goto badtype;
365 }
366#endif
367 } else {
368 goto badtype;
369 }
370 lj_err_arg(L, narg, LJ_ERR_NUMRNG);
371 }
372badtype:
373 lj_err_argt(L, narg, LUA_TNUMBER);
374 return 0; /* unreachable */
375}
376#endif
377
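The comment in lj_lib_checkintrange() leans on the observation that, on the FPUs LuaJIT targets, converting a wildly out-of-range double to int32_t yields INT_MIN or INT_MAX, so the subsequent [a, b] test still rejects it. The same cast-then-check pattern in isolation (illustrative sketch only, with the same hardware assumption):

#include <stdint.h>

/* Returns 1 and stores the integer if n falls in [a, b]; an out-of-range
** double converts to an extreme value on the assumed hardware and is
** rejected by the range test.
*/
static int checkrange(double n, int32_t a, int32_t b, int32_t *out)
{
  int32_t i = (int32_t)n;
  if (i < a || i > b) return 0;
  *out = i;
  return 1;
}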
diff --git a/src/lj_lib.h b/src/lj_lib.h
index d8e93925..a48e3c98 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -41,15 +41,29 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); 41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); 42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); 43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
44LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
45LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); 44LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
45LJ_FUNC GCproto *lj_lib_checkLproto(lua_State *L, int narg, int nolua);
46LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); 46LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
47LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); 47LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
48LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); 48LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
49 49
50#if LJ_HASBUFFER
51LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);
52LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg,
53 int32_t a, int32_t b);
54#endif
55
50/* Avoid including lj_frame.h. */ 56/* Avoid including lj_frame.h. */
57#if LJ_GC64
58#define lj_lib_upvalue(L, n) \
59 (&gcval(L->base-2)->fn.c.upvalue[(n)-1])
60#elif LJ_FR2
61#define lj_lib_upvalue(L, n) \
62 (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1])
63#else
51#define lj_lib_upvalue(L, n) \ 64#define lj_lib_upvalue(L, n) \
52 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) 65 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
66#endif
53 67
54#if LJ_TARGET_WINDOWS 68#if LJ_TARGET_WINDOWS
55#define lj_lib_checkfpu(L) \ 69#define lj_lib_checkfpu(L) \
@@ -60,23 +74,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
60#define lj_lib_checkfpu(L) UNUSED(L) 74#define lj_lib_checkfpu(L) UNUSED(L)
61#endif 75#endif
62 76
63/* Push internal function on the stack. */ 77LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n);
64static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
65 int id, int n)
66{
67 GCfunc *fn;
68 lua_pushcclosure(L, f, n);
69 fn = funcV(L->top-1);
70 fn->c.ffid = (uint8_t)id;
71 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
72}
73
74#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) 78#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0))
75 79
76/* Library function declarations. Scanned by buildvm. */ 80/* Library function declarations. Scanned by buildvm. */
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 81#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 82#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 83#define LJLIB_ASM_(name)
84#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 85#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 86#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 87#define LJLIB_REC(handler)
@@ -88,6 +93,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
88 93
89LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, 94LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
90 const uint8_t *init, const lua_CFunction *cf); 95 const uint8_t *init, const lua_CFunction *cf);
96LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f,
97 GCtab *env);
98LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
99 const char *name);
91 100
92/* Library init data tags. */ 101/* Library init data tags. */
93#define LIBINIT_LENMASK 0x3f 102#define LIBINIT_LENMASK 0x3f
@@ -96,7 +105,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 105#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 106#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 107#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 108#define LIBINIT_MAXSTR 0x38
109#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 110#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 111#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 112#define LIBINIT_COPY 0xfc
@@ -104,9 +114,4 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
104#define LIBINIT_FFID 0xfe 114#define LIBINIT_FFID 0xfe
105#define LIBINIT_END 0xff 115#define LIBINIT_END 0xff
106 116
107/* Exported library functions. */
108
109typedef struct RandomState RandomState;
110LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
111
112#endif 117#endif
diff --git a/src/lj_load.c b/src/lj_load.c
index 1524aeb6..5bb10b5b 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,7 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_gc.h" 16#include "lj_gc.h"
17#include "lj_err.h" 17#include "lj_err.h"
18#include "lj_str.h" 18#include "lj_buf.h"
19#include "lj_func.h" 19#include "lj_func.h"
20#include "lj_frame.h" 20#include "lj_frame.h"
21#include "lj_vm.h" 21#include "lj_vm.h"
@@ -34,14 +34,28 @@ static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud)
34 UNUSED(dummy); 34 UNUSED(dummy);
35 cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ 35 cframe_errfunc(L->cframe) = -1; /* Inherit error function. */
36 bc = lj_lex_setup(L, ls); 36 bc = lj_lex_setup(L, ls);
37 if (ls->mode && !strchr(ls->mode, bc ? 'b' : 't')) { 37 if (ls->mode) {
38 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE)); 38 int xmode = 1;
39 lj_err_throw(L, LUA_ERRSYNTAX); 39 const char *mode = ls->mode;
40 char c;
41 while ((c = *mode++)) {
42 if (c == (bc ? 'b' : 't')) xmode = 0;
43 if (c == (LJ_FR2 ? 'W' : 'X')) ls->fr2 = !LJ_FR2;
44 }
45 if (xmode) {
46 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE));
47 lj_err_throw(L, LUA_ERRSYNTAX);
48 }
40 } 49 }
41 pt = bc ? lj_bcread(ls) : lj_parse(ls); 50 pt = bc ? lj_bcread(ls) : lj_parse(ls);
42 fn = lj_func_newL_empty(L, pt, tabref(L->env)); 51 if (ls->fr2 == LJ_FR2) {
43 /* Don't combine above/below into one statement. */ 52 fn = lj_func_newL_empty(L, pt, tabref(L->env));
44 setfuncV(L, L->top++, fn); 53 /* Don't combine above/below into one statement. */
54 setfuncV(L, L->top++, fn);
55 } else {
56 /* Non-native generation returns a dumpable, but non-runnable prototype. */
57 setprotoV(L, L->top++, pt);
58 }
45 return NULL; 59 return NULL;
46} 60}
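The mode-string check above is what lets embedders restrict loading to source text ('t') or bytecode ('b'); the new 'W'/'X' letters additionally flip the fr2 flag so bytecode can be generated for the other frame layout. A usage sketch that accepts source text only, assuming the usual luaL_loadbufferx() from lauxlib.h:

#include "lua.h"
#include "lauxlib.h"

/* Reject bytecode dumps: loading them with mode "t" fails with
** LUA_ERRSYNTAX instead of executing untrusted bytecode.
*/
static int load_text_only(lua_State *L, const char *src, size_t len,
                          const char *name)
{
  return luaL_loadbufferx(L, src, len, name, "t");
}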
47 61
@@ -54,7 +68,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
54 ls.rdata = data; 68 ls.rdata = data;
55 ls.chunkarg = chunkname ? chunkname : "?"; 69 ls.chunkarg = chunkname ? chunkname : "?";
56 ls.mode = mode; 70 ls.mode = mode;
57 lj_str_initbuf(&ls.sb); 71 lj_buf_init(L, &ls.sb);
58 status = lj_vm_cpcall(L, NULL, &ls, cpparser); 72 status = lj_vm_cpcall(L, NULL, &ls, cpparser);
59 lj_lex_cleanup(L, &ls); 73 lj_lex_cleanup(L, &ls);
60 lj_gc_check(L); 74 lj_gc_check(L);
@@ -160,9 +174,10 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s)
160LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) 174LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
161{ 175{
162 cTValue *o = L->top-1; 176 cTValue *o = L->top-1;
163 api_check(L, L->top > L->base); 177 uint32_t flags = LJ_FR2*BCDUMP_F_FR2; /* Default mode for legacy C API. */
178 lj_checkapi(L->top > L->base, "top slot empty");
164 if (tvisfunc(o) && isluafunc(funcV(o))) 179 if (tvisfunc(o) && isluafunc(funcV(o)))
165 return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); 180 return lj_bcwrite(L, funcproto(funcV(o)), writer, data, flags);
166 else 181 else
167 return 1; 182 return 1;
168} 183}
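lua_dump() streams the prototype of the Lua function on top of the stack through a lua_Writer callback; with this change the dump carries the FR2 flag of the host VM by default. A usage sketch writing the dump to a stdio FILE (file_writer and dump_to_file are made-up names):

#include <stdio.h>
#include "lua.h"

/* lua_Writer callback: must return 0 to continue the dump. */
static int file_writer(lua_State *L, const void *p, size_t sz, void *ud)
{
  (void)L;
  return fwrite(p, 1, sz, (FILE *)ud) != sz;
}

static int dump_to_file(lua_State *L, FILE *fp)
{
  return lua_dump(L, file_writer, fp);  /* Returns 1 if not a Lua function. */
}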
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 4a1ba4aa..8a4851dd 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -14,6 +14,7 @@
14#include "lj_mcode.h" 14#include "lj_mcode.h"
15#include "lj_trace.h" 15#include "lj_trace.h"
16#include "lj_dispatch.h" 16#include "lj_dispatch.h"
17#include "lj_prng.h"
17#endif 18#endif
18#if LJ_HASJIT || LJ_HASFFI 19#if LJ_HASJIT || LJ_HASFFI
19#include "lj_vm.h" 20#include "lj_vm.h"
@@ -28,6 +29,11 @@
28#include <valgrind/valgrind.h> 29#include <valgrind/valgrind.h>
29#endif 30#endif
30 31
32#if LJ_TARGET_WINDOWS
33#define WIN32_LEAN_AND_MEAN
34#include <windows.h>
35#endif
36
31#if LJ_TARGET_IOS 37#if LJ_TARGET_IOS
32void sys_icache_invalidate(void *start, size_t len); 38void sys_icache_invalidate(void *start, size_t len);
33#endif 39#endif
@@ -40,11 +46,13 @@ void lj_mcode_sync(void *start, void *end)
40#endif 46#endif
41#if LJ_TARGET_X86ORX64 47#if LJ_TARGET_X86ORX64
42 UNUSED(start); UNUSED(end); 48 UNUSED(start); UNUSED(end);
49#elif LJ_TARGET_WINDOWS
50 FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start);
43#elif LJ_TARGET_IOS 51#elif LJ_TARGET_IOS
44 sys_icache_invalidate(start, (char *)end-(char *)start); 52 sys_icache_invalidate(start, (char *)end-(char *)start);
45#elif LJ_TARGET_PPC 53#elif LJ_TARGET_PPC
46 lj_vm_cachesync(start, end); 54 lj_vm_cachesync(start, end);
47#elif defined(__GNUC__) 55#elif defined(__GNUC__) || defined(__clang__)
48 __clear_cache(start, end); 56 __clear_cache(start, end);
49#else 57#else
50#error "Missing builtin to flush instruction cache" 58#error "Missing builtin to flush instruction cache"
@@ -57,17 +65,14 @@ void lj_mcode_sync(void *start, void *end)
57 65
58#if LJ_TARGET_WINDOWS 66#if LJ_TARGET_WINDOWS
59 67
60#define WIN32_LEAN_AND_MEAN
61#include <windows.h>
62
63#define MCPROT_RW PAGE_READWRITE 68#define MCPROT_RW PAGE_READWRITE
64#define MCPROT_RX PAGE_EXECUTE_READ 69#define MCPROT_RX PAGE_EXECUTE_READ
65#define MCPROT_RWX PAGE_EXECUTE_READWRITE 70#define MCPROT_RWX PAGE_EXECUTE_READWRITE
66 71
67static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) 72static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
68{ 73{
69 void *p = VirtualAlloc((void *)hint, sz, 74 void *p = LJ_WIN_VALLOC((void *)hint, sz,
70 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); 75 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
71 if (!p && !hint) 76 if (!p && !hint)
72 lj_trace_err(J, LJ_TRERR_MCODEAL); 77 lj_trace_err(J, LJ_TRERR_MCODEAL);
73 return p; 78 return p;
@@ -82,7 +87,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
82static int mcode_setprot(void *p, size_t sz, DWORD prot) 87static int mcode_setprot(void *p, size_t sz, DWORD prot)
83{ 88{
84 DWORD oprot; 89 DWORD oprot;
85 return !VirtualProtect(p, sz, prot, &oprot); 90 return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
86} 91}
87 92
88#elif LJ_TARGET_POSIX 93#elif LJ_TARGET_POSIX
@@ -96,10 +101,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
96#define MCPROT_RW (PROT_READ|PROT_WRITE) 101#define MCPROT_RW (PROT_READ|PROT_WRITE)
97#define MCPROT_RX (PROT_READ|PROT_EXEC) 102#define MCPROT_RX (PROT_READ|PROT_EXEC)
98#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) 103#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC)
104#ifdef PROT_MPROTECT
105#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX))
106#else
107#define MCPROT_CREATE 0
108#endif
99 109
100static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) 110static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
101{ 111{
102 void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 112 void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
103 if (p == MAP_FAILED) { 113 if (p == MAP_FAILED) {
104 if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); 114 if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
105 p = NULL; 115 p = NULL;
@@ -118,52 +128,34 @@ static int mcode_setprot(void *p, size_t sz, int prot)
118 return mprotect(p, sz, prot); 128 return mprotect(p, sz, prot);
119} 129}
120 130
121#elif LJ_64
122
123#error "Missing OS support for explicit placement of executable memory"
124
125#else 131#else
126 132
127/* Fallback allocator. This will fail if memory is not executable by default. */ 133#error "Missing OS support for explicit placement of executable memory"
128#define LUAJIT_UNPROTECT_MCODE
129#define MCPROT_RW 0
130#define MCPROT_RX 0
131#define MCPROT_RWX 0
132
133static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
134{
135 UNUSED(hint); UNUSED(prot);
136 return lj_mem_new(J->L, sz);
137}
138
139static void mcode_free(jit_State *J, void *p, size_t sz)
140{
141 lj_mem_free(J2G(J), p, sz);
142}
143 134
144#endif 135#endif
145 136
146/* -- MCode area protection ----------------------------------------------- */ 137/* -- MCode area protection ----------------------------------------------- */
147 138
148/* Define this ONLY if page protection twiddling becomes a bottleneck. */ 139#if LUAJIT_SECURITY_MCODE == 0
149#ifdef LUAJIT_UNPROTECT_MCODE
150 140
151/* It's generally considered to be a potential security risk to have 141/* Define this ONLY if page protection twiddling becomes a bottleneck.
142**
143** It's generally considered to be a potential security risk to have
152** pages with simultaneous write *and* execute access in a process. 144** pages with simultaneous write *and* execute access in a process.
153** 145**
154** Do not even think about using this mode for server processes or 146** Do not even think about using this mode for server processes or
155** apps handling untrusted external data (such as a browser). 147** apps handling untrusted external data.
156** 148**
157** The security risk is not in LuaJIT itself -- but if an adversary finds 149** The security risk is not in LuaJIT itself -- but if an adversary finds
158** any *other* flaw in your C application logic, then any RWX memory page 150** any *other* flaw in your C application logic, then any RWX memory pages
159** simplifies writing an exploit considerably. 151** simplify writing an exploit considerably.
160*/ 152*/
161#define MCPROT_GEN MCPROT_RWX 153#define MCPROT_GEN MCPROT_RWX
162#define MCPROT_RUN MCPROT_RWX 154#define MCPROT_RUN MCPROT_RWX
163 155
164static void mcode_protect(jit_State *J, int prot) 156static void mcode_protect(jit_State *J, int prot)
165{ 157{
166 UNUSED(J); UNUSED(prot); 158 UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot);
167} 159}
168 160
169#else 161#else
@@ -222,8 +214,8 @@ static void *mcode_alloc(jit_State *J, size_t sz)
222 */ 214 */
223#if LJ_TARGET_MIPS 215#if LJ_TARGET_MIPS
224 /* Use the middle of the 256MB-aligned region. */ 216 /* Use the middle of the 256MB-aligned region. */
225 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + 217 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
226 0x08000000u; 218 ~(uintptr_t)0x0fffffffu) + 0x08000000u;
227#else 219#else
228 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; 220 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
229#endif 221#endif
@@ -243,7 +235,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
243 } 235 }
244 /* Next try probing 64K-aligned pseudo-random addresses. */ 236 /* Next try probing 64K-aligned pseudo-random addresses. */
245 do { 237 do {
246 hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; 238 hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
247 } while (!(hint + sz < range+range)); 239 } while (!(hint + sz < range+range));
248 hint = target + hint - range; 240 hint = target + hint - range;
249 } 241 }
@@ -256,7 +248,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
256/* All memory addresses are reachable by relative jumps. */ 248/* All memory addresses are reachable by relative jumps. */
257static void *mcode_alloc(jit_State *J, size_t sz) 249static void *mcode_alloc(jit_State *J, size_t sz)
258{ 250{
259#ifdef __OpenBSD__ 251#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
260 /* Allow better executable memory allocation for OpenBSD W^X mode. */ 252 /* Allow better executable memory allocation for OpenBSD W^X mode. */
261 void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); 253 void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
262 if (p && mcode_setprot(p, sz, MCPROT_GEN)) { 254 if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
@@ -287,6 +279,7 @@ static void mcode_allocarea(jit_State *J)
287 ((MCLink *)J->mcarea)->next = oldarea; 279 ((MCLink *)J->mcarea)->next = oldarea;
288 ((MCLink *)J->mcarea)->size = sz; 280 ((MCLink *)J->mcarea)->size = sz;
289 J->szallmcarea += sz; 281 J->szallmcarea += sz;
282 J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot);
290} 283}
291 284
292/* Free all MCode areas. */ 285/* Free all MCode areas. */
@@ -297,7 +290,9 @@ void lj_mcode_free(jit_State *J)
297 J->szallmcarea = 0; 290 J->szallmcarea = 0;
298 while (mc) { 291 while (mc) {
299 MCode *next = ((MCLink *)mc)->next; 292 MCode *next = ((MCLink *)mc)->next;
300 mcode_free(J, mc, ((MCLink *)mc)->size); 293 size_t sz = ((MCLink *)mc)->size;
294 lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
295 mcode_free(J, mc, sz);
301 mc = next; 296 mc = next;
302 } 297 }
303} 298}
@@ -332,35 +327,36 @@ void lj_mcode_abort(jit_State *J)
332/* Set/reset protection to allow patching of MCode areas. */ 327/* Set/reset protection to allow patching of MCode areas. */
333MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) 328MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
334{ 329{
335#ifdef LUAJIT_UNPROTECT_MCODE
336 UNUSED(J); UNUSED(ptr); UNUSED(finish);
337 return NULL;
338#else
339 if (finish) { 330 if (finish) {
331#if LUAJIT_SECURITY_MCODE
340 if (J->mcarea == ptr) 332 if (J->mcarea == ptr)
341 mcode_protect(J, MCPROT_RUN); 333 mcode_protect(J, MCPROT_RUN);
342 else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) 334 else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
343 mcode_protfail(J); 335 mcode_protfail(J);
336#endif
344 return NULL; 337 return NULL;
345 } else { 338 } else {
346 MCode *mc = J->mcarea; 339 MCode *mc = J->mcarea;
347 /* Try current area first to use the protection cache. */ 340 /* Try current area first to use the protection cache. */
348 if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { 341 if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
342#if LUAJIT_SECURITY_MCODE
349 mcode_protect(J, MCPROT_GEN); 343 mcode_protect(J, MCPROT_GEN);
344#endif
350 return mc; 345 return mc;
351 } 346 }
352 /* Otherwise search through the list of MCode areas. */ 347 /* Otherwise search through the list of MCode areas. */
353 for (;;) { 348 for (;;) {
354 mc = ((MCLink *)mc)->next; 349 mc = ((MCLink *)mc)->next;
355 lua_assert(mc != NULL); 350 lj_assertJ(mc != NULL, "broken MCode area chain");
356 if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { 351 if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
352#if LUAJIT_SECURITY_MCODE
357 if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) 353 if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
358 mcode_protfail(J); 354 mcode_protfail(J);
355#endif
359 return mc; 356 return mc;
360 } 357 }
361 } 358 }
362 } 359 }
363#endif
364} 360}
365 361
366/* Limit of MCode reservation reached. */ 362/* Limit of MCode reservation reached. */
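With LUAJIT_SECURITY_MCODE enabled, machine-code areas are never writable and executable at the same time: they are generated under read/write protection, flipped to read/execute before they may run, and flipped back briefly for patching. A standalone POSIX sketch of that W^X discipline (mmap/mprotect only; the real code also handles Windows, placement hints and a protection cache):

#include <stddef.h>
#include <sys/mman.h>

/* Map a fresh area read/write for code generation. */
static void *wx_alloc(size_t sz)
{
  void *p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
                 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
  return p == MAP_FAILED ? NULL : p;
}

/* Switch to read/execute before running the generated code (0 on success). */
static int wx_make_executable(void *p, size_t sz)
{
  return mprotect(p, sz, PROT_READ|PROT_EXEC);
}

/* Switch back to read/write while patching. */
static int wx_make_writable(void *p, size_t sz)
{
  return mprotect(p, sz, PROT_READ|PROT_WRITE);
}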
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 15c13aa2..5940097e 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_meta.h" 18#include "lj_meta.h"
@@ -19,6 +20,8 @@
19#include "lj_bc.h" 20#include "lj_bc.h"
20#include "lj_vm.h" 21#include "lj_vm.h"
21#include "lj_strscan.h" 22#include "lj_strscan.h"
23#include "lj_strfmt.h"
24#include "lj_lib.h"
22 25
23/* -- Metamethod handling ------------------------------------------------- */ 26/* -- Metamethod handling ------------------------------------------------- */
24 27
@@ -44,7 +47,7 @@ void lj_meta_init(lua_State *L)
44cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) 47cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name)
45{ 48{
46 cTValue *mo = lj_tab_getstr(mt, name); 49 cTValue *mo = lj_tab_getstr(mt, name);
47 lua_assert(mm <= MM_FAST); 50 lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm);
48 if (!mo || tvisnil(mo)) { /* No metamethod? */ 51 if (!mo || tvisnil(mo)) { /* No metamethod? */
49 mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */ 52 mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */
50 return NULL; 53 return NULL;
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
77 TValue *base = L->base; 80 TValue *base = L->base;
78 TValue *top = L->top; 81 TValue *top = L->top;
79 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ 82 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */
80 copyTV(L, base-1, tv); /* Replace frame with new object. */ 83 copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */
81 top->u32.lo = LJ_CONT_TAILCALL; 84 if (LJ_FR2)
82 setframe_pc(top, pc); 85 (top++)->u64 = LJ_CONT_TAILCALL;
83 setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */ 86 else
84 setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT); 87 top->u32.lo = LJ_CONT_TAILCALL;
85 L->base = L->top = top+2; 88 setframe_pc(top++, pc);
89 setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */
90 if (LJ_FR2) top++;
91 setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
92 L->base = L->top = top+1;
86 /* 93 /*
87 ** before: [old_mo|PC] [... ...] 94 ** before: [old_mo|PC] [... ...]
88 ** ^base ^top 95 ** ^base ^top
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
113 */ 120 */
114 TValue *top = L->top; 121 TValue *top = L->top;
115 if (curr_funcisL(L)) top = curr_topL(L); 122 if (curr_funcisL(L)) top = curr_topL(L);
116 setcont(top, cont); /* Assembler VM stores PC in upper word. */ 123 setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */
117 copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ 124 if (LJ_FR2) setnilV(top++);
118 copyTV(L, top+2, a); 125 copyTV(L, top++, mo); /* Store metamethod and two arguments. */
119 copyTV(L, top+3, b); 126 if (LJ_FR2) setnilV(top++);
120 return top+2; /* Return new base. */ 127 copyTV(L, top, a);
128 copyTV(L, top+1, b);
129 return top; /* Return new base. */
121} 130}
122 131
123/* -- C helpers for some instructions, called from assembler VM ----------- */ 132/* -- C helpers for some instructions, called from assembler VM ----------- */
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
225 } 234 }
226} 235}
227 236
228/* In-place coercion of a number to a string. */
229static LJ_AINLINE int tostring(lua_State *L, TValue *o)
230{
231 if (tvisstr(o)) {
232 return 1;
233 } else if (tvisnumber(o)) {
234 setstrV(L, o, lj_str_fromnumber(L, o));
235 return 1;
236 } else {
237 return 0;
238 }
239}
240
241/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ 237/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
242TValue *lj_meta_cat(lua_State *L, TValue *top, int left) 238TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
243{ 239{
244 int fromc = 0; 240 int fromc = 0;
245 if (left < 0) { left = -left; fromc = 1; } 241 if (left < 0) { left = -left; fromc = 1; }
246 do { 242 do {
247 int n = 1; 243 if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) ||
248 if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { 244 !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) {
249 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); 245 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
250 if (tvisnil(mo)) { 246 if (tvisnil(mo)) {
251 mo = lj_meta_lookup(L, top, MM_concat); 247 mo = lj_meta_lookup(L, top, MM_concat);
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
266 ** after mm: [...][CAT stack ...] <--push-- [result] 262 ** after mm: [...][CAT stack ...] <--push-- [result]
267 ** next step: [...][CAT stack .............] 263 ** next step: [...][CAT stack .............]
268 */ 264 */
269 copyTV(L, top+2, top); /* Careful with the order of stack copies! */ 265 copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */
270 copyTV(L, top+1, top-1); 266 copyTV(L, top+2*LJ_FR2+1, top-1);
271 copyTV(L, top, mo); 267 copyTV(L, top+LJ_FR2, mo);
272 setcont(top-1, lj_cont_cat); 268 setcont(top-1, lj_cont_cat);
269 if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; }
273 return top+1; /* Trigger metamethod call. */ 270 return top+1; /* Trigger metamethod call. */
274 } else if (strV(top)->len == 0) { /* Shortcut. */
275 (void)tostring(L, top-1);
276 } else { 271 } else {
277 /* Pick as many strings as possible from the top and concatenate them: 272 /* Pick as many strings as possible from the top and concatenate them:
278 ** 273 **
@@ -281,27 +276,33 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
281 ** concat: [...][CAT stack ...] [result] 276 ** concat: [...][CAT stack ...] [result]
282 ** next step: [...][CAT stack ............] 277 ** next step: [...][CAT stack ............]
283 */ 278 */
284 MSize tlen = strV(top)->len; 279 TValue *e, *o = top;
285 char *buffer; 280 uint64_t tlen = tvisstr(o) ? strV(o)->len :
286 int i; 281 tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
287 for (n = 1; n <= left && tostring(L, top-n); n++) { 282 SBuf *sb;
288 MSize len = strV(top-n)->len; 283 do {
289 if (len >= LJ_MAX_STR - tlen) 284 o--; tlen += tvisstr(o) ? strV(o)->len :
290 lj_err_msg(L, LJ_ERR_STROV); 285 tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
291 tlen += len; 286 } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
292 } 287 if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
293 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); 288 sb = lj_buf_tmp_(L);
294 n--; 289 lj_buf_more(sb, (MSize)tlen);
295 tlen = 0; 290 for (e = top, top = o; o <= e; o++) {
296 for (i = n; i >= 0; i--) { 291 if (tvisstr(o)) {
297 MSize len = strV(top-i)->len; 292 GCstr *s = strV(o);
298 memcpy(buffer + tlen, strVdata(top-i), len); 293 MSize len = s->len;
299 tlen += len; 294 lj_buf_putmem(sb, strdata(s), len);
295 } else if (tvisbuf(o)) {
296 SBufExt *sbx = bufV(o);
297 lj_buf_putmem(sb, sbx->r, sbufxlen(sbx));
298 } else if (tvisint(o)) {
299 lj_strfmt_putint(sb, intV(o));
300 } else {
301 lj_strfmt_putfnum(sb, STRFMT_G14, numV(o));
302 }
300 } 303 }
301 setstrV(L, top-n, lj_str_new(L, buffer, tlen)); 304 setstrV(L, top, lj_buf_str(L, sb));
302 } 305 }
303 left -= n;
304 top -= n;
305 } while (left >= 1); 306 } while (left >= 1);
306 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { 307 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
307 if (!fromc) L->top = curr_topL(L); 308 if (!fromc) L->top = curr_topL(L);
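The rewritten concatenation path gathers a run of string, number and string-buffer operands and appends them to one temporary SBuf, formatting numbers straight into the buffer instead of interning an intermediate string per operand. A generic sketch of the same idea outside LuaJIT (Part and concat_parts are invented; "%.14g" mirrors the STRFMT_G14 format used above):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct Part { const char *s; size_t len; int isnum; double n; } Part;

/* Concatenate parts into one heap buffer; caller frees the result. */
static char *concat_parts(const Part *p, size_t n, size_t *outlen)
{
  size_t cap = 64, len = 0, i;
  char *buf = malloc(cap);
  if (!buf) return NULL;
  for (i = 0; i < n; i++) {
    char num[32];
    const char *src = p[i].s;
    size_t plen = p[i].len;
    if (p[i].isnum) {  /* Format numbers directly, no temporary string. */
      plen = (size_t)snprintf(num, sizeof(num), "%.14g", p[i].n);
      src = num;
    }
    if (len + plen > cap) {
      char *nb;
      while (len + plen > cap) cap *= 2;
      nb = realloc(buf, cap);
      if (!nb) { free(buf); return NULL; }
      buf = nb;
    }
    memcpy(buf + len, src, plen);
    len += plen;
  }
  *outlen = len;
  return buf;
}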
@@ -338,12 +339,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
338 return (TValue *)(intptr_t)ne; 339 return (TValue *)(intptr_t)ne;
339 } 340 }
340 top = curr_top(L); 341 top = curr_top(L);
341 setcont(top, ne ? lj_cont_condf : lj_cont_condt); 342 setcont(top++, ne ? lj_cont_condf : lj_cont_condt);
342 copyTV(L, top+1, mo); 343 if (LJ_FR2) setnilV(top++);
344 copyTV(L, top++, mo);
345 if (LJ_FR2) setnilV(top++);
343 it = ~(uint32_t)o1->gch.gct; 346 it = ~(uint32_t)o1->gch.gct;
344 setgcV(L, top+2, o1, it); 347 setgcV(L, top, o1, it);
345 setgcV(L, top+3, o2, it); 348 setgcV(L, top+1, o2, it);
346 return top+2; /* Trigger metamethod call. */ 349 return top; /* Trigger metamethod call. */
347 } 350 }
348 return (TValue *)(intptr_t)ne; 351 return (TValue *)(intptr_t)ne;
349} 352}
@@ -365,8 +368,8 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
365 } else if (op == BC_ISEQN) { 368 } else if (op == BC_ISEQN) {
366 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; 369 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
367 } else { 370 } else {
368 lua_assert(op == BC_ISEQP); 371 lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op);
369 setitype(&tv, ~bc_d(ins)); 372 setpriV(&tv, ~bc_d(ins));
370 o2 = &tv; 373 o2 = &tv;
371 } 374 }
372 mo = lj_meta_lookup(L, o1mm, MM_eq); 375 mo = lj_meta_lookup(L, o1mm, MM_eq);
@@ -423,6 +426,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 426 }
424} 427}
425 428
429/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
430void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
431{
432 L->top = curr_topL(L);
433 ra++; tp--;
434 lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE");
435 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
436 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
437 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
438 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
439}
440
426/* Helper for calls. __call metamethod. */ 441/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 442void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 443{
@@ -430,7 +445,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
430 TValue *p; 445 TValue *p;
431 if (!tvisfunc(mo)) 446 if (!tvisfunc(mo))
432 lj_err_optype_call(L, func); 447 lj_err_optype_call(L, func);
433 for (p = top; p > func; p--) copyTV(L, p, p-1); 448 for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1);
449 if (LJ_FR2) copyTV(L, func+2, func);
434 copyTV(L, func, mo); 450 copyTV(L, func, mo);
435} 451}
436 452
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 53ec9c0a..f067cf59 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.c b/src/lj_obj.c
index ce837134..cd4ca50a 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
20}; 20};
21 21
22/* Compare two objects without calling metamethods. */ 22/* Compare two objects without calling metamethods. */
23int lj_obj_equal(cTValue *o1, cTValue *o2) 23int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
24{ 24{
25 if (itype(o1) == itype(o2)) { 25 if (itype(o1) == itype(o2)) {
26 if (tvispri(o1)) 26 if (tvispri(o1))
@@ -33,3 +33,19 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
33 return numberVnum(o1) == numberVnum(o2); 33 return numberVnum(o1) == numberVnum(o2);
34} 34}
35 35
36/* Return pointer to object or its object data. */
37const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o)
38{
39 UNUSED(g);
40 if (tvisudata(o))
41 return uddata(udataV(o));
42 else if (tvislightud(o))
43 return lightudV(g, o);
44 else if (LJ_HASFFI && tviscdata(o))
45 return cdataptr(cdataV(o));
46 else if (tvisgcv(o))
47 return gcV(o);
48 else
49 return NULL;
50}
51
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 6cb93f89..d8b69641 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -13,44 +13,81 @@
13#include "lj_def.h" 13#include "lj_def.h"
14#include "lj_arch.h" 14#include "lj_arch.h"
15 15
16/* -- Memory references (32 bit address space) ---------------------------- */ 16/* -- Memory references --------------------------------------------------- */
17 17
18/* Memory size. */ 18/* Memory and GC object sizes. */
19typedef uint32_t MSize; 19typedef uint32_t MSize;
20#if LJ_GC64
21typedef uint64_t GCSize;
22#else
23typedef uint32_t GCSize;
24#endif
20 25
21/* Memory reference */ 26/* Memory reference */
22typedef struct MRef { 27typedef struct MRef {
28#if LJ_GC64
29 uint64_t ptr64; /* True 64 bit pointer. */
30#else
23 uint32_t ptr32; /* Pseudo 32 bit pointer. */ 31 uint32_t ptr32; /* Pseudo 32 bit pointer. */
32#endif
24} MRef; 33} MRef;
25 34
35#if LJ_GC64
36#define mref(r, t) ((t *)(void *)(r).ptr64)
37#define mrefu(r) ((r).ptr64)
38
39#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p))
40#define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u))
41#define setmrefr(r, v) ((r).ptr64 = (v).ptr64)
42#else
26#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) 43#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
44#define mrefu(r) ((r).ptr32)
27 45
28#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) 46#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
47#define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u))
29#define setmrefr(r, v) ((r).ptr32 = (v).ptr32) 48#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
49#endif
30 50
31/* -- GC object references (32 bit address space) ------------------------- */ 51/* -- GC object references ------------------------------------------------ */
32 52
33/* GCobj reference */ 53/* GCobj reference */
34typedef struct GCRef { 54typedef struct GCRef {
55#if LJ_GC64
56 uint64_t gcptr64; /* True 64 bit pointer. */
57#else
35 uint32_t gcptr32; /* Pseudo 32 bit pointer. */ 58 uint32_t gcptr32; /* Pseudo 32 bit pointer. */
59#endif
36} GCRef; 60} GCRef;
37 61
38/* Common GC header for all collectable objects. */ 62/* Common GC header for all collectable objects. */
39#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct 63#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
40/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ 64/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
41 65
66#if LJ_GC64
67#define gcref(r) ((GCobj *)(r).gcptr64)
68#define gcrefp(r, t) ((t *)(void *)(r).gcptr64)
69#define gcrefu(r) ((r).gcptr64)
70#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64)
71
72#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch)
73#define setgcreft(r, gc, it) \
74 (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47)
75#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p))
76#define setgcrefnull(r) ((r).gcptr64 = 0)
77#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64)
78#else
42#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) 79#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
43#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) 80#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
44#define gcrefu(r) ((r).gcptr32) 81#define gcrefu(r) ((r).gcptr32)
45#define gcrefi(r) ((int32_t)(r).gcptr32)
46#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) 82#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
47#define gcnext(gc) (gcref((gc)->gch.nextgc))
48 83
49#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) 84#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
50#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
51#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) 85#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
52#define setgcrefnull(r) ((r).gcptr32 = 0) 86#define setgcrefnull(r) ((r).gcptr32 = 0)
53#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) 87#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
88#endif
89
90#define gcnext(gc) (gcref((gc)->gch.nextgc))
54 91
55/* IMPORTANT NOTE: 92/* IMPORTANT NOTE:
56** 93**
@@ -119,11 +156,10 @@ typedef int32_t BCLine; /* Bytecode line number. */
119/* Internal assembler functions. Never call these directly from C. */ 156/* Internal assembler functions. Never call these directly from C. */
120typedef void (*ASMFunction)(void); 157typedef void (*ASMFunction)(void);
121 158
122/* Resizable string buffer. Need this here, details in lj_str.h. */ 159/* Resizable string buffer. Need this here, details in lj_buf.h. */
160#define SBufHeader char *w, *e, *b; MRef L
123typedef struct SBuf { 161typedef struct SBuf {
124 char *buf; /* String buffer base. */ 162 SBufHeader;
125 MSize n; /* String buffer length. */
126 MSize sz; /* String buffer size. */
127} SBuf; 163} SBuf;
128 164
129/* -- Tags and values ----------------------------------------------------- */ 165/* -- Tags and values ----------------------------------------------------- */
@@ -131,13 +167,23 @@ typedef struct SBuf {
131/* Frame link. */ 167/* Frame link. */
132typedef union { 168typedef union {
133 int32_t ftsz; /* Frame type and size of previous frame. */ 169 int32_t ftsz; /* Frame type and size of previous frame. */
134 MRef pcr; /* Overlaps PC for Lua frames. */ 170 MRef pcr; /* Or PC for Lua frames. */
135} FrameLink; 171} FrameLink;
136 172
137/* Tagged value. */ 173/* Tagged value. */
138typedef LJ_ALIGN(8) union TValue { 174typedef LJ_ALIGN(8) union TValue {
139 uint64_t u64; /* 64 bit pattern overlaps number. */ 175 uint64_t u64; /* 64 bit pattern overlaps number. */
140 lua_Number n; /* Number object overlaps split tag/value object. */ 176 lua_Number n; /* Number object overlaps split tag/value object. */
177#if LJ_GC64
178 GCRef gcr; /* GCobj reference with tag. */
179 int64_t it64;
180 struct {
181 LJ_ENDIAN_LOHI(
182 int32_t i; /* Integer value. */
183 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
184 )
185 };
186#else
141 struct { 187 struct {
142 LJ_ENDIAN_LOHI( 188 LJ_ENDIAN_LOHI(
143 union { 189 union {
@@ -147,12 +193,17 @@ typedef LJ_ALIGN(8) union TValue {
147 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ 193 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
148 ) 194 )
149 }; 195 };
196#endif
197#if LJ_FR2
198 int64_t ftsz; /* Frame type and size of previous frame, or PC. */
199#else
150 struct { 200 struct {
151 LJ_ENDIAN_LOHI( 201 LJ_ENDIAN_LOHI(
152 GCRef func; /* Function for next frame (or dummy L). */ 202 GCRef func; /* Function for next frame (or dummy L). */
153 , FrameLink tp; /* Link to previous frame. */ 203 , FrameLink tp; /* Link to previous frame. */
154 ) 204 )
155 } fr; 205 } fr;
206#endif
156 struct { 207 struct {
157 LJ_ENDIAN_LOHI( 208 LJ_ENDIAN_LOHI(
158 uint32_t lo; /* Lower 32 bits of number. */ 209 uint32_t lo; /* Lower 32 bits of number. */
@@ -172,6 +223,8 @@ typedef const TValue cTValue;
172 223
173/* Internal object tags. 224/* Internal object tags.
174** 225**
226** Format for 32 bit GC references (!LJ_GC64):
227**
175** Internal tags overlap the MSW of a number object (must be a double). 228** Internal tags overlap the MSW of a number object (must be a double).
176** Interpreted as a double these are special NaNs. The FPU only generates 229** Interpreted as a double these are special NaNs. The FPU only generates
177** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available 230** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
@@ -181,11 +234,24 @@ typedef const TValue cTValue;
181** ---MSW---.---LSW--- 234** ---MSW---.---LSW---
182** primitive types | itype | | 235** primitive types | itype | |
183** lightuserdata | itype | void * | (32 bit platforms) 236** lightuserdata | itype | void * | (32 bit platforms)
184** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers) 237** lightuserdata |ffff|seg| ofs | (64 bit platforms)
185** GC objects | itype | GCRef | 238** GC objects | itype | GCRef |
186** int (LJ_DUALNUM)| itype | int | 239** int (LJ_DUALNUM)| itype | int |
187** number -------double------ 240** number -------double------
188** 241**
242** Format for 64 bit GC references (LJ_GC64):
243**
244** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next
245** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer,
246** a zero-extended 32 bit integer or all bits set to 1 for primitive types.
247**
248** ------MSW------.------LSW------
249** primitive types |1..1|itype|1..................1|
250** GC objects |1..1|itype|-------GCRef--------|
251** lightuserdata |1..1|itype|seg|------ofs-------|
252** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
253** number ------------double-------------
254**
189** ORDER LJ_T 255** ORDER LJ_T
190** Primitive types nil/false/true must be first, lightuserdata next. 256** Primitive types nil/false/true must be first, lightuserdata next.
191** GC objects are at the end, table/userdata must be lowest. 257** GC objects are at the end, table/userdata must be lowest.
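The new comment block documents the LJ_GC64 value layout: 13 leading one bits for the special NaN, a 4 bit internal tag, and a 47 bit payload. A standalone sketch of that style of NaN tagging, using illustrative pack/unpack helpers rather than the actual LuaJIT macros:

#include <stdint.h>
#include <assert.h>

#define PAYLOAD_MASK (((uint64_t)1 << 47) - 1)   /* Low 47 bits, cf. LJ_GCVMASK. */

/* Pack a 47 bit payload and a 4 bit tag below a 13 bit all-ones prefix. */
static uint64_t pack(uint64_t payload, unsigned tag)
{
  assert(payload <= PAYLOAD_MASK && tag < 16);
  return ((uint64_t)0x1fff << 51) | ((uint64_t)tag << 47) | payload;
}

static unsigned unpack_tag(uint64_t v) { return (unsigned)(v >> 47) & 0xfu; }
static uint64_t unpack_payload(uint64_t v) { return v & PAYLOAD_MASK; }

int main(void)
{
  uint64_t p = (uint64_t)0x7f00deadbeefULL;  /* Fits in 47 bits. */
  uint64_t v = pack(p, 11);
  assert((v >> 51) == 0x1fff);               /* Still a special NaN pattern. */
  assert(unpack_tag(v) == 11 && unpack_payload(v) == p);
  return 0;
}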
@@ -208,7 +274,7 @@ typedef const TValue cTValue;
208#define LJ_TNUMX (~13u) 274#define LJ_TNUMX (~13u)
209 275
210/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ 276/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */
211#if LJ_64 277#if LJ_64 && !LJ_GC64
212#define LJ_TISNUM 0xfffeffffu 278#define LJ_TISNUM 0xfffeffffu
213#else 279#else
214#define LJ_TISNUM LJ_TNUMX 280#define LJ_TISNUM LJ_TNUMX
@@ -218,14 +284,31 @@ typedef const TValue cTValue;
218#define LJ_TISGCV (LJ_TSTR+1) 284#define LJ_TISGCV (LJ_TSTR+1)
219#define LJ_TISTABUD LJ_TTAB 285#define LJ_TISTABUD LJ_TTAB
220 286
287/* Type marker for slot holding a traversal index. Must be lightuserdata. */
288#define LJ_KEYINDEX 0xfffe7fffu
289
290#if LJ_GC64
291#define LJ_GCVMASK (((uint64_t)1 << 47) - 1)
292#endif
293
294#if LJ_64
295/* To stay within 47 bits, lightuserdata is segmented. */
296#define LJ_LIGHTUD_BITS_SEG 8
297#define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG)
298#endif
299
221/* -- String object ------------------------------------------------------- */ 300/* -- String object ------------------------------------------------------- */
222 301
302typedef uint32_t StrHash; /* String hash value. */
303typedef uint32_t StrID; /* String ID. */
304
223/* String object header. String payload follows. */ 305/* String object header. String payload follows. */
224typedef struct GCstr { 306typedef struct GCstr {
225 GCHeader; 307 GCHeader;
226 uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ 308 uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
227 uint8_t unused; 309 uint8_t hashalg; /* Hash algorithm. */
228 MSize hash; /* Hash of string. */ 310 StrID sid; /* Interned string ID. */
311 StrHash hash; /* Hash of string. */
229 MSize len; /* Size of string. */ 312 MSize len; /* Size of string. */
230} GCstr; 313} GCstr;
231 314
@@ -233,7 +316,6 @@ typedef struct GCstr {
233#define strdata(s) ((const char *)((s)+1)) 316#define strdata(s) ((const char *)((s)+1))
234#define strdatawr(s) ((char *)((s)+1)) 317#define strdatawr(s) ((char *)((s)+1))
235#define strVdata(o) strdata(strV(o)) 318#define strVdata(o) strdata(strV(o))
236#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1)
237 319
238/* -- Userdata object ----------------------------------------------------- */ 320/* -- Userdata object ----------------------------------------------------- */
239 321
@@ -253,6 +335,7 @@ enum {
253 UDTYPE_USERDATA, /* Regular userdata. */ 335 UDTYPE_USERDATA, /* Regular userdata. */
254 UDTYPE_IO_FILE, /* I/O library FILE. */ 336 UDTYPE_IO_FILE, /* I/O library FILE. */
255 UDTYPE_FFI_CLIB, /* FFI C library namespace. */ 337 UDTYPE_FFI_CLIB, /* FFI C library namespace. */
338 UDTYPE_BUFFER, /* String buffer. */
256 UDTYPE__MAX 339 UDTYPE__MAX
257}; 340};
258 341
@@ -291,6 +374,9 @@ typedef struct GCproto {
291 uint8_t numparams; /* Number of parameters. */ 374 uint8_t numparams; /* Number of parameters. */
292 uint8_t framesize; /* Fixed frame size. */ 375 uint8_t framesize; /* Fixed frame size. */
293 MSize sizebc; /* Number of bytecode instructions. */ 376 MSize sizebc; /* Number of bytecode instructions. */
377#if LJ_GC64
378 uint32_t unused_gc64;
379#endif
294 GCRef gclist; 380 GCRef gclist;
295 MRef k; /* Split constant array (points to the middle). */ 381 MRef k; /* Split constant array (points to the middle). */
296 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ 382 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */
@@ -402,7 +488,9 @@ typedef struct Node {
402 TValue val; /* Value object. Must be first field. */ 488 TValue val; /* Value object. Must be first field. */
403 TValue key; /* Key object. */ 489 TValue key; /* Key object. */
404 MRef next; /* Hash chain. */ 490 MRef next; /* Hash chain. */
491#if !LJ_GC64
405 MRef freetop; /* Top of free elements (stored in t->node[0]). */ 492 MRef freetop; /* Top of free elements (stored in t->node[0]). */
493#endif
406} Node; 494} Node;
407 495
408LJ_STATIC_ASSERT(offsetof(Node, val) == 0); 496LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
@@ -417,12 +505,22 @@ typedef struct GCtab {
417 MRef node; /* Hash part. */ 505 MRef node; /* Hash part. */
418 uint32_t asize; /* Size of array part (keys [0, asize-1]). */ 506 uint32_t asize; /* Size of array part (keys [0, asize-1]). */
419 uint32_t hmask; /* Hash part mask (size of hash part - 1). */ 507 uint32_t hmask; /* Hash part mask (size of hash part - 1). */
508#if LJ_GC64
509 MRef freetop; /* Top of free elements. */
510#endif
420} GCtab; 511} GCtab;
421 512
422#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) 513#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
423#define tabref(r) ((GCtab *)gcref((r))) 514#define tabref(r) ((GCtab *)gcref((r)))
424#define noderef(r) (mref((r), Node)) 515#define noderef(r) (mref((r), Node))
425#define nextnode(n) (mref((n)->next, Node)) 516#define nextnode(n) (mref((n)->next, Node))
517#if LJ_GC64
518#define getfreetop(t, n) (noderef((t)->freetop))
519#define setfreetop(t, n, v) (setmref((t)->freetop, (v)))
520#else
521#define getfreetop(t, n) (noderef((n)->freetop))
522#define setfreetop(t, n, v) (setmref((n)->freetop, (v)))
523#endif
426 524
427/* -- State objects ------------------------------------------------------- */ 525/* -- State objects ------------------------------------------------------- */
428 526
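In the hunk above the free-node top moves out of node[0] and into GCtab when LJ_GC64 is enabled, with getfreetop()/setfreetop() hiding the difference. A hedged sketch of the same accessor pattern with hypothetical Tab/HNode types, showing how the conditional placement stays invisible to the hash-insert code:

#include <stddef.h>

typedef struct HNode {
  int key, val;
  struct HNode *next;
#ifndef MY_GC64
  struct HNode *freetop;  /* Only meaningful in node[0], as before the patch. */
#endif
} HNode;

typedef struct Tab {
  HNode *node;            /* Hash part. */
  size_t hmask;
#ifdef MY_GC64
  HNode *freetop;         /* Stored in the table itself, cf. GCtab.freetop. */
#endif
} Tab;

#ifdef MY_GC64
#define getfreetop(t, n)     ((t)->freetop)
#define setfreetop(t, n, v)  ((t)->freetop = (v))
#else
#define getfreetop(t, n)     ((n)->freetop)
#define setfreetop(t, n, v)  ((n)->freetop = (v))
#endif

/* Scan downward from the free top for an unused node (sketch only). */
static HNode *alloc_free_node(Tab *t)
{
  HNode *base = t->node, *top = getfreetop(t, base);
  while (top > base) {
    top--;
    if (top->next == NULL && top->key == 0) {  /* "Empty" by this sketch's rules. */
      setfreetop(t, base, top);
      return top;
    }
  }
  return NULL;  /* No free node left; a real table would grow here. */
}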
@@ -481,6 +579,9 @@ typedef enum {
481 GCROOT_BASEMT_NUM = GCROOT_BASEMT + ~LJ_TNUMX, 579 GCROOT_BASEMT_NUM = GCROOT_BASEMT + ~LJ_TNUMX,
482 GCROOT_IO_INPUT, /* Userdata for default I/O input file. */ 580 GCROOT_IO_INPUT, /* Userdata for default I/O input file. */
483 GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */ 581 GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */
582#if LJ_HASFFI
583 GCROOT_FFI_FIN, /* FFI finalizer table. */
584#endif
484 GCROOT_MAX 585 GCROOT_MAX
485} GCRootID; 586} GCRootID;
486 587
@@ -488,13 +589,18 @@ typedef enum {
488#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) 589#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
489#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) 590#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
490 591
592/* Garbage collector state. */
491typedef struct GCState { 593typedef struct GCState {
492 MSize total; /* Memory currently allocated. */ 594 GCSize total; /* Memory currently allocated. */
493 MSize threshold; /* Memory threshold. */ 595 GCSize threshold; /* Memory threshold. */
494 uint8_t currentwhite; /* Current white color. */ 596 uint8_t currentwhite; /* Current white color. */
495 uint8_t state; /* GC state. */ 597 uint8_t state; /* GC state. */
496 uint8_t nocdatafin; /* No cdata finalizer called. */ 598 uint8_t unused0;
497 uint8_t unused2; 599#if LJ_64
600 uint8_t lightudnum; /* Number of lightuserdata segments - 1. */
601#else
602 uint8_t unused1;
603#endif
498 MSize sweepstr; /* Sweep position in string table. */ 604 MSize sweepstr; /* Sweep position in string table. */
499 GCRef root; /* List of all collectable objects. */ 605 GCRef root; /* List of all collectable objects. */
500 MRef sweep; /* Sweep position in root list. */ 606 MRef sweep; /* Sweep position in root list. */
@@ -502,42 +608,57 @@ typedef struct GCState {
502 GCRef grayagain; /* List of objects for atomic traversal. */ 608 GCRef grayagain; /* List of objects for atomic traversal. */
503 GCRef weak; /* List of weak tables (to be cleared). */ 609 GCRef weak; /* List of weak tables (to be cleared). */
504 GCRef mmudata; /* List of userdata (to be finalized). */ 610 GCRef mmudata; /* List of userdata (to be finalized). */
611 GCSize debt; /* Debt (how much GC is behind schedule). */
612 GCSize estimate; /* Estimate of memory actually in use. */
505 MSize stepmul; /* Incremental GC step granularity. */ 613 MSize stepmul; /* Incremental GC step granularity. */
506 MSize debt; /* Debt (how much GC is behind schedule). */
507 MSize estimate; /* Estimate of memory actually in use. */
508 MSize pause; /* Pause between successive GC cycles. */ 614 MSize pause; /* Pause between successive GC cycles. */
615#if LJ_64
616 MRef lightudseg; /* Upper bits of lightuserdata segments. */
617#endif
509} GCState; 618} GCState;
510 619
620/* String interning state. */
621typedef struct StrInternState {
622 GCRef *tab; /* String hash table anchors. */
623 MSize mask; /* String hash mask (size of hash table - 1). */
624 MSize num; /* Number of strings in hash table. */
625 StrID id; /* Next string ID. */
626 uint8_t idreseed; /* String ID reseed counter. */
627 uint8_t second; /* String interning table uses secondary hashing. */
628 uint8_t unused1;
629 uint8_t unused2;
630 LJ_ALIGN(8) uint64_t seed; /* Random string seed. */
631} StrInternState;
632
511/* Global state, shared by all threads of a Lua universe. */ 633/* Global state, shared by all threads of a Lua universe. */
512typedef struct global_State { 634typedef struct global_State {
513 GCRef *strhash; /* String hash table (hash chain anchors). */
514 MSize strmask; /* String hash mask (size of hash table - 1). */
515 MSize strnum; /* Number of strings in hash table. */
516 lua_Alloc allocf; /* Memory allocator. */ 635 lua_Alloc allocf; /* Memory allocator. */
517 void *allocd; /* Memory allocator data. */ 636 void *allocd; /* Memory allocator data. */
518 GCState gc; /* Garbage collector. */ 637 GCState gc; /* Garbage collector. */
519 SBuf tmpbuf; /* Temporary buffer for string concatenation. */
520 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
521 GCstr strempty; /* Empty string. */ 638 GCstr strempty; /* Empty string. */
522 uint8_t stremptyz; /* Zero terminator of empty string. */ 639 uint8_t stremptyz; /* Zero terminator of empty string. */
523 uint8_t hookmask; /* Hook mask. */ 640 uint8_t hookmask; /* Hook mask. */
524 uint8_t dispatchmode; /* Dispatch mode. */ 641 uint8_t dispatchmode; /* Dispatch mode. */
525 uint8_t vmevmask; /* VM event mask. */ 642 uint8_t vmevmask; /* VM event mask. */
643 StrInternState str; /* String interning. */
644 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
526 GCRef mainthref; /* Link to main thread. */ 645 GCRef mainthref; /* Link to main thread. */
527 TValue registrytv; /* Anchor for registry. */ 646 SBuf tmpbuf; /* Temporary string buffer. */
528 TValue tmptv, tmptv2; /* Temporary TValues. */ 647 TValue tmptv, tmptv2; /* Temporary TValues. */
648 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
649 TValue registrytv; /* Anchor for registry. */
529 GCupval uvhead; /* Head of double-linked list of all open upvalues. */ 650 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
530 int32_t hookcount; /* Instruction hook countdown. */ 651 int32_t hookcount; /* Instruction hook countdown. */
531 int32_t hookcstart; /* Start count for instruction hook counter. */ 652 int32_t hookcstart; /* Start count for instruction hook counter. */
532 lua_Hook hookf; /* Hook function. */ 653 lua_Hook hookf; /* Hook function. */
533 lua_CFunction wrapf; /* Wrapper for C function calls. */ 654 lua_CFunction wrapf; /* Wrapper for C function calls. */
534 lua_CFunction panic; /* Called as a last resort for errors. */ 655 lua_CFunction panic; /* Called as a last resort for errors. */
535 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
536 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ 656 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
537 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ 657 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
538 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 658 GCRef cur_L; /* Currently executing lua_State. */
539 MRef jit_base; /* Current JIT code L->base. */ 659 MRef jit_base; /* Current JIT code L->base or NULL. */
540 MRef ctype_state; /* Pointer to C type state. */ 660 MRef ctype_state; /* Pointer to C type state. */
661 PRNGState prng; /* Global PRNG state. */
541 GCRef gcroot[GCROOT_MAX]; /* GC roots. */ 662 GCRef gcroot[GCROOT_MAX]; /* GC roots. */
542} global_State; 663} global_State;
543 664
@@ -553,9 +674,11 @@ typedef struct global_State {
553#define HOOK_ACTIVE_SHIFT 4 674#define HOOK_ACTIVE_SHIFT 4
554#define HOOK_VMEVENT 0x20 675#define HOOK_VMEVENT 0x20
555#define HOOK_GC 0x40 676#define HOOK_GC 0x40
677#define HOOK_PROFILE 0x80
556#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) 678#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
557#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) 679#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
558#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) 680#define hook_entergc(g) \
681 ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE)
559#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) 682#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
560#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) 683#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
561#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) 684#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
@@ -583,12 +706,23 @@ struct lua_State {
583#define registry(L) (&G(L)->registrytv) 706#define registry(L) (&G(L)->registrytv)
584 707
585/* Macros to access the currently executing (Lua) function. */ 708/* Macros to access the currently executing (Lua) function. */
709#if LJ_GC64
710#define curr_func(L) (&gcval(L->base-2)->fn)
711#elif LJ_FR2
712#define curr_func(L) (&gcref((L->base-2)->gcr)->fn)
713#else
586#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) 714#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
715#endif
587#define curr_funcisL(L) (isluafunc(curr_func(L))) 716#define curr_funcisL(L) (isluafunc(curr_func(L)))
588#define curr_proto(L) (funcproto(curr_func(L))) 717#define curr_proto(L) (funcproto(curr_func(L)))
589#define curr_topL(L) (L->base + curr_proto(L)->framesize) 718#define curr_topL(L) (L->base + curr_proto(L)->framesize)
590#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) 719#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top)
591 720
721#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
722LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line,
723 const char *func, const char *fmt, ...);
724#endif
725
592/* -- GC object definition and conversions -------------------------------- */ 726/* -- GC object definition and conversions -------------------------------- */
593 727
594/* GC header for generic access to common fields of GC objects. */ 728/* GC header for generic access to common fields of GC objects. */
@@ -642,17 +776,18 @@ typedef union GCobj {
642 776
643/* -- TValue getters/setters ---------------------------------------------- */ 777/* -- TValue getters/setters ---------------------------------------------- */
644 778
645#ifdef LUA_USE_ASSERT
646#include "lj_gc.h"
647#endif
648
649/* Macros to test types. */ 779/* Macros to test types. */
780#if LJ_GC64
781#define itype(o) ((uint32_t)((o)->it64 >> 47))
782#define tvisnil(o) ((o)->it64 == -1)
783#else
650#define itype(o) ((o)->it) 784#define itype(o) ((o)->it)
651#define tvisnil(o) (itype(o) == LJ_TNIL) 785#define tvisnil(o) (itype(o) == LJ_TNIL)
786#endif
652#define tvisfalse(o) (itype(o) == LJ_TFALSE) 787#define tvisfalse(o) (itype(o) == LJ_TFALSE)
653#define tvistrue(o) (itype(o) == LJ_TTRUE) 788#define tvistrue(o) (itype(o) == LJ_TTRUE)
654#define tvisbool(o) (tvisfalse(o) || tvistrue(o)) 789#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
655#if LJ_64 790#if LJ_64 && !LJ_GC64
656#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) 791#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2)
657#else 792#else
658#define tvislightud(o) (itype(o) == LJ_TLIGHTUD) 793#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
@@ -686,7 +821,7 @@ typedef union GCobj {
686#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) 821#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
687 822
688/* Macros to convert type ids. */ 823/* Macros to convert type ids. */
689#if LJ_64 824#if LJ_64 && !LJ_GC64
690#define itypemap(o) \ 825#define itypemap(o) \
691 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) 826 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
692#else 827#else
@@ -694,13 +829,31 @@ typedef union GCobj {
694#endif 829#endif
695 830
696/* Macros to get tagged values. */ 831/* Macros to get tagged values. */
832#if LJ_GC64
833#define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK))
834#else
697#define gcval(o) (gcref((o)->gcr)) 835#define gcval(o) (gcref((o)->gcr))
698#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) 836#endif
837#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
699#if LJ_64 838#if LJ_64
700#define lightudV(o) \ 839#define lightudseg(u) \
701 check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) 840 (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1))
841#define lightudlo(u) \
842 ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1))
843#define lightudup(p) \
844 ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32)))
845static LJ_AINLINE void *lightudV(global_State *g, cTValue *o)
846{
847 uint64_t u = o->u64;
848 uint64_t seg = lightudseg(u);
849 uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
850 lj_assertG(tvislightud(o), "lightuserdata expected");
851 if (seg == (1 << LJ_LIGHTUD_BITS_SEG)-1) return NULL;
852 lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg);
853 return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u));
854}
702#else 855#else
703#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) 856#define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
704#endif 857#endif
705#define gcV(o) check_exp(tvisgcv(o), gcval(o)) 858#define gcV(o) check_exp(tvisgcv(o), gcval(o))
706#define strV(o) check_exp(tvisstr(o), &gcval(o)->str) 859#define strV(o) check_exp(tvisstr(o), &gcval(o)->str)
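lightudV() above rebuilds a full pointer from an 8 bit segment index (resolved through the segment map referenced by g->gc.lightudseg, which stores the upper 32 bits per segment) and the 39 low bits kept in the value itself. A standalone sketch of the same split and reassembly, with a fixed-size segment map instead of the GC-managed one and without the NULL/assertion handling:

#include <stdint.h>
#include <assert.h>

#define BITS_SEG 8
#define BITS_LO  (47 - BITS_SEG)           /* 39 low bits kept verbatim. */

static uint32_t segmap[1 << BITS_SEG];     /* Upper 32 bits of each segment. */
static unsigned nseg;

/* Encode: look up (or register) the segment for the pointer's upper bits. */
static uint64_t encode(void *p)
{
  uint64_t u = (uint64_t)(uintptr_t)p;
  uint32_t up = (uint32_t)((u >> BITS_LO) << (BITS_LO - 32));
  unsigned seg;
  for (seg = 0; seg < nseg; seg++)
    if (segmap[seg] == up) break;
  if (seg == nseg) segmap[nseg++] = up;
  return ((uint64_t)seg << BITS_LO) | (u & (((uint64_t)1 << BITS_LO) - 1));
}

/* Decode: the reassembly done by lightudV(), minus the checks. */
static void *decode(uint64_t v)
{
  unsigned seg = (unsigned)((v >> BITS_LO) & ((1u << BITS_SEG) - 1));
  uint64_t lo = v & (((uint64_t)1 << BITS_LO) - 1);
  return (void *)(uintptr_t)(((uint64_t)segmap[seg] << 32) | lo);
}

int main(void)
{
  int x;
  void *p = &x;
  assert(decode(encode(p)) == p);
  return 0;
}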
@@ -714,40 +867,70 @@ typedef union GCobj {
714#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) 867#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i)
715 868
716/* Macros to set tagged values. */ 869/* Macros to set tagged values. */
870#if LJ_GC64
871#define setitype(o, i) ((o)->it = ((i) << 15))
872#define setnilV(o) ((o)->it64 = -1)
873#define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47))
874#define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47))
875#else
717#define setitype(o, i) ((o)->it = (i)) 876#define setitype(o, i) ((o)->it = (i))
718#define setnilV(o) ((o)->it = LJ_TNIL) 877#define setnilV(o) ((o)->it = LJ_TNIL)
719#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) 878#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x))
879#define setpriV(o, i) (setitype((o), (i)))
880#endif
720 881
721static LJ_AINLINE void setlightudV(TValue *o, void *p) 882static LJ_AINLINE void setrawlightudV(TValue *o, void *p)
722{ 883{
723#if LJ_64 884#if LJ_GC64
885 o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
886#elif LJ_64
724 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); 887 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48);
725#else 888#else
726 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); 889 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD);
727#endif 890#endif
728} 891}
729 892
730#if LJ_64 893#if LJ_FR2 || LJ_32
731#define checklightudptr(L, p) \ 894#define contptr(f) ((void *)(f))
732 (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) 895#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
896#else
897#define contptr(f) \
898 ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
733#define setcont(o, f) \ 899#define setcont(o, f) \
734 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) 900 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
735#else
736#define checklightudptr(L, p) (p)
737#define setcont(o, f) setlightudV((o), (void *)(f))
738#endif 901#endif
739 902
740#define tvchecklive(L, o) \ 903static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg)
741 UNUSED(L), lua_assert(!tvisgcv(o) || \ 904{
742 ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) 905 UNUSED(L); UNUSED(o); UNUSED(msg);
906#if LUA_USE_ASSERT
907 if (tvisgcv(o)) {
908 lj_assertL(~itype(o) == gcval(o)->gch.gct,
909 "mismatch of TValue type %d vs GC type %d",
910 ~itype(o), gcval(o)->gch.gct);
911 /* Copy of isdead check from lj_gc.h to avoid circular include. */
912 lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg);
913 }
914#endif
915}
743 916
744static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype) 917static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
745{ 918{
746 setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o); 919#if LJ_GC64
920 setgcreft(o->gcr, v, itype);
921#else
922 setgcref(o->gcr, v); setitype(o, itype);
923#endif
924}
925
926static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
927{
928 setgcVraw(o, v, it);
929 checklivetv(L, o, "store to dead GC object");
747} 930}
748 931
749#define define_setV(name, type, tag) \ 932#define define_setV(name, type, tag) \
750static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \ 933static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \
751{ \ 934{ \
752 setgcV(L, o, obj2gco(v), tag); \ 935 setgcV(L, o, obj2gco(v), tag); \
753} 936}
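The setter path is now split into setgcVraw() (plain store) and setgcV() (store plus the checklivetv() assertion), and define_setV() stamps out one typed wrapper per GC object type. Elsewhere in lj_obj.h the macro is instantiated for each type, e.g. for strings; the expansion is roughly the following (shown only for illustration):

/* define_setV(setstrV, GCstr, LJ_TSTR) becomes approximately: */
static LJ_AINLINE void setstrV(lua_State *L, TValue *o, const GCstr *v)
{
  setgcV(L, o, obj2gco(v), LJ_TSTR);
}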
@@ -790,13 +973,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i)
790/* Copy tagged values. */ 973/* Copy tagged values. */
791static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) 974static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
792{ 975{
793 *o1 = *o2; tvchecklive(L, o1); 976 *o1 = *o2;
977 checklivetv(L, o1, "copy of dead GC object");
794} 978}
795 979
796/* -- Number to integer conversion ---------------------------------------- */ 980/* -- Number to integer conversion ---------------------------------------- */
797 981
798#if LJ_SOFTFP 982#if LJ_SOFTFP
799LJ_ASMF int32_t lj_vm_tobit(double x); 983LJ_ASMF int32_t lj_vm_tobit(double x);
984#if LJ_TARGET_MIPS64
985LJ_ASMF int32_t lj_vm_tointg(double x);
986#endif
800#endif 987#endif
801 988
802static LJ_AINLINE int32_t lj_num2bit(lua_Number n) 989static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
@@ -810,11 +997,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 997#endif
811} 998}
812 999
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 1000#define lj_num2int(n) ((int32_t)(n))
817#endif
818 1001
819/* 1002/*
820** This must match the JIT backend behavior. In particular for archs 1003** This must match the JIT backend behavior. In particular for archs
@@ -859,6 +1042,21 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
859#define lj_typename(o) (lj_obj_itypename[itypemap(o)]) 1042#define lj_typename(o) (lj_obj_itypename[itypemap(o)])
860 1043
861/* Compare two objects without calling metamethods. */ 1044/* Compare two objects without calling metamethods. */
862LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); 1045LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
1046LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o);
1047
1048#if LJ_ABI_PAUTH
1049#if LJ_TARGET_ARM64
1050#include <ptrauth.h>
1051#define lj_ptr_sign(ptr, ctx) \
1052 ptrauth_sign_unauthenticated((ptr), ptrauth_key_function_pointer, (ctx))
1053#define lj_ptr_strip(ptr) ptrauth_strip((ptr), ptrauth_key_function_pointer)
1054#else
1055#error "No support for pointer authentication for this architecture"
1056#endif
1057#else
1058#define lj_ptr_sign(ptr, ctx) (ptr)
1059#define lj_ptr_strip(ptr) (ptr)
1060#endif
863 1061
864#endif 1062#endif
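With LJ_ABI_PAUTH defined (arm64e), lj_ptr_sign() attaches a pointer-authentication signature to a code pointer under a given context and lj_ptr_strip() removes the signature so raw addresses can be compared or stored; on other targets both are no-ops. A hedged standalone usage sketch built on the same <ptrauth.h> intrinsics (hypothetical my_* names, not the LuaJIT macros):

#include <stdio.h>

#ifdef __has_feature
#if __has_feature(ptrauth_calls)
#define HAVE_PTRAUTH 1
#endif
#endif

#ifdef HAVE_PTRAUTH
#include <ptrauth.h>
#define my_ptr_sign(p, ctx) \
  ptrauth_sign_unauthenticated((p), ptrauth_key_function_pointer, (ctx))
#define my_ptr_strip(p) ptrauth_strip((p), ptrauth_key_function_pointer)
#else
#define my_ptr_sign(p, ctx) (p)     /* No-op fallback, as in the patch. */
#define my_ptr_strip(p) (p)
#endif

static void handler(void) { puts("called"); }

int main(void)
{
  /* Sign before storing; strip both sides before comparing raw addresses. */
  void *stored = my_ptr_sign((void *)&handler, 0x1234);
  int same = my_ptr_strip(stored) == my_ptr_strip((void *)&handler);
  printf("same target: %d\n", same);
  return 0;
}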
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 622ff0a9..098bf431 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -133,8 +136,8 @@
133/* Some local macros to save typing. Undef'd at the end. */ 136/* Some local macros to save typing. Undef'd at the end. */
134#define IR(ref) (&J->cur.ir[(ref)]) 137#define IR(ref) (&J->cur.ir[(ref)])
135#define fins (&J->fold.ins) 138#define fins (&J->fold.ins)
136#define fleft (&J->fold.left) 139#define fleft (J->fold.left)
137#define fright (&J->fold.right) 140#define fright (J->fold.right)
138#define knumleft (ir_knum(fleft)->n) 141#define knumleft (ir_knum(fleft)->n)
139#define knumright (ir_knum(fright)->n) 142#define knumright (ir_knum(fright)->n)
140 143
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM)
169LJFOLD(SUB KNUM KNUM) 173LJFOLD(SUB KNUM KNUM)
170LJFOLD(MUL KNUM KNUM) 174LJFOLD(MUL KNUM KNUM)
171LJFOLD(DIV KNUM KNUM) 175LJFOLD(DIV KNUM KNUM)
172LJFOLD(NEG KNUM KNUM)
173LJFOLD(ABS KNUM KNUM)
174LJFOLD(ATAN2 KNUM KNUM)
175LJFOLD(LDEXP KNUM KNUM) 176LJFOLD(LDEXP KNUM KNUM)
176LJFOLD(MIN KNUM KNUM) 177LJFOLD(MIN KNUM KNUM)
177LJFOLD(MAX KNUM KNUM) 178LJFOLD(MAX KNUM KNUM)
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith)
183 return lj_ir_knum(J, y); 184 return lj_ir_knum(J, y);
184} 185}
185 186
187LJFOLD(NEG KNUM FLOAD)
188LJFOLD(ABS KNUM FLOAD)
189LJFOLDF(kfold_numabsneg)
190{
191 lua_Number a = knumleft;
192 lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
193 return lj_ir_knum(J, y);
194}
195
186LJFOLD(LDEXP KNUM KINT) 196LJFOLD(LDEXP KNUM KINT)
187LJFOLDF(kfold_ldexp) 197LJFOLDF(kfold_ldexp)
188{ 198{
@@ -202,13 +212,34 @@ LJFOLDF(kfold_fpmath)
202 return lj_ir_knum(J, y); 212 return lj_ir_knum(J, y);
203} 213}
204 214
205LJFOLD(POW KNUM KINT) 215LJFOLD(CALLN KNUM any)
216LJFOLDF(kfold_fpcall1)
217{
218 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
219 if (CCI_TYPE(ci) == IRT_NUM) {
220 double y = ((double (*)(double))ci->func)(knumleft);
221 return lj_ir_knum(J, y);
222 }
223 return NEXTFOLD;
224}
225
226LJFOLD(CALLN CARG IRCALL_atan2)
227LJFOLDF(kfold_fpcall2)
228{
229 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
230 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
231 double a = ir_knum(IR(fleft->op1))->n;
232 double b = ir_knum(IR(fleft->op2))->n;
233 double y = ((double (*)(double, double))ci->func)(a, b);
234 return lj_ir_knum(J, y);
235 }
236 return NEXTFOLD;
237}
238
239LJFOLD(POW KNUM KNUM)
206LJFOLDF(kfold_numpow) 240LJFOLDF(kfold_numpow)
207{ 241{
208 lua_Number a = knumleft; 242 return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
209 lua_Number b = (lua_Number)fright->i;
210 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
211 return lj_ir_knum(J, y);
212} 243}
213 244
214/* Must not use kfold_kref for numbers (could be NaN). */ 245/* Must not use kfold_kref for numbers (could be NaN). */
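kfold_fpcall1() above folds a one-argument FP call whose argument is a constant by invoking the C function through the CCallInfo function pointer right at record time (kfold_fpcall2() does the same for atan2). A minimal standalone sketch of the idea, dispatching through a tiny stand-in for the call-info table (hypothetical table, not lj_ir_callinfo):

#include <math.h>
#include <stdio.h>

typedef double (*FpFunc1)(double);

static const FpFunc1 callinfo[] = { sqrt, exp, log };  /* Indexed by call id. */

/* "Fold" a call with a constant argument by simply running it now. */
static double fold_fpcall1(int callid, double karg)
{
  return callinfo[callid](karg);
}

int main(void)
{
  printf("%g\n", fold_fpcall1(0, 16.0));  /* sqrt(16.0) folds to 4. */
  return 0;
}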
@@ -247,7 +278,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
247 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; 278 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
248 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; 279 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
249 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; 280 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
250 default: lua_assert(0); break; 281 default: lj_assertX(0, "bad IR op %d", op); break;
251 } 282 }
252 return k1; 283 return k1;
253} 284}
@@ -319,7 +350,7 @@ LJFOLDF(kfold_intcomp)
319 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); 350 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
320 case IR_ABC: 351 case IR_ABC:
321 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); 352 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
322 default: lua_assert(0); return FAILFOLD; 353 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
323 } 354 }
324} 355}
325 356
@@ -333,21 +364,29 @@ LJFOLDF(kfold_intcomp0)
333 364
334/* -- Constant folding for 64 bit integers -------------------------------- */ 365/* -- Constant folding for 64 bit integers -------------------------------- */
335 366
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 367static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
368 IROp op)
337{ 369{
370 UNUSED(J);
371#if LJ_HASFFI
338 switch (op) { 372 switch (op) {
339#if LJ_64 || LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 373 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 374 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 375 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 376 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 377 case IR_BOR: k1 |= k2; break;
347 case IR_BXOR: k1 ^= k2; break; 378 case IR_BXOR: k1 ^= k2; break;
348#endif 379 case IR_BSHL: k1 <<= (k2 & 63); break;
349 default: UNUSED(k2); lua_assert(0); break; 380 case IR_BSHR: k1 >>= (k2 & 63); break;
381 case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
382 case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
383 case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
384 default: lj_assertJ(0, "bad IR op %d", op); break;
350 } 385 }
386#else
387 UNUSED(k2); UNUSED(op);
388 lj_assertJ(0, "FFI IR op without FFI");
389#endif
351 return k1; 390 return k1;
352} 391}
353 392
@@ -359,7 +398,7 @@ LJFOLD(BOR KINT64 KINT64)
359LJFOLD(BXOR KINT64 KINT64) 398LJFOLD(BXOR KINT64 KINT64)
360LJFOLDF(kfold_int64arith) 399LJFOLDF(kfold_int64arith)
361{ 400{
362 return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, 401 return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
363 ir_k64(fright)->u64, (IROp)fins->o)); 402 ir_k64(fright)->u64, (IROp)fins->o));
364} 403}
365 404
@@ -381,7 +420,7 @@ LJFOLDF(kfold_int64arith2)
381 } 420 }
382 return INT64FOLD(k1); 421 return INT64FOLD(k1);
383#else 422#else
384 UNUSED(J); lua_assert(0); return FAILFOLD; 423 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
385#endif 424#endif
386} 425}
387 426
@@ -392,22 +431,12 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 431LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 432LJFOLDF(kfold_int64shift)
394{ 433{
395#if LJ_HASFFI || LJ_64 434#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 435 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 436 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 437 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 438#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 439 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
411#endif 440#endif
412} 441}
413 442
@@ -417,7 +446,7 @@ LJFOLDF(kfold_bnot64)
417#if LJ_HASFFI 446#if LJ_HASFFI
418 return INT64FOLD(~ir_k64(fleft)->u64); 447 return INT64FOLD(~ir_k64(fleft)->u64);
419#else 448#else
420 UNUSED(J); lua_assert(0); return FAILFOLD; 449 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
421#endif 450#endif
422} 451}
423 452
@@ -427,7 +456,7 @@ LJFOLDF(kfold_bswap64)
427#if LJ_HASFFI 456#if LJ_HASFFI
428 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); 457 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
429#else 458#else
430 UNUSED(J); lua_assert(0); return FAILFOLD; 459 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
431#endif 460#endif
432} 461}
433 462
@@ -452,10 +481,10 @@ LJFOLDF(kfold_int64comp)
452 case IR_UGE: return CONDFOLD(a >= b); 481 case IR_UGE: return CONDFOLD(a >= b);
453 case IR_ULE: return CONDFOLD(a <= b); 482 case IR_ULE: return CONDFOLD(a <= b);
454 case IR_UGT: return CONDFOLD(a > b); 483 case IR_UGT: return CONDFOLD(a > b);
455 default: lua_assert(0); return FAILFOLD; 484 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
456 } 485 }
457#else 486#else
458 UNUSED(J); lua_assert(0); return FAILFOLD; 487 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
459#endif 488#endif
460} 489}
461 490
@@ -467,7 +496,7 @@ LJFOLDF(kfold_int64comp0)
467 return DROPFOLD; 496 return DROPFOLD;
468 return NEXTFOLD; 497 return NEXTFOLD;
469#else 498#else
470 UNUSED(J); lua_assert(0); return FAILFOLD; 499 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
471#endif 500#endif
472} 501}
473 502
@@ -481,6 +510,7 @@ LJFOLDF(kfold_snew_kptr)
481} 510}
482 511
483LJFOLD(SNEW any KINT) 512LJFOLD(SNEW any KINT)
513LJFOLD(XSNEW any KINT)
484LJFOLDF(kfold_snew_empty) 514LJFOLDF(kfold_snew_empty)
485{ 515{
486 if (fright->i == 0) 516 if (fright->i == 0)
@@ -492,7 +522,7 @@ LJFOLD(STRREF KGC KINT)
492LJFOLDF(kfold_strref) 522LJFOLDF(kfold_strref)
493{ 523{
494 GCstr *str = ir_kstr(fleft); 524 GCstr *str = ir_kstr(fleft);
495 lua_assert((MSize)fright->i <= str->len); 525 lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
496 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); 526 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
497} 527}
498 528
@@ -510,7 +540,7 @@ LJFOLDF(kfold_strref_snew)
510 PHIBARRIER(ir); 540 PHIBARRIER(ir);
511 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ 541 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
512 fins->op1 = str; 542 fins->op1 = str;
513 fins->ot = IRT(IR_STRREF, IRT_P32); 543 fins->ot = IRT(IR_STRREF, IRT_PGC);
514 return RETRYFOLD; 544 return RETRYFOLD;
515 } 545 }
516 } 546 }
@@ -528,6 +558,211 @@ LJFOLDF(kfold_strcmp)
528 return NEXTFOLD; 558 return NEXTFOLD;
529} 559}
530 560
561/* -- Constant folding and forwarding for buffers ------------------------- */
562
563/*
564** Buffer ops perform stores, but their effect is limited to the buffer
565** itself. Also, buffer ops are chained: a use of an op implies a use of
566** all other ops up the chain. Conversely, if an op is unused, all ops
567** up the chain can go unused. This largely eliminates the need to treat
568** them as stores.
569**
570** Alas, treating them as normal (IRM_N) ops doesn't work, because they
571** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
572** or if FOLD is disabled.
573**
574** The compromise is to declare them as loads, emit them like stores and
575** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
576** fragments left over from CSE are eliminated by DCE.
577**
578** The string buffer methods emit a USE instead of a BUFSTR to keep the
579** chain alive.
580*/
581
582LJFOLD(BUFHDR any any)
583LJFOLDF(bufhdr_merge)
584{
585 return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
586}
587
588LJFOLD(BUFPUT any BUFSTR)
589LJFOLDF(bufput_bufstr)
590{
591 if ((J->flags & JIT_F_OPT_FWD)) {
592 IRRef hdr = fright->op2;
593 /* New buffer, no other buffer op in between and same buffer? */
594 if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
595 fleft->prev == hdr &&
596 fleft->op1 == IR(hdr)->op1 &&
597 !(irt_isphi(fright->t) && IR(hdr)->prev) &&
598 (!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) {
599 IRRef ref = fins->op1;
600 IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */
601 IR(ref)->op1 = fright->op1;
602 return ref;
603 }
604 /* Replay puts to global temporary buffer. */
605 if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) {
606 IRIns *ir = IR(fright->op1);
607 /* For now only handle single string.reverse .lower .upper .rep. */
608 if (ir->o == IR_CALLL &&
609 ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
610 ir->op2 <= IRCALL_lj_buf_putstr_rep) {
611 IRIns *carg1 = IR(ir->op1);
612 if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
613 IRIns *carg2 = IR(carg1->op1);
614 if (carg2->op1 == hdr) {
615 return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
616 }
617 } else if (carg1->op1 == hdr) {
618 return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
619 }
620 }
621 }
622 }
623 return EMITFOLD; /* Always emit, CSE later. */
624}
625
626LJFOLD(BUFPUT any any)
627LJFOLDF(bufput_kgc)
628{
629 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
630 GCstr *s2 = ir_kstr(fright);
631 if (s2->len == 0) { /* Empty string? */
632 return LEFTFOLD;
633 } else {
634 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
635 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
636 GCstr *s1 = ir_kstr(IR(fleft->op2));
637 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
638 /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
639 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
640 return fins->op1;
641 }
642 }
643 }
644 return EMITFOLD; /* Always emit, CSE later. */
645}
646
647LJFOLD(BUFSTR any any)
648LJFOLDF(bufstr_kfold_cse)
649{
650 lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
651 fleft->o == IR_CALLL,
652 "bad buffer constructor IR op %d", fleft->o);
653 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
654 if (fleft->o == IR_BUFHDR) { /* No put operations? */
655 if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */
656 return lj_ir_kstr(J, &J2G(J)->strempty);
657 fins->op1 = fleft->op1;
658 fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
659 return CSEFOLD;
660 } else if (fleft->o == IR_BUFPUT) {
661 IRIns *irb = IR(fleft->op1);
662 if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
663 return fleft->op2; /* Shortcut for a single put operation. */
664 }
665 }
666 /* Try to CSE the whole chain. */
667 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
668 IRRef ref = J->chain[IR_BUFSTR];
669 while (ref) {
670 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
671 while (ira->o == irb->o && ira->op2 == irb->op2) {
672 lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
673 ira->o == IR_CALLL || ira->o == IR_CARG,
674 "bad buffer constructor IR op %d", ira->o);
675 if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
676 return ref; /* CSE succeeded. */
677 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
678 break;
679 ira = IR(ira->op1);
680 irb = IR(irb->op1);
681 }
682 ref = irs->prev;
683 }
684 }
685 return EMITFOLD; /* No CSE possible. */
686}
687
688LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
689LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
690LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
691LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
692LJFOLDF(bufput_kfold_op)
693{
694 if (irref_isk(fleft->op2)) {
695 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
696 SBuf *sb = lj_buf_tmp_(J->L);
697 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
698 ir_kstr(IR(fleft->op2)));
699 fins->o = IR_BUFPUT;
700 fins->op1 = fleft->op1;
701 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
702 return RETRYFOLD;
703 }
704 return EMITFOLD; /* Always emit, CSE later. */
705}
706
707LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
708LJFOLDF(bufput_kfold_rep)
709{
710 if (irref_isk(fleft->op2)) {
711 IRIns *irc = IR(fleft->op1);
712 if (irref_isk(irc->op2)) {
713 SBuf *sb = lj_buf_tmp_(J->L);
714 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
715 fins->o = IR_BUFPUT;
716 fins->op1 = irc->op1;
717 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
718 return RETRYFOLD;
719 }
720 }
721 return EMITFOLD; /* Always emit, CSE later. */
722}
723
724LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
725LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
726LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
727LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
728LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
729LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
730LJFOLDF(bufput_kfold_fmt)
731{
732 IRIns *irc = IR(fleft->op1);
733 lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
734 if (irref_isk(fleft->op2)) {
735 SFormat sf = (SFormat)IR(irc->op2)->i;
736 IRIns *ira = IR(fleft->op2);
737 SBuf *sb = lj_buf_tmp_(J->L);
738 switch (fins->op2) {
739 case IRCALL_lj_strfmt_putfxint:
740 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
741 break;
742 case IRCALL_lj_strfmt_putfstr:
743 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
744 break;
745 case IRCALL_lj_strfmt_putfchar:
746 sb = lj_strfmt_putfchar(sb, sf, ira->i);
747 break;
748 case IRCALL_lj_strfmt_putfnum_int:
749 case IRCALL_lj_strfmt_putfnum_uint:
750 case IRCALL_lj_strfmt_putfnum:
751 default: {
752 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
753 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
754 ir_knum(ira)->n);
755 break;
756 }
757 }
758 fins->o = IR_BUFPUT;
759 fins->op1 = irc->op1;
760 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
761 return RETRYFOLD;
762 }
763 return EMITFOLD; /* Always emit, CSE later. */
764}
765
531/* -- Constant folding of pointer arithmetic ------------------------------ */ 766/* -- Constant folding of pointer arithmetic ------------------------------ */
532 767
533LJFOLD(ADD KGC KINT) 768LJFOLD(ADD KGC KINT)
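The comment block at the start of this hunk explains the key property of the buffer ops added here: they form a chain, a use of the final op implies a use of everything above it, and an unused tail can simply be dropped by DCE. A standalone sketch of that chain-liveness rule with a hypothetical Op record (not LuaJIT IR):

#include <stdio.h>

typedef struct Op {
  const char *name;
  int prev;        /* Index of the previous op in the chain, -1 at the head. */
  int used;
} Op;

/* A use of the op at the end of the chain marks every op above it live. */
static void mark_chain_live(Op *ops, int last)
{
  int i;
  for (i = last; i != -1; i = ops[i].prev)
    ops[i].used = 1;
}

int main(void)
{
  Op ops[] = {
    {"BUFHDR RESET", -1, 0},
    {"BUFPUT \"a\"",  0, 0},
    {"BUFPUT \"b\"",  1, 0},
    {"BUFSTR",        2, 0},
  };
  int i;
  mark_chain_live(ops, 3);   /* The BUFSTR result is used. */
  for (i = 0; i < 4; i++)
    printf("%-14s %s\n", ops[i].name, ops[i].used ? "live" : "dead");
  return 0;
}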
@@ -648,21 +883,17 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
648LJFOLDF(kfold_conv_knum_int_num) 883LJFOLDF(kfold_conv_knum_int_num)
649{ 884{
650 lua_Number n = knumleft; 885 lua_Number n = knumleft;
651 if (!(fins->op2 & IRCONV_TRUNC)) { 886 int32_t k = lj_num2int(n);
652 int32_t k = lj_num2int(n); 887 if (irt_isguard(fins->t) && n != (lua_Number)k) {
653 if (irt_isguard(fins->t) && n != (lua_Number)k) { 888 /* We're about to create a guard which always fails, like CONV +1.5.
654 /* We're about to create a guard which always fails, like CONV +1.5. 889 ** Some pathological loops cause this during LICM, e.g.:
655 ** Some pathological loops cause this during LICM, e.g.: 890 ** local x,k,t = 0,1.5,{1,[1.5]=2}
656 ** local x,k,t = 0,1.5,{1,[1.5]=2} 891 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
657 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 892 ** assert(x == 300)
658 ** assert(x == 300) 893 */
659 */ 894 return FAILFOLD;
660 return FAILFOLD;
661 }
662 return INTFOLD(k);
663 } else {
664 return INTFOLD((int32_t)n);
665 } 895 }
896 return INTFOLD(k);
666} 897}
667 898
668LJFOLD(CONV KNUM IRCONV_U32_NUM) 899LJFOLD(CONV KNUM IRCONV_U32_NUM)
@@ -690,16 +921,18 @@ LJFOLDF(kfold_conv_knum_u64_num)
690 return INT64FOLD(lj_num2u64(knumleft)); 921 return INT64FOLD(lj_num2u64(knumleft));
691} 922}
692 923
693LJFOLD(TOSTR KNUM) 924LJFOLD(TOSTR KNUM any)
694LJFOLDF(kfold_tostr_knum) 925LJFOLDF(kfold_tostr_knum)
695{ 926{
696 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 927 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
697} 928}
698 929
699LJFOLD(TOSTR KINT) 930LJFOLD(TOSTR KINT any)
700LJFOLDF(kfold_tostr_kint) 931LJFOLDF(kfold_tostr_kint)
701{ 932{
702 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 933 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
934 lj_strfmt_int(J->L, fleft->i) :
935 lj_strfmt_char(J->L, fleft->i));
703} 936}
704 937
705LJFOLD(STRTO KGC) 938LJFOLD(STRTO KGC)
@@ -747,13 +980,13 @@ LJFOLDF(shortcut_round)
747 return NEXTFOLD; 980 return NEXTFOLD;
748} 981}
749 982
750LJFOLD(ABS ABS KNUM) 983LJFOLD(ABS ABS FLOAD)
751LJFOLDF(shortcut_left) 984LJFOLDF(shortcut_left)
752{ 985{
753 return LEFTFOLD; /* f(g(x)) ==> g(x) */ 986 return LEFTFOLD; /* f(g(x)) ==> g(x) */
754} 987}
755 988
756LJFOLD(ABS NEG KNUM) 989LJFOLD(ABS NEG FLOAD)
757LJFOLDF(shortcut_dropleft) 990LJFOLDF(shortcut_dropleft)
758{ 991{
759 PHIBARRIER(fleft); 992 PHIBARRIER(fleft);
@@ -833,8 +1066,10 @@ LJFOLDF(simplify_nummuldiv_k)
833 if (n == 1.0) { /* x o 1 ==> x */ 1066 if (n == 1.0) { /* x o 1 ==> x */
834 return LEFTFOLD; 1067 return LEFTFOLD;
835 } else if (n == -1.0) { /* x o -1 ==> -x */ 1068 } else if (n == -1.0) { /* x o -1 ==> -x */
1069 IRRef op1 = fins->op1;
1070 fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
1071 fins->op1 = op1;
836 fins->o = IR_NEG; 1072 fins->o = IR_NEG;
837 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
838 return RETRYFOLD; 1073 return RETRYFOLD;
839 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ 1074 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
840 fins->o = IR_ADD; 1075 fins->o = IR_ADD;
@@ -874,52 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg)
874 return RETRYFOLD; 1109 return RETRYFOLD;
875} 1110}
876 1111
877LJFOLD(POW any KINT) 1112LJFOLD(POW any KNUM)
878LJFOLDF(simplify_numpow_xk) 1113LJFOLDF(simplify_numpow_k)
879{ 1114{
880 int32_t k = fright->i; 1115 if (knumright == 0.0) /* x ^ 0 ==> 1 */
881 TRef ref = fins->op1;
882 if (k == 0) /* x ^ 0 ==> 1 */
883 return lj_ir_knum_one(J); /* Result must be a number, not an int. */ 1116 return lj_ir_knum_one(J); /* Result must be a number, not an int. */
884 if (k == 1) /* x ^ 1 ==> x */ 1117 else if (knumright == 1.0) /* x ^ 1 ==> x */
885 return LEFTFOLD; 1118 return LEFTFOLD;
886 if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */ 1119 else if (knumright == 2.0) /* x ^ 2 ==> x * x */
1120 return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
1121 else
887 return NEXTFOLD; 1122 return NEXTFOLD;
888 if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
889 ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
890 k = -k;
891 }
892 /* Unroll x^k for 1 <= k <= 65536. */
893 for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
894 ref = emitir(IRTN(IR_MUL), ref, ref);
895 if ((k >>= 1) != 0) { /* Handle trailing bits. */
896 TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
897 for (; k != 1; k >>= 1) {
898 if (k & 1)
899 ref = emitir(IRTN(IR_MUL), ref, tmp);
900 tmp = emitir(IRTN(IR_MUL), tmp, tmp);
901 }
902 ref = emitir(IRTN(IR_MUL), ref, tmp);
903 }
904 return ref;
905}
906
907LJFOLD(POW KNUM any)
908LJFOLDF(simplify_numpow_kx)
909{
910 lua_Number n = knumleft;
911 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
912 fins->o = IR_CONV;
913#if LJ_TARGET_X86ORX64
914 fins->op1 = fins->op2;
915 fins->op2 = IRCONV_NUM_INT;
916 fins->op2 = (IRRef1)lj_opt_fold(J);
917#endif
918 fins->op1 = (IRRef1)lj_ir_knum_one(J);
919 fins->o = IR_LDEXP;
920 return RETRYFOLD;
921 }
922 return NEXTFOLD;
923} 1123}
924 1124
925/* -- Simplify conversions ------------------------------------------------ */ 1125/* -- Simplify conversions ------------------------------------------------ */
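simplify_numpow_k above replaces the old integer-exponent unrolling: previously a constant integer exponent up to 65536 in magnitude was expanded into a chain of multiplies, now only x^0, x^1 and x^2 get special-cased and other constant exponents stay on the generic pow path. For reference, a standalone square-and-multiply routine of the same shape as the removed unrolling (plain C, not IR emission):

#include <assert.h>

static double ipow(double x, int k)
{
  double r = 1.0;
  if (k < 0) { x = 1.0 / x; k = -k; }   /* x ^ (-k) ==> (1/x) ^ k */
  while (k) {
    if (k & 1) r *= x;                  /* Multiply in the set bits of k. */
    x *= x;
    k >>= 1;
  }
  return r;
}

int main(void)
{
  assert(ipow(2.0, 10) == 1024.0);
  assert(ipow(2.0, -2) == 0.25);
  return 0;
}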
@@ -935,7 +1135,7 @@ LJFOLDF(shortcut_conv_num_int)
935} 1135}
936 1136
937LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ 1137LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
938LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/ 1138LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */
939LJFOLDF(simplify_conv_int_num) 1139LJFOLDF(simplify_conv_int_num)
940{ 1140{
941 /* Fold even across PHI to avoid expensive num->int conversions in loop. */ 1141 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
@@ -970,8 +1170,10 @@ LJFOLDF(simplify_conv_i64_num)
970 1170
971LJFOLD(CONV CONV IRCONV_INT_I64) /* _INT or _U32 */ 1171LJFOLD(CONV CONV IRCONV_INT_I64) /* _INT or _U32 */
972LJFOLD(CONV CONV IRCONV_INT_U64) /* _INT or _U32 */ 1172LJFOLD(CONV CONV IRCONV_INT_U64) /* _INT or _U32 */
1173LJFOLD(CONV CONV IRCONV_INT_U32) /* _INT or _U32 */
973LJFOLD(CONV CONV IRCONV_U32_I64) /* _INT or _U32 */ 1174LJFOLD(CONV CONV IRCONV_U32_I64) /* _INT or _U32 */
974LJFOLD(CONV CONV IRCONV_U32_U64) /* _INT or _U32 */ 1175LJFOLD(CONV CONV IRCONV_U32_U64) /* _INT or _U32 */
1176LJFOLD(CONV CONV IRCONV_U32_INT) /* _INT or _U32 */
975LJFOLDF(simplify_conv_int_i64) 1177LJFOLDF(simplify_conv_int_i64)
976{ 1178{
977 int src; 1179 int src;
@@ -1004,10 +1206,10 @@ LJFOLDF(simplify_tobit_conv)
1004{ 1206{
1005 /* Fold even across PHI to avoid expensive num->int conversions in loop. */ 1207 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
1006 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { 1208 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
1007 lua_assert(irt_isnum(fleft->t)); 1209 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1008 return fleft->op1; 1210 return fleft->op1;
1009 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { 1211 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
1010 lua_assert(irt_isnum(fleft->t)); 1212 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1011 fins->o = IR_CONV; 1213 fins->o = IR_CONV;
1012 fins->op1 = fleft->op1; 1214 fins->op1 = fleft->op1;
1013 fins->op2 = (IRT_INT<<5)|IRT_U32; 1215 fins->op2 = (IRT_INT<<5)|IRT_U32;
@@ -1016,14 +1218,13 @@ LJFOLDF(simplify_tobit_conv)
1016 return NEXTFOLD; 1218 return NEXTFOLD;
1017} 1219}
1018 1220
1019/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ 1221/* Shortcut floor/ceil/trunc + IRT_NUM <- integer conversion. */
1020LJFOLD(FPMATH CONV IRFPM_FLOOR) 1222LJFOLD(FPMATH CONV IRFPM_FLOOR)
1021LJFOLD(FPMATH CONV IRFPM_CEIL) 1223LJFOLD(FPMATH CONV IRFPM_CEIL)
1022LJFOLD(FPMATH CONV IRFPM_TRUNC) 1224LJFOLD(FPMATH CONV IRFPM_TRUNC)
1023LJFOLDF(simplify_floor_conv) 1225LJFOLDF(simplify_floor_conv)
1024{ 1226{
1025 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || 1227 if ((uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8))
1026 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
1027 return LEFTFOLD; 1228 return LEFTFOLD;
1028 return NEXTFOLD; 1229 return NEXTFOLD;
1029} 1230}
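
The rewritten condition above collapses the "is the CONV source an integer type" test into a single unsigned comparison. The underlying idiom is (unsigned)(x - lo) <= (unsigned)(hi - lo), equivalent to lo <= x && x <= hi; the exact IRT_* bounds come from the IR type enum. A minimal sketch of the idiom itself (in_range is an illustrative helper, not LuaJIT code):

  #include <assert.h>
  #include <stdint.h>

  /* Illustrative helper: lo <= x && x <= hi with a single unsigned compare. */
  static int in_range(uint32_t x, uint32_t lo, uint32_t hi)
  {
    return x - lo <= hi - lo;  /* Values below lo wrap around and fail the test. */
  }

  int main(void)
  {
    assert(in_range(5, 3, 9));
    assert(in_range(3, 3, 9) && in_range(9, 3, 9));  /* Bounds are inclusive. */
    assert(!in_range(2, 3, 9));                      /* 2 - 3 wraps to 0xffffffff. */
    assert(!in_range(10, 3, 9));
    return 0;
  }
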
@@ -1047,7 +1248,7 @@ LJFOLDF(simplify_conv_sext)
1047 /* Use scalar evolution analysis results to strength-reduce sign-extension. */ 1248 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
1048 if (ref == J->scev.idx) { 1249 if (ref == J->scev.idx) {
1049 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; 1250 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
1050 lua_assert(irt_isint(J->scev.t)); 1251 lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
1051 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { 1252 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
1052 ok_reduce: 1253 ok_reduce:
1053#if LJ_TARGET_X64 1254#if LJ_TARGET_X64
@@ -1078,6 +1279,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
1078LJFOLD(CONV MUL IRCONV_U32_U64) 1279LJFOLD(CONV MUL IRCONV_U32_U64)
1079LJFOLDF(simplify_conv_narrow) 1280LJFOLDF(simplify_conv_narrow)
1080{ 1281{
1282#if LJ_64
1283 UNUSED(J);
1284 return NEXTFOLD;
1285#else
1081 IROp op = (IROp)fleft->o; 1286 IROp op = (IROp)fleft->o;
1082 IRType t = irt_type(fins->t); 1287 IRType t = irt_type(fins->t);
1083 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; 1288 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
@@ -1088,6 +1293,7 @@ LJFOLDF(simplify_conv_narrow)
1088 fins->op1 = op1; 1293 fins->op1 = op1;
1089 fins->op2 = op2; 1294 fins->op2 = op2;
1090 return RETRYFOLD; 1295 return RETRYFOLD;
1296#endif
1091} 1297}
1092 1298
1093/* Special CSE rule for CONV. */ 1299/* Special CSE rule for CONV. */
@@ -1123,7 +1329,8 @@ LJFOLDF(narrow_convert)
1123 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ 1329 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1124 if (J->chain[IR_LOOP]) 1330 if (J->chain[IR_LOOP])
1125 return NEXTFOLD; 1331 return NEXTFOLD;
1126 lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); 1332 lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
1333 "unexpected CONV TOBIT");
1127 return lj_opt_narrow_convert(J); 1334 return lj_opt_narrow_convert(J);
1128} 1335}
1129 1336
@@ -1201,7 +1408,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1201 ** But this is mainly intended for simple address arithmetic. 1408 ** But this is mainly intended for simple address arithmetic.
1202 ** Also it's easier for the backend to optimize the original multiplies. 1409 ** Also it's easier for the backend to optimize the original multiplies.
1203 */ 1410 */
1204 if (k == 1) { /* i * 1 ==> i */ 1411 if (k == 0) { /* i * 0 ==> 0 */
1412 return RIGHTFOLD;
1413 } else if (k == 1) { /* i * 1 ==> i */
1205 return LEFTFOLD; 1414 return LEFTFOLD;
1206 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1415 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1207 fins->o = IR_BSHL; 1416 fins->o = IR_BSHL;
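
The constant-multiply strength reduction above relies on two easily checked identities on wrapping 32-bit integers: i * 0 is 0 (now returned as RIGHTFOLD, i.e. the constant operand) and i * 2^k equals i << k. A standalone sanity check, using unsigned arithmetic to model the wrapping IR semantics (not part of the patch):

  #include <assert.h>
  #include <stdint.h>

  int main(void)
  {
    uint32_t xs[4] = { 0u, 1u, 123456789u, 0xdeadbeefu };
    int i;
    for (i = 0; i < 4; i++) {
      assert(xs[i] * 8u == xs[i] << 3);   /* i * 2^k ==> i << k */
      assert(xs[i] * 1u == xs[i]);        /* i * 1   ==> i */
      assert(xs[i] * 0u == 0u);           /* i * 0   ==> 0 */
    }
    return 0;
  }
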
@@ -1214,9 +1423,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1214LJFOLD(MUL any KINT) 1423LJFOLD(MUL any KINT)
1215LJFOLDF(simplify_intmul_k32) 1424LJFOLDF(simplify_intmul_k32)
1216{ 1425{
1217 if (fright->i == 0) /* i * 0 ==> 0 */ 1426 if (fright->i >= 0)
1218 return INTFOLD(0);
1219 else if (fright->i > 0)
1220 return simplify_intmul_k(J, fright->i); 1427 return simplify_intmul_k(J, fright->i);
1221 return NEXTFOLD; 1428 return NEXTFOLD;
1222} 1429}
@@ -1224,21 +1431,20 @@ LJFOLDF(simplify_intmul_k32)
1224LJFOLD(MUL any KINT64) 1431LJFOLD(MUL any KINT64)
1225LJFOLDF(simplify_intmul_k64) 1432LJFOLDF(simplify_intmul_k64)
1226{ 1433{
1227 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1434#if LJ_HASFFI
1228 return INT64FOLD(0); 1435 if (ir_kint64(fright)->u64 < 0x80000000u)
1229#if LJ_64
1230 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1231 else if (ir_kint64(fright)->u64 < 0x80000000u)
1232 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1436 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1233#endif
1234 return NEXTFOLD; 1437 return NEXTFOLD;
1438#else
1439 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1440#endif
1235} 1441}
1236 1442
1237LJFOLD(MOD any KINT) 1443LJFOLD(MOD any KINT)
1238LJFOLDF(simplify_intmod_k) 1444LJFOLDF(simplify_intmod_k)
1239{ 1445{
1240 int32_t k = fright->i; 1446 int32_t k = fright->i;
1241 lua_assert(k != 0); 1447 lj_assertJ(k != 0, "integer mod 0");
1242 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ 1448 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1243 fins->o = IR_BAND; 1449 fins->o = IR_BAND;
1244 fins->op2 = lj_ir_kint(J, k-1); 1450 fins->op2 = lj_ir_kint(J, k-1);
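
The fold above turns i % 2^k into i & (2^k-1). Since Lua's % is floored modulo, the bit-mask form holds for negative i as well (only k has to be a positive power of two), which is presumably why no sign test on i is needed. A small standalone check (floored_mod is an illustrative helper mirroring Lua's % semantics):

  #include <assert.h>
  #include <stdint.h>

  /* Illustrative helper: floored modulo, i.e. Lua's % semantics. */
  static int32_t floored_mod(int32_t i, int32_t k)
  {
    int32_t r = i % k;                      /* C's truncated remainder. */
    return (r != 0 && (r < 0) != (k < 0)) ? r + k : r;
  }

  int main(void)
  {
    static const int32_t is[6] = { 7, -7, 0, -1, 123456, -123457 };
    const int32_t k = 16;                   /* Positive power of two. */
    int n;
    for (n = 0; n < 6; n++)
      assert(floored_mod(is[n], k) ==
             (int32_t)((uint32_t)is[n] & (uint32_t)(k - 1)));
    return 0;
  }
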
@@ -1487,6 +1693,15 @@ LJFOLDF(simplify_shiftk_andk)
1487 fins->op2 = (IRRef1)lj_ir_kint(J, k); 1693 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1488 fins->ot = IRTI(IR_BAND); 1694 fins->ot = IRTI(IR_BAND);
1489 return RETRYFOLD; 1695 return RETRYFOLD;
1696 } else if (irk->o == IR_KINT64) {
1697 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
1698 (IROp)fins->o);
1699 IROpT ot = fleft->ot;
1700 fins->op1 = fleft->op1;
1701 fins->op1 = (IRRef1)lj_opt_fold(J);
1702 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1703 fins->ot = ot;
1704 return RETRYFOLD;
1490 } 1705 }
1491 return NEXTFOLD; 1706 return NEXTFOLD;
1492} 1707}
@@ -1502,6 +1717,47 @@ LJFOLDF(simplify_andk_shiftk)
1502 return NEXTFOLD; 1717 return NEXTFOLD;
1503} 1718}
1504 1719
1720LJFOLD(BAND BOR KINT)
1721LJFOLD(BOR BAND KINT)
1722LJFOLDF(simplify_andor_k)
1723{
1724 IRIns *irk = IR(fleft->op2);
1725 PHIBARRIER(fleft);
1726 if (irk->o == IR_KINT) {
1727 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1728 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1729 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1730 if (k == (fins->o == IR_BAND ? 0 : -1)) {
1731 fins->op1 = fleft->op1;
1732 return RETRYFOLD;
1733 }
1734 }
1735 return NEXTFOLD;
1736}
1737
1738LJFOLD(BAND BOR KINT64)
1739LJFOLD(BOR BAND KINT64)
1740LJFOLDF(simplify_andor_k64)
1741{
1742#if LJ_HASFFI
1743 IRIns *irk = IR(fleft->op2);
1744 PHIBARRIER(fleft);
1745 if (irk->o == IR_KINT64) {
1746 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1747 (IROp)fins->o);
1748 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1749 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1750 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
1751 fins->op1 = fleft->op1;
1752 return RETRYFOLD;
1753 }
1754 }
1755 return NEXTFOLD;
1756#else
1757 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1758#endif
1759}
1760
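
The two fold handlers added above (32-bit and 64-bit variants) use the bitwise identities quoted in their comments: (i | k1) & k2 == i & k2 when k1 & k2 == 0, and (i & k1) | k2 == i | k2 when k1 | k2 == -1. A standalone check with concrete masks (not part of the patch):

  #include <assert.h>
  #include <stdint.h>

  int main(void)
  {
    uint32_t xs[3] = { 0u, 0x12345678u, 0xffffffffu };
    int i;
    for (i = 0; i < 3; i++) {
      uint32_t x = xs[i];
      /* (i | k1) & k2 == i & k2, valid here because k1 & k2 == 0. */
      assert(((x | 0x0000ffffu) & 0xffff0000u) == (x & 0xffff0000u));
      /* (i & k1) | k2 == i | k2, valid here because k1 | k2 == 0xffffffff. */
      assert(((x & 0x0000ffffu) | 0xffff0000u) == (x | 0xffff0000u));
    }
    return 0;
  }
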
1505/* -- Reassociation ------------------------------------------------------- */ 1761/* -- Reassociation ------------------------------------------------------- */
1506 1762
1507LJFOLD(ADD ADD KINT) 1763LJFOLD(ADD ADD KINT)
@@ -1531,11 +1787,11 @@ LJFOLD(BOR BOR KINT64)
1531LJFOLD(BXOR BXOR KINT64) 1787LJFOLD(BXOR BXOR KINT64)
1532LJFOLDF(reassoc_intarith_k64) 1788LJFOLDF(reassoc_intarith_k64)
1533{ 1789{
1534#if LJ_HASFFI || LJ_64 1790#if LJ_HASFFI
1535 IRIns *irk = IR(fleft->op2); 1791 IRIns *irk = IR(fleft->op2);
1536 if (irk->o == IR_KINT64) { 1792 if (irk->o == IR_KINT64) {
1537 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1793 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1538 ir_k64(fright)->u64, (IROp)fins->o); 1794 (IROp)fins->o);
1539 PHIBARRIER(fleft); 1795 PHIBARRIER(fleft);
1540 fins->op1 = fleft->op1; 1796 fins->op1 = fleft->op1;
1541 fins->op2 = (IRRef1)lj_ir_kint64(J, k); 1797 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
@@ -1543,12 +1799,10 @@ LJFOLDF(reassoc_intarith_k64)
1543 } 1799 }
1544 return NEXTFOLD; 1800 return NEXTFOLD;
1545#else 1801#else
1546 UNUSED(J); lua_assert(0); return FAILFOLD; 1802 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1547#endif 1803#endif
1548} 1804}
1549 1805
1550LJFOLD(MIN MIN any)
1551LJFOLD(MAX MAX any)
1552LJFOLD(BAND BAND any) 1806LJFOLD(BAND BAND any)
1553LJFOLD(BOR BOR any) 1807LJFOLD(BOR BOR any)
1554LJFOLDF(reassoc_dup) 1808LJFOLDF(reassoc_dup)
@@ -1558,6 +1812,15 @@ LJFOLDF(reassoc_dup)
1558 return NEXTFOLD; 1812 return NEXTFOLD;
1559} 1813}
1560 1814
1815LJFOLD(MIN MIN any)
1816LJFOLD(MAX MAX any)
1817LJFOLDF(reassoc_dup_minmax)
1818{
1819 if (fins->op2 == fleft->op2)
1820 return LEFTFOLD; /* (a o b) o b ==> a o b */
1821 return NEXTFOLD;
1822}
1823
1561LJFOLD(BXOR BXOR any) 1824LJFOLD(BXOR BXOR any)
1562LJFOLDF(reassoc_bxor) 1825LJFOLDF(reassoc_bxor)
1563{ 1826{
@@ -1596,23 +1859,12 @@ LJFOLDF(reassoc_shift)
1596 return NEXTFOLD; 1859 return NEXTFOLD;
1597} 1860}
1598 1861
1599LJFOLD(MIN MIN KNUM)
1600LJFOLD(MAX MAX KNUM)
1601LJFOLD(MIN MIN KINT) 1862LJFOLD(MIN MIN KINT)
1602LJFOLD(MAX MAX KINT) 1863LJFOLD(MAX MAX KINT)
1603LJFOLDF(reassoc_minmax_k) 1864LJFOLDF(reassoc_minmax_k)
1604{ 1865{
1605 IRIns *irk = IR(fleft->op2); 1866 IRIns *irk = IR(fleft->op2);
1606 if (irk->o == IR_KNUM) { 1867 if (irk->o == IR_KINT) {
1607 lua_Number a = ir_knum(irk)->n;
1608 lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
1609 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1610 return LEFTFOLD;
1611 PHIBARRIER(fleft);
1612 fins->op1 = fleft->op1;
1613 fins->op2 = (IRRef1)lj_ir_knum(J, y);
1614 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1615 } else if (irk->o == IR_KINT) {
1616 int32_t a = irk->i; 1868 int32_t a = irk->i;
1617 int32_t y = kfold_intop(a, fright->i, fins->o); 1869 int32_t y = kfold_intop(a, fright->i, fins->o);
1618 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ 1870 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
@@ -1625,24 +1877,6 @@ LJFOLDF(reassoc_minmax_k)
1625 return NEXTFOLD; 1877 return NEXTFOLD;
1626} 1878}
1627 1879
1628LJFOLD(MIN MAX any)
1629LJFOLD(MAX MIN any)
1630LJFOLDF(reassoc_minmax_left)
1631{
1632 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1633 return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
1634 return NEXTFOLD;
1635}
1636
1637LJFOLD(MIN any MAX)
1638LJFOLD(MAX any MIN)
1639LJFOLDF(reassoc_minmax_right)
1640{
1641 if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
1642 return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
1643 return NEXTFOLD;
1644}
1645
1646/* -- Array bounds check elimination -------------------------------------- */ 1880/* -- Array bounds check elimination -------------------------------------- */
1647 1881
1648/* Eliminate ABC across PHIs to handle t[i-1] forwarding case. 1882/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
@@ -1773,8 +2007,6 @@ LJFOLDF(comm_comp)
1773 2007
1774LJFOLD(BAND any any) 2008LJFOLD(BAND any any)
1775LJFOLD(BOR any any) 2009LJFOLD(BOR any any)
1776LJFOLD(MIN any any)
1777LJFOLD(MAX any any)
1778LJFOLDF(comm_dup) 2010LJFOLDF(comm_dup)
1779{ 2011{
1780 if (fins->op1 == fins->op2) /* x o x ==> x */ 2012 if (fins->op1 == fins->op2) /* x o x ==> x */
@@ -1782,6 +2014,15 @@ LJFOLDF(comm_dup)
1782 return fold_comm_swap(J); 2014 return fold_comm_swap(J);
1783} 2015}
1784 2016
2017LJFOLD(MIN any any)
2018LJFOLD(MAX any any)
2019LJFOLDF(comm_dup_minmax)
2020{
2021 if (fins->op1 == fins->op2) /* x o x ==> x */
2022 return LEFTFOLD;
2023 return NEXTFOLD;
2024}
2025
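
Unlike comm_dup, the new comm_dup_minmax above keeps x o x ==> x but no longer swaps operands. A plausible motivation -- inferred here, not stated in the patch -- is that the usual hardware min/max (e.g. SSE minsd) is not commutative once NaNs are involved, because the result of an unordered compare depends on operand order:

  #include <assert.h>
  #include <math.h>

  /* SSE-style min: returns b whenever the comparison is unordered. */
  static double sse_min(double a, double b) { return a < b ? a : b; }

  int main(void)
  {
    double nan = NAN;
    assert(sse_min(nan, 1.0) == 1.0);   /* NaN in the left operand is dropped. */
    assert(isnan(sse_min(1.0, nan)));   /* NaN in the right operand propagates. */
    return 0;
  }
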
1785LJFOLD(BXOR any any) 2026LJFOLD(BXOR any any)
1786LJFOLDF(comm_bxor) 2027LJFOLDF(comm_bxor)
1787{ 2028{
@@ -1818,7 +2059,7 @@ LJFOLDF(merge_eqne_snew_kgc)
1818{ 2059{
1819 GCstr *kstr = ir_kstr(fright); 2060 GCstr *kstr = ir_kstr(fright);
1820 int32_t len = (int32_t)kstr->len; 2061 int32_t len = (int32_t)kstr->len;
1821 lua_assert(irt_isstr(fins->t)); 2062 lj_assertJ(irt_isstr(fins->t), "bad equality IR type");
1822 2063
1823#if LJ_TARGET_UNALIGNED 2064#if LJ_TARGET_UNALIGNED
1824#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ 2065#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
@@ -1882,7 +2123,7 @@ LJFOLD(HLOAD KKPTR)
1882LJFOLDF(kfold_hload_kkptr) 2123LJFOLDF(kfold_hload_kkptr)
1883{ 2124{
1884 UNUSED(J); 2125 UNUSED(J);
1885 lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); 2126 lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
1886 return TREF_NIL; 2127 return TREF_NIL;
1887} 2128}
1888 2129
@@ -1892,11 +2133,29 @@ LJFOLDX(lj_opt_fwd_hload)
1892LJFOLD(ULOAD any) 2133LJFOLD(ULOAD any)
1893LJFOLDX(lj_opt_fwd_uload) 2134LJFOLDX(lj_opt_fwd_uload)
1894 2135
1895LJFOLD(CALLL any IRCALL_lj_tab_len) 2136LJFOLD(ALEN any any)
1896LJFOLDX(lj_opt_fwd_tab_len) 2137LJFOLDX(lj_opt_fwd_alen)
2138
2139/* Try to merge UREFO/UREFC into referenced instruction. */
2140static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
2141{
2142 if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
2143 /* Might be pointing to some other coroutine's stack.
2144 ** And GC might shrink said stack, thereby repointing the upvalue.
2145 ** GC might even collect said coroutine, thereby closing the upvalue.
2146 */
2147 if (gcstep_barrier(J, ref))
2148 return EMITFOLD; /* So cannot merge. */
2149 /* Current fins wants a check, but ir doesn't have one. */
2150 if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
2151 irt_type(ir->t) == IRT_IGC)
2152 ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */
2153 }
2154 return ref; /* Not a TRef, but the caller doesn't care. */
2155}
1897 2156
1898/* Upvalue refs are really loads, but there are no corresponding stores. 2157/* Upvalue refs are really loads, but there are no corresponding stores.
1899** So CSE is ok for them, except for UREFO across a GC step (see below). 2158** So CSE is ok for them, except for guarded UREFO across a GC step.
1900** If the referenced function is const, its upvalue addresses are const, too. 2159** If the referenced function is const, its upvalue addresses are const, too.
1901** This can be used to improve CSE by looking for the same address, 2160** This can be used to improve CSE by looking for the same address,
1902** even if the upvalues originate from a different function. 2161** even if the upvalues originate from a different function.
@@ -1914,9 +2173,7 @@ LJFOLDF(cse_uref)
1914 if (irref_isk(ir->op1)) { 2173 if (irref_isk(ir->op1)) {
1915 GCfunc *fn2 = ir_kfunc(IR(ir->op1)); 2174 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
1916 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) { 2175 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
1917 if (fins->o == IR_UREFO && gcstep_barrier(J, ref)) 2176 return merge_uref(J, ref, ir);
1918 break;
1919 return ref;
1920 } 2177 }
1921 } 2178 }
1922 ref = ir->prev; 2179 ref = ir->prev;
@@ -1925,6 +2182,24 @@ LJFOLDF(cse_uref)
1925 return EMITFOLD; 2182 return EMITFOLD;
1926} 2183}
1927 2184
2185/* Custom CSE for UREFO. */
2186LJFOLD(UREFO any any)
2187LJFOLDF(cse_urefo)
2188{
2189 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
2190 IRRef ref = J->chain[IR_UREFO];
2191 IRRef lim = fins->op1;
2192 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2193 while (ref > lim) {
2194 IRIns *ir = IR(ref);
2195 if (ir->op12 == op12)
2196 return merge_uref(J, ref, ir);
2197 ref = ir->prev;
2198 }
2199 }
2200 return EMITFOLD;
2201}
2202
1928LJFOLD(HREFK any any) 2203LJFOLD(HREFK any any)
1929LJFOLDX(lj_opt_fwd_hrefk) 2204LJFOLDX(lj_opt_fwd_hrefk)
1930 2205
@@ -1954,6 +2229,7 @@ LJFOLDF(fwd_href_tdup)
1954** an aliased table, as it may invalidate all of the pointers and fields. 2229** an aliased table, as it may invalidate all of the pointers and fields.
1955** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2230** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1956** FLOADs. And NEWREF itself is treated like a store (see below). 2231** FLOADs. And NEWREF itself is treated like a store (see below).
2232** LREF is constant (per trace) since coroutine switches are not inlined.
1957*/ 2233*/
1958LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2234LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1959LJFOLDF(fload_tab_tnew_asize) 2235LJFOLDF(fload_tab_tnew_asize)
@@ -2017,6 +2293,35 @@ LJFOLDF(fload_str_len_snew)
2017 return NEXTFOLD; 2293 return NEXTFOLD;
2018} 2294}
2019 2295
2296LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2297LJFOLDF(fload_str_len_tostr)
2298{
2299 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2300 return INTFOLD(1);
2301 return NEXTFOLD;
2302}
2303
2304LJFOLD(FLOAD any IRFL_SBUF_W)
2305LJFOLD(FLOAD any IRFL_SBUF_E)
2306LJFOLD(FLOAD any IRFL_SBUF_B)
2307LJFOLD(FLOAD any IRFL_SBUF_L)
2308LJFOLD(FLOAD any IRFL_SBUF_REF)
2309LJFOLD(FLOAD any IRFL_SBUF_R)
2310LJFOLDF(fload_sbuf)
2311{
2312 TRef tr = lj_opt_fwd_fload(J);
2313 return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
2314}
2315
2316/* The fast function ID of function objects is immutable. */
2317LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
2318LJFOLDF(fload_func_ffid_kgc)
2319{
2320 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
2321 return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
2322 return NEXTFOLD;
2323}
2324
2020/* The C type ID of cdata objects is immutable. */ 2325/* The C type ID of cdata objects is immutable. */
2021LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2326LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2022LJFOLDF(fload_cdata_typeid_kgc) 2327LJFOLDF(fload_cdata_typeid_kgc)
@@ -2063,6 +2368,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2063} 2368}
2064 2369
2065LJFOLD(FLOAD any IRFL_STR_LEN) 2370LJFOLD(FLOAD any IRFL_STR_LEN)
2371LJFOLD(FLOAD any IRFL_FUNC_ENV)
2372LJFOLD(FLOAD any IRFL_THREAD_ENV)
2066LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2373LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2067LJFOLD(FLOAD any IRFL_CDATA_PTR) 2374LJFOLD(FLOAD any IRFL_CDATA_PTR)
2068LJFOLD(FLOAD any IRFL_CDATA_INT) 2375LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2082,7 +2389,7 @@ LJFOLDF(fwd_sload)
2082 TRef tr = lj_opt_cse(J); 2389 TRef tr = lj_opt_cse(J);
2083 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; 2390 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
2084 } else { 2391 } else {
2085 lua_assert(J->slot[fins->op1] != 0); 2392 lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
2086 return J->slot[fins->op1]; 2393 return J->slot[fins->op1];
2087 } 2394 }
2088} 2395}
@@ -2113,14 +2420,9 @@ LJFOLDF(fold_base)
2113 2420
2114/* Write barriers are amenable to CSE, but not across any incremental 2421/* Write barriers are amenable to CSE, but not across any incremental
2115** GC steps. 2422** GC steps.
2116**
2117** The same logic applies to open upvalue references, because a stack
2118** may be resized during a GC step (not the current stack, but maybe that
2119** of a coroutine).
2120*/ 2423*/
2121LJFOLD(TBAR any) 2424LJFOLD(TBAR any)
2122LJFOLD(OBAR any any) 2425LJFOLD(OBAR any any)
2123LJFOLD(UREFO any any)
2124LJFOLDF(barrier_tab) 2426LJFOLDF(barrier_tab)
2125{ 2427{
2126 TRef tr = lj_opt_cse(J); 2428 TRef tr = lj_opt_cse(J);
@@ -2139,6 +2441,17 @@ LJFOLDF(barrier_tnew_tdup)
2139 return DROPFOLD; 2441 return DROPFOLD;
2140} 2442}
2141 2443
2444/* -- Profiling ----------------------------------------------------------- */
2445
2446LJFOLD(PROF any any)
2447LJFOLDF(prof)
2448{
2449 IRRef ref = J->chain[IR_PROF];
2450 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2451 return ref;
2452 return EMITFOLD;
2453}
2454
2142/* -- Stores and allocations ---------------------------------------------- */ 2455/* -- Stores and allocations ---------------------------------------------- */
2143 2456
2144/* Stores and allocations cannot be folded or passed on to CSE in general. 2457/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2161,8 +2474,10 @@ LJFOLD(XSTORE any any)
2161LJFOLDX(lj_opt_dse_xstore) 2474LJFOLDX(lj_opt_dse_xstore)
2162 2475
2163LJFOLD(NEWREF any any) /* Treated like a store. */ 2476LJFOLD(NEWREF any any) /* Treated like a store. */
2164LJFOLD(CALLS any any) 2477LJFOLD(TMPREF any any)
2478LJFOLD(CALLA any any)
2165LJFOLD(CALLL any any) /* Safeguard fallback. */ 2479LJFOLD(CALLL any any) /* Safeguard fallback. */
2480LJFOLD(CALLS any any)
2166LJFOLD(CALLXS any any) 2481LJFOLD(CALLXS any any)
2167LJFOLD(XBAR) 2482LJFOLD(XBAR)
2168LJFOLD(RETF any any) /* Modifies BASE. */ 2483LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2206,8 +2521,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
2206 IRRef ref; 2521 IRRef ref;
2207 2522
2208 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { 2523 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
2209 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | 2524 lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
2210 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); 2525 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
2526 "bad JIT_F_OPT_DEFAULT");
2211 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ 2527 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2212 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) 2528 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
2213 return lj_opt_cse(J); 2529 return lj_opt_cse(J);
@@ -2232,10 +2548,14 @@ retry:
2232 if (fins->op1 >= J->cur.nk) { 2548 if (fins->op1 >= J->cur.nk) {
2233 key += (uint32_t)IR(fins->op1)->o << 10; 2549 key += (uint32_t)IR(fins->op1)->o << 10;
2234 *fleft = *IR(fins->op1); 2550 *fleft = *IR(fins->op1);
2551 if (fins->op1 < REF_TRUE)
2552 fleft[1] = IR(fins->op1)[1];
2235 } 2553 }
2236 if (fins->op2 >= J->cur.nk) { 2554 if (fins->op2 >= J->cur.nk) {
2237 key += (uint32_t)IR(fins->op2)->o; 2555 key += (uint32_t)IR(fins->op2)->o;
2238 *fright = *IR(fins->op2); 2556 *fright = *IR(fins->op2);
2557 if (fins->op2 < REF_TRUE)
2558 fright[1] = IR(fins->op2)[1];
2239 } else { 2559 } else {
2240 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ 2560 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2241 } 2561 }
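
The added fleft[1]/fright[1] copies above suggest that constant operands below REF_TRUE (64-bit constants such as KNUM/KINT64) now occupy two adjacent 32-bit IR slots, so the fold engine has to copy the second slot as well; the same layout would explain the extra ir++ skip over 64-bit constants in lj_opt_sink.c further down. A toy illustration of the two-slot idea (IRSlot is purely illustrative, not LuaJIT's layout):

  #include <assert.h>
  #include <stdint.h>
  #include <string.h>

  /* Purely illustrative: a 32-bit IR slot; a 64-bit constant spans two of them. */
  typedef struct IRSlot { uint32_t w; } IRSlot;

  int main(void)
  {
    double k = 3.141592653589793;
    IRSlot slots[2], copy[2];
    double back;
    memcpy(slots, &k, sizeof(k));  /* Constant embedded inline in two slots. */
    copy[0] = slots[0];            /* Copying only the first slot would lose half, */
    copy[1] = slots[1];            /* so the second slot must be copied as well.   */
    memcpy(&back, copy, sizeof(back));
    assert(back == k);
    return 0;
  }
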
@@ -2265,7 +2585,7 @@ retry:
2265 return lj_ir_kint(J, fins->i); 2585 return lj_ir_kint(J, fins->i);
2266 if (ref == FAILFOLD) 2586 if (ref == FAILFOLD)
2267 lj_trace_err(J, LJ_TRERR_GFAIL); 2587 lj_trace_err(J, LJ_TRERR_GFAIL);
2268 lua_assert(ref == DROPFOLD); 2588 lj_assertJ(ref == DROPFOLD, "bad fold result");
2269 return REF_DROP; 2589 return REF_DROP;
2270} 2590}
2271 2591
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 8a0e611f..01c2b306 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_iropt.h" 17#include "lj_iropt.h"
@@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
225 /* Setup new snapshot. */ 225 /* Setup new snapshot. */
226 snap->mapofs = (uint32_t)nmapofs; 226 snap->mapofs = (uint32_t)nmapofs;
227 snap->ref = (IRRef1)J->cur.nins; 227 snap->ref = (IRRef1)J->cur.nins;
228 snap->mcofs = 0;
228 snap->nslots = nslots; 229 snap->nslots = nslots;
229 snap->topslot = osnap->topslot; 230 snap->topslot = osnap->topslot;
230 snap->count = 0; 231 snap->count = 0;
@@ -254,9 +255,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
254 J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); 255 J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap);
255} 256}
256 257
258typedef struct LoopState {
259 jit_State *J;
260 IRRef1 *subst;
261 MSize sizesubst;
262} LoopState;
263
257/* Unroll loop. */ 264/* Unroll loop. */
258static void loop_unroll(jit_State *J) 265static void loop_unroll(LoopState *lps)
259{ 266{
267 jit_State *J = lps->J;
260 IRRef1 phi[LJ_MAX_PHI]; 268 IRRef1 phi[LJ_MAX_PHI];
261 uint32_t nphi = 0; 269 uint32_t nphi = 0;
262 IRRef1 *subst; 270 IRRef1 *subst;
@@ -265,13 +273,13 @@ static void loop_unroll(jit_State *J)
265 SnapEntry *loopmap, *psentinel; 273 SnapEntry *loopmap, *psentinel;
266 IRRef ins, invar; 274 IRRef ins, invar;
267 275
268 /* Use temp buffer for substitution table. 276 /* Allocate substitution table.
269 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. 277 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
270 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
271 */ 278 */
272 invar = J->cur.nins; 279 invar = J->cur.nins;
273 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 280 lps->sizesubst = invar - REF_BIAS;
274 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; 281 lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1);
282 subst = lps->subst - REF_BIAS;
275 subst[REF_BASE] = REF_BASE; 283 subst[REF_BASE] = REF_BASE;
276 284
277 /* LOOP separates the pre-roll from the loop body. */ 285 /* LOOP separates the pre-roll from the loop body. */
@@ -292,7 +300,8 @@ static void loop_unroll(jit_State *J)
292 loopmap = &J->cur.snapmap[loopsnap->mapofs]; 300 loopmap = &J->cur.snapmap[loopsnap->mapofs];
293 /* The PC of snapshot #0 and the loop snapshot must match. */ 301 /* The PC of snapshot #0 and the loop snapshot must match. */
294 psentinel = &loopmap[loopsnap->nent]; 302 psentinel = &loopmap[loopsnap->nent];
295 lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); 303 lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent],
304 "mismatched PC for loop snapshot");
296 *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ 305 *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
297 306
298 /* Start substitution with snapshot #1 (#0 is empty for root traces). */ 307 /* Start substitution with snapshot #1 (#0 is empty for root traces). */
@@ -345,10 +354,12 @@ static void loop_unroll(jit_State *J)
345 irr = IR(ref); 354 irr = IR(ref);
346 goto phiconv; 355 goto phiconv;
347 } 356 }
348 } else if (ref != REF_DROP && irr->o == IR_CONV && 357 } else if (ref != REF_DROP && ref > invar &&
349 ref > invar && irr->op1 < invar) { 358 ((irr->o == IR_CONV && irr->op1 < invar) ||
350 /* May need an extra PHI for a CONV. */ 359 (irr->o == IR_ALEN && irr->op2 < invar &&
351 ref = irr->op1; 360 irr->op2 != REF_NIL))) {
361 /* May need an extra PHI for a CONV or ALEN hint. */
362 ref = irr->o == IR_CONV ? irr->op1 : irr->op2;
352 irr = IR(ref); 363 irr = IR(ref);
353 phiconv: 364 phiconv:
354 if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { 365 if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {
@@ -363,7 +374,7 @@ static void loop_unroll(jit_State *J)
363 } 374 }
364 if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ 375 if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
365 J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; 376 J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs;
366 lua_assert(J->cur.nsnapmap <= J->sizesnapmap); 377 lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index");
367 *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ 378 *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
368 379
369 loop_emit_phi(J, subst, phi, nphi, onsnap); 380 loop_emit_phi(J, subst, phi, nphi, onsnap);
@@ -396,7 +407,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
396static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) 407static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
397{ 408{
398 UNUSED(L); UNUSED(dummy); 409 UNUSED(L); UNUSED(dummy);
399 loop_unroll((jit_State *)ud); 410 loop_unroll((LoopState *)ud);
400 return NULL; 411 return NULL;
401} 412}
402 413
@@ -406,7 +417,13 @@ int lj_opt_loop(jit_State *J)
406 IRRef nins = J->cur.nins; 417 IRRef nins = J->cur.nins;
407 SnapNo nsnap = J->cur.nsnap; 418 SnapNo nsnap = J->cur.nsnap;
408 MSize nsnapmap = J->cur.nsnapmap; 419 MSize nsnapmap = J->cur.nsnapmap;
409 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 420 LoopState lps;
421 int errcode;
422 lps.J = J;
423 lps.subst = NULL;
424 lps.sizesubst = 0;
425 errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt);
426 lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1);
410 if (LJ_UNLIKELY(errcode)) { 427 if (LJ_UNLIKELY(errcode)) {
411 lua_State *L = J->L; 428 lua_State *L = J->L;
412 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ 429 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 214fb632..29b33f29 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -17,12 +17,14 @@
17#include "lj_ir.h" 17#include "lj_ir.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#include "lj_iropt.h" 19#include "lj_iropt.h"
20#include "lj_ircall.h"
21#include "lj_dispatch.h"
20 22
21/* Some local macros to save typing. Undef'd at the end. */ 23/* Some local macros to save typing. Undef'd at the end. */
22#define IR(ref) (&J->cur.ir[(ref)]) 24#define IR(ref) (&J->cur.ir[(ref)])
23#define fins (&J->fold.ins) 25#define fins (&J->fold.ins)
24#define fleft (&J->fold.left) 26#define fleft (J->fold.left)
25#define fright (&J->fold.right) 27#define fright (J->fold.right)
26 28
27/* 29/*
28** Caveat #1: return value is not always a TRef -- only use with tref_ref(). 30** Caveat #1: return value is not always a TRef -- only use with tref_ref().
@@ -55,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
55{ 57{
56 IRIns *taba = IR(ta), *tabb = IR(tb); 58 IRIns *taba = IR(ta), *tabb = IR(tb);
57 int newa, newb; 59 int newa, newb;
58 lua_assert(ta != tb); 60 lj_assertJ(ta != tb, "bad usage");
59 lua_assert(irt_istab(taba->t) && irt_istab(tabb->t)); 61 lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage");
60 /* Disambiguate new allocations. */ 62 /* Disambiguate new allocations. */
61 newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); 63 newa = (taba->o == IR_TNEW || taba->o == IR_TDUP);
62 newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); 64 newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP);
@@ -70,6 +72,34 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
70 return aa_escape(J, taba, tabb); 72 return aa_escape(J, taba, tabb);
71} 73}
72 74
75/* Check whether there's no aliasing table.clear. */
76static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
77{
78 IRRef ref = J->chain[IR_CALLS];
79 while (ref > lim) {
80 IRIns *calls = IR(ref);
81 if (calls->op2 == IRCALL_lj_tab_clear &&
82 (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
83 return 0; /* Conflict. */
84 ref = calls->prev;
85 }
86 return 1; /* No conflict. Can safely FOLD/CSE. */
87}
88
89/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
90int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
91{
92 IRRef ta = fins->op1;
93 IRRef ref = J->chain[IR_NEWREF];
94 while (ref > lim) {
95 IRIns *newref = IR(ref);
96 if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
97 return 0; /* Conflict. */
98 ref = newref->prev;
99 }
100 return fwd_aa_tab_clear(J, lim, ta);
101}
102
73/* Alias analysis for array and hash access using key-based disambiguation. */ 103/* Alias analysis for array and hash access using key-based disambiguation. */
74static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) 104static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
75{ 105{
@@ -98,7 +128,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
98 /* Disambiguate array references based on index arithmetic. */ 128 /* Disambiguate array references based on index arithmetic. */
99 int32_t ofsa = 0, ofsb = 0; 129 int32_t ofsa = 0, ofsb = 0;
100 IRRef basea = ka, baseb = kb; 130 IRRef basea = ka, baseb = kb;
101 lua_assert(refb->o == IR_AREF); 131 lj_assertJ(refb->o == IR_AREF, "expected AREF");
102 /* Gather base and offset from t[base] or t[base+-ofs]. */ 132 /* Gather base and offset from t[base] or t[base+-ofs]. */
103 if (keya->o == IR_ADD && irref_isk(keya->op2)) { 133 if (keya->o == IR_ADD && irref_isk(keya->op2)) {
104 basea = keya->op1; 134 basea = keya->op1;
@@ -116,8 +146,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
116 return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ 146 return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
117 } else { 147 } else {
118 /* Disambiguate hash references based on the type of their keys. */ 148 /* Disambiguate hash references based on the type of their keys. */
119 lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && 149 lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
120 (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); 150 (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF),
151 "bad xREF IR op %d or %d", refa->o, refb->o);
121 if (!irt_sametype(keya->t, keyb->t)) 152 if (!irt_sametype(keya->t, keyb->t))
122 return ALIAS_NO; /* Different key types. */ 153 return ALIAS_NO; /* Different key types. */
123 } 154 }
@@ -151,7 +182,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
151 IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr; 182 IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
152 IRRef tab = ir->op1; 183 IRRef tab = ir->op1;
153 ir = IR(tab); 184 ir = IR(tab);
154 if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) { 185 if ((ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) &&
186 fwd_aa_tab_clear(J, tab, tab)) {
155 /* A NEWREF with a number key may end up pointing to the array part. 187 /* A NEWREF with a number key may end up pointing to the array part.
156 ** But it's referenced from HSTORE and not found in the ASTORE chain. 188 ** But it's referenced from HSTORE and not found in the ASTORE chain.
157 ** Or a NEWREF may rehash the table and move unrelated number keys. 189 ** Or a NEWREF may rehash the table and move unrelated number keys.
@@ -270,7 +302,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J)
270 while (ref > tab) { 302 while (ref > tab) {
271 IRIns *newref = IR(ref); 303 IRIns *newref = IR(ref);
272 if (tab == newref->op1) { 304 if (tab == newref->op1) {
273 if (fright->op1 == newref->op2) 305 if (fright->op1 == newref->op2 && fwd_aa_tab_clear(J, ref, tab))
274 return ref; /* Forward from NEWREF. */ 306 return ref; /* Forward from NEWREF. */
275 else 307 else
276 goto docse; 308 goto docse;
@@ -280,7 +312,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J)
280 ref = newref->prev; 312 ref = newref->prev;
281 } 313 }
282 /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */ 314 /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */
283 if (IR(tab)->o == IR_TDUP) 315 if (IR(tab)->o == IR_TDUP && fwd_aa_tab_clear(J, tab, tab))
284 fins->t.irt &= ~IRT_GUARD; /* Drop HREFK guard. */ 316 fins->t.irt &= ~IRT_GUARD; /* Drop HREFK guard. */
285docse: 317docse:
286 return CSEFOLD; 318 return CSEFOLD;
@@ -314,20 +346,6 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
314 return 1; /* No conflict. Can fold to niltv. */ 346 return 1; /* No conflict. Can fold to niltv. */
315} 347}
316 348
317/* Check whether there's no aliasing NEWREF for the left operand. */
318int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
319{
320 IRRef ta = fins->op1;
321 IRRef ref = J->chain[IR_NEWREF];
322 while (ref > lim) {
323 IRIns *newref = IR(ref);
324 if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
325 return 0; /* Conflict. */
326 ref = newref->prev;
327 }
328 return 1; /* No conflict. Can safely FOLD/CSE. */
329}
330
331/* ASTORE/HSTORE elimination. */ 349/* ASTORE/HSTORE elimination. */
332TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) 350TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
333{ 351{
@@ -351,9 +369,12 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
351 /* Different value: try to eliminate the redundant store. */ 369 /* Different value: try to eliminate the redundant store. */
352 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ 370 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
353 IRIns *ir; 371 IRIns *ir;
354 /* Check for any intervening guards (includes conflicting loads). */ 372 /* Check for any intervening guards (includes conflicting loads).
373 ** Note that lj_tab_keyindex and lj_vm_next don't need guards,
374 ** since they are followed by at least one guarded VLOAD.
375 */
355 for (ir = IR(J->cur.nins-1); ir > store; ir--) 376 for (ir = IR(J->cur.nins-1); ir > store; ir--)
356 if (irt_isguard(ir->t) || ir->o == IR_CALLL) 377 if (irt_isguard(ir->t) || ir->o == IR_ALEN)
357 goto doemit; /* No elimination possible. */ 378 goto doemit; /* No elimination possible. */
358 /* Remove redundant store from chain and replace with NOP. */ 379 /* Remove redundant store from chain and replace with NOP. */
359 *refp = store->prev; 380 *refp = store->prev;
@@ -368,6 +389,67 @@ doemit:
368 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 389 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
369} 390}
370 391
392/* ALEN forwarding. */
393TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J)
394{
395 IRRef tab = fins->op1; /* Table reference. */
396 IRRef lim = tab; /* Search limit. */
397 IRRef ref;
398
399 /* Search for conflicting HSTORE with numeric key. */
400 ref = J->chain[IR_HSTORE];
401 while (ref > lim) {
402 IRIns *store = IR(ref);
403 IRIns *href = IR(store->op1);
404 IRIns *key = IR(href->op2);
405 if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
406 lim = ref; /* Conflicting store found, limits search for ALEN. */
407 break;
408 }
409 ref = store->prev;
410 }
411
412 /* Try to find a matching ALEN. */
413 ref = J->chain[IR_ALEN];
414 while (ref > lim) {
415 /* CSE for ALEN only depends on the table, not the hint. */
416 if (IR(ref)->op1 == tab) {
417 IRRef sref;
418
419 /* Search for aliasing table.clear. */
420 if (!fwd_aa_tab_clear(J, ref, tab))
421 break;
422
423 /* Search for hint-forwarding or conflicting store. */
424 sref = J->chain[IR_ASTORE];
425 while (sref > ref) {
426 IRIns *store = IR(sref);
427 IRIns *aref = IR(store->op1);
428 IRIns *fref = IR(aref->op1);
429 if (tab == fref->op1) { /* ASTORE to the same table. */
430 /* Detect t[#t+1] = x idiom for push. */
431 IRIns *idx = IR(aref->op2);
432 if (!irt_isnil(store->t) &&
433 idx->o == IR_ADD && idx->op1 == ref &&
434 IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) {
435 /* Note: this requires an extra PHI check in loop unroll. */
436 fins->op2 = aref->op2; /* Set ALEN hint. */
437 }
438 goto doemit; /* Conflicting store, possibly giving a hint. */
439 } else if (aa_table(J, tab, fref->op1) != ALIAS_NO) {
440 goto doemit; /* Conflicting store. */
441 }
442 sref = store->prev;
443 }
444
445 return ref; /* Plain ALEN forwarding. */
446 }
447 ref = IR(ref)->prev;
448 }
449doemit:
450 return EMITFOLD;
451}
452
371/* -- ULOAD forwarding ---------------------------------------------------- */ 453/* -- ULOAD forwarding ---------------------------------------------------- */
372 454
373/* The current alias analysis for upvalues is very simplistic. It only 455/* The current alias analysis for upvalues is very simplistic. It only
@@ -380,18 +462,23 @@ doemit:
380*/ 462*/
381static AliasRet aa_uref(IRIns *refa, IRIns *refb) 463static AliasRet aa_uref(IRIns *refa, IRIns *refb)
382{ 464{
383 if (refa->o != refb->o)
384 return ALIAS_NO; /* Different UREFx type. */
385 if (refa->op1 == refb->op1) { /* Same function. */ 465 if (refa->op1 == refb->op1) { /* Same function. */
386 if (refa->op2 == refb->op2) 466 if (refa->op2 == refb->op2)
387 return ALIAS_MUST; /* Same function, same upvalue idx. */ 467 return ALIAS_MUST; /* Same function, same upvalue idx. */
388 else 468 else
389 return ALIAS_NO; /* Same function, different upvalue idx. */ 469 return ALIAS_NO; /* Same function, different upvalue idx. */
390 } else { /* Different functions, check disambiguation hash values. */ 470 } else { /* Different functions, check disambiguation hash values. */
391 if (((refa->op2 ^ refb->op2) & 0xff)) 471 if (((refa->op2 ^ refb->op2) & 0xff)) {
392 return ALIAS_NO; /* Upvalues with different hash values cannot alias. */ 472 return ALIAS_NO; /* Upvalues with different hash values cannot alias. */
393 else 473 } else if (refa->o != refb->o) {
394 return ALIAS_MAY; /* No conclusion can be drawn for same hash value. */ 474 /* Different UREFx type, but need to confirm the UREFO really is open. */
475 if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
476 else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
477 return ALIAS_NO;
478 } else {
479 /* No conclusion can be drawn for same hash value and same UREFx type. */
480 return ALIAS_MAY;
481 }
395 } 482 }
396} 483}
397 484
@@ -417,7 +504,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
417 504
418cselim: 505cselim:
419 /* Try to find a matching load. Below the conflicting store, if any. */ 506 /* Try to find a matching load. Below the conflicting store, if any. */
420
421 ref = J->chain[IR_ULOAD]; 507 ref = J->chain[IR_ULOAD];
422 while (ref > lim) { 508 while (ref > lim) {
423 IRIns *ir = IR(ref); 509 IRIns *ir = IR(ref);
@@ -547,8 +633,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
547 goto doemit; 633 goto doemit;
548 break; /* Otherwise continue searching. */ 634 break; /* Otherwise continue searching. */
549 case ALIAS_MUST: 635 case ALIAS_MUST:
550 if (store->op2 == val) /* Same value: drop the new store. */ 636 if (store->op2 == val &&
551 return DROPFOLD; 637 !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
638 return DROPFOLD; /* Same value: drop the new store. */
552 /* Different value: try to eliminate the redundant store. */ 639 /* Different value: try to eliminate the redundant store. */
553 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ 640 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
554 IRIns *ir; 641 IRIns *ir;
@@ -569,6 +656,29 @@ doemit:
569 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 656 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
570} 657}
571 658
659/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */
660int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
661{
662 IRRef ref;
663 if (J->chain[IR_BUFPUT] > lim)
664 return 0; /* Conflict. */
665 ref = J->chain[IR_CALLS];
666 while (ref > lim) {
667 IRIns *ir = IR(ref);
668 if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
669 return 0; /* Conflict. */
670 ref = ir->prev;
671 }
672 ref = J->chain[IR_CALLL];
673 while (ref > lim) {
674 IRIns *ir = IR(ref);
675 if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
676 return 0; /* Conflict. */
677 ref = ir->prev;
678 }
679 return 1; /* No conflict. Can safely FOLD/CSE. */
680}
681
572/* -- XLOAD forwarding and XSTORE elimination ----------------------------- */ 682/* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
573 683
574/* Find cdata allocation for a reference (if any). */ 684/* Find cdata allocation for a reference (if any). */
@@ -820,35 +930,6 @@ doemit:
820 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 930 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
821} 931}
822 932
823/* -- Forwarding of lj_tab_len -------------------------------------------- */
824
825/* This is rather simplistic right now, but better than nothing. */
826TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
827{
828 IRRef tab = fins->op1; /* Table reference. */
829 IRRef lim = tab; /* Search limit. */
830 IRRef ref;
831
832 /* Any ASTORE is a conflict and limits the search. */
833 if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
834
835 /* Search for conflicting HSTORE with numeric key. */
836 ref = J->chain[IR_HSTORE];
837 while (ref > lim) {
838 IRIns *store = IR(ref);
839 IRIns *href = IR(store->op1);
840 IRIns *key = IR(href->op2);
841 if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
842 lim = ref; /* Conflicting store found, limits search for TLEN. */
843 break;
844 }
845 ref = store->prev;
846 }
847
848 /* Try to find a matching load. Below the conflicting store, if any. */
849 return lj_opt_cselim(J, lim);
850}
851
852/* -- ASTORE/HSTORE previous type analysis -------------------------------- */ 933/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
853 934
854/* Check whether the previous value for a table store is non-nil. 935/* Check whether the previous value for a table store is non-nil.
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 2f02407c..02fb9e68 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -373,17 +373,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
373 } else if (op == NARROW_CONV) { 373 } else if (op == NARROW_CONV) {
374 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ 374 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
375 } else if (op == NARROW_SEXT) { 375 } else if (op == NARROW_SEXT) {
376 lua_assert(sp >= nc->stack+1); 376 lj_assertJ(sp >= nc->stack+1, "stack underflow");
377 sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], 377 sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
378 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); 378 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
379 } else if (op == NARROW_INT) { 379 } else if (op == NARROW_INT) {
380 lua_assert(next < last); 380 lj_assertJ(next < last, "missing arg to NARROW_INT");
381 *sp++ = nc->t == IRT_I64 ? 381 *sp++ = nc->t == IRT_I64 ?
382 lj_ir_kint64(J, (int64_t)(int32_t)*next++) : 382 lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
383 lj_ir_kint(J, *next++); 383 lj_ir_kint(J, *next++);
384 } else { /* Regular IROpT. Pops two operands and pushes one result. */ 384 } else { /* Regular IROpT. Pops two operands and pushes one result. */
385 IRRef mode = nc->mode; 385 IRRef mode = nc->mode;
386 lua_assert(sp >= nc->stack+2); 386 lj_assertJ(sp >= nc->stack+2, "stack underflow");
387 sp--; 387 sp--;
388 /* Omit some overflow checks for array indexing. See comments above. */ 388 /* Omit some overflow checks for array indexing. See comments above. */
389 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { 389 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
@@ -399,7 +399,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
399 narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); 399 narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode);
400 } 400 }
401 } 401 }
402 lua_assert(sp == nc->stack+1); 402 lj_assertJ(sp == nc->stack+1, "stack misalignment");
403 return nc->stack[0]; 403 return nc->stack[0];
404} 404}
405 405
@@ -453,7 +453,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
453TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) 453TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
454{ 454{
455 IRIns *ir; 455 IRIns *ir;
456 lua_assert(tref_isnumber(tr)); 456 lj_assertJ(tref_isnumber(tr), "expected number type");
457 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ 457 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
458 return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); 458 return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
459 /* Omit some overflow checks for array indexing. See comments above. */ 459 /* Omit some overflow checks for array indexing. See comments above. */
@@ -500,7 +500,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
500/* Narrow C array index (overflow undefined). */ 500/* Narrow C array index (overflow undefined). */
501TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) 501TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
502{ 502{
503 lua_assert(tref_isnumber(tr)); 503 lj_assertJ(tref_isnumber(tr), "expected number type");
504 if (tref_isnum(tr)) 504 if (tref_isnum(tr))
505 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); 505 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
506 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ 506 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
@@ -552,11 +552,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
552{ 552{
553 rc = conv_str_tonum(J, rc, vc); 553 rc = conv_str_tonum(J, rc, vc);
554 if (tref_isinteger(rc)) { 554 if (tref_isinteger(rc)) {
555 if ((uint32_t)numberVint(vc) != 0x80000000u) 555 uint32_t k = (uint32_t)numberVint(vc);
556 return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); 556 if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) {
557 TRef zero = lj_ir_kint(J, 0);
558 if (!LJ_DUALNUM)
559 emitir(IRTGI(IR_NE), rc, zero);
560 return emitir(IRTGI(IR_SUBOV), zero, rc);
561 }
557 rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); 562 rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
558 } 563 }
559 return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); 564 return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
560} 565}
561 566
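
The reworked lj_opt_narrow_unm above negates a narrowed integer as 0 - rc under SUBOV, but only when the constant is neither 0x80000000 (whose negation overflows int32) nor -- outside dual-number mode -- zero, since -0 must stay the double -0.0; the added IR_NE guard appears to enforce the latter at runtime. Both corner cases can be checked standalone (not part of the patch):

  #include <assert.h>
  #include <stdint.h>

  int main(void)
  {
    /* 0 - INT32_MIN does not fit into int32_t, so SUBOV must be guarded. */
    int64_t wide = 0 - (int64_t)INT32_MIN;
    assert(wide > (int64_t)INT32_MAX);

    /* Integer negation of 0 yields +0, but Lua's -x for x == 0 is the double
    ** -0.0, which compares equal to 0.0 yet has an observable sign.
    */
    {
      double negzero = -0.0;
      assert(negzero == 0.0);
      assert(1.0 / negzero < 0.0);  /* -inf: the sign of zero is visible. */
    }
    return 0;
  }
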
562/* Narrowing of modulo operator. */ 567/* Narrowing of modulo operator. */
@@ -580,44 +585,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
580 return emitir(IRTN(IR_SUB), rb, tmp); 585 return emitir(IRTN(IR_SUB), rb, tmp);
581} 586}
582 587
583/* Narrowing of power operator or math.pow. */
584TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
585{
586 rb = conv_str_tonum(J, rb, vb);
587 rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
588 rc = conv_str_tonum(J, rc, vc);
589 /* Narrowing must be unconditional to preserve (-x)^i semantics. */
590 if (tvisint(vc) || numisint(numV(vc))) {
591 int checkrange = 0;
592 /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
593 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
594 int32_t k = numberVint(vc);
595 if (!(k >= -65536 && k <= 65536)) goto split_pow;
596 checkrange = 1;
597 }
598 if (!tref_isinteger(rc)) {
599 /* Guarded conversion to integer! */
600 rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
601 }
602 if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
603 TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
604 emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
605 }
606 return emitir(IRTN(IR_POW), rb, rc);
607 }
608split_pow:
609 /* FOLD covers most cases, but some are easier to do here. */
610 if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
611 return rb; /* 1 ^ x ==> 1 */
612 rc = lj_ir_tonum(J, rc);
613 if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
614 return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */
615 /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
616 rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
617 rc = emitir(IRTN(IR_MUL), rb, rc);
618 return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
619}
620
621/* -- Predictive narrowing of induction variables ------------------------- */ 588/* -- Predictive narrowing of induction variables ------------------------- */
622 589
623/* Narrow a single runtime value. */ 590/* Narrow a single runtime value. */
@@ -631,9 +598,10 @@ static int narrow_forl(jit_State *J, cTValue *o)
631/* Narrow the FORL index type by looking at the runtime values. */ 598/* Narrow the FORL index type by looking at the runtime values. */
632IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) 599IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
633{ 600{
634 lua_assert(tvisnumber(&tv[FORL_IDX]) && 601 lj_assertJ(tvisnumber(&tv[FORL_IDX]) &&
635 tvisnumber(&tv[FORL_STOP]) && 602 tvisnumber(&tv[FORL_STOP]) &&
636 tvisnumber(&tv[FORL_STEP])); 603 tvisnumber(&tv[FORL_STEP]),
604 "expected number types");
637 /* Narrow only if the runtime values of start/stop/step are all integers. */ 605 /* Narrow only if the runtime values of start/stop/step are all integers. */
638 if (narrow_forl(J, &tv[FORL_IDX]) && 606 if (narrow_forl(J, &tv[FORL_IDX]) &&
639 narrow_forl(J, &tv[FORL_STOP]) && 607 narrow_forl(J, &tv[FORL_STOP]) &&
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
index a9feddad..642ed750 100644
--- a/src/lj_opt_sink.c
+++ b/src/lj_opt_sink.c
@@ -86,8 +86,7 @@ static void sink_mark_ins(jit_State *J)
86 switch (ir->o) { 86 switch (ir->o) {
87 case IR_BASE: 87 case IR_BASE:
88 return; /* Finished. */ 88 return; /* Finished. */
89 case IR_CALLL: /* IRCALL_lj_tab_len */ 89 case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN:
90 case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:
91 irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ 90 irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
92 break; 91 break;
93 case IR_FLOAD: 92 case IR_FLOAD:
@@ -173,8 +172,8 @@ static void sink_remark_phi(jit_State *J)
173/* Sweep instructions and tag sunken allocations and stores. */ 172/* Sweep instructions and tag sunken allocations and stores. */
174static void sink_sweep_ins(jit_State *J) 173static void sink_sweep_ins(jit_State *J)
175{ 174{
176 IRIns *ir, *irfirst = IR(J->cur.nk); 175 IRIns *ir, *irbase = IR(REF_BASE);
177 for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { 176 for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
178 switch (ir->o) { 177 switch (ir->o) {
179 case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { 178 case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
180 IRIns *ira = sink_checkalloc(J, ir); 179 IRIns *ira = sink_checkalloc(J, ir);
@@ -224,6 +223,13 @@ static void sink_sweep_ins(jit_State *J)
224 break; 223 break;
225 } 224 }
226 } 225 }
226 for (ir = IR(J->cur.nk); ir < irbase; ir++) {
227 irt_clearmark(ir->t);
228 ir->prev = REGSP_INIT;
229 /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
230 if (irt_is64(ir->t) && ir->o != IR_KNULL)
231 ir++;
232 }
227} 233}
228 234
229/* Allocation sinking and store sinking. 235/* Allocation sinking and store sinking.
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 1f2ffe54..338a61a2 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -8,14 +8,15 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10 10
11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) 11#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_ircall.h" 17#include "lj_ircall.h"
18#include "lj_iropt.h" 18#include "lj_iropt.h"
19#include "lj_dispatch.h"
19#include "lj_vm.h" 20#include "lj_vm.h"
20 21
21/* SPLIT pass: 22/* SPLIT pass:
@@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 140 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 141 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
141} 142}
143#endif
142 144
143/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ 145/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
144static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, 146static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
155 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 157 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
156 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 158 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
157} 159}
158#endif
159 160
160/* Emit a CALLN with two split 64 bit arguments. */ 161/* Emit a CALLN with two split 64 bit arguments. */
161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, 162static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -192,9 +193,121 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
192 nref = ir->op1; 193 nref = ir->op1;
193 if (ofs == 0) return nref; 194 if (ofs == 0) return nref;
194 } 195 }
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); 196 return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
196} 197}
197 198
199#if LJ_HASFFI
200static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
201 IRIns *oir, IRIns *nir, IRIns *ir)
202{
203 IROp op = ir->o;
204 IRRef kref = nir->op2;
205 if (irref_isk(kref)) { /* Optimize constant shifts. */
206 int32_t k = (IR(kref)->i & 63);
207 IRRef lo = nir->op1, hi = hisubst[ir->op1];
208 if (op == IR_BROL || op == IR_BROR) {
209 if (op == IR_BROR) k = (-k & 63);
210 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
211 if (k == 0) {
212 passthrough:
213 J->cur.nins--;
214 ir->prev = lo;
215 return hi;
216 } else {
217 TRef k1, k2;
218 IRRef t1, t2, t3, t4;
219 J->cur.nins--;
220 k1 = lj_ir_kint(J, k);
221 k2 = lj_ir_kint(J, (-k & 31));
222 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
223 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
224 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
225 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
226 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
227 return split_emit(J, IRTI(IR_BOR), t2, t3);
228 }
229 } else if (k == 0) {
230 goto passthrough;
231 } else if (k < 32) {
232 if (op == IR_BSHL) {
233 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
234 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
235 return split_emit(J, IRTI(IR_BOR), t1, t2);
236 } else {
237 IRRef t1 = ir->prev, t2;
238 lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
239 nir->o = IR_BSHR;
240 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
241 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
242 return split_emit(J, IRTI(op), hi, kref);
243 }
244 } else {
245 if (op == IR_BSHL) {
246 if (k == 32)
247 J->cur.nins--;
248 else
249 lo = ir->prev;
250 ir->prev = lj_ir_kint(J, 0);
251 return lo;
252 } else {
253 lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
254 if (k == 32) {
255 J->cur.nins--;
256 ir->prev = hi;
257 } else {
258 nir->op1 = hi;
259 }
260 if (op == IR_BSHR)
261 return lj_ir_kint(J, 0);
262 else
263 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
264 }
265 }
266 }
267 return split_call_li(J, hisubst, oir, ir,
268 op - IR_BSHL + IRCALL_lj_carith_shl64);
269}
270
271static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
272 IRIns *nir, IRIns *ir)
273{
274 IROp op = ir->o;
275 IRRef hi, kref = nir->op2;
276 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
277 int32_t k = IR(kref)->i;
278 if (k == 0 || k == -1) {
279 if (op == IR_BAND) k = ~k;
280 if (k == 0) {
281 J->cur.nins--;
282 ir->prev = nir->op1;
283 } else if (op == IR_BXOR) {
284 nir->o = IR_BNOT;
285 nir->op2 = 0;
286 } else {
287 J->cur.nins--;
288 ir->prev = kref;
289 }
290 }
291 }
292 hi = hisubst[ir->op1];
293 kref = hisubst[ir->op2];
294 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
295 int32_t k = IR(kref)->i;
296 if (k == 0 || k == -1) {
297 if (op == IR_BAND) k = ~k;
298 if (k == 0) {
299 return hi;
300 } else if (op == IR_BXOR) {
301 return split_emit(J, IRTI(IR_BNOT), hi, 0);
302 } else {
303 return kref;
304 }
305 }
306 }
307 return split_emit(J, IRTI(op), hi, kref);
308}
309#endif
310
198/* Substitute references of a snapshot. */ 311/* Substitute references of a snapshot. */
199static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) 312static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
200{ 313{
@@ -214,7 +327,7 @@ static void split_ir(jit_State *J)
214 IRRef nins = J->cur.nins, nk = J->cur.nk; 327 IRRef nins = J->cur.nins, nk = J->cur.nk;
215 MSize irlen = nins - nk; 328 MSize irlen = nins - nk;
216 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); 329 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
217 IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); 330 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
218 IRRef1 *hisubst; 331 IRRef1 *hisubst;
219 IRRef ref, snref; 332 IRRef ref, snref;
220 SnapShot *snap; 333 SnapShot *snap;
@@ -241,6 +354,8 @@ static void split_ir(jit_State *J)
241 ir->prev = ref; /* Identity substitution for loword. */ 354 ir->prev = ref; /* Identity substitution for loword. */
242 hisubst[ref] = 0; 355 hisubst[ref] = 0;
243 } 356 }
357 if (irt_is64(ir->t) && ir->o != IR_KNULL)
358 ref++;
244 } 359 }
245 360
246 /* Process old IR instructions. */ 361 /* Process old IR instructions. */
@@ -285,35 +400,11 @@ static void split_ir(jit_State *J)
285 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); 400 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
286 break; 401 break;
287 case IR_POW: 402 case IR_POW:
288 hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); 403 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow);
289 break; 404 break;
290 case IR_FPMATH: 405 case IR_FPMATH:
291 /* Try to rejoin pow from EXP2, MUL and LOG2. */
292 if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
293 IRIns *irp = IR(nir->op1);
294 if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
295 IRIns *irm4 = IR(irp->op1);
296 IRIns *irm3 = IR(irm4->op1);
297 IRIns *irm12 = IR(irm3->op1);
298 IRIns *irl1 = IR(irm12->op1);
299 if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
300 irl1->op2 == IRCALL_lj_vm_log2) {
301 IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */
302 IRRef arg3 = irm3->op2, arg4 = irm4->op2;
303 J->cur.nins--;
304 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
305 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
306 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
307 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
308 break;
309 }
310 }
311 }
312 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); 406 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
313 break; 407 break;
314 case IR_ATAN2:
315 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
316 break;
317 case IR_LDEXP: 408 case IR_LDEXP:
318 hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); 409 hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
319 break; 410 break;
@@ -321,7 +412,8 @@ static void split_ir(jit_State *J)
321 nir->o = IR_CONV; /* Pass through loword. */ 412 nir->o = IR_CONV; /* Pass through loword. */
322 nir->op2 = (IRT_INT << 5) | IRT_INT; 413 nir->op2 = (IRT_INT << 5) | IRT_INT;
323 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), 414 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
324 hisubst[ir->op1], hisubst[ir->op2]); 415 hisubst[ir->op1],
416 lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
325 break; 417 break;
326 case IR_SLOAD: 418 case IR_SLOAD:
327 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ 419 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */
@@ -336,15 +428,24 @@ static void split_ir(jit_State *J)
336 case IR_STRTO: 428 case IR_STRTO:
337 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); 429 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
338 break; 430 break;
431 case IR_FLOAD:
432 lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
433 hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
434 nir->op2 += LJ_BE*4;
435 break;
339 case IR_XLOAD: { 436 case IR_XLOAD: {
340 IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ 437 IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
341 J->cur.nins--; 438 J->cur.nins--;
342 hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ 439 hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */
440#if LJ_BE
441 hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
442 inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
443#endif
343 nref = lj_ir_nextins(J); 444 nref = lj_ir_nextins(J);
344 nir = IR(nref); 445 nir = IR(nref);
345 *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ 446 *nir = inslo; /* Re-emit lo XLOAD. */
346 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
347#if LJ_LE 447#if LJ_LE
448 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
348 ir->prev = nref; 449 ir->prev = nref;
349#else 450#else
350 ir->prev = hi; hi = nref; 451 ir->prev = hi; hi = nref;
@@ -364,8 +465,9 @@ static void split_ir(jit_State *J)
364 break; 465 break;
365 } 466 }
366#endif 467#endif
367 lua_assert(st == IRT_INT || 468 lj_assertJ(st == IRT_INT ||
368 (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); 469 (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
470 "bad source type for CONV");
369 nir->o = IR_CALLN; 471 nir->o = IR_CALLN;
370#if LJ_32 && LJ_HASFFI 472#if LJ_32 && LJ_HASFFI
371 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : 473 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
@@ -395,7 +497,8 @@ static void split_ir(jit_State *J)
395 hi = nir->op2; 497 hi = nir->op2;
396 break; 498 break;
397 default: 499 default:
398 lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); 500 lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
501 "bad IR op %d", ir->o);
399 hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), 502 hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
400 hisubst[ir->op1], hisubst[ir->op2]); 503 hisubst[ir->op1], hisubst[ir->op2]);
401 break; 504 break;
@@ -438,8 +541,21 @@ static void split_ir(jit_State *J)
438 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 541 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
439 IRCALL_lj_carith_powu64); 542 IRCALL_lj_carith_powu64);
440 break; 543 break;
544 case IR_BNOT:
545 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
546 break;
547 case IR_BSWAP:
548 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
549 hi = nref;
550 break;
551 case IR_BAND: case IR_BOR: case IR_BXOR:
552 hi = split_bitop(J, hisubst, nir, ir);
553 break;
554 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
555 hi = split_bitshift(J, hisubst, oir, nir, ir);
556 break;
441 case IR_FLOAD: 557 case IR_FLOAD:
442 lua_assert(ir->op2 == IRFL_CDATA_INT64); 558 lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
443 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); 559 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
444#if LJ_BE 560#if LJ_BE
445 ir->prev = hi; hi = nref; 561 ir->prev = hi; hi = nref;
@@ -505,7 +621,7 @@ static void split_ir(jit_State *J)
505 hi = nir->op2; 621 hi = nir->op2;
506 break; 622 break;
507 default: 623 default:
508 lua_assert(ir->o <= IR_NE); /* Comparisons. */ 624 lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */
509 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); 625 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
510 break; 626 break;
511 } 627 }
@@ -529,7 +645,7 @@ static void split_ir(jit_State *J)
529 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); 645 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
530#endif 646#endif
531 ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); 647 ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
532 } else if (ir->o == IR_TOSTR) { 648 } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
533 if (hisubst[ir->op1]) { 649 if (hisubst[ir->op1]) {
534 if (irref_isk(ir->op1)) 650 if (irref_isk(ir->op1))
535 nir->op1 = ir->op1; 651 nir->op1 = ir->op1;
@@ -583,7 +699,7 @@ static void split_ir(jit_State *J)
583#if LJ_SOFTFP 699#if LJ_SOFTFP
584 if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { 700 if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
585 if (irt_isguard(ir->t)) { 701 if (irt_isguard(ir->t)) {
586 lua_assert(st == IRT_NUM && irt_isint(ir->t)); 702 lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
587 J->cur.nins--; 703 J->cur.nins--;
588 ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); 704 ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
589 } else { 705 } else {
@@ -714,7 +830,7 @@ void lj_opt_split(jit_State *J)
714 if (!J->needsplit) 830 if (!J->needsplit)
715 J->needsplit = split_needsplit(J); 831 J->needsplit = split_needsplit(J);
716#else 832#else
717 lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ 833 lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
718#endif 834#endif
719 if (J->needsplit) { 835 if (J->needsplit) {
720 int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); 836 int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index cb200663..4fdd4c65 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_buf.h"
16#include "lj_str.h" 17#include "lj_str.h"
17#include "lj_tab.h" 18#include "lj_tab.h"
18#include "lj_func.h" 19#include "lj_func.h"
@@ -21,6 +22,7 @@
21#if LJ_HASFFI 22#if LJ_HASFFI
22#include "lj_ctype.h" 23#include "lj_ctype.h"
23#endif 24#endif
25#include "lj_strfmt.h"
24#include "lj_lex.h" 26#include "lj_lex.h"
25#include "lj_parse.h" 27#include "lj_parse.h"
26#include "lj_vm.h" 28#include "lj_vm.h"
@@ -161,16 +163,22 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD);
161LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); 163LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD);
162LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); 164LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
163 165
166#ifdef LUA_USE_ASSERT
167#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__))
168#else
169#define lj_assertFS(c, ...) ((void)fs)
170#endif
171
164/* -- Error handling ------------------------------------------------------ */ 172/* -- Error handling ------------------------------------------------------ */
165 173
166LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) 174LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
167{ 175{
168 lj_lex_error(ls, ls->token, em); 176 lj_lex_error(ls, ls->tok, em);
169} 177}
170 178
171LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) 179LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
172{ 180{
173 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); 181 lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
174} 182}
175 183
176LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) 184LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -198,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e)
198{ 206{
199 lua_State *L = fs->L; 207 lua_State *L = fs->L;
200 TValue *o; 208 TValue *o;
201 lua_assert(expr_isnumk(e)); 209 lj_assertFS(expr_isnumk(e), "bad usage");
202 o = lj_tab_set(L, fs->kt, &e->u.nval); 210 o = lj_tab_set(L, fs->kt, &e->u.nval);
203 if (tvhaskslot(o)) 211 if (tvhaskslot(o))
204 return tvkslot(o); 212 return tvkslot(o);
@@ -223,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype)
223/* Add a string constant. */ 231/* Add a string constant. */
224static BCReg const_str(FuncState *fs, ExpDesc *e) 232static BCReg const_str(FuncState *fs, ExpDesc *e)
225{ 233{
226 lua_assert(expr_isstrk(e) || e->k == VGLOBAL); 234 lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage");
227 return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); 235 return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR);
228} 236}
229 237
@@ -311,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest)
311{ 319{
312 BCIns *jmp = &fs->bcbase[pc].ins; 320 BCIns *jmp = &fs->bcbase[pc].ins;
313 BCPos offset = dest-(pc+1)+BCBIAS_J; 321 BCPos offset = dest-(pc+1)+BCBIAS_J;
314 lua_assert(dest != NO_JMP); 322 lj_assertFS(dest != NO_JMP, "uninitialized jump target");
315 if (offset > BCMAX_D) 323 if (offset > BCMAX_D)
316 err_syntax(fs->ls, LJ_ERR_XJUMP); 324 err_syntax(fs->ls, LJ_ERR_XJUMP);
317 setbc_d(jmp, offset); 325 setbc_d(jmp, offset);
@@ -360,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target)
360 if (target == fs->pc) { 368 if (target == fs->pc) {
361 jmp_tohere(fs, list); 369 jmp_tohere(fs, list);
362 } else { 370 } else {
363 lua_assert(target < fs->pc); 371 lj_assertFS(target < fs->pc, "bad jump target");
364 jmp_patchval(fs, list, target, NO_REG, target); 372 jmp_patchval(fs, list, target, NO_REG, target);
365 } 373 }
366} 374}
@@ -390,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg)
390{ 398{
391 if (reg >= fs->nactvar) { 399 if (reg >= fs->nactvar) {
392 fs->freereg--; 400 fs->freereg--;
393 lua_assert(reg == fs->freereg); 401 lj_assertFS(reg == fs->freereg, "bad regfree");
394 } 402 }
395} 403}
396 404
@@ -540,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
540 } else if (e->k <= VKTRUE) { 548 } else if (e->k <= VKTRUE) {
541 ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); 549 ins = BCINS_AD(BC_KPRI, reg, const_pri(e));
542 } else { 550 } else {
543 lua_assert(e->k == VVOID || e->k == VJMP); 551 lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k);
544 return; 552 return;
545 } 553 }
546 bcemit_INS(fs, ins); 554 bcemit_INS(fs, ins);
@@ -635,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
635 ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); 643 ins = BCINS_AD(BC_GSET, ra, const_str(fs, var));
636 } else { 644 } else {
637 BCReg ra, rc; 645 BCReg ra, rc;
638 lua_assert(var->k == VINDEXED); 646 lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k);
639 ra = expr_toanyreg(fs, e); 647 ra = expr_toanyreg(fs, e);
640 rc = var->u.s.aux; 648 rc = var->u.s.aux;
641 if ((int32_t)rc < 0) { 649 if ((int32_t)rc < 0) {
@@ -643,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
643 } else if (rc > BCMAX_C) { 651 } else if (rc > BCMAX_C) {
644 ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); 652 ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1));
645 } else { 653 } else {
654#ifdef LUA_USE_ASSERT
646 /* Free late alloced key reg to avoid assert on free of value reg. */ 655 /* Free late alloced key reg to avoid assert on free of value reg. */
647 /* This can only happen when called from expr_table(). */ 656 /* This can only happen when called from expr_table(). */
648 lua_assert(e->k != VNONRELOC || ra < fs->nactvar || 657 if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra)
649 rc < ra || (bcreg_free(fs, rc),1)); 658 bcreg_free(fs, rc);
659#endif
650 ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); 660 ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc);
651 } 661 }
652 } 662 }
@@ -657,19 +667,20 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
657/* Emit method lookup expression. */ 667/* Emit method lookup expression. */
658static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) 668static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
659{ 669{
660 BCReg idx, func, obj = expr_toanyreg(fs, e); 670 BCReg idx, func, fr2, obj = expr_toanyreg(fs, e);
661 expr_free(fs, e); 671 expr_free(fs, e);
662 func = fs->freereg; 672 func = fs->freereg;
663 bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */ 673 fr2 = fs->ls->fr2;
664 lua_assert(expr_isstrk(key)); 674 bcemit_AD(fs, BC_MOV, func+1+fr2, obj); /* Copy object to 1st argument. */
675 lj_assertFS(expr_isstrk(key), "bad usage");
665 idx = const_str(fs, key); 676 idx = const_str(fs, key);
666 if (idx <= BCMAX_C) { 677 if (idx <= BCMAX_C) {
667 bcreg_reserve(fs, 2); 678 bcreg_reserve(fs, 2+fr2);
668 bcemit_ABC(fs, BC_TGETS, func, obj, idx); 679 bcemit_ABC(fs, BC_TGETS, func, obj, idx);
669 } else { 680 } else {
670 bcreg_reserve(fs, 3); 681 bcreg_reserve(fs, 3+fr2);
671 bcemit_AD(fs, BC_KSTR, func+2, idx); 682 bcemit_AD(fs, BC_KSTR, func+2+fr2, idx);
672 bcemit_ABC(fs, BC_TGETV, func, obj, func+2); 683 bcemit_ABC(fs, BC_TGETV, func, obj, func+2+fr2);
673 fs->freereg--; 684 fs->freereg--;
674 } 685 }
675 e->u.s.info = func; 686 e->u.s.info = func;
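
The fr2 adjustments in bcemit_method (and in parse_args further down) account for the two-slot frame info of LJ_FR2 targets: the function value sits at the call base, the following slot is reserved for the frame link/pc word (the source comment later calls it "func [pc] | state ctl"), and the arguments start at base+1+fr2. A small sketch of that slot arithmetic with an invented layout helper, not the actual bytecode emitter or register allocator:

#include <stdio.h>

/* Print where the callee, the frame-info slot and the first two arguments
   land for a call whose function value sits in register 'base'. */
static void layout(int base, int fr2)
{
  if (fr2)
    printf("fr2=1: func=r%d pc=r%d arg1=r%d arg2=r%d\n",
           base, base + 1, base + 2, base + 3);
  else
    printf("fr2=0: func=r%d arg1=r%d arg2=r%d\n",
           base, base + 1, base + 2);
}

int main(void)
{
  layout(3, 0);  /* fr2=0: func=r3 arg1=r4 arg2=r5 */
  layout(3, 1);  /* fr2=1: func=r3 pc=r4 arg1=r5 arg2=r6 */
  return 0;
}

This is also why bcreg_reserve(fs, 2+fr2) and the CALL operand fs->freereg - base - ls->fr2 both carry the extra slot.
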
@@ -801,7 +812,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
801 else 812 else
802 rc = expr_toanyreg(fs, e2); 813 rc = expr_toanyreg(fs, e2);
803 /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ 814 /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */
804 lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC); 815 lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC,
816 "bad expr type %d", e1->k);
805 expr_toval(fs, e1); 817 expr_toval(fs, e1);
806 /* Avoid two consts to satisfy bytecode constraints. */ 818 /* Avoid two consts to satisfy bytecode constraints. */
807 if (expr_isnumk(e1) && !expr_isnumk(e2) && 819 if (expr_isnumk(e1) && !expr_isnumk(e2) &&
@@ -889,19 +901,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
889 if (op <= OPR_POW) { 901 if (op <= OPR_POW) {
890 bcemit_arith(fs, op, e1, e2); 902 bcemit_arith(fs, op, e1, e2);
891 } else if (op == OPR_AND) { 903 } else if (op == OPR_AND) {
892 lua_assert(e1->t == NO_JMP); /* List must be closed. */ 904 lj_assertFS(e1->t == NO_JMP, "jump list not closed");
893 expr_discharge(fs, e2); 905 expr_discharge(fs, e2);
894 jmp_append(fs, &e2->f, e1->f); 906 jmp_append(fs, &e2->f, e1->f);
895 *e1 = *e2; 907 *e1 = *e2;
896 } else if (op == OPR_OR) { 908 } else if (op == OPR_OR) {
897 lua_assert(e1->f == NO_JMP); /* List must be closed. */ 909 lj_assertFS(e1->f == NO_JMP, "jump list not closed");
898 expr_discharge(fs, e2); 910 expr_discharge(fs, e2);
899 jmp_append(fs, &e2->t, e1->t); 911 jmp_append(fs, &e2->t, e1->t);
900 *e1 = *e2; 912 *e1 = *e2;
901 } else if (op == OPR_CONCAT) { 913 } else if (op == OPR_CONCAT) {
902 expr_toval(fs, e2); 914 expr_toval(fs, e2);
903 if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { 915 if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) {
904 lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); 916 lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1,
917 "bad CAT stack layout");
905 expr_free(fs, e1); 918 expr_free(fs, e1);
906 setbc_b(bcptr(fs, e2), e1->u.s.info); 919 setbc_b(bcptr(fs, e2), e1->u.s.info);
907 e1->u.s.info = e2->u.s.info; 920 e1->u.s.info = e2->u.s.info;
@@ -913,8 +926,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
913 } 926 }
914 e1->k = VRELOCABLE; 927 e1->k = VRELOCABLE;
915 } else { 928 } else {
916 lua_assert(op == OPR_NE || op == OPR_EQ || 929 lj_assertFS(op == OPR_NE || op == OPR_EQ ||
917 op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT); 930 op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT,
931 "bad binop %d", op);
918 bcemit_comp(fs, op, e1, e2); 932 bcemit_comp(fs, op, e1, e2);
919 } 933 }
920} 934}
@@ -943,10 +957,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
943 e->u.s.info = fs->freereg-1; 957 e->u.s.info = fs->freereg-1;
944 e->k = VNONRELOC; 958 e->k = VNONRELOC;
945 } else { 959 } else {
946 lua_assert(e->k == VNONRELOC); 960 lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
947 } 961 }
948 } else { 962 } else {
949 lua_assert(op == BC_UNM || op == BC_LEN); 963 lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op);
950 if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ 964 if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */
951#if LJ_HASFFI 965#if LJ_HASFFI
952 if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ 966 if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */
@@ -986,7 +1000,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
986/* Check and consume optional token. */ 1000/* Check and consume optional token. */
987static int lex_opt(LexState *ls, LexToken tok) 1001static int lex_opt(LexState *ls, LexToken tok)
988{ 1002{
989 if (ls->token == tok) { 1003 if (ls->tok == tok) {
990 lj_lex_next(ls); 1004 lj_lex_next(ls);
991 return 1; 1005 return 1;
992 } 1006 }
@@ -996,7 +1010,7 @@ static int lex_opt(LexState *ls, LexToken tok)
996/* Check and consume token. */ 1010/* Check and consume token. */
997static void lex_check(LexState *ls, LexToken tok) 1011static void lex_check(LexState *ls, LexToken tok)
998{ 1012{
999 if (ls->token != tok) 1013 if (ls->tok != tok)
1000 err_token(ls, tok); 1014 err_token(ls, tok);
1001 lj_lex_next(ls); 1015 lj_lex_next(ls);
1002} 1016}
@@ -1010,7 +1024,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1010 } else { 1024 } else {
1011 const char *swhat = lj_lex_token2str(ls, what); 1025 const char *swhat = lj_lex_token2str(ls, what);
1012 const char *swho = lj_lex_token2str(ls, who); 1026 const char *swho = lj_lex_token2str(ls, who);
1013 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); 1027 lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
1014 } 1028 }
1015 } 1029 }
1016} 1030}
@@ -1019,9 +1033,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1019static GCstr *lex_str(LexState *ls) 1033static GCstr *lex_str(LexState *ls)
1020{ 1034{
1021 GCstr *s; 1035 GCstr *s;
1022 if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) 1036 if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
1023 err_token(ls, TK_name); 1037 err_token(ls, TK_name);
1024 s = strV(&ls->tokenval); 1038 s = strV(&ls->tokval);
1025 lj_lex_next(ls); 1039 lj_lex_next(ls);
1026 return s; 1040 return s;
1027} 1041}
@@ -1041,8 +1055,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name)
1041 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); 1055 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
1042 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); 1056 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
1043 } 1057 }
1044 lua_assert((uintptr_t)name < VARNAME__MAX || 1058 lj_assertFS((uintptr_t)name < VARNAME__MAX ||
1045 lj_tab_getstr(fs->kt, name) != NULL); 1059 lj_tab_getstr(fs->kt, name) != NULL,
1060 "unanchored variable name");
1046 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ 1061 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
1047 setgcref(ls->vstack[vtop].name, obj2gco(name)); 1062 setgcref(ls->vstack[vtop].name, obj2gco(name));
1048 fs->varmap[fs->nactvar+n] = (uint16_t)vtop; 1063 fs->varmap[fs->nactvar+n] = (uint16_t)vtop;
@@ -1097,7 +1112,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e)
1097 return i; /* Already exists. */ 1112 return i; /* Already exists. */
1098 /* Otherwise create a new one. */ 1113 /* Otherwise create a new one. */
1099 checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); 1114 checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues");
1100 lua_assert(e->k == VLOCAL || e->k == VUPVAL); 1115 lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k);
1101 fs->uvmap[n] = (uint16_t)vidx; 1116 fs->uvmap[n] = (uint16_t)vidx;
1102 fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info); 1117 fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info);
1103 fs->nuv = n+1; 1118 fs->nuv = n+1;
@@ -1148,7 +1163,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc)
1148 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); 1163 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
1149 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); 1164 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
1150 } 1165 }
1151 lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL); 1166 lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL,
1167 "unanchored label name");
1152 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ 1168 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
1153 setgcref(ls->vstack[vtop].name, obj2gco(name)); 1169 setgcref(ls->vstack[vtop].name, obj2gco(name));
1154 ls->vstack[vtop].startpc = pc; 1170 ls->vstack[vtop].startpc = pc;
@@ -1178,8 +1194,9 @@ static void gola_close(LexState *ls, VarInfo *vg)
1178 FuncState *fs = ls->fs; 1194 FuncState *fs = ls->fs;
1179 BCPos pc = vg->startpc; 1195 BCPos pc = vg->startpc;
1180 BCIns *ip = &fs->bcbase[pc].ins; 1196 BCIns *ip = &fs->bcbase[pc].ins;
1181 lua_assert(gola_isgoto(vg)); 1197 lj_assertFS(gola_isgoto(vg), "expected goto");
1182 lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO); 1198 lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO,
1199 "bad bytecode op %d", bc_op(*ip));
1183 setbc_a(ip, vg->slot); 1200 setbc_a(ip, vg->slot);
1184 if (bc_op(*ip) == BC_JMP) { 1201 if (bc_op(*ip) == BC_JMP) {
1185 BCPos next = jmp_next(fs, pc); 1202 BCPos next = jmp_next(fs, pc);
@@ -1198,9 +1215,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx)
1198 if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { 1215 if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) {
1199 if (vg->slot < vl->slot) { 1216 if (vg->slot < vl->slot) {
1200 GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); 1217 GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name);
1201 lua_assert((uintptr_t)name >= VARNAME__MAX); 1218 lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name");
1202 ls->linenumber = ls->fs->bcbase[vg->startpc].line; 1219 ls->linenumber = ls->fs->bcbase[vg->startpc].line;
1203 lua_assert(strref(vg->name) != NAME_BREAK); 1220 lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break");
1204 lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, 1221 lj_lex_error(ls, 0, LJ_ERR_XGSCOPE,
1205 strdata(strref(vg->name)), strdata(name)); 1222 strdata(strref(vg->name)), strdata(name));
1206 } 1223 }
@@ -1264,7 +1281,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags)
1264 bl->vstart = fs->ls->vtop; 1281 bl->vstart = fs->ls->vtop;
1265 bl->prev = fs->bl; 1282 bl->prev = fs->bl;
1266 fs->bl = bl; 1283 fs->bl = bl;
1267 lua_assert(fs->freereg == fs->nactvar); 1284 lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc");
1268} 1285}
1269 1286
1270/* End a scope. */ 1287/* End a scope. */
@@ -1275,7 +1292,7 @@ static void fscope_end(FuncState *fs)
1275 fs->bl = bl->prev; 1292 fs->bl = bl->prev;
1276 var_remove(ls, bl->nactvar); 1293 var_remove(ls, bl->nactvar);
1277 fs->freereg = fs->nactvar; 1294 fs->freereg = fs->nactvar;
1278 lua_assert(bl->nactvar == fs->nactvar); 1295 lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc");
1279 if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) 1296 if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL)
1280 bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); 1297 bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0);
1281 if ((bl->flags & FSCOPE_BREAK)) { 1298 if ((bl->flags & FSCOPE_BREAK)) {
@@ -1310,9 +1327,12 @@ static void fs_fixup_bc(FuncState *fs, GCproto *pt, BCIns *bc, MSize n)
1310{ 1327{
1311 BCInsLine *base = fs->bcbase; 1328 BCInsLine *base = fs->bcbase;
1312 MSize i; 1329 MSize i;
1330 BCIns op;
1313 pt->sizebc = n; 1331 pt->sizebc = n;
1314 bc[0] = BCINS_AD((fs->flags & PROTO_VARARG) ? BC_FUNCV : BC_FUNCF, 1332 if (fs->ls->fr2 != LJ_FR2) op = BC_NOT; /* Mark non-native prototype. */
1315 fs->framesize, 0); 1333 else if ((fs->flags & PROTO_VARARG)) op = BC_FUNCV;
1334 else op = BC_FUNCF;
1335 bc[0] = BCINS_AD(op, fs->framesize, 0);
1316 for (i = 1; i < n; i++) 1336 for (i = 1; i < n; i++)
1317 bc[i] = base[i].ins; 1337 bc[i] = base[i].ins;
1318} 1338}
@@ -1362,13 +1382,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
1362 Node *n = &node[i]; 1382 Node *n = &node[i];
1363 if (tvhaskslot(&n->val)) { 1383 if (tvhaskslot(&n->val)) {
1364 ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); 1384 ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val);
1365 lua_assert(!tvisint(&n->key)); 1385 lj_assertFS(!tvisint(&n->key), "unexpected integer key");
1366 if (tvisnum(&n->key)) { 1386 if (tvisnum(&n->key)) {
1367 TValue *tv = &((TValue *)kptr)[kidx]; 1387 TValue *tv = &((TValue *)kptr)[kidx];
1368 if (LJ_DUALNUM) { 1388 if (LJ_DUALNUM) {
1369 lua_Number nn = numV(&n->key); 1389 lua_Number nn = numV(&n->key);
1370 int32_t k = lj_num2int(nn); 1390 int32_t k = lj_num2int(nn);
1371 lua_assert(!tvismzero(&n->key)); 1391 lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
1372 if ((lua_Number)k == nn) 1392 if ((lua_Number)k == nn)
1373 setintV(tv, k); 1393 setintV(tv, k);
1374 else 1394 else
@@ -1416,98 +1436,66 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
1416 uint8_t *li = (uint8_t *)lineinfo; 1436 uint8_t *li = (uint8_t *)lineinfo;
1417 do { 1437 do {
1418 BCLine delta = base[i].line - first; 1438 BCLine delta = base[i].line - first;
1419 lua_assert(delta >= 0 && delta < 256); 1439 lj_assertFS(delta >= 0 && delta < 256, "bad line delta");
1420 li[i] = (uint8_t)delta; 1440 li[i] = (uint8_t)delta;
1421 } while (++i < n); 1441 } while (++i < n);
1422 } else if (LJ_LIKELY(numline < 65536)) { 1442 } else if (LJ_LIKELY(numline < 65536)) {
1423 uint16_t *li = (uint16_t *)lineinfo; 1443 uint16_t *li = (uint16_t *)lineinfo;
1424 do { 1444 do {
1425 BCLine delta = base[i].line - first; 1445 BCLine delta = base[i].line - first;
1426 lua_assert(delta >= 0 && delta < 65536); 1446 lj_assertFS(delta >= 0 && delta < 65536, "bad line delta");
1427 li[i] = (uint16_t)delta; 1447 li[i] = (uint16_t)delta;
1428 } while (++i < n); 1448 } while (++i < n);
1429 } else { 1449 } else {
1430 uint32_t *li = (uint32_t *)lineinfo; 1450 uint32_t *li = (uint32_t *)lineinfo;
1431 do { 1451 do {
1432 BCLine delta = base[i].line - first; 1452 BCLine delta = base[i].line - first;
1433 lua_assert(delta >= 0); 1453 lj_assertFS(delta >= 0, "bad line delta");
1434 li[i] = (uint32_t)delta; 1454 li[i] = (uint32_t)delta;
1435 } while (++i < n); 1455 } while (++i < n);
1436 } 1456 }
1437} 1457}
1438 1458
1439/* Resize buffer if needed. */
1440static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
1441{
1442 MSize sz = ls->sb.sz * 2;
1443 while (ls->sb.n + len > sz) sz = sz * 2;
1444 lj_str_resizebuf(ls->L, &ls->sb, sz);
1445}
1446
1447static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
1448{
1449 if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
1450 fs_buf_resize(ls, len);
1451}
1452
1453/* Add string to buffer. */
1454static void fs_buf_str(LexState *ls, const char *str, MSize len)
1455{
1456 char *p = ls->sb.buf + ls->sb.n;
1457 MSize i;
1458 ls->sb.n += len;
1459 for (i = 0; i < len; i++) p[i] = str[i];
1460}
1461
1462/* Add ULEB128 value to buffer. */
1463static void fs_buf_uleb128(LexState *ls, uint32_t v)
1464{
1465 MSize n = ls->sb.n;
1466 uint8_t *p = (uint8_t *)ls->sb.buf;
1467 for (; v >= 0x80; v >>= 7)
1468 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
1469 p[n++] = (uint8_t)v;
1470 ls->sb.n = n;
1471}
1472
1473/* Prepare variable info for prototype. */ 1459/* Prepare variable info for prototype. */
1474static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) 1460static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
1475{ 1461{
1476 VarInfo *vs =ls->vstack, *ve; 1462 VarInfo *vs =ls->vstack, *ve;
1477 MSize i, n; 1463 MSize i, n;
1478 BCPos lastpc; 1464 BCPos lastpc;
1479 lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ 1465 lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
1480 /* Store upvalue names. */ 1466 /* Store upvalue names. */
1481 for (i = 0, n = fs->nuv; i < n; i++) { 1467 for (i = 0, n = fs->nuv; i < n; i++) {
1482 GCstr *s = strref(vs[fs->uvmap[i]].name); 1468 GCstr *s = strref(vs[fs->uvmap[i]].name);
1483 MSize len = s->len+1; 1469 MSize len = s->len+1;
1484 fs_buf_need(ls, len); 1470 char *p = lj_buf_more(&ls->sb, len);
1485 fs_buf_str(ls, strdata(s), len); 1471 p = lj_buf_wmem(p, strdata(s), len);
1472 ls->sb.w = p;
1486 } 1473 }
1487 *ofsvar = ls->sb.n; 1474 *ofsvar = sbuflen(&ls->sb);
1488 lastpc = 0; 1475 lastpc = 0;
1489 /* Store local variable names and compressed ranges. */ 1476 /* Store local variable names and compressed ranges. */
1490 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { 1477 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
1491 if (!gola_isgotolabel(vs)) { 1478 if (!gola_isgotolabel(vs)) {
1492 GCstr *s = strref(vs->name); 1479 GCstr *s = strref(vs->name);
1493 BCPos startpc; 1480 BCPos startpc;
1481 char *p;
1494 if ((uintptr_t)s < VARNAME__MAX) { 1482 if ((uintptr_t)s < VARNAME__MAX) {
1495 fs_buf_need(ls, 1 + 2*5); 1483 p = lj_buf_more(&ls->sb, 1 + 2*5);
1496 ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; 1484 *p++ = (char)(uintptr_t)s;
1497 } else { 1485 } else {
1498 MSize len = s->len+1; 1486 MSize len = s->len+1;
1499 fs_buf_need(ls, len + 2*5); 1487 p = lj_buf_more(&ls->sb, len + 2*5);
1500 fs_buf_str(ls, strdata(s), len); 1488 p = lj_buf_wmem(p, strdata(s), len);
1501 } 1489 }
1502 startpc = vs->startpc; 1490 startpc = vs->startpc;
1503 fs_buf_uleb128(ls, startpc-lastpc); 1491 p = lj_strfmt_wuleb128(p, startpc-lastpc);
1504 fs_buf_uleb128(ls, vs->endpc-startpc); 1492 p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
1493 ls->sb.w = p;
1505 lastpc = startpc; 1494 lastpc = startpc;
1506 } 1495 }
1507 } 1496 }
1508 fs_buf_need(ls, 1); 1497 lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
1509 ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ 1498 return sbuflen(&ls->sb);
1510 return ls->sb.n;
1511} 1499}
1512 1500
1513/* Fixup variable info for prototype. */ 1501/* Fixup variable info for prototype. */
@@ -1515,7 +1503,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
1515{ 1503{
1516 setmref(pt->uvinfo, p); 1504 setmref(pt->uvinfo, p);
1517 setmref(pt->varinfo, (char *)p + ofsvar); 1505 setmref(pt->varinfo, (char *)p + ofsvar);
1518 memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ 1506 memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */
1519} 1507}
1520#else 1508#else
1521 1509
@@ -1552,7 +1540,7 @@ static void fs_fixup_ret(FuncState *fs)
1552 } 1540 }
1553 fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ 1541 fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */
1554 fscope_end(fs); 1542 fscope_end(fs);
1555 lua_assert(fs->bl == NULL); 1543 lj_assertFS(fs->bl == NULL, "bad scope nesting");
1556 /* May need to fixup returns encoded before first function was created. */ 1544 /* May need to fixup returns encoded before first function was created. */
1557 if (fs->flags & PROTO_FIXUP_RETURN) { 1545 if (fs->flags & PROTO_FIXUP_RETURN) {
1558 BCPos pc; 1546 BCPos pc;
@@ -1624,7 +1612,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1624 L->top--; /* Pop table of constants. */ 1612 L->top--; /* Pop table of constants. */
1625 ls->vtop = fs->vbase; /* Reset variable stack. */ 1613 ls->vtop = fs->vbase; /* Reset variable stack. */
1626 ls->fs = fs->prev; 1614 ls->fs = fs->prev;
1627 lua_assert(ls->fs != NULL || ls->token == TK_eof); 1615 lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state");
1628 return pt; 1616 return pt;
1629} 1617}
1630 1618
@@ -1718,15 +1706,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
1718} 1706}
1719 1707
1720/* Get value of constant expression. */ 1708/* Get value of constant expression. */
1721static void expr_kvalue(TValue *v, ExpDesc *e) 1709static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e)
1722{ 1710{
1711 UNUSED(fs);
1723 if (e->k <= VKTRUE) { 1712 if (e->k <= VKTRUE) {
1724 setitype(v, ~(uint32_t)e->k); 1713 setpriV(v, ~(uint32_t)e->k);
1725 } else if (e->k == VKSTR) { 1714 } else if (e->k == VKSTR) {
1726 setgcref(v->gcr, obj2gco(e->u.sval)); 1715 setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
1727 setitype(v, LJ_TSTR);
1728 } else { 1716 } else {
1729 lua_assert(tvisnumber(expr_numtv(e))); 1717 lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant");
1730 *v = *expr_numtv(e); 1718 *v = *expr_numtv(e);
1731 } 1719 }
1732} 1720}
@@ -1746,15 +1734,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
1746 bcreg_reserve(fs, 1); 1734 bcreg_reserve(fs, 1);
1747 freg++; 1735 freg++;
1748 lex_check(ls, '{'); 1736 lex_check(ls, '{');
1749 while (ls->token != '}') { 1737 while (ls->tok != '}') {
1750 ExpDesc key, val; 1738 ExpDesc key, val;
1751 vcall = 0; 1739 vcall = 0;
1752 if (ls->token == '[') { 1740 if (ls->tok == '[') {
1753 expr_bracket(ls, &key); /* Already calls expr_toval. */ 1741 expr_bracket(ls, &key); /* Already calls expr_toval. */
1754 if (!expr_isk(&key)) expr_index(fs, e, &key); 1742 if (!expr_isk(&key)) expr_index(fs, e, &key);
1755 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; 1743 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
1756 lex_check(ls, '='); 1744 lex_check(ls, '=');
1757 } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && 1745 } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
1758 lj_lex_lookahead(ls) == '=') { 1746 lj_lex_lookahead(ls) == '=') {
1759 expr_str(ls, &key); 1747 expr_str(ls, &key);
1760 lex_check(ls, '='); 1748 lex_check(ls, '=');
@@ -1776,11 +1764,11 @@ static void expr_table(LexState *ls, ExpDesc *e)
1776 fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); 1764 fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx);
1777 } 1765 }
1778 vcall = 0; 1766 vcall = 0;
1779 expr_kvalue(&k, &key); 1767 expr_kvalue(fs, &k, &key);
1780 v = lj_tab_set(fs->L, t, &k); 1768 v = lj_tab_set(fs->L, t, &k);
1781 lj_gc_anybarriert(fs->L, t); 1769 lj_gc_anybarriert(fs->L, t);
1782 if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ 1770 if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */
1783 expr_kvalue(v, &val); 1771 expr_kvalue(fs, v, &val);
1784 } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ 1772 } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */
1785 settabV(fs->L, v, t); /* Preserve key with table itself as value. */ 1773 settabV(fs->L, v, t); /* Preserve key with table itself as value. */
1786 fixt = 1; /* Fix this later, after all resizes. */ 1774 fixt = 1; /* Fix this later, after all resizes. */
@@ -1799,8 +1787,9 @@ static void expr_table(LexState *ls, ExpDesc *e)
1799 if (vcall) { 1787 if (vcall) {
1800 BCInsLine *ilp = &fs->bcbase[fs->pc-1]; 1788 BCInsLine *ilp = &fs->bcbase[fs->pc-1];
1801 ExpDesc en; 1789 ExpDesc en;
1802 lua_assert(bc_a(ilp->ins) == freg && 1790 lj_assertFS(bc_a(ilp->ins) == freg &&
1803 bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB)); 1791 bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB),
1792 "bad CALL code generation");
1804 expr_init(&en, VKNUM, 0); 1793 expr_init(&en, VKNUM, 0);
1805 en.u.nval.u32.lo = narr-1; 1794 en.u.nval.u32.lo = narr-1;
1806 en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */ 1795 en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */
@@ -1830,7 +1819,7 @@ static void expr_table(LexState *ls, ExpDesc *e)
1830 for (i = 0; i <= hmask; i++) { 1819 for (i = 0; i <= hmask; i++) {
1831 Node *n = &node[i]; 1820 Node *n = &node[i];
1832 if (tvistab(&n->val)) { 1821 if (tvistab(&n->val)) {
1833 lua_assert(tabV(&n->val) == t); 1822 lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table");
1834 setnilV(&n->val); /* Turn value into nil. */ 1823 setnilV(&n->val); /* Turn value into nil. */
1835 } 1824 }
1836 } 1825 }
@@ -1847,11 +1836,11 @@ static BCReg parse_params(LexState *ls, int needself)
1847 lex_check(ls, '('); 1836 lex_check(ls, '(');
1848 if (needself) 1837 if (needself)
1849 var_new_lit(ls, nparams++, "self"); 1838 var_new_lit(ls, nparams++, "self");
1850 if (ls->token != ')') { 1839 if (ls->tok != ')') {
1851 do { 1840 do {
1852 if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1841 if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1853 var_new(ls, nparams++, lex_str(ls)); 1842 var_new(ls, nparams++, lex_str(ls));
1854 } else if (ls->token == TK_dots) { 1843 } else if (ls->tok == TK_dots) {
1855 lj_lex_next(ls); 1844 lj_lex_next(ls);
1856 fs->flags |= PROTO_VARARG; 1845 fs->flags |= PROTO_VARARG;
1857 break; 1846 break;
@@ -1861,7 +1850,7 @@ static BCReg parse_params(LexState *ls, int needself)
1861 } while (lex_opt(ls, ',')); 1850 } while (lex_opt(ls, ','));
1862 } 1851 }
1863 var_add(ls, nparams); 1852 var_add(ls, nparams);
1864 lua_assert(fs->nactvar == nparams); 1853 lj_assertFS(fs->nactvar == nparams, "bad regalloc");
1865 bcreg_reserve(fs, nparams); 1854 bcreg_reserve(fs, nparams);
1866 lex_check(ls, ')'); 1855 lex_check(ls, ')');
1867 return nparams; 1856 return nparams;
@@ -1885,7 +1874,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1885 fs.bclim = pfs->bclim - pfs->pc; 1874 fs.bclim = pfs->bclim - pfs->pc;
1886 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ 1875 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
1887 parse_chunk(ls); 1876 parse_chunk(ls);
1888 if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); 1877 if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
1889 pt = fs_finish(ls, (ls->lastline = ls->linenumber)); 1878 pt = fs_finish(ls, (ls->lastline = ls->linenumber));
1890 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ 1879 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
1891 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); 1880 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1924,13 +1913,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
1924 BCIns ins; 1913 BCIns ins;
1925 BCReg base; 1914 BCReg base;
1926 BCLine line = ls->linenumber; 1915 BCLine line = ls->linenumber;
1927 if (ls->token == '(') { 1916 if (ls->tok == '(') {
1928#if !LJ_52 1917#if !LJ_52
1929 if (line != ls->lastline) 1918 if (line != ls->lastline)
1930 err_syntax(ls, LJ_ERR_XAMBIG); 1919 err_syntax(ls, LJ_ERR_XAMBIG);
1931#endif 1920#endif
1932 lj_lex_next(ls); 1921 lj_lex_next(ls);
1933 if (ls->token == ')') { /* f(). */ 1922 if (ls->tok == ')') { /* f(). */
1934 args.k = VVOID; 1923 args.k = VVOID;
1935 } else { 1924 } else {
1936 expr_list(ls, &args); 1925 expr_list(ls, &args);
@@ -1938,24 +1927,24 @@ static void parse_args(LexState *ls, ExpDesc *e)
1938 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ 1927 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
1939 } 1928 }
1940 lex_match(ls, ')', '(', line); 1929 lex_match(ls, ')', '(', line);
1941 } else if (ls->token == '{') { 1930 } else if (ls->tok == '{') {
1942 expr_table(ls, &args); 1931 expr_table(ls, &args);
1943 } else if (ls->token == TK_string) { 1932 } else if (ls->tok == TK_string) {
1944 expr_init(&args, VKSTR, 0); 1933 expr_init(&args, VKSTR, 0);
1945 args.u.sval = strV(&ls->tokenval); 1934 args.u.sval = strV(&ls->tokval);
1946 lj_lex_next(ls); 1935 lj_lex_next(ls);
1947 } else { 1936 } else {
1948 err_syntax(ls, LJ_ERR_XFUNARG); 1937 err_syntax(ls, LJ_ERR_XFUNARG);
1949 return; /* Silence compiler. */ 1938 return; /* Silence compiler. */
1950 } 1939 }
1951 lua_assert(e->k == VNONRELOC); 1940 lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
1952 base = e->u.s.info; /* Base register for call. */ 1941 base = e->u.s.info; /* Base register for call. */
1953 if (args.k == VCALL) { 1942 if (args.k == VCALL) {
1954 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); 1943 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - ls->fr2);
1955 } else { 1944 } else {
1956 if (args.k != VVOID) 1945 if (args.k != VVOID)
1957 expr_tonextreg(fs, &args); 1946 expr_tonextreg(fs, &args);
1958 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); 1947 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - ls->fr2);
1959 } 1948 }
1960 expr_init(e, VCALL, bcemit_INS(fs, ins)); 1949 expr_init(e, VCALL, bcemit_INS(fs, ins));
1961 e->u.s.aux = base; 1950 e->u.s.aux = base;
@@ -1968,33 +1957,34 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1968{ 1957{
1969 FuncState *fs = ls->fs; 1958 FuncState *fs = ls->fs;
1970 /* Parse prefix expression. */ 1959 /* Parse prefix expression. */
1971 if (ls->token == '(') { 1960 if (ls->tok == '(') {
1972 BCLine line = ls->linenumber; 1961 BCLine line = ls->linenumber;
1973 lj_lex_next(ls); 1962 lj_lex_next(ls);
1974 expr(ls, v); 1963 expr(ls, v);
1975 lex_match(ls, ')', '(', line); 1964 lex_match(ls, ')', '(', line);
1976 expr_discharge(ls->fs, v); 1965 expr_discharge(ls->fs, v);
1977 } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1966 } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1978 var_lookup(ls, v); 1967 var_lookup(ls, v);
1979 } else { 1968 } else {
1980 err_syntax(ls, LJ_ERR_XSYMBOL); 1969 err_syntax(ls, LJ_ERR_XSYMBOL);
1981 } 1970 }
1982 for (;;) { /* Parse multiple expression suffixes. */ 1971 for (;;) { /* Parse multiple expression suffixes. */
1983 if (ls->token == '.') { 1972 if (ls->tok == '.') {
1984 expr_field(ls, v); 1973 expr_field(ls, v);
1985 } else if (ls->token == '[') { 1974 } else if (ls->tok == '[') {
1986 ExpDesc key; 1975 ExpDesc key;
1987 expr_toanyreg(fs, v); 1976 expr_toanyreg(fs, v);
1988 expr_bracket(ls, &key); 1977 expr_bracket(ls, &key);
1989 expr_index(fs, v, &key); 1978 expr_index(fs, v, &key);
1990 } else if (ls->token == ':') { 1979 } else if (ls->tok == ':') {
1991 ExpDesc key; 1980 ExpDesc key;
1992 lj_lex_next(ls); 1981 lj_lex_next(ls);
1993 expr_str(ls, &key); 1982 expr_str(ls, &key);
1994 bcemit_method(fs, v, &key); 1983 bcemit_method(fs, v, &key);
1995 parse_args(ls, v); 1984 parse_args(ls, v);
1996 } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { 1985 } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
1997 expr_tonextreg(fs, v); 1986 expr_tonextreg(fs, v);
1987 if (ls->fr2) bcreg_reserve(fs, 1);
1998 parse_args(ls, v); 1988 parse_args(ls, v);
1999 } else { 1989 } else {
2000 break; 1990 break;
@@ -2005,14 +1995,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
2005/* Parse simple expression. */ 1995/* Parse simple expression. */
2006static void expr_simple(LexState *ls, ExpDesc *v) 1996static void expr_simple(LexState *ls, ExpDesc *v)
2007{ 1997{
2008 switch (ls->token) { 1998 switch (ls->tok) {
2009 case TK_number: 1999 case TK_number:
2010 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); 2000 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
2011 copyTV(ls->L, &v->u.nval, &ls->tokenval); 2001 copyTV(ls->L, &v->u.nval, &ls->tokval);
2012 break; 2002 break;
2013 case TK_string: 2003 case TK_string:
2014 expr_init(v, VKSTR, 0); 2004 expr_init(v, VKSTR, 0);
2015 v->u.sval = strV(&ls->tokenval); 2005 v->u.sval = strV(&ls->tokval);
2016 break; 2006 break;
2017 case TK_nil: 2007 case TK_nil:
2018 expr_init(v, VKNIL, 0); 2008 expr_init(v, VKNIL, 0);
@@ -2100,11 +2090,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
2100static void expr_unop(LexState *ls, ExpDesc *v) 2090static void expr_unop(LexState *ls, ExpDesc *v)
2101{ 2091{
2102 BCOp op; 2092 BCOp op;
2103 if (ls->token == TK_not) { 2093 if (ls->tok == TK_not) {
2104 op = BC_NOT; 2094 op = BC_NOT;
2105 } else if (ls->token == '-') { 2095 } else if (ls->tok == '-') {
2106 op = BC_UNM; 2096 op = BC_UNM;
2107 } else if (ls->token == '#') { 2097 } else if (ls->tok == '#') {
2108 op = BC_LEN; 2098 op = BC_LEN;
2109 } else { 2099 } else {
2110 expr_simple(ls, v); 2100 expr_simple(ls, v);
@@ -2121,7 +2111,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
2121 BinOpr op; 2111 BinOpr op;
2122 synlevel_begin(ls); 2112 synlevel_begin(ls);
2123 expr_unop(ls, v); 2113 expr_unop(ls, v);
2124 op = token2binop(ls->token); 2114 op = token2binop(ls->tok);
2125 while (op != OPR_NOBINOPR && priority[op].left > limit) { 2115 while (op != OPR_NOBINOPR && priority[op].left > limit) {
2126 ExpDesc v2; 2116 ExpDesc v2;
2127 BinOpr nextop; 2117 BinOpr nextop;
@@ -2310,9 +2300,9 @@ static void parse_func(LexState *ls, BCLine line)
2310 lj_lex_next(ls); /* Skip 'function'. */ 2300 lj_lex_next(ls); /* Skip 'function'. */
2311 /* Parse function name. */ 2301 /* Parse function name. */
2312 var_lookup(ls, &v); 2302 var_lookup(ls, &v);
2313 while (ls->token == '.') /* Multiple dot-separated fields. */ 2303 while (ls->tok == '.') /* Multiple dot-separated fields. */
2314 expr_field(ls, &v); 2304 expr_field(ls, &v);
2315 if (ls->token == ':') { /* Optional colon to signify method call. */ 2305 if (ls->tok == ':') { /* Optional colon to signify method call. */
2316 needself = 1; 2306 needself = 1;
2317 expr_field(ls, &v); 2307 expr_field(ls, &v);
2318 } 2308 }
@@ -2325,9 +2315,9 @@ static void parse_func(LexState *ls, BCLine line)
2325/* -- Control transfer statements ----------------------------------------- */ 2315/* -- Control transfer statements ----------------------------------------- */
2326 2316
2327/* Check for end of block. */ 2317/* Check for end of block. */
2328static int endofblock(LexToken token) 2318static int parse_isend(LexToken tok)
2329{ 2319{
2330 switch (token) { 2320 switch (tok) {
2331 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: 2321 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2332 return 1; 2322 return 1;
2333 default: 2323 default:
@@ -2342,18 +2332,22 @@ static void parse_return(LexState *ls)
2342 FuncState *fs = ls->fs; 2332 FuncState *fs = ls->fs;
2343 lj_lex_next(ls); /* Skip 'return'. */ 2333 lj_lex_next(ls); /* Skip 'return'. */
2344 fs->flags |= PROTO_HAS_RETURN; 2334 fs->flags |= PROTO_HAS_RETURN;
2345 if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ 2335 if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
2346 ins = BCINS_AD(BC_RET0, 0, 1); 2336 ins = BCINS_AD(BC_RET0, 0, 1);
2347 } else { /* Return with one or more values. */ 2337 } else { /* Return with one or more values. */
2348 ExpDesc e; /* Receives the _last_ expression in the list. */ 2338 ExpDesc e; /* Receives the _last_ expression in the list. */
2349 BCReg nret = expr_list(ls, &e); 2339 BCReg nret = expr_list(ls, &e);
2350 if (nret == 1) { /* Return one result. */ 2340 if (nret == 1) { /* Return one result. */
2351 if (e.k == VCALL) { /* Check for tail call. */ 2341 if (e.k == VCALL) { /* Check for tail call. */
2342#ifdef LUAJIT_DISABLE_TAILCALL
2343 goto notailcall;
2344#else
2352 BCIns *ip = bcptr(fs, &e); 2345 BCIns *ip = bcptr(fs, &e);
2353 /* It doesn't pay off to add BC_VARGT just for 'return ...'. */ 2346 /* It doesn't pay off to add BC_VARGT just for 'return ...'. */
2354 if (bc_op(*ip) == BC_VARG) goto notailcall; 2347 if (bc_op(*ip) == BC_VARG) goto notailcall;
2355 fs->pc--; 2348 fs->pc--;
2356 ins = BCINS_AD(bc_op(*ip)-BC_CALL+BC_CALLT, bc_a(*ip), bc_c(*ip)); 2349 ins = BCINS_AD(bc_op(*ip)-BC_CALL+BC_CALLT, bc_a(*ip), bc_c(*ip));
2350#endif
2357 } else { /* Can return the result from any register. */ 2351 } else { /* Can return the result from any register. */
2358 ins = BCINS_AD(BC_RET1, expr_toanyreg(fs, &e), 2); 2352 ins = BCINS_AD(BC_RET1, expr_toanyreg(fs, &e), 2);
2359 } 2353 }
@@ -2408,18 +2402,18 @@ static void parse_label(LexState *ls)
2408 lex_check(ls, TK_label); 2402 lex_check(ls, TK_label);
2409 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ 2403 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
2410 for (;;) { 2404 for (;;) {
2411 if (ls->token == TK_label) { 2405 if (ls->tok == TK_label) {
2412 synlevel_begin(ls); 2406 synlevel_begin(ls);
2413 parse_label(ls); 2407 parse_label(ls);
2414 synlevel_end(ls); 2408 synlevel_end(ls);
2415 } else if (LJ_52 && ls->token == ';') { 2409 } else if (LJ_52 && ls->tok == ';') {
2416 lj_lex_next(ls); 2410 lj_lex_next(ls);
2417 } else { 2411 } else {
2418 break; 2412 break;
2419 } 2413 }
2420 } 2414 }
2421 /* Trailing label is considered to be outside of scope. */ 2415 /* Trailing label is considered to be outside of scope. */
2422 if (endofblock(ls->token) && ls->token != TK_until) 2416 if (parse_isend(ls->tok) && ls->tok != TK_until)
2423 ls->vstack[idx].slot = fs->bl->nactvar; 2417 ls->vstack[idx].slot = fs->bl->nactvar;
2424 gola_resolve(ls, fs->bl, idx); 2418 gola_resolve(ls, fs->bl, idx);
2425} 2419}
@@ -2576,7 +2570,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
2576 lex_check(ls, TK_in); 2570 lex_check(ls, TK_in);
2577 line = ls->linenumber; 2571 line = ls->linenumber;
2578 assign_adjust(ls, 3, expr_list(ls, &e), &e); 2572 assign_adjust(ls, 3, expr_list(ls, &e), &e);
2579 bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */ 2573 /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
2574 bcreg_bump(fs, 3+ls->fr2);
2580 isnext = (nvars <= 5 && fs->pc > exprpc && predict_next(ls, fs, exprpc)); 2575 isnext = (nvars <= 5 && fs->pc > exprpc && predict_next(ls, fs, exprpc));
2581 var_add(ls, 3); /* Hidden control variables. */ 2576 var_add(ls, 3); /* Hidden control variables. */
2582 lex_check(ls, TK_do); 2577 lex_check(ls, TK_do);
@@ -2604,9 +2599,9 @@ static void parse_for(LexState *ls, BCLine line)
2604 fscope_begin(fs, &bl, FSCOPE_LOOP); 2599 fscope_begin(fs, &bl, FSCOPE_LOOP);
2605 lj_lex_next(ls); /* Skip 'for'. */ 2600 lj_lex_next(ls); /* Skip 'for'. */
2606 varname = lex_str(ls); /* Get first variable name. */ 2601 varname = lex_str(ls); /* Get first variable name. */
2607 if (ls->token == '=') 2602 if (ls->tok == '=')
2608 parse_for_num(ls, varname, line); 2603 parse_for_num(ls, varname, line);
2609 else if (ls->token == ',' || ls->token == TK_in) 2604 else if (ls->tok == ',' || ls->tok == TK_in)
2610 parse_for_iter(ls, varname); 2605 parse_for_iter(ls, varname);
2611 else 2606 else
2612 err_syntax(ls, LJ_ERR_XFOR); 2607 err_syntax(ls, LJ_ERR_XFOR);
@@ -2632,12 +2627,12 @@ static void parse_if(LexState *ls, BCLine line)
2632 BCPos flist; 2627 BCPos flist;
2633 BCPos escapelist = NO_JMP; 2628 BCPos escapelist = NO_JMP;
2634 flist = parse_then(ls); 2629 flist = parse_then(ls);
2635 while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ 2630 while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
2636 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2631 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2637 jmp_tohere(fs, flist); 2632 jmp_tohere(fs, flist);
2638 flist = parse_then(ls); 2633 flist = parse_then(ls);
2639 } 2634 }
2640 if (ls->token == TK_else) { /* Parse optional 'else' block. */ 2635 if (ls->tok == TK_else) { /* Parse optional 'else' block. */
2641 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2636 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2642 jmp_tohere(fs, flist); 2637 jmp_tohere(fs, flist);
2643 lj_lex_next(ls); /* Skip 'else'. */ 2638 lj_lex_next(ls); /* Skip 'else'. */
@@ -2655,7 +2650,7 @@ static void parse_if(LexState *ls, BCLine line)
2655static int parse_stmt(LexState *ls) 2650static int parse_stmt(LexState *ls)
2656{ 2651{
2657 BCLine line = ls->linenumber; 2652 BCLine line = ls->linenumber;
2658 switch (ls->token) { 2653 switch (ls->tok) {
2659 case TK_if: 2654 case TK_if:
2660 parse_if(ls, line); 2655 parse_if(ls, line);
2661 break; 2656 break;
@@ -2714,11 +2709,12 @@ static void parse_chunk(LexState *ls)
2714{ 2709{
2715 int islast = 0; 2710 int islast = 0;
2716 synlevel_begin(ls); 2711 synlevel_begin(ls);
2717 while (!islast && !endofblock(ls->token)) { 2712 while (!islast && !parse_isend(ls->tok)) {
2718 islast = parse_stmt(ls); 2713 islast = parse_stmt(ls);
2719 lex_opt(ls, ';'); 2714 lex_opt(ls, ';');
2720 lua_assert(ls->fs->framesize >= ls->fs->freereg && 2715 lj_assertLS(ls->fs->framesize >= ls->fs->freereg &&
2721 ls->fs->freereg >= ls->fs->nactvar); 2716 ls->fs->freereg >= ls->fs->nactvar,
2717 "bad regalloc");
2722 ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ 2718 ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */
2723 } 2719 }
2724 synlevel_end(ls); 2720 synlevel_end(ls);
@@ -2749,13 +2745,12 @@ GCproto *lj_parse(LexState *ls)
2749 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ 2745 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
2750 lj_lex_next(ls); /* Read-ahead first token. */ 2746 lj_lex_next(ls); /* Read-ahead first token. */
2751 parse_chunk(ls); 2747 parse_chunk(ls);
2752 if (ls->token != TK_eof) 2748 if (ls->tok != TK_eof)
2753 err_token(ls, TK_eof); 2749 err_token(ls, TK_eof);
2754 pt = fs_finish(ls, ls->linenumber); 2750 pt = fs_finish(ls, ls->linenumber);
2755 L->top--; /* Drop chunkname. */ 2751 L->top--; /* Drop chunkname. */
2756 lua_assert(fs.prev == NULL); 2752 lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting");
2757 lua_assert(ls->fs == NULL); 2753 lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues");
2758 lua_assert(pt->sizeuv == 0);
2759 return pt; 2754 return pt;
2760} 2755}
2761 2756
diff --git a/src/lj_prng.c b/src/lj_prng.c
new file mode 100644
index 00000000..326b41e6
--- /dev/null
+++ b/src/lj_prng.c
@@ -0,0 +1,259 @@
1/*
2** Pseudo-random number generation.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_prng_c
7#define LUA_CORE
8
9/* To get the syscall prototype. */
10#if defined(__linux__) && !defined(_GNU_SOURCE)
11#define _GNU_SOURCE
12#endif
13
14#include "lj_def.h"
15#include "lj_arch.h"
16#include "lj_prng.h"
17
18/* -- PRNG step function -------------------------------------------------- */
19
20/* This implements a Tausworthe PRNG with period 2^223. Based on:
21** Tables of maximally-equidistributed combined LFSR generators,
22** Pierre L'Ecuyer, 1991, table 3, 1st entry.
23** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
24**
25** Important note: This PRNG is NOT suitable for cryptographic use!
26**
27** But it works fine for math.random(), which has an API that's not
28** suitable for cryptography, anyway.
29**
30** When used as a securely seeded global PRNG, it substantially raises
31** the difficulty for various attacks on the VM.
32*/
33
34/* Update generator i and compute a running xor of all states. */
35#define TW223_GEN(rs, z, r, i, k, q, s) \
36 z = rs->u[i]; \
37 z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
38 r ^= z; rs->u[i] = z;
39
40#define TW223_STEP(rs, z, r) \
41 TW223_GEN(rs, z, r, 0, 63, 31, 18) \
42 TW223_GEN(rs, z, r, 1, 58, 19, 28) \
43 TW223_GEN(rs, z, r, 2, 55, 24, 7) \
44 TW223_GEN(rs, z, r, 3, 47, 21, 8)
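Note: TW223_GEN packs one combined-LFSR component update into a single expression. A plain-function rendering of the same step may be easier to read; this is illustrative only and not part of the patch (k, q, s are the per-component parameters passed by TW223_STEP):

/* Illustrative expansion of TW223_GEN (not part of the patch): one
** component update of the combined Tausworthe generator. The caller
** xors the four component results together, as TW223_STEP does.
*/
static uint64_t tw223_component(uint64_t *u, int k, int q, int s)
{
  uint64_t z = *u;
  z = (((z << q) ^ z) >> (k - s)) ^
      ((z & ((uint64_t)(int64_t)-1 << (64 - k))) << s);
  *u = z;
  return z;
}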
45
46/* PRNG step function with uint64_t result. */
47LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs)
48{
49 uint64_t z, r = 0;
50 TW223_STEP(rs, z, r)
51 return r;
52}
53
54/* PRNG step function with double in uint64_t result. */
55LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs)
56{
57 uint64_t z, r = 0;
58 TW223_STEP(rs, z, r)
59 /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */
60 return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
61}
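The double variant returns raw IEEE-754 bits rather than a double: the exponent field is forced to 0x3ff, so the random mantissa yields a value in [1.0, 2.0). A caller can map that to the conventional [0, 1) range by reinterpreting the bits and subtracting 1.0. A minimal sketch, assuming a standard binary64 layout (not part of the patch):

/* Sketch: turn the lj_prng_u64d() bit pattern into a uniform double in
** [0, 1). Assumes IEEE-754 binary64; illustrative only.
*/
#include <stdint.h>
#include <string.h>

static double prng_unit_double(uint64_t bits)
{
  double d;
  memcpy(&d, &bits, sizeof(d));  /* Bit pattern is a double in [1.0, 2.0). */
  return d - 1.0;                /* Shift into [0.0, 1.0). */
}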
62
63/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */
64static LJ_AINLINE void lj_prng_condition(PRNGState *rs)
65{
66 if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1);
67 if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6);
68 if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9);
69 if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17);
70}
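The thresholds above are 2^(64-k) for each component: the update only ever depends on the top k bits of the state, so a seed whose top k bits are all zero would pin that component at zero forever. A tiny demonstration for the first component (k=63, q=31, s=18); illustrative only, not part of the patch:

/* Demonstrates why conditioning is needed: a component seed below the
** 2^(64-k) threshold collapses to zero under the TW223 update.
*/
#include <assert.h>
#include <stdint.h>

static void demo_degenerate_seed(void)
{
  uint64_t z = 1;  /* Top 63 bits are zero, i.e. below (1u << 1). */
  z = (((z << 31) ^ z) >> (63 - 18)) ^
      ((z & ((uint64_t)(int64_t)-1 << (64 - 63))) << 18);
  assert(z == 0);  /* Stuck at zero from here on. */
}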
71
72/* -- PRNG seeding from OS ------------------------------------------------ */
73
74#if LUAJIT_SECURITY_PRNG == 0
75
76/* Nothing to define. */
77
78#elif LJ_TARGET_XBOX360
79
80extern int XNetRandom(void *buf, unsigned int len);
81
82#elif LJ_TARGET_PS3
83
84extern int sys_get_random_number(void *buf, uint64_t len);
85
86#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
87
88extern int sceRandomGetRandomNumber(void *buf, size_t len);
89
90#elif LJ_TARGET_NX
91
92#include <unistd.h>
93
94#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE
95
96#define WIN32_LEAN_AND_MEAN
97#include <windows.h>
98
99#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE
100/* Must use BCryptGenRandom. */
101#include <bcrypt.h>
102#pragma comment(lib, "bcrypt.lib")
103#else
104/* If you wonder about this mess, then search online for RtlGenRandom. */
105typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len);
106static PRGR libfunc_rgr;
107#endif
108
109#elif LJ_TARGET_POSIX
110
111#if LJ_TARGET_LINUX
112/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */
113#include <sys/syscall.h>
114#else
115
116#if LJ_TARGET_OSX && !LJ_TARGET_IOS
117/*
118** In their infinite wisdom Apple decided to disallow getentropy() in the
119** iOS App Store. Even though the call is common to all BSD-ish OS, it's
120** recommended by Apple in their own security-related docs, and, to top
121** off the foolery, /dev/urandom is handled by the same kernel code,
122** yet accessing it is actually permitted (but less efficient).
123*/
124#include <Availability.h>
125#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200
126#define LJ_TARGET_HAS_GETENTROPY 1
127#endif
128#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX
129#define LJ_TARGET_HAS_GETENTROPY 1
130#endif
131
132#if LJ_TARGET_HAS_GETENTROPY
133extern int getentropy(void *buf, size_t len)
134#ifdef __ELF__
135 __attribute__((weak))
136#endif
137;
138#endif
139
140#endif
141
142/* For the /dev/urandom fallback. */
143#include <fcntl.h>
144#include <unistd.h>
145
146#endif
147
148#if LUAJIT_SECURITY_PRNG == 0
149
150/* If you really don't care about security, then define
151** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed
152** and provides NO SECURITY against various attacks on the VM.
153**
154** BTW: This is NOT the way to get predictable table iteration,
155** predictable trace generation, predictable bytecode generation, etc.
156*/
157int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
158{
159 lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */
160 return 1;
161}
162
163#else
164
165/* Securely seed PRNG from system entropy. Returns 0 on failure. */
166int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
167{
168#if LJ_TARGET_XBOX360
169
170 if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0)
171 goto ok;
172
173#elif LJ_TARGET_PS3
174
175 if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0)
176 goto ok;
177
178#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
179
180 if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
181 goto ok;
182
183#elif LJ_TARGET_NX
184
185 if (getentropy(rs->u, sizeof(rs->u)) == 0)
186 goto ok;
187
188#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE
189
190 if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u),
191 BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0)
192 goto ok;
193
194#elif LJ_TARGET_WINDOWS
195
196 /* Keep the library loaded in case multiple VMs are started. */
197 if (!libfunc_rgr) {
198 HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll");
199 if (!lib) return 0;
200 libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036");
201 if (!libfunc_rgr) return 0;
202 }
203 if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u)))
204 goto ok;
205
206#elif LJ_TARGET_POSIX
207
208#if LJ_TARGET_LINUX && defined(SYS_getrandom)
209
210 if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u))
211 goto ok;
212
213#elif LJ_TARGET_HAS_GETENTROPY
214
215#ifdef __ELF__
216 if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0)
217 goto ok;
218#else
219 if (getentropy(rs->u, sizeof(rs->u)) == 0)
220 goto ok;
221#endif
222
223#endif
224
225 /* Fallback to /dev/urandom. This may fail if the device is not
226 ** existent or accessible in a chroot or container, or if the process
227 ** or the OS ran out of file descriptors.
228 */
229 {
230 int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC);
231 if (fd != -1) {
232 ssize_t n = read(fd, rs->u, sizeof(rs->u));
233 (void)close(fd);
234 if (n == (ssize_t)sizeof(rs->u))
235 goto ok;
236 }
237 }
238
239#else
240
241 /* Add an elif above for your OS with a secure PRNG seed.
242 ** Note that fiddling around with rand(), getpid(), time() or coercing
243 ** ASLR to yield a few bits of randomness is not helpful.
244 ** If you don't want any security, then don't pretend you have any
245 ** and simply define LUAJIT_SECURITY_PRNG=0 for the build.
246 */
247#error "Missing secure PRNG seed for this OS"
248
249#endif
250 return 0; /* Fail. */
251
252ok:
253 lj_prng_condition(rs);
254 (void)lj_prng_u64(rs);
255 return 1; /* Success. */
256}
257
258#endif
259
diff --git a/src/lj_prng.h b/src/lj_prng.h
new file mode 100644
index 00000000..3dd9dbc0
--- /dev/null
+++ b/src/lj_prng.h
@@ -0,0 +1,24 @@
1/*
2** Pseudo-random number generation.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PRNG_H
7#define _LJ_PRNG_H
8
9#include "lj_def.h"
10
11LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs);
12LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs);
13LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs);
14
15/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */
16static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs)
17{
18 rs->u[0] = U64x(a0d27757,0a345b8c);
19 rs->u[1] = U64x(764a296c,5d4aa64f);
20 rs->u[2] = U64x(51220704,070adeaa);
21 rs->u[3] = U64x(2a2717b5,a7b7b927);
22}
23
24#endif
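Taken together, the header suggests the intended calling pattern: try lj_prng_seed_secure() and fall back to the predictable fixed seed only if it fails. A hypothetical usage sketch (assumes PRNGState is the four-word state declared in lj_def.h; not part of the patch):

/* Hypothetical consumer of the PRNG module (illustrative only). */
#include "lj_prng.h"

static uint64_t draw_one(void)
{
  PRNGState rs;
  if (!lj_prng_seed_secure(&rs))  /* 0 means no OS entropy was available. */
    lj_prng_seed_fixed(&rs);      /* Predictable, but already conditioned. */
  return lj_prng_u64(&rs);
}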
diff --git a/src/lj_profile.c b/src/lj_profile.c
new file mode 100644
index 00000000..8cefd5fb
--- /dev/null
+++ b/src/lj_profile.c
@@ -0,0 +1,371 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_profile_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13#include "lj_buf.h"
14#include "lj_frame.h"
15#include "lj_debug.h"
16#include "lj_dispatch.h"
17#if LJ_HASJIT
18#include "lj_jit.h"
19#include "lj_trace.h"
20#endif
21#include "lj_profile.h"
22
23#include "luajit.h"
24
25#if LJ_PROFILE_SIGPROF
26
27#include <sys/time.h>
28#include <signal.h>
29#define profile_lock(ps) UNUSED(ps)
30#define profile_unlock(ps) UNUSED(ps)
31
32#elif LJ_PROFILE_PTHREAD
33
34#include <pthread.h>
35#include <time.h>
36#if LJ_TARGET_PS3
37#include <sys/timer.h>
38#endif
39#define profile_lock(ps) pthread_mutex_lock(&ps->lock)
40#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock)
41
42#elif LJ_PROFILE_WTHREAD
43
44#define WIN32_LEAN_AND_MEAN
45#if LJ_TARGET_XBOX360
46#include <xtl.h>
47#include <xbox.h>
48#else
49#include <windows.h>
50#endif
51typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
52#define profile_lock(ps) EnterCriticalSection(&ps->lock)
53#define profile_unlock(ps) LeaveCriticalSection(&ps->lock)
54
55#endif
56
57/* Profiler state. */
58typedef struct ProfileState {
59 global_State *g; /* VM state that started the profiler. */
60 luaJIT_profile_callback cb; /* Profiler callback. */
61 void *data; /* Profiler callback data. */
62 SBuf sb; /* String buffer for stack dumps. */
63 int interval; /* Sample interval in milliseconds. */
64 int samples; /* Number of samples for next callback. */
65 int vmstate; /* VM state when profile timer triggered. */
66#if LJ_PROFILE_SIGPROF
67 struct sigaction oldsa; /* Previous SIGPROF state. */
68#elif LJ_PROFILE_PTHREAD
69 pthread_mutex_t lock; /* g->hookmask update lock. */
70 pthread_t thread; /* Timer thread. */
71 int abort; /* Abort timer thread. */
72#elif LJ_PROFILE_WTHREAD
73#if LJ_TARGET_WINDOWS
74 HINSTANCE wmm; /* WinMM library handle. */
75 WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */
76 WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */
77#endif
78 CRITICAL_SECTION lock; /* g->hookmask update lock. */
79 HANDLE thread; /* Timer thread. */
80 int abort; /* Abort timer thread. */
81#endif
82} ProfileState;
83
84/* Sadly, we have to use a static profiler state.
85**
86** The SIGPROF variant needs a static pointer to the global state, anyway.
87** And it would be hard to extend for multiple threads. You can still use
88** multiple VMs in multiple threads, but only profile one at a time.
89*/
90static ProfileState profile_state;
91
92/* Default sample interval in milliseconds. */
93#define LJ_PROFILE_INTERVAL_DEFAULT 10
94
95/* -- Profiler/hook interaction ------------------------------------------- */
96
97#if !LJ_PROFILE_SIGPROF
98void LJ_FASTCALL lj_profile_hook_enter(global_State *g)
99{
100 ProfileState *ps = &profile_state;
101 if (ps->g) {
102 profile_lock(ps);
103 hook_enter(g);
104 profile_unlock(ps);
105 } else {
106 hook_enter(g);
107 }
108}
109
110void LJ_FASTCALL lj_profile_hook_leave(global_State *g)
111{
112 ProfileState *ps = &profile_state;
113 if (ps->g) {
114 profile_lock(ps);
115 hook_leave(g);
116 profile_unlock(ps);
117 } else {
118 hook_leave(g);
119 }
120}
121#endif
122
123/* -- Profile callbacks --------------------------------------------------- */
124
125/* Callback from profile hook (HOOK_PROFILE already cleared). */
126void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
127{
128 ProfileState *ps = &profile_state;
129 global_State *g = G(L);
130 uint8_t mask;
131 profile_lock(ps);
132 mask = (g->hookmask & ~HOOK_PROFILE);
133 if (!(mask & HOOK_VMEVENT)) {
134 int samples = ps->samples;
135 ps->samples = 0;
136 g->hookmask = HOOK_VMEVENT;
137 lj_dispatch_update(g);
138 profile_unlock(ps);
139 ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */
140 profile_lock(ps);
141 mask |= (g->hookmask & HOOK_PROFILE);
142 }
143 g->hookmask = mask;
144 lj_dispatch_update(g);
145 profile_unlock(ps);
146}
147
148/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
149static void profile_trigger(ProfileState *ps)
150{
151 global_State *g = ps->g;
152 uint8_t mask;
153 profile_lock(ps);
154 ps->samples++; /* Always increment number of samples. */
155 mask = g->hookmask;
156 if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */
157 int st = g->vmstate;
158 ps->vmstate = st >= 0 ? 'N' :
159 st == ~LJ_VMST_INTERP ? 'I' :
160 st == ~LJ_VMST_C ? 'C' :
161 st == ~LJ_VMST_GC ? 'G' : 'J';
162 g->hookmask = (mask | HOOK_PROFILE);
163 lj_dispatch_update(g);
164 }
165 profile_unlock(ps);
166}
167
168/* -- OS-specific profile timer handling ---------------------------------- */
169
170#if LJ_PROFILE_SIGPROF
171
172/* SIGPROF handler. */
173static void profile_signal(int sig)
174{
175 UNUSED(sig);
176 profile_trigger(&profile_state);
177}
178
179/* Start profiling timer. */
180static void profile_timer_start(ProfileState *ps)
181{
182 int interval = ps->interval;
183 struct itimerval tm;
184 struct sigaction sa;
185 tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
186 tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
187 setitimer(ITIMER_PROF, &tm, NULL);
188#if LJ_TARGET_QNX
189 sa.sa_flags = 0;
190#else
191 sa.sa_flags = SA_RESTART;
192#endif
193 sa.sa_handler = profile_signal;
194 sigemptyset(&sa.sa_mask);
195 sigaction(SIGPROF, &sa, &ps->oldsa);
196}
197
198/* Stop profiling timer. */
199static void profile_timer_stop(ProfileState *ps)
200{
201 struct itimerval tm;
202 tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
203 tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
204 setitimer(ITIMER_PROF, &tm, NULL);
205 sigaction(SIGPROF, &ps->oldsa, NULL);
206}
207
208#elif LJ_PROFILE_PTHREAD
209
210/* POSIX timer thread. */
211static void *profile_thread(ProfileState *ps)
212{
213 int interval = ps->interval;
214#if !LJ_TARGET_PS3
215 struct timespec ts;
216 ts.tv_sec = interval / 1000;
217 ts.tv_nsec = (interval % 1000) * 1000000;
218#endif
219 while (1) {
220#if LJ_TARGET_PS3
221 sys_timer_usleep(interval * 1000);
222#else
223 nanosleep(&ts, NULL);
224#endif
225 if (ps->abort) break;
226 profile_trigger(ps);
227 }
228 return NULL;
229}
230
231/* Start profiling timer thread. */
232static void profile_timer_start(ProfileState *ps)
233{
234 pthread_mutex_init(&ps->lock, 0);
235 ps->abort = 0;
236 pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
237}
238
239/* Stop profiling timer thread. */
240static void profile_timer_stop(ProfileState *ps)
241{
242 ps->abort = 1;
243 pthread_join(ps->thread, NULL);
244 pthread_mutex_destroy(&ps->lock);
245}
246
247#elif LJ_PROFILE_WTHREAD
248
249/* Windows timer thread. */
250static DWORD WINAPI profile_thread(void *psx)
251{
252 ProfileState *ps = (ProfileState *)psx;
253 int interval = ps->interval;
254#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
255 ps->wmm_tbp(interval);
256#endif
257 while (1) {
258 Sleep(interval);
259 if (ps->abort) break;
260 profile_trigger(ps);
261 }
262#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
263 ps->wmm_tep(interval);
264#endif
265 return 0;
266}
267
268/* Start profiling timer thread. */
269static void profile_timer_start(ProfileState *ps)
270{
271#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
272 if (!ps->wmm) { /* Load WinMM library on-demand. */
273 ps->wmm = LJ_WIN_LOADLIBA("winmm.dll");
274 if (ps->wmm) {
275 ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
276 ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
277 if (!ps->wmm_tbp || !ps->wmm_tep) {
278 ps->wmm = NULL;
279 return;
280 }
281 }
282 }
283#endif
284 InitializeCriticalSection(&ps->lock);
285 ps->abort = 0;
286 ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
287}
288
289/* Stop profiling timer thread. */
290static void profile_timer_stop(ProfileState *ps)
291{
292 ps->abort = 1;
293 WaitForSingleObject(ps->thread, INFINITE);
294 DeleteCriticalSection(&ps->lock);
295}
296
297#endif
298
299/* -- Public profiling API ------------------------------------------------ */
300
301/* Start profiling. */
302LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
303 luaJIT_profile_callback cb, void *data)
304{
305 ProfileState *ps = &profile_state;
306 int interval = LJ_PROFILE_INTERVAL_DEFAULT;
307 while (*mode) {
308 int m = *mode++;
309 switch (m) {
310 case 'i':
311 interval = 0;
312 while (*mode >= '0' && *mode <= '9')
313 interval = interval * 10 + (*mode++ - '0');
314 if (interval <= 0) interval = 1;
315 break;
316#if LJ_HASJIT
317 case 'l': case 'f':
318 L2J(L)->prof_mode = m;
319 lj_trace_flushall(L);
320 break;
321#endif
322 default: /* Ignore unknown mode chars. */
323 break;
324 }
325 }
326 if (ps->g) {
327 luaJIT_profile_stop(L);
328 if (ps->g) return; /* Profiler in use by another VM. */
329 }
330 ps->g = G(L);
331 ps->interval = interval;
332 ps->cb = cb;
333 ps->data = data;
334 ps->samples = 0;
335 lj_buf_init(L, &ps->sb);
336 profile_timer_start(ps);
337}
338
339/* Stop profiling. */
340LUA_API void luaJIT_profile_stop(lua_State *L)
341{
342 ProfileState *ps = &profile_state;
343 global_State *g = ps->g;
344 if (G(L) == g) { /* Only stop profiler if started by this VM. */
345 profile_timer_stop(ps);
346 g->hookmask &= ~HOOK_PROFILE;
347 lj_dispatch_update(g);
348#if LJ_HASJIT
349 G2J(g)->prof_mode = 0;
350 lj_trace_flushall(L);
351#endif
352 lj_buf_free(g, &ps->sb);
353 ps->sb.w = ps->sb.e = NULL;
354 ps->g = NULL;
355 }
356}
357
358/* Return a compact stack dump. */
359LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
360 int depth, size_t *len)
361{
362 ProfileState *ps = &profile_state;
363 SBuf *sb = &ps->sb;
364 setsbufL(sb, L);
365 lj_buf_reset(sb);
366 lj_debug_dumpstack(L, sb, fmt, depth);
367 *len = (size_t)sbuflen(sb);
368 return sb->b;
369}
370
371#endif
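For context, a hedged sketch of how a host program could drive this public API from C. The declarations match those used above; the "fi10" mode string and the dumpstack format string are illustrative assumptions — see doc/ext_profiler.html for the exact specifiers:

/* Illustrative host-side use of the profiler API (not part of the patch). */
#include <stdio.h>
#include "luajit.h"

static void prof_cb(void *data, lua_State *L, int samples, int vmstate)
{
  size_t len;
  /* Format string is an example; specifiers are listed in ext_profiler.html. */
  const char *stack = luaJIT_profile_dumpstack(L, "pl\n", 10, &len);
  fprintf((FILE *)data, "%c %d samples\n%.*s", vmstate, samples, (int)len, stack);
}

void run_profiled(lua_State *L)
{
  luaJIT_profile_start(L, "fi10", prof_cb, (void *)stderr);  /* 'f' mode, 10 ms. */
  /* ... run the Lua workload here ... */
  luaJIT_profile_stop(L);
}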
diff --git a/src/lj_profile.h b/src/lj_profile.h
new file mode 100644
index 00000000..68bb9a1f
--- /dev/null
+++ b/src/lj_profile.h
@@ -0,0 +1,21 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PROFILE_H
7#define _LJ_PROFILE_H
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
14#if !LJ_PROFILE_SIGPROF
15LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g);
16LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g);
17#endif
18
19#endif
20
21#endif
diff --git a/src/lj_record.c b/src/lj_record.c
index af12e256..fedd47a6 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -20,6 +20,9 @@
20#endif 20#endif
21#include "lj_bc.h" 21#include "lj_bc.h"
22#include "lj_ff.h" 22#include "lj_ff.h"
23#if LJ_HASPROFILE
24#include "lj_debug.h"
25#endif
23#include "lj_ir.h" 26#include "lj_ir.h"
24#include "lj_jit.h" 27#include "lj_jit.h"
25#include "lj_ircall.h" 28#include "lj_ircall.h"
@@ -30,6 +33,7 @@
30#include "lj_snap.h" 33#include "lj_snap.h"
31#include "lj_dispatch.h" 34#include "lj_dispatch.h"
32#include "lj_vm.h" 35#include "lj_vm.h"
36#include "lj_prng.h"
33 37
34/* Some local macros to save typing. Undef'd at the end. */ 38/* Some local macros to save typing. Undef'd at the end. */
35#define IR(ref) (&J->cur.ir[(ref)]) 39#define IR(ref) (&J->cur.ir[(ref)])
@@ -47,31 +51,52 @@
47static void rec_check_ir(jit_State *J) 51static void rec_check_ir(jit_State *J)
48{ 52{
49 IRRef i, nins = J->cur.nins, nk = J->cur.nk; 53 IRRef i, nins = J->cur.nins, nk = J->cur.nk;
50 lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); 54 lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536,
51 for (i = nins-1; i >= nk; i--) { 55 "inconsistent IR layout");
56 for (i = nk; i < nins; i++) {
52 IRIns *ir = IR(i); 57 IRIns *ir = IR(i);
53 uint32_t mode = lj_ir_mode[ir->o]; 58 uint32_t mode = lj_ir_mode[ir->o];
54 IRRef op1 = ir->op1; 59 IRRef op1 = ir->op1;
55 IRRef op2 = ir->op2; 60 IRRef op2 = ir->op2;
61 const char *err = NULL;
56 switch (irm_op1(mode)) { 62 switch (irm_op1(mode)) {
57 case IRMnone: lua_assert(op1 == 0); break; 63 case IRMnone:
58 case IRMref: lua_assert(op1 >= nk); 64 if (op1 != 0) err = "IRMnone op1 used";
59 lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; 65 break;
66 case IRMref:
67 if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i))
68 err = "IRMref op1 out of range";
69 break;
60 case IRMlit: break; 70 case IRMlit: break;
61 case IRMcst: lua_assert(i < REF_BIAS); continue; 71 case IRMcst:
72 if (i >= REF_BIAS) { err = "constant in IR range"; break; }
73 if (irt_is64(ir->t) && ir->o != IR_KNULL)
74 i++;
75 continue;
62 } 76 }
63 switch (irm_op2(mode)) { 77 switch (irm_op2(mode)) {
64 case IRMnone: lua_assert(op2 == 0); break; 78 case IRMnone:
65 case IRMref: lua_assert(op2 >= nk); 79 if (op2) err = "IRMnone op2 used";
66 lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; 80 break;
81 case IRMref:
82 if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i))
83 err = "IRMref op2 out of range";
84 break;
67 case IRMlit: break; 85 case IRMlit: break;
68 case IRMcst: lua_assert(0); break; 86 case IRMcst: err = "IRMcst op2"; break;
69 } 87 }
70 if (ir->prev) { 88 if (!err && ir->prev) {
71 lua_assert(ir->prev >= nk); 89 if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i))
72 lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); 90 err = "chain out of range";
73 lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); 91 else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o)
92 err = "chain to different op";
74 } 93 }
94 lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s",
95 i-REF_BIAS,
96 ir->o,
97 irm_op1(mode) == IRMref ? op1-REF_BIAS : op1,
98 irm_op2(mode) == IRMref ? op2-REF_BIAS : op2,
99 err);
75 } 100 }
76} 101}
77 102
@@ -81,48 +106,80 @@ static void rec_check_slots(jit_State *J)
81 BCReg s, nslots = J->baseslot + J->maxslot; 106 BCReg s, nslots = J->baseslot + J->maxslot;
82 int32_t depth = 0; 107 int32_t depth = 0;
83 cTValue *base = J->L->base - J->baseslot; 108 cTValue *base = J->L->base - J->baseslot;
84 lua_assert(J->baseslot >= 1); 109 lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot");
85 lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); 110 lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME),
86 lua_assert(nslots <= LJ_MAX_JSLOTS); 111 "baseslot does not point to frame");
112 lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow");
87 for (s = 0; s < nslots; s++) { 113 for (s = 0; s < nslots; s++) {
88 TRef tr = J->slot[s]; 114 TRef tr = J->slot[s];
89 if (tr) { 115 if (tr) {
90 cTValue *tv = &base[s]; 116 cTValue *tv = &base[s];
91 IRRef ref = tref_ref(tr); 117 IRRef ref = tref_ref(tr);
92 IRIns *ir; 118 IRIns *ir = NULL; /* Silence compiler. */
93 lua_assert(ref >= J->cur.nk && ref < J->cur.nins); 119 lj_assertJ(tv < J->L->top, "slot %d above top of Lua stack", s);
94 ir = IR(ref); 120 if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
95 lua_assert(irt_t(ir->t) == tref_t(tr)); 121 lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins,
122 "slot %d ref %04d out of range", s, ref - REF_BIAS);
123 ir = IR(ref);
124 lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s);
125 }
96 if (s == 0) { 126 if (s == 0) {
97 lua_assert(tref_isfunc(tr)); 127 lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function");
128#if LJ_FR2
129 } else if (s == 1) {
130 lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1");
131#endif
98 } else if ((tr & TREF_FRAME)) { 132 } else if ((tr & TREF_FRAME)) {
99 GCfunc *fn = gco2func(frame_gc(tv)); 133 GCfunc *fn = gco2func(frame_gc(tv));
100 BCReg delta = (BCReg)(tv - frame_prev(tv)); 134 BCReg delta = (BCReg)(tv - frame_prev(tv));
101 lua_assert(tref_isfunc(tr)); 135#if LJ_FR2
102 if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); 136 lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
103 lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); 137 "frame slot %d PC mismatch", s);
138 tr = J->slot[s-1];
139 ir = IR(tref_ref(tr));
140#endif
141 lj_assertJ(tref_isfunc(tr),
142 "frame slot %d is not a function", s-LJ_FR2);
143 lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir),
144 "frame slot %d function mismatch", s-LJ_FR2);
145 lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
146 : (s == delta + LJ_FR2),
147 "frame slot %d broken chain", s-LJ_FR2);
104 depth++; 148 depth++;
105 } else if ((tr & TREF_CONT)) { 149 } else if ((tr & TREF_CONT)) {
106 lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); 150#if LJ_FR2
107 lua_assert((J->slot[s+1] & TREF_FRAME)); 151 lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
152 "cont slot %d continuation mismatch", s);
153#else
154 lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void),
155 "cont slot %d continuation mismatch", s);
156#endif
157 lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME),
158 "cont slot %d not followed by frame", s);
108 depth++; 159 depth++;
160 } else if ((tr & TREF_KEYINDEX)) {
161 lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d",
162 s, tref_type(tr));
109 } else { 163 } else {
110 if (tvisnumber(tv)) 164 /* Number repr. may differ, but other types must be the same. */
111 lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ 165 lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) :
112 else 166 itype2irt(tv) == tref_type(tr),
113 lua_assert(itype2irt(tv) == tref_type(tr)); 167 "slot %d type mismatch: stack type %d vs IR type %d",
168 s, itypemap(tv), tref_type(tr));
114 if (tref_isk(tr)) { /* Compare constants. */ 169 if (tref_isk(tr)) { /* Compare constants. */
115 TValue tvk; 170 TValue tvk;
116 lj_ir_kvalue(J->L, &tvk, ir); 171 lj_ir_kvalue(J->L, &tvk, ir);
117 if (!(tvisnum(&tvk) && tvisnan(&tvk))) 172 lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ?
118 lua_assert(lj_obj_equal(tv, &tvk)); 173 (tvisnum(tv) && tvisnan(tv)) :
119 else 174 lj_obj_equal(tv, &tvk),
120 lua_assert(tvisnum(tv) && tvisnan(tv)); 175 "slot %d const mismatch: stack %016llx vs IR %016llx",
176 s, tv->u64, tvk.u64);
121 } 177 }
122 } 178 }
123 } 179 }
124 } 180 }
125 lua_assert(J->framedepth == depth); 181 lj_assertJ(J->framedepth == depth,
182 "frame depth mismatch %d vs %d", J->framedepth, depth);
126} 183}
127#endif 184#endif
128 185
@@ -156,10 +213,11 @@ static TRef sload(jit_State *J, int32_t slot)
156/* Get TRef for current function. */ 213/* Get TRef for current function. */
157static TRef getcurrf(jit_State *J) 214static TRef getcurrf(jit_State *J)
158{ 215{
159 if (J->base[-1]) 216 if (J->base[-1-LJ_FR2])
160 return J->base[-1]; 217 return J->base[-1-LJ_FR2];
161 lua_assert(J->baseslot == 1); 218 /* Non-base frame functions ought to be loaded already. */
162 return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); 219 lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot");
220 return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
163} 221}
164 222
165/* Compare for raw object equality. 223/* Compare for raw object equality.
@@ -205,6 +263,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o)
205 return 0; /* Can't represent lightuserdata (pointless). */ 263 return 0; /* Can't represent lightuserdata (pointless). */
206} 264}
207 265
266/* Emit a VLOAD with the correct type. */
267TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t)
268{
269 TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx);
270 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
271 return tr;
272}
273
208/* -- Record loop ops ----------------------------------------------------- */ 274/* -- Record loop ops ----------------------------------------------------- */
209 275
210/* Loop event. */ 276/* Loop event. */
@@ -221,17 +287,21 @@ static void canonicalize_slots(jit_State *J)
221 if (LJ_DUALNUM) return; 287 if (LJ_DUALNUM) return;
222 for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { 288 for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
223 TRef tr = J->slot[s]; 289 TRef tr = J->slot[s];
224 if (tref_isinteger(tr)) { 290 if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) {
225 IRIns *ir = IR(tref_ref(tr)); 291 IRIns *ir = IR(tref_ref(tr));
226 if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) 292 if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY))))
227 J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); 293 J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
228 } 294 }
229 } 295 }
230} 296}
231 297
232/* Stop recording. */ 298/* Stop recording. */
233static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) 299void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
234{ 300{
301#ifdef LUAJIT_ENABLE_TABLE_BUMP
302 if (J->retryrec)
303 lj_trace_err(J, LJ_TRERR_RETRY);
304#endif
235 lj_trace_end(J); 305 lj_trace_end(J);
236 J->cur.linktype = (uint8_t)linktype; 306 J->cur.linktype = (uint8_t)linktype;
237 J->cur.link = (uint16_t)lnk; 307 J->cur.link = (uint16_t)lnk;
@@ -399,7 +469,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
399 TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); 469 TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
400 TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); 470 TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
401 int tc, dir = rec_for_direction(&tv[FORL_STEP]); 471 int tc, dir = rec_for_direction(&tv[FORL_STEP]);
402 lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); 472 lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI,
473 "bad bytecode %d instead of FORI/JFORI", bc_op(*fori));
403 scev->t.irt = t; 474 scev->t.irt = t;
404 scev->dir = dir; 475 scev->dir = dir;
405 scev->stop = tref_ref(stop); 476 scev->stop = tref_ref(stop);
@@ -455,7 +526,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
455 IRT_NUM; 526 IRT_NUM;
456 for (i = FORL_IDX; i <= FORL_STEP; i++) { 527 for (i = FORL_IDX; i <= FORL_STEP; i++) {
457 if (!tr[i]) sload(J, ra+i); 528 if (!tr[i]) sload(J, ra+i);
458 lua_assert(tref_isnumber_str(tr[i])); 529 lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type");
459 if (tref_isstr(tr[i])) 530 if (tref_isstr(tr[i]))
460 tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); 531 tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
461 if (t == IRT_INT) { 532 if (t == IRT_INT) {
@@ -499,8 +570,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
499static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) 570static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
500{ 571{
501 BCReg ra = bc_a(iterins); 572 BCReg ra = bc_a(iterins);
502 lua_assert(J->base[ra] != 0); 573 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
503 if (!tref_isnil(J->base[ra])) { /* Looping back? */
504 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ 574 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
505 J->maxslot = ra-1+bc_b(J->pc[-1]); 575 J->maxslot = ra-1+bc_b(J->pc[-1]);
506 J->pc += bc_j(iterins)+1; 576 J->pc += bc_j(iterins)+1;
@@ -538,12 +608,13 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
538/* Handle the case when an interpreted loop op is hit. */ 608/* Handle the case when an interpreted loop op is hit. */
539static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) 609static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
540{ 610{
541 if (J->parent == 0) { 611 if (J->parent == 0 && J->exitno == 0) {
542 if (pc == J->startpc && J->framedepth + J->retdepth == 0) { 612 if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
613 if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */
543 /* Same loop? */ 614 /* Same loop? */
544 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ 615 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
545 lj_trace_err(J, LJ_TRERR_LLEAVE); 616 lj_trace_err(J, LJ_TRERR_LLEAVE);
546 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ 617 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
547 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ 618 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
548 /* It's usually better to abort here and wait until the inner loop 619 /* It's usually better to abort here and wait until the inner loop
549 ** is traced. But if the inner loop repeatedly didn't loop back, 620 ** is traced. But if the inner loop repeatedly didn't loop back,
@@ -568,18 +639,136 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
568/* Handle the case when an already compiled loop op is hit. */ 639/* Handle the case when an already compiled loop op is hit. */
569static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) 640static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
570{ 641{
571 if (J->parent == 0) { /* Root trace hit an inner loop. */ 642 if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
572 /* Better let the inner loop spawn a side trace back here. */ 643 /* Better let the inner loop spawn a side trace back here. */
573 lj_trace_err(J, LJ_TRERR_LINNER); 644 lj_trace_err(J, LJ_TRERR_LINNER);
574 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ 645 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
575 J->instunroll = 0; /* Cannot continue across a compiled loop op. */ 646 J->instunroll = 0; /* Cannot continue across a compiled loop op. */
576 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 647 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
577 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ 648 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
578 else 649 else
579 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ 650 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
580 } /* Side trace continues across a loop that's left or not entered. */ 651 } /* Side trace continues across a loop that's left or not entered. */
581} 652}
582 653
654/* Record ITERN. */
655static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb)
656{
657#if LJ_BE
658 /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
659 ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
660 */
661 UNUSED(ra); UNUSED(rb);
662 setintV(&J->errinfo, (int32_t)BC_ITERN);
663 lj_trace_err_info(J, LJ_TRERR_NYIBC);
664#else
665 RecordIndex ix;
666 /* Since ITERN is recorded at the start, we need our own loop detection. */
667 if (J->pc == J->startpc &&
668 J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) {
669 IRRef ref = REF_FIRST + LJ_HASPROFILE;
670#ifdef LUAJIT_ENABLE_CHECKHOOK
671 ref += 3;
672#endif
673 if (J->cur.nins > ref ||
674 (LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) {
675 J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */
676 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
677 return LOOPEV_ENTER;
678 }
679 }
680 J->maxslot = ra;
681 lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */
682 ix.tab = getslot(J, ra-2);
683 ix.key = J->base[ra-1] ? J->base[ra-1] :
684 sloadt(J, (int32_t)(ra-1), IRT_GUARD|IRT_INT,
685 IRSLOAD_TYPECHECK|IRSLOAD_KEYINDEX);
686 copyTV(J->L, &ix.tabv, &J->L->base[ra-2]);
687 copyTV(J->L, &ix.keyv, &J->L->base[ra-1]);
688 ix.idxchain = (rb < 3); /* Omit value type check, if unused. */
689 ix.mobj = 1; /* We need the next index, too. */
690 J->maxslot = ra + lj_record_next(J, &ix);
691 J->needsnap = 1;
692 if (!tref_isnil(ix.key)) { /* Looping back? */
693 J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */
694 J->base[ra] = ix.key;
695 J->base[ra+1] = ix.val;
696 J->pc += bc_j(J->pc[1])+2;
697 return LOOPEV_ENTER;
698 } else {
699 J->maxslot = ra-3;
700 J->pc += 2;
701 return LOOPEV_LEAVE;
702 }
703#endif
704}
705
706/* Record ISNEXT. */
707static void rec_isnext(jit_State *J, BCReg ra)
708{
709 cTValue *b = &J->L->base[ra-3];
710 if (tvisfunc(b) && funcV(b)->c.ffid == FF_next &&
711 tvistab(b+1) && tvisnil(b+2)) {
712 /* These checks are folded away for a compiled pairs(). */
713 TRef func = getslot(J, ra-3);
714 TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID);
715 emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next));
716 (void)getslot(J, ra-2); /* Type check for table. */
717 (void)getslot(J, ra-1); /* Type check for nil key. */
718 J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX;
719 J->maxslot = ra;
720 } else { /* Abort trace. Interpreter will despecialize bytecode. */
721 lj_trace_err(J, LJ_TRERR_RECERR);
722 }
723}
724
725/* -- Record profiler hook checks ----------------------------------------- */
726
727#if LJ_HASPROFILE
728
729/* Need to insert profiler hook check? */
730static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
731{
732 GCproto *ppt;
733 lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l',
734 "bad profiler mode %c", J->prof_mode);
735 if (!pt)
736 return 0;
737 ppt = J->prev_pt;
738 J->prev_pt = pt;
739 if (pt != ppt && ppt) {
740 J->prev_line = -1;
741 return 1;
742 }
743 if (J->prof_mode == 'l') {
744 BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));
745 BCLine pline = J->prev_line;
746 J->prev_line = line;
747 if (pline != line)
748 return 1;
749 }
750 return 0;
751}
752
753static void rec_profile_ins(jit_State *J, const BCIns *pc)
754{
755 if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {
756 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
757 lj_snap_add(J);
758 }
759}
760
761static void rec_profile_ret(jit_State *J)
762{
763 if (J->prof_mode == 'f') {
764 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
765 J->prev_pt = NULL;
766 lj_snap_add(J);
767 }
768}
769
770#endif
771
583/* -- Record calls and returns -------------------------------------------- */ 772/* -- Record calls and returns -------------------------------------------- */
584 773
585/* Specialize to the runtime value of the called function or its prototype. */ 774/* Specialize to the runtime value of the called function or its prototype. */
@@ -590,11 +779,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
590 GCproto *pt = funcproto(fn); 779 GCproto *pt = funcproto(fn);
591 /* Too many closures created? Probably not a monomorphic function. */ 780 /* Too many closures created? Probably not a monomorphic function. */
592 if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ 781 if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */
593 TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); 782 TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC);
594 emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); 783 emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt)));
595 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ 784 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
596 return tr; 785 return tr;
597 } 786 }
787 } else {
788 /* Don't specialize to non-monomorphic builtins. */
789 switch (fn->c.ffid) {
790 case FF_coroutine_wrap_aux:
791 case FF_string_gmatch_aux:
792 /* NYI: io_file_iter doesn't have an ffid, yet. */
793 { /* Specialize to the ffid. */
794 TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
795 emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid));
796 }
797 return tr;
798 default:
799 /* NYI: don't specialize to non-monomorphic C functions. */
800 break;
801 }
598 } 802 }
599 /* Otherwise specialize to the function (closure) value itself. */ 803 /* Otherwise specialize to the function (closure) value itself. */
600 kfunc = lj_ir_kfunc(J, fn); 804 kfunc = lj_ir_kfunc(J, fn);
@@ -607,21 +811,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
607{ 811{
608 RecordIndex ix; 812 RecordIndex ix;
609 TValue *functv = &J->L->base[func]; 813 TValue *functv = &J->L->base[func];
610 TRef *fbase = &J->base[func]; 814 TRef kfunc, *fbase = &J->base[func];
611 ptrdiff_t i; 815 ptrdiff_t i;
612 for (i = 0; i <= nargs; i++) 816 (void)getslot(J, func); /* Ensure func has a reference. */
613 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ 817 for (i = 1; i <= nargs; i++)
818 (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
614 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ 819 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
615 ix.tab = fbase[0]; 820 ix.tab = fbase[0];
616 copyTV(J->L, &ix.tabv, functv); 821 copyTV(J->L, &ix.tabv, functv);
617 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) 822 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
618 lj_trace_err(J, LJ_TRERR_NOMM); 823 lj_trace_err(J, LJ_TRERR_NOMM);
619 for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ 824 for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
620 fbase[i] = fbase[i-1]; 825 fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
826#if LJ_FR2
827 fbase[2] = fbase[0];
828#endif
621 fbase[0] = ix.mobj; /* Replace function. */ 829 fbase[0] = ix.mobj; /* Replace function. */
622 functv = &ix.mobjv; 830 functv = &ix.mobjv;
623 } 831 }
624 fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); 832 kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
833#if LJ_FR2
834 fbase[0] = kfunc;
835 fbase[1] = TREF_FRAME;
836#else
837 fbase[0] = kfunc | TREF_FRAME;
838#endif
625 J->maxslot = (BCReg)nargs; 839 J->maxslot = (BCReg)nargs;
626} 840}
627 841
@@ -631,8 +845,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
631 rec_call_setup(J, func, nargs); 845 rec_call_setup(J, func, nargs);
632 /* Bump frame. */ 846 /* Bump frame. */
633 J->framedepth++; 847 J->framedepth++;
634 J->base += func+1; 848 J->base += func+1+LJ_FR2;
635 J->baseslot += func+1; 849 J->baseslot += func+1+LJ_FR2;
636 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) 850 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
637 lj_trace_err(J, LJ_TRERR_STACKOV); 851 lj_trace_err(J, LJ_TRERR_STACKOV);
638} 852}
@@ -650,7 +864,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
650 func += cbase; 864 func += cbase;
651 } 865 }
652 /* Move func + args down. */ 866 /* Move func + args down. */
653 memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); 867 if (LJ_FR2 && J->baseslot == 2)
868 J->base[func+1] = TREF_FRAME;
869 memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
654 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ 870 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
655 /* Tailcalls can form a loop, so count towards the loop unroll limit. */ 871 /* Tailcalls can form a loop, so count towards the loop unroll limit. */
656 if (++J->tailcalled > J->loopunroll) 872 if (++J->tailcalled > J->loopunroll)
@@ -680,41 +896,48 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
680 return 0; 896 return 0;
681} 897}
682 898
899static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
900
683/* Record return. */ 901/* Record return. */
684void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) 902void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
685{ 903{
686 TValue *frame = J->L->base - 1; 904 TValue *frame = J->L->base - 1;
687 ptrdiff_t i; 905 ptrdiff_t i;
906 BCReg baseadj = 0;
688 for (i = 0; i < gotresults; i++) 907 for (i = 0; i < gotresults; i++)
689 (void)getslot(J, rbase+i); /* Ensure all results have a reference. */ 908 (void)getslot(J, rbase+i); /* Ensure all results have a reference. */
690 while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */ 909 while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */
691 BCReg cbase = (BCReg)frame_delta(frame); 910 BCReg cbase = (BCReg)frame_delta(frame);
692 if (--J->framedepth <= 0) 911 if (--J->framedepth <= 0)
693 lj_trace_err(J, LJ_TRERR_NYIRETL); 912 lj_trace_err(J, LJ_TRERR_NYIRETL);
694 lua_assert(J->baseslot > 1); 913 lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
695 gotresults++; 914 gotresults++;
915 baseadj += cbase;
696 rbase += cbase; 916 rbase += cbase;
697 J->baseslot -= (BCReg)cbase; 917 J->baseslot -= (BCReg)cbase;
698 J->base -= cbase; 918 J->base -= cbase;
699 J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ 919 J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */
700 frame = frame_prevd(frame); 920 frame = frame_prevd(frame);
921 J->needsnap = 1; /* Stop catching on-trace errors. */
701 } 922 }
702 /* Return to lower frame via interpreter for unhandled cases. */ 923 /* Return to lower frame via interpreter for unhandled cases. */
703 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && 924 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
704 (!frame_islua(frame) || 925 (!frame_islua(frame) ||
705 (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { 926 (J->parent == 0 && J->exitno == 0 &&
927 !bc_isret(bc_op(J->cur.startins))))) {
706 /* NYI: specialize to frame type and return directly, not via RET*. */ 928 /* NYI: specialize to frame type and return directly, not via RET*. */
707 for (i = 0; i < (ptrdiff_t)rbase; i++) 929 for (i = 0; i < (ptrdiff_t)rbase; i++)
708 J->base[i] = 0; /* Purge dead slots. */ 930 J->base[i] = 0; /* Purge dead slots. */
709 J->maxslot = rbase + (BCReg)gotresults; 931 J->maxslot = rbase + (BCReg)gotresults;
710 rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ 932 lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
711 return; 933 return;
712 } 934 }
713 if (frame_isvarg(frame)) { 935 if (frame_isvarg(frame)) {
714 BCReg cbase = (BCReg)frame_delta(frame); 936 BCReg cbase = (BCReg)frame_delta(frame);
715 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ 937 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
716 lj_trace_err(J, LJ_TRERR_NYIRETL); 938 lj_trace_err(J, LJ_TRERR_NYIRETL);
717 lua_assert(J->baseslot > 1); 939 lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
940 baseadj += cbase;
718 rbase += cbase; 941 rbase += cbase;
719 J->baseslot -= (BCReg)cbase; 942 J->baseslot -= (BCReg)cbase;
720 J->base -= cbase; 943 J->base -= cbase;
@@ -724,27 +947,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
724 BCIns callins = *(frame_pc(frame)-1); 947 BCIns callins = *(frame_pc(frame)-1);
725 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; 948 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
726 BCReg cbase = bc_a(callins); 949 BCReg cbase = bc_a(callins);
727 GCproto *pt = funcproto(frame_func(frame - (cbase+1))); 950 GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
728 if ((pt->flags & PROTO_NOJIT)) 951 if ((pt->flags & PROTO_NOJIT))
729 lj_trace_err(J, LJ_TRERR_CJITOFF); 952 lj_trace_err(J, LJ_TRERR_CJITOFF);
730 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { 953 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
731 if (!J->cur.root && check_downrec_unroll(J, pt)) { 954 if (!J->cur.root && check_downrec_unroll(J, pt)) {
732 J->maxslot = (BCReg)(rbase + gotresults); 955 J->maxslot = (BCReg)(rbase + gotresults);
733 lj_snap_purge(J); 956 lj_snap_purge(J);
734 rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ 957 lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
735 return; 958 return;
736 } 959 }
737 lj_snap_add(J); 960 lj_snap_add(J);
738 } 961 }
739 for (i = 0; i < nresults; i++) /* Adjust results. */ 962 for (i = 0; i < nresults; i++) /* Adjust results. */
740 J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; 963 J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
741 J->maxslot = cbase+(BCReg)nresults; 964 J->maxslot = cbase+(BCReg)nresults;
742 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ 965 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
743 J->framedepth--; 966 J->framedepth--;
744 lua_assert(J->baseslot > cbase+1); 967 lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return");
745 J->baseslot -= cbase+1; 968 J->baseslot -= cbase+1+LJ_FR2;
746 J->base -= cbase+1; 969 J->base -= cbase+1+LJ_FR2;
747 } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { 970 } else if (J->parent == 0 && J->exitno == 0 &&
971 !bc_isret(bc_op(J->cur.startins))) {
748 /* Return to lower frame would leave the loop in a root trace. */ 972 /* Return to lower frame would leave the loop in a root trace. */
749 lj_trace_err(J, LJ_TRERR_LLEAVE); 973 lj_trace_err(J, LJ_TRERR_LLEAVE);
750 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ 974 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */
@@ -754,14 +978,14 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
754 } else { /* Return to lower frame. Guard for the target we return to. */ 978 } else { /* Return to lower frame. Guard for the target we return to. */
755 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); 979 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
756 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); 980 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
757 emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); 981 emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
758 J->retdepth++; 982 J->retdepth++;
759 J->needsnap = 1; 983 J->needsnap = 1;
760 J->scev.idx = REF_NIL; 984 J->scev.idx = REF_NIL;
761 lua_assert(J->baseslot == 1); 985 lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
762 /* Shift result slots up and clear the slots of the new frame below. */ 986 /* Shift result slots up and clear the slots of the new frame below. */
763 memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); 987 memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
764 memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); 988 memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
765 } 989 }
766 } else if (frame_iscont(frame)) { /* Return to continuation frame. */ 990 } else if (frame_iscont(frame)) { /* Return to continuation frame. */
767 ASMFunction cont = frame_contf(frame); 991 ASMFunction cont = frame_contf(frame);
@@ -770,24 +994,55 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
770 lj_trace_err(J, LJ_TRERR_NYIRETL); 994 lj_trace_err(J, LJ_TRERR_NYIRETL);
771 J->baseslot -= (BCReg)cbase; 995 J->baseslot -= (BCReg)cbase;
772 J->base -= cbase; 996 J->base -= cbase;
773 J->maxslot = cbase-2; 997 J->maxslot = cbase-(2<<LJ_FR2);
774 if (cont == lj_cont_ra) { 998 if (cont == lj_cont_ra) {
775 /* Copy result to destination slot. */ 999 /* Copy result to destination slot. */
776 BCReg dst = bc_a(*(frame_contpc(frame)-1)); 1000 BCReg dst = bc_a(*(frame_contpc(frame)-1));
777 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; 1001 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
778 if (dst >= J->maxslot) J->maxslot = dst+1; 1002 if (dst >= J->maxslot) {
1003 J->maxslot = dst+1;
1004 }
779 } else if (cont == lj_cont_nop) { 1005 } else if (cont == lj_cont_nop) {
780 /* Nothing to do here. */ 1006 /* Nothing to do here. */
781 } else if (cont == lj_cont_cat) { 1007 } else if (cont == lj_cont_cat) {
782 lua_assert(0); 1008 BCReg bslot = bc_b(*(frame_contpc(frame)-1));
1009 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
1010 if (bslot != J->maxslot) { /* Concatenate the remainder. */
1011 /* Simulate lower frame and result. */
1012 TValue *b = J->L->base - baseadj, save;
1013 /* Can't handle MM_concat + CALLT + fast func side-effects. */
1014 if (J->postproc != LJ_POST_NONE)
1015 lj_trace_err(J, LJ_TRERR_NYIRETL);
1016 J->base[J->maxslot] = tr;
1017 copyTV(J->L, &save, b-(2<<LJ_FR2));
1018 if (gotresults)
1019 copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
1020 else
1021 setnilV(b-(2<<LJ_FR2));
1022 J->L->base = b - cbase;
1023 tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
1024 b = J->L->base + cbase; /* Undo. */
1025 J->L->base = b + baseadj;
1026 copyTV(J->L, b-(2<<LJ_FR2), &save);
1027 }
1028 if (tr >= 0xffffff00) {
1029 lj_err_throw(J->L, -(int32_t)tr); /* Propagate errors. */
1030 } else if (tr) { /* Store final result. */
1031 BCReg dst = bc_a(*(frame_contpc(frame)-1));
1032 J->base[dst] = tr;
1033 if (dst >= J->maxslot) {
1034 J->maxslot = dst+1;
1035 }
1036 } /* Otherwise continue with another __concat call. */
783 } else { 1037 } else {
784 /* Result type already specialized. */ 1038 /* Result type already specialized. */
785 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); 1039 lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt,
1040 "bad continuation type");
786 } 1041 }
787 } else { 1042 } else {
788 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ 1043 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
789 } 1044 }
790 lua_assert(J->baseslot >= 1); 1045 lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return");
791} 1046}
792 1047
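The hunk above is dominated by +LJ_FR2 adjustments: in the two-slot frame layout of the 64-bit GC mode the callee keeps the function object and a separate frame-link slot below its base, so every offset that used to be base[-1] becomes base[-1-LJ_FR2]. A minimal standalone sketch of the offset shift, with purely illustrative slot numbers (not LuaJIT code):

#include <stdio.h>

#define LJ_FR2 1  /* assumption: two-slot frames, as in the 64-bit GC mode */

int main(void)
{
  int cbase = 4;                      /* callee base relative to the caller */
  int funcslot = cbase - 1 - LJ_FR2;  /* function object of the callee frame */
  int linkslot = cbase - 1;           /* frame link/PC slot (LJ_FR2 only) */
  printf("func at %d, frame link at %d, first arg at %d\n",
         funcslot, linkslot, cbase);
  return 0;
}
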
793/* -- Metamethod handling ------------------------------------------------- */ 1048/* -- Metamethod handling ------------------------------------------------- */
@@ -795,19 +1050,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
795/* Prepare to record call to metamethod. */ 1050/* Prepare to record call to metamethod. */
796static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) 1051static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
797{ 1052{
798 BCReg s, top = curr_proto(J->L)->framesize; 1053 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
799 TRef trcont; 1054#if LJ_FR2
800 setcont(&J->L->base[top], cont); 1055 J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
801#if LJ_64 1056 J->base[top+1] = TREF_CONT;
802 trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
803#else 1057#else
804 trcont = lj_ir_kptr(J, (void *)cont); 1058 J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
805#endif 1059#endif
806 J->base[top] = trcont | TREF_CONT;
807 J->framedepth++; 1060 J->framedepth++;
808 for (s = J->maxslot; s < top; s++) 1061 for (s = J->maxslot; s < top; s++)
809 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ 1062 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
810 return top+1; 1063 return top+1+LJ_FR2;
811} 1064}
812 1065
813/* Record metamethod lookup. */ 1066/* Record metamethod lookup. */
@@ -827,7 +1080,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
827 cTValue *mo; 1080 cTValue *mo;
828 if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { 1081 if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
829 /* Specialize to the C library namespace object. */ 1082 /* Specialize to the C library namespace object. */
830 emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); 1083 emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
831 } else { 1084 } else {
832 /* Specialize to the type of userdata. */ 1085 /* Specialize to the type of userdata. */
833 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); 1086 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
@@ -855,7 +1108,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
855 } 1108 }
856 /* The cdata metatable is treated as immutable. */ 1109 /* The cdata metatable is treated as immutable. */
857 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; 1110 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
858 ix->mt = mix.tab = lj_ir_ktab(J, mt); 1111 ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
1112 GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
859 goto nocheck; 1113 goto nocheck;
860 } 1114 }
861 ix->mt = mt ? mix.tab : TREF_NIL; 1115 ix->mt = mt ? mix.tab : TREF_NIL;
@@ -882,12 +1136,12 @@ nocheck:
882static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) 1136static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
883{ 1137{
884 /* Set up metamethod call first to save ix->tab and ix->tabv. */ 1138 /* Set up metamethod call first to save ix->tab and ix->tabv. */
885 BCReg func = rec_mm_prep(J, lj_cont_ra); 1139 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
886 TRef *base = J->base + func; 1140 TRef *base = J->base + func;
887 TValue *basev = J->L->base + func; 1141 TValue *basev = J->L->base + func;
888 base[1] = ix->tab; base[2] = ix->key; 1142 base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
889 copyTV(J->L, basev+1, &ix->tabv); 1143 copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
890 copyTV(J->L, basev+2, &ix->keyv); 1144 copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
891 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ 1145 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
892 if (mm != MM_unm) { 1146 if (mm != MM_unm) {
893 ix->tab = ix->key; 1147 ix->tab = ix->key;
@@ -899,6 +1153,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
899 } 1153 }
900ok: 1154ok:
901 base[0] = ix->mobj; 1155 base[0] = ix->mobj;
1156#if LJ_FR2
1157 base[1] = 0;
1158#endif
902 copyTV(J->L, basev+0, &ix->mobjv); 1159 copyTV(J->L, basev+0, &ix->mobjv);
903 lj_record_call(J, func, 2); 1160 lj_record_call(J, func, 2);
904 return 0; /* No result yet. */ 1161 return 0; /* No result yet. */
@@ -915,6 +1172,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
915 TRef *base = J->base + func; 1172 TRef *base = J->base + func;
916 TValue *basev = J->L->base + func; 1173 TValue *basev = J->L->base + func;
917 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); 1174 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
1175 base += LJ_FR2;
1176 basev += LJ_FR2;
918 base[1] = tr; copyTV(J->L, basev+1, tv); 1177 base[1] = tr; copyTV(J->L, basev+1, tv);
919#if LJ_52 1178#if LJ_52
920 base[2] = tr; copyTV(J->L, basev+2, tv); 1179 base[2] = tr; copyTV(J->L, basev+2, tv);
@@ -924,7 +1183,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
924 lj_record_call(J, func, 2); 1183 lj_record_call(J, func, 2);
925 } else { 1184 } else {
926 if (LJ_52 && tref_istab(tr)) 1185 if (LJ_52 && tref_istab(tr))
927 return lj_ir_call(J, IRCALL_lj_tab_len, tr); 1186 return emitir(IRTI(IR_ALEN), tr, TREF_NIL);
928 lj_trace_err(J, LJ_TRERR_NOMM); 1187 lj_trace_err(J, LJ_TRERR_NOMM);
929 } 1188 }
930 return 0; /* No result yet. */ 1189 return 0; /* No result yet. */
@@ -934,10 +1193,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
934static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) 1193static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
935{ 1194{
936 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); 1195 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
937 TRef *base = J->base + func; 1196 TRef *base = J->base + func + LJ_FR2;
938 TValue *tv = J->L->base + func; 1197 TValue *tv = J->L->base + func + LJ_FR2;
939 base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; 1198 base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
940 copyTV(J->L, tv+0, &ix->mobjv); 1199 copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
941 copyTV(J->L, tv+1, &ix->valv); 1200 copyTV(J->L, tv+1, &ix->valv);
942 copyTV(J->L, tv+2, &ix->keyv); 1201 copyTV(J->L, tv+2, &ix->keyv);
943 lj_record_call(J, func, 2); 1202 lj_record_call(J, func, 2);
@@ -1033,7 +1292,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
1033 ix->tab = ix->val; 1292 ix->tab = ix->val;
1034 copyTV(J->L, &ix->tabv, &ix->valv); 1293 copyTV(J->L, &ix->tabv, &ix->valv);
1035 } else { 1294 } else {
1036 lua_assert(tref_iscdata(ix->key)); 1295 lj_assertJ(tref_iscdata(ix->key), "cdata expected");
1037 ix->tab = ix->key; 1296 ix->tab = ix->key;
1038 copyTV(J->L, &ix->tabv, &ix->keyv); 1297 copyTV(J->L, &ix->tabv, &ix->keyv);
1039 } 1298 }
@@ -1044,6 +1303,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
1044 1303
1045/* -- Indexed access ------------------------------------------------------ */ 1304/* -- Indexed access ------------------------------------------------------ */
1046 1305
1306#ifdef LUAJIT_ENABLE_TABLE_BUMP
1307/* Bump table allocations in bytecode when they grow during recording. */
1308static void rec_idx_bump(jit_State *J, RecordIndex *ix)
1309{
1310 RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))];
1311 if (tref_ref(ix->tab) == rbc->ref) {
1312 const BCIns *pc = mref(rbc->pc, const BCIns);
1313 GCtab *tb = tabV(&ix->tabv);
1314 uint32_t nhbits;
1315 IRIns *ir;
1316 if (!tvisnil(&ix->keyv))
1317 (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */
1318 nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0;
1319 ir = IR(tref_ref(ix->tab));
1320 if (ir->o == IR_TNEW) {
1321 uint32_t ah = bc_d(*pc);
1322 uint32_t asize = ah & 0x7ff, hbits = ah >> 11;
1323 if (nhbits > hbits) hbits = nhbits;
1324 if (tb->asize > asize) {
1325 asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff;
1326 }
1327 if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */
1328 /* Patch bytecode, but continue recording (for more patching). */
1329 setbc_d(pc, (asize | (hbits<<11)));
1330 /* Patching TNEW operands is only safe if the trace is aborted. */
1331 ir->op1 = asize; ir->op2 = hbits;
1332 J->retryrec = 1; /* Abort the trace at the end of recording. */
1333 }
1334 } else if (ir->o == IR_TDUP) {
1335 GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc)));
1336 /* Grow template table, but preserve keys with nil values. */
1337 if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) ||
1338 (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) {
1339 Node *node = noderef(tpl->node);
1340 uint32_t i, hmask = tpl->hmask, asize;
1341 TValue *array;
1342 for (i = 0; i <= hmask; i++) {
1343 if (!tvisnil(&node[i].key) && tvisnil(&node[i].val))
1344 settabV(J->L, &node[i].val, tpl);
1345 }
1346 if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) {
1347 TValue *o = lj_tab_set(J->L, tpl, &ix->keyv);
1348 if (tvisnil(o)) settabV(J->L, o, tpl);
1349 }
1350 lj_tab_resize(J->L, tpl, tb->asize, nhbits);
1351 node = noderef(tpl->node);
1352 hmask = tpl->hmask;
1353 for (i = 0; i <= hmask; i++) {
1354 /* This is safe, since template tables only hold immutable values. */
1355 if (tvistab(&node[i].val))
1356 setnilV(&node[i].val);
1357 }
1358 /* The shape of the table may have changed. Clean up array part, too. */
1359 asize = tpl->asize;
1360 array = tvref(tpl->array);
1361 for (i = 0; i < asize; i++) {
1362 if (tvistab(&array[i]))
1363 setnilV(&array[i]);
1364 }
1365 J->retryrec = 1; /* Abort the trace at the end of recording. */
1366 }
1367 }
1368 }
1369}
1370#endif
1371
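rec_idx_bump() above patches TNEW's D operand in place; that operand packs the array-size hint into its low 11 bits and the hash-size hint (a power-of-two exponent) into the bits above. A small standalone sketch of the packing, with made-up example sizes:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
  uint32_t asize = 24, hbits = 3;        /* example size hints */
  uint32_t ah = asize | (hbits << 11);   /* packed TNEW D operand */
  printf("packed=0x%04x asize=%u hbits=%u\n",
         (unsigned)ah, (unsigned)(ah & 0x7ff), (unsigned)(ah >> 11));
  return 0;
}
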
1047/* Record bounds-check. */ 1372/* Record bounds-check. */
1048static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) 1373static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1049{ 1374{
@@ -1064,7 +1389,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1064 /* Got scalar evolution analysis results for this reference? */ 1389 /* Got scalar evolution analysis results for this reference? */
1065 if (ref == J->scev.idx) { 1390 if (ref == J->scev.idx) {
1066 int32_t stop; 1391 int32_t stop;
1067 lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); 1392 lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD,
1393 "only int SCEV supported");
1068 stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); 1394 stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
1069 /* Runtime value for stop of loop is within bounds? */ 1395 /* Runtime value for stop of loop is within bounds? */
1070 if ((uint64_t)stop + ofs < (uint64_t)asize) { 1396 if ((uint64_t)stop + ofs < (uint64_t)asize) {
@@ -1084,11 +1410,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1084} 1410}
1085 1411
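The scalar-evolution path above folds the lower- and upper-bound test into one unsigned comparison: widening the signed loop-stop value to an unsigned type makes negative values wrap to very large ones, so a single less-than rejects both negative and out-of-range indices. A standalone illustration with example values:

#include <stdio.h>
#include <stdint.h>

static int in_bounds(int32_t stop, int32_t ofs, uint32_t asize)
{
  /* Negative sums wrap to huge unsigned values and fail the test. */
  return (uint64_t)stop + (uint64_t)ofs < (uint64_t)asize;
}

int main(void)
{
  printf("%d %d %d\n", in_bounds(7, 1, 16),   /* 1: inside the array part */
                       in_bounds(20, 1, 16),  /* 0: past the array part */
                       in_bounds(-3, 0, 16)); /* 0: negative index */
  return 0;
}
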
1086/* Record indexed key lookup. */ 1412/* Record indexed key lookup. */
1087static TRef rec_idx_key(jit_State *J, RecordIndex *ix) 1413static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
1414 IRType1 *rbguard)
1088{ 1415{
1089 TRef key; 1416 TRef key;
1090 GCtab *t = tabV(&ix->tabv); 1417 GCtab *t = tabV(&ix->tabv);
1091 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ 1418 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
1419 *rbref = 0;
1420 rbguard->irt = 0;
1092 1421
1093 /* Integer keys are looked up in the array part first. */ 1422 /* Integer keys are looked up in the array part first. */
1094 key = ix->key; 1423 key = ix->key;
@@ -1102,8 +1431,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1102 if ((MSize)k < t->asize) { /* Currently an array key? */ 1431 if ((MSize)k < t->asize) { /* Currently an array key? */
1103 TRef arrayref; 1432 TRef arrayref;
1104 rec_idx_abc(J, asizeref, ikey, t->asize); 1433 rec_idx_abc(J, asizeref, ikey, t->asize);
1105 arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); 1434 arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY);
1106 return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); 1435 return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey);
1107 } else { /* Currently not in array (may be an array extension)? */ 1436 } else { /* Currently not in array (may be an array extension)? */
1108 emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ 1437 emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
1109 if (k == 0 && tref_isk(key)) 1438 if (k == 0 && tref_isk(key))
@@ -1135,19 +1464,21 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1135 key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); 1464 key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
1136 if (tref_isk(key)) { 1465 if (tref_isk(key)) {
1137 /* Optimize lookup of constant hash keys. */ 1466 /* Optimize lookup of constant hash keys. */
1138 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); 1467 GCSize hslot = (GCSize)((char *)ix->oldv-(char *)&noderef(t->node)[0].val);
1139 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && 1468 if (hslot <= t->hmask*(GCSize)sizeof(Node) &&
1140 hslot <= 65535*(MSize)sizeof(Node)) { 1469 hslot <= 65535*(GCSize)sizeof(Node)) {
1141 TRef node, kslot; 1470 TRef node, kslot, hm;
1142 TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); 1471 *rbref = J->cur.nins; /* Mark possible rollback point. */
1472 *rbguard = J->guardemit;
1473 hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
1143 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); 1474 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
1144 node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); 1475 node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
1145 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); 1476 kslot = lj_ir_kslot(J, key, (IRRef)(hslot / sizeof(Node)));
1146 return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); 1477 return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
1147 } 1478 }
1148 } 1479 }
1149 /* Fall back to a regular hash lookup. */ 1480 /* Fall back to a regular hash lookup. */
1150 return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); 1481 return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key);
1151} 1482}
1152 1483
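The constant-key fast path in rec_idx_key() turns the byte offset of the found value inside the node array into a node index for HREFK by dividing by sizeof(Node), and only takes this path while the index still fits the HREFK operand. A self-contained sketch of the offset-to-slot computation with a mock node layout (the struct is illustrative, not the real Node):

#include <stdio.h>
#include <stddef.h>

typedef struct MockNode { double val; double key; void *next; } MockNode;

int main(void)
{
  MockNode node[8];
  double *found = &node[5].val;  /* value slot the lookup happened to hit */
  size_t hslot = (size_t)((char *)found - (char *)&node[0].val);
  printf("node index = %zu\n", hslot / sizeof(MockNode));  /* prints 5 */
  return 0;
}
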
1153/* Determine whether a key is NOT one of the fast metamethod names. */ 1484/* Determine whether a key is NOT one of the fast metamethod names. */
@@ -1172,20 +1503,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1172{ 1503{
1173 TRef xref; 1504 TRef xref;
1174 IROp xrefop, loadop; 1505 IROp xrefop, loadop;
1506 IRRef rbref;
1507 IRType1 rbguard;
1175 cTValue *oldv; 1508 cTValue *oldv;
1176 1509
1177 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ 1510 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
1178 /* Never call raw lj_record_idx() on non-table. */ 1511 /* Never call raw lj_record_idx() on non-table. */
1179 lua_assert(ix->idxchain != 0); 1512 lj_assertJ(ix->idxchain != 0, "bad usage");
1180 if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) 1513 if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
1181 lj_trace_err(J, LJ_TRERR_NOMM); 1514 lj_trace_err(J, LJ_TRERR_NOMM);
1182 handlemm: 1515 handlemm:
1183 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ 1516 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
1184 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); 1517 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
1185 TRef *base = J->base + func; 1518 TRef *base = J->base + func + LJ_FR2;
1186 TValue *tv = J->L->base + func; 1519 TValue *tv = J->L->base + func + LJ_FR2;
1187 base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; 1520 base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
1188 setfuncV(J->L, tv+0, funcV(&ix->mobjv)); 1521 setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
1189 copyTV(J->L, tv+1, &ix->tabv); 1522 copyTV(J->L, tv+1, &ix->tabv);
1190 copyTV(J->L, tv+2, &ix->keyv); 1523 copyTV(J->L, tv+2, &ix->keyv);
1191 if (ix->val) { 1524 if (ix->val) {
@@ -1198,6 +1531,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1198 return 0; /* No result yet. */ 1531 return 0; /* No result yet. */
1199 } 1532 }
1200 } 1533 }
1534#if LJ_HASBUFFER
1535 /* The index table of buffer objects is treated as immutable. */
1536 if (ix->mt == TREF_NIL && !ix->val &&
1537 tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER &&
1538 tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) {
1539 cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv));
1540 TRef tr = lj_record_constify(J, val);
1541 if (tr) return tr; /* Specialize to the value, i.e. a method. */
1542 }
1543#endif
1201 /* Otherwise retry lookup with metaobject. */ 1544 /* Otherwise retry lookup with metaobject. */
1202 ix->tab = ix->mobj; 1545 ix->tab = ix->mobj;
1203 copyTV(J->L, &ix->tabv, &ix->mobjv); 1546 copyTV(J->L, &ix->tabv, &ix->mobjv);
@@ -1217,7 +1560,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1217 } 1560 }
1218 1561
1219 /* Record the key lookup. */ 1562 /* Record the key lookup. */
1220 xref = rec_idx_key(J, ix); 1563 xref = rec_idx_key(J, ix, &rbref, &rbguard);
1221 xrefop = IR(tref_ref(xref))->o; 1564 xrefop = IR(tref_ref(xref))->o;
1222 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; 1565 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
1223 /* The lj_meta_tset() inconsistency is gone, but better play safe. */ 1566 /* The lj_meta_tset() inconsistency is gone, but better play safe. */
@@ -1227,11 +1570,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1227 IRType t = itype2irt(oldv); 1570 IRType t = itype2irt(oldv);
1228 TRef res; 1571 TRef res;
1229 if (oldv == niltvg(J2G(J))) { 1572 if (oldv == niltvg(J2G(J))) {
1230 emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1573 emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1231 res = TREF_NIL; 1574 res = TREF_NIL;
1232 } else { 1575 } else {
1233 res = emitir(IRTG(loadop, t), xref, 0); 1576 res = emitir(IRTG(loadop, t), xref, 0);
1234 } 1577 }
1578 if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */
1579 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1580 J->guardemit = rbguard;
1581 }
1235 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) 1582 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
1236 goto handlemm; 1583 goto handlemm;
1237 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ 1584 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
@@ -1239,6 +1586,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1239 } else { /* Indexed store. */ 1586 } else { /* Indexed store. */
1240 GCtab *mt = tabref(tabV(&ix->tabv)->metatable); 1587 GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
1241 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); 1588 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
1589 if (tref_ref(xref) < rbref) { /* HREFK forwarded? */
1590 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1591 J->guardemit = rbguard;
1592 }
1242 if (tvisnil(oldv)) { /* Previous value was nil? */ 1593 if (tvisnil(oldv)) { /* Previous value was nil? */
1243 /* Need to duplicate the hasmm check for the early guards. */ 1594 /* Need to duplicate the hasmm check for the early guards. */
1244 int hasmm = 0; 1595 int hasmm = 0;
@@ -1249,13 +1600,13 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1249 if (hasmm) 1600 if (hasmm)
1250 emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ 1601 emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
1251 else if (xrefop == IR_HREF) 1602 else if (xrefop == IR_HREF)
1252 emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), 1603 emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
1253 xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1604 xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1254 if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { 1605 if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
1255 lua_assert(hasmm); 1606 lj_assertJ(hasmm, "inconsistent metamethod handling");
1256 goto handlemm; 1607 goto handlemm;
1257 } 1608 }
1258 lua_assert(!hasmm); 1609 lj_assertJ(!hasmm, "inconsistent metamethod handling");
1259 if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ 1610 if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
1260 TRef key = ix->key; 1611 TRef key = ix->key;
1261 if (tref_isinteger(key)) { /* NEWREF needs a TValue as a key. */ 1612 if (tref_isinteger(key)) { /* NEWREF needs a TValue as a key. */
@@ -1268,13 +1619,17 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1268 emitir(IRTG(IR_EQ, IRT_NUM), key, key); /* Check for !NaN. */ 1619 emitir(IRTG(IR_EQ, IRT_NUM), key, key); /* Check for !NaN. */
1269 } 1620 }
1270 } 1621 }
1271 xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); 1622 xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
1272 keybarrier = 0; /* NEWREF already takes care of the key barrier. */ 1623 keybarrier = 0; /* NEWREF already takes care of the key barrier. */
1624#ifdef LUAJIT_ENABLE_TABLE_BUMP
1625 if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */
1626 rec_idx_bump(J, ix);
1627#endif
1273 } 1628 }
1274 } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { 1629 } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
1275 /* Cannot derive that the previous value was non-nil, must do checks. */ 1630 /* Cannot derive that the previous value was non-nil, must do checks. */
1276 if (xrefop == IR_HREF) /* Guard against store to niltv. */ 1631 if (xrefop == IR_HREF) /* Guard against store to niltv. */
1277 emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1632 emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1278 if (ix->idxchain) { /* Metamethod lookup required? */ 1633 if (ix->idxchain) { /* Metamethod lookup required? */
1279 /* A check for NULL metatable is cheaper (hoistable) than a load. */ 1634 /* A check for NULL metatable is cheaper (hoistable) than a load. */
1280 if (!mt) { 1635 if (!mt) {
@@ -1296,7 +1651,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1296 emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); 1651 emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
1297 /* Invalidate neg. metamethod cache for stores with certain string keys. */ 1652 /* Invalidate neg. metamethod cache for stores with certain string keys. */
1298 if (!nommstr(J, ix->key)) { 1653 if (!nommstr(J, ix->key)) {
1299 TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); 1654 TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM);
1300 emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); 1655 emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
1301 } 1656 }
1302 J->needsnap = 1; 1657 J->needsnap = 1;
@@ -1304,6 +1659,72 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1304 } 1659 }
1305} 1660}
1306 1661
1662/* Determine result type of table traversal. */
1663static IRType rec_next_types(GCtab *t, uint32_t idx)
1664{
1665 for (; idx < t->asize; idx++) {
1666 cTValue *a = arrayslot(t, idx);
1667 if (LJ_LIKELY(!tvisnil(a)))
1668 return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8);
1669 }
1670 idx -= t->asize;
1671 for (; idx <= t->hmask; idx++) {
1672 Node *n = &noderef(t->node)[idx];
1673 if (!tvisnil(&n->val))
1674 return itype2irt(&n->key) + (itype2irt(&n->val) << 8);
1675 }
1676 return IRT_NIL + (IRT_NIL << 8);
1677}
1678
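rec_next_types() scans with a single index that covers both parts of the table: indices below asize walk the array part, and after subtracting asize the remainder indexes the hash nodes; it also returns the key and value IRTypes packed into the low and high byte of one value, which lj_record_next() splits with a mask and a shift. A toy model of that single-index traversal (zero stands in for nil; not the real table layout):

#include <stdio.h>

static int toy_next(const int *array, unsigned asize,
                    const int *hash, unsigned hsize, unsigned idx)
{
  for (; idx < asize; idx++)
    if (array[idx]) return (int)idx;           /* next non-nil array slot */
  for (idx -= asize; idx < hsize; idx++)
    if (hash[idx]) return (int)(asize + idx);  /* next non-nil hash node */
  return -1;                                   /* end of traversal */
}

int main(void)
{
  int array[4] = { 1, 0, 3, 0 }, hash[2] = { 0, 7 };
  int idx = 0;
  while ((idx = toy_next(array, 4, hash, 2, (unsigned)idx)) >= 0) {
    printf("visited slot %d\n", idx);
    idx++;  /* resume after the key that was just produced */
  }
  return 0;
}
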
1679/* Record a table traversal step aka next(). */
1680int lj_record_next(jit_State *J, RecordIndex *ix)
1681{
1682 IRType t, tkey, tval;
1683 TRef trvk;
1684 t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo);
1685 tkey = (t & 0xff); tval = (t >> 8);
1686 trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key);
1687 if (ix->mobj || tkey == IRT_NIL) {
1688 TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk);
1689 /* Always check for invalid key from next() for nil result. */
1690 if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1));
1691 ix->mobj = idx;
1692 }
1693 ix->key = lj_record_vload(J, trvk, 1, tkey);
1694 if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */
1695 ix->val = TREF_NIL;
1696 return 1;
1697 } else { /* Need value. */
1698 ix->val = lj_record_vload(J, trvk, 0, tval);
1699 return 2;
1700 }
1701}
1702
1703static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
1704{
1705 RecordIndex ix;
1706 cTValue *basev = J->L->base;
1707 GCtab *t = tabV(&basev[ra-1]);
1708 settabV(J->L, &ix.tabv, t);
1709 ix.tab = getslot(J, ra-1);
1710 ix.idxchain = 0;
1711#ifdef LUAJIT_ENABLE_TABLE_BUMP
1712 if ((J->flags & JIT_F_OPT_SINK)) {
1713 if (t->asize < i+rn-ra)
1714 lj_tab_reasize(J->L, t, i+rn-ra);
1715 setnilV(&ix.keyv);
1716 rec_idx_bump(J, &ix);
1717 }
1718#endif
1719 for (; ra < rn; i++, ra++) {
1720 setintV(&ix.keyv, i);
1721 ix.key = lj_ir_kint(J, i);
1722 copyTV(J->L, &ix.valv, &basev[ra]);
1723 ix.val = getslot(J, ra);
1724 lj_record_idx(J, &ix);
1725 }
1726}
1727
1307/* -- Upvalue access ------------------------------------------------------ */ 1728/* -- Upvalue access ------------------------------------------------------ */
1308 1729
1309/* Check whether upvalue is immutable and ok to constify. */ 1730/* Check whether upvalue is immutable and ok to constify. */
@@ -1340,13 +1761,17 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
1340 int needbarrier = 0; 1761 int needbarrier = 0;
1341 if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ 1762 if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */
1342 TRef tr, kfunc; 1763 TRef tr, kfunc;
1343 lua_assert(val == 0); 1764 lj_assertJ(val == 0, "bad usage");
1344 if (!tref_isk(fn)) { /* Late specialization of current function. */ 1765 if (!tref_isk(fn)) { /* Late specialization of current function. */
1345 if (J->pt->flags >= PROTO_CLC_POLY) 1766 if (J->pt->flags >= PROTO_CLC_POLY)
1346 goto noconstify; 1767 goto noconstify;
1347 kfunc = lj_ir_kfunc(J, J->fn); 1768 kfunc = lj_ir_kfunc(J, J->fn);
1348 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); 1769 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
1349 J->base[-1] = TREF_FRAME | kfunc; 1770#if LJ_FR2
1771 J->base[-2] = kfunc;
1772#else
1773 J->base[-1] = kfunc | TREF_FRAME;
1774#endif
1350 fn = kfunc; 1775 fn = kfunc;
1351 } 1776 }
1352 tr = lj_record_constify(J, uvval(uvp)); 1777 tr = lj_record_constify(J, uvval(uvp));
@@ -1357,16 +1782,16 @@ noconstify:
1357 /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ 1782 /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
1358 uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); 1783 uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
1359 if (!uvp->closed) { 1784 if (!uvp->closed) {
1360 uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
1361 /* In current stack? */ 1785 /* In current stack? */
1362 if (uvval(uvp) >= tvref(J->L->stack) && 1786 if (uvval(uvp) >= tvref(J->L->stack) &&
1363 uvval(uvp) < tvref(J->L->maxstack)) { 1787 uvval(uvp) < tvref(J->L->maxstack)) {
1364 int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); 1788 int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
1365 if (slot >= 0) { /* Aliases an SSA slot? */ 1789 if (slot >= 0) { /* Aliases an SSA slot? */
1366 emitir(IRTG(IR_EQ, IRT_P32), 1790 uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv));
1791 emitir(IRTG(IR_EQ, IRT_PGC),
1367 REF_BASE, 1792 REF_BASE,
1368 emitir(IRT(IR_ADD, IRT_P32), uref, 1793 emitir(IRT(IR_ADD, IRT_PGC), uref,
1369 lj_ir_kint(J, (slot - 1) * -8))); 1794 lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8)));
1370 slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ 1795 slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
1371 if (val == 0) { 1796 if (val == 0) {
1372 return getslot(J, slot); 1797 return getslot(J, slot);
@@ -1377,12 +1802,21 @@ noconstify:
1377 } 1802 }
1378 } 1803 }
1379 } 1804 }
1380 emitir(IRTG(IR_UGT, IRT_P32), 1805 /* IR_UREFO+IRT_IGC is not checked for open-ness at runtime.
1381 emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE), 1806 ** Always marked as a guard, since it might get promoted to IRT_PGC later.
1382 lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); 1807 */
1808 uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv);
1809 uref = tref_ref(uref);
1810 emitir(IRTG(IR_UGT, IRT_PGC),
1811 emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
1812 lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
1383 } else { 1813 } else {
1814 /* If fn is constant, then so is the GCupval*, and the upvalue cannot
1815 ** transition back to open, so no guard is required in this case.
1816 */
1817 IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC;
1818 uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv));
1384 needbarrier = 1; 1819 needbarrier = 1;
1385 uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv));
1386 } 1820 }
1387 if (val == 0) { /* Upvalue load */ 1821 if (val == 0) { /* Upvalue load */
1388 IRType t = itype2irt(uvval(uvp)); 1822 IRType t = itype2irt(uvval(uvp));
@@ -1421,16 +1855,16 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
1421 if (count + J->tailcalled > J->param[JIT_P_recunroll]) { 1855 if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
1422 J->pc++; 1856 J->pc++;
1423 if (J->framedepth + J->retdepth == 0) 1857 if (J->framedepth + J->retdepth == 0)
1424 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ 1858 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
1425 else 1859 else
1426 rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ 1860 lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
1427 } 1861 }
1428 } else { 1862 } else {
1429 if (count > J->param[JIT_P_callunroll]) { 1863 if (count > J->param[JIT_P_callunroll]) {
1430 if (lnk) { /* Possible tail- or up-recursion. */ 1864 if (lnk) { /* Possible tail- or up-recursion. */
1431 lj_trace_flush(J, lnk); /* Flush trace that only returns. */ 1865 lj_trace_flush(J, lnk); /* Flush trace that only returns. */
1432 /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ 1866 /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
1433 hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); 1867 hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u);
1434 } 1868 }
1435 lj_trace_err(J, LJ_TRERR_CUNROLL); 1869 lj_trace_err(J, LJ_TRERR_CUNROLL);
1436 } 1870 }
@@ -1457,11 +1891,14 @@ static void rec_func_setup(jit_State *J)
1457static void rec_func_vararg(jit_State *J) 1891static void rec_func_vararg(jit_State *J)
1458{ 1892{
1459 GCproto *pt = J->pt; 1893 GCproto *pt = J->pt;
1460 BCReg s, fixargs, vframe = J->maxslot+1; 1894 BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
1461 lua_assert((pt->flags & PROTO_VARARG)); 1895 lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function");
1462 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) 1896 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
1463 lj_trace_err(J, LJ_TRERR_STACKOV); 1897 lj_trace_err(J, LJ_TRERR_STACKOV);
1464 J->base[vframe-1] = J->base[-1]; /* Copy function up. */ 1898 J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
1899#if LJ_FR2
1900 J->base[vframe-1] = TREF_FRAME;
1901#endif
1465 /* Copy fixarg slots up and set their original slots to nil. */ 1902 /* Copy fixarg slots up and set their original slots to nil. */
1466 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; 1903 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
1467 for (s = 0; s < fixargs; s++) { 1904 for (s = 0; s < fixargs; s++) {
@@ -1497,9 +1934,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
1497 } 1934 }
1498 J->instunroll = 0; /* Cannot continue across a compiled function. */ 1935 J->instunroll = 0; /* Cannot continue across a compiled function. */
1499 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 1936 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
1500 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ 1937 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
1501 else 1938 else
1502 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ 1939 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
1503} 1940}
1504 1941
1505/* -- Vararg handling ----------------------------------------------------- */ 1942/* -- Vararg handling ----------------------------------------------------- */
@@ -1523,8 +1960,10 @@ static int select_detect(jit_State *J)
1523static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) 1960static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1524{ 1961{
1525 int32_t numparams = J->pt->numparams; 1962 int32_t numparams = J->pt->numparams;
1526 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; 1963 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
1527 lua_assert(frame_isvarg(J->L->base-1)); 1964 lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame");
1965 if (LJ_FR2 && dst > J->maxslot)
1966 J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */
1528 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ 1967 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
1529 ptrdiff_t i; 1968 ptrdiff_t i;
1530 if (nvararg < 0) nvararg = 0; 1969 if (nvararg < 0) nvararg = 0;
@@ -1537,10 +1976,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1537 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) 1976 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
1538 lj_trace_err(J, LJ_TRERR_STACKOV); 1977 lj_trace_err(J, LJ_TRERR_STACKOV);
1539 for (i = 0; i < nresults; i++) 1978 for (i = 0; i < nresults; i++)
1540 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; 1979 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
1541 } else { /* Unknown number of varargs passed to trace. */ 1980 } else { /* Unknown number of varargs passed to trace. */
1542 TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); 1981 TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
1543 int32_t frofs = 8*(1+numparams)+FRAME_VARG; 1982 int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
1544 if (nresults >= 0) { /* Known fixed number of results. */ 1983 if (nresults >= 0) { /* Known fixed number of results. */
1545 ptrdiff_t i; 1984 ptrdiff_t i;
1546 if (nvararg > 0) { 1985 if (nvararg > 0) {
@@ -1549,16 +1988,14 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1549 if (nvararg >= nresults) 1988 if (nvararg >= nresults)
1550 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); 1989 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
1551 else 1990 else
1552 emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); 1991 emitir(IRTGI(IR_EQ), fr,
1553 vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); 1992 lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
1554 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); 1993 vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1994 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
1995 lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2)));
1555 for (i = 0; i < nload; i++) { 1996 for (i = 0; i < nload; i++) {
1556 IRType t = itype2irt(&J->L->base[i-1-nvararg]); 1997 IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
1557 TRef aref = emitir(IRT(IR_AREF, IRT_P32), 1998 J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t);
1558 vbase, lj_ir_kint(J, (int32_t)i));
1559 TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
1560 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
1561 J->base[dst+i] = tr;
1562 } 1999 }
1563 } else { 2000 } else {
1564 emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); 2001 emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
@@ -1604,15 +2041,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1604 } 2041 }
1605 if (idx != 0 && idx <= nvararg) { 2042 if (idx != 0 && idx <= nvararg) {
1606 IRType t; 2043 IRType t;
1607 TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); 2044 TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1608 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); 2045 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
1609 t = itype2irt(&J->L->base[idx-2-nvararg]); 2046 lj_ir_kintpgc(J, frofs-(8<<LJ_FR2)));
1610 aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); 2047 t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
1611 tr = emitir(IRTG(IR_VLOAD, t), aref, 0); 2048 aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
1612 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ 2049 tr = lj_record_vload(J, aref, 0, t);
1613 } 2050 }
1614 J->base[dst-2] = tr; 2051 J->base[dst-2-LJ_FR2] = tr;
1615 J->maxslot = dst-1; 2052 J->maxslot = dst-1-LJ_FR2;
1616 J->bcskip = 2; /* Skip CALLM + select. */ 2053 J->bcskip = 2; /* Skip CALLM + select. */
1617 } else { 2054 } else {
1618 nyivarg: 2055 nyivarg:
@@ -1628,8 +2065,81 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
1628{ 2065{
1629 uint32_t asize = ah & 0x7ff; 2066 uint32_t asize = ah & 0x7ff;
1630 uint32_t hbits = ah >> 11; 2067 uint32_t hbits = ah >> 11;
2068 TRef tr;
1631 if (asize == 0x7ff) asize = 0x801; 2069 if (asize == 0x7ff) asize = 0x801;
1632 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); 2070 tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
2071#ifdef LUAJIT_ENABLE_TABLE_BUMP
2072 J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr);
2073 setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc);
2074 setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
2075#endif
2076 return tr;
2077}
2078
2079/* -- Concatenation ------------------------------------------------------- */
2080
2081typedef struct RecCatDataCP {
2082 jit_State *J;
2083 RecordIndex *ix;
2084} RecCatDataCP;
2085
2086static TValue *rec_mm_concat_cp(lua_State *L, lua_CFunction dummy, void *ud)
2087{
2088 RecCatDataCP *rcd = (RecCatDataCP *)ud;
2089 UNUSED(L); UNUSED(dummy);
2090 rec_mm_arith(rcd->J, rcd->ix, MM_concat); /* Call __concat metamethod. */
2091 return NULL;
2092}
2093
2094static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
2095{
2096 TRef *top = &J->base[topslot];
2097 TValue savetv[5+LJ_FR2];
2098 BCReg s;
2099 RecordIndex ix;
2100 RecCatDataCP rcd;
2101 int errcode;
2102 lj_assertJ(baseslot < topslot, "bad CAT arg");
2103 for (s = baseslot; s <= topslot; s++)
2104 (void)getslot(J, s); /* Ensure all arguments have a reference. */
2105 if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
2106 TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
2107 /* First convert numbers to strings. */
2108 for (trp = top; trp >= base; trp--) {
2109 if (tref_isnumber(*trp))
2110 *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
2111 tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
2112 else if (!tref_isstr(*trp))
2113 break;
2114 }
2115 xbase = ++trp;
2116 tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
2117 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
2118 do {
2119 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++);
2120 } while (trp <= top);
2121 tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
2122 J->maxslot = (BCReg)(xbase - J->base);
2123 if (xbase == base) return tr; /* Return simple concatenation result. */
2124 /* Pass partial result. */
2125 topslot = J->maxslot--;
2126 *xbase = tr;
2127 top = xbase;
2128 setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
2129 } else {
2130 J->maxslot = topslot-1;
2131 copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
2132 }
2133 copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
2134 ix.tab = top[-1];
2135 ix.key = top[0];
2136 memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */
2137 rcd.J = J;
2138 rcd.ix = &ix;
2139 errcode = lj_vm_cpcall(J->L, NULL, &rcd, rec_mm_concat_cp);
2140 memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */
2141 if (errcode) return (TRef)(-errcode);
2142 return 0; /* No result yet. */
1633} 2143}
1634 2144
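rec_cat() signals a failure inside the protected call by returning the negated error code cast to a TRef, and both callers shown in this file treat any value in the top 256 of the 32-bit range (>= 0xffffff00) as such an error and re-throw it. A standalone sketch of that encode/decode convention, with a made-up error code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
  int32_t errcode = 2;                 /* example error code, not a real one */
  uint32_t tr = (uint32_t)(-errcode);  /* encode: small negative, cast to u32 */
  if (tr >= 0xffffff00u)
    printf("error %d propagated\n", -(int32_t)tr);
  else
    printf("normal result 0x%08x\n", (unsigned)tr);
  return 0;
}
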
1635/* -- Record bytecode ops ------------------------------------------------- */ 2145/* -- Record bytecode ops ------------------------------------------------- */
@@ -1650,7 +2160,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
1650 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); 2160 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
1651 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; 2161 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1652 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ 2162 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
2163#if LJ_FR2
2164 SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
2165 uint64_t pcbase;
2166 memcpy(&pcbase, flink, sizeof(uint64_t));
2167 pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
2168 memcpy(flink, &pcbase, sizeof(uint64_t));
2169#else
1653 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); 2170 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
2171#endif
1654 J->needsnap = 1; 2172 J->needsnap = 1;
1655 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); 2173 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
1656 lj_snap_shrink(J); /* Shrink last snapshot if possible. */ 2174 lj_snap_shrink(J); /* Shrink last snapshot if possible. */
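
With LJ_FR2 the frame link at the end of the snapshot map is a 64-bit value stored across 32-bit map entries, so rec_comp_fixup() above rewrites it through memcpy rather than a typed 64-bit store, keeping the low byte and replacing the packed PC. A minimal sketch of the same pattern, assuming nothing about the real snapmap layout:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
  uint32_t map[2] = { 0x000000aa, 0 };  /* two 32-bit entries hold the link */
  uint64_t link, newpc = 0x1234;        /* placeholder "PC" value */
  memcpy(&link, map, sizeof(link));     /* read without aliasing a uint64_t */
  link = (link & 0xff) | (newpc << 8);  /* keep low byte, replace the PC */
  memcpy(map, &link, sizeof(link));
  printf("entries: 0x%08x 0x%08x\n", (unsigned)map[0], (unsigned)map[1]);
  return 0;
}
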
@@ -1670,7 +2188,7 @@ void lj_record_ins(jit_State *J)
1670 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { 2188 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
1671 switch (J->postproc) { 2189 switch (J->postproc) {
1672 case LJ_POST_FIXCOMP: /* Fixup comparison. */ 2190 case LJ_POST_FIXCOMP: /* Fixup comparison. */
1673 pc = frame_pc(&J2G(J)->tmptv); 2191 pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64;
1674 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); 2192 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
1675 /* fallthrough */ 2193 /* fallthrough */
1676 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ 2194 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */
@@ -1708,7 +2226,7 @@ void lj_record_ins(jit_State *J)
1708 if (bc_op(*J->pc) >= BC__MAX) 2226 if (bc_op(*J->pc) >= BC__MAX)
1709 return; 2227 return;
1710 break; 2228 break;
1711 default: lua_assert(0); break; 2229 default: lj_assertJ(0, "bad post-processing mode"); break;
1712 } 2230 }
1713 J->postproc = LJ_POST_NONE; 2231 J->postproc = LJ_POST_NONE;
1714 } 2232 }
@@ -1716,7 +2234,7 @@ void lj_record_ins(jit_State *J)
1716 /* Need snapshot before recording next bytecode (e.g. after a store). */ 2234 /* Need snapshot before recording next bytecode (e.g. after a store). */
1717 if (J->needsnap) { 2235 if (J->needsnap) {
1718 J->needsnap = 0; 2236 J->needsnap = 0;
1719 lj_snap_purge(J); 2237 if (J->pt) lj_snap_purge(J);
1720 lj_snap_add(J); 2238 lj_snap_add(J);
1721 J->mergesnap = 1; 2239 J->mergesnap = 1;
1722 } 2240 }
@@ -1738,6 +2256,10 @@ void lj_record_ins(jit_State *J)
1738 rec_check_ir(J); 2256 rec_check_ir(J);
1739#endif 2257#endif
1740 2258
2259#if LJ_HASPROFILE
2260 rec_profile_ins(J, pc);
2261#endif
2262
1741 /* Keep a copy of the runtime values of var/num/str operands. */ 2263 /* Keep a copy of the runtime values of var/num/str operands. */
1742#define rav (&ix.valv) 2264#define rav (&ix.valv)
1743#define rbv (&ix.tabv) 2265#define rbv (&ix.tabv)
@@ -1764,9 +2286,10 @@ void lj_record_ins(jit_State *J)
1764 switch (bcmode_c(op)) { 2286 switch (bcmode_c(op)) {
1765 case BCMvar: 2287 case BCMvar:
1766 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; 2288 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
1767 case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; 2289 case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
1768 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); 2290 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
1769 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : 2291 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
2292 tv->u32.hi == LJ_KEYINDEX ? (lj_ir_kint(J, 0) | TREF_KEYINDEX) :
1770 lj_ir_knumint(J, numV(tv)); } break; 2293 lj_ir_knumint(J, numV(tv)); } break;
1771 case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); 2294 case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc));
1772 setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; 2295 setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
@@ -1859,6 +2382,18 @@ void lj_record_ins(jit_State *J)
1859 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 2382 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1860 break; 2383 break;
1861 2384
2385 case BC_ISTYPE: case BC_ISNUM:
2386 /* These coercions need to correspond with lj_meta_istype(). */
2387 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
2388 ra = lj_opt_narrow_toint(J, ra);
2389 else if (rc == ~LJ_TNUMX+2)
2390 ra = lj_ir_tonum(J, ra);
2391 else if (rc == ~LJ_TSTR+1)
2392 ra = lj_ir_tostr(J, ra);
2393 /* else: type specialization suffices. */
2394 J->base[bc_a(ins)] = ra;
2395 break;
2396
1862 /* -- Unary ops --------------------------------------------------------- */ 2397 /* -- Unary ops --------------------------------------------------------- */
1863 2398
1864 case BC_NOT: 2399 case BC_NOT:
@@ -1870,7 +2405,7 @@ void lj_record_ins(jit_State *J)
1870 if (tref_isstr(rc)) 2405 if (tref_isstr(rc))
1871 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); 2406 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
1872 else if (!LJ_52 && tref_istab(rc)) 2407 else if (!LJ_52 && tref_istab(rc))
1873 rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); 2408 rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL);
1874 else 2409 else
1875 rc = rec_mm_len(J, rc, rcv); 2410 rc = rec_mm_len(J, rc, rcv);
1876 break; 2411 break;
@@ -1917,16 +2452,30 @@ void lj_record_ins(jit_State *J)
1917 2452
1918 case BC_POW: 2453 case BC_POW:
1919 if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) 2454 if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
1920 rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv); 2455 rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW);
1921 else 2456 else
1922 rc = rec_mm_arith(J, &ix, MM_pow); 2457 rc = rec_mm_arith(J, &ix, MM_pow);
1923 break; 2458 break;
1924 2459
2460 /* -- Miscellaneous ops ------------------------------------------------- */
2461
2462 case BC_CAT:
2463 rc = rec_cat(J, rb, rc);
2464 if (rc >= 0xffffff00)
2465 lj_err_throw(J->L, -(int32_t)rc); /* Propagate errors. */
2466 break;
2467
1925 /* -- Constant and move ops --------------------------------------------- */ 2468 /* -- Constant and move ops --------------------------------------------- */
1926 2469
1927 case BC_MOV: 2470 case BC_MOV:
1928 /* Clear gap of method call to avoid resurrecting previous refs. */ 2471 /* Clear gap of method call to avoid resurrecting previous refs. */
1929 if (ra > J->maxslot) J->base[ra-1] = 0; 2472 if (ra > J->maxslot) {
2473#if LJ_FR2
2474 memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
2475#else
2476 J->base[ra-1] = 0;
2477#endif
2478 }
1930 break; 2479 break;
1931 case BC_KSTR: case BC_KNUM: case BC_KPRI: 2480 case BC_KSTR: case BC_KNUM: case BC_KPRI:
1932 break; 2481 break;
@@ -1934,6 +2483,8 @@ void lj_record_ins(jit_State *J)
1934 rc = lj_ir_kint(J, (int32_t)(int16_t)rc); 2483 rc = lj_ir_kint(J, (int32_t)(int16_t)rc);
1935 break; 2484 break;
1936 case BC_KNIL: 2485 case BC_KNIL:
2486 if (LJ_FR2 && ra > J->maxslot)
2487 J->base[ra-1] = 0;
1937 while (ra <= rc) 2488 while (ra <= rc)
1938 J->base[ra++] = TREF_NIL; 2489 J->base[ra++] = TREF_NIL;
1939 if (rc >= J->maxslot) J->maxslot = rc+1; 2490 if (rc >= J->maxslot) J->maxslot = rc+1;
@@ -1970,6 +2521,15 @@ void lj_record_ins(jit_State *J)
1970 ix.idxchain = LJ_MAX_IDXCHAIN; 2521 ix.idxchain = LJ_MAX_IDXCHAIN;
1971 rc = lj_record_idx(J, &ix); 2522 rc = lj_record_idx(J, &ix);
1972 break; 2523 break;
2524 case BC_TGETR: case BC_TSETR:
2525 ix.idxchain = 0;
2526 rc = lj_record_idx(J, &ix);
2527 break;
2528
2529 case BC_TSETM:
2530 rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
2531 J->maxslot = ra; /* The table slot at ra-1 is the highest used slot. */
2532 break;
1973 2533
1974 case BC_TNEW: 2534 case BC_TNEW:
1975 rc = rec_tnew(J, rc); 2535 rc = rec_tnew(J, rc);
@@ -1977,33 +2537,38 @@ void lj_record_ins(jit_State *J)
1977 case BC_TDUP: 2537 case BC_TDUP:
1978 rc = emitir(IRTG(IR_TDUP, IRT_TAB), 2538 rc = emitir(IRTG(IR_TDUP, IRT_TAB),
1979 lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); 2539 lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0);
2540#ifdef LUAJIT_ENABLE_TABLE_BUMP
2541 J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc);
2542 setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc);
2543 setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
2544#endif
1980 break; 2545 break;
1981 2546
1982 /* -- Calls and vararg handling ----------------------------------------- */ 2547 /* -- Calls and vararg handling ----------------------------------------- */
1983 2548
1984 case BC_ITERC: 2549 case BC_ITERC:
1985 J->base[ra] = getslot(J, ra-3); 2550 J->base[ra] = getslot(J, ra-3);
1986 J->base[ra+1] = getslot(J, ra-2); 2551 J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
1987 J->base[ra+2] = getslot(J, ra-1); 2552 J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
1988 { /* Do the actual copy now because lj_record_call needs the values. */ 2553 { /* Do the actual copy now because lj_record_call needs the values. */
1989 TValue *b = &J->L->base[ra]; 2554 TValue *b = &J->L->base[ra];
1990 copyTV(J->L, b, b-3); 2555 copyTV(J->L, b, b-3);
1991 copyTV(J->L, b+1, b-2); 2556 copyTV(J->L, b+1+LJ_FR2, b-2);
1992 copyTV(J->L, b+2, b-1); 2557 copyTV(J->L, b+2+LJ_FR2, b-1);
1993 } 2558 }
1994 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2559 lj_record_call(J, ra, (ptrdiff_t)rc-1);
1995 break; 2560 break;
1996 2561
1997 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ 2562 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
1998 case BC_CALLM: 2563 case BC_CALLM:
1999 rc = (BCReg)(J->L->top - J->L->base) - ra; 2564 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
2000 /* fallthrough */ 2565 /* fallthrough */
2001 case BC_CALL: 2566 case BC_CALL:
2002 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2567 lj_record_call(J, ra, (ptrdiff_t)rc-1);
2003 break; 2568 break;
2004 2569
2005 case BC_CALLMT: 2570 case BC_CALLMT:
2006 rc = (BCReg)(J->L->top - J->L->base) - ra; 2571 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
2007 /* fallthrough */ 2572 /* fallthrough */
2008 case BC_CALLT: 2573 case BC_CALLT:
2009 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); 2574 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
@@ -2020,6 +2585,9 @@ void lj_record_ins(jit_State *J)
2020 rc = (BCReg)(J->L->top - J->L->base) - ra + 1; 2585 rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
2021 /* fallthrough */ 2586 /* fallthrough */
2022 case BC_RET: case BC_RET0: case BC_RET1: 2587 case BC_RET: case BC_RET0: case BC_RET1:
2588#if LJ_HASPROFILE
2589 rec_profile_ret(J);
2590#endif
2023 lj_record_ret(J, ra, (ptrdiff_t)rc-1); 2591 lj_record_ret(J, ra, (ptrdiff_t)rc-1);
2024 break; 2592 break;
2025 2593
@@ -2030,9 +2598,10 @@ void lj_record_ins(jit_State *J)
2030 J->loopref = J->cur.nins; 2598 J->loopref = J->cur.nins;
2031 break; 2599 break;
2032 case BC_JFORI: 2600 case BC_JFORI:
2033 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); 2601 lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL,
2602 "JFORI does not point to JFORL");
2034 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ 2603 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
2035 rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); 2604 lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
2036 /* Continue tracing if the loop is not entered. */ 2605 /* Continue tracing if the loop is not entered. */
2037 break; 2606 break;
2038 2607
@@ -2042,6 +2611,9 @@ void lj_record_ins(jit_State *J)
2042 case BC_ITERL: 2611 case BC_ITERL:
2043 rec_loop_interp(J, pc, rec_iterl(J, *pc)); 2612 rec_loop_interp(J, pc, rec_iterl(J, *pc));
2044 break; 2613 break;
2614 case BC_ITERN:
2615 rec_loop_interp(J, pc, rec_itern(J, ra, rb));
2616 break;
2045 case BC_LOOP: 2617 case BC_LOOP:
2046 rec_loop_interp(J, pc, rec_loop(J, ra, 1)); 2618 rec_loop_interp(J, pc, rec_loop(J, ra, 1));
2047 break; 2619 break;
@@ -2054,7 +2626,8 @@ void lj_record_ins(jit_State *J)
2054 break; 2626 break;
2055 case BC_JLOOP: 2627 case BC_JLOOP:
2056 rec_loop_jit(J, rc, rec_loop(J, ra, 2628 rec_loop_jit(J, rc, rec_loop(J, ra,
2057 !bc_isret(bc_op(traceref(J, rc)->startins)))); 2629 !bc_isret(bc_op(traceref(J, rc)->startins)) &&
2630 bc_op(traceref(J, rc)->startins) != BC_ITERN));
2058 break; 2631 break;
2059 2632
2060 case BC_IFORL: 2633 case BC_IFORL:
@@ -2070,6 +2643,10 @@ void lj_record_ins(jit_State *J)
2070 J->maxslot = ra; /* Shrink used slots. */ 2643 J->maxslot = ra; /* Shrink used slots. */
2071 break; 2644 break;
2072 2645
2646 case BC_ISNEXT:
2647 rec_isnext(J, ra);
2648 break;
2649
2073 /* -- Function headers -------------------------------------------------- */ 2650 /* -- Function headers -------------------------------------------------- */
2074 2651
2075 case BC_FUNCF: 2652 case BC_FUNCF:
@@ -2084,7 +2661,8 @@ void lj_record_ins(jit_State *J)
2084 rec_func_lua(J); 2661 rec_func_lua(J);
2085 break; 2662 break;
2086 case BC_JFUNCV: 2663 case BC_JFUNCV:
2087 lua_assert(0); /* Cannot happen. No hotcall counting for vararg funcs. */ 2664 /* Cannot happen. No hotcall counting for vararg funcs. */
2665 lj_assertJ(0, "unsupported vararg hotcall");
2088 break; 2666 break;
2089 2667
2090 case BC_FUNCC: 2668 case BC_FUNCC:
@@ -2098,12 +2676,8 @@ void lj_record_ins(jit_State *J)
2098 break; 2676 break;
2099 } 2677 }
2100 /* fallthrough */ 2678 /* fallthrough */
2101 case BC_ITERN:
2102 case BC_ISNEXT:
2103 case BC_CAT:
2104 case BC_UCLO: 2679 case BC_UCLO:
2105 case BC_FNEW: 2680 case BC_FNEW:
2106 case BC_TSETM:
2107 setintV(&J->errinfo, (int32_t)op); 2681 setintV(&J->errinfo, (int32_t)op);
2108 lj_trace_err_info(J, LJ_TRERR_NYIBC); 2682 lj_trace_err_info(J, LJ_TRERR_NYIBC);
2109 break; 2683 break;
@@ -2112,15 +2686,21 @@ void lj_record_ins(jit_State *J)
2112 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ 2686 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
2113 if (bcmode_a(op) == BCMdst && rc) { 2687 if (bcmode_a(op) == BCMdst && rc) {
2114 J->base[ra] = rc; 2688 J->base[ra] = rc;
2115 if (ra >= J->maxslot) J->maxslot = ra+1; 2689 if (ra >= J->maxslot) {
2690#if LJ_FR2
2691 if (ra > J->maxslot) J->base[ra-1] = 0;
2692#endif
2693 J->maxslot = ra+1;
2694 }
2116 } 2695 }
2117 2696
2118#undef rav 2697#undef rav
2119#undef rbv 2698#undef rbv
2120#undef rcv 2699#undef rcv
2121 2700
2122 /* Limit the number of recorded IR instructions. */ 2701 /* Limit the number of recorded IR instructions and constants. */
2123 if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) 2702 if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] ||
2703 J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst])
2124 lj_trace_err(J, LJ_TRERR_TRACEOV); 2704 lj_trace_err(J, LJ_TRERR_TRACEOV);
2125} 2705}
2126 2706
@@ -2140,13 +2720,22 @@ static const BCIns *rec_setup_root(jit_State *J)
2140 J->bc_min = pc; 2720 J->bc_min = pc;
2141 break; 2721 break;
2142 case BC_ITERL: 2722 case BC_ITERL:
2143 lua_assert(bc_op(pc[-1]) == BC_ITERC); 2723 if (bc_op(pc[-1]) == BC_JLOOP)
2724 lj_trace_err(J, LJ_TRERR_LINNER);
2725 lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL");
2144 J->maxslot = ra + bc_b(pc[-1]) - 1; 2726 J->maxslot = ra + bc_b(pc[-1]) - 1;
2145 J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); 2727 J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
2146 pc += 1+bc_j(ins); 2728 pc += 1+bc_j(ins);
2147 lua_assert(bc_op(pc[-1]) == BC_JMP); 2729 lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1");
2148 J->bc_min = pc; 2730 J->bc_min = pc;
2149 break; 2731 break;
2732 case BC_ITERN:
2733 lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN");
2734 J->maxslot = ra;
2735 J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns);
2736 J->bc_min = pc+2 + bc_j(pc[1]);
2737 J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */
2738 break;
2150 case BC_LOOP: 2739 case BC_LOOP:
2151 /* Only check BC range for real loops, but not for "repeat until true". */ 2740 /* Only check BC range for real loops, but not for "repeat until true". */
2152 pcj = pc + bc_j(ins); 2741 pcj = pc + bc_j(ins);
@@ -2169,8 +2758,14 @@ static const BCIns *rec_setup_root(jit_State *J)
2169 J->maxslot = J->pt->numparams; 2758 J->maxslot = J->pt->numparams;
2170 pc++; 2759 pc++;
2171 break; 2760 break;
2761 case BC_CALLM:
2762 case BC_CALL:
2763 case BC_ITERC:
2764 /* No bytecode range check for stitched traces. */
2765 pc++;
2766 break;
2172 default: 2767 default:
2173 lua_assert(0); 2768 lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins));
2174 break; 2769 break;
2175 } 2770 }
2176 return pc; 2771 return pc;
@@ -2184,11 +2779,14 @@ void lj_record_setup(jit_State *J)
2184 /* Initialize state related to current trace. */ 2779 /* Initialize state related to current trace. */
2185 memset(J->slot, 0, sizeof(J->slot)); 2780 memset(J->slot, 0, sizeof(J->slot));
2186 memset(J->chain, 0, sizeof(J->chain)); 2781 memset(J->chain, 0, sizeof(J->chain));
2782#ifdef LUAJIT_ENABLE_TABLE_BUMP
2783 memset(J->rbchash, 0, sizeof(J->rbchash));
2784#endif
2187 memset(J->bpropcache, 0, sizeof(J->bpropcache)); 2785 memset(J->bpropcache, 0, sizeof(J->bpropcache));
2188 J->scev.idx = REF_NIL; 2786 J->scev.idx = REF_NIL;
2189 setmref(J->scev.pc, NULL); 2787 setmref(J->scev.pc, NULL);
2190 2788
2191 J->baseslot = 1; /* Invoking function is at base[-1]. */ 2789 J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
2192 J->base = J->slot + J->baseslot; 2790 J->base = J->slot + J->baseslot;
2193 J->maxslot = 0; 2791 J->maxslot = 0;
2194 J->framedepth = 0; 2792 J->framedepth = 0;
@@ -2203,7 +2801,7 @@ void lj_record_setup(jit_State *J)
2203 J->bc_extent = ~(MSize)0; 2801 J->bc_extent = ~(MSize)0;
2204 2802
2205 /* Emit instructions for fixed references. Also triggers initial IR alloc. */ 2803 /* Emit instructions for fixed references. Also triggers initial IR alloc. */
2206 emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); 2804 emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
2207 for (i = 0; i <= 2; i++) { 2805 for (i = 0; i <= 2; i++) {
2208 IRIns *ir = IR(REF_NIL-i); 2806 IRIns *ir = IR(REF_NIL-i);
2209 ir->i = 0; 2807 ir->i = 0;
@@ -2234,10 +2832,15 @@ void lj_record_setup(jit_State *J)
2234 } 2832 }
2235 lj_snap_replay(J, T); 2833 lj_snap_replay(J, T);
2236 sidecheck: 2834 sidecheck:
2237 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || 2835 if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
2238 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + 2836 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2239 J->param[JIT_P_tryside]) { 2837 J->param[JIT_P_tryside])) {
2240 rec_stop(J, LJ_TRLINK_INTERP, 0); 2838 if (bc_op(*J->pc) == BC_JLOOP) {
2839 BCIns startins = traceref(J, bc_d(*J->pc))->startins;
2840 if (bc_op(startins) == BC_ITERN)
2841 rec_itern(J, bc_a(startins), bc_b(startins));
2842 }
2843 lj_record_stop(J, LJ_TRLINK_INTERP, 0);
2241 } 2844 }
2242 } else { /* Root trace. */ 2845 } else { /* Root trace. */
2243 J->cur.root = 0; 2846 J->cur.root = 0;
@@ -2245,13 +2848,20 @@ void lj_record_setup(jit_State *J)
2245 J->pc = rec_setup_root(J); 2848 J->pc = rec_setup_root(J);
2246 /* Note: the loop instruction itself is recorded at the end and not 2849 /* Note: the loop instruction itself is recorded at the end and not
2247 ** at the start! So snapshot #0 needs to point to the *next* instruction. 2850 ** at the start! So snapshot #0 needs to point to the *next* instruction.
2851 ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST.
2248 */ 2852 */
2249 lj_snap_add(J); 2853 lj_snap_add(J);
2250 if (bc_op(J->cur.startins) == BC_FORL) 2854 if (bc_op(J->cur.startins) == BC_FORL)
2251 rec_for_loop(J, J->pc-1, &J->scev, 1); 2855 rec_for_loop(J, J->pc-1, &J->scev, 1);
2856 else if (bc_op(J->cur.startins) == BC_ITERC)
2857 J->startpc = NULL;
2252 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) 2858 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2253 lj_trace_err(J, LJ_TRERR_STACKOV); 2859 lj_trace_err(J, LJ_TRERR_STACKOV);
2254 } 2860 }
2861#if LJ_HASPROFILE
2862 J->prev_pt = NULL;
2863 J->prev_line = -1;
2864#endif
2255#ifdef LUAJIT_ENABLE_CHECKHOOK 2865#ifdef LUAJIT_ENABLE_CHECKHOOK
2256 /* Regularly check for instruction/line hooks from compiled code and 2866 /* Regularly check for instruction/line hooks from compiled code and
2257 ** exit to the interpreter if the hooks are set. 2867 ** exit to the interpreter if the hooks are set.
diff --git a/src/lj_record.h b/src/lj_record.h
index 08b7ba20..61ccb395 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -28,7 +28,9 @@ typedef struct RecordIndex {
28 28
29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, 29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
30 cTValue *av, cTValue *bv); 30 cTValue *av, cTValue *bv);
31LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
31LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); 32LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
33LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t);
32 34
33LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); 35LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
34LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs); 36LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs);
@@ -36,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults);
36 38
37LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); 39LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm);
38LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); 40LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix);
41LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix);
39 42
40LJ_FUNC void lj_record_ins(jit_State *J); 43LJ_FUNC void lj_record_ins(jit_State *J);
41LJ_FUNC void lj_record_setup(jit_State *J); 44LJ_FUNC void lj_record_setup(jit_State *J);
diff --git a/src/lj_serialize.c b/src/lj_serialize.c
new file mode 100644
index 00000000..83881766
--- /dev/null
+++ b/src/lj_serialize.c
@@ -0,0 +1,539 @@
1/*
2** Object de/serialization.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_serialize_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASBUFFER
12#include "lj_err.h"
13#include "lj_buf.h"
14#include "lj_str.h"
15#include "lj_tab.h"
16#include "lj_udata.h"
17#if LJ_HASFFI
18#include "lj_ctype.h"
19#include "lj_cdata.h"
20#endif
21#if LJ_HASJIT
22#include "lj_ir.h"
23#endif
24#include "lj_serialize.h"
25
26/* Tags for internal serialization format. */
27enum {
28 SER_TAG_NIL, /* 0x00 */
29 SER_TAG_FALSE,
30 SER_TAG_TRUE,
31 SER_TAG_NULL,
32 SER_TAG_LIGHTUD32,
33 SER_TAG_LIGHTUD64,
34 SER_TAG_INT,
35 SER_TAG_NUM,
36 SER_TAG_TAB, /* 0x08 */
37 SER_TAG_DICT_MT = SER_TAG_TAB+6,
38 SER_TAG_DICT_STR,
39 SER_TAG_INT64, /* 0x10 */
40 SER_TAG_UINT64,
41 SER_TAG_COMPLEX,
42 SER_TAG_0x13,
43 SER_TAG_0x14,
44 SER_TAG_0x15,
45 SER_TAG_0x16,
46 SER_TAG_0x17,
47 SER_TAG_0x18, /* 0x18 */
48 SER_TAG_0x19,
49 SER_TAG_0x1a,
50 SER_TAG_0x1b,
51 SER_TAG_0x1c,
52 SER_TAG_0x1d,
53 SER_TAG_0x1e,
54 SER_TAG_0x1f,
55 SER_TAG_STR, /* 0x20 + str->len */
56};
57LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);
58
59/* -- Helper functions ---------------------------------------------------- */
60
61static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz)
62{
63 if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) {
64 sbx->w = w;
65 w = lj_buf_more2((SBuf *)sbx, sz);
66 }
67 return w;
68}
69
70/* Write U124 to buffer. */
71static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
72{
73 if (v < 0x1fe0) {
74 v -= 0xe0;
75 *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v;
76 } else {
77 *w++ = (char)0xff;
78#if LJ_BE
79 v = lj_bswap(v);
80#endif
81 memcpy(w, &v, 4); w += 4;
82 }
83 return w;
84}
85
86static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
87{
88 if (LJ_LIKELY(v < 0xe0)) {
89 *w++ = (char)v;
90 return w;
91 } else {
92 return serialize_wu124_(w, v);
93 }
94}
95
96static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv)
97{
98 uint32_t v = *pv;
99 if (v != 0xff) {
100 if (r >= w) return NULL;
101 v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++;
102 } else {
103 if (r + 4 > w) return NULL;
104 v = lj_getu32(r); r += 4;
105#if LJ_BE
106 v = lj_bswap(v);
107#endif
108 }
109 *pv = v;
110 return r;
111}
112
113static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
114{
115 if (LJ_LIKELY(r < w)) {
116 uint32_t v = *(uint8_t *)r; r++;
117 *pv = v;
118 if (LJ_UNLIKELY(v >= 0xe0)) {
119 r = serialize_ru124_(r, w, pv);
120 }
121 return r;
122 }
123 return NULL;
124}
125
126/* Prepare string dictionary for use (once). */
127void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
128{
129 if (!dict->hmask) { /* No hash part means not prepared, yet. */
130 MSize i, len = lj_tab_len(dict);
131 if (!len) return;
132 lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
133 for (i = 1; i <= len && i < dict->asize; i++) {
134 cTValue *o = arrayslot(dict, i);
135 if (tvisstr(o)) {
136 if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */
137 lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
138 }
139 } else if (!tvisfalse(o)) {
140 lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
141 }
142 }
143 }
144}
145
146/* Prepare metatable dictionary for use (once). */
147void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
148{
149 if (!dict->hmask) { /* No hash part means not prepared, yet. */
150 MSize i, len = lj_tab_len(dict);
151 if (!len) return;
152 lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
153 for (i = 1; i <= len && i < dict->asize; i++) {
154 cTValue *o = arrayslot(dict, i);
155 if (tvistab(o)) {
156 if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */
157 lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
158 }
159 } else if (!tvisfalse(o)) {
160 lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
161 }
162 }
163 }
164}
165
166/* -- Internal serializer ------------------------------------------------- */
167
168/* Put serialized object into buffer. */
169static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
170{
171 if (LJ_LIKELY(tvisstr(o))) {
172 const GCstr *str = strV(o);
173 MSize len = str->len;
174 w = serialize_more(w, sbx, 5+len);
175 w = serialize_wu124(w, SER_TAG_STR + len);
176 w = lj_buf_wmem(w, strdata(str), len);
177 } else if (tvisint(o)) {
178 uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
179 w = serialize_more(w, sbx, 1+4);
180 *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
181 } else if (tvisnum(o)) {
182 uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
183 w = serialize_more(w, sbx, 1+sizeof(lua_Number));
184 *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
185 } else if (tvispri(o)) {
186 w = serialize_more(w, sbx, 1);
187 *w++ = (char)(SER_TAG_NIL + ~itype(o));
188 } else if (tvistab(o)) {
189 const GCtab *t = tabV(o);
190 uint32_t narray = 0, nhash = 0, one = 2;
191 if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
192 sbx->depth--;
193 if (t->asize > 0) { /* Determine max. length of array part. */
194 ptrdiff_t i;
195 TValue *array = tvref(t->array);
196 for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
197 if (!tvisnil(&array[i]))
198 break;
199 narray = (uint32_t)(i+1);
200 if (narray && tvisnil(&array[0])) one = 4;
201 }
202 if (t->hmask > 0) { /* Count number of used hash slots. */
203 uint32_t i, hmask = t->hmask;
204 Node *node = noderef(t->node);
205 for (i = 0; i <= hmask; i++)
206 nhash += !tvisnil(&node[i].val);
207 }
208 /* Write metatable index. */
209 if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
210 TValue mto;
211 Node *n;
212 settabV(sbufL(sbx), &mto, tabref(t->metatable));
213 n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
214 do {
215 if (n->key.u64 == mto.u64) {
216 uint32_t idx = n->val.u32.lo;
217 w = serialize_more(w, sbx, 1+5);
218 *w++ = SER_TAG_DICT_MT;
219 w = serialize_wu124(w, idx);
220 break;
221 }
222 } while ((n = nextnode(n)));
223 }
224 /* Write number of array slots and hash slots. */
225 w = serialize_more(w, sbx, 1+2*5);
226 *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
227 if (narray) w = serialize_wu124(w, narray);
228 if (nhash) w = serialize_wu124(w, nhash);
229 if (narray) { /* Write array entries. */
230 cTValue *oa = tvref(t->array) + (one >> 2);
231 cTValue *oe = tvref(t->array) + narray;
232 while (oa < oe) w = serialize_put(w, sbx, oa++);
233 }
234 if (nhash) { /* Write hash entries. */
235 const Node *node = noderef(t->node) + t->hmask;
236 GCtab *dict_str = tabref(sbx->dict_str);
237 if (LJ_UNLIKELY(dict_str)) {
238 for (;; node--)
239 if (!tvisnil(&node->val)) {
240 if (LJ_LIKELY(tvisstr(&node->key))) {
241 /* Inlined lj_tab_getstr is 30% faster. */
242 const GCstr *str = strV(&node->key);
243 Node *n = hashstr(dict_str, str);
244 do {
245 if (tvisstr(&n->key) && strV(&n->key) == str) {
246 uint32_t idx = n->val.u32.lo;
247 w = serialize_more(w, sbx, 1+5);
248 *w++ = SER_TAG_DICT_STR;
249 w = serialize_wu124(w, idx);
250 break;
251 }
252 n = nextnode(n);
253 if (!n) {
254 MSize len = str->len;
255 w = serialize_more(w, sbx, 5+len);
256 w = serialize_wu124(w, SER_TAG_STR + len);
257 w = lj_buf_wmem(w, strdata(str), len);
258 break;
259 }
260 } while (1);
261 } else {
262 w = serialize_put(w, sbx, &node->key);
263 }
264 w = serialize_put(w, sbx, &node->val);
265 if (--nhash == 0) break;
266 }
267 } else {
268 for (;; node--)
269 if (!tvisnil(&node->val)) {
270 w = serialize_put(w, sbx, &node->key);
271 w = serialize_put(w, sbx, &node->val);
272 if (--nhash == 0) break;
273 }
274 }
275 }
276 sbx->depth++;
277#if LJ_HASFFI
278 } else if (tviscdata(o)) {
279 CTState *cts = ctype_cts(sbufL(sbx));
280 CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
281 uint8_t *sp = cdataptr(cdataV(o));
282 if (ctype_isinteger(s->info) && s->size == 8) {
283 w = serialize_more(w, sbx, 1+8);
284 *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
285#if LJ_BE
286 { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
287#else
288 memcpy(w, sp, 8);
289#endif
290 w += 8;
291 } else if (ctype_iscomplex(s->info) && s->size == 16) {
292 w = serialize_more(w, sbx, 1+16);
293 *w++ = SER_TAG_COMPLEX;
294#if LJ_BE
295 { /* Only swap the doubles. The re/im order stays the same. */
296 uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
297 u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
298 }
299#else
300 memcpy(w, sp, 16);
301#endif
302 w += 16;
303 } else {
304 goto badenc; /* NYI other cdata */
305 }
306#endif
307 } else if (tvislightud(o)) {
308 uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o);
309 w = serialize_more(w, sbx, 1+sizeof(ud));
310 if (ud == 0) {
311 *w++ = SER_TAG_NULL;
312 } else if (LJ_32 || checku32(ud)) {
313#if LJ_BE && LJ_64
314 ud = lj_bswap64(ud);
315#elif LJ_BE
316 ud = lj_bswap(ud);
317#endif
318 *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
319#if LJ_64
320 } else {
321#if LJ_BE
322 ud = lj_bswap64(ud);
323#endif
324 *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
325#endif
326 }
327 } else {
328 /* NYI userdata */
329#if LJ_HASFFI
330 badenc:
331#endif
332 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o));
333 }
334 return w;
335}
336
337/* Get serialized object from buffer. */
338static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
339{
340 char *w = sbx->w;
341 uint32_t tp;
342 r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
343 if (LJ_LIKELY(tp >= SER_TAG_STR)) {
344 uint32_t len = tp - SER_TAG_STR;
345 if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob;
346 setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len));
347 r += len;
348 } else if (tp == SER_TAG_INT) {
349 if (LJ_UNLIKELY(r + 4 > w)) goto eob;
350 setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)));
351 r += 4;
352 } else if (tp == SER_TAG_NUM) {
353 if (LJ_UNLIKELY(r + 8 > w)) goto eob;
354 memcpy(o, r, 8); r += 8;
355#if LJ_BE
356 o->u64 = lj_bswap64(o->u64);
357#endif
358 if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. */
359 } else if (tp <= SER_TAG_TRUE) {
360 setpriV(o, ~tp);
361 } else if (tp == SER_TAG_DICT_STR) {
362 GCtab *dict_str;
363 uint32_t idx;
364 r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
365 idx++;
366 dict_str = tabref(sbx->dict_str);
367 if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
368 copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
369 else
370 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
371 } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
372 uint32_t narray = 0, nhash = 0;
373 GCtab *t, *mt = NULL;
374 if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
375 sbx->depth--;
376 if (tp == SER_TAG_DICT_MT) {
377 GCtab *dict_mt;
378 uint32_t idx;
379 r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
380 idx++;
381 dict_mt = tabref(sbx->dict_mt);
382 if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
383 mt = tabV(arrayslot(dict_mt, idx));
384 else
385 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
386 r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
387 if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
388 }
389 if (tp >= SER_TAG_TAB+2) {
390 r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
391 }
392 if ((tp & 1)) {
393 r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
394 }
395 t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
396 /* NOBARRIER: The table is new (marked white). */
397 setgcref(t->metatable, obj2gco(mt));
398 settabV(sbufL(sbx), o, t);
399 if (narray) {
400 TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
401 TValue *oe = tvref(t->array) + narray;
402 while (oa < oe) r = serialize_get(r, sbx, oa++);
403 }
404 if (nhash) {
405 do {
406 TValue k, *v;
407 r = serialize_get(r, sbx, &k);
408 v = lj_tab_set(sbufL(sbx), t, &k);
409 if (LJ_UNLIKELY(!tvisnil(v)))
410 lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY);
411 r = serialize_get(r, sbx, v);
412 } while (--nhash);
413 }
414 sbx->depth++;
415#if LJ_HASFFI
416 } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) {
417 uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8;
418 GCcdata *cd;
419 if (LJ_UNLIKELY(r + sz > w)) goto eob;
420 if (LJ_UNLIKELY(!ctype_ctsG(G(sbufL(sbx))))) goto badtag;
421 cd = lj_cdata_new_(sbufL(sbx),
422 tp == SER_TAG_INT64 ? CTID_INT64 :
423 tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
424 sz);
425 memcpy(cdataptr(cd), r, sz); r += sz;
426#if LJ_BE
427 *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
428 if (sz == 16)
429 ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
430#endif
431 if (sz == 16) { /* Fix non-canonical NaNs. */
432 TValue *cdo = (TValue *)cdataptr(cd);
433 if (!tvisnum(&cdo[0])) setnanV(&cdo[0]);
434 if (!tvisnum(&cdo[1])) setnanV(&cdo[1]);
435 }
436 setcdataV(sbufL(sbx), o, cd);
437#endif
438 } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
439 uintptr_t ud = 0;
440 if (tp == SER_TAG_LIGHTUD32) {
441 if (LJ_UNLIKELY(r + 4 > w)) goto eob;
442 ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
443 r += 4;
444 }
445#if LJ_64
446 else if (tp == SER_TAG_LIGHTUD64) {
447 if (LJ_UNLIKELY(r + 8 > w)) goto eob;
448 memcpy(&ud, r, 8); r += 8;
449#if LJ_BE
450 ud = lj_bswap64(ud);
451#endif
452 }
453 setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud));
454#else
455 setrawlightudV(o, (void *)ud);
456#endif
457 } else {
458badtag:
459 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
460 }
461 return r;
462eob:
463 lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB);
464 return NULL;
465}
466
467/* -- External serialization API ------------------------------------------ */
468
469/* Encode to buffer. */
470SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
471{
472 sbx->depth = LJ_SERIALIZE_DEPTH;
473 sbx->w = serialize_put(sbx->w, sbx, o);
474 return sbx;
475}
476
477/* Decode from buffer. */
478char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
479{
480 sbx->depth = LJ_SERIALIZE_DEPTH;
481 return serialize_get(sbx->r, sbx, o);
482}
483
484/* Stand-alone encoding, borrowing from global temporary buffer. */
485GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
486{
487 SBufExt sbx;
488 char *w;
489 memset(&sbx, 0, sizeof(SBufExt));
490 lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
491 sbx.depth = LJ_SERIALIZE_DEPTH;
492 w = serialize_put(sbx.w, &sbx, o);
493 return lj_str_new(L, sbx.b, (size_t)(w - sbx.b));
494}
495
496/* Stand-alone decoding, copy-on-write from string. */
497void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
498{
499 SBufExt sbx;
500 char *r;
501 memset(&sbx, 0, sizeof(SBufExt));
502 lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
503 /* No need to set sbx.cowref here. */
504 sbx.depth = LJ_SERIALIZE_DEPTH;
505 r = serialize_get(sbx.r, &sbx, o);
506 if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);
507}
508
509#if LJ_HASJIT
510/* Peek into buffer to find the result IRType for specialization purposes. */
511LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
512{
513 uint32_t tp;
514 if (serialize_ru124(sbx->r, sbx->w, &tp)) {
515 /* This must match the handling of all tags in the decoder above. */
516 switch (tp) {
517 case SER_TAG_NIL: return IRT_NIL;
518 case SER_TAG_FALSE: return IRT_FALSE;
519 case SER_TAG_TRUE: return IRT_TRUE;
520 case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64:
521 return IRT_LIGHTUD;
522 case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM;
523 case SER_TAG_NUM: return IRT_NUM;
524 case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
525 case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
526 case SER_TAG_DICT_MT:
527 return IRT_TAB;
528 case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
529 return IRT_CDATA;
530 case SER_TAG_DICT_STR:
531 default:
532 return IRT_STR;
533 }
534 }
535 return IRT_NIL; /* Will fail on actual decode. */
536}
537#endif
538
539#endif
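
For reference, the U124 variable-length integer encoding used by serialize_wu124()/serialize_ru124() above stores values below 0xe0 in a single byte, values below 0x1fe0 in two bytes with a 0xe0 bias, and anything larger as a 0xff marker followed by a 32-bit word in little-endian byte order. Below is a minimal stand-alone sketch of the same scheme, not part of the patch itself; the names are illustrative and buffer-bounds checking is omitted.

#include <stdint.h>
#include <string.h>

/* Encode v into w (at most 5 bytes), return the number of bytes written. */
static size_t u124_encode(uint8_t *w, uint32_t v)
{
  if (v < 0xe0) {                       /* 1-byte form. */
    w[0] = (uint8_t)v;
    return 1;
  } else if (v < 0x1fe0) {              /* 2-byte form, biased by 0xe0. */
    uint32_t b = v - 0xe0;
    w[0] = (uint8_t)(0xe0 | (b >> 8));
    w[1] = (uint8_t)b;
    return 2;
  } else {                              /* 5-byte form: 0xff + uint32. */
    w[0] = 0xff;
    memcpy(w+1, &v, 4);                 /* Little-endian host assumed here. */
    return 5;
  }
}

/* Decode a value from r into *v, return the number of bytes consumed. */
static size_t u124_decode(const uint8_t *r, uint32_t *v)
{
  if (r[0] < 0xe0) {                    /* 1-byte form. */
    *v = r[0];
    return 1;
  } else if (r[0] != 0xff) {            /* 2-byte form: undo the 0xe0 bias. */
    *v = ((uint32_t)(r[0] & 0x1f) << 8) + r[1] + 0xe0;
    return 2;
  } else {                              /* 5-byte form. */
    memcpy(v, r+1, 4);                  /* Little-endian host assumed here. */
    return 5;
  }
}
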
diff --git a/src/lj_serialize.h b/src/lj_serialize.h
new file mode 100644
index 00000000..da823573
--- /dev/null
+++ b/src/lj_serialize.h
@@ -0,0 +1,28 @@
1/*
2** Object de/serialization.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_SERIALIZE_H
7#define _LJ_SERIALIZE_H
8
9#include "lj_obj.h"
10#include "lj_buf.h"
11
12#if LJ_HASBUFFER
13
14#define LJ_SERIALIZE_DEPTH 100 /* Default depth. */
15
16LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict);
17LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict);
18LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
19LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
20LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);
21LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str);
22#if LJ_HASJIT
23LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx);
24#endif
25
26#endif
27
28#endif
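
Combining the tag enum at the top of lj_serialize.c with serialize_put(), the wire format for simple values follows directly: primitives are single tag bytes, tagged integers are SER_TAG_INT plus four bytes in little-endian order, and short strings are (0x20 + length) followed by the string bytes. A few worked byte sequences as an illustrative sketch (the array names are hypothetical; integers are only emitted as SER_TAG_INT when the value is held as a tagged integer, e.g. in DUALNUM builds, otherwise they are written as SER_TAG_NUM doubles):

#include <stdint.h>

/* nil and true are single tag bytes. */
static const uint8_t enc_nil[]  = { 0x00 };                 /* SER_TAG_NIL */
static const uint8_t enc_true[] = { 0x02 };                 /* SER_TAG_TRUE */

/* A tagged integer 42: SER_TAG_INT followed by 4 little-endian bytes. */
static const uint8_t enc_int42[] = { 0x06, 0x2a, 0x00, 0x00, 0x00 };

/* The short string "abc": tag 0x20 + length 3, followed by the bytes. */
static const uint8_t enc_abc[] = { 0x23, 'a', 'b', 'c' };
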
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 0c317b52..27f9c8e5 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -68,20 +68,37 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
68 for (s = 0; s < nslots; s++) { 68 for (s = 0; s < nslots; s++) {
69 TRef tr = J->slot[s]; 69 TRef tr = J->slot[s];
70 IRRef ref = tref_ref(tr); 70 IRRef ref = tref_ref(tr);
71#if LJ_FR2
72 if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
73 if ((tr & TREF_FRAME))
74 map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
75 continue;
76 }
77 if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
78 cTValue *base = J->L->base - J->baseslot;
79 tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
80 ref = tref_ref(tr);
81 }
82#endif
71 if (ref) { 83 if (ref) {
72 SnapEntry sn = SNAP_TR(s, tr); 84 SnapEntry sn = SNAP_TR(s, tr);
73 IRIns *ir = &J->cur.ir[ref]; 85 IRIns *ir = &J->cur.ir[ref];
74 if (!(sn & (SNAP_CONT|SNAP_FRAME)) && 86 if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
75 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { 87 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
76 /* No need to snapshot unmodified non-inherited slots. */ 88 /*
77 if (!(ir->op2 & IRSLOAD_INHERIT)) 89 ** No need to snapshot unmodified non-inherited slots.
90 ** But always snapshot the function below a frame in LJ_FR2 mode.
91 */
92 if (!(ir->op2 & IRSLOAD_INHERIT) &&
93 (!LJ_FR2 || s == 0 || s+1 == nslots ||
94 !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
78 continue; 95 continue;
79 /* No need to restore readonly slots and unmodified non-parent slots. */ 96 /* No need to restore readonly slots and unmodified non-parent slots. */
80 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && 97 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
81 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) 98 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
82 sn |= SNAP_NORESTORE; 99 sn |= SNAP_NORESTORE;
83 } 100 }
84 if (LJ_SOFTFP && irt_isnum(ir->t)) 101 if (LJ_SOFTFP32 && irt_isnum(ir->t))
85 sn |= SNAP_SOFTFPNUM; 102 sn |= SNAP_SOFTFPNUM;
86 map[n++] = sn; 103 map[n++] = sn;
87 } 104 }
@@ -90,35 +107,54 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
90} 107}
91 108
92/* Add frame links at the end of the snapshot. */ 109/* Add frame links at the end of the snapshot. */
93static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) 110static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
94{ 111{
95 cTValue *frame = J->L->base - 1; 112 cTValue *frame = J->L->base - 1;
96 cTValue *lim = J->L->base - J->baseslot; 113 cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
97 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; 114 GCfunc *fn = frame_func(frame);
115 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
116#if LJ_FR2
117 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
118 lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
119 memcpy(map, &pcbase, sizeof(uint64_t));
120#else
98 MSize f = 0; 121 MSize f = 0;
99 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ 122 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
100 lua_assert(!J->pt || 123#endif
124 lj_assertJ(!J->pt ||
101 (J->pc >= proto_bc(J->pt) && 125 (J->pc >= proto_bc(J->pt) &&
102 J->pc < proto_bc(J->pt) + J->pt->sizebc)); 126 J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
103 while (frame > lim) { /* Backwards traversal of all frames above base. */ 127 while (frame > lim) { /* Backwards traversal of all frames above base. */
104 if (frame_islua(frame)) { 128 if (frame_islua(frame)) {
129#if !LJ_FR2
105 map[f++] = SNAP_MKPC(frame_pc(frame)); 130 map[f++] = SNAP_MKPC(frame_pc(frame));
131#endif
106 frame = frame_prevl(frame); 132 frame = frame_prevl(frame);
107 } else if (frame_iscont(frame)) { 133 } else if (frame_iscont(frame)) {
134#if !LJ_FR2
108 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 135 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
109 map[f++] = SNAP_MKPC(frame_contpc(frame)); 136 map[f++] = SNAP_MKPC(frame_contpc(frame));
137#endif
110 frame = frame_prevd(frame); 138 frame = frame_prevd(frame);
111 } else { 139 } else {
112 lua_assert(!frame_isc(frame)); 140 lj_assertJ(!frame_isc(frame), "broken frame chain");
141#if !LJ_FR2
113 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 142 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
143#endif
114 frame = frame_prevd(frame); 144 frame = frame_prevd(frame);
115 continue; 145 continue;
116 } 146 }
117 if (frame + funcproto(frame_func(frame))->framesize > ftop) 147 if (frame + funcproto(frame_func(frame))->framesize > ftop)
118 ftop = frame + funcproto(frame_func(frame))->framesize; 148 ftop = frame + funcproto(frame_func(frame))->framesize;
119 } 149 }
120 lua_assert(f == (MSize)(1 + J->framedepth)); 150 *topslot = (uint8_t)(ftop - lim);
121 return (BCReg)(ftop - lim); 151#if LJ_FR2
152 lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
153 return 2;
154#else
155 lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
156 return f;
157#endif
122} 158}
123 159
124/* Take a snapshot of the current stack. */ 160/* Take a snapshot of the current stack. */
@@ -128,16 +164,17 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
128 MSize nent; 164 MSize nent;
129 SnapEntry *p; 165 SnapEntry *p;
130 /* Conservative estimate. */ 166 /* Conservative estimate. */
131 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); 167 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
132 p = &J->cur.snapmap[nsnapmap]; 168 p = &J->cur.snapmap[nsnapmap];
133 nent = snapshot_slots(J, p, nslots); 169 nent = snapshot_slots(J, p, nslots);
134 snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); 170 snap->nent = (uint8_t)nent;
171 nent += snapshot_framelinks(J, p + nent, &snap->topslot);
135 snap->mapofs = (uint32_t)nsnapmap; 172 snap->mapofs = (uint32_t)nsnapmap;
136 snap->ref = (IRRef1)J->cur.nins; 173 snap->ref = (IRRef1)J->cur.nins;
137 snap->nent = (uint8_t)nent; 174 snap->mcofs = 0;
138 snap->nslots = (uint8_t)nslots; 175 snap->nslots = (uint8_t)nslots;
139 snap->count = 0; 176 snap->count = 0;
140 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent + 1 + J->framedepth); 177 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
141} 178}
142 179
143/* Add or merge a snapshot. */ 180/* Add or merge a snapshot. */
@@ -146,8 +183,8 @@ void lj_snap_add(jit_State *J)
146 MSize nsnap = J->cur.nsnap; 183 MSize nsnap = J->cur.nsnap;
147 MSize nsnapmap = J->cur.nsnapmap; 184 MSize nsnapmap = J->cur.nsnapmap;
148 /* Merge if no ins. in between or if requested and no guard in between. */ 185 /* Merge if no ins. in between or if requested and no guard in between. */
149 if (J->mergesnap ? !irt_isguard(J->guardemit) : 186 if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
150 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { 187 (J->mergesnap && !irt_isguard(J->guardemit))) {
151 if (nsnap == 1) { /* But preserve snap #0 PC. */ 188 if (nsnap == 1) { /* But preserve snap #0 PC. */
152 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); 189 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
153 goto nomerge; 190 goto nomerge;
@@ -194,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
194#define DEF_SLOT(s) udf[(s)] *= 3 231#define DEF_SLOT(s) udf[(s)] *= 3
195 232
196 /* Scan through following bytecode and check for uses/defs. */ 233 /* Scan through following bytecode and check for uses/defs. */
197 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); 234 lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
235 "snapshot PC out of range");
198 for (;;) { 236 for (;;) {
199 BCIns ins = *pc++; 237 BCIns ins = *pc++;
200 BCOp op = bc_op(ins); 238 BCOp op = bc_op(ins);
@@ -205,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
205 switch (bcmode_c(op)) { 243 switch (bcmode_c(op)) {
206 case BCMvar: USE_SLOT(bc_c(ins)); break; 244 case BCMvar: USE_SLOT(bc_c(ins)); break;
207 case BCMrbase: 245 case BCMrbase:
208 lua_assert(op == BC_CAT); 246 lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
209 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); 247 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
210 for (; s < maxslot; s++) DEF_SLOT(s); 248 for (; s < maxslot; s++) DEF_SLOT(s);
211 break; 249 break;
@@ -245,7 +283,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
245 case BCMbase: 283 case BCMbase:
246 if (op >= BC_CALLM && op <= BC_ITERN) { 284 if (op >= BC_CALLM && op <= BC_ITERN) {
247 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? 285 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
248 maxslot : (bc_a(ins) + bc_c(ins)); 286 maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
287 if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
249 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); 288 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
250 for (; s < top; s++) USE_SLOT(s); 289 for (; s < top; s++) USE_SLOT(s);
251 for (; s < maxslot; s++) DEF_SLOT(s); 290 for (; s < maxslot; s++) DEF_SLOT(s);
@@ -263,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
263 break; 302 break;
264 default: break; 303 default: break;
265 } 304 }
266 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); 305 lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
306 "use/def analysis PC out of range");
267 } 307 }
268 308
269#undef USE_SLOT 309#undef USE_SLOT
@@ -321,8 +361,8 @@ void lj_snap_shrink(jit_State *J)
321 MSize n, m, nlim, nent = snap->nent; 361 MSize n, m, nlim, nent = snap->nent;
322 uint8_t udf[SNAP_USEDEF_SLOTS]; 362 uint8_t udf[SNAP_USEDEF_SLOTS];
323 BCReg maxslot = J->maxslot; 363 BCReg maxslot = J->maxslot;
324 BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
325 BCReg baseslot = J->baseslot; 364 BCReg baseslot = J->baseslot;
365 BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
326 if (minslot < maxslot) snap_useuv(J->pt, udf); 366 if (minslot < maxslot) snap_useuv(J->pt, udf);
327 maxslot += baseslot; 367 maxslot += baseslot;
328 minslot += baseslot; 368 minslot += baseslot;
@@ -365,25 +405,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
365} 405}
366 406
367/* Copy RegSP from parent snapshot to the parent links of the IR. */ 407/* Copy RegSP from parent snapshot to the parent links of the IR. */
368IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) 408IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
369{ 409{
370 SnapShot *snap = &T->snap[snapno]; 410 SnapShot *snap = &T->snap[snapno];
371 SnapEntry *map = &T->snapmap[snap->mapofs]; 411 SnapEntry *map = &T->snapmap[snap->mapofs];
372 BloomFilter rfilt = snap_renamefilter(T, snapno); 412 BloomFilter rfilt = snap_renamefilter(T, snapno);
373 MSize n = 0; 413 MSize n = 0;
374 IRRef ref = 0; 414 IRRef ref = 0;
415 UNUSED(J);
375 for ( ; ; ir++) { 416 for ( ; ; ir++) {
376 uint32_t rs; 417 uint32_t rs;
377 if (ir->o == IR_SLOAD) { 418 if (ir->o == IR_SLOAD) {
378 if (!(ir->op2 & IRSLOAD_PARENT)) break; 419 if (!(ir->op2 & IRSLOAD_PARENT)) break;
379 for ( ; ; n++) { 420 for ( ; ; n++) {
380 lua_assert(n < snap->nent); 421 lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
381 if (snap_slot(map[n]) == ir->op1) { 422 if (snap_slot(map[n]) == ir->op1) {
382 ref = snap_ref(map[n++]); 423 ref = snap_ref(map[n++]);
383 break; 424 break;
384 } 425 }
385 } 426 }
386 } else if (LJ_SOFTFP && ir->o == IR_HIOP) { 427 } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
387 ref++; 428 ref++;
388 } else if (ir->o == IR_PVAL) { 429 } else if (ir->o == IR_PVAL) {
389 ref = ir->op1 + REF_BIAS; 430 ref = ir->op1 + REF_BIAS;
@@ -394,7 +435,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
394 if (bloomtest(rfilt, ref)) 435 if (bloomtest(rfilt, ref))
395 rs = snap_renameref(T, snapno, ref, rs); 436 rs = snap_renameref(T, snapno, ref, rs);
396 ir->prev = (uint16_t)rs; 437 ir->prev = (uint16_t)rs;
397 lua_assert(regsp_used(rs)); 438 lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
398 } 439 }
399 return ir; 440 return ir;
400} 441}
@@ -409,11 +450,11 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
409 case IR_KPRI: return TREF_PRI(irt_type(ir->t)); 450 case IR_KPRI: return TREF_PRI(irt_type(ir->t));
410 case IR_KINT: return lj_ir_kint(J, ir->i); 451 case IR_KINT: return lj_ir_kint(J, ir->i);
411 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); 452 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
412 case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); 453 case IR_KNUM: case IR_KINT64:
413 case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); 454 return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
414 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ 455 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
415 case IR_KNULL: return lj_ir_knull(J, irt_type(ir->t)); 456 case IR_KNULL: return lj_ir_knull(J, irt_type(ir->t));
416 default: lua_assert(0); return TREF_NIL; break; 457 default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
417 } 458 }
418} 459}
419 460
@@ -423,7 +464,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
423 MSize j; 464 MSize j;
424 for (j = 0; j < nmax; j++) 465 for (j = 0; j < nmax; j++)
425 if (snap_ref(map[j]) == ref) 466 if (snap_ref(map[j]) == ref)
426 return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); 467 return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME);
427 return 0; 468 return 0;
428} 469}
429 470
@@ -484,21 +525,27 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
484 goto setslot; 525 goto setslot;
485 bloomset(seen, ref); 526 bloomset(seen, ref);
486 if (irref_isk(ref)) { 527 if (irref_isk(ref)) {
487 tr = snap_replay_const(J, ir); 528 /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
529 if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
530 tr = 0;
531 else
532 tr = snap_replay_const(J, ir);
488 } else if (!regsp_used(ir->prev)) { 533 } else if (!regsp_used(ir->prev)) {
489 pass23 = 1; 534 pass23 = 1;
490 lua_assert(s != 0); 535 lj_assertJ(s != 0, "unused slot 0 in snapshot");
491 tr = s; 536 tr = s;
492 } else { 537 } else {
493 IRType t = irt_type(ir->t); 538 IRType t = irt_type(ir->t);
494 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; 539 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
495 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; 540 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
496 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); 541 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
542 if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX;
497 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); 543 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
498 } 544 }
499 setslot: 545 setslot:
500 J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ 546 /* Same as TREF_* flags. */
501 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); 547 J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME));
548 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
502 if ((sn & SNAP_FRAME)) 549 if ((sn & SNAP_FRAME))
503 J->baseslot = s+1; 550 J->baseslot = s+1;
504 } 551 }
@@ -514,8 +561,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
514 uint8_t m; 561 uint8_t m;
515 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; 562 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
516 pass23 = 1; 563 pass23 = 1;
517 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || 564 lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
518 ir->o == IR_CNEW || ir->o == IR_CNEWI); 565 ir->o == IR_CNEW || ir->o == IR_CNEWI,
566 "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
519 m = lj_ir_mode[ir->o]; 567 m = lj_ir_mode[ir->o];
520 if (irm_op1(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op1); 568 if (irm_op1(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op1);
521 if (irm_op2(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op2); 569 if (irm_op2(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op2);
@@ -528,13 +576,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
528 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { 576 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
529 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) 577 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
530 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); 578 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
531 else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && 579 else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
532 irs+1 < irlast && (irs+1)->o == IR_HIOP) 580 irs+1 < irlast && (irs+1)->o == IR_HIOP)
533 snap_pref(J, T, map, nent, seen, (irs+1)->op2); 581 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
534 } 582 }
535 } 583 }
536 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { 584 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
537 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); 585 lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
586 "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
538 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); 587 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
539 } 588 }
540 } 589 }
@@ -585,9 +634,13 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
585 IRRef keyref = tref_ref(key); 634 IRRef keyref = tref_ref(key);
586 IRRef newref_ref = J->chain[IR_NEWREF]; 635 IRRef newref_ref = J->chain[IR_NEWREF];
587 IRIns *newref = &J->cur.ir[newref_ref]; 636 IRIns *newref = &J->cur.ir[newref_ref];
588 lua_assert(irref_isk(keyref)); 637 lj_assertJ(irref_isk(keyref),
638 "sunk store for parent IR %04d with bad key %04d",
639 refp - REF_BIAS, keyref - REF_BIAS);
589 if (newref_ref > allocref && newref->op2 == keyref) { 640 if (newref_ref > allocref && newref->op2 == keyref) {
590 lua_assert(newref->op1 == allocref); 641 lj_assertJ(newref->op1 == allocref,
642 "sunk store for parent IR %04d with bad tab %04d",
643 refp - REF_BIAS, allocref - REF_BIAS);
591 tmp = newref_ref; 644 tmp = newref_ref;
592 goto skip_newref; 645 goto skip_newref;
593 } 646 }
@@ -598,20 +651,21 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
598 val = snap_pref(J, T, map, nent, seen, irs->op2); 651 val = snap_pref(J, T, map, nent, seen, irs->op2);
599 if (val == 0) { 652 if (val == 0) {
600 IRIns *irc = &T->ir[irs->op2]; 653 IRIns *irc = &T->ir[irs->op2];
601 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); 654 lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
655 "sunk store for parent IR %04d with bad op %d",
656 refp - REF_BIAS, irc->o);
602 val = snap_pref(J, T, map, nent, seen, irc->op1); 657 val = snap_pref(J, T, map, nent, seen, irc->op1);
603 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); 658 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
604 } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && 659 } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
605 irs+1 < irlast && (irs+1)->o == IR_HIOP) { 660 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
606 IRType t = IRT_I64; 661 IRType t = IRT_I64;
607 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) 662 if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
608 t = IRT_NUM; 663 t = IRT_NUM;
609 lj_needsplit(J); 664 lj_needsplit(J);
610 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { 665 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
611 uint64_t k = (uint32_t)T->ir[irs->op2].i + 666 uint64_t k = (uint32_t)T->ir[irs->op2].i +
612 ((uint64_t)T->ir[(irs+1)->op2].i << 32); 667 ((uint64_t)T->ir[(irs+1)->op2].i << 32);
613 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, 668 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
614 lj_ir_k64_find(J, k));
615 } else { 669 } else {
616 val = emitir_raw(IRT(IR_HIOP, t), val, 670 val = emitir_raw(IRT(IR_HIOP, t), val,
617 snap_pref(J, T, map, nent, seen, (irs+1)->op2)); 671 snap_pref(J, T, map, nent, seen, (irs+1)->op2));
@@ -649,7 +703,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
649 IRType1 t = ir->t; 703 IRType1 t = ir->t;
650 RegSP rs = ir->prev; 704 RegSP rs = ir->prev;
651 if (irref_isk(ref)) { /* Restore constant slot. */ 705 if (irref_isk(ref)) { /* Restore constant slot. */
652 lj_ir_kvalue(J->L, o, ir); 706 if (ir->o == IR_KPTR) {
707 o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
708 } else {
709 lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
710 "restore of const from IR %04d with bad op %d",
711 ref - REF_BIAS, ir->o);
712 lj_ir_kvalue(J->L, o, ir);
713 }
653 return; 714 return;
654 } 715 }
655 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) 716 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
@@ -658,22 +719,24 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
658 int32_t *sps = &ex->spill[regsp_spill(rs)]; 719 int32_t *sps = &ex->spill[regsp_spill(rs)];
659 if (irt_isinteger(t)) { 720 if (irt_isinteger(t)) {
660 setintV(o, *sps); 721 setintV(o, *sps);
661#if !LJ_SOFTFP 722#if !LJ_SOFTFP32
662 } else if (irt_isnum(t)) { 723 } else if (irt_isnum(t)) {
663 o->u64 = *(uint64_t *)sps; 724 o->u64 = *(uint64_t *)sps;
664#endif 725#endif
665 } else if (LJ_64 && irt_islightud(t)) { 726#if LJ_64 && !LJ_GC64
727 } else if (irt_islightud(t)) {
666 /* 64 bit lightuserdata which may escape already has the tag bits. */ 728 /* 64 bit lightuserdata which may escape already has the tag bits. */
667 o->u64 = *(uint64_t *)sps; 729 o->u64 = *(uint64_t *)sps;
730#endif
668 } else { 731 } else {
669 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ 732 lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
670 setgcrefi(o->gcr, *sps); 733 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
671 setitype(o, irt_toitype(t));
672 } 734 }
673 } else { /* Restore from register. */ 735 } else { /* Restore from register. */
674 Reg r = regsp_reg(rs); 736 Reg r = regsp_reg(rs);
675 if (ra_noreg(r)) { 737 if (ra_noreg(r)) {
676 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); 738 lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
739 "restore from IR %04d has no reg", ref - REF_BIAS);
677 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); 740 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
678 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); 741 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
679 return; 742 return;
@@ -682,21 +745,26 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
682#if !LJ_SOFTFP 745#if !LJ_SOFTFP
683 } else if (irt_isnum(t)) { 746 } else if (irt_isnum(t)) {
684 setnumV(o, ex->fpr[r-RID_MIN_FPR]); 747 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
748#elif LJ_64 /* && LJ_SOFTFP */
749 } else if (irt_isnum(t)) {
750 o->u64 = ex->gpr[r-RID_MIN_GPR];
685#endif 751#endif
686 } else if (LJ_64 && irt_islightud(t)) { 752#if LJ_64 && !LJ_GC64
687 /* 64 bit lightuserdata which may escape already has the tag bits. */ 753 } else if (irt_is64(t)) {
754 /* 64 bit values that already have the tag bits. */
688 o->u64 = ex->gpr[r-RID_MIN_GPR]; 755 o->u64 = ex->gpr[r-RID_MIN_GPR];
756#endif
757 } else if (irt_ispri(t)) {
758 setpriV(o, irt_toitype(t));
689 } else { 759 } else {
690 if (!irt_ispri(t)) 760 setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
691 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
692 setitype(o, irt_toitype(t));
693 } 761 }
694 } 762 }
695} 763}
696 764
697#if LJ_HASFFI 765#if LJ_HASFFI
698/* Restore raw data from the trace exit state. */ 766/* Restore raw data from the trace exit state. */
699static void snap_restoredata(GCtrace *T, ExitState *ex, 767static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
700 SnapNo snapno, BloomFilter rfilt, 768 SnapNo snapno, BloomFilter rfilt,
701 IRRef ref, void *dst, CTSize sz) 769 IRRef ref, void *dst, CTSize sz)
702{ 770{
@@ -704,9 +772,10 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
704 RegSP rs = ir->prev; 772 RegSP rs = ir->prev;
705 int32_t *src; 773 int32_t *src;
706 uint64_t tmp; 774 uint64_t tmp;
775 UNUSED(J);
707 if (irref_isk(ref)) { 776 if (irref_isk(ref)) {
708 if (ir->o == IR_KNUM || ir->o == IR_KINT64) { 777 if (ir_isk64(ir)) {
709 src = mref(ir->ptr, int32_t); 778 src = (int32_t *)&ir[1];
710 } else if (sz == 8) { 779 } else if (sz == 8) {
711 tmp = (uint64_t)(uint32_t)ir->i; 780 tmp = (uint64_t)(uint32_t)ir->i;
712 src = (int32_t *)&tmp; 781 src = (int32_t *)&tmp;
@@ -726,8 +795,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
726 Reg r = regsp_reg(rs); 795 Reg r = regsp_reg(rs);
727 if (ra_noreg(r)) { 796 if (ra_noreg(r)) {
728 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ 797 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
729 lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); 798 lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
730 snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); 799 "restore from IR %04d has no reg", ref - REF_BIAS);
800 snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
731 *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; 801 *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
732 return; 802 return;
733 } 803 }
@@ -746,10 +816,12 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
746#endif 816#endif
747 { 817 {
748 src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; 818 src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
819 if (LJ_64 && LJ_BE && sz == 4) src++;
749 } 820 }
750 } 821 }
751 } 822 }
752 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 823 lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
824 "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
753 if (sz == 4) *(int32_t *)dst = *src; 825 if (sz == 4) *(int32_t *)dst = *src;
754 else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; 826 else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
755 else if (sz == 1) *(int8_t *)dst = (int8_t)*src; 827 else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
@@ -762,24 +834,27 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
762 SnapNo snapno, BloomFilter rfilt, 834 SnapNo snapno, BloomFilter rfilt,
763 IRIns *ir, TValue *o) 835 IRIns *ir, TValue *o)
764{ 836{
765 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || 837 lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
766 ir->o == IR_CNEW || ir->o == IR_CNEWI); 838 ir->o == IR_CNEW || ir->o == IR_CNEWI,
839 "sunk allocation with bad op %d", ir->o);
767#if LJ_HASFFI 840#if LJ_HASFFI
768 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { 841 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
769 CTState *cts = ctype_cts(J->L); 842 CTState *cts = ctype_cts(J->L);
770 CTypeID id = (CTypeID)T->ir[ir->op1].i; 843 CTypeID id = (CTypeID)T->ir[ir->op1].i;
771 CTSize sz = lj_ctype_size(cts, id); 844 CTSize sz;
772 GCcdata *cd = lj_cdata_new(cts, id, sz); 845 CTInfo info = lj_ctype_info(cts, id, &sz);
846 GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
773 setcdataV(J->L, o, cd); 847 setcdataV(J->L, o, cd);
774 if (ir->o == IR_CNEWI) { 848 if (ir->o == IR_CNEWI) {
775 uint8_t *p = (uint8_t *)cdataptr(cd); 849 uint8_t *p = (uint8_t *)cdataptr(cd);
776 lua_assert(sz == 4 || sz == 8); 850 lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
777 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { 851 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
778 snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); 852 snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
853 LJ_LE ? p+4 : p, 4);
779 if (LJ_BE) p += 4; 854 if (LJ_BE) p += 4;
780 sz = 4; 855 sz = 4;
781 } 856 }
782 snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); 857 snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
783 } else { 858 } else {
784 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; 859 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
785 for (irs = ir+1; irs < irlast; irs++) 860 for (irs = ir+1; irs < irlast; irs++)
@@ -787,8 +862,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
787 IRIns *iro = &T->ir[T->ir[irs->op1].op2]; 862 IRIns *iro = &T->ir[T->ir[irs->op1].op2];
788 uint8_t *p = (uint8_t *)cd; 863 uint8_t *p = (uint8_t *)cd;
789 CTSize szs; 864 CTSize szs;
790 lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); 865 lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
791 lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); 866 lj_assertJ(T->ir[irs->op1].o == IR_ADD,
867 "sunk store with bad add op %d", T->ir[irs->op1].o);
868 lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
869 "sunk store with bad const offset op %d", iro->o);
792 if (irt_is64(irs->t)) szs = 8; 870 if (irt_is64(irs->t)) szs = 8;
793 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; 871 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
794 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; 872 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
@@ -797,14 +875,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
797 p += (int64_t)ir_k64(iro)->u64; 875 p += (int64_t)ir_k64(iro)->u64;
798 else 876 else
799 p += iro->i; 877 p += iro->i;
800 lua_assert(p >= (uint8_t *)cdataptr(cd) && 878 lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
801 p + szs <= (uint8_t *)cdataptr(cd) + sz); 879 p + szs <= (uint8_t *)cdataptr(cd) + sz,
880 "sunk store with offset out of range");
802 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { 881 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
803 lua_assert(szs == 4); 882 lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
804 snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); 883 snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
884 LJ_LE ? p+4 : p, 4);
805 if (LJ_BE) p += 4; 885 if (LJ_BE) p += 4;
806 } 886 }
807 snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); 887 snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
808 } 888 }
809 } 889 }
810 } else 890 } else
@@ -815,12 +895,13 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
815 lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1])); 895 lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
816 settabV(J->L, o, t); 896 settabV(J->L, o, t);
817 irlast = &T->ir[T->snap[snapno].ref]; 897 irlast = &T->ir[T->snap[snapno].ref];
818 for (irs = ir+1; irs < irlast; irs++) { 898 for (irs = ir+1; irs < irlast; irs++)
819 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { 899 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
820 IRIns *irk = &T->ir[irs->op1]; 900 IRIns *irk = &T->ir[irs->op1];
821 TValue tmp, *val; 901 TValue tmp, *val;
822 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || 902 lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
823 irs->o == IR_FSTORE); 903 irs->o == IR_FSTORE,
904 "sunk store with bad op %d", irs->o);
824 if (irk->o == IR_FREF) { 905 if (irk->o == IR_FREF) {
825 switch (irk->op2) { 906 switch (irk->op2) {
826 case IRFL_TAB_META: 907 case IRFL_TAB_META:
@@ -836,7 +917,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
836 /* Negative metamethod cache invalidated by lj_tab_set() below. */ 917 /* Negative metamethod cache invalidated by lj_tab_set() below. */
837 break; 918 break;
838 default: 919 default:
839 lua_assert(0); 920 lj_assertJ(0, "sunk store with bad field %d", irk->op2);
840 break; 921 break;
841 } 922 }
842 } else { 923 } else {
@@ -846,13 +927,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
846 val = lj_tab_set(J->L, t, &tmp); 927 val = lj_tab_set(J->L, t, &tmp);
847 /* NOBARRIER: The table is new (marked white). */ 928 /* NOBARRIER: The table is new (marked white). */
848 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); 929 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
849 if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { 930 if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
850 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); 931 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
851 val->u32.hi = tmp.u32.lo; 932 val->u32.hi = tmp.u32.lo;
852 } 933 }
853 } 934 }
854 } 935 }
855 }
856 } 936 }
857} 937}
858 938
@@ -865,11 +945,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
865 SnapShot *snap = &T->snap[snapno]; 945 SnapShot *snap = &T->snap[snapno];
866 MSize n, nent = snap->nent; 946 MSize n, nent = snap->nent;
867 SnapEntry *map = &T->snapmap[snap->mapofs]; 947 SnapEntry *map = &T->snapmap[snap->mapofs];
868 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; 948#if !LJ_FR2 || defined(LUA_USE_ASSERT)
869 int32_t ftsz0; 949 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
950#endif
951#if !LJ_FR2
952 ptrdiff_t ftsz0;
953#endif
870 TValue *frame; 954 TValue *frame;
871 BloomFilter rfilt = snap_renamefilter(T, snapno); 955 BloomFilter rfilt = snap_renamefilter(T, snapno);
872 const BCIns *pc = snap_pc(map[nent]); 956 const BCIns *pc = snap_pc(&map[nent]);
873 lua_State *L = J->L; 957 lua_State *L = J->L;
874 958
875 /* Set interpreter PC to the next PC to get correct error messages. */ 959 /* Set interpreter PC to the next PC to get correct error messages. */
@@ -883,8 +967,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
883 } 967 }
884 968
885 /* Fill stack slots with data from the registers and spill slots. */ 969 /* Fill stack slots with data from the registers and spill slots. */
886 frame = L->base-1; 970 frame = L->base-1-LJ_FR2;
971#if !LJ_FR2
887 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ 972 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
973#endif
888 for (n = 0; n < nent; n++) { 974 for (n = 0; n < nent; n++) {
889 SnapEntry sn = map[n]; 975 SnapEntry sn = map[n];
890 if (!(sn & SNAP_NORESTORE)) { 976 if (!(sn & SNAP_NORESTORE)) {
@@ -903,18 +989,27 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
903 continue; 989 continue;
904 } 990 }
905 snap_restoreval(J, T, ex, snapno, rfilt, ref, o); 991 snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
906 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { 992 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
907 TValue tmp; 993 TValue tmp;
908 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); 994 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
909 o->u32.hi = tmp.u32.lo; 995 o->u32.hi = tmp.u32.lo;
996#if !LJ_FR2
910 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { 997 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
911 /* Overwrite tag with frame link. */ 998 /* Overwrite tag with frame link. */
912 o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; 999 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
913 L->base = o+1; 1000 L->base = o+1;
1001#endif
1002 } else if ((sn & SNAP_KEYINDEX)) {
1003 /* A IRT_INT key index slot is restored as a number. Undo this. */
1004 o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o)));
1005 o->u32.hi = LJ_KEYINDEX;
914 } 1006 }
915 } 1007 }
916 } 1008 }
917 lua_assert(map + nent == flinks); 1009#if LJ_FR2
1010 L->base += (map[nent+LJ_BE] & 0xff);
1011#endif
1012 lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");
918 1013
919 /* Compute current stack top. */ 1014 /* Compute current stack top. */
920 switch (bc_op(*pc)) { 1015 switch (bc_op(*pc)) {
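
A minimal standalone sketch (not part of the patch) of the SNAP_KEYINDEX handling in the lj_snap.c hunk above: a key-index slot that was restored as a number is rewritten in place to a 32-bit integer plus a marker tag word. The union layout (LJ_LE assumed) and the KEYINDEX_TAG value below are illustrative stand-ins, not LuaJIT's actual definitions.

#include <stdint.h>
#include <stdio.h>

/* TValue-like slot: a double overlaid with two 32-bit words (little-endian layout assumed). */
typedef union { double n; struct { uint32_t lo, hi; } u32; } Slot;

#define KEYINDEX_TAG 0xfffe7fffu  /* placeholder, not the real LJ_KEYINDEX */

int main(void)
{
  Slot s;
  uint32_t k;
  s.n = 42.0;               /* the key index was restored as a number */
  k = (uint32_t)s.n;        /* undo the conversion: recover the integer */
  s.u32.lo = k;             /* store it in the low word... */
  s.u32.hi = KEYINDEX_TAG;  /* ...and overwrite the tag word with the marker */
  printf("lo=%u hi=0x%08x\n", (unsigned)s.u32.lo, (unsigned)s.u32.hi);
  return 0;
}
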
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 6c406f20..089d86bb 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -13,7 +13,8 @@
13LJ_FUNC void lj_snap_add(jit_State *J); 13LJ_FUNC void lj_snap_add(jit_State *J);
14LJ_FUNC void lj_snap_purge(jit_State *J); 14LJ_FUNC void lj_snap_purge(jit_State *J);
15LJ_FUNC void lj_snap_shrink(jit_State *J); 15LJ_FUNC void lj_snap_shrink(jit_State *J);
16LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); 16LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno,
17 IRIns *ir);
17LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); 18LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T);
18LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); 19LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
19LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); 20LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
diff --git a/src/lj_state.c b/src/lj_state.c
index adedb66c..6fd7d9ce 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -24,8 +25,10 @@
24#include "lj_trace.h" 25#include "lj_trace.h"
25#include "lj_dispatch.h" 26#include "lj_dispatch.h"
26#include "lj_vm.h" 27#include "lj_vm.h"
28#include "lj_prng.h"
27#include "lj_lex.h" 29#include "lj_lex.h"
28#include "lj_alloc.h" 30#include "lj_alloc.h"
31#include "luajit.h"
29 32
30/* -- Stack handling ------------------------------------------------------ */ 33/* -- Stack handling ------------------------------------------------------ */
31 34
@@ -47,6 +50,7 @@
47** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 50** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
48** slots above top, but then mobj is always a function. So we can get by 51** slots above top, but then mobj is always a function. So we can get by
49** with 5 extra slots. 52** with 5 extra slots.
53** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC.
50*/ 54*/
51 55
52/* Resize stack slots and adjust pointers in state. */ 56/* Resize stack slots and adjust pointers in state. */
@@ -57,9 +61,10 @@ static void resizestack(lua_State *L, MSize n)
57 MSize oldsize = L->stacksize; 61 MSize oldsize = L->stacksize;
58 MSize realsize = n + 1 + LJ_STACK_EXTRA; 62 MSize realsize = n + 1 + LJ_STACK_EXTRA;
59 GCobj *up; 63 GCobj *up;
60 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); 64 lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1,
65 "inconsistent stack size");
61 st = (TValue *)lj_mem_realloc(L, tvref(L->stack), 66 st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
62 (MSize)(L->stacksize*sizeof(TValue)), 67 (MSize)(oldsize*sizeof(TValue)),
63 (MSize)(realsize*sizeof(TValue))); 68 (MSize)(realsize*sizeof(TValue)));
64 setmref(L->stack, st); 69 setmref(L->stack, st);
65 delta = (char *)st - (char *)oldst; 70 delta = (char *)st - (char *)oldst;
@@ -67,12 +72,12 @@ static void resizestack(lua_State *L, MSize n)
67 while (oldsize < realsize) /* Clear new slots. */ 72 while (oldsize < realsize) /* Clear new slots. */
68 setnilV(st + oldsize++); 73 setnilV(st + oldsize++);
69 L->stacksize = realsize; 74 L->stacksize = realsize;
75 if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize)
76 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
70 L->base = (TValue *)((char *)L->base + delta); 77 L->base = (TValue *)((char *)L->base + delta);
71 L->top = (TValue *)((char *)L->top + delta); 78 L->top = (TValue *)((char *)L->top + delta);
72 for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) 79 for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
73 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); 80 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta));
74 if (obj2gco(L) == gcref(G(L)->jit_L))
75 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
76} 81}
77 82
78/* Relimit stack after error, in case the limit was overdrawn. */ 83/* Relimit stack after error, in case the limit was overdrawn. */
@@ -89,7 +94,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
89 return; /* Avoid stack shrinking while handling stack overflow. */ 94 return; /* Avoid stack shrinking while handling stack overflow. */
90 if (4*used < L->stacksize && 95 if (4*used < L->stacksize &&
91 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && 96 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
92 obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ 97 /* Don't shrink stack of live trace. */
98 (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
93 resizestack(L, L->stacksize >> 1); 99 resizestack(L, L->stacksize >> 1);
94} 100}
95 101
@@ -105,6 +111,10 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
105 } 111 }
106 resizestack(L, n); 112 resizestack(L, n);
107 } else { /* Request would overflow. Raise a stack overflow error. */ 113 } else { /* Request would overflow. Raise a stack overflow error. */
114 if (LJ_HASJIT) {
115 TValue *base = tvref(G(L)->jit_base);
116 if (base) L->base = base;
117 }
108 if (curr_funcisL(L)) { 118 if (curr_funcisL(L)) {
109 L->top = curr_topL(L); 119 L->top = curr_topL(L);
110 if (L->top > tvref(L->maxstack)) { 120 if (L->top > tvref(L->maxstack)) {
@@ -112,7 +122,7 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
112 ** dummy. This can happen when BC_IFUNCF is trying to grow the stack. 122 ** dummy. This can happen when BC_IFUNCF is trying to grow the stack.
113 */ 123 */
114 L->top = L->base; 124 L->top = L->base;
115 setframe_gc(L->base - 1, obj2gco(L)); 125 setframe_gc(L->base - 1 - LJ_FR2, obj2gco(L), LJ_TTHREAD);
116 } 126 }
117 } 127 }
118 if (L->stacksize <= LJ_STACK_MAXEX) { 128 if (L->stacksize <= LJ_STACK_MAXEX) {
@@ -142,6 +152,18 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L)
142 lj_state_growstack(L, 1); 152 lj_state_growstack(L, 1);
143} 153}
144 154
155static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud)
156{
157 UNUSED(dummy);
158 lj_state_growstack(co, *(MSize *)ud);
159 return NULL;
160}
161
162int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need)
163{
164 return lj_vm_cpcall(L, NULL, &need, cpgrowstack);
165}
166
145/* Allocate basic stack for new state. */ 167/* Allocate basic stack for new state. */
146static void stack_init(lua_State *L1, lua_State *L) 168static void stack_init(lua_State *L1, lua_State *L)
147{ 169{
@@ -150,8 +172,9 @@ static void stack_init(lua_State *L1, lua_State *L)
150 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; 172 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
151 stend = st + L1->stacksize; 173 stend = st + L1->stacksize;
152 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); 174 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1);
153 L1->base = L1->top = st+1; 175 setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */
154 setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */ 176 if (LJ_FR2) setnilV(st++);
177 L1->base = L1->top = st;
155 while (st < stend) /* Clear new slots. */ 178 while (st < stend) /* Clear new slots. */
156 setnilV(st++); 179 setnilV(st++);
157} 180}
@@ -168,12 +191,16 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
168 /* NOBARRIER: State initialization, all objects are white. */ 191 /* NOBARRIER: State initialization, all objects are white. */
169 setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); 192 setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL)));
170 settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); 193 settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY));
171 lj_str_resize(L, LJ_MIN_STRTAB-1); 194 lj_str_init(L);
172 lj_meta_init(L); 195 lj_meta_init(L);
173 lj_lex_init(L); 196 lj_lex_init(L);
174 fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ 197 fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */
175 g->gc.threshold = 4*g->gc.total; 198 g->gc.threshold = 4*g->gc.total;
199#if LJ_HASFFI
200 lj_ctype_initfin(L);
201#endif
176 lj_trace_initstate(g); 202 lj_trace_initstate(g);
203 lj_err_verify();
177 return NULL; 204 return NULL;
178} 205}
179 206
@@ -182,16 +209,25 @@ static void close_state(lua_State *L)
182 global_State *g = G(L); 209 global_State *g = G(L);
183 lj_func_closeuv(L, tvref(L->stack)); 210 lj_func_closeuv(L, tvref(L->stack));
184 lj_gc_freeall(g); 211 lj_gc_freeall(g);
185 lua_assert(gcref(g->gc.root) == obj2gco(L)); 212 lj_assertG(gcref(g->gc.root) == obj2gco(L),
186 lua_assert(g->strnum == 0); 213 "main thread is not first GC object");
214 lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num);
187 lj_trace_freestate(g); 215 lj_trace_freestate(g);
188#if LJ_HASFFI 216#if LJ_HASFFI
189 lj_ctype_freestate(g); 217 lj_ctype_freestate(g);
190#endif 218#endif
191 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 219 lj_str_freetab(g);
192 lj_str_freebuf(g, &g->tmpbuf); 220 lj_buf_free(g, &g->tmpbuf);
193 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 221 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
194 lua_assert(g->gc.total == sizeof(GG_State)); 222#if LJ_64
223 if (mref(g->gc.lightudseg, uint32_t)) {
224 MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2;
225 lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t);
226 }
227#endif
228 lj_assertG(g->gc.total == sizeof(GG_State),
229 "memory leak of %lld bytes",
230 (long long)(g->gc.total - sizeof(GG_State)));
195#ifndef LUAJIT_USE_SYSMALLOC 231#ifndef LUAJIT_USE_SYSMALLOC
196 if (g->allocf == lj_alloc_f) 232 if (g->allocf == lj_alloc_f)
197 lj_alloc_destroy(g->allocd); 233 lj_alloc_destroy(g->allocd);
@@ -200,17 +236,34 @@ static void close_state(lua_State *L)
200 g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); 236 g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
201} 237}
202 238
203#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) 239#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
204lua_State *lj_state_newstate(lua_Alloc f, void *ud) 240lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd)
205#else 241#else
206LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) 242LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
207#endif 243#endif
208{ 244{
209 GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); 245 PRNGState prng;
210 lua_State *L = &GG->L; 246 GG_State *GG;
211 global_State *g = &GG->g; 247 lua_State *L;
212 if (GG == NULL || !checkptr32(GG)) return NULL; 248 global_State *g;
249 /* We need the PRNG for the memory allocator, so initialize this first. */
250 if (!lj_prng_seed_secure(&prng)) {
251 lj_assertX(0, "secure PRNG seeding failed");
252 /* Can only return NULL here, so this errors with "not enough memory". */
253 return NULL;
254 }
255#ifndef LUAJIT_USE_SYSMALLOC
256 if (allocf == LJ_ALLOCF_INTERNAL) {
257 allocd = lj_alloc_create(&prng);
258 if (!allocd) return NULL;
259 allocf = lj_alloc_f;
260 }
261#endif
262 GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State));
263 if (GG == NULL || !checkptrGC(GG)) return NULL;
213 memset(GG, 0, sizeof(GG_State)); 264 memset(GG, 0, sizeof(GG_State));
265 L = &GG->L;
266 g = &GG->g;
214 L->gct = ~LJ_TTHREAD; 267 L->gct = ~LJ_TTHREAD;
215 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ 268 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
216 L->dummy_ffid = FF_C; 269 L->dummy_ffid = FF_C;
@@ -218,17 +271,25 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
218 g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; 271 g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED;
219 g->strempty.marked = LJ_GC_WHITE0; 272 g->strempty.marked = LJ_GC_WHITE0;
220 g->strempty.gct = ~LJ_TSTR; 273 g->strempty.gct = ~LJ_TSTR;
221 g->allocf = f; 274 g->allocf = allocf;
222 g->allocd = ud; 275 g->allocd = allocd;
276 g->prng = prng;
277#ifndef LUAJIT_USE_SYSMALLOC
278 if (allocf == lj_alloc_f) {
279 lj_alloc_setprng(allocd, &g->prng);
280 }
281#endif
223 setgcref(g->mainthref, obj2gco(L)); 282 setgcref(g->mainthref, obj2gco(L));
224 setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); 283 setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
225 setgcref(g->uvhead.next, obj2gco(&g->uvhead)); 284 setgcref(g->uvhead.next, obj2gco(&g->uvhead));
226 g->strmask = ~(MSize)0; 285 g->str.mask = ~(MSize)0;
227 setnilV(registry(L)); 286 setnilV(registry(L));
228 setnilV(&g->nilnode.val); 287 setnilV(&g->nilnode.val);
229 setnilV(&g->nilnode.key); 288 setnilV(&g->nilnode.key);
289#if !LJ_GC64
230 setmref(g->nilnode.freetop, &g->nilnode); 290 setmref(g->nilnode.freetop, &g->nilnode);
231 lj_str_initbuf(&g->tmpbuf); 291#endif
292 lj_buf_init(NULL, &g->tmpbuf);
232 g->gc.state = GCSpause; 293 g->gc.state = GCSpause;
233 setgcref(g->gc.root, obj2gco(L)); 294 setgcref(g->gc.root, obj2gco(L));
234 setmref(g->gc.sweep, &g->gc.root); 295 setmref(g->gc.sweep, &g->gc.root);
@@ -242,7 +303,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
242 close_state(L); 303 close_state(L);
243 return NULL; 304 return NULL;
244 } 305 }
245 L->status = 0; 306 L->status = LUA_OK;
246 return L; 307 return L;
247} 308}
248 309
@@ -261,6 +322,10 @@ LUA_API void lua_close(lua_State *L)
261 global_State *g = G(L); 322 global_State *g = G(L);
262 int i; 323 int i;
263 L = mainthread(g); /* Only the main thread can be closed. */ 324 L = mainthread(g); /* Only the main thread can be closed. */
325#if LJ_HASPROFILE
326 luaJIT_profile_stop(L);
327#endif
328 setgcrefnull(g->cur_L);
264 lj_func_closeuv(L, tvref(L->stack)); 329 lj_func_closeuv(L, tvref(L->stack));
265 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ 330 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
266#if LJ_HASJIT 331#if LJ_HASJIT
@@ -270,10 +335,10 @@ LUA_API void lua_close(lua_State *L)
270#endif 335#endif
271 for (i = 0;;) { 336 for (i = 0;;) {
272 hook_enter(g); 337 hook_enter(g);
273 L->status = 0; 338 L->status = LUA_OK;
339 L->base = L->top = tvref(L->stack) + 1 + LJ_FR2;
274 L->cframe = NULL; 340 L->cframe = NULL;
275 L->base = L->top = tvref(L->stack) + 1; 341 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) {
276 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
277 if (++i >= 10) break; 342 if (++i >= 10) break;
278 lj_gc_separateudata(g, 1); /* Separate udata again. */ 343 lj_gc_separateudata(g, 1); /* Separate udata again. */
279 if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ 344 if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */
@@ -288,7 +353,7 @@ lua_State *lj_state_new(lua_State *L)
288 lua_State *L1 = lj_mem_newobj(L, lua_State); 353 lua_State *L1 = lj_mem_newobj(L, lua_State);
289 L1->gct = ~LJ_TTHREAD; 354 L1->gct = ~LJ_TTHREAD;
290 L1->dummy_ffid = FF_C; 355 L1->dummy_ffid = FF_C;
291 L1->status = 0; 356 L1->status = LUA_OK;
292 L1->stacksize = 0; 357 L1->stacksize = 0;
293 setmref(L1->stack, NULL); 358 setmref(L1->stack, NULL);
294 L1->cframe = NULL; 359 L1->cframe = NULL;
@@ -297,15 +362,20 @@ lua_State *lj_state_new(lua_State *L)
297 setmrefr(L1->glref, L->glref); 362 setmrefr(L1->glref, L->glref);
298 setgcrefr(L1->env, L->env); 363 setgcrefr(L1->env, L->env);
299 stack_init(L1, L); /* init stack */ 364 stack_init(L1, L); /* init stack */
300 lua_assert(iswhite(obj2gco(L1))); 365 lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white");
301 return L1; 366 return L1;
302} 367}
303 368
304void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) 369void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
305{ 370{
306 lua_assert(L != mainthread(g)); 371 lj_assertG(L != mainthread(g), "free of main thread");
307 lj_func_closeuv(L, tvref(L->stack)); 372 if (obj2gco(L) == gcref(g->cur_L))
308 lua_assert(gcref(L->openupval) == NULL); 373 setgcrefnull(g->cur_L);
374 if (gcref(L->openupval) != NULL) {
375 lj_func_closeuv(L, tvref(L->stack));
376 lj_trace_abort(g); /* For aa_uref soundness. */
377 lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
378 }
309 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 379 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
310 lj_mem_freet(g, L); 380 lj_mem_freet(g, L);
311} 381}
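
The lj_state.c hunk above adds lj_state_cpgrowstack, which grows the stack inside a protected call so an allocation or overflow error is caught instead of propagating through the caller. A minimal standalone sketch (not part of the patch) of the same pattern, expressed with the public Lua 5.1 API (lua_cpcall, luaL_checkstack) instead of the internal lj_vm_cpcall:

#include <stdio.h>
#include <lua.h>
#include <lauxlib.h>

static int do_grow(lua_State *L)
{
  int *need = (int *)lua_touserdata(L, 1);          /* ud arrives as light userdata */
  luaL_checkstack(L, *need, "cannot grow stack");   /* raises an error on failure */
  return 0;
}

int main(void)
{
  lua_State *L = luaL_newstate();
  int need = 64;
  int status = lua_cpcall(L, do_grow, &need);       /* any error is caught here */
  printf("grow status = %d\n", status);
  lua_close(L);
  return 0;
}
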
diff --git a/src/lj_state.h b/src/lj_state.h
index dea9f58e..3850e5a1 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L);
18LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); 18LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
19LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need); 19LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need);
20LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L); 20LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L);
21LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need);
21 22
22static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) 23static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
23{ 24{
@@ -28,8 +29,10 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
28 29
29LJ_FUNC lua_State *lj_state_new(lua_State *L); 30LJ_FUNC lua_State *lj_state_new(lua_State *L);
30LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); 31LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L);
31#if LJ_64 32#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
32LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); 33LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud);
33#endif 34#endif
34 35
36#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4))
37
35#endif 38#endif
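
LJ_ALLOCF_INTERNAL above is a sentinel function-pointer value that is only ever compared, never called; passing it to lua_newstate (see the lj_state.c hunk) selects the built-in allocator seeded from the secure PRNG. A standalone sketch (not part of the patch) of the sentinel-dispatch idea; names and behavior here are illustrative only:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef void *(*alloc_fn)(void *ud, void *ptr, size_t osize, size_t nsize);

#define ALLOCF_INTERNAL ((alloc_fn)(void *)(uintptr_t)(1237<<4))  /* compared, never called */

static void newstate(alloc_fn f)
{
  if (f == ALLOCF_INTERNAL)
    puts("using the built-in allocator");
  else
    puts("using a user-supplied allocator");
}

int main(void)
{
  newstate(ALLOCF_INTERNAL);
  newstate(NULL);
  return 0;
}
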
diff --git a/src/lj_str.c b/src/lj_str.c
index 7242a8e0..cfdaec6f 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,13 +1,8 @@
1/* 1/*
2** String handling. 2** String handling.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 4*/
8 5
9#include <stdio.h>
10
11#define lj_str_c 6#define lj_str_c
12#define LUA_CORE 7#define LUA_CORE
13 8
@@ -15,10 +10,10 @@
15#include "lj_gc.h" 10#include "lj_gc.h"
16#include "lj_err.h" 11#include "lj_err.h"
17#include "lj_str.h" 12#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_char.h" 13#include "lj_char.h"
14#include "lj_prng.h"
20 15
21/* -- String interning ---------------------------------------------------- */ 16/* -- String helpers ------------------------------------------------------ */
22 17
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 18/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 19int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -43,297 +38,333 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
43 return (int32_t)(a->len - b->len); 38 return (int32_t)(a->len - b->len);
44} 39}
45 40
46/* Fast string data comparison. Caveat: unaligned access to 1st string! */ 41/* Find fixed string p inside string s. */
47static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) 42const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
48{ 43{
49 MSize i = 0; 44 if (plen <= slen) {
50 lua_assert(len > 0); 45 if (plen == 0) {
51 lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); 46 return s;
52 do { /* Note: innocuous access up to end of string + 3. */ 47 } else {
53 uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); 48 int c = *(const uint8_t *)p++;
54 if (v) { 49 plen--; slen -= plen;
55 i -= len; 50 while (slen) {
56#if LJ_LE 51 const char *q = (const char *)memchr(s, c, slen);
57 return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; 52 if (!q) break;
58#else 53 if (memcmp(q+1, p, plen) == 0) return q;
59 return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; 54 q++; slen -= (MSize)(q-s); s = q;
60#endif 55 }
61 } 56 }
62 i += 4; 57 }
63 } while (i < len); 58 return NULL;
64 return 0;
65} 59}
66 60
67/* Resize the string hash table (grow and shrink). */ 61/* Check whether a string has a pattern matching character. */
68void lj_str_resize(lua_State *L, MSize newmask) 62int lj_str_haspattern(GCstr *s)
69{ 63{
70 global_State *g = G(L); 64 const char *p = strdata(s), *q = p + s->len;
71 GCRef *newhash; 65 while (p < q) {
72 MSize i; 66 int c = *(const uint8_t *)p++;
73 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) 67 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
74 return; /* No resizing during GC traversal or if already too big. */ 68 return 1; /* Found a pattern matching char. */
75 newhash = lj_mem_newvec(L, newmask+1, GCRef);
76 memset(newhash, 0, (newmask+1)*sizeof(GCRef));
77 for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
78 GCobj *p = gcref(g->strhash[i]);
79 while (p) { /* Follow each hash chain and reinsert all strings. */
80 MSize h = gco2str(p)->hash & newmask;
81 GCobj *next = gcnext(p);
82 /* NOBARRIER: The string table is a GC root. */
83 setgcrefr(p->gch.nextgc, newhash[h]);
84 setgcref(newhash[h], p);
85 p = next;
86 }
87 } 69 }
88 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 70 return 0; /* No pattern matching chars found. */
89 g->strmask = newmask;
90 g->strhash = newhash;
91} 71}
92 72
93/* Intern a string and return string object. */ 73/* -- String hashing ------------------------------------------------------ */
94GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) 74
75/* Keyed sparse ARX string hash. Constant time. */
76static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
95{ 77{
96 global_State *g; 78 /* Constants taken from lookup3 hash by Bob Jenkins. */
97 GCstr *s; 79 StrHash a, b, h = len ^ (StrHash)seed;
98 GCobj *o;
99 MSize len = (MSize)lenx;
100 MSize a, b, h = len;
101 if (lenx >= LJ_MAX_STR)
102 lj_err_msg(L, LJ_ERR_STROV);
103 g = G(L);
104 /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
105 if (len >= 4) { /* Caveat: unaligned access! */ 80 if (len >= 4) { /* Caveat: unaligned access! */
106 a = lj_getu32(str); 81 a = lj_getu32(str);
107 h ^= lj_getu32(str+len-4); 82 h ^= lj_getu32(str+len-4);
108 b = lj_getu32(str+(len>>1)-2); 83 b = lj_getu32(str+(len>>1)-2);
109 h ^= b; h -= lj_rol(b, 14); 84 h ^= b; h -= lj_rol(b, 14);
110 b += lj_getu32(str+(len>>2)-1); 85 b += lj_getu32(str+(len>>2)-1);
111 } else if (len > 0) { 86 } else {
112 a = *(const uint8_t *)str; 87 a = *(const uint8_t *)str;
113 h ^= *(const uint8_t *)(str+len-1); 88 h ^= *(const uint8_t *)(str+len-1);
114 b = *(const uint8_t *)(str+(len>>1)); 89 b = *(const uint8_t *)(str+(len>>1));
115 h ^= b; h -= lj_rol(b, 14); 90 h ^= b; h -= lj_rol(b, 14);
116 } else {
117 return &g->strempty;
118 } 91 }
119 a ^= h; a -= lj_rol(h, 11); 92 a ^= h; a -= lj_rol(h, 11);
120 b ^= a; b -= lj_rol(a, 25); 93 b ^= a; b -= lj_rol(a, 25);
121 h ^= b; h -= lj_rol(b, 16); 94 h ^= b; h -= lj_rol(b, 16);
122 /* Check if the string has already been interned. */ 95 return h;
123 o = gcref(g->strhash[h & g->strmask]);
124 if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
125 while (o != NULL) {
126 GCstr *sx = gco2str(o);
127 if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
128 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
129 if (isdead(g, o)) flipwhite(o);
130 return sx; /* Return existing string. */
131 }
132 o = gcnext(o);
133 }
134 } else { /* Slow path: end of string is too close to a page boundary. */
135 while (o != NULL) {
136 GCstr *sx = gco2str(o);
137 if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
138 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
139 if (isdead(g, o)) flipwhite(o);
140 return sx; /* Return existing string. */
141 }
142 o = gcnext(o);
143 }
144 }
145 /* Nope, create a new string. */
146 s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
147 newwhite(g, s);
148 s->gct = ~LJ_TSTR;
149 s->len = len;
150 s->hash = h;
151 s->reserved = 0;
152 memcpy(strdatawr(s), str, len);
153 strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
154 /* Add it to string hash table. */
155 h &= g->strmask;
156 s->nextgc = g->strhash[h];
157 /* NOBARRIER: The string table is a GC root. */
158 setgcref(g->strhash[h], obj2gco(s));
159 if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
160 lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
161 return s; /* Return newly interned string. */
162} 96}
163 97
164void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) 98#if LUAJIT_SECURITY_STRHASH
99/* Keyed dense ARX string hash. Linear time. */
100static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
101 const char *str, MSize len)
165{ 102{
166 g->strnum--; 103 StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
167 lj_mem_free(g, s, sizestring(s)); 104 if (len > 12) {
105 StrHash a = (StrHash)seed;
106 const char *pe = str+len-12, *p = pe, *q = str;
107 do {
108 a += lj_getu32(p);
109 b += lj_getu32(p+4);
110 h += lj_getu32(p+8);
111 p = q; q += 12;
112 h ^= b; h -= lj_rol(b, 14);
113 a ^= h; a -= lj_rol(h, 11);
114 b ^= a; b -= lj_rol(a, 25);
115 } while (p < pe);
116 h ^= b; h -= lj_rol(b, 16);
117 a ^= h; a -= lj_rol(h, 4);
118 b ^= a; b -= lj_rol(a, 14);
119 }
120 return b;
168} 121}
122#endif
169 123
170/* -- Type conversions ---------------------------------------------------- */ 124/* -- String interning ---------------------------------------------------- */
171 125
172/* Print number to buffer. Canonicalizes non-finite values. */ 126#define LJ_STR_MAXCOLL 32
173size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
174{
175 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
176 lua_Number n = o->n;
177#if __BIONIC__
178 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
179#endif
180 return (size_t)lua_number2str(s, n);
181 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
182 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
183 } else if ((o->u32.hi & 0x80000000) == 0) {
184 s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
185 } else {
186 s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
187 }
188}
189 127
190/* Print integer to buffer. Returns pointer to start. */ 128/* Resize the string interning hash table (grow and shrink). */
191char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) 129void lj_str_resize(lua_State *L, MSize newmask)
192{ 130{
193 uint32_t u = k < 0 ? ~(uint32_t)k+1u : (uint32_t)k; 131 global_State *g = G(L);
194 p += 1+10; 132 GCRef *newtab, *oldtab = g->str.tab;
195 do { *--p = (char)('0' + u % 10); } while (u /= 10); 133 MSize i;
196 if (k < 0) *--p = '-';
197 return p;
198}
199 134
200/* Convert number to string. */ 135 /* No resizing during GC traversal or if already too big. */
201GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) 136 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
202{ 137 return;
203 char buf[LJ_STR_NUMBUF];
204 size_t len = lj_str_bufnum(buf, (TValue *)np);
205 return lj_str_new(L, buf, len);
206}
207 138
208/* Convert integer to string. */ 139 newtab = lj_mem_newvec(L, newmask+1, GCRef);
209GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) 140 memset(newtab, 0, (newmask+1)*sizeof(GCRef));
210{
211 char s[1+10];
212 char *p = lj_str_bufint(s, k);
213 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
214}
215 141
216GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) 142#if LUAJIT_SECURITY_STRHASH
217{ 143 /* Check which chains need secondary hashes. */
218 return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); 144 if (g->str.second) {
219} 145 int newsecond = 0;
146 /* Compute primary chain lengths. */
147 for (i = g->str.mask; i != ~(MSize)0; i--) {
148 GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
149 while (o) {
150 GCstr *s = gco2str(o);
151 MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) :
152 s->hash;
153 hash &= newmask;
154 setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1);
155 o = gcnext(o);
156 }
157 }
158 /* Mark secondary chains. */
159 for (i = newmask; i != ~(MSize)0; i--) {
160 int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL;
161 newsecond |= secondary;
162 setgcrefp(newtab[i], secondary);
163 }
164 g->str.second = newsecond;
165 }
166#endif
220 167
221/* -- String formatting --------------------------------------------------- */ 168 /* Reinsert all strings from the old table into the new table. */
169 for (i = g->str.mask; i != ~(MSize)0; i--) {
170 GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
171 while (o) {
172 GCobj *next = gcnext(o);
173 GCstr *s = gco2str(o);
174 MSize hash = s->hash;
175#if LUAJIT_SECURITY_STRHASH
176 uintptr_t u;
177 if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */
178 hash &= newmask;
179 u = gcrefu(newtab[hash]);
180 if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */
181 s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len);
182 s->hashalg = 1;
183 hash &= newmask;
184 u = gcrefu(newtab[hash]);
185 }
186 } else { /* String hashed with secondary hash. */
187 MSize shash = hash_sparse(g->str.seed, strdata(s), s->len);
188 u = gcrefu(newtab[shash & newmask]);
189 if (u & 1) {
190 hash &= newmask;
191 u = gcrefu(newtab[hash]);
192 } else { /* Revert string back to primary hash. */
193 s->hash = shash;
194 s->hashalg = 0;
195 hash = (shash & newmask);
196 }
197 }
198 /* NOBARRIER: The string table is a GC root. */
199 setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
200 setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1)));
201#else
202 hash &= newmask;
203 /* NOBARRIER: The string table is a GC root. */
204 setgcrefr(o->gch.nextgc, newtab[hash]);
205 setgcref(newtab[hash], o);
206#endif
207 o = next;
208 }
209 }
210
211 /* Free old table and replace with new table. */
212 lj_str_freetab(g);
213 g->str.tab = newtab;
214 g->str.mask = newmask;
215}
222 216
223static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) 217#if LUAJIT_SECURITY_STRHASH
218/* Rehash and rechain all strings in a chain. */
219static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc,
220 const char *str, MSize len)
224{ 221{
225 char *p; 222 global_State *g = G(L);
226 MSize i; 223 int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */
227 if (sb->n + len > sb->sz) { 224 GCRef *strtab = g->str.tab;
228 MSize sz = sb->sz * 2; 225 MSize strmask = g->str.mask;
229 while (sb->n + len > sz) sz = sz * 2; 226 GCobj *o = gcref(strtab[hashc & strmask]);
230 lj_str_resizebuf(L, sb, sz); 227 setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1));
228 g->str.second = 1;
229 while (o) {
230 uintptr_t u;
231 GCobj *next = gcnext(o);
232 GCstr *s = gco2str(o);
233 StrHash hash;
234 if (ow) { /* Must sweep while rechaining. */
235 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */
236 lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
237 "sweep of undead string");
238 makewhite(g, o);
239 } else { /* Free dead string. */
240 lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
241 "sweep of unlive string");
242 lj_str_free(g, s);
243 o = next;
244 continue;
245 }
246 }
247 hash = s->hash;
248 if (!s->hashalg) { /* Rehash with secondary hash. */
249 hash = hash_dense(g->str.seed, hash, strdata(s), s->len);
250 s->hash = hash;
251 s->hashalg = 1;
252 }
253 /* Rechain. */
254 hash &= strmask;
255 u = gcrefu(strtab[hash]);
256 setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
257 setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1)));
258 o = next;
231 } 259 }
232 p = sb->buf + sb->n; 260 /* Try to insert the pending string again. */
233 sb->n += len; 261 return lj_str_new(L, str, len);
234 for (i = 0; i < len; i++) p[i] = str[i];
235} 262}
263#endif
264
265/* Reseed String ID from PRNG after random interval < 2^bits. */
266#if LUAJIT_SECURITY_STRID == 1
267#define STRID_RESEED_INTERVAL 8
268#elif LUAJIT_SECURITY_STRID == 2
269#define STRID_RESEED_INTERVAL 4
270#elif LUAJIT_SECURITY_STRID >= 3
271#define STRID_RESEED_INTERVAL 0
272#endif
236 273
237static void addchar(lua_State *L, SBuf *sb, int c) 274/* Allocate a new string and add to string interning table. */
275static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
276 StrHash hash, int hashalg)
238{ 277{
239 if (sb->n + 1 > sb->sz) { 278 GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr);
240 MSize sz = sb->sz * 2; 279 global_State *g = G(L);
241 lj_str_resizebuf(L, sb, sz); 280 uintptr_t u;
281 newwhite(g, s);
282 s->gct = ~LJ_TSTR;
283 s->len = len;
284 s->hash = hash;
285#ifndef STRID_RESEED_INTERVAL
286 s->sid = g->str.id++;
287#elif STRID_RESEED_INTERVAL
288 if (!g->str.idreseed--) {
289 uint64_t r = lj_prng_u64(&g->prng);
290 g->str.id = (StrID)r;
291 g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL));
242 } 292 }
243 sb->buf[sb->n++] = (char)c; 293 s->sid = g->str.id++;
294#else
295 s->sid = (StrID)lj_prng_u64(&g->prng);
296#endif
297 s->reserved = 0;
298 s->hashalg = (uint8_t)hashalg;
299 /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */
300 *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0;
301 memcpy(strdatawr(s), str, len);
302 /* Add to string hash table. */
303 hash &= g->str.mask;
304 u = gcrefu(g->str.tab[hash]);
305 setgcrefp(s->nextgc, (u & ~(uintptr_t)1));
306 /* NOBARRIER: The string table is a GC root. */
307 setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1)));
308 if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */
309 lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */
310 return s; /* Return newly interned string. */
244} 311}
245 312
246/* Push formatted message as a string object to Lua stack. va_list variant. */ 313/* Intern a string and return string object. */
247const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) 314GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
248{ 315{
249 SBuf *sb = &G(L)->tmpbuf; 316 global_State *g = G(L);
250 lj_str_needbuf(L, sb, (MSize)strlen(fmt)); 317 if (lenx-1 < LJ_MAX_STR-1) {
251 lj_str_resetbuf(sb); 318 MSize len = (MSize)lenx;
252 for (;;) { 319 StrHash hash = hash_sparse(g->str.seed, str, len);
253 const char *e = strchr(fmt, '%'); 320 MSize coll = 0;
254 if (e == NULL) break; 321 int hashalg = 0;
255 addstr(L, sb, fmt, (MSize)(e-fmt)); 322 /* Check if the string has already been interned. */
256 /* This function only handles %s, %c, %d, %f and %p formats. */ 323 GCobj *o = gcref(g->str.tab[hash & g->str.mask]);
257 switch (e[1]) { 324#if LUAJIT_SECURITY_STRHASH
258 case 's': { 325 if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */
259 const char *s = va_arg(argp, char *); 326 hashalg = 1;
260 if (s == NULL) s = "(null)"; 327 hash = hash_dense(g->str.seed, hash, str, len);
261 addstr(L, sb, s, (MSize)strlen(s)); 328 o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1);
262 break; 329 }
263 }
264 case 'c':
265 addchar(L, sb, va_arg(argp, int));
266 break;
267 case 'd': {
268 char buf[LJ_STR_INTBUF];
269 char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
270 addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
271 break;
272 }
273 case 'f': {
274 char buf[LJ_STR_NUMBUF];
275 TValue tv;
276 MSize len;
277 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
278 len = (MSize)lj_str_bufnum(buf, &tv);
279 addstr(L, sb, buf, len);
280 break;
281 }
282 case 'p': {
283#define FMTP_CHARS (2*sizeof(ptrdiff_t))
284 char buf[2+FMTP_CHARS];
285 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
286 ptrdiff_t i, lasti = 2+FMTP_CHARS;
287 if (p == 0) {
288 addstr(L, sb, "NULL", 4);
289 break;
290 }
291#if LJ_64
292 /* Shorten output for 64 bit pointers. */
293 lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
294#endif 330#endif
295 buf[0] = '0'; 331 while (o != NULL) {
296 buf[1] = 'x'; 332 GCstr *sx = gco2str(o);
297 for (i = lasti-1; i >= 2; i--, p >>= 4) 333 if (sx->hash == hash && sx->len == len) {
298 buf[i] = "0123456789abcdef"[(p & 15)]; 334 if (memcmp(str, strdata(sx), len) == 0) {
299 addstr(L, sb, buf, (MSize)lasti); 335 if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
300 break; 336 return sx; /* Return existing string. */
337 }
338 coll++;
301 } 339 }
302 case '%': 340 coll++;
303 addchar(L, sb, '%'); 341 o = gcnext(o);
304 break; 342 }
305 default: 343#if LUAJIT_SECURITY_STRHASH
306 addchar(L, sb, '%'); 344 /* Rehash chain if there are too many collisions. */
307 addchar(L, sb, e[1]); 345 if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) {
308 break; 346 return lj_str_rehash_chain(L, hash, str, len);
309 } 347 }
310 fmt = e+2; 348#endif
349 /* Otherwise allocate a new string. */
350 return lj_str_alloc(L, str, len, hash, hashalg);
351 } else {
352 if (lenx)
353 lj_err_msg(L, LJ_ERR_STROV);
354 return &g->strempty;
311 } 355 }
312 addstr(L, sb, fmt, (MSize)strlen(fmt));
313 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
314 incr_top(L);
315 return strVdata(L->top - 1);
316} 356}
317 357
318/* Push formatted message as a string object to Lua stack. Vararg variant. */ 358void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
319const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
320{ 359{
321 const char *msg; 360 g->str.num--;
322 va_list argp; 361 lj_mem_free(g, s, lj_str_size(s->len));
323 va_start(argp, fmt);
324 msg = lj_str_pushvf(L, fmt, argp);
325 va_end(argp);
326 return msg;
327} 362}
328 363
329/* -- Buffer handling ----------------------------------------------------- */ 364void LJ_FASTCALL lj_str_init(lua_State *L)
330
331char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
332{ 365{
333 if (sz > sb->sz) { 366 global_State *g = G(L);
334 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; 367 g->str.seed = lj_prng_u64(&g->prng);
335 lj_str_resizebuf(L, sb, sz); 368 lj_str_resize(L, LJ_MIN_STRTAB-1);
336 }
337 return sb->buf;
338} 369}
339 370
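
The rewritten lj_str.c above switches interning to a keyed, constant-time sparse hash (hash_sparse mixes only the head, middle and tail of the string with a PRNG seed) and falls back to a full-length dense hash (hash_dense) once a chain collects more than LJ_STR_MAXCOLL collisions. A minimal standalone sketch (not part of the patch) of the sparse-sampling idea; the mixing below is deliberately simplified and does not reproduce LuaJIT's constants:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint32_t getu32(const char *p)
{
  uint32_t v; memcpy(&v, p, 4); return v;   /* unaligned-safe read */
}

/* O(1) keyed hash: samples a constant number of positions, whatever the length. */
static uint32_t sparse_hash(uint64_t seed, const char *s, uint32_t len)
{
  uint32_t h = len ^ (uint32_t)seed;
  if (len >= 4) {
    h ^= getu32(s);                    /* head */
    h ^= getu32(s + (len >> 1) - 2);   /* middle */
    h ^= getu32(s + len - 4);          /* tail */
    h *= 2654435761u;                  /* final mix (Knuth multiplicative) */
  } else if (len > 0) {
    h ^= (uint8_t)s[0] ^ ((uint32_t)(uint8_t)s[len-1] << 8);
    h *= 2654435761u;
  }
  return h;
}

int main(void)
{
  uint64_t seed = 0x9e3779b97f4a7c15ull;  /* would come from the PRNG in practice */
  printf("%08x\n", sparse_hash(seed, "hello world", 11));
  printf("%08x\n", sparse_hash(seed, "hello_world", 11));
  return 0;
}
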
diff --git a/src/lj_str.h b/src/lj_str.h
index 8689a0d7..2a5a8190 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,41 +10,22 @@
10 10
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String helpers. */
14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC const char *lj_str_find(const char *s, const char *f,
16 MSize slen, MSize flen);
17LJ_FUNC int lj_str_haspattern(GCstr *s);
18
19/* String interning. */
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 20LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 21LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 22LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
23LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
24#define lj_str_freetab(g) \
25 (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef))
18 26
19#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) 27#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 28#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 29#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
22/* Type conversions. */
23LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
24LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
27LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
28
29#define LJ_STR_INTBUF (1+10)
30#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
31
32/* String formatting. */
33LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
34LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
35#if defined(__GNUC__)
36 __attribute__ ((format (printf, 2, 3)))
37#endif
38 ;
39
40/* Resizable string buffers. Struct definition in lj_obj.h. */
41LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
42
43#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
44#define lj_str_resetbuf(sb) ((sb)->n = 0)
45#define lj_str_resizebuf(L, sb, size) \
46 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
47 (sb)->sz = (size))
48#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
49 30
50#endif 31#endif
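
The new lj_str_size macro above rounds the string payload up to a multiple of 4 bytes, which both guarantees room for the NUL terminator and matches the 4-byte tail clear in lj_str_alloc. A tiny standalone sketch (not part of the patch) of the arithmetic; GCSTR_HEADER stands in for sizeof(GCstr) and is not LuaJIT's actual header size:

#include <stdint.h>
#include <stdio.h>

#define GCSTR_HEADER 16u  /* illustrative header size only */
#define STR_SIZE(len) (GCSTR_HEADER + (((len)+4u) & ~(uint32_t)3))

int main(void)
{
  uint32_t len;
  for (len = 0; len <= 9; len++)
    printf("len=%u total=%u payload=%u\n",
           (unsigned)len, (unsigned)STR_SIZE(len),
           (unsigned)(STR_SIZE(len) - GCSTR_HEADER));
  return 0;
}
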
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..909255db
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,606 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_strfmt_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_err.h"
13#include "lj_buf.h"
14#include "lj_str.h"
15#include "lj_meta.h"
16#include "lj_state.h"
17#include "lj_char.h"
18#include "lj_strfmt.h"
19#if LJ_HASFFI
20#include "lj_ctype.h"
21#endif
22#include "lj_lib.h"
23
24/* -- Format parser ------------------------------------------------------- */
25
26static const uint8_t strfmt_map[('x'-'A')+1] = {
27 STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0,
28 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
29 0,0,0,0,0,0,
30 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
31 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
32};
33
34SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
35{
36 const uint8_t *p = fs->p, *e = fs->e;
37 fs->str = (const char *)p;
38 for (; p < e; p++) {
39 if (*p == '%') { /* Escape char? */
40 if (p[1] == '%') { /* '%%'? */
41 fs->p = ++p+1;
42 goto retlit;
43 } else {
44 SFormat sf = 0;
45 uint32_t c;
46 if (p != (const uint8_t *)fs->str)
47 break;
48 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
49 /* Parse flags. */
50 if (*p == '-') sf |= STRFMT_F_LEFT;
51 else if (*p == '+') sf |= STRFMT_F_PLUS;
52 else if (*p == '0') sf |= STRFMT_F_ZERO;
53 else if (*p == ' ') sf |= STRFMT_F_SPACE;
54 else if (*p == '#') sf |= STRFMT_F_ALT;
55 else break;
56 }
57 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
58 uint32_t width = (uint32_t)*p++ - '0';
59 if ((uint32_t)*p - '0' < 10)
60 width = (uint32_t)*p++ - '0' + width*10;
61 sf |= (width << STRFMT_SH_WIDTH);
62 }
63 if (*p == '.') { /* Parse precision. */
64 uint32_t prec = 0;
65 p++;
66 if ((uint32_t)*p - '0' < 10) {
67 prec = (uint32_t)*p++ - '0';
68 if ((uint32_t)*p - '0' < 10)
69 prec = (uint32_t)*p++ - '0' + prec*10;
70 }
71 sf |= ((prec+1) << STRFMT_SH_PREC);
72 }
73 /* Parse conversion. */
74 c = (uint32_t)*p - 'A';
75 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
76 uint32_t sx = strfmt_map[c];
77 if (sx) {
78 fs->p = p+1;
79 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
80 }
81 }
82 /* Return error location. */
83 if (*p >= 32) p++;
84 fs->len = (MSize)(p - (const uint8_t *)fs->str);
85 fs->p = fs->e;
86 return STRFMT_ERR;
87 }
88 }
89 }
90 fs->p = p;
91retlit:
92 fs->len = (MSize)(p - (const uint8_t *)fs->str);
93 return fs->len ? STRFMT_LIT : STRFMT_EOF;
94}
95
96/* -- Raw conversions ----------------------------------------------------- */
97
98#define WINT_R(x, sh, sc) \
99 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
100
101/* Write integer to buffer. */
102char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
103{
104 uint32_t u = (uint32_t)k;
105 if (k < 0) { u = ~u+1u; *p++ = '-'; }
106 if (u < 10000) {
107 if (u < 10) goto dig1;
108 if (u < 100) goto dig2;
109 if (u < 1000) goto dig3;
110 } else {
111 uint32_t v = u / 10000; u -= v * 10000;
112 if (v < 10000) {
113 if (v < 10) goto dig5;
114 if (v < 100) goto dig6;
115 if (v < 1000) goto dig7;
116 } else {
117 uint32_t w = v / 10000; v -= w * 10000;
118 if (w >= 10) WINT_R(w, 10, 10)
119 *p++ = (char)('0'+w);
120 }
121 WINT_R(v, 23, 1000)
122 dig7: WINT_R(v, 12, 100)
123 dig6: WINT_R(v, 10, 10)
124 dig5: *p++ = (char)('0'+v);
125 }
126 WINT_R(u, 23, 1000)
127 dig3: WINT_R(u, 12, 100)
128 dig2: WINT_R(u, 10, 10)
129 dig1: *p++ = (char)('0'+u);
130 return p;
131}
132#undef WINT_R
133
134/* Write pointer to buffer. */
135char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
136{
137 ptrdiff_t x = (ptrdiff_t)v;
138 MSize i, n = STRFMT_MAXBUF_PTR;
139 if (x == 0) {
140 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
141 return p;
142 }
143#if LJ_64
144 /* Shorten output for 64 bit pointers. */
145 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
146#endif
147 p[0] = '0';
148 p[1] = 'x';
149 for (i = n-1; i >= 2; i--, x >>= 4)
150 p[i] = "0123456789abcdef"[(x & 15)];
151 return p+n;
152}
153
154/* Write ULEB128 to buffer. */
155char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
156{
157 for (; v >= 0x80; v >>= 7)
158 *p++ = (char)((v & 0x7f) | 0x80);
159 *p++ = (char)v;
160 return p;
161}
162
163/* Return string or write number to tmp buffer and return pointer to start. */
164const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
165{
166 SBuf *sb;
167 if (tvisstr(o)) {
168 *lenp = strV(o)->len;
169 return strVdata(o);
170 } else if (tvisbuf(o)) {
171 SBufExt *sbx = bufV(o);
172 *lenp = sbufxlen(sbx);
173 return sbx->r;
174 } else if (tvisint(o)) {
175 sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
176 } else if (tvisnum(o)) {
177 sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
178 } else {
179 return NULL;
180 }
181 *lenp = sbuflen(sb);
182 return sb->b;
183}
184
185/* -- Unformatted conversions to buffer ----------------------------------- */
186
187/* Add integer to buffer. */
188SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
189{
190 sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k);
191 return sb;
192}
193
194#if LJ_HASJIT
195/* Add number to buffer. */
196SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
197{
198 return lj_strfmt_putfnum(sb, STRFMT_G14, o->n);
199}
200#endif
201
202SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
203{
204 sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v);
205 return sb;
206}
207
208/* Add quoted string to buffer. */
209static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len)
210{
211 lj_buf_putb(sb, '"');
212 while (len--) {
213 uint32_t c = (uint32_t)(uint8_t)*s++;
214 char *w = lj_buf_more(sb, 4);
215 if (c == '"' || c == '\\' || c == '\n') {
216 *w++ = '\\';
217 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
218 uint32_t d;
219 *w++ = '\\';
220 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
221 *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
222 goto tens;
223 } else if (c >= 10) {
224 tens:
225 d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d);
226 }
227 c += '0';
228 }
229 *w++ = (char)c;
230 sb->w = w;
231 }
232 lj_buf_putb(sb, '"');
233 return sb;
234}
235
236#if LJ_HASJIT
237SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
238{
239 return strfmt_putquotedlen(sb, strdata(str), str->len);
240}
241#endif
242
243/* -- Formatted conversions to buffer ------------------------------------- */
244
245/* Add formatted char to buffer. */
246SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
247{
248 MSize width = STRFMT_WIDTH(sf);
249 char *w = lj_buf_more(sb, width > 1 ? width : 1);
250 if ((sf & STRFMT_F_LEFT)) *w++ = (char)c;
251 while (width-- > 1) *w++ = ' ';
252 if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c;
253 sb->w = w;
254 return sb;
255}
256
257/* Add formatted string to buffer. */
258static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len)
259{
260 MSize width = STRFMT_WIDTH(sf);
261 char *w;
262 if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf);
263 w = lj_buf_more(sb, width > len ? width : len);
264 if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
265 while (width-- > len) *w++ = ' ';
266 if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
267 sb->w = w;
268 return sb;
269}
270
271#if LJ_HASJIT
272SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
273{
274 return strfmt_putfstrlen(sb, sf, strdata(str), str->len);
275}
276#endif
277
278/* Add formatted signed/unsigned integer to buffer. */
279SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
280{
281 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w;
282#ifdef LUA_USE_ASSERT
283 char *ws;
284#endif
285 MSize prefix = 0, len, prec, pprec, width, need;
286
287 /* Figure out signed prefixes. */
288 if (STRFMT_TYPE(sf) == STRFMT_INT) {
289 if ((int64_t)k < 0) {
290 k = ~k+1u;
291 prefix = 256 + '-';
292 } else if ((sf & STRFMT_F_PLUS)) {
293 prefix = 256 + '+';
294 } else if ((sf & STRFMT_F_SPACE)) {
295 prefix = 256 + ' ';
296 }
297 }
298
299 /* Convert number and store to fixed-size buffer in reverse order. */
300 prec = STRFMT_PREC(sf);
301 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
302 if (k == 0) { /* Special-case zero argument. */
303 if (prec != 0 ||
304 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
305 *--q = '0';
306 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
307 uint32_t k2;
308 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
309 k2 = (uint32_t)k;
310 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
311 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
312 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
313 "0123456789abcdef";
314 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
315 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
316 } else { /* Octal. */
317 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
318 if ((sf & STRFMT_F_ALT)) *--q = '0';
319 }
320
321 /* Calculate sizes. */
322 len = (MSize)(buf + sizeof(buf) - q);
323 if ((int32_t)len >= (int32_t)prec) prec = len;
324 width = STRFMT_WIDTH(sf);
325 pprec = prec + (prefix >> 8);
326 need = width > pprec ? width : pprec;
327 w = lj_buf_more(sb, need);
328#ifdef LUA_USE_ASSERT
329 ws = w;
330#endif
331
332 /* Format number with leading/trailing whitespace and zeros. */
333 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
334 while (width-- > pprec) *w++ = ' ';
335 if (prefix) {
336 if ((char)prefix >= 'X') *w++ = '0';
337 *w++ = (char)prefix;
338 }
339 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
340 while (width-- > pprec) *w++ = '0';
341 while (prec-- > len) *w++ = '0';
342 while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */
343 if ((sf & STRFMT_F_LEFT))
344 while (width-- > pprec) *w++ = ' ';
345
346 lj_assertX(need == (MSize)(w - ws), "miscalculated format size");
347 sb->w = w;
348 return sb;
349}
350
351/* Add number formatted as signed integer to buffer. */
352SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
353{
354 int64_t k = (int64_t)n;
355 if (checki32(k) && sf == STRFMT_INT)
356 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
357 else
358 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
359}
360
361/* Add number formatted as unsigned integer to buffer. */
362SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
363{
364 int64_t k;
365 if (n >= 9223372036854775808.0)
366 k = (int64_t)(n - 18446744073709551616.0);
367 else
368 k = (int64_t)n;
369 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
370}
371
372/* Format stack arguments to buffer. */
373int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
374{
375 int narg = (int)(L->top - L->base);
376 GCstr *fmt = lj_lib_checkstr(L, arg);
377 FormatState fs;
378 SFormat sf;
379 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
380 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
381 if (sf == STRFMT_LIT) {
382 lj_buf_putmem(sb, fs.str, fs.len);
383 } else if (sf == STRFMT_ERR) {
384 lj_err_callerv(L, LJ_ERR_STRFMT,
385 strdata(lj_str_new(L, fs.str, fs.len)));
386 } else {
387 TValue *o = &L->base[arg++];
388 if (arg > narg)
389 lj_err_arg(L, arg, LJ_ERR_NOVAL);
390 switch (STRFMT_TYPE(sf)) {
391 case STRFMT_INT:
392 if (tvisint(o)) {
393 int32_t k = intV(o);
394 if (sf == STRFMT_INT)
395 lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
396 else
397 lj_strfmt_putfxint(sb, sf, k);
398 break;
399 }
400#if LJ_HASFFI
401 if (tviscdata(o)) {
402 GCcdata *cd = cdataV(o);
403 if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
404 lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
405 break;
406 }
407 }
408#endif
409 lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
410 break;
411 case STRFMT_UINT:
412 if (tvisint(o)) {
413 lj_strfmt_putfxint(sb, sf, intV(o));
414 break;
415 }
416#if LJ_HASFFI
417 if (tviscdata(o)) {
418 GCcdata *cd = cdataV(o);
419 if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
420 lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
421 break;
422 }
423 }
424#endif
425 lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
426 break;
427 case STRFMT_NUM:
428 lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
429 break;
430 case STRFMT_STR: {
431 MSize len;
432 const char *s;
433 cTValue *mo;
434 if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&
435 !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
436 /* Call __tostring metamethod once. */
437 copyTV(L, L->top++, mo);
438 copyTV(L, L->top++, o);
439 lua_call(L, 1, 1);
440 o = &L->base[arg-1]; /* Stack may have been reallocated. */
441 copyTV(L, o, --L->top); /* Replace inline for retry. */
442 if (retry < 2) { /* Global buffer may have been overwritten. */
443 retry = 1;
444 break;
445 }
446 }
447 if (LJ_LIKELY(tvisstr(o))) {
448 len = strV(o)->len;
449 s = strVdata(o);
450#if LJ_HASBUFFER
451 } else if (tvisbuf(o)) {
452 SBufExt *sbx = bufV(o);
453 if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
454 len = sbufxlen(sbx);
455 s = sbx->r;
456#endif
457 } else {
458 GCstr *str = lj_strfmt_obj(L, o);
459 len = str->len;
460 s = strdata(str);
461 }
462 if ((sf & STRFMT_T_QUOTED))
463 strfmt_putquotedlen(sb, s, len); /* No formatting. */
464 else
465 strfmt_putfstrlen(sb, sf, s, len);
466 break;
467 }
468 case STRFMT_CHAR:
469 lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
470 break;
471 case STRFMT_PTR: /* No formatting. */
472 lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
473 break;
474 default:
475 lj_assertL(0, "bad string format type");
476 break;
477 }
478 }
479 }
480 return retry;
481}
482
483/* -- Conversions to strings ---------------------------------------------- */
484
485/* Convert integer to string. */
486GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
487{
488 char buf[STRFMT_MAXBUF_INT];
489 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
490 return lj_str_new(L, buf, len);
491}
492
493/* Convert integer or number to string. */
494GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
495{
496 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
497}
498
499#if LJ_HASJIT
500/* Convert char value to string. */
501GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
502{
503 char buf[1];
504 buf[0] = c;
505 return lj_str_new(L, buf, 1);
506}
507#endif
508
509/* Raw conversion of object to string. */
510GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
511{
512 if (tvisstr(o)) {
513 return strV(o);
514 } else if (tvisnumber(o)) {
515 return lj_strfmt_number(L, o);
516 } else if (tvisnil(o)) {
517 return lj_str_newlit(L, "nil");
518 } else if (tvisfalse(o)) {
519 return lj_str_newlit(L, "false");
520 } else if (tvistrue(o)) {
521 return lj_str_newlit(L, "true");
522 } else {
523 char buf[8+2+2+16], *p = buf;
524 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
525 *p++ = ':'; *p++ = ' ';
526 if (tvisfunc(o) && isffunc(funcV(o))) {
527 p = lj_buf_wmem(p, "builtin#", 8);
528 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
529 } else {
530 p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o));
531 }
532 return lj_str_new(L, buf, (size_t)(p - buf));
533 }
534}
535
536/* -- Internal string formatting ------------------------------------------ */
537
538/*
539** These functions are only used for lua_pushfstring(), lua_pushvfstring()
540** and for internal string formatting (e.g. error messages). Caveat: unlike
541** string.format(), only a limited subset of formats and flags is supported!
542**
543** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
544** - %d %u %o %x with full formatting, 32 bit integers only.
545** - %f and other FP formats are really %.14g.
546** - %s %c %p without formatting.
547*/
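/*
** For illustration only (argument names are hypothetical), a call such as
**   lj_strfmt_pushf(L, "bad argument #%d (%s expected)", narg, tname);
** stays within this subset, relying on plain %s and the 32 bit %d format.
*/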
548
549/* Push formatted message as a string object to Lua stack. va_list variant. */
550const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
551{
552 SBuf *sb = lj_buf_tmp_(L);
553 FormatState fs;
554 SFormat sf;
555 GCstr *str;
556 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
557 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
558 switch (STRFMT_TYPE(sf)) {
559 case STRFMT_LIT:
560 lj_buf_putmem(sb, fs.str, fs.len);
561 break;
562 case STRFMT_INT:
563 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
564 break;
565 case STRFMT_UINT:
566 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
567 break;
568 case STRFMT_NUM:
569 lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
570 break;
571 case STRFMT_STR: {
572 const char *s = va_arg(argp, char *);
573 if (s == NULL) s = "(null)";
574 lj_buf_putmem(sb, s, (MSize)strlen(s));
575 break;
576 }
577 case STRFMT_CHAR:
578 lj_buf_putb(sb, va_arg(argp, int));
579 break;
580 case STRFMT_PTR:
581 lj_strfmt_putptr(sb, va_arg(argp, void *));
582 break;
583 case STRFMT_ERR:
584 default:
585 lj_buf_putb(sb, '?');
586 lj_assertL(0, "bad string format near offset %d", fs.len);
587 break;
588 }
589 }
590 str = lj_buf_str(L, sb);
591 setstrV(L, L->top, str);
592 incr_top(L);
593 return strdata(str);
594}
595
596/* Push formatted message as a string object to Lua stack. Vararg variant. */
597const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
598{
599 const char *msg;
600 va_list argp;
601 va_start(argp, fmt);
602 msg = lj_strfmt_pushvf(L, fmt, argp);
603 va_end(argp);
604 return msg;
605}
606
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..bd17896e
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,131 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STRFMT_H
7#define _LJ_STRFMT_H
8
9#include "lj_obj.h"
10
11typedef uint32_t SFormat; /* Format indicator. */
12
13/* Format parser state. */
14typedef struct FormatState {
15 const uint8_t *p; /* Current format string pointer. */
16 const uint8_t *e; /* End of format string. */
17 const char *str; /* Returned literal string. */
18 MSize len; /* Size of literal string. */
19} FormatState;
20
21/* Format types (max. 16). */
22typedef enum FormatType {
23 STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
24 STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
25} FormatType;
26
27/* Format subtypes (bits are reused). */
28#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
29#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
30#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
31#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
32#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
33#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
34#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
35
36/* Format flags. */
37#define STRFMT_F_LEFT 0x0100
38#define STRFMT_F_PLUS 0x0200
39#define STRFMT_F_ZERO 0x0400
40#define STRFMT_F_SPACE 0x0800
41#define STRFMT_F_ALT 0x1000
42#define STRFMT_F_UPPER 0x2000
43
44/* Format indicator fields. */
45#define STRFMT_SH_WIDTH 16
46#define STRFMT_SH_PREC 24
47
48#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
49#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
50#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
51#define STRFMT_FP(sf) (((sf) >> 4) & 3)
52
53/* Formats for conversion characters. */
54#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
55#define STRFMT_C (STRFMT_CHAR)
56#define STRFMT_D (STRFMT_INT)
57#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
58#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
59#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
60#define STRFMT_I STRFMT_D
61#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
62#define STRFMT_P (STRFMT_PTR)
63#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
64#define STRFMT_S (STRFMT_STR)
65#define STRFMT_U (STRFMT_UINT)
66#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
67#define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC))
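/* Example: a "%+8.3f" spec corresponds to
** STRFMT_F|STRFMT_F_PLUS|(8<<STRFMT_SH_WIDTH)|((3+1)<<STRFMT_SH_PREC). */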
68
69/* Maximum buffer sizes for conversions. */
70#define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */
71#define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */
72#define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */
73#define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */
74
75/* Format parser. */
76LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
77
78static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
79{
80 fs->p = (const uint8_t *)p;
81 fs->e = (const uint8_t *)p + len;
82 /* Must be NUL-terminated. May have NULs inside, too. */
83 lj_assertX(*fs->e == 0, "format not NUL-terminated");
84}
85
86/* Raw conversions. */
87LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
88LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
89LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
90LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp);
91
92/* Unformatted conversions to buffer. */
93LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
94#if LJ_HASJIT
95LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
96#endif
97LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
98#if LJ_HASJIT
99LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
100#endif
101
102/* Formatted conversions to buffer. */
103LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
104LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
105LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
106LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
107LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
108#if LJ_HASJIT
109LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
110#endif
111LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry);
112
113/* Conversions to strings. */
114LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
115LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
116LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
117#if LJ_HASJIT
118LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
119#endif
120LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
121
122/* Internal string formatting. */
123LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
124 va_list argp);
125LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
126#if defined(__GNUC__) || defined(__clang__)
127 __attribute__ ((format (printf, 2, 3)))
128#endif
129 ;
130
131#endif
diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c
new file mode 100644
index 00000000..c6e776aa
--- /dev/null
+++ b/src/lj_strfmt_num.c
@@ -0,0 +1,593 @@
1/*
2** String formatting for floating-point numbers.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4** Contributed by Peter Cawley.
5*/
6
7#include <stdio.h>
8
9#define lj_strfmt_num_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_buf.h"
14#include "lj_str.h"
15#include "lj_strfmt.h"
16
17/* -- Precomputed tables -------------------------------------------------- */
18
19/* Rescale factors to push the exponent of a number towards zero. */
20#define RESCALE_EXPONENTS(P, N) \
21 P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \
22 P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \
23 N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \
24 N(251), N(270), N(289)
25
26#define ONE_E_P(X) 1e+0 ## X
27#define ONE_E_N(X) 1e-0 ## X
28static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) };
29static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) };
30#undef ONE_E_N
31#undef ONE_E_P
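/* E.g. the largest finite doubles (biased exponent 0x7fe) select entry 31:
** rescale_n[31] = 1e-289 scales them down to about 1e19, and
** rescale_e[31] = +289 records the removed decimal exponent. */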
32
33/*
34** For p in range -70 through 57, this table encodes pairs (m, e) such that
35** 4*2^p <= (uint8_t)m*10^e, where (uint8_t)m*10^e is the smallest such value.
36*/
37static const int8_t four_ulp_m_e[] = {
38 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19,
39 -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16,
40 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14,
41 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3,
42 -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7,
43 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103,
44 -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3,
45 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2,
46 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4,
47 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34,
48 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10,
49 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13,
50 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15,
51 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17
52};
53
54/* min(2^32-1, 10^e-1) for e in range 0 through 10 */
55static uint32_t ndigits_dec_threshold[] = {
56 0, 9U, 99U, 999U, 9999U, 99999U, 999999U,
57 9999999U, 99999999U, 999999999U, 0xffffffffU
58};
59
60/* -- Helper functions ---------------------------------------------------- */
61
62/* Compute the number of digits in the decimal representation of x. */
63static MSize ndigits_dec(uint32_t x)
64{
65 MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */
66 return t + (x > ndigits_dec_threshold[t]);
67}
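/* E.g. ndigits_dec(999): lj_fls(999|1) = 9, (9*77)>>8 = 2, so t = 3 and the
** threshold test keeps it at 3; ndigits_dec(1000) exceeds the threshold 999
** and yields 4. */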
68
69#define WINT_R(x, sh, sc) \
70 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
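/* E.g. with sh = 23, sc = 1000 the multiplier is (2^23+999)/1000 = 8389, and
** (x*8389)>>23 equals x/1000 for all x < 10000, so no real division occurs. */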
71
72/* Write 9-digit unsigned integer to buffer. */
73static char *lj_strfmt_wuint9(char *p, uint32_t u)
74{
75 uint32_t v = u / 10000, w;
76 u -= v * 10000;
77 w = v / 10000;
78 v -= w * 10000;
79 *p++ = (char)('0'+w);
80 WINT_R(v, 23, 1000)
81 WINT_R(v, 12, 100)
82 WINT_R(v, 10, 10)
83 *p++ = (char)('0'+v);
84 WINT_R(u, 23, 1000)
85 WINT_R(u, 12, 100)
86 WINT_R(u, 10, 10)
87 *p++ = (char)('0'+u);
88 return p;
89}
90#undef WINT_R
91
92/* -- Extended precision arithmetic --------------------------------------- */
93
94/*
95** The "nd" format is a fixed-precision decimal representation for numbers. It
96** consists of up to 64 uint32_t values, with each uint32_t storing a value
97** in the range [0, 1e9). A number in "nd" format consists of three variables:
98**
99** uint32_t nd[64];
100** uint32_t ndlo;
101** uint32_t ndhi;
102**
103** The integral part of the number is stored in nd[0 ... ndhi], the value of
104** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of
105** the number is zero, ndlo is zero. Otherwise, the fractional part is stored
106** in nd[ndlo ... 63], the value of which is taken to be
107** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}.
108**
109** If the array part had 128 elements rather than 64, then every double would
110** have an exact representation in "nd" format. With 64 elements, all integral
111** doubles have an exact representation, and all non-integral doubles have
112** enough digits to make both %.99e and %.99f do the right thing.
113*/
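/*
** For example, in this encoding 1000000005.5 would be held as nd[1] = 1,
** nd[0] = 5 (ndhi = 1) for the integral part and nd[63] = 500000000
** (ndlo = 63) for the fractional part.
*/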
114
115#if LJ_64
116#define ND_MUL2K_MAX_SHIFT 29
117#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000))
118#else
119#define ND_MUL2K_MAX_SHIFT 11
120#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125)
121#endif
122
123/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */
124static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k,
125 uint32_t carry_in, SFormat sf)
126{
127 uint32_t i, ndlo = 0, start = 1;
128 /* Performance hacks. */
129 if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) {
130 start = ndhi - (STRFMT_PREC(sf) + 17) / 8;
131 }
132 /* Real logic. */
133 while (k >= ND_MUL2K_MAX_SHIFT) {
134 for (i = ndlo; i <= ndhi; i++) {
135 uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in;
136 carry_in = ND_MUL2K_DIV1E9(val);
137 nd[i] = (uint32_t)val - carry_in * 1000000000;
138 }
139 if (carry_in) {
140 nd[++ndhi] = carry_in; carry_in = 0;
141 if (start++ == ndlo) ++ndlo;
142 }
143 k -= ND_MUL2K_MAX_SHIFT;
144 }
145 if (k) {
146 for (i = ndlo; i <= ndhi; i++) {
147 uint64_t val = ((uint64_t)nd[i] << k) | carry_in;
148 carry_in = ND_MUL2K_DIV1E9(val);
149 nd[i] = (uint32_t)val - carry_in * 1000000000;
150 }
151 if (carry_in) nd[++ndhi] = carry_in;
152 }
153 return ndhi;
154}
155
156/* Divide nd by 2^k (ndlo is assumed to be zero). */
157static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf)
158{
159 uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0;
160 /* Performance hacks. */
161 if (!ndhi) {
162 if (!nd[0]) {
163 return 0;
164 } else {
165 uint32_t s = lj_ffs(nd[0]);
166 if (s >= k) { nd[0] >>= k; return 0; }
167 nd[0] >>= s; k -= s;
168 }
169 }
170 if (k > 18) {
171 if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) {
172 stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9;
173 } else {
174 int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k;
175 int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114);
176 stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9;
177 stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8;
178 }
179 }
180 /* Real logic. */
181 while (k >= 9) {
182 uint32_t i = ndhi, carry = 0;
183 for (;;) {
184 uint32_t val = nd[i];
185 nd[i] = (val >> 9) + carry;
186 carry = (val & 0x1ff) * 1953125;
187 if (i == ndlo) break;
188 i = (i - 1) & 0x3f;
189 }
190 if (ndlo != stop1 && ndlo != stop2) {
191 if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
192 if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
193 } else if (!nd[ndhi]) {
194 if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
195 else return ndlo;
196 }
197 k -= 9;
198 }
199 if (k) {
200 uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0;
201 for (;;) {
202 uint32_t val = nd[i];
203 nd[i] = (val >> k) + carry;
204 carry = (val & mask) * mul;
205 if (i == ndlo) break;
206 i = (i - 1) & 0x3f;
207 }
208 if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
209 }
210 return ndlo;
211}
212
213/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */
214static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e)
215{
216 uint32_t i, carry;
217 if (e >= 0) {
218 i = (uint32_t)e/9;
219 carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1);
220 } else {
221 int32_t f = (e-8)/9;
222 i = (uint32_t)(64 + f);
223 carry = m * (ndigits_dec_threshold[e - f*9] + 1);
224 }
225 for (;;) {
226 uint32_t val = nd[i] + carry;
227 if (LJ_UNLIKELY(val >= 1000000000)) {
228 val -= 1000000000;
229 nd[i] = val;
230 if (LJ_UNLIKELY(i == ndhi)) {
231 ndhi = (ndhi + 1) & 0x3f;
232 nd[ndhi] = 1;
233 break;
234 }
235 carry = 1;
236 i = (i + 1) & 0x3f;
237 } else {
238 nd[i] = val;
239 break;
240 }
241 }
242 return ndhi;
243}
244
245/* Test whether two "nd" values are equal in their most significant digits. */
246static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen,
247 MSize prec)
248{
249 char nd9[9], ref9[9];
250 if (hilen <= prec) {
251 if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
252 prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f;
253 if (prec >= 9) {
254 if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
255 prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f;
256 }
257 } else {
258 prec -= hilen - 9;
259 }
260 lj_assertX(prec < 9, "bad precision %d", prec);
261 lj_strfmt_wuint9(nd9, nd[ndhi]);
262 lj_strfmt_wuint9(ref9, *ref);
263 return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5');
264}
265
266/* -- Formatted conversions to buffer ------------------------------------- */
267
268/* Write formatted floating-point number to either sb or p. */
269static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
270{
271 MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len;
272 TValue t;
273 t.n = n;
274 if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) {
275 /* Handle non-finite values uniformly for %a, %e, %f, %g. */
276 int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
277 if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) {
278 ch ^= ('n' << 16) | ('a' << 8) | 'n';
279 if ((sf & STRFMT_F_SPACE)) prefix = ' ';
280 } else {
281 ch ^= ('i' << 16) | ('n' << 8) | 'f';
282 if ((t.u32.hi & 0x80000000)) prefix = '-';
283 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
284 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
285 }
286 len = 3 + (prefix != 0);
287 if (!p) p = lj_buf_more(sb, width > len ? width : len);
288 if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
289 if (prefix) *p++ = prefix;
290 *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
291 } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) {
292 /* %a */
293 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX"
294 : "0123456789abcdefpx";
295 int32_t e = (t.u32.hi >> 20) & 0x7ff;
296 char prefix = 0, eprefix = '+';
297 if (t.u32.hi & 0x80000000) prefix = '-';
298 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
299 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
300 t.u32.hi &= 0xfffff;
301 if (e) {
302 t.u32.hi |= 0x100000;
303 e -= 1023;
304 } else if (t.u32.lo | t.u32.hi) {
305 /* Non-zero denormal - normalise it. */
306 uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo);
307 e = -1022 - shift;
308 t.u64 <<= shift;
309 }
310 /* abs(n) == t.u64 * 2^(e - 52) */
311 /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */
312 if ((int32_t)prec < 0) {
313 /* Default precision: use smallest precision giving exact result. */
314 prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4;
315 } else if (prec < 13) {
316    /* Precision is low enough that rounding may be required. */
317 t.u64 += (((uint64_t)1) << (51 - prec*4));
318 }
319 if (e < 0) {
320 eprefix = '-';
321 e = -e;
322 }
323 len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0)
324 + ((prec | (sf & STRFMT_F_ALT)) != 0);
325 if (!p) p = lj_buf_more(sb, width > len ? width : len);
326 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
327 while (width-- > len) *p++ = ' ';
328 }
329 if (prefix) *p++ = prefix;
330 *p++ = '0';
331 *p++ = hexdig[17]; /* x or X */
332 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
333 while (width-- > len) *p++ = '0';
334 }
335 *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */
336 if ((prec | (sf & STRFMT_F_ALT))) {
337 /* Emit fractional part. */
338 char *q = p + 1 + prec;
339 *p = '.';
340 if (prec < 13) t.u64 >>= (52 - prec*4);
341 else while (prec > 13) p[prec--] = '0';
342 while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; }
343 p = q;
344 }
345 *p++ = hexdig[16]; /* p or P */
346 *p++ = eprefix; /* + or - */
347 p = lj_strfmt_wint(p, e);
348 } else {
349 /* %e or %f or %g - begin by converting n to "nd" format. */
350 uint32_t nd[64];
351 uint32_t ndhi = 0, ndlo, i;
352 int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0;
353 char prefix = 0, *q;
354 if (t.u32.hi & 0x80000000) prefix = '-';
355 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
356 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
357 prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */
358 if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) {
359 /* %g - decrement precision if non-zero (to make it like %e). */
360 prec--;
361 prec ^= (uint32_t)((int32_t)prec >> 31);
362 }
363 if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) {
364 /* Precision is sufficiently low that rescaling will probably work. */
365 if ((ndebias = rescale_e[e >> 6])) {
366 t.n = n * rescale_n[e >> 6];
367 if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10;
368 t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */
369 nd[0] = 0x100000 | (t.u32.hi & 0xfffff);
370 e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29);
371 goto load_t_lo; rescale_failed:
372 t.n = n;
373 e = (t.u32.hi >> 20) & 0x7ff;
374 ndebias = ndhi = 0;
375 }
376 }
377 nd[0] = t.u32.hi & 0xfffff;
378 if (e == 0) e++; else nd[0] |= 0x100000;
379 e -= 1043;
380 if (t.u32.lo) {
381 e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo:
382#if ND_MUL2K_MAX_SHIFT >= 29
383 nd[0] = (nd[0] << 3) | (t.u32.lo >> 29);
384 ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf);
385#elif ND_MUL2K_MAX_SHIFT >= 11
386 ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf);
387 ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf);
388 ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf);
389#else
390#error "ND_MUL2K_MAX_SHIFT too small"
391#endif
392 }
393 if (e >= 0) {
394 ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf);
395 ndlo = 0;
396 } else {
397 ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf);
398 if (ndhi && !nd[ndhi]) ndhi--;
399 }
400 /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */
401 if ((sf & STRFMT_T_FP_E)) {
402 /* %e or %g - assume %e and start by calculating nd's exponent (nde). */
403 char eprefix = '+';
404 int32_t nde = -1;
405 MSize hilen;
406 if (ndlo && !nd[ndhi]) {
407 ndhi = 64; do {} while (!nd[--ndhi]);
408 nde -= 64 * 9;
409 }
410 hilen = ndigits_dec(nd[ndhi]);
411 nde += ndhi * 9 + hilen;
412 if (ndebias) {
413 /*
414 ** Rescaling was performed, but this introduced some error, and might
415 ** have pushed us across a rounding boundary. We check whether this
416 ** error affected the result by introducing even more error (2ulp in
417 ** either direction), and seeing whether a rounding boundary was
418 ** crossed. Having already converted the -2ulp case, we save off its
419 ** most significant digits, convert the +2ulp case, and compare them.
420 */
421 int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29)
422 + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12));
423 const int8_t *m_e = four_ulp_m_e + eidx * 2;
424 lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx);
425 nd[33] = nd[ndhi];
426 nd[32] = nd[(ndhi - 1) & 0x3f];
427 nd[31] = nd[(ndhi - 2) & 0x3f];
428 nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]);
429 if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) {
430 goto rescale_failed;
431 }
432 }
433 if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) {
434      /* Precision is low enough that rounding may be required. */
435 ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1);
436 nde += (hilen != ndigits_dec(nd[ndhi]));
437 }
438 nde += ndebias;
439 if ((sf & STRFMT_T_FP_F)) {
440 /* %g */
441 if ((int32_t)prec >= nde && nde >= -4) {
442 if (nde < 0) ndhi = 0;
443 prec -= nde;
444 goto g_format_like_f;
445 } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) {
446 /* Decrease precision in order to strip trailing zeroes. */
447 char tail[9];
448 uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9;
449 if (prec >= maxprec) prec = maxprec;
450 else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f;
451 i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10;
452 lj_strfmt_wuint9(tail, nd[ndlo]);
453 while (prec && tail[--i] == '0') {
454 prec--;
455 if (!i) {
456 if (ndlo == ndhi) { prec = 0; break; }
457 ndlo = (ndlo + 1) & 0x3f;
458 lj_strfmt_wuint9(tail, nd[ndlo]);
459 i = 9;
460 }
461 }
462 }
463 }
464 if (nde < 0) {
465 /* Make nde non-negative. */
466 eprefix = '-';
467 nde = -nde;
468 }
469 len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10)
470 + ((prec | (sf & STRFMT_F_ALT)) != 0);
471 if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5);
472 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
473 while (width-- > len) *p++ = ' ';
474 }
475 if (prefix) *p++ = prefix;
476 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
477 while (width-- > len) *p++ = '0';
478 }
479 q = lj_strfmt_wint(p + 1, nd[ndhi]);
480 p[0] = p[1]; /* Put leading digit in the correct place. */
481 if ((prec | (sf & STRFMT_F_ALT))) {
482 /* Emit fractional part. */
483 p[1] = '.'; p += 2;
484 prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */
485 /* Then emit chunks of 9 digits (this may emit 8 digits too many). */
486 for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) {
487 i = (i - 1) & 0x3f;
488 p = lj_strfmt_wuint9(p, nd[i]);
489 }
490 if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) {
491 /* %g (and not %#g) - strip trailing zeroes. */
492 p += (int32_t)prec & ((int32_t)prec >> 31);
493 while (p[-1] == '0') p--;
494 if (p[-1] == '.') p--;
495 } else {
496 /* %e (or %#g) - emit trailing zeroes. */
497 while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
498 p += (int32_t)prec;
499 }
500 } else {
501 p++;
502 }
503 *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e';
504 *p++ = eprefix; /* + or - */
505 if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */
506 p = lj_strfmt_wint(p, nde);
507 } else {
508      /* %f (or %g, which joins just below in %f style). */
509 if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) {
510        /* Precision is low enough that rounding may be required. */
511 ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1);
512 }
513 g_format_like_f:
514 if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) {
515 /* Decrease precision in order to strip trailing zeroes. */
516 if (ndlo) {
517 /* nd has a fractional part; we need to look at its digits. */
518 char tail[9];
519 uint32_t maxprec = (64 - ndlo) * 9;
520 if (prec >= maxprec) prec = maxprec;
521 else ndlo = 64 - (prec + 8) / 9;
522 i = prec - ((63 - ndlo) * 9);
523 lj_strfmt_wuint9(tail, nd[ndlo]);
524 while (prec && tail[--i] == '0') {
525 prec--;
526 if (!i) {
527 if (ndlo == 63) { prec = 0; break; }
528 lj_strfmt_wuint9(tail, nd[++ndlo]);
529 i = 9;
530 }
531 }
532 } else {
533 /* nd has no fractional part, so precision goes straight to zero. */
534 prec = 0;
535 }
536 }
537 len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0)
538 + ((prec | (sf & STRFMT_F_ALT)) != 0);
539 if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8);
540 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
541 while (width-- > len) *p++ = ' ';
542 }
543 if (prefix) *p++ = prefix;
544 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
545 while (width-- > len) *p++ = '0';
546 }
547 /* Emit integer part. */
548 p = lj_strfmt_wint(p, nd[ndhi]);
549 i = ndhi;
550 while (i) p = lj_strfmt_wuint9(p, nd[--i]);
551 if ((prec | (sf & STRFMT_F_ALT))) {
552 /* Emit fractional part. */
553 *p++ = '.';
554 /* Emit chunks of 9 digits (this may emit 8 digits too many). */
555 while ((int32_t)prec > 0 && i != ndlo) {
556 i = (i - 1) & 0x3f;
557 p = lj_strfmt_wuint9(p, nd[i]);
558 prec -= 9;
559 }
560 if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) {
561 /* %g (and not %#g) - strip trailing zeroes. */
562 p += (int32_t)prec & ((int32_t)prec >> 31);
563 while (p[-1] == '0') p--;
564 if (p[-1] == '.') p--;
565 } else {
566 /* %f (or %#g) - emit trailing zeroes. */
567 while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
568 p += (int32_t)prec;
569 }
570 }
571 }
572 }
573 if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
574 return p;
575}
576
577/* Add formatted floating-point number to buffer. */
578SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
579{
580 sb->w = lj_strfmt_wfnum(sb, sf, n, NULL);
581 return sb;
582}
583
584/* -- Conversions to strings ---------------------------------------------- */
585
586/* Convert number to string. */
587GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
588{
589 char buf[STRFMT_MAXBUF_NUM];
590 MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf);
591 return lj_str_new(L, buf, len);
592}
593
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index 79c0c569..9dbf477a 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -80,7 +80,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
80 /* Avoid double rounding for denormals. */ 80 /* Avoid double rounding for denormals. */
81 if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { 81 if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) {
82 /* NYI: all of this generates way too much code on 32 bit CPUs. */ 82 /* NYI: all of this generates way too much code on 32 bit CPUs. */
83#if defined(__GNUC__) && LJ_64 83#if (defined(__GNUC__) || defined(__clang__)) && LJ_64
84 int32_t b = (int32_t)(__builtin_clzll(x)^63); 84 int32_t b = (int32_t)(__builtin_clzll(x)^63);
85#else 85#else
86 int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : 86 int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) :
@@ -94,7 +94,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
94 } 94 }
95 95
96 /* Convert to double using a signed int64_t conversion, then rescale. */ 96 /* Convert to double using a signed int64_t conversion, then rescale. */
97 lua_assert((int64_t)x >= 0); 97 lj_assertX((int64_t)x >= 0, "bad double conversion");
98 n = (double)(int64_t)x; 98 n = (double)(int64_t)x;
99 if (neg) n = -n; 99 if (neg) n = -n;
100 if (ex2) n = ldexp(n, ex2); 100 if (ex2) n = ldexp(n, ex2);
@@ -142,7 +142,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
142 break; 142 break;
143 } 143 }
144 144
145 /* Reduce range then convert to double. */ 145 /* Reduce range, then convert to double. */
146 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } 146 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
147 strscan_double(x, o, ex2, neg); 147 strscan_double(x, o, ex2, neg);
148 return fmt; 148 return fmt;
@@ -264,7 +264,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
264 uint32_t hi = 0, lo = (uint32_t)(xip-xi); 264 uint32_t hi = 0, lo = (uint32_t)(xip-xi);
265 int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); 265 int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1);
266 266
267 lua_assert(lo > 0 && (ex10 & 1) == 0); 267 lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10);
268 268
269 /* Handle simple overflow/underflow. */ 269 /* Handle simple overflow/underflow. */
270 if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } 270 if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; }
@@ -328,10 +328,55 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
328 return fmt; 328 return fmt;
329} 329}
330 330
331/* Parse binary number. */
332static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
333 StrScanFmt fmt, uint32_t opt,
334 int32_t ex2, int32_t neg, uint32_t dig)
335{
336 uint64_t x = 0;
337 uint32_t i;
338
339 if (ex2 || dig > 64) return STRSCAN_ERROR;
340
341 /* Scan binary digits. */
342 for (i = dig; i; i--, p++) {
343 if ((*p & ~1) != '0') return STRSCAN_ERROR;
344 x = (x << 1) | (*p & 1);
345 }
346
347 /* Format-specific handling. */
348 switch (fmt) {
349 case STRSCAN_INT:
350 if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
351 o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
352 return STRSCAN_INT; /* Fast path for 32 bit integers. */
353 }
354 if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
355 /* fallthrough */
356 case STRSCAN_U32:
357 if (dig > 32) return STRSCAN_ERROR;
358 o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
359 return STRSCAN_U32;
360 case STRSCAN_I64:
361 case STRSCAN_U64:
362 o->u64 = neg ? ~x+1u : x;
363 return fmt;
364 default:
365 break;
366 }
367
368 /* Reduce range, then convert to double. */
369 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
370 strscan_double(x, o, ex2, neg);
371 return fmt;
372}
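/* E.g. a "0b1010" literal reaches strscan_bin with dig = 4; each digit must
** satisfy (*p & ~1) == '0' and the loop accumulates x = 10. */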
373
331/* Scan string containing a number. Returns format. Returns value in o. */ 374/* Scan string containing a number. Returns format. Returns value in o. */
332StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) 375StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
376 uint32_t opt)
333{ 377{
334 int32_t neg = 0; 378 int32_t neg = 0;
379 const uint8_t *pe = p + len;
335 380
336 /* Remove leading space, parse sign and non-numbers. */ 381 /* Remove leading space, parse sign and non-numbers. */
337 if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { 382 if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
@@ -349,7 +394,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
349 p += 3; 394 p += 3;
350 } 395 }
351 while (lj_char_isspace(*p)) p++; 396 while (lj_char_isspace(*p)) p++;
352 if (*p) return STRSCAN_ERROR; 397 if (*p || p < pe) return STRSCAN_ERROR;
353 o->u64 = tmp.u64; 398 o->u64 = tmp.u64;
354 return STRSCAN_NUM; 399 return STRSCAN_NUM;
355 } 400 }
@@ -366,8 +411,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
366 411
367 /* Determine base and skip leading zeros. */ 412 /* Determine base and skip leading zeros. */
368 if (LJ_UNLIKELY(*p <= '0')) { 413 if (LJ_UNLIKELY(*p <= '0')) {
369 if (*p == '0' && casecmp(p[1], 'x')) 414 if (*p == '0') {
370 base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; 415 if (casecmp(p[1], 'x'))
416 base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
417 else if (casecmp(p[1], 'b'))
418 base = 2, cmask = LJ_CHAR_DIGIT, p += 2;
419 }
371 for ( ; ; p++) { 420 for ( ; ; p++) {
372 if (*p == '0') { 421 if (*p == '0') {
373 hasdig = 1; 422 hasdig = 1;
@@ -396,6 +445,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
396 445
397 /* Handle decimal point. */ 446 /* Handle decimal point. */
398 if (dp) { 447 if (dp) {
448 if (base == 2) return STRSCAN_ERROR;
399 fmt = STRSCAN_NUM; 449 fmt = STRSCAN_NUM;
400 if (dig) { 450 if (dig) {
401 ex = (int32_t)(dp-(p-1)); dp = p-1; 451 ex = (int32_t)(dp-(p-1)); dp = p-1;
@@ -406,7 +456,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
406 } 456 }
407 457
408 /* Parse exponent. */ 458 /* Parse exponent. */
409 if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { 459 if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
410 uint32_t xx; 460 uint32_t xx;
411 int negx = 0; 461 int negx = 0;
412 fmt = STRSCAN_NUM; p++; 462 fmt = STRSCAN_NUM; p++;
@@ -445,6 +495,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
445 while (lj_char_isspace(*p)) p++; 495 while (lj_char_isspace(*p)) p++;
446 if (*p) return STRSCAN_ERROR; 496 if (*p) return STRSCAN_ERROR;
447 } 497 }
498 if (p < pe) return STRSCAN_ERROR;
448 499
449 /* Fast path for decimal 32 bit integers. */ 500 /* Fast path for decimal 32 bit integers. */
450 if (fmt == STRSCAN_INT && base == 10 && 501 if (fmt == STRSCAN_INT && base == 10 &&
@@ -466,6 +517,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
466 return strscan_oct(sp, o, fmt, neg, dig); 517 return strscan_oct(sp, o, fmt, neg, dig);
467 if (base == 16) 518 if (base == 16)
468 fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); 519 fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig);
520 else if (base == 2)
521 fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig);
469 else 522 else
470 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); 523 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
471 524
@@ -481,18 +534,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
481 534
482int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) 535int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
483{ 536{
484 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, 537 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
485 STRSCAN_OPT_TONUM); 538 STRSCAN_OPT_TONUM);
486 lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); 539 lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format");
487 return (fmt != STRSCAN_ERROR); 540 return (fmt != STRSCAN_ERROR);
488} 541}
489 542
490#if LJ_DUALNUM 543#if LJ_DUALNUM
491int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) 544int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
492{ 545{
493 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, 546 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
494 STRSCAN_OPT_TOINT); 547 STRSCAN_OPT_TOINT);
495 lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); 548 lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT,
549 "bad scan format");
496 if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); 550 if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
497 return (fmt != STRSCAN_ERROR); 551 return (fmt != STRSCAN_ERROR);
498} 552}
diff --git a/src/lj_strscan.h b/src/lj_strscan.h
index 8d036e6f..a6047cf2 100644
--- a/src/lj_strscan.h
+++ b/src/lj_strscan.h
@@ -22,7 +22,8 @@ typedef enum {
22 STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, 22 STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
23} StrScanFmt; 23} StrScanFmt;
24 24
25LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); 25LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
26 uint32_t opt);
26LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); 27LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
27#if LJ_DUALNUM 28#if LJ_DUALNUM
28LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); 29LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 26485dcd..535a69f5 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -16,25 +16,10 @@
16 16
17/* -- Object hashing ------------------------------------------------------ */ 17/* -- Object hashing ------------------------------------------------------ */
18 18
19/* Hash values are masked with the table hash mask and used as an index. */
20static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
21{
22 Node *n = noderef(t->node);
23 return &n[hash & t->hmask];
24}
25
26/* String hashes are precomputed when they are interned. */
27#define hashstr(t, s) hashmask(t, (s)->hash)
28
29#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
30#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
31#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS)
32#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
33
34/* Hash an arbitrary key and return its anchor position in the hash table. */ 19/* Hash an arbitrary key and return its anchor position in the hash table. */
35static Node *hashkey(const GCtab *t, cTValue *key) 20static Node *hashkey(const GCtab *t, cTValue *key)
36{ 21{
37 lua_assert(!tvisint(key)); 22 lj_assertX(!tvisint(key), "attempt to hash integer");
38 if (tvisstr(key)) 23 if (tvisstr(key))
39 return hashstr(t, strV(key)); 24 return hashstr(t, strV(key));
40 else if (tvisnum(key)) 25 else if (tvisnum(key))
@@ -53,13 +38,13 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
53{ 38{
54 uint32_t hsize; 39 uint32_t hsize;
55 Node *node; 40 Node *node;
56 lua_assert(hbits != 0); 41 lj_assertL(hbits != 0, "zero hash size");
57 if (hbits > LJ_MAX_HBITS) 42 if (hbits > LJ_MAX_HBITS)
58 lj_err_msg(L, LJ_ERR_TABOV); 43 lj_err_msg(L, LJ_ERR_TABOV);
59 hsize = 1u << hbits; 44 hsize = 1u << hbits;
60 node = lj_mem_newvec(L, hsize, Node); 45 node = lj_mem_newvec(L, hsize, Node);
61 setmref(node->freetop, &node[hsize]);
62 setmref(t->node, node); 46 setmref(t->node, node);
47 setfreetop(t, node, &node[hsize]);
63 t->hmask = hsize-1; 48 t->hmask = hsize-1;
64} 49}
65 50
@@ -74,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t)
74{ 59{
75 uint32_t i, hmask = t->hmask; 60 uint32_t i, hmask = t->hmask;
76 Node *node = noderef(t->node); 61 Node *node = noderef(t->node);
77 lua_assert(t->hmask != 0); 62 lj_assertX(t->hmask != 0, "empty hash part");
78 for (i = 0; i <= hmask; i++) { 63 for (i = 0; i <= hmask; i++) {
79 Node *n = &node[i]; 64 Node *n = &node[i];
80 setmref(n->next, NULL); 65 setmref(n->next, NULL);
@@ -98,7 +83,8 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
98 GCtab *t; 83 GCtab *t;
99 /* First try to colocate the array part. */ 84 /* First try to colocate the array part. */
100 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { 85 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
101 lua_assert((sizeof(GCtab) & 7) == 0); 86 Node *nilnode;
87 lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size");
102 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); 88 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
103 t->gct = ~LJ_TTAB; 89 t->gct = ~LJ_TTAB;
104 t->nomm = (uint8_t)~0; 90 t->nomm = (uint8_t)~0;
@@ -107,8 +93,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
107 setgcrefnull(t->metatable); 93 setgcrefnull(t->metatable);
108 t->asize = asize; 94 t->asize = asize;
109 t->hmask = 0; 95 t->hmask = 0;
110 setmref(t->node, &G(L)->nilnode); 96 nilnode = &G(L)->nilnode;
97 setmref(t->node, nilnode);
98#if LJ_GC64
99 setmref(t->freetop, nilnode);
100#endif
111 } else { /* Otherwise separately allocate the array part. */ 101 } else { /* Otherwise separately allocate the array part. */
102 Node *nilnode;
112 t = lj_mem_newobj(L, GCtab); 103 t = lj_mem_newobj(L, GCtab);
113 t->gct = ~LJ_TTAB; 104 t->gct = ~LJ_TTAB;
114 t->nomm = (uint8_t)~0; 105 t->nomm = (uint8_t)~0;
@@ -117,7 +108,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
117 setgcrefnull(t->metatable); 108 setgcrefnull(t->metatable);
118 t->asize = 0; /* In case the array allocation fails. */ 109 t->asize = 0; /* In case the array allocation fails. */
119 t->hmask = 0; 110 t->hmask = 0;
120 setmref(t->node, &G(L)->nilnode); 111 nilnode = &G(L)->nilnode;
112 setmref(t->node, nilnode);
113#if LJ_GC64
114 setmref(t->freetop, nilnode);
115#endif
121 if (asize > 0) { 116 if (asize > 0) {
122 if (asize > LJ_MAX_ASIZE) 117 if (asize > LJ_MAX_ASIZE)
123 lj_err_msg(L, LJ_ERR_TABOV); 118 lj_err_msg(L, LJ_ERR_TABOV);
@@ -149,6 +144,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
149 return t; 144 return t;
150} 145}
151 146
147/* The API of this function conforms to lua_createtable(). */
148GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h)
149{
150 return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h));
151}
152
152#if LJ_HASJIT 153#if LJ_HASJIT
153GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) 154GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
154{ 155{
@@ -165,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
165 GCtab *t; 166 GCtab *t;
166 uint32_t asize, hmask; 167 uint32_t asize, hmask;
167 t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); 168 t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0);
168 lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); 169 lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask,
170 "mismatched size of table and template");
169 t->nomm = 0; /* Keys with metamethod names may be present. */ 171 t->nomm = 0; /* Keys with metamethod names may be present. */
170 asize = kt->asize; 172 asize = kt->asize;
171 if (asize > 0) { 173 if (asize > 0) {
@@ -185,7 +187,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
185 Node *node = noderef(t->node); 187 Node *node = noderef(t->node);
186 Node *knode = noderef(kt->node); 188 Node *knode = noderef(kt->node);
187 ptrdiff_t d = (char *)node - (char *)knode; 189 ptrdiff_t d = (char *)node - (char *)knode;
188 setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d)); 190 setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d));
189 for (i = 0; i <= hmask; i++) { 191 for (i = 0; i <= hmask; i++) {
190 Node *kn = &knode[i]; 192 Node *kn = &knode[i];
191 Node *n = &node[i]; 193 Node *n = &node[i];
@@ -198,6 +200,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
198 return t; 200 return t;
199} 201}
200 202
203/* Clear a table. */
204void LJ_FASTCALL lj_tab_clear(GCtab *t)
205{
206 clearapart(t);
207 if (t->hmask > 0) {
208 Node *node = noderef(t->node);
209 setfreetop(t, node, &node[t->hmask+1]);
210 clearhpart(t);
211 }
212}
213
201/* Free a table. */ 214/* Free a table. */
202void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) 215void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
203{ 216{
@@ -214,7 +227,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
214/* -- Table resizing ------------------------------------------------------ */ 227/* -- Table resizing ------------------------------------------------------ */
215 228
216/* Resize a table to fit the new array/hash part sizes. */ 229/* Resize a table to fit the new array/hash part sizes. */
217static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) 230void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
218{ 231{
219 Node *oldnode = noderef(t->node); 232 Node *oldnode = noderef(t->node);
220 uint32_t oldasize = t->asize; 233 uint32_t oldasize = t->asize;
@@ -247,6 +260,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
247 } else { 260 } else {
248 global_State *g = G(L); 261 global_State *g = G(L);
249 setmref(t->node, &g->nilnode); 262 setmref(t->node, &g->nilnode);
263#if LJ_GC64
264 setmref(t->freetop, &g->nilnode);
265#endif
250 t->hmask = 0; 266 t->hmask = 0;
251 } 267 }
252 if (asize < oldasize) { /* Array part shrinks? */ 268 if (asize < oldasize) { /* Array part shrinks? */
@@ -276,7 +292,7 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
276 292
277static uint32_t countint(cTValue *key, uint32_t *bins) 293static uint32_t countint(cTValue *key, uint32_t *bins)
278{ 294{
279 lua_assert(!tvisint(key)); 295 lj_assertX(!tvisint(key), "bad integer key");
280 if (tvisnum(key)) { 296 if (tvisnum(key)) {
281 lua_Number nk = numV(key); 297 lua_Number nk = numV(key);
282 int32_t k = lj_num2int(nk); 298 int32_t k = lj_num2int(nk);
@@ -348,19 +364,12 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
348 asize += countint(ek, bins); 364 asize += countint(ek, bins);
349 na = bestasize(bins, &asize); 365 na = bestasize(bins, &asize);
350 total -= na; 366 total -= na;
351 resizetab(L, t, asize, hsize2hbits(total)); 367 lj_tab_resize(L, t, asize, hsize2hbits(total));
352} 368}
353 369
354#if LJ_HASFFI
355void lj_tab_rehash(lua_State *L, GCtab *t)
356{
357 rehashtab(L, t, niltv(L));
358}
359#endif
360
361void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) 370void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
362{ 371{
363 resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); 372 lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
364} 373}
365 374
366/* -- Table getters ------------------------------------------------------- */ 375/* -- Table getters ------------------------------------------------------- */
@@ -378,7 +387,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
378 return NULL; 387 return NULL;
379} 388}
380 389
381cTValue *lj_tab_getstr(GCtab *t, GCstr *key) 390cTValue *lj_tab_getstr(GCtab *t, const GCstr *key)
382{ 391{
383 Node *n = hashstr(t, key); 392 Node *n = hashstr(t, key);
384 do { 393 do {
@@ -428,16 +437,17 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
428 Node *n = hashkey(t, key); 437 Node *n = hashkey(t, key);
429 if (!tvisnil(&n->val) || t->hmask == 0) { 438 if (!tvisnil(&n->val) || t->hmask == 0) {
430 Node *nodebase = noderef(t->node); 439 Node *nodebase = noderef(t->node);
431 Node *collide, *freenode = noderef(nodebase->freetop); 440 Node *collide, *freenode = getfreetop(t, nodebase);
432 lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); 441 lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1,
442 "bad freenode");
433 do { 443 do {
434 if (freenode == nodebase) { /* No free node found? */ 444 if (freenode == nodebase) { /* No free node found? */
435 rehashtab(L, t, key); /* Rehash table. */ 445 rehashtab(L, t, key); /* Rehash table. */
436 return lj_tab_set(L, t, key); /* Retry key insertion. */ 446 return lj_tab_set(L, t, key); /* Retry key insertion. */
437 } 447 }
438 } while (!tvisnil(&(--freenode)->key)); 448 } while (!tvisnil(&(--freenode)->key));
439 setmref(nodebase->freetop, freenode); 449 setfreetop(t, nodebase, freenode);
440 lua_assert(freenode != &G(L)->nilnode); 450 lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash");
441 collide = hashkey(t, &n->key); 451 collide = hashkey(t, &n->key);
442 if (collide != n) { /* Colliding node not the main node? */ 452 if (collide != n) { /* Colliding node not the main node? */
443 while (noderef(collide->next) != n) /* Find predecessor. */ 453 while (noderef(collide->next) != n) /* Find predecessor. */
@@ -493,7 +503,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
493 if (LJ_UNLIKELY(tvismzero(&n->key))) 503 if (LJ_UNLIKELY(tvismzero(&n->key)))
494 n->key.u64 = 0; 504 n->key.u64 = 0;
495 lj_gc_anybarriert(L, t); 505 lj_gc_anybarriert(L, t);
496 lua_assert(tvisnil(&n->val)); 506 lj_assertL(tvisnil(&n->val), "new hash slot is not empty");
497 return &n->val; 507 return &n->val;
498} 508}
499 509
@@ -510,7 +520,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
510 return lj_tab_newkey(L, t, &k); 520 return lj_tab_newkey(L, t, &k);
511} 521}
512 522
513TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) 523TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key)
514{ 524{
515 TValue k; 525 TValue k;
516 Node *n = hashstr(t, key); 526 Node *n = hashstr(t, key);
@@ -551,103 +561,126 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
551 561
552/* -- Table traversal ----------------------------------------------------- */ 562/* -- Table traversal ----------------------------------------------------- */
553 563
554/* Get the traversal index of a key. */ 564/* Table traversal indexes:
555static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) 565**
566** Array key index: [0 .. t->asize-1]
567** Hash key index: [t->asize .. t->asize+t->hmask]
568** Invalid key: ~0
569*/
570
571/* Get the successor traversal index of a key. */
572uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
556{ 573{
557 TValue tmp; 574 TValue tmp;
558 if (tvisint(key)) { 575 if (tvisint(key)) {
559 int32_t k = intV(key); 576 int32_t k = intV(key);
560 if ((uint32_t)k < t->asize) 577 if ((uint32_t)k < t->asize)
561 return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ 578 return (uint32_t)k + 1;
562 setnumV(&tmp, (lua_Number)k); 579 setnumV(&tmp, (lua_Number)k);
563 key = &tmp; 580 key = &tmp;
564 } else if (tvisnum(key)) { 581 } else if (tvisnum(key)) {
565 lua_Number nk = numV(key); 582 lua_Number nk = numV(key);
566 int32_t k = lj_num2int(nk); 583 int32_t k = lj_num2int(nk);
567 if ((uint32_t)k < t->asize && nk == (lua_Number)k) 584 if ((uint32_t)k < t->asize && nk == (lua_Number)k)
568 return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ 585 return (uint32_t)k + 1;
569 } 586 }
570 if (!tvisnil(key)) { 587 if (!tvisnil(key)) {
571 Node *n = hashkey(t, key); 588 Node *n = hashkey(t, key);
572 do { 589 do {
573 if (lj_obj_equal(&n->key, key)) 590 if (lj_obj_equal(&n->key, key))
574 return t->asize + (uint32_t)(n - noderef(t->node)); 591 return t->asize + (uint32_t)((n+1) - noderef(t->node));
575 /* Hash key indexes: [t->asize..t->asize+t->nmask] */
576 } while ((n = nextnode(n))); 592 } while ((n = nextnode(n)));
577 if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */ 593 if (key->u32.hi == LJ_KEYINDEX) /* Despecialized ITERN while running. */
578 return key->u32.lo - 1; 594 return key->u32.lo;
579 lj_err_msg(L, LJ_ERR_NEXTIDX); 595 return ~0u; /* Invalid key to next. */
580 return 0; /* unreachable */
581 } 596 }
582 return ~0u; /* A nil key starts the traversal. */ 597 return 0; /* A nil key starts the traversal. */
583} 598}
584 599
585/* Advance to the next step in a table traversal. */ 600/* Get the next key/value pair of a table traversal. */
586int lj_tab_next(lua_State *L, GCtab *t, TValue *key) 601int lj_tab_next(GCtab *t, cTValue *key, TValue *o)
587{ 602{
588 uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */ 603 uint32_t idx = lj_tab_keyindex(t, key); /* Find successor index of key. */
589 for (i++; i < t->asize; i++) /* First traverse the array keys. */ 604 /* First traverse the array part. */
590 if (!tvisnil(arrayslot(t, i))) { 605 for (; idx < t->asize; idx++) {
591 setintV(key, i); 606 cTValue *a = arrayslot(t, idx);
592 copyTV(L, key+1, arrayslot(t, i)); 607 if (LJ_LIKELY(!tvisnil(a))) {
608 setintV(o, idx);
609 o[1] = *a;
593 return 1; 610 return 1;
594 } 611 }
595 for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */ 612 }
596 Node *n = &noderef(t->node)[i]; 613 idx -= t->asize;
614 /* Then traverse the hash part. */
615 for (; idx <= t->hmask; idx++) {
616 Node *n = &noderef(t->node)[idx];
597 if (!tvisnil(&n->val)) { 617 if (!tvisnil(&n->val)) {
598 copyTV(L, key, &n->key); 618 o[0] = n->key;
599 copyTV(L, key+1, &n->val); 619 o[1] = n->val;
600 return 1; 620 return 1;
601 } 621 }
602 } 622 }
603 return 0; /* End of traversal. */ 623 return (int32_t)idx < 0 ? -1 : 0; /* Invalid key or end of traversal. */
604} 624}
605 625
606/* -- Table length calculation -------------------------------------------- */ 626/* -- Table length calculation -------------------------------------------- */
607 627
608static MSize unbound_search(GCtab *t, MSize j) 628/* Compute table length. Slow path with mixed array/hash lookups. */
629LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi)
609{ 630{
610 cTValue *tv; 631 cTValue *tv;
611 MSize i = j; /* i is zero or a present index */ 632 size_t lo = hi;
612 j++; 633 hi++;
613 /* find `i' and `j' such that i is present and j is not */ 634 /* Widening search for an upper bound. */
614 while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { 635 while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) {
615 i = j; 636 lo = hi;
616 j *= 2; 637 hi += hi;
617 if (j > (MSize)(INT_MAX-2)) { /* overflow? */ 638 if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */
618 /* table was built with bad purposes: resort to linear search */ 639 lo = 1;
619 i = 1; 640 while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++;
620 while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; 641 return (MSize)(lo - 1);
621 return i - 1;
622 } 642 }
623 } 643 }
624 /* now do a binary search between them */ 644 /* Binary search to find a non-nil to nil transition. */
625 while (j - i > 1) { 645 while (hi - lo > 1) {
626 MSize m = (i+j)/2; 646 size_t mid = (lo+hi) >> 1;
627 cTValue *tvb = lj_tab_getint(t, (int32_t)m); 647 cTValue *tvb = lj_tab_getint(t, (int32_t)mid);
628 if (tvb && !tvisnil(tvb)) i = m; else j = m; 648 if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid;
629 } 649 }
630 return i; 650 return (MSize)lo;
631} 651}
632 652
633/* 653/* Compute table length. Fast path. */
634** Try to find a boundary in table `t'. A `boundary' is an integer index
635** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
636*/
637MSize LJ_FASTCALL lj_tab_len(GCtab *t) 654MSize LJ_FASTCALL lj_tab_len(GCtab *t)
638{ 655{
639 MSize j = (MSize)t->asize; 656 size_t hi = (size_t)t->asize;
640 if (j > 1 && tvisnil(arrayslot(t, j-1))) { 657 if (hi) hi--;
641 MSize i = 1; 658 /* In a growing array the last array element is very likely nil. */
642 while (j - i > 1) { 659 if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) {
643 MSize m = (i+j)/2; 660 /* Binary search to find a non-nil to nil transition in the array. */
644 if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; 661 size_t lo = 0;
662 while (hi - lo > 1) {
663 size_t mid = (lo+hi) >> 1;
664 if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid;
645 } 665 }
646 return i-1; 666 return (MSize)lo;
667 }
668 /* Without a hash part, there's an implicit nil after the last element. */
669 return t->hmask ? tab_len_slow(t, hi) : (MSize)hi;
670}
671
672#if LJ_HASJIT
673/* Verify hinted table length or compute it. */
674MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
675{
676 size_t asize = (size_t)t->asize;
677 cTValue *tv = arrayslot(t, hint);
678 if (LJ_LIKELY(hint+1 < asize)) {
679 if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint;
680 } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) {
681 return (MSize)hint;
647 } 682 }
648 if (j) j--; 683 return lj_tab_len(t);
649 if (t->hmask <= 0)
650 return j;
651 return unbound_search(t, j);
652} 684}
685#endif
653 686
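The rewritten traversal above replaces keyindex()'s error path with lj_tab_keyindex(), which returns the successor index directly: array keys map to [0..asize-1], hash keys to [asize..asize+hmask], and ~0 flags an invalid key, which lj_tab_next() now reports as -1 instead of throwing. A minimal standalone sketch of that index convention, using simplified stand-in types rather than LuaJIT's GCtab/TValue (0 marks an empty slot):

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-ins for GCtab's array and hash parts. */
typedef struct { int used; int key, val; } MiniNode;
typedef struct {
  int *array; uint32_t asize;        /* Array part: keys 0..asize-1. */
  MiniNode *node; uint32_t hmask;    /* Hash part: hmask = size-1. */
} MiniTab;

/* Return the successor traversal index, or ~0u at the end of the traversal. */
static uint32_t mini_next(const MiniTab *t, uint32_t idx, int *k, int *v)
{
  for (; idx < t->asize; idx++)                  /* First the array part. */
    if (t->array[idx]) {
      *k = (int)idx; *v = t->array[idx];
      return idx + 1;
    }
  for (idx -= t->asize; idx <= t->hmask; idx++)  /* Then the hash part. */
    if (t->node[idx].used) {
      *k = t->node[idx].key; *v = t->node[idx].val;
      return t->asize + idx + 1;
    }
  return ~0u;
}

int main(void)
{
  int arr[4] = { 0, 11, 0, 33 };                               /* t[1]=11, t[3]=33. */
  MiniNode nodes[4] = { { 1, 100, 44 }, { 0 }, { 0 }, { 0 } }; /* t[100]=44. */
  MiniTab t = { arr, 4, nodes, 3 };
  uint32_t idx = 0;                  /* A nil key starts the traversal at 0. */
  int k, v;
  while ((idx = mini_next(&t, idx, &k, &v)) != ~0u)
    printf("t[%d] = %d\n", k, v);
  return 0;
}

The stand-in uses 0 as the empty marker; the real code tests tvisnil() on TValue slots and copies key/value into the o[0]/o[1] pair.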
diff --git a/src/lj_tab.h b/src/lj_tab.h
index bb3e273d..9c03fa16 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -31,30 +31,49 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
31 return hi; 31 return hi;
32} 32}
33 33
34/* Hash values are masked with the table hash mask and used as an index. */
35static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
36{
37 Node *n = noderef(t->node);
38 return &n[hash & t->hmask];
39}
40
41/* String IDs are generated when a string is interned. */
42#define hashstr(t, s) hashmask(t, (s)->sid)
43
44#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
45#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
46#if LJ_GC64
47#define hashgcref(t, r) \
48 hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
49#else
50#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
51#endif
52
34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) 53#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
35 54
36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); 55LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
56LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h);
37#if LJ_HASJIT 57#if LJ_HASJIT
38LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); 58LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
39#endif 59#endif
40LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); 60LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
61LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
41LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); 62LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
42#if LJ_HASFFI 63LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits);
43LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
44#endif
45LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); 64LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
46 65
47/* Caveat: all getters except lj_tab_get() can return NULL! */ 66/* Caveat: all getters except lj_tab_get() can return NULL! */
48 67
49LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); 68LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
50LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); 69LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key);
51LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); 70LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
52 71
53/* Caveat: all setters require a write barrier for the stored value. */ 72/* Caveat: all setters require a write barrier for the stored value. */
54 73
55LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 74LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
56LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 75LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
57LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 76LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key);
58LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 77LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
59 78
60#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) 79#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize)
@@ -64,7 +83,11 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
64#define lj_tab_setint(L, t, key) \ 83#define lj_tab_setint(L, t, key) \
65 (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) 84 (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
66 85
67LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); 86LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key);
87LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o);
68LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); 88LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
89#if LJ_HASJIT
90LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
91#endif
69 92
70#endif 93#endif
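lj_tab_len() declared above keeps the classic border semantics (t[lo] non-nil, t[lo+1] nil) but now binary-searches the array part directly, as shown in the lj_tab.c hunk. A standalone sketch of that search over a plain int array, with 0 standing in for nil and index i playing the role of key i:

#include <stdio.h>
#include <stddef.h>

/* Binary search for a border in the array part: the largest lo with arr[lo]
** set while arr[hi] stays empty throughout the search.
*/
static size_t mini_len(const int *arr, size_t asize)
{
  size_t hi = asize;
  if (hi) hi--;
  if (hi > 0 && arr[hi] == 0) {     /* Last slot empty: search for a border. */
    size_t lo = 0;
    while (hi - lo > 1) {
      size_t mid = (lo + hi) >> 1;
      if (arr[mid] == 0) hi = mid; else lo = mid;
    }
    return lo;
  }
  return hi;     /* Real code would consult the hash part here (tab_len_slow). */
}

int main(void)
{
  int a[8] = { 0, 1, 2, 3, 0, 0, 0, 0 };   /* Keys 1..3 present. */
  printf("# = %zu\n", mini_len(a, 8));     /* Prints 3. */
  return 0;
}

lj_tab_len_hint() only falls back to this search when the hinted slot no longer sits on the border.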
diff --git a/src/lj_target.h b/src/lj_target.h
index 8c881652..e7322c07 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -55,10 +55,16 @@ typedef uint32_t RegSP;
55/* Bitset for registers. 32 registers suffice for most architectures. 55/* Bitset for registers. 32 registers suffice for most architectures.
56** Note that one set holds bits for both GPRs and FPRs. 56** Note that one set holds bits for both GPRs and FPRs.
57*/ 57*/
58#if LJ_TARGET_PPC || LJ_TARGET_MIPS 58#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
59typedef uint64_t RegSet; 59typedef uint64_t RegSet;
60#define RSET_BITS 6
61#define rset_picktop_(rs) ((Reg)lj_fls64(rs))
62#define rset_pickbot_(rs) ((Reg)lj_ffs64(rs))
60#else 63#else
61typedef uint32_t RegSet; 64typedef uint32_t RegSet;
65#define RSET_BITS 5
66#define rset_picktop_(rs) ((Reg)lj_fls(rs))
67#define rset_pickbot_(rs) ((Reg)lj_ffs(rs))
62#endif 68#endif
63 69
64#define RID2RSET(r) (((RegSet)1) << (r)) 70#define RID2RSET(r) (((RegSet)1) << (r))
@@ -69,13 +75,6 @@ typedef uint32_t RegSet;
69#define rset_set(rs, r) (rs |= RID2RSET(r)) 75#define rset_set(rs, r) (rs |= RID2RSET(r))
70#define rset_clear(rs, r) (rs &= ~RID2RSET(r)) 76#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
71#define rset_exclude(rs, r) (rs & ~RID2RSET(r)) 77#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
72#if LJ_TARGET_PPC || LJ_TARGET_MIPS
73#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
74#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
75#else
76#define rset_picktop(rs) ((Reg)lj_fls(rs))
77#define rset_pickbot(rs) ((Reg)lj_ffs(rs))
78#endif
79 78
80/* -- Register allocation cost -------------------------------------------- */ 79/* -- Register allocation cost -------------------------------------------- */
81 80
@@ -138,6 +137,8 @@ typedef uint32_t RegCost;
138#include "lj_target_x86.h" 137#include "lj_target_x86.h"
139#elif LJ_TARGET_ARM 138#elif LJ_TARGET_ARM
140#include "lj_target_arm.h" 139#include "lj_target_arm.h"
140#elif LJ_TARGET_ARM64
141#include "lj_target_arm64.h"
141#elif LJ_TARGET_PPC 142#elif LJ_TARGET_PPC
142#include "lj_target_ppc.h" 143#include "lj_target_ppc.h"
143#elif LJ_TARGET_MIPS 144#elif LJ_TARGET_MIPS
@@ -150,7 +151,8 @@ typedef uint32_t RegCost;
150/* Return the address of an exit stub. */ 151/* Return the address of an exit stub. */
151static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) 152static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
152{ 153{
153 lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); 154 lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL,
155 "exit stub group for exit %d uninitialized", exitno);
154 return (char *)group[exitno / EXITSTUBS_PER_GROUP] + 156 return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
155 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); 157 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
156} 158}
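RegSet grows to 64 bits for ARM64 (as it already was for PPC/MIPS), and the top/bottom register picks now go through rset_picktop_/rset_pickbot_ built on lj_fls64/lj_ffs64 rather than per-architecture builtins. A standalone sketch of what those picks compute, using compiler builtins purely as stand-ins for LuaJIT's bit-scan helpers:

#include <stdio.h>
#include <stdint.h>

/* Standalone model of rset_picktop_/rset_pickbot_ on a 64-bit RegSet.
** GCC/Clang builtins stand in for lj_fls64/lj_ffs64 here.
*/
typedef uint64_t RegSet;
typedef uint32_t Reg;

static Reg picktop(RegSet rs) { return (Reg)(63 - __builtin_clzll(rs)); }
static Reg pickbot(RegSet rs) { return (Reg)__builtin_ctzll(rs); }

int main(void)
{
  RegSet free_regs = (1ull << 3) | (1ull << 17) | (1ull << 40);
  printf("top = %u, bottom = %u\n", picktop(free_regs), pickbot(free_regs));
  return 0;                            /* Prints "top = 40, bottom = 3". */
}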
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index cdc8776d..7170fcab 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -211,6 +211,7 @@ typedef enum ARMIns {
211 /* ARMv6T2 */ 211 /* ARMv6T2 */
212 ARMI_MOVW = 0xe3000000, 212 ARMI_MOVW = 0xe3000000,
213 ARMI_MOVT = 0xe3400000, 213 ARMI_MOVT = 0xe3400000,
214 ARMI_BFI = 0xe7c00010,
214 215
215 /* VFP */ 216 /* VFP */
216 ARMI_VMOV_D = 0xeeb00b40, 217 ARMI_VMOV_D = 0xeeb00b40,
@@ -243,10 +244,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 244 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 245 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 246 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 247 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 248 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 249 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
new file mode 100644
index 00000000..7d11395d
--- /dev/null
+++ b/src/lj_target_arm64.h
@@ -0,0 +1,346 @@
1/*
2** Definitions for ARM64 CPUs.
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TARGET_ARM64_H
7#define _LJ_TARGET_ARM64_H
8
9/* -- Registers IDs ------------------------------------------------------- */
10
11#define GPRDEF(_) \
12 _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
13 _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
14 _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
15 _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP)
16#define FPRDEF(_) \
17 _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
18 _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
19 _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
20 _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31)
21#define VRIDDEF(_)
22
23#define RIDENUM(name) RID_##name,
24
25enum {
26 GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
27 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
28 RID_MAX,
29 RID_TMP = RID_LR,
30 RID_ZERO = RID_SP,
31
32 /* Calling conventions. */
33 RID_RET = RID_X0,
34 RID_RETLO = RID_X0,
35 RID_RETHI = RID_X1,
36 RID_FPRET = RID_D0,
37
38 /* These definitions must match with the *.dasc file(s): */
39 RID_BASE = RID_X19, /* Interpreter BASE. */
40 RID_LPC = RID_X21, /* Interpreter PC. */
41 RID_GL = RID_X22, /* Interpreter GL. */
42 RID_LREG = RID_X23, /* Interpreter L. */
43
44 /* Register ranges [min, max) and number of registers. */
45 RID_MIN_GPR = RID_X0,
46 RID_MAX_GPR = RID_SP+1,
47 RID_MIN_FPR = RID_MAX_GPR,
48 RID_MAX_FPR = RID_D31+1,
49 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
50 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
51};
52
53#define RID_NUM_KREF RID_NUM_GPR
54#define RID_MIN_KREF RID_X0
55
56/* -- Register sets ------------------------------------------------------- */
57
58/* Make use of all registers, except for x18, fp, lr and sp. */
59#define RSET_FIXED \
60 (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\
61 RID2RSET(RID_GL))
62#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
63#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
64#define RSET_ALL (RSET_GPR|RSET_FPR)
65#define RSET_INIT RSET_ALL
66
67/* lr is an implicit scratch register. */
68#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1))
69#define RSET_SCRATCH_FPR \
70 (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
71#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
72#define REGARG_FIRSTGPR RID_X0
73#define REGARG_LASTGPR RID_X7
74#define REGARG_NUMGPR 8
75#define REGARG_FIRSTFPR RID_D0
76#define REGARG_LASTFPR RID_D7
77#define REGARG_NUMFPR 8
78
79/* -- Spill slots --------------------------------------------------------- */
80
81/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
82**
83** SPS_FIXED: Available fixed spill slots in interpreter frame.
84** This definition must match with the vm_arm64.dasc file.
85** Pre-allocate some slots to avoid sp adjust in every root trace.
86**
87** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
88*/
89#define SPS_FIXED 4
90#define SPS_FIRST 2
91
92#define SPOFS_TMP 0
93
94#define sps_scale(slot) (4 * (int32_t)(slot))
95#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
96
97/* -- Exit state ---------------------------------------------------------- */
98
99/* This definition must match with the *.dasc file(s). */
100typedef struct {
101 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
102 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
103 int32_t spill[256]; /* Spill slots. */
104} ExitState;
105
106/* Highest exit + 1 indicates stack check. */
107#define EXITSTATE_CHECKEXIT 1
108
109/* Return the address of a per-trace exit stub. */
110static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
111{
112 while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
113 return p + 3 + exitno;
114}
115/* Avoid dependence on lj_jit.h if only including lj_target.h. */
116#define exitstub_trace_addr(T, exitno) \
117 exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
118
119/* -- Instructions -------------------------------------------------------- */
120
121/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
122#if LJ_BE
123#define A64I_LE(x) (lj_bswap(x))
124#else
125#define A64I_LE(x) (x)
126#endif
127
128/* Instruction fields. */
129#define A64F_D(r) (r)
130#define A64F_N(r) ((r) << 5)
131#define A64F_A(r) ((r) << 10)
132#define A64F_M(r) ((r) << 16)
133#define A64F_IMMS(x) ((x) << 10)
134#define A64F_IMMR(x) ((x) << 16)
135#define A64F_U16(x) ((x) << 5)
136#define A64F_U12(x) ((x) << 10)
137#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
138#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
139#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
140#define A64F_S9(x) ((x) << 12)
141#define A64F_BIT(x) ((x) << 19)
142#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
143#define A64F_EX(ex) (A64I_EX | ((ex) << 13))
144#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10))
145#define A64F_FP8(x) ((x) << 13)
146#define A64F_CC(cc) ((cc) << 12)
147#define A64F_LSL16(x) (((x) / 16) << 21)
148#define A64F_BSH(sh) ((sh) << 10)
149
150/* Check for valid field range. */
151#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
152
153typedef enum A64Ins {
154 A64I_S = 0x20000000,
155 A64I_X = 0x80000000,
156 A64I_EX = 0x00200000,
157 A64I_ON = 0x00200000,
158 A64I_K12 = 0x1a000000,
159 A64I_K13 = 0x18000000,
160 A64I_LS_U = 0x01000000,
161 A64I_LS_S = 0x00800000,
162 A64I_LS_R = 0x01200800,
163 A64I_LS_SH = 0x00001000,
164 A64I_LS_UXTWx = 0x00004000,
165 A64I_LS_SXTWx = 0x0000c000,
166 A64I_LS_SXTXx = 0x0000e000,
167 A64I_LS_LSLx = 0x00006000,
168
169 A64I_ADDw = 0x0b000000,
170 A64I_ADDx = 0x8b000000,
171 A64I_ADDSw = 0x2b000000,
172 A64I_ADDSx = 0xab000000,
173 A64I_NEGw = 0x4b0003e0,
174 A64I_NEGx = 0xcb0003e0,
175 A64I_SUBw = 0x4b000000,
176 A64I_SUBx = 0xcb000000,
177 A64I_SUBSw = 0x6b000000,
178 A64I_SUBSx = 0xeb000000,
179
180 A64I_MULw = 0x1b007c00,
181 A64I_MULx = 0x9b007c00,
182 A64I_SMULL = 0x9b207c00,
183
184 A64I_ANDw = 0x0a000000,
185 A64I_ANDx = 0x8a000000,
186 A64I_ANDSw = 0x6a000000,
187 A64I_ANDSx = 0xea000000,
188 A64I_EORw = 0x4a000000,
189 A64I_EORx = 0xca000000,
190 A64I_ORRw = 0x2a000000,
191 A64I_ORRx = 0xaa000000,
192 A64I_TSTw = 0x6a00001f,
193 A64I_TSTx = 0xea00001f,
194
195 A64I_CMPw = 0x6b00001f,
196 A64I_CMPx = 0xeb00001f,
197 A64I_CMNw = 0x2b00001f,
198 A64I_CMNx = 0xab00001f,
199 A64I_CCMPw = 0x7a400000,
200 A64I_CCMPx = 0xfa400000,
201 A64I_CSELw = 0x1a800000,
202 A64I_CSELx = 0x9a800000,
203
204 A64I_ASRw = 0x13007c00,
205 A64I_ASRx = 0x9340fc00,
206 A64I_LSLx = 0xd3400000,
207 A64I_LSRx = 0xd340fc00,
208 A64I_SHRw = 0x1ac02000,
209 A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */
210 A64I_REVw = 0x5ac00800,
211 A64I_REVx = 0xdac00c00,
212
213 A64I_EXTRw = 0x13800000,
214 A64I_EXTRx = 0x93c00000,
215 A64I_BFMw = 0x33000000,
216 A64I_BFMx = 0xb3400000,
217 A64I_SBFMw = 0x13000000,
218 A64I_SBFMx = 0x93400000,
219 A64I_SXTBw = 0x13001c00,
220 A64I_SXTHw = 0x13003c00,
221 A64I_SXTW = 0x93407c00,
222 A64I_UBFMw = 0x53000000,
223 A64I_UBFMx = 0xd3400000,
224 A64I_UXTBw = 0x53001c00,
225 A64I_UXTHw = 0x53003c00,
226
227 A64I_MOVw = 0x2a0003e0,
228 A64I_MOVx = 0xaa0003e0,
229 A64I_MVNw = 0x2a2003e0,
230 A64I_MVNx = 0xaa2003e0,
231 A64I_MOVKw = 0x72800000,
232 A64I_MOVKx = 0xf2800000,
233 A64I_MOVZw = 0x52800000,
234 A64I_MOVZx = 0xd2800000,
235 A64I_MOVNw = 0x12800000,
236 A64I_MOVNx = 0x92800000,
237 A64I_ADR = 0x10000000,
238 A64I_ADRP = 0x90000000,
239
240 A64I_LDRB = 0x39400000,
241 A64I_LDRH = 0x79400000,
242 A64I_LDRw = 0xb9400000,
243 A64I_LDRx = 0xf9400000,
244 A64I_LDRLw = 0x18000000,
245 A64I_LDRLx = 0x58000000,
246 A64I_STRB = 0x39000000,
247 A64I_STRH = 0x79000000,
248 A64I_STRw = 0xb9000000,
249 A64I_STRx = 0xf9000000,
250 A64I_STPw = 0x29000000,
251 A64I_STPx = 0xa9000000,
252 A64I_LDPw = 0x29400000,
253 A64I_LDPx = 0xa9400000,
254
255 A64I_B = 0x14000000,
256 A64I_BCC = 0x54000000,
257 A64I_BL = 0x94000000,
258 A64I_BR = 0xd61f0000,
259 A64I_BLR = 0xd63f0000,
260 A64I_TBZ = 0x36000000,
261 A64I_TBNZ = 0x37000000,
262 A64I_CBZ = 0x34000000,
263 A64I_CBNZ = 0x35000000,
264
265 A64I_BRAAZ = 0xd61f081f,
266 A64I_BLRAAZ = 0xd63f081f,
267
268 A64I_NOP = 0xd503201f,
269
270 /* FP */
271 A64I_FADDd = 0x1e602800,
272 A64I_FSUBd = 0x1e603800,
273 A64I_FMADDd = 0x1f400000,
274 A64I_FMSUBd = 0x1f408000,
275 A64I_FNMADDd = 0x1f600000,
276 A64I_FNMSUBd = 0x1f608000,
277 A64I_FMULd = 0x1e600800,
278 A64I_FDIVd = 0x1e601800,
279 A64I_FNEGd = 0x1e614000,
280 A64I_FABS = 0x1e60c000,
281 A64I_FSQRTd = 0x1e61c000,
282 A64I_LDRs = 0xbd400000,
283 A64I_LDRd = 0xfd400000,
284 A64I_LDRLd = 0x5c000000,
285 A64I_STRs = 0xbd000000,
286 A64I_STRd = 0xfd000000,
287 A64I_LDPs = 0x2d400000,
288 A64I_LDPd = 0x6d400000,
289 A64I_STPs = 0x2d000000,
290 A64I_STPd = 0x6d000000,
291 A64I_FCMPd = 0x1e602000,
292 A64I_FCMPZd = 0x1e602008,
293 A64I_FCSELd = 0x1e600c00,
294 A64I_FRINTMd = 0x1e654000,
295 A64I_FRINTPd = 0x1e64c000,
296 A64I_FRINTZd = 0x1e65c000,
297
298 A64I_FCVT_F32_F64 = 0x1e624000,
299 A64I_FCVT_F64_F32 = 0x1e22c000,
300 A64I_FCVT_F32_S32 = 0x1e220000,
301 A64I_FCVT_F64_S32 = 0x1e620000,
302 A64I_FCVT_F32_U32 = 0x1e230000,
303 A64I_FCVT_F64_U32 = 0x1e630000,
304 A64I_FCVT_F32_S64 = 0x9e220000,
305 A64I_FCVT_F64_S64 = 0x9e620000,
306 A64I_FCVT_F32_U64 = 0x9e230000,
307 A64I_FCVT_F64_U64 = 0x9e630000,
308 A64I_FCVT_S32_F64 = 0x1e780000,
309 A64I_FCVT_S32_F32 = 0x1e380000,
310 A64I_FCVT_U32_F64 = 0x1e790000,
311 A64I_FCVT_U32_F32 = 0x1e390000,
312 A64I_FCVT_S64_F64 = 0x9e780000,
313 A64I_FCVT_S64_F32 = 0x9e380000,
314 A64I_FCVT_U64_F64 = 0x9e790000,
315 A64I_FCVT_U64_F32 = 0x9e390000,
316
317 A64I_FMOV_S = 0x1e204000,
318 A64I_FMOV_D = 0x1e604000,
319 A64I_FMOV_R_S = 0x1e260000,
320 A64I_FMOV_S_R = 0x1e270000,
321 A64I_FMOV_R_D = 0x9e660000,
322 A64I_FMOV_D_R = 0x9e670000,
323 A64I_FMOV_DI = 0x1e601000,
324 A64I_MOVI_DI = 0x2f000400,
325} A64Ins;
326
327#define A64I_BR_AUTH (LJ_ABI_PAUTH ? A64I_BRAAZ : A64I_BR)
328#define A64I_BLR_AUTH (LJ_ABI_PAUTH ? A64I_BLRAAZ : A64I_BLR)
329
330typedef enum A64Shift {
331 A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
332} A64Shift;
333
334typedef enum A64Extend {
335 A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX,
336 A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX,
337} A64Extend;
338
339/* ARM condition codes. */
340typedef enum A64CC {
341 CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
342 CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
343 CC_HS = CC_CS, CC_LO = CC_CC
344} A64CC;
345
346#endif
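The A64F_* helpers in the new header above place register numbers and immediates at their bit offsets, so the ARM64 backend forms an instruction word by OR-ing a base A64I_* opcode with its fields. A tiny standalone check (copying just three of the field macros) for add x0, x1, x2, whose architectural encoding is 0x8b020020:

#include <stdio.h>
#include <stdint.h>

/* Compose an instruction word: base opcode | destination | operands. */
#define A64F_D(r)  (r)
#define A64F_N(r)  ((r) << 5)
#define A64F_M(r)  ((r) << 16)
#define A64I_ADDx  0x8b000000u

int main(void)
{
  uint32_t ins = A64I_ADDx | A64F_D(0) | A64F_N(1) | A64F_M(2);
  printf("add x0, x1, x2 -> 0x%08x\n", ins);   /* 0x8b020020 */
  return 0;
}

Loads, stores and branches compose the same way with A64F_U12, A64F_S26 and the other field helpers.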
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 01ec0117..a3353884 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -13,11 +13,15 @@
13 _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ 13 _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
14 _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ 14 _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \
15 _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) 15 _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA)
16#if LJ_SOFTFP
17#define FPRDEF(_)
18#else
16#define FPRDEF(_) \ 19#define FPRDEF(_) \
17 _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ 20 _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
18 _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ 21 _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
19 _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ 22 _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
20 _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) 23 _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
24#endif
21#define VRIDDEF(_) 25#define VRIDDEF(_)
22 26
23#define RIDENUM(name) RID_##name, 27#define RIDENUM(name) RID_##name,
@@ -39,7 +43,11 @@ enum {
39 RID_RETHI = RID_R2, 43 RID_RETHI = RID_R2,
40 RID_RETLO = RID_R3, 44 RID_RETLO = RID_R3,
41#endif 45#endif
46#if LJ_SOFTFP
47 RID_FPRET = RID_R2,
48#else
42 RID_FPRET = RID_F0, 49 RID_FPRET = RID_F0,
50#endif
43 RID_CFUNCADDR = RID_R25, 51 RID_CFUNCADDR = RID_R25,
44 52
45 /* These definitions must match with the *.dasc file(s): */ 53 /* These definitions must match with the *.dasc file(s): */
@@ -52,8 +60,12 @@ enum {
52 /* Register ranges [min, max) and number of registers. */ 60 /* Register ranges [min, max) and number of registers. */
53 RID_MIN_GPR = RID_R0, 61 RID_MIN_GPR = RID_R0,
54 RID_MAX_GPR = RID_RA+1, 62 RID_MAX_GPR = RID_RA+1,
55 RID_MIN_FPR = RID_F0, 63 RID_MIN_FPR = RID_MAX_GPR,
64#if LJ_SOFTFP
65 RID_MAX_FPR = RID_MIN_FPR,
66#else
56 RID_MAX_FPR = RID_F31+1, 67 RID_MAX_FPR = RID_F31+1,
68#endif
57 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, 69 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
58 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ 70 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */
59}; 71};
@@ -68,28 +80,60 @@ enum {
68 (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ 80 (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
69 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) 81 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP))
70#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) 82#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
83#if LJ_SOFTFP
84#define RSET_FPR 0
85#else
86#if LJ_32
71#define RSET_FPR \ 87#define RSET_FPR \
72 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ 88 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
73 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ 89 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
74 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ 90 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
75 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) 91 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
76#define RSET_ALL (RSET_GPR|RSET_FPR) 92#else
77#define RSET_INIT RSET_ALL 93#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
94#endif
95#endif
96#define RSET_ALL (RSET_GPR|RSET_FPR)
97#define RSET_INIT RSET_ALL
78 98
79#define RSET_SCRATCH_GPR \ 99#define RSET_SCRATCH_GPR \
80 (RSET_RANGE(RID_R1, RID_R15+1)|\ 100 (RSET_RANGE(RID_R1, RID_R15+1)|\
81 RID2RSET(RID_R24)|RID2RSET(RID_R25)) 101 RID2RSET(RID_R24)|RID2RSET(RID_R25))
102#if LJ_SOFTFP
103#define RSET_SCRATCH_FPR 0
104#else
105#if LJ_32
82#define RSET_SCRATCH_FPR \ 106#define RSET_SCRATCH_FPR \
83 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ 107 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
84 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ 108 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
85 RID2RSET(RID_F16)|RID2RSET(RID_F18)) 109 RID2RSET(RID_F16)|RID2RSET(RID_F18))
110#else
111#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24)
112#endif
113#endif
86#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) 114#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
87#define REGARG_FIRSTGPR RID_R4 115#define REGARG_FIRSTGPR RID_R4
116#if LJ_32
88#define REGARG_LASTGPR RID_R7 117#define REGARG_LASTGPR RID_R7
89#define REGARG_NUMGPR 4 118#define REGARG_NUMGPR 4
119#else
120#define REGARG_LASTGPR RID_R11
121#define REGARG_NUMGPR 8
122#endif
123#if LJ_ABI_SOFTFP
124#define REGARG_FIRSTFPR 0
125#define REGARG_LASTFPR 0
126#define REGARG_NUMFPR 0
127#else
90#define REGARG_FIRSTFPR RID_F12 128#define REGARG_FIRSTFPR RID_F12
129#if LJ_32
91#define REGARG_LASTFPR RID_F14 130#define REGARG_LASTFPR RID_F14
92#define REGARG_NUMFPR 2 131#define REGARG_NUMFPR 2
132#else
133#define REGARG_LASTFPR RID_F19
134#define REGARG_NUMFPR 8
135#endif
136#endif
93 137
94/* -- Spill slots --------------------------------------------------------- */ 138/* -- Spill slots --------------------------------------------------------- */
95 139
@@ -100,7 +144,11 @@ enum {
100** 144**
101** SPS_FIRST: First spill slot for general use. 145** SPS_FIRST: First spill slot for general use.
102*/ 146*/
147#if LJ_32
103#define SPS_FIXED 5 148#define SPS_FIXED 5
149#else
150#define SPS_FIXED 4
151#endif
104#define SPS_FIRST 4 152#define SPS_FIRST 4
105 153
106#define SPOFS_TMP 0 154#define SPOFS_TMP 0
@@ -112,8 +160,10 @@ enum {
112 160
113/* This definition must match with the *.dasc file(s). */ 161/* This definition must match with the *.dasc file(s). */
114typedef struct { 162typedef struct {
163#if !LJ_SOFTFP
115 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 164 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
116 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 165#endif
166 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
117 int32_t spill[256]; /* Spill slots. */ 167 int32_t spill[256]; /* Spill slots. */
118} ExitState; 168} ExitState;
119 169
@@ -142,52 +192,87 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
142#define MIPSF_F(r) ((r) << 6) 192#define MIPSF_F(r) ((r) << 6)
143#define MIPSF_A(n) ((n) << 6) 193#define MIPSF_A(n) ((n) << 6)
144#define MIPSF_M(n) ((n) << 11) 194#define MIPSF_M(n) ((n) << 11)
195#define MIPSF_L(n) ((n) << 6)
145 196
146typedef enum MIPSIns { 197typedef enum MIPSIns {
198 MIPSI_D = 0x38,
199 MIPSI_DV = 0x10,
200 MIPSI_D32 = 0x3c,
147 /* Integer instructions. */ 201 /* Integer instructions. */
148 MIPSI_MOVE = 0x00000021, 202 MIPSI_MOVE = 0x00000025,
149 MIPSI_NOP = 0x00000000, 203 MIPSI_NOP = 0x00000000,
150 204
151 MIPSI_LI = 0x24000000, 205 MIPSI_LI = 0x24000000,
152 MIPSI_LU = 0x34000000, 206 MIPSI_LU = 0x34000000,
153 MIPSI_LUI = 0x3c000000, 207 MIPSI_LUI = 0x3c000000,
154 208
155 MIPSI_ADDIU = 0x24000000, 209 MIPSI_AND = 0x00000024,
156 MIPSI_ANDI = 0x30000000, 210 MIPSI_ANDI = 0x30000000,
211 MIPSI_OR = 0x00000025,
157 MIPSI_ORI = 0x34000000, 212 MIPSI_ORI = 0x34000000,
213 MIPSI_XOR = 0x00000026,
158 MIPSI_XORI = 0x38000000, 214 MIPSI_XORI = 0x38000000,
215 MIPSI_NOR = 0x00000027,
216
217 MIPSI_SLT = 0x0000002a,
218 MIPSI_SLTU = 0x0000002b,
159 MIPSI_SLTI = 0x28000000, 219 MIPSI_SLTI = 0x28000000,
160 MIPSI_SLTIU = 0x2c000000, 220 MIPSI_SLTIU = 0x2c000000,
161 221
162 MIPSI_ADDU = 0x00000021, 222 MIPSI_ADDU = 0x00000021,
223 MIPSI_ADDIU = 0x24000000,
224 MIPSI_SUB = 0x00000022,
163 MIPSI_SUBU = 0x00000023, 225 MIPSI_SUBU = 0x00000023,
226
227#if !LJ_TARGET_MIPSR6
164 MIPSI_MUL = 0x70000002, 228 MIPSI_MUL = 0x70000002,
165 MIPSI_AND = 0x00000024, 229 MIPSI_DIV = 0x0000001a,
166 MIPSI_OR = 0x00000025, 230 MIPSI_DIVU = 0x0000001b,
167 MIPSI_XOR = 0x00000026, 231
168 MIPSI_NOR = 0x00000027,
169 MIPSI_SLT = 0x0000002a,
170 MIPSI_SLTU = 0x0000002b,
171 MIPSI_MOVZ = 0x0000000a, 232 MIPSI_MOVZ = 0x0000000a,
172 MIPSI_MOVN = 0x0000000b, 233 MIPSI_MOVN = 0x0000000b,
234 MIPSI_MFHI = 0x00000010,
235 MIPSI_MFLO = 0x00000012,
236 MIPSI_MULT = 0x00000018,
237#else
238 MIPSI_MUL = 0x00000098,
239 MIPSI_MUH = 0x000000d8,
240 MIPSI_DIV = 0x0000009a,
241 MIPSI_DIVU = 0x0000009b,
242
243 MIPSI_SELEQZ = 0x00000035,
244 MIPSI_SELNEZ = 0x00000037,
245#endif
173 246
174 MIPSI_SLL = 0x00000000, 247 MIPSI_SLL = 0x00000000,
175 MIPSI_SRL = 0x00000002, 248 MIPSI_SRL = 0x00000002,
176 MIPSI_SRA = 0x00000003, 249 MIPSI_SRA = 0x00000003,
177 MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ 250 MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
251 MIPSI_DROTR = 0x0020003a,
252 MIPSI_DROTR32 = 0x0020003e,
178 MIPSI_SLLV = 0x00000004, 253 MIPSI_SLLV = 0x00000004,
179 MIPSI_SRLV = 0x00000006, 254 MIPSI_SRLV = 0x00000006,
180 MIPSI_SRAV = 0x00000007, 255 MIPSI_SRAV = 0x00000007,
181 MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ 256 MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
257 MIPSI_DROTRV = 0x00000056,
258
259 MIPSI_INS = 0x7c000004, /* MIPSXXR2 */
182 260
183 MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ 261 MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
184 MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ 262 MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
185 MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ 263 MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
264 MIPSI_DSBH = 0x7c0000a4,
186 265
187 MIPSI_B = 0x10000000, 266 MIPSI_B = 0x10000000,
188 MIPSI_J = 0x08000000, 267 MIPSI_J = 0x08000000,
189 MIPSI_JAL = 0x0c000000, 268 MIPSI_JAL = 0x0c000000,
269#if !LJ_TARGET_MIPSR6
270 MIPSI_JALX = 0x74000000,
190 MIPSI_JR = 0x00000008, 271 MIPSI_JR = 0x00000008,
272#else
273 MIPSI_JR = 0x00000009,
274 MIPSI_BALC = 0xe8000000,
275#endif
191 MIPSI_JALR = 0x0000f809, 276 MIPSI_JALR = 0x0000f809,
192 277
193 MIPSI_BEQ = 0x10000000, 278 MIPSI_BEQ = 0x10000000,
@@ -199,7 +284,9 @@ typedef enum MIPSIns {
199 284
200 /* Load/store instructions. */ 285 /* Load/store instructions. */
201 MIPSI_LW = 0x8c000000, 286 MIPSI_LW = 0x8c000000,
287 MIPSI_LD = 0xdc000000,
202 MIPSI_SW = 0xac000000, 288 MIPSI_SW = 0xac000000,
289 MIPSI_SD = 0xfc000000,
203 MIPSI_LB = 0x80000000, 290 MIPSI_LB = 0x80000000,
204 MIPSI_SB = 0xa0000000, 291 MIPSI_SB = 0xa0000000,
205 MIPSI_LH = 0x84000000, 292 MIPSI_LH = 0x84000000,
@@ -211,11 +298,69 @@ typedef enum MIPSIns {
211 MIPSI_LDC1 = 0xd4000000, 298 MIPSI_LDC1 = 0xd4000000,
212 MIPSI_SDC1 = 0xf4000000, 299 MIPSI_SDC1 = 0xf4000000,
213 300
301 /* MIPS64 instructions. */
302 MIPSI_DADD = 0x0000002c,
303 MIPSI_DADDU = 0x0000002d,
304 MIPSI_DADDIU = 0x64000000,
305 MIPSI_DSUB = 0x0000002e,
306 MIPSI_DSUBU = 0x0000002f,
307#if !LJ_TARGET_MIPSR6
308 MIPSI_DDIV = 0x0000001e,
309 MIPSI_DDIVU = 0x0000001f,
310 MIPSI_DMULT = 0x0000001c,
311 MIPSI_DMULTU = 0x0000001d,
312#else
313 MIPSI_DDIV = 0x0000009e,
314 MIPSI_DMOD = 0x000000de,
315 MIPSI_DDIVU = 0x0000009f,
316 MIPSI_DMODU = 0x000000df,
317 MIPSI_DMUL = 0x0000009c,
318 MIPSI_DMUH = 0x000000dc,
319#endif
320
321 MIPSI_DSLL = 0x00000038,
322 MIPSI_DSRL = 0x0000003a,
323 MIPSI_DSLLV = 0x00000014,
324 MIPSI_DSRLV = 0x00000016,
325 MIPSI_DSRA = 0x0000003b,
326 MIPSI_DSRAV = 0x00000017,
327 MIPSI_DSRA32 = 0x0000003f,
328 MIPSI_DSLL32 = 0x0000003c,
329 MIPSI_DSRL32 = 0x0000003e,
330 MIPSI_DSHD = 0x7c000164,
331
332 MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU,
333 MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU,
334 MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
335 MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
336 MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
337#if LJ_TARGET_MIPSR6
338 MIPSI_LSA = 0x00000005,
339 MIPSI_DLSA = 0x00000015,
340 MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA,
341#endif
342
343 /* Extract/insert instructions. */
344 MIPSI_DEXTM = 0x7c000001,
345 MIPSI_DEXTU = 0x7c000002,
346 MIPSI_DEXT = 0x7c000003,
347 MIPSI_DINSM = 0x7c000005,
348 MIPSI_DINSU = 0x7c000006,
349 MIPSI_DINS = 0x7c000007,
350
351 MIPSI_FLOOR_D = 0x4620000b,
352
214 /* FP instructions. */ 353 /* FP instructions. */
215 MIPSI_MOV_S = 0x46000006, 354 MIPSI_MOV_S = 0x46000006,
216 MIPSI_MOV_D = 0x46200006, 355 MIPSI_MOV_D = 0x46200006,
356#if !LJ_TARGET_MIPSR6
217 MIPSI_MOVT_D = 0x46210011, 357 MIPSI_MOVT_D = 0x46210011,
218 MIPSI_MOVF_D = 0x46200011, 358 MIPSI_MOVF_D = 0x46200011,
359#else
360 MIPSI_MIN_D = 0x4620001C,
361 MIPSI_MAX_D = 0x4620001E,
362 MIPSI_SEL_D = 0x46200010,
363#endif
219 364
220 MIPSI_ABS_D = 0x46200005, 365 MIPSI_ABS_D = 0x46200005,
221 MIPSI_NEG_D = 0x46200007, 366 MIPSI_NEG_D = 0x46200007,
@@ -235,23 +380,37 @@ typedef enum MIPSIns {
235 MIPSI_CVT_W_D = 0x46200024, 380 MIPSI_CVT_W_D = 0x46200024,
236 MIPSI_CVT_S_W = 0x46800020, 381 MIPSI_CVT_S_W = 0x46800020,
237 MIPSI_CVT_D_W = 0x46800021, 382 MIPSI_CVT_D_W = 0x46800021,
383 MIPSI_CVT_S_L = 0x46a00020,
384 MIPSI_CVT_D_L = 0x46a00021,
238 385
239 MIPSI_TRUNC_W_S = 0x4600000d, 386 MIPSI_TRUNC_W_S = 0x4600000d,
240 MIPSI_TRUNC_W_D = 0x4620000d, 387 MIPSI_TRUNC_W_D = 0x4620000d,
388 MIPSI_TRUNC_L_S = 0x46000009,
389 MIPSI_TRUNC_L_D = 0x46200009,
241 MIPSI_FLOOR_W_S = 0x4600000f, 390 MIPSI_FLOOR_W_S = 0x4600000f,
242 MIPSI_FLOOR_W_D = 0x4620000f, 391 MIPSI_FLOOR_W_D = 0x4620000f,
243 392
244 MIPSI_MFC1 = 0x44000000, 393 MIPSI_MFC1 = 0x44000000,
245 MIPSI_MTC1 = 0x44800000, 394 MIPSI_MTC1 = 0x44800000,
395 MIPSI_DMTC1 = 0x44a00000,
396 MIPSI_DMFC1 = 0x44200000,
246 397
398#if !LJ_TARGET_MIPSR6
247 MIPSI_BC1F = 0x45000000, 399 MIPSI_BC1F = 0x45000000,
248 MIPSI_BC1T = 0x45010000, 400 MIPSI_BC1T = 0x45010000,
249
250 MIPSI_C_EQ_D = 0x46200032, 401 MIPSI_C_EQ_D = 0x46200032,
402 MIPSI_C_OLT_S = 0x46000034,
251 MIPSI_C_OLT_D = 0x46200034, 403 MIPSI_C_OLT_D = 0x46200034,
252 MIPSI_C_ULT_D = 0x46200035, 404 MIPSI_C_ULT_D = 0x46200035,
253 MIPSI_C_OLE_D = 0x46200036, 405 MIPSI_C_OLE_D = 0x46200036,
254 MIPSI_C_ULE_D = 0x46200037, 406 MIPSI_C_ULE_D = 0x46200037,
407#else
408 MIPSI_BC1EQZ = 0x45200000,
409 MIPSI_BC1NEZ = 0x45a00000,
410 MIPSI_CMP_EQ_D = 0x46a00002,
411 MIPSI_CMP_LT_S = 0x46800004,
412 MIPSI_CMP_LT_D = 0x46a00004,
413#endif
255 414
256} MIPSIns; 415} MIPSIns;
257 416
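The new MIPSI_AADDU/MIPSI_AADDIU/MIPSI_AL/MIPSI_AS aliases pick the 32- or 64-bit opcode at compile time so the backend can emit address-width-agnostic loads, stores and pointer arithmetic. A rough standalone sketch of the idea, with the host pointer width standing in for LJ_32 (the real macro is a target-architecture constant, not a host property):

#include <stdio.h>
#include <stdint.h>

/* Rough model of the MIPSI_AL alias: a pointer-sized load resolves to LW on
** MIPS32 or LD on MIPS64.
*/
#define MIPSI_LW  0x8c000000u
#define MIPSI_LD  0xdc000000u
#define SKETCH_LJ_32  (sizeof(void *) == 4)   /* Stand-in for LJ_32. */

int main(void)
{
  uint32_t mipsi_al = SKETCH_LJ_32 ? MIPSI_LW : MIPSI_LD;
  printf("MIPSI_AL -> 0x%08x (%s)\n", mipsi_al, SKETCH_LJ_32 ? "LW" : "LD");
  return 0;
}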
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index d881c540..50620c50 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -104,7 +104,7 @@ enum {
104/* This definition must match with the *.dasc file(s). */ 104/* This definition must match with the *.dasc file(s). */
105typedef struct { 105typedef struct {
106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
107 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 107 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
108 int32_t spill[256]; /* Spill slots. */ 108 int32_t spill[256]; /* Spill slots. */
109} ExitState; 109} ExitState;
110 110
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 7d0e5e6d..3482309b 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -22,7 +22,7 @@
22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) 22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
23#endif 23#endif
24#define VRIDDEF(_) \ 24#define VRIDDEF(_) \
25 _(MRM) 25 _(MRM) _(RIP)
26 26
27#define RIDENUM(name) RID_##name, 27#define RIDENUM(name) RID_##name,
28 28
@@ -31,15 +31,16 @@ enum {
31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ 31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
32 RID_MAX, 32 RID_MAX,
33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
34 RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */
34 35
35 /* Calling conventions. */ 36 /* Calling conventions. */
37 RID_SP = RID_ESP,
36 RID_RET = RID_EAX, 38 RID_RET = RID_EAX,
37#if LJ_64 39#if LJ_64
38 RID_FPRET = RID_XMM0, 40 RID_FPRET = RID_XMM0,
39#else 41#endif
40 RID_RETLO = RID_EAX, 42 RID_RETLO = RID_EAX,
41 RID_RETHI = RID_EDX, 43 RID_RETHI = RID_EDX,
42#endif
43 44
44 /* These definitions must match with the *.dasc file(s): */ 45 /* These definitions must match with the *.dasc file(s): */
45 RID_BASE = RID_EDX, /* Interpreter BASE. */ 46 RID_BASE = RID_EDX, /* Interpreter BASE. */
@@ -62,8 +63,10 @@ enum {
62 63
63/* -- Register sets ------------------------------------------------------- */ 64/* -- Register sets ------------------------------------------------------- */
64 65
65/* Make use of all registers, except the stack pointer. */ 66/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
66#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) 67#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
68 - RID2RSET(RID_ESP) \
69 - LJ_GC64*RID2RSET(RID_DISPATCH))
67#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) 70#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
68#define RSET_ALL (RSET_GPR|RSET_FPR) 71#define RSET_ALL (RSET_GPR|RSET_FPR)
69#define RSET_INIT RSET_ALL 72#define RSET_INIT RSET_ALL
@@ -113,8 +116,8 @@ enum {
113 116
114#if LJ_64 117#if LJ_64
115/* Prefer the low 8 regs of each type to reduce REX prefixes. */ 118/* Prefer the low 8 regs of each type to reduce REX prefixes. */
116#undef rset_picktop 119#undef rset_picktop_
117#define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) 120#define rset_picktop_(rs) (lj_fls(lj_bswap(rs)) ^ 0x18)
118#endif 121#endif
119 122
120/* -- Spill slots --------------------------------------------------------- */ 123/* -- Spill slots --------------------------------------------------------- */
@@ -131,7 +134,11 @@ enum {
131#define SPS_FIXED (4*2) 134#define SPS_FIXED (4*2)
132#define SPS_FIRST (4*2) /* Don't use callee register save area. */ 135#define SPS_FIRST (4*2) /* Don't use callee register save area. */
133#else 136#else
137#if LJ_GC64
138#define SPS_FIXED 2
139#else
134#define SPS_FIXED 4 140#define SPS_FIXED 4
141#endif
135#define SPS_FIRST 2 142#define SPS_FIRST 2
136#endif 143#endif
137#else 144#else
@@ -157,6 +164,8 @@ typedef struct {
157#define EXITSTUB_SPACING (2+2) 164#define EXITSTUB_SPACING (2+2)
158#define EXITSTUBS_PER_GROUP 32 165#define EXITSTUBS_PER_GROUP 32
159 166
167#define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */
168
160/* -- x86 ModRM operand encoding ------------------------------------------ */ 169/* -- x86 ModRM operand encoding ------------------------------------------ */
161 170
162typedef enum { 171typedef enum {
@@ -184,12 +193,18 @@ typedef struct {
184#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) 193#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
185#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) 194#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
186 195
196#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24)))
197#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24)))
198#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24)))
199#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24)))
200
187/* This list of x86 opcodes is not intended to be complete. Opcodes are only 201/* This list of x86 opcodes is not intended to be complete. Opcodes are only
188** included when needed. Take a look at DynASM or jit.dis_x86 to see the 202** included when needed. Take a look at DynASM or jit.dis_x86 to see the
189** whole mess. 203** whole mess.
190*/ 204*/
191typedef enum { 205typedef enum {
192 /* Fixed length opcodes. XI_* prefix. */ 206 /* Fixed length opcodes. XI_* prefix. */
207 XI_O16 = 0x66,
193 XI_NOP = 0x90, 208 XI_NOP = 0x90,
194 XI_XCHGa = 0x90, 209 XI_XCHGa = 0x90,
195 XI_CALL = 0xe8, 210 XI_CALL = 0xe8,
@@ -207,26 +222,28 @@ typedef enum {
207 XI_PUSHi8 = 0x6a, 222 XI_PUSHi8 = 0x6a,
208 XI_TESTb = 0x84, 223 XI_TESTb = 0x84,
209 XI_TEST = 0x85, 224 XI_TEST = 0x85,
225 XI_INT3 = 0xcc,
210 XI_MOVmi = 0xc7, 226 XI_MOVmi = 0xc7,
211 XI_GROUP5 = 0xff, 227 XI_GROUP5 = 0xff,
212 228
213 /* Note: little-endian byte-order! */ 229 /* Note: little-endian byte-order! */
214 XI_FLDZ = 0xeed9, 230 XI_FLDZ = 0xeed9,
215 XI_FLD1 = 0xe8d9, 231 XI_FLD1 = 0xe8d9,
216 XI_FLDLG2 = 0xecd9,
217 XI_FLDLN2 = 0xedd9,
218 XI_FDUP = 0xc0d9, /* Really fld st0. */ 232 XI_FDUP = 0xc0d9, /* Really fld st0. */
219 XI_FPOP = 0xd8dd, /* Really fstp st0. */ 233 XI_FPOP = 0xd8dd, /* Really fstp st0. */
220 XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ 234 XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
221 XI_FRNDINT = 0xfcd9, 235 XI_FRNDINT = 0xfcd9,
222 XI_FSIN = 0xfed9,
223 XI_FCOS = 0xffd9,
224 XI_FPTAN = 0xf2d9,
225 XI_FPATAN = 0xf3d9,
226 XI_FSCALE = 0xfdd9, 236 XI_FSCALE = 0xfdd9,
227 XI_FYL2X = 0xf1d9, 237 XI_FYL2X = 0xf1d9,
228 238
239 /* VEX-encoded instructions. XV_* prefix. */
240 XV_RORX = XV_f20f3a(f0),
241 XV_SARX = XV_f30f38(f7),
242 XV_SHLX = XV_660f38(f7),
243 XV_SHRX = XV_f20f38(f7),
244
229 /* Variable-length opcodes. XO_* prefix. */ 245 /* Variable-length opcodes. XO_* prefix. */
246 XO_OR = XO_(0b),
230 XO_MOV = XO_(8b), 247 XO_MOV = XO_(8b),
231 XO_MOVto = XO_(89), 248 XO_MOVto = XO_(89),
232 XO_MOVtow = XO_66(89), 249 XO_MOVtow = XO_66(89),
@@ -277,10 +294,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 294 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 295 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 296 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 297 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 298 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 299 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 300 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 301 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_trace.c b/src/lj_trace.c
index d015f2ab..a5e316e1 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -30,6 +30,7 @@
30#include "lj_vm.h" 30#include "lj_vm.h"
31#include "lj_vmevent.h" 31#include "lj_vmevent.h"
32#include "lj_target.h" 32#include "lj_target.h"
33#include "lj_prng.h"
33 34
34/* -- Error handling ------------------------------------------------------ */ 35/* -- Error handling ------------------------------------------------------ */
35 36
@@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T)
104 name++; 105 name++;
105 else 106 else
106 name = "(string)"; 107 name = "(string)";
107 lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); 108 lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
109 "trace PC out of range");
108 lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); 110 lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
109 if (!fp) { 111 if (!fp) {
110 char fname[40]; 112 char fname[40];
@@ -117,15 +119,26 @@ static void perftools_addtrace(GCtrace *T)
117} 119}
118#endif 120#endif
119 121
120/* Allocate space for copy of trace. */ 122/* Allocate space for copy of T. */
121static GCtrace *trace_save_alloc(jit_State *J) 123GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
122{ 124{
123 size_t sztr = ((sizeof(GCtrace)+7)&~7); 125 size_t sztr = ((sizeof(GCtrace)+7)&~7);
124 size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); 126 size_t szins = (T->nins-T->nk)*sizeof(IRIns);
125 size_t sz = sztr + szins + 127 size_t sz = sztr + szins +
126 J->cur.nsnap*sizeof(SnapShot) + 128 T->nsnap*sizeof(SnapShot) +
127 J->cur.nsnapmap*sizeof(SnapEntry); 129 T->nsnapmap*sizeof(SnapEntry);
128 return lj_mem_newt(J->L, (MSize)sz, GCtrace); 130 GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
131 char *p = (char *)T2 + sztr;
132 T2->gct = ~LJ_TTRACE;
133 T2->marked = 0;
134 T2->traceno = 0;
135 T2->ir = (IRIns *)p - T->nk;
136 T2->nins = T->nins;
137 T2->nk = T->nk;
138 T2->nsnap = T->nsnap;
139 T2->nsnapmap = T->nsnapmap;
140 memcpy(p, T->ir + T->nk, szins);
141 return T2;
129} 142}
130 143
131/* Save current trace by copying and compacting it. */ 144/* Save current trace by copying and compacting it. */
@@ -139,12 +152,15 @@ static void trace_save(jit_State *J, GCtrace *T)
139 setgcrefp(J2G(J)->gc.root, T); 152 setgcrefp(J2G(J)->gc.root, T);
140 newwhite(J2G(J), T); 153 newwhite(J2G(J), T);
141 T->gct = ~LJ_TTRACE; 154 T->gct = ~LJ_TTRACE;
142 T->ir = (IRIns *)p - J->cur.nk; 155 T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
143 memcpy(p, J->cur.ir+J->cur.nk, szins); 156#if LJ_ABI_PAUTH
157 T->mcauth = lj_ptr_sign((ASMFunction)T->mcode, T);
158#endif
144 p += szins; 159 p += szins;
145 TRACE_APPENDVEC(snap, nsnap, SnapShot) 160 TRACE_APPENDVEC(snap, nsnap, SnapShot)
146 TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) 161 TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
147 J->cur.traceno = 0; 162 J->cur.traceno = 0;
163 J->curfinal = NULL;
148 setgcrefp(J->trace[T->traceno], T); 164 setgcrefp(J->trace[T->traceno], T);
149 lj_gc_barriertrace(J2G(J), T->traceno); 165 lj_gc_barriertrace(J2G(J), T->traceno);
150 lj_gdbjit_addtrace(J, T); 166 lj_gdbjit_addtrace(J, T);
@@ -172,7 +188,7 @@ void lj_trace_reenableproto(GCproto *pt)
172{ 188{
173 if ((pt->flags & PROTO_ILOOP)) { 189 if ((pt->flags & PROTO_ILOOP)) {
174 BCIns *bc = proto_bc(pt); 190 BCIns *bc = proto_bc(pt);
175 BCPos i, sizebc = pt->sizebc;; 191 BCPos i, sizebc = pt->sizebc;
176 pt->flags &= ~PROTO_ILOOP; 192 pt->flags &= ~PROTO_ILOOP;
177 if (bc_op(bc[0]) == BC_IFUNCF) 193 if (bc_op(bc[0]) == BC_IFUNCF)
178 setbc_op(&bc[0], BC_FUNCF); 194 setbc_op(&bc[0], BC_FUNCF);
@@ -194,27 +210,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
194 return; /* No need to unpatch branches in parent traces (yet). */ 210 return; /* No need to unpatch branches in parent traces (yet). */
195 switch (bc_op(*pc)) { 211 switch (bc_op(*pc)) {
196 case BC_JFORL: 212 case BC_JFORL:
197 lua_assert(traceref(J, bc_d(*pc)) == T); 213 lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace");
198 *pc = T->startins; 214 *pc = T->startins;
199 pc += bc_j(T->startins); 215 pc += bc_j(T->startins);
200 lua_assert(bc_op(*pc) == BC_JFORI); 216 lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI");
201 setbc_op(pc, BC_FORI); 217 setbc_op(pc, BC_FORI);
202 break; 218 break;
203 case BC_JITERL: 219 case BC_JITERL:
204 case BC_JLOOP: 220 case BC_JLOOP:
205 lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op)); 221 lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP ||
222 bc_isret(op), "bad original bytecode %d", op);
206 *pc = T->startins; 223 *pc = T->startins;
207 break; 224 break;
208 case BC_JMP: 225 case BC_JMP:
209 lua_assert(op == BC_ITERL); 226 lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op);
210 pc += bc_j(*pc)+2; 227 pc += bc_j(*pc)+2;
211 if (bc_op(*pc) == BC_JITERL) { 228 if (bc_op(*pc) == BC_JITERL) {
212 lua_assert(traceref(J, bc_d(*pc)) == T); 229 lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace");
213 *pc = T->startins; 230 *pc = T->startins;
214 } 231 }
215 break; 232 break;
216 case BC_JFUNCF: 233 case BC_JFUNCF:
217 lua_assert(op == BC_FUNCF); 234 lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op);
218 *pc = T->startins; 235 *pc = T->startins;
219 break; 236 break;
220 default: /* Already unpatched. */ 237 default: /* Already unpatched. */
@@ -226,7 +243,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
226static void trace_flushroot(jit_State *J, GCtrace *T) 243static void trace_flushroot(jit_State *J, GCtrace *T)
227{ 244{
228 GCproto *pt = &gcref(T->startpt)->pt; 245 GCproto *pt = &gcref(T->startpt)->pt;
229 lua_assert(T->root == 0 && pt != NULL); 246 lj_assertJ(T->root == 0, "not a root trace");
247 lj_assertJ(pt != NULL, "trace has no prototype");
230 /* First unpatch any modified bytecode. */ 248 /* First unpatch any modified bytecode. */
231 trace_unpatch(J, T); 249 trace_unpatch(J, T);
232 /* Unlink root trace from chain anchored in prototype. */ 250 /* Unlink root trace from chain anchored in prototype. */
@@ -274,7 +292,7 @@ int lj_trace_flushall(lua_State *L)
274 if (T->root == 0) 292 if (T->root == 0)
275 trace_flushroot(J, T); 293 trace_flushroot(J, T);
276 lj_gdbjit_deltrace(J, T); 294 lj_gdbjit_deltrace(J, T);
277 T->traceno = 0; 295 T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */
278 setgcrefnull(J->trace[i]); 296 setgcrefnull(J->trace[i]);
279 } 297 }
280 } 298 }
@@ -296,13 +314,42 @@ void lj_trace_initstate(global_State *g)
296{ 314{
297 jit_State *J = G2J(g); 315 jit_State *J = G2J(g);
298 TValue *tv; 316 TValue *tv;
299 /* Initialize SIMD constants. */ 317
318 /* Initialize aligned SIMD constants. */
300 tv = LJ_KSIMD(J, LJ_KSIMD_ABS); 319 tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
301 tv[0].u64 = U64x(7fffffff,ffffffff); 320 tv[0].u64 = U64x(7fffffff,ffffffff);
302 tv[1].u64 = U64x(7fffffff,ffffffff); 321 tv[1].u64 = U64x(7fffffff,ffffffff);
303 tv = LJ_KSIMD(J, LJ_KSIMD_NEG); 322 tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
304 tv[0].u64 = U64x(80000000,00000000); 323 tv[0].u64 = U64x(80000000,00000000);
305 tv[1].u64 = U64x(80000000,00000000); 324 tv[1].u64 = U64x(80000000,00000000);
325
326 /* Initialize 32/64 bit constants. */
327#if LJ_TARGET_X86ORX64
328 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
329#if LJ_32
330 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
331#endif
332 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
333 J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
334#endif
335#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
336 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
337#endif
338#if LJ_TARGET_PPC
339 J->k32[LJ_K32_2P52_2P31] = 0x59800004;
340 J->k32[LJ_K32_2P52] = 0x59800000;
341#endif
342#if LJ_TARGET_PPC || LJ_TARGET_MIPS
343 J->k32[LJ_K32_2P31] = 0x4f000000;
344#endif
345#if LJ_TARGET_MIPS
346 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
347#if LJ_64
348 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
349 J->k32[LJ_K32_2P63] = 0x5f000000;
350 J->k32[LJ_K32_M2P64] = 0xdf800000;
351#endif
352#endif
306} 353}
307 354
308/* Free everything associated with the JIT compiler state. */ 355/* Free everything associated with the JIT compiler state. */
@@ -313,11 +360,11 @@ void lj_trace_freestate(global_State *g)
313 { /* This assumes all traces have already been freed. */ 360 { /* This assumes all traces have already been freed. */
314 ptrdiff_t i; 361 ptrdiff_t i;
315 for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) 362 for (i = 1; i < (ptrdiff_t)J->sizetrace; i++)
316 lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); 363 lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL,
364 "trace still allocated");
317 } 365 }
318#endif 366#endif
319 lj_mcode_free(J); 367 lj_mcode_free(J);
320 lj_ir_k64_freeall(J);
321 lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); 368 lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
322 lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); 369 lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
323 lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); 370 lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
@@ -329,8 +376,13 @@ void lj_trace_freestate(global_State *g)
329/* Blacklist a bytecode instruction. */ 376/* Blacklist a bytecode instruction. */
330static void blacklist_pc(GCproto *pt, BCIns *pc) 377static void blacklist_pc(GCproto *pt, BCIns *pc)
331{ 378{
332 setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); 379 if (bc_op(*pc) == BC_ITERN) {
333 pt->flags |= PROTO_ILOOP; 380 setbc_op(pc, BC_ITERC);
381 setbc_op(pc+1+bc_j(pc[1]), BC_JMP);
382 } else {
383 setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
384 pt->flags |= PROTO_ILOOP;
385 }
334} 386}
335 387
336/* Penalize a bytecode instruction. */ 388/* Penalize a bytecode instruction. */
@@ -341,7 +393,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e)
341 if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ 393 if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */
342 /* First try to bump its hotcount several times. */ 394 /* First try to bump its hotcount several times. */
343 val = ((uint32_t)J->penalty[i].val << 1) + 395 val = ((uint32_t)J->penalty[i].val << 1) +
344 LJ_PRNG_BITS(J, PENALTY_RNDBITS); 396 (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1));
345 if (val > PENALTY_MAX) { 397 if (val > PENALTY_MAX) {
346 blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ 398 blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */
347 return; 399 return;
@@ -367,10 +419,11 @@ static void trace_start(jit_State *J)
367 TraceNo traceno; 419 TraceNo traceno;
368 420
369 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ 421 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
370 if (J->parent == 0) { 422 if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) {
371 /* Lazy bytecode patching to disable hotcount events. */ 423 /* Lazy bytecode patching to disable hotcount events. */
372 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || 424 lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
373 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); 425 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF,
426 "bad hot bytecode %d", bc_op(*J->pc));
374 setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); 427 setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP);
375 J->pt->flags |= PROTO_ILOOP; 428 J->pt->flags |= PROTO_ILOOP;
376 } 429 }
@@ -378,10 +431,17 @@ static void trace_start(jit_State *J)
378 return; 431 return;
379 } 432 }
380 433
434 /* Ensuring forward progress for BC_ITERN can trigger hotcount again. */
435 if (!J->parent && bc_op(*J->pc) == BC_JLOOP) { /* Already compiled. */
436 J->state = LJ_TRACE_IDLE; /* Silently ignored. */
437 return;
438 }
439
381 /* Get a new trace number. */ 440 /* Get a new trace number. */
382 traceno = trace_findfree(J); 441 traceno = trace_findfree(J);
383 if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ 442 if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */
384 lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); 443 lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0,
444 "recorder called from GC hook");
385 lj_trace_flushall(J->L); 445 lj_trace_flushall(J->L);
386 J->state = LJ_TRACE_IDLE; /* Silently ignored. */ 446 J->state = LJ_TRACE_IDLE; /* Silently ignored. */
387 return; 447 return;
@@ -401,6 +461,8 @@ static void trace_start(jit_State *J)
401 J->guardemit.irt = 0; 461 J->guardemit.irt = 0;
402 J->postproc = LJ_POST_NONE; 462 J->postproc = LJ_POST_NONE;
403 lj_resetsplit(J); 463 lj_resetsplit(J);
464 J->retryrec = 0;
465 J->ktrace = 0;
404 setgcref(J->cur.startpt, obj2gco(J->pt)); 466 setgcref(J->cur.startpt, obj2gco(J->pt));
405 467
406 L = J->L; 468 L = J->L;
@@ -412,6 +474,12 @@ static void trace_start(jit_State *J)
412 if (J->parent) { 474 if (J->parent) {
413 setintV(L->top++, J->parent); 475 setintV(L->top++, J->parent);
414 setintV(L->top++, J->exitno); 476 setintV(L->top++, J->exitno);
477 } else {
478 BCOp op = bc_op(*J->pc);
479 if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) {
480 setintV(L->top++, J->exitno); /* Parent of stitched trace. */
481 setintV(L->top++, -1);
482 }
415 } 483 }
416 ); 484 );
417 lj_record_setup(J); 485 lj_record_setup(J);
@@ -424,7 +492,7 @@ static void trace_stop(jit_State *J)
424 BCOp op = bc_op(J->cur.startins); 492 BCOp op = bc_op(J->cur.startins);
425 GCproto *pt = &gcref(J->cur.startpt)->pt; 493 GCproto *pt = &gcref(J->cur.startpt)->pt;
426 TraceNo traceno = J->cur.traceno; 494 TraceNo traceno = J->cur.traceno;
427 GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ 495 GCtrace *T = J->curfinal;
428 lua_State *L; 496 lua_State *L;
429 497
430 switch (op) { 498 switch (op) {
@@ -442,6 +510,7 @@ static void trace_stop(jit_State *J)
442 J->cur.nextroot = pt->trace; 510 J->cur.nextroot = pt->trace;
443 pt->trace = (TraceNo1)traceno; 511 pt->trace = (TraceNo1)traceno;
444 break; 512 break;
513 case BC_ITERN:
445 case BC_RET: 514 case BC_RET:
446 case BC_RET0: 515 case BC_RET0:
447 case BC_RET1: 516 case BC_RET1:
@@ -449,7 +518,7 @@ static void trace_stop(jit_State *J)
449 goto addroot; 518 goto addroot;
450 case BC_JMP: 519 case BC_JMP:
451 /* Patch exit branch in parent to side trace entry. */ 520 /* Patch exit branch in parent to side trace entry. */
452 lua_assert(J->parent != 0 && J->cur.root != 0); 521 lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace");
453 lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); 522 lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
454 /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ 523 /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
455 { 524 {
@@ -465,8 +534,14 @@ static void trace_stop(jit_State *J)
465 root->nextside = (TraceNo1)traceno; 534 root->nextside = (TraceNo1)traceno;
466 } 535 }
467 break; 536 break;
537 case BC_CALLM:
538 case BC_CALL:
539 case BC_ITERC:
540 /* Trace stitching: patch link of previous trace. */
541 traceref(J, J->exitno)->link = traceno;
542 break;
468 default: 543 default:
469 lua_assert(0); 544 lj_assertJ(0, "bad stop bytecode %d", op);
470 break; 545 break;
471 } 546 }
472 547
@@ -479,6 +554,7 @@ static void trace_stop(jit_State *J)
479 lj_vmevent_send(L, TRACE, 554 lj_vmevent_send(L, TRACE,
480 setstrV(L, L->top++, lj_str_newlit(L, "stop")); 555 setstrV(L, L->top++, lj_str_newlit(L, "stop"));
481 setintV(L->top++, traceno); 556 setintV(L->top++, traceno);
557 setfuncV(L, L->top++, J->fn);
482 ); 558 );
483} 559}
484 560
@@ -486,8 +562,8 @@ static void trace_stop(jit_State *J)
486static int trace_downrec(jit_State *J) 562static int trace_downrec(jit_State *J)
487{ 563{
488 /* Restart recording at the return instruction. */ 564 /* Restart recording at the return instruction. */
489 lua_assert(J->pt != NULL); 565 lj_assertJ(J->pt != NULL, "no active prototype");
490 lua_assert(bc_isret(bc_op(*J->pc))); 566 lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode");
491 if (bc_op(*J->pc) == BC_RETM) 567 if (bc_op(*J->pc) == BC_RETM)
492 return 0; /* NYI: down-recursion with RETM. */ 568 return 0; /* NYI: down-recursion with RETM. */
493 J->parent = 0; 569 J->parent = 0;
@@ -506,6 +582,10 @@ static int trace_abort(jit_State *J)
506 582
507 J->postproc = LJ_POST_NONE; 583 J->postproc = LJ_POST_NONE;
508 lj_mcode_abort(J); 584 lj_mcode_abort(J);
585 if (J->curfinal) {
586 lj_trace_free(J2G(J), J->curfinal);
587 J->curfinal = NULL;
588 }
509 if (tvisnumber(L->top-1)) 589 if (tvisnumber(L->top-1))
510 e = (TraceError)numberVint(L->top-1); 590 e = (TraceError)numberVint(L->top-1);
511 if (e == LJ_TRERR_MCODELM) { 591 if (e == LJ_TRERR_MCODELM) {
@@ -514,8 +594,17 @@ static int trace_abort(jit_State *J)
514 return 1; /* Retry ASM with new MCode area. */ 594 return 1; /* Retry ASM with new MCode area. */
515 } 595 }
516 /* Penalize or blacklist starting bytecode instruction. */ 596 /* Penalize or blacklist starting bytecode instruction. */
517 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) 597 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
518 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); 598 if (J->exitno == 0) {
599 BCIns *startpc = mref(J->cur.startpc, BCIns);
600 if (e == LJ_TRERR_RETRY)
601 hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */
602 else
603 penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e);
604 } else {
605 traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
606 }
607 }
519 608
520 /* Is there anything to abort? */ 609 /* Is there anything to abort? */
521 traceno = J->cur.traceno; 610 traceno = J->cur.traceno;
@@ -524,7 +613,7 @@ static int trace_abort(jit_State *J)
524 J->cur.link = 0; 613 J->cur.link = 0;
525 J->cur.linktype = LJ_TRLINK_NONE; 614 J->cur.linktype = LJ_TRLINK_NONE;
526 lj_vmevent_send(L, TRACE, 615 lj_vmevent_send(L, TRACE,
527 cTValue *bot = tvref(L->stack); 616 cTValue *bot = tvref(L->stack)+LJ_FR2;
528 cTValue *frame; 617 cTValue *frame;
529 const BCIns *pc; 618 const BCIns *pc;
530 BCPos pos = 0; 619 BCPos pos = 0;
@@ -587,8 +676,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
587 J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ 676 J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */
588 trace_start(J); 677 trace_start(J);
589 lj_dispatch_update(J2G(J)); 678 lj_dispatch_update(J2G(J));
590 break; 679 if (J->state != LJ_TRACE_RECORD_1ST)
680 break;
681 /* fallthrough */
591 682
683 case LJ_TRACE_RECORD_1ST:
684 J->state = LJ_TRACE_RECORD;
685 /* fallthrough */
592 case LJ_TRACE_RECORD: 686 case LJ_TRACE_RECORD:
593 trace_pendpatch(J, 0); 687 trace_pendpatch(J, 0);
594 setvmstate(J2G(J), RECORD); 688 setvmstate(J2G(J), RECORD);
@@ -694,15 +788,30 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
694{ 788{
695 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; 789 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
696 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && 790 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
791 isluafunc(curr_func(J->L)) &&
697 snap->count != SNAPCOUNT_DONE && 792 snap->count != SNAPCOUNT_DONE &&
698 ++snap->count >= J->param[JIT_P_hotexit]) { 793 ++snap->count >= J->param[JIT_P_hotexit]) {
699 lua_assert(J->state == LJ_TRACE_IDLE); 794 lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording");
700 /* J->parent is non-zero for a side trace. */ 795 /* J->parent is non-zero for a side trace. */
701 J->state = LJ_TRACE_START; 796 J->state = LJ_TRACE_START;
702 lj_trace_ins(J, pc); 797 lj_trace_ins(J, pc);
703 } 798 }
704} 799}
705 800
801/* Stitch a new trace to the previous trace. */
802void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
803{
804 /* Only start a new trace if not recording or inside __gc call or vmevent. */
805 if (J->state == LJ_TRACE_IDLE &&
806 !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
807 J->parent = 0; /* Have to treat it like a root trace. */
808 /* J->exitno is set to the invoking trace. */
809 J->state = LJ_TRACE_START;
810 lj_trace_ins(J, pc);
811 }
812}
813
814
706/* Tiny struct to pass data to protected call. */ 815/* Tiny struct to pass data to protected call. */
707typedef struct ExitDataCP { 816typedef struct ExitDataCP {
708 jit_State *J; 817 jit_State *J;
@@ -746,7 +855,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex)
746} 855}
747#endif 856#endif
748 857
749#ifdef EXITSTATE_PCREG 858#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE)
750/* Determine trace number from pc of exit instruction. */ 859/* Determine trace number from pc of exit instruction. */
751static TraceNo trace_exit_find(jit_State *J, MCode *pc) 860static TraceNo trace_exit_find(jit_State *J, MCode *pc)
752{ 861{
@@ -756,7 +865,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc)
756 if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) 865 if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode))
757 return traceno; 866 return traceno;
758 } 867 }
759 lua_assert(0); 868 lj_assertJ(0, "bad exit pc");
760 return 0; 869 return 0;
761} 870}
762#endif 871#endif
@@ -768,68 +877,81 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
768 lua_State *L = J->L; 877 lua_State *L = J->L;
769 ExitState *ex = (ExitState *)exptr; 878 ExitState *ex = (ExitState *)exptr;
770 ExitDataCP exd; 879 ExitDataCP exd;
771 int errcode; 880 int errcode, exitcode = J->exitcode;
772 const BCIns *pc; 881 TValue exiterr;
882 const BCIns *pc, *retpc;
773 void *cf; 883 void *cf;
774 GCtrace *T; 884 GCtrace *T;
885
886 setnilV(&exiterr);
887 if (exitcode) { /* Trace unwound with error code. */
888 J->exitcode = 0;
889 copyTV(L, &exiterr, L->top-1);
890 }
891
775#ifdef EXITSTATE_PCREG 892#ifdef EXITSTATE_PCREG
776 J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); 893 J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
777#endif 894#endif
778 T = traceref(J, J->parent); UNUSED(T); 895 T = traceref(J, J->parent); UNUSED(T);
779#ifdef EXITSTATE_CHECKEXIT 896#ifdef EXITSTATE_CHECKEXIT
780 if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ 897 if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */
781 lua_assert(T->root != 0); 898 lj_assertJ(T->root != 0, "stack check in root trace");
782 J->exitno = T->ir[REF_BASE].op2; 899 J->exitno = T->ir[REF_BASE].op2;
783 J->parent = T->ir[REF_BASE].op1; 900 J->parent = T->ir[REF_BASE].op1;
784 T = traceref(J, J->parent); 901 T = traceref(J, J->parent);
785 } 902 }
786#endif 903#endif
787 lua_assert(T != NULL && J->exitno < T->nsnap); 904 lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number");
788 exd.J = J; 905 exd.J = J;
789 exd.exptr = exptr; 906 exd.exptr = exptr;
790 errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); 907 errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp);
791 if (errcode) 908 if (errcode)
792 return -errcode; /* Return negated error code. */ 909 return -errcode; /* Return negated error code. */
793 910
794 lj_vmevent_send(L, TEXIT, 911 if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */
795 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); 912
796 setintV(L->top++, J->parent); 913 if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
797 setintV(L->top++, J->exitno); 914 lj_vmevent_send(L, TEXIT,
798 trace_exit_regs(L, ex); 915 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
799 ); 916 setintV(L->top++, J->parent);
917 setintV(L->top++, J->exitno);
918 trace_exit_regs(L, ex);
919 );
800 920
801 pc = exd.pc; 921 pc = exd.pc;
802 cf = cframe_raw(L->cframe); 922 cf = cframe_raw(L->cframe);
803 setcframe_pc(cf, pc); 923 setcframe_pc(cf, pc);
804 if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { 924 if (exitcode) {
925 return -exitcode;
926 } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
927 /* Just exit to interpreter. */
928 } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
805 if (!(G(L)->hookmask & HOOK_GC)) 929 if (!(G(L)->hookmask & HOOK_GC))
806 lj_gc_step(L); /* Exited because of GC: drive GC forward. */ 930 lj_gc_step(L); /* Exited because of GC: drive GC forward. */
807 } else if ((J->flags & JIT_F_ON)) { 931 } else if ((J->flags & JIT_F_ON)) {
808 trace_hotside(J, pc); 932 trace_hotside(J, pc);
809 } 933 }
810 if (bc_op(*pc) == BC_JLOOP) { 934 /* Return MULTRES or 0 or -17. */
811 BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
812 if (bc_isret(bc_op(*retpc))) {
813 if (J->state == LJ_TRACE_RECORD) {
814 J->patchins = *pc;
815 J->patchpc = (BCIns *)pc;
816 *J->patchpc = *retpc;
817 J->bcskip = 1;
818 } else {
819 pc = retpc;
820 setcframe_pc(cf, pc);
821 }
822 }
823 }
824 /* Return MULTRES or 0. */
825 ERRNO_RESTORE 935 ERRNO_RESTORE
826 switch (bc_op(*pc)) { 936 switch (bc_op(*pc)) {
827 case BC_CALLM: case BC_CALLMT: 937 case BC_CALLM: case BC_CALLMT:
828 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc)); 938 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2);
829 case BC_RETM: 939 case BC_RETM:
830 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); 940 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
831 case BC_TSETM: 941 case BC_TSETM:
832 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc)); 942 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc));
943 case BC_JLOOP:
944 retpc = &traceref(J, bc_d(*pc))->startins;
945 if (bc_isret(bc_op(*retpc)) || bc_op(*retpc) == BC_ITERN) {
946 /* Dispatch to original ins to ensure forward progress. */
947 if (J->state != LJ_TRACE_RECORD) return -17;
948 /* Unpatch bytecode when recording. */
949 J->patchins = *pc;
950 J->patchpc = (BCIns *)pc;
951 *J->patchpc = *retpc;
952 J->bcskip = 1;
953 }
954 return 0;
833 default: 955 default:
834 if (bc_op(*pc) >= BC_FUNCF) 956 if (bc_op(*pc) >= BC_FUNCF)
835 return (int)((BCReg)(L->top - L->base) + 1); 957 return (int)((BCReg)(L->top - L->base) + 1);
@@ -837,4 +959,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
837 } 959 }
838} 960}
839 961
962#if LJ_UNWIND_JIT
963/* Given an mcode address determine trace exit address for unwinding. */
964uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep)
965{
966#if EXITTRACE_VMSTATE
967 TraceNo traceno = J2G(J)->vmstate;
968#else
969 TraceNo traceno = trace_exit_find(J, (MCode *)addr);
970#endif
971 GCtrace *T = traceref(J, traceno);
972 if (T
973#if EXITTRACE_VMSTATE
974 && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode
975#endif
976 ) {
977 SnapShot *snap = T->snap;
978 SnapNo lo = 0, exitno = T->nsnap;
979 uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */
980 /* Rightmost binary search for mcode offset to determine exit number. */
981 do {
982 SnapNo mid = (lo+exitno) >> 1;
983 if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1;
984 } while (lo < exitno);
985 exitno--;
986 *ep = exitno;
987#ifdef EXITSTUBS_PER_GROUP
988 return (uintptr_t)exitstub_addr(J, exitno);
989#else
990 return (uintptr_t)exitstub_trace_addr(T, exitno);
991#endif
992 }
993 /* Cannot correlate addr with trace/exit. This will be fatal. */
994 lj_assertJ(0, "bad exit pc");
995 return 0;
996}
997#endif
998
840#endif 999#endif
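The lj_trace_unwind function added above recovers the exit number for an unwinder by a rightmost binary search over the snapshots' machine-code offsets: it picks the last snapshot whose exit code starts at or before the queried address. A minimal standalone sketch of that search, with hypothetical names and outside the patch:

/* Illustration only (not from the patch): rightmost binary search.
** Given offsets sorted ascending, return the largest index i with
** ofs[i] <= target, or -1 if target precedes every entry.
*/
#include <stdio.h>
#include <stdint.h>

static int last_at_or_below(const uint32_t *ofs, int n, uint32_t target)
{
  int lo = 0, hi = n;            /* Search the half-open range [lo, hi). */
  while (lo < hi) {
    int mid = (lo + hi) >> 1;
    if (target < ofs[mid]) hi = mid; else lo = mid + 1;
  }
  return lo - 1;                 /* Same final decrement as in lj_trace_unwind. */
}

int main(void)
{
  /* Pretend these are snap[i].mcofs values for a four-snapshot trace. */
  uint32_t mcofs[4] = { 0, 24, 24, 96 };
  printf("%d\n", last_at_or_below(mcofs, 4, 30));   /* -> 2 */
  printf("%d\n", last_at_or_below(mcofs, 4, 100));  /* -> 3 */
  return 0;
}
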
diff --git a/src/lj_trace.h b/src/lj_trace.h
index d708a217..5dcd365e 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
23LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); 23LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
24 24
25/* Trace management. */ 25/* Trace management. */
26LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T);
26LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); 27LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
27LJ_FUNC void lj_trace_reenableproto(GCproto *pt); 28LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
28LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); 29LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
@@ -34,7 +35,11 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
34/* Event handling. */ 35/* Event handling. */
35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); 36LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); 37LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
38LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
37LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); 39LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
40#if LJ_UNWIND_EXT
41LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep);
42#endif
38 43
39/* Signal asynchronous abort of trace or end of trace. */ 44/* Signal asynchronous abort of trace or end of trace. */
40#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) 45#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE)
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index 8b28e65c..08134dc5 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -7,11 +7,13 @@
7 7
8/* Recording. */ 8/* Recording. */
9TREDEF(RECERR, "error thrown or hook called during recording") 9TREDEF(RECERR, "error thrown or hook called during recording")
10TREDEF(TRACEUV, "trace too short")
10TREDEF(TRACEOV, "trace too long") 11TREDEF(TRACEOV, "trace too long")
11TREDEF(STACKOV, "trace too deep") 12TREDEF(STACKOV, "trace too deep")
12TREDEF(SNAPOV, "too many snapshots") 13TREDEF(SNAPOV, "too many snapshots")
13TREDEF(BLACKL, "blacklisted") 14TREDEF(BLACKL, "blacklisted")
14TREDEF(NYIBC, "NYI: bytecode %d") 15TREDEF(RETRY, "retry recording")
16TREDEF(NYIBC, "NYI: bytecode %s")
15 17
16/* Recording loop ops. */ 18/* Recording loop ops. */
17TREDEF(LLEAVE, "leaving loop in root trace") 19TREDEF(LLEAVE, "leaving loop in root trace")
@@ -23,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type")
23TREDEF(CJITOFF, "JIT compilation disabled for function") 25TREDEF(CJITOFF, "JIT compilation disabled for function")
24TREDEF(CUNROLL, "call unroll limit reached") 26TREDEF(CUNROLL, "call unroll limit reached")
25TREDEF(DOWNREC, "down-recursion, restarting") 27TREDEF(DOWNREC, "down-recursion, restarting")
26TREDEF(NYICF, "NYI: C function %s")
27TREDEF(NYIFF, "NYI: FastFunc %s")
28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") 28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
29TREDEF(NYIRETL, "NYI: return to lower frame") 29TREDEF(NYIRETL, "NYI: return to lower frame")
30 30
diff --git a/src/lj_udata.c b/src/lj_udata.c
index b17c2529..7acd9e32 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h"
11#include "lj_udata.h" 12#include "lj_udata.h"
12 13
13GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) 14GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
@@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud)
32 lj_mem_free(g, ud, sizeudata(ud)); 33 lj_mem_free(g, ud, sizeudata(ud));
33} 34}
34 35
36#if LJ_64
37void *lj_lightud_intern(lua_State *L, void *p)
38{
39 global_State *g = G(L);
40 uint64_t u = (uint64_t)p;
41 uint32_t up = lightudup(u);
42 uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
43 MSize segnum = g->gc.lightudnum;
44 if (segmap) {
45 MSize seg;
46 for (seg = 0; seg <= segnum; seg++)
47 if (segmap[seg] == up) /* Fast path. */
48 return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
49 segnum++;
50 /* Leave last segment unused to avoid clash with ITERN key. */
51 if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU);
52 }
53 if (!((segnum-1) & segnum) && segnum != 1) {
54 lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t);
55 setmref(g->gc.lightudseg, segmap);
56 }
57 g->gc.lightudnum = segnum;
58 segmap[segnum] = up;
59 return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
60}
61#endif
62
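The lj_lightud_intern function above lets arbitrary 64-bit light userdata pointers fit into the limited payload bits of a tagged TValue: the upper pointer bits are interned into a small segment table, and the stored value keeps only the segment index plus the low bits. A standalone sketch of that idea, with illustrative bit widths rather than LuaJIT's actual LJ_LIGHTUD_BITS_LO/LJ_LIGHTUD_BITS_SEG values, and a fixed-size table instead of the reallocated vector:

/* Sketch of segment interning for 64-bit light userdata (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define LO_BITS 32                       /* Low bits kept verbatim (assumed). */
#define MAX_SEG 16                       /* Segment table capacity (assumed). */

static uint32_t segmap[MAX_SEG];         /* Interned upper-bit patterns. */
static int segnum = 0;

static uint64_t intern_lightud(uint64_t p)
{
  uint32_t up = (uint32_t)(p >> LO_BITS);          /* Upper address bits. */
  uint32_t lo = (uint32_t)p;                       /* Low address bits. */
  int seg;
  for (seg = 0; seg < segnum; seg++)               /* Fast path: already seen. */
    if (segmap[seg] == up)
      return ((uint64_t)seg << LO_BITS) | lo;
  if (segnum >= MAX_SEG) return ~0ull;             /* Out of segments: error. */
  segmap[segnum] = up;                             /* Intern a new segment. */
  return ((uint64_t)segnum++ << LO_BITS) | lo;
}

int main(void)
{
  uint64_t a = 0x00007f12deadbeefull, b = 0x00007f12cafef00dull;
  printf("%llx %llx\n", (unsigned long long)intern_lightud(a),
                        (unsigned long long)intern_lightud(b));
  /* Both pointers share the same upper bits, so both map into segment 0. */
  return 0;
}
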
diff --git a/src/lj_udata.h b/src/lj_udata.h
index f1c2ca7f..143bf81d 100644
--- a/src/lj_udata.h
+++ b/src/lj_udata.h
@@ -10,5 +10,8 @@
10 10
11LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); 11LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env);
12LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); 12LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud);
13#if LJ_64
14LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p);
15#endif
13 16
14#endif 17#endif
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 133a78cc..63d09439 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -17,11 +17,18 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud,
17LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); 17LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
18LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); 18LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode);
19LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); 19LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe);
20#if LJ_ABI_WIN && LJ_TARGET_X86
21LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec,
22 void *unwinder, int errcode);
23#endif
20LJ_ASMF void lj_vm_unwind_c_eh(void); 24LJ_ASMF void lj_vm_unwind_c_eh(void);
21LJ_ASMF void lj_vm_unwind_ff_eh(void); 25LJ_ASMF void lj_vm_unwind_ff_eh(void);
22#if LJ_TARGET_X86ORX64 26#if LJ_TARGET_X86ORX64
23LJ_ASMF void lj_vm_unwind_rethrow(void); 27LJ_ASMF void lj_vm_unwind_rethrow(void);
24#endif 28#endif
29#if LJ_TARGET_MIPS
30LJ_ASMF void lj_vm_unwind_stub(void);
31#endif
25 32
26/* Miscellaneous functions. */ 33/* Miscellaneous functions. */
27#if LJ_TARGET_X86ORX64 34#if LJ_TARGET_X86ORX64
@@ -43,13 +50,15 @@ LJ_ASMF void lj_vm_record(void);
43LJ_ASMF void lj_vm_inshook(void); 50LJ_ASMF void lj_vm_inshook(void);
44LJ_ASMF void lj_vm_rethook(void); 51LJ_ASMF void lj_vm_rethook(void);
45LJ_ASMF void lj_vm_callhook(void); 52LJ_ASMF void lj_vm_callhook(void);
53LJ_ASMF void lj_vm_profhook(void);
54LJ_ASMF void lj_vm_IITERN(void);
46 55
47/* Trace exit handling. */ 56/* Trace exit handling. */
48LJ_ASMF void lj_vm_exit_handler(void); 57LJ_ASMF char lj_vm_exit_handler[];
49LJ_ASMF void lj_vm_exit_interp(void); 58LJ_ASMF char lj_vm_exit_interp[];
50 59
51/* Internal math helper functions. */ 60/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 61#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
53#define lj_vm_floor floor 62#define lj_vm_floor floor
54#define lj_vm_ceil ceil 63#define lj_vm_ceil ceil
55#else 64#else
@@ -60,23 +69,22 @@ LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 69LJ_ASMF double lj_vm_ceil_sf(double);
61#endif 70#endif
62#endif 71#endif
63#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 72#ifdef LUAJIT_NO_LOG2
64LJ_ASMF double lj_vm_log2(double); 73LJ_ASMF double lj_vm_log2(double);
65#else 74#else
66#define lj_vm_log2 log2 75#define lj_vm_log2 log2
67#endif 76#endif
77#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS)
78LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
79#endif
68 80
69#if LJ_HASJIT 81#if LJ_HASJIT
70#if LJ_TARGET_X86ORX64 82#if LJ_TARGET_X86ORX64
71LJ_ASMF void lj_vm_floor_sse(void); 83LJ_ASMF void lj_vm_floor_sse(void);
72LJ_ASMF void lj_vm_ceil_sse(void); 84LJ_ASMF void lj_vm_ceil_sse(void);
73LJ_ASMF void lj_vm_trunc_sse(void); 85LJ_ASMF void lj_vm_trunc_sse(void);
74LJ_ASMF void lj_vm_exp_x87(void); 86#endif
75LJ_ASMF void lj_vm_exp2_x87(void); 87#if LJ_TARGET_PPC || LJ_TARGET_ARM64
76LJ_ASMF void lj_vm_pow_sse(void);
77LJ_ASMF void lj_vm_powi_sse(void);
78#else
79#if LJ_TARGET_PPC
80#define lj_vm_trunc trunc 88#define lj_vm_trunc trunc
81#else 89#else
82LJ_ASMF double lj_vm_trunc(double); 90LJ_ASMF double lj_vm_trunc(double);
@@ -84,17 +92,10 @@ LJ_ASMF double lj_vm_trunc(double);
84LJ_ASMF double lj_vm_trunc_sf(double); 92LJ_ASMF double lj_vm_trunc_sf(double);
85#endif 93#endif
86#endif 94#endif
87LJ_ASMF double lj_vm_powi(double, int32_t);
88#ifdef LUAJIT_NO_EXP2
89LJ_ASMF double lj_vm_exp2(double);
90#else
91#define lj_vm_exp2 exp2
92#endif
93#endif
94LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
95#if LJ_HASFFI 95#if LJ_HASFFI
96LJ_ASMF int lj_vm_errno(void); 96LJ_ASMF int lj_vm_errno(void);
97#endif 97#endif
98LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx);
98#endif 99#endif
99 100
100/* Continuations for metamethods. */ 101/* Continuations for metamethods. */
@@ -104,13 +105,12 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
104LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ 105LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
105LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ 106LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
106LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ 107LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
107 108LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
108enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
109 109
110/* Start of the ASM code. */ 110/* Start of the ASM code. */
111LJ_ASMF char lj_vm_asm_begin[]; 111LJ_ASMF char lj_vm_asm_begin[];
112 112
113/* Bytecode offsets are relative to lj_vm_asm_begin. */ 113/* Bytecode offsets are relative to lj_vm_asm_begin. */
114#define makeasmfunc(ofs) ((ASMFunction)(lj_vm_asm_begin + (ofs))) 114#define makeasmfunc(ofs) lj_ptr_sign((ASMFunction)(lj_vm_asm_begin + (ofs)), 0)
115 115
116#endif 116#endif
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
index a25547ee..477ee79b 100644
--- a/src/lj_vmevent.c
+++ b/src/lj_vmevent.c
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
27 if (tv && tvisfunc(tv)) { 27 if (tv && tvisfunc(tv)) {
28 lj_state_checkstack(L, LUA_MINSTACK); 28 lj_state_checkstack(L, LUA_MINSTACK);
29 setfuncV(L, L->top++, funcV(tv)); 29 setfuncV(L, L->top++, funcV(tv));
30 if (LJ_FR2) setnilV(L->top++);
30 return savestack(L, L->top); 31 return savestack(L, L->top);
31 } 32 }
32 } 33 }
diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h
index 13fb9b92..d757a8b5 100644
--- a/src/lj_vmevent.h
+++ b/src/lj_vmevent.h
@@ -24,9 +24,10 @@
24/* VM event IDs. */ 24/* VM event IDs. */
25typedef enum { 25typedef enum {
26 VMEVENT_DEF(BC, 0x00003883), 26 VMEVENT_DEF(BC, 0x00003883),
27 VMEVENT_DEF(TRACE, 0xb2d91467), 27 VMEVENT_DEF(TRACE, 0x12d91467),
28 VMEVENT_DEF(RECORD, 0x9284bf4f), 28 VMEVENT_DEF(RECORD, 0x1284bf4f),
29 VMEVENT_DEF(TEXIT, 0xb29df2b0), 29 VMEVENT_DEF(TEXIT, 0x129df2b0),
30 VMEVENT_DEF(ERRFIN, 0x12d93888),
30 LJ_VMEVENT__MAX 31 LJ_VMEVENT__MAX
31} VMEvent; 32} VMEvent;
32 33
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 2da5f6b7..3351e72b 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -13,16 +13,40 @@
13#include "lj_ir.h" 13#include "lj_ir.h"
14#include "lj_vm.h" 14#include "lj_vm.h"
15 15
16/* -- Helper functions for generated machine code ------------------------- */ 16/* -- Wrapper functions --------------------------------------------------- */
17 17
18#if LJ_TARGET_X86ORX64 18#if LJ_TARGET_X86 && __ELF__ && __PIC__
19/* Wrapper functions to avoid linker issues on OSX. */ 19/* Wrapper functions to deal with the ELF/x86 PIC disaster. */
20LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } 20LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
21LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } 21LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
22LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } 22LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
23LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
24LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
25LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
26LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
27LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
28LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
29LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
33LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
34LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
23#endif 35#endif
24 36
25#if !LJ_TARGET_X86ORX64 37/* -- Helper functions ---------------------------------------------------- */
38
39/* Required to prevent the C compiler from applying FMA optimizations.
40**
41** Yes, there's -ffp-contract and the FP_CONTRACT pragma ... in theory.
42** But the current state of C compilers is a mess in this regard.
43** Also, this function is not performance sensitive at all.
44*/
45LJ_NOINLINE static double lj_vm_floormul(double x, double y)
46{
47 return lj_vm_floor(x / y) * y;
48}
49
26double lj_vm_foldarith(double x, double y, int op) 50double lj_vm_foldarith(double x, double y, int op)
27{ 51{
28 switch (op) { 52 switch (op) {
@@ -30,42 +54,27 @@ double lj_vm_foldarith(double x, double y, int op)
30 case IR_SUB - IR_ADD: return x-y; break; 54 case IR_SUB - IR_ADD: return x-y; break;
31 case IR_MUL - IR_ADD: return x*y; break; 55 case IR_MUL - IR_ADD: return x*y; break;
32 case IR_DIV - IR_ADD: return x/y; break; 56 case IR_DIV - IR_ADD: return x/y; break;
33 case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break; 57 case IR_MOD - IR_ADD: return x-lj_vm_floormul(x, y); break;
34 case IR_POW - IR_ADD: return pow(x, y); break; 58 case IR_POW - IR_ADD: return pow(x, y); break;
35 case IR_NEG - IR_ADD: return -x; break; 59 case IR_NEG - IR_ADD: return -x; break;
36 case IR_ABS - IR_ADD: return fabs(x); break; 60 case IR_ABS - IR_ADD: return fabs(x); break;
37#if LJ_HASJIT 61#if LJ_HASJIT
38 case IR_ATAN2 - IR_ADD: return atan2(x, y); break;
39 case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; 62 case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break;
40 case IR_MIN - IR_ADD: return x > y ? y : x; break; 63 case IR_MIN - IR_ADD: return x < y ? x : y; break;
41 case IR_MAX - IR_ADD: return x < y ? y : x; break; 64 case IR_MAX - IR_ADD: return x > y ? x : y; break;
42#endif 65#endif
43 default: return x; 66 default: return x;
44 } 67 }
45} 68}
46#endif
47 69
48#if LJ_HASJIT 70/* -- Helper functions for generated machine code ------------------------- */
49
50#ifdef LUAJIT_NO_LOG2
51double lj_vm_log2(double a)
52{
53 return log(a) * 1.4426950408889634074;
54}
55#endif
56
57#ifdef LUAJIT_NO_EXP2
58double lj_vm_exp2(double a)
59{
60 return exp(a * 0.6931471805599453);
61}
62#endif
63 71
64#if !(LJ_TARGET_ARM || LJ_TARGET_PPC) 72#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
65int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) 73int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
66{ 74{
67 uint32_t y, ua, ub; 75 uint32_t y, ua, ub;
68 lua_assert(b != 0); /* This must be checked before using this function. */ 76 /* This must be checked before using this function. */
77 lj_assertX(b != 0, "modulo with zero divisor");
69 ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a; 78 ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a;
70 ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b; 79 ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b;
71 y = ua % ub; 80 y = ua % ub;
@@ -75,38 +84,14 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
75} 84}
76#endif 85#endif
77 86
78#if !LJ_TARGET_X86ORX64 87#if LJ_HASJIT
79/* Unsigned x^k. */
80static double lj_vm_powui(double x, uint32_t k)
81{
82 double y;
83 lua_assert(k != 0);
84 for (; (k & 1) == 0; k >>= 1) x *= x;
85 y = x;
86 if ((k >>= 1) != 0) {
87 for (;;) {
88 x *= x;
89 if (k == 1) break;
90 if (k & 1) y *= x;
91 k >>= 1;
92 }
93 y *= x;
94 }
95 return y;
96}
97 88
98/* Signed x^k. */ 89#ifdef LUAJIT_NO_LOG2
99double lj_vm_powi(double x, int32_t k) 90double lj_vm_log2(double a)
100{ 91{
101 if (k > 1) 92 return log(a) * 1.4426950408889634074;
102 return lj_vm_powui(x, (uint32_t)k);
103 else if (k == 1)
104 return x;
105 else if (k == 0)
106 return 1.0;
107 else
108 return 1.0 / lj_vm_powui(x, ~(uint32_t)k+1u);
109} 93}
94#endif
110 95
111/* Computes fpm(x) for extended math functions. */ 96/* Computes fpm(x) for extended math functions. */
112double lj_vm_foldfpm(double x, int fpm) 97double lj_vm_foldfpm(double x, int fpm)
@@ -116,19 +101,12 @@ double lj_vm_foldfpm(double x, int fpm)
116 case IRFPM_CEIL: return lj_vm_ceil(x); 101 case IRFPM_CEIL: return lj_vm_ceil(x);
117 case IRFPM_TRUNC: return lj_vm_trunc(x); 102 case IRFPM_TRUNC: return lj_vm_trunc(x);
118 case IRFPM_SQRT: return sqrt(x); 103 case IRFPM_SQRT: return sqrt(x);
119 case IRFPM_EXP: return exp(x);
120 case IRFPM_EXP2: return lj_vm_exp2(x);
121 case IRFPM_LOG: return log(x); 104 case IRFPM_LOG: return log(x);
122 case IRFPM_LOG2: return lj_vm_log2(x); 105 case IRFPM_LOG2: return lj_vm_log2(x);
123 case IRFPM_LOG10: return log10(x); 106 default: lj_assertX(0, "bad fpm %d", fpm);
124 case IRFPM_SIN: return sin(x);
125 case IRFPM_COS: return cos(x);
126 case IRFPM_TAN: return tan(x);
127 default: lua_assert(0);
128 } 107 }
129 return 0; 108 return 0;
130} 109}
131#endif
132 110
133#if LJ_HASFFI 111#if LJ_HASFFI
134int lj_vm_errno(void) 112int lj_vm_errno(void)
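Both the IR_MOD fold above (x - lj_vm_floormul(x, y), i.e. x - floor(x/y)*y) and lj_vm_modi implement Lua's floored modulo, whose result takes the sign of the divisor; the integer sign-fix tail of lj_vm_modi lies outside the quoted hunk. A small illustration of the semantics, not the patch's code:

/* Floored modulo as defined by Lua 5.1: a - floor(a/b)*b. Illustrative only. */
#include <math.h>
#include <stdio.h>

static double lua_nummod(double a, double b)
{
  return a - floor(a / b) * b;      /* Result has the sign of b. */
}

int main(void)
{
  printf("%g %g %g\n", lua_nummod(5, 3), lua_nummod(-5, 3), lua_nummod(5, -3));
  /* Prints: 2 1 -1  (contrast with C's truncating fmod: 2, -2, 2). */
  return 0;
}
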
diff --git a/src/ljamalg.c b/src/ljamalg.c
index f9315d5c..f1dce6a3 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -3,16 +3,6 @@
3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4*/ 4*/
5 5
6/*
7+--------------------------------------------------------------------------+
8| WARNING: Compiling the amalgamation needs a lot of virtual memory |
9| (around 300 MB with GCC 4.x)! If you don't have enough physical memory |
10| your machine will start swapping to disk and the compile will not finish |
11| within a reasonable amount of time. |
12| So either compile on a bigger machine or use the non-amalgamated build. |
13+--------------------------------------------------------------------------+
14*/
15
16#define ljamalg_c 6#define ljamalg_c
17#define LUA_CORE 7#define LUA_CORE
18 8
@@ -28,23 +18,30 @@
28#include "lua.h" 18#include "lua.h"
29#include "lauxlib.h" 19#include "lauxlib.h"
30 20
21#include "lj_assert.c"
31#include "lj_gc.c" 22#include "lj_gc.c"
32#include "lj_err.c" 23#include "lj_err.c"
33#include "lj_char.c" 24#include "lj_char.c"
34#include "lj_bc.c" 25#include "lj_bc.c"
35#include "lj_obj.c" 26#include "lj_obj.c"
27#include "lj_buf.c"
36#include "lj_str.c" 28#include "lj_str.c"
37#include "lj_tab.c" 29#include "lj_tab.c"
38#include "lj_func.c" 30#include "lj_func.c"
39#include "lj_udata.c" 31#include "lj_udata.c"
40#include "lj_meta.c" 32#include "lj_meta.c"
41#include "lj_debug.c" 33#include "lj_debug.c"
34#include "lj_prng.c"
42#include "lj_state.c" 35#include "lj_state.c"
43#include "lj_dispatch.c" 36#include "lj_dispatch.c"
44#include "lj_vmevent.c" 37#include "lj_vmevent.c"
45#include "lj_vmmath.c" 38#include "lj_vmmath.c"
46#include "lj_strscan.c" 39#include "lj_strscan.c"
40#include "lj_strfmt.c"
41#include "lj_strfmt_num.c"
42#include "lj_serialize.c"
47#include "lj_api.c" 43#include "lj_api.c"
44#include "lj_profile.c"
48#include "lj_lex.c" 45#include "lj_lex.c"
49#include "lj_parse.c" 46#include "lj_parse.c"
50#include "lj_bcread.c" 47#include "lj_bcread.c"
@@ -89,5 +86,6 @@
89#include "lib_bit.c" 86#include "lib_bit.c"
90#include "lib_jit.c" 87#include "lib_jit.c"
91#include "lib_ffi.c" 88#include "lib_ffi.c"
89#include "lib_buffer.c"
92#include "lib_init.c" 90#include "lib_init.c"
93 91
diff --git a/src/lua.h b/src/lua.h
index 2bd683c2..6d1634d1 100644
--- a/src/lua.h
+++ b/src/lua.h
@@ -39,7 +39,8 @@
39#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) 39#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i))
40 40
41 41
42/* thread status; 0 is OK */ 42/* thread status */
43#define LUA_OK 0
43#define LUA_YIELD 1 44#define LUA_YIELD 1
44#define LUA_ERRRUN 2 45#define LUA_ERRRUN 2
45#define LUA_ERRSYNTAX 3 46#define LUA_ERRSYNTAX 3
@@ -226,6 +227,7 @@ LUA_API int (lua_status) (lua_State *L);
226#define LUA_GCSTEP 5 227#define LUA_GCSTEP 5
227#define LUA_GCSETPAUSE 6 228#define LUA_GCSETPAUSE 6
228#define LUA_GCSETSTEPMUL 7 229#define LUA_GCSETSTEPMUL 7
230#define LUA_GCISRUNNING 9
229 231
230LUA_API int (lua_gc) (lua_State *L, int what, int data); 232LUA_API int (lua_gc) (lua_State *L, int what, int data);
231 233
@@ -346,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n);
346LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); 348LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2);
347LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, 349LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt,
348 const char *chunkname, const char *mode); 350 const char *chunkname, const char *mode);
351LUA_API const lua_Number *lua_version (lua_State *L);
352LUA_API void lua_copy (lua_State *L, int fromidx, int toidx);
353LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum);
354LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum);
355
356/* From Lua 5.3. */
357LUA_API int lua_isyieldable (lua_State *L);
349 358
350 359
351struct lua_Debug { 360struct lua_Debug {
diff --git a/src/luaconf.h b/src/luaconf.h
index 8fc3eee5..1cf3a03c 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -37,7 +37,6 @@
37#endif 37#endif
38#define LUA_LROOT "/usr/local" 38#define LUA_LROOT "/usr/local"
39#define LUA_LUADIR "/lua/5.1/" 39#define LUA_LUADIR "/lua/5.1/"
40#define LUA_LJDIR "/luajit-2.0/"
41 40
42#ifdef LUA_ROOT 41#ifdef LUA_ROOT
43#define LUA_JROOT LUA_ROOT 42#define LUA_JROOT LUA_ROOT
@@ -51,7 +50,11 @@
51#define LUA_RCPATH 50#define LUA_RCPATH
52#endif 51#endif
53 52
54#define LUA_JPATH ";" LUA_JROOT "/share" LUA_LJDIR "?.lua" 53#ifndef LUA_LJDIR
54#define LUA_LJDIR LUA_JROOT "/share/luajit-2.1"
55#endif
56
57#define LUA_JPATH ";" LUA_LJDIR "/?.lua"
55#define LUA_LLDIR LUA_LROOT "/share" LUA_LUADIR 58#define LUA_LLDIR LUA_LROOT "/share" LUA_LUADIR
56#define LUA_LCDIR LUA_LROOT "/" LUA_LMULTILIB LUA_LUADIR 59#define LUA_LCDIR LUA_LROOT "/" LUA_LMULTILIB LUA_LUADIR
57#define LUA_LLPATH ";" LUA_LLDIR "?.lua;" LUA_LLDIR "?/init.lua" 60#define LUA_LLPATH ";" LUA_LLDIR "?.lua;" LUA_LLDIR "?/init.lua"
@@ -79,7 +82,7 @@
79#define LUA_IGMARK "-" 82#define LUA_IGMARK "-"
80#define LUA_PATH_CONFIG \ 83#define LUA_PATH_CONFIG \
81 LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ 84 LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
82 LUA_EXECDIR "\n" LUA_IGMARK 85 LUA_EXECDIR "\n" LUA_IGMARK "\n"
83 86
84/* Quoting in error messages. */ 87/* Quoting in error messages. */
85#define LUA_QL(x) "'" x "'" 88#define LUA_QL(x) "'" x "'"
@@ -92,10 +95,6 @@
92#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ 95#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
93#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ 96#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */
94 97
95/* Compatibility with older library function names. */
96#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
97#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */
98
99/* Configuration for the frontend (the luajit executable). */ 98/* Configuration for the frontend (the luajit executable). */
100#if defined(luajit_c) 99#if defined(luajit_c)
101#define LUA_PROGNAME "luajit" /* Fallback frontend name. */ 100#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
@@ -140,7 +139,7 @@
140 139
141#define LUALIB_API LUA_API 140#define LUALIB_API LUA_API
142 141
143/* Support for internal assertions. */ 142/* Compatibility support for assertions. */
144#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) 143#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
145#include <assert.h> 144#include <assert.h>
146#endif 145#endif
diff --git a/src/luajit.c b/src/luajit.c
index 85579cda..73e29d44 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -62,8 +62,9 @@ static void laction(int i)
62 62
63static void print_usage(void) 63static void print_usage(void)
64{ 64{
65 fprintf(stderr, 65 fputs("usage: ", stderr);
66 "usage: %s [options]... [script [args]...].\n" 66 fputs(progname, stderr);
67 fputs(" [options]... [script [args]...].\n"
67 "Available options are:\n" 68 "Available options are:\n"
68 " -e chunk Execute string " LUA_QL("chunk") ".\n" 69 " -e chunk Execute string " LUA_QL("chunk") ".\n"
69 " -l name Require library " LUA_QL("name") ".\n" 70 " -l name Require library " LUA_QL("name") ".\n"
@@ -74,16 +75,14 @@ static void print_usage(void)
74 " -v Show version information.\n" 75 " -v Show version information.\n"
75 " -E Ignore environment variables.\n" 76 " -E Ignore environment variables.\n"
76 " -- Stop handling options.\n" 77 " -- Stop handling options.\n"
77 " - Execute stdin and stop handling options.\n" 78 " - Execute stdin and stop handling options.\n", stderr);
78 ,
79 progname);
80 fflush(stderr); 79 fflush(stderr);
81} 80}
82 81
83static void l_message(const char *msg) 82static void l_message(const char *msg)
84{ 83{
85 if (progname) fprintf(stderr, "%s: ", progname); 84 if (progname) { fputs(progname, stderr); fputc(':', stderr); fputc(' ', stderr); }
86 fprintf(stderr, "%s\n", msg); 85 fputs(msg, stderr); fputc('\n', stderr);
87 fflush(stderr); 86 fflush(stderr);
88} 87}
89 88
@@ -126,7 +125,7 @@ static int docall(lua_State *L, int narg, int clear)
126#endif 125#endif
127 lua_remove(L, base); /* remove traceback function */ 126 lua_remove(L, base); /* remove traceback function */
128 /* force a complete garbage collection in case of errors */ 127 /* force a complete garbage collection in case of errors */
129 if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); 128 if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0);
130 return status; 129 return status;
131} 130}
132 131
@@ -155,22 +154,15 @@ static void print_jit_status(lua_State *L)
155 lua_settop(L, 0); /* clear stack */ 154 lua_settop(L, 0); /* clear stack */
156} 155}
157 156
158static int getargs(lua_State *L, char **argv, int n) 157static void createargtable(lua_State *L, char **argv, int argc, int argf)
159{ 158{
160 int narg;
161 int i; 159 int i;
162 int argc = 0; 160 lua_createtable(L, argc - argf, argf);
163 while (argv[argc]) argc++; /* count total number of arguments */
164 narg = argc - (n + 1); /* number of arguments to the script */
165 luaL_checkstack(L, narg + 3, "too many arguments to script");
166 for (i = n+1; i < argc; i++)
167 lua_pushstring(L, argv[i]);
168 lua_createtable(L, narg, n + 1);
169 for (i = 0; i < argc; i++) { 161 for (i = 0; i < argc; i++) {
170 lua_pushstring(L, argv[i]); 162 lua_pushstring(L, argv[i]);
171 lua_rawseti(L, -2, i - n); 163 lua_rawseti(L, -2, i - argf);
172 } 164 }
173 return narg; 165 lua_setglobal(L, "arg");
174} 166}
175 167
176static int dofile(lua_State *L, const char *name) 168static int dofile(lua_State *L, const char *name)
@@ -259,9 +251,9 @@ static void dotty(lua_State *L)
259 const char *oldprogname = progname; 251 const char *oldprogname = progname;
260 progname = NULL; 252 progname = NULL;
261 while ((status = loadline(L)) != -1) { 253 while ((status = loadline(L)) != -1) {
262 if (status == 0) status = docall(L, 0, 0); 254 if (status == LUA_OK) status = docall(L, 0, 0);
263 report(L, status); 255 report(L, status);
264 if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ 256 if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */
265 lua_getglobal(L, "print"); 257 lua_getglobal(L, "print");
266 lua_insert(L, 1); 258 lua_insert(L, 1);
267 if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) 259 if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0)
@@ -275,21 +267,30 @@ static void dotty(lua_State *L)
275 progname = oldprogname; 267 progname = oldprogname;
276} 268}
277 269
278static int handle_script(lua_State *L, char **argv, int n) 270static int handle_script(lua_State *L, char **argx)
279{ 271{
280 int status; 272 int status;
281 const char *fname; 273 const char *fname = argx[0];
282 int narg = getargs(L, argv, n); /* collect arguments */ 274 if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
283 lua_setglobal(L, "arg");
284 fname = argv[n];
285 if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
286 fname = NULL; /* stdin */ 275 fname = NULL; /* stdin */
287 status = luaL_loadfile(L, fname); 276 status = luaL_loadfile(L, fname);
288 lua_insert(L, -(narg+1)); 277 if (status == LUA_OK) {
289 if (status == 0) 278 /* Fetch args from arg table. LUA_INIT or -e might have changed them. */
279 int narg = 0;
280 lua_getglobal(L, "arg");
281 if (lua_istable(L, -1)) {
282 do {
283 narg++;
284 lua_rawgeti(L, -narg, narg);
285 } while (!lua_isnil(L, -1));
286 lua_pop(L, 1);
287 lua_remove(L, -narg);
288 narg--;
289 } else {
290 lua_pop(L, 1);
291 }
290 status = docall(L, narg, 0); 292 status = docall(L, narg, 0);
291 else 293 }
292 lua_pop(L, narg);
293 return report(L, status); 294 return report(L, status);
294} 295}
295 296
@@ -385,7 +386,8 @@ static int dobytecode(lua_State *L, char **argv)
385 } 386 }
386 for (argv++; *argv != NULL; narg++, argv++) 387 for (argv++; *argv != NULL; narg++, argv++)
387 lua_pushstring(L, *argv); 388 lua_pushstring(L, *argv);
388 return report(L, lua_pcall(L, narg, 0, 0)); 389 report(L, lua_pcall(L, narg, 0, 0));
390 return -1;
389} 391}
390 392
391/* check that argument has no extra characters at the end */ 393/* check that argument has no extra characters at the end */
@@ -406,7 +408,7 @@ static int collectargs(char **argv, int *flags)
406 switch (argv[i][1]) { /* Check option. */ 408 switch (argv[i][1]) { /* Check option. */
407 case '-': 409 case '-':
408 notail(argv[i]); 410 notail(argv[i]);
409 return (argv[i+1] != NULL ? i+1 : 0); 411 return i+1;
410 case '\0': 412 case '\0':
411 return i; 413 return i;
412 case 'i': 414 case 'i':
@@ -432,23 +434,23 @@ static int collectargs(char **argv, int *flags)
432 case 'b': /* LuaJIT extension */ 434 case 'b': /* LuaJIT extension */
433 if (*flags) return -1; 435 if (*flags) return -1;
434 *flags |= FLAGS_EXEC; 436 *flags |= FLAGS_EXEC;
435 return 0; 437 return i+1;
436 case 'E': 438 case 'E':
437 *flags |= FLAGS_NOENV; 439 *flags |= FLAGS_NOENV;
438 break; 440 break;
439 default: return -1; /* invalid option */ 441 default: return -1; /* invalid option */
440 } 442 }
441 } 443 }
442 return 0; 444 return i;
443} 445}
444 446
445static int runargs(lua_State *L, char **argv, int n) 447static int runargs(lua_State *L, char **argv, int argn)
446{ 448{
447 int i; 449 int i;
448 for (i = 1; i < n; i++) { 450 for (i = 1; i < argn; i++) {
449 if (argv[i] == NULL) continue; 451 if (argv[i] == NULL) continue;
450 lua_assert(argv[i][0] == '-'); 452 lua_assert(argv[i][0] == '-');
451 switch (argv[i][1]) { /* option */ 453 switch (argv[i][1]) {
452 case 'e': { 454 case 'e': {
453 const char *chunk = argv[i] + 2; 455 const char *chunk = argv[i] + 2;
454 if (*chunk == '\0') chunk = argv[++i]; 456 if (*chunk == '\0') chunk = argv[++i];
@@ -462,10 +464,10 @@ static int runargs(lua_State *L, char **argv, int n)
462 if (*filename == '\0') filename = argv[++i]; 464 if (*filename == '\0') filename = argv[++i];
463 lua_assert(filename != NULL); 465 lua_assert(filename != NULL);
464 if (dolibrary(L, filename)) 466 if (dolibrary(L, filename))
465 return 1; /* stop if file fails */ 467 return 1;
466 break; 468 break;
467 } 469 }
468 case 'j': { /* LuaJIT extension */ 470 case 'j': { /* LuaJIT extension. */
469 const char *cmd = argv[i] + 2; 471 const char *cmd = argv[i] + 2;
470 if (*cmd == '\0') cmd = argv[++i]; 472 if (*cmd == '\0') cmd = argv[++i];
471 lua_assert(cmd != NULL); 473 lua_assert(cmd != NULL);
@@ -473,16 +475,16 @@ static int runargs(lua_State *L, char **argv, int n)
473 return 1; 475 return 1;
474 break; 476 break;
475 } 477 }
476 case 'O': /* LuaJIT extension */ 478 case 'O': /* LuaJIT extension. */
477 if (dojitopt(L, argv[i] + 2)) 479 if (dojitopt(L, argv[i] + 2))
478 return 1; 480 return 1;
479 break; 481 break;
480 case 'b': /* LuaJIT extension */ 482 case 'b': /* LuaJIT extension. */
481 return dobytecode(L, argv+i); 483 return dobytecode(L, argv+i);
482 default: break; 484 default: break;
483 } 485 }
484 } 486 }
485 return 0; 487 return LUA_OK;
486} 488}
487 489
488static int handle_luainit(lua_State *L) 490static int handle_luainit(lua_State *L)
@@ -493,7 +495,7 @@ static int handle_luainit(lua_State *L)
493 const char *init = getenv(LUA_INIT); 495 const char *init = getenv(LUA_INIT);
494#endif 496#endif
495 if (init == NULL) 497 if (init == NULL)
496 return 0; /* status OK */ 498 return LUA_OK;
497 else if (init[0] == '@') 499 else if (init[0] == '@')
498 return dofile(L, init+1); 500 return dofile(L, init+1);
499 else 501 else
@@ -510,44 +512,55 @@ static int pmain(lua_State *L)
510{ 512{
511 struct Smain *s = &smain; 513 struct Smain *s = &smain;
512 char **argv = s->argv; 514 char **argv = s->argv;
513 int script; 515 int argn;
514 int flags = 0; 516 int flags = 0;
515 globalL = L; 517 globalL = L;
516 LUAJIT_VERSION_SYM(); /* linker-enforced version check */ 518 LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
517 script = collectargs(argv, &flags); 519
518 if (script < 0) { /* invalid args? */ 520 argn = collectargs(argv, &flags);
521 if (argn < 0) { /* Invalid args? */
519 print_usage(); 522 print_usage();
520 s->status = 1; 523 s->status = 1;
521 return 0; 524 return 0;
522 } 525 }
526
523 if ((flags & FLAGS_NOENV)) { 527 if ((flags & FLAGS_NOENV)) {
524 lua_pushboolean(L, 1); 528 lua_pushboolean(L, 1);
525 lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); 529 lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
526 } 530 }
527 lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ 531
528 luaL_openlibs(L); /* open libraries */ 532 /* Stop collector during library initialization. */
533 lua_gc(L, LUA_GCSTOP, 0);
534 luaL_openlibs(L);
529 lua_gc(L, LUA_GCRESTART, -1); 535 lua_gc(L, LUA_GCRESTART, -1);
536
537 createargtable(L, argv, s->argc, argn);
538
530 if (!(flags & FLAGS_NOENV)) { 539 if (!(flags & FLAGS_NOENV)) {
531 s->status = handle_luainit(L); 540 s->status = handle_luainit(L);
532 if (s->status != 0) return 0; 541 if (s->status != LUA_OK) return 0;
533 } 542 }
543
534 if ((flags & FLAGS_VERSION)) print_version(); 544 if ((flags & FLAGS_VERSION)) print_version();
535 s->status = runargs(L, argv, (script > 0) ? script : s->argc); 545
536 if (s->status != 0) return 0; 546 s->status = runargs(L, argv, argn);
537 if (script) { 547 if (s->status != LUA_OK) return 0;
538 s->status = handle_script(L, argv, script); 548
539 if (s->status != 0) return 0; 549 if (s->argc > argn) {
550 s->status = handle_script(L, argv + argn);
551 if (s->status != LUA_OK) return 0;
540 } 552 }
553
541 if ((flags & FLAGS_INTERACTIVE)) { 554 if ((flags & FLAGS_INTERACTIVE)) {
542 print_jit_status(L); 555 print_jit_status(L);
543 dotty(L); 556 dotty(L);
544 } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { 557 } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
545 if (lua_stdin_is_tty()) { 558 if (lua_stdin_is_tty()) {
546 print_version(); 559 print_version();
547 print_jit_status(L); 560 print_jit_status(L);
548 dotty(L); 561 dotty(L);
549 } else { 562 } else {
550 dofile(L, NULL); /* executes stdin as a file */ 563 dofile(L, NULL); /* Executes stdin as a file. */
551 } 564 }
552 } 565 }
553 return 0; 566 return 0;
@@ -558,7 +571,7 @@ int main(int argc, char **argv)
558 int status; 571 int status;
559 lua_State *L; 572 lua_State *L;
560 if (!argv[0]) argv = empty_argv; else if (argv[0][0]) progname = argv[0]; 573 if (!argv[0]) argv = empty_argv; else if (argv[0][0]) progname = argv[0];
561 L = lua_open(); /* create state */ 574 L = lua_open();
562 if (L == NULL) { 575 if (L == NULL) {
563 l_message("cannot create state: not enough memory"); 576 l_message("cannot create state: not enough memory");
564 return EXIT_FAILURE; 577 return EXIT_FAILURE;
@@ -568,6 +581,6 @@ int main(int argc, char **argv)
568 status = lua_cpcall(L, pmain, NULL); 581 status = lua_cpcall(L, pmain, NULL);
569 report(L, status); 582 report(L, status);
570 lua_close(L); 583 lua_close(L);
571 return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; 584 return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS;
572} 585}
573 586
diff --git a/src/luajit_rolling.h b/src/luajit_rolling.h
index e7ff2c23..2d04402c 100644
--- a/src/luajit_rolling.h
+++ b/src/luajit_rolling.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.ROLLING" 33#define LUAJIT_VERSION "LuaJIT 2.1.ROLLING"
34#define LUAJIT_VERSION_NUM 20099 /* Deprecated. */ 34#define LUAJIT_VERSION_NUM 20199 /* Deprecated. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_ROLLING 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_ROLLING
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2023 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2023 Mike Pall"
37#define LUAJIT_URL "https://luajit.org/" 37#define LUAJIT_URL "https://luajit.org/"
38 38
@@ -64,6 +64,15 @@ enum {
64/* Control the JIT engine. */ 64/* Control the JIT engine. */
65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); 65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
66 66
67/* Low-overhead profiling API. */
68typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
69 int samples, int vmstate);
70LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
71 luaJIT_profile_callback cb, void *data);
72LUA_API void luaJIT_profile_stop(lua_State *L);
73LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
74 int depth, size_t *len);
75
67/* Enforce (dynamic) linker error for version mismatches. Call from main. */ 76/* Enforce (dynamic) linker error for version mismatches. Call from main. */
68LUA_API void LUAJIT_VERSION_SYM(void); 77LUA_API void LUAJIT_VERSION_SYM(void);
69 78
diff --git a/src/lualib.h b/src/lualib.h
index 316782fc..ea116eb1 100644
--- a/src/lualib.h
+++ b/src/lualib.h
@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L);
33LUALIB_API int luaopen_bit(lua_State *L); 33LUALIB_API int luaopen_bit(lua_State *L);
34LUALIB_API int luaopen_jit(lua_State *L); 34LUALIB_API int luaopen_jit(lua_State *L);
35LUALIB_API int luaopen_ffi(lua_State *L); 35LUALIB_API int luaopen_ffi(lua_State *L);
36LUALIB_API int luaopen_string_buffer(lua_State *L);
36 37
37LUALIB_API void luaL_openlibs(lua_State *L); 38LUALIB_API void luaL_openlibs(lua_State *L);
38 39
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 0ac65409..5b493ba5 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -5,6 +5,7 @@
5@rem Then cd to this directory and run this script. Use the following 5@rem Then cd to this directory and run this script. Use the following
6@rem options (in order), if needed. The default is a dynamic release build. 6@rem options (in order), if needed. The default is a dynamic release build.
7@rem 7@rem
8@rem nogc64 disable LJ_GC64 mode for x64
8@rem debug emit debug symbols 9@rem debug emit debug symbols
9@rem amalg amalgamated build 10@rem amalg amalgamated build
10@rem static static linkage 11@rem static static linkage
@@ -12,46 +13,71 @@
12@if not defined INCLUDE goto :FAIL 13@if not defined INCLUDE goto :FAIL
13 14
14@setlocal 15@setlocal
15@rem Add more debug flags here, e.g. DEBUGCFLAGS=/DLUA_USE_APICHECK 16@rem Add more debug flags here, e.g. DEBUGCFLAGS=/DLUA_USE_ASSERT
16@set DEBUGCFLAGS= 17@set DEBUGCFLAGS=
17@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline 18@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline
18@set LJDYNBUILD=/MD /DLUA_BUILD_AS_DLL 19@set LJDYNBUILD=/DLUA_BUILD_AS_DLL /MD
20@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd
21@set LJCOMPILETARGET=/Zi
22@set LJLINKTYPE=/DEBUG /RELEASE
23@set LJLINKTYPE_DEBUG=/DEBUG
24@set LJLINKTARGET=/OPT:REF /OPT:ICF /INCREMENTAL:NO
19@set LJLINK=link /nologo 25@set LJLINK=link /nologo
20@set LJMT=mt /nologo 26@set LJMT=mt /nologo
21@set LJLIB=lib /nologo /nodefaultlib 27@set LJLIB=lib /nologo /nodefaultlib
22@set DASMDIR=..\dynasm 28@set DASMDIR=..\dynasm
23@set DASM=%DASMDIR%\dynasm.lua 29@set DASM=%DASMDIR%\dynasm.lua
30@set DASC=vm_x64.dasc
24@set LJDLLNAME=lua51.dll 31@set LJDLLNAME=lua51.dll
25@set LJLIBNAME=lua51.lib 32@set LJLIBNAME=lua51.lib
26@set BUILDTYPE=release 33@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
27@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
28 34
35@setlocal
36@call :SETHOSTVARS
29%LJCOMPILE% host\minilua.c 37%LJCOMPILE% host\minilua.c
30@if errorlevel 1 goto :BAD 38@if errorlevel 1 goto :BAD
31%LJLINK% /out:minilua.exe minilua.obj 39%LJLINK% /out:minilua.exe minilua.obj
32@if errorlevel 1 goto :BAD 40@if errorlevel 1 goto :BAD
33if exist minilua.exe.manifest^ 41if exist minilua.exe.manifest^
34 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe 42 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
43@endlocal
35 44
36@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 45@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64
37@set LJARCH=x64 46@set LJARCH=x64
38@minilua 47@minilua
39@if errorlevel 8 goto :X64 48@if errorlevel 8 goto :NO32
40@set DASMFLAGS=-D WIN -D JIT -D FFI 49@set DASC=vm_x86.dasc
50@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU
41@set LJARCH=x86 51@set LJARCH=x86
52@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
53@goto :DA
54:NO32
55@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64
56@set DASC=vm_arm64.dasc
57@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
58@set LJARCH=arm64
59@goto :DA
42:X64 60:X64
43minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 61@if "%1" neq "nogc64" goto :DA
62@shift
63@set DASC=vm_x86.dasc
64@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
65:DA
66minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
44@if errorlevel 1 goto :BAD 67@if errorlevel 1 goto :BAD
45 68
46if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) 69if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
47minilua host\genversion.lua 70minilua host\genversion.lua
48 71
49%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c 72@setlocal
73@call :SETHOSTVARS
74%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c
50@if errorlevel 1 goto :BAD 75@if errorlevel 1 goto :BAD
51%LJLINK% /out:buildvm.exe buildvm*.obj 76%LJLINK% /out:buildvm.exe buildvm*.obj
52@if errorlevel 1 goto :BAD 77@if errorlevel 1 goto :BAD
53if exist buildvm.exe.manifest^ 78if exist buildvm.exe.manifest^
54 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe 79 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
80@endlocal
55 81
56buildvm -m peobj -o lj_vm.obj 82buildvm -m peobj -o lj_vm.obj
57@if errorlevel 1 goto :BAD 83@if errorlevel 1 goto :BAD
@@ -70,16 +96,17 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
70 96
71@if "%1" neq "debug" goto :NODEBUG 97@if "%1" neq "debug" goto :NODEBUG
72@shift 98@shift
73@set BUILDTYPE=debug 99@set LJCOMPILE=%LJCOMPILE% %DEBUGCFLAGS%
74@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% 100@set LJDYNBUILD=%LJDYNBUILD_DEBUG%
75@set LJDYNBUILD=/MDd /DLUA_BUILD_AS_DLL 101@set LJLINKTYPE=%LJLINKTYPE_DEBUG%
76:NODEBUG 102:NODEBUG
77@set LJLINK=%LJLINK% /%BUILDTYPE% 103@set LJCOMPILE=%LJCOMPILE% %LJCOMPILETARGET%
104@set LJLINK=%LJLINK% %LJLINKTYPE% %LJLINKTARGET%
78@if "%1"=="amalg" goto :AMALGDLL 105@if "%1"=="amalg" goto :AMALGDLL
79@if "%1"=="static" goto :STATIC 106@if "%1"=="static" goto :STATIC
80%LJCOMPILE% %LJDYNBUILD% lj_*.c lib_*.c 107%LJCOMPILE% %LJDYNBUILD% lj_*.c lib_*.c
81@if errorlevel 1 goto :BAD 108@if errorlevel 1 goto :BAD
82%LJLINK% /DLL /out:%LJDLLNAME% lj_*.obj lib_*.obj 109%LJLINK% /DLL /OUT:%LJDLLNAME% lj_*.obj lib_*.obj
83@if errorlevel 1 goto :BAD 110@if errorlevel 1 goto :BAD
84@goto :MTDLL 111@goto :MTDLL
85:STATIC 112:STATIC
@@ -89,9 +116,16 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
89@if errorlevel 1 goto :BAD 116@if errorlevel 1 goto :BAD
90@goto :MTDLL 117@goto :MTDLL
91:AMALGDLL 118:AMALGDLL
119@if "%2"=="static" goto :AMALGSTATIC
92%LJCOMPILE% %LJDYNBUILD% ljamalg.c 120%LJCOMPILE% %LJDYNBUILD% ljamalg.c
93@if errorlevel 1 goto :BAD 121@if errorlevel 1 goto :BAD
94%LJLINK% /DLL /out:%LJDLLNAME% ljamalg.obj lj_vm.obj 122%LJLINK% /DLL /OUT:%LJDLLNAME% ljamalg.obj lj_vm.obj
123@if errorlevel 1 goto :BAD
124@goto :MTDLL
125:AMALGSTATIC
126%LJCOMPILE% ljamalg.c
127@if errorlevel 1 goto :BAD
128%LJLINK% /OUT:%LJDLLNAME% ljamalg.obj lj_vm.obj
95@if errorlevel 1 goto :BAD 129@if errorlevel 1 goto :BAD
96:MTDLL 130:MTDLL
97if exist %LJDLLNAME%.manifest^ 131if exist %LJDLLNAME%.manifest^
@@ -99,7 +133,7 @@ if exist %LJDLLNAME%.manifest^
99 133
100%LJCOMPILE% luajit.c 134%LJCOMPILE% luajit.c
101@if errorlevel 1 goto :BAD 135@if errorlevel 1 goto :BAD
102%LJLINK% /out:luajit.exe luajit.obj %LJLIBNAME% 136%LJLINK% /OUT:luajit.exe luajit.obj %LJLIBNAME%
103@if errorlevel 1 goto :BAD 137@if errorlevel 1 goto :BAD
104if exist luajit.exe.manifest^ 138if exist luajit.exe.manifest^
105 %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe 139 %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe
@@ -111,6 +145,12 @@ if exist luajit.exe.manifest^
111@echo === Successfully built LuaJIT for Windows/%LJARCH% === 145@echo === Successfully built LuaJIT for Windows/%LJARCH% ===
112 146
113@goto :END 147@goto :END
148:SETHOSTVARS
149@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" (
150 call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo
151 echo on
152)
153@goto :END
114:BAD 154:BAD
115@echo. 155@echo.
116@echo ******************************************************* 156@echo *******************************************************
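Taken together, the reworked msvcbuild.bat options imply invocations along these lines; the order matters because nogc64 is consumed before debug, which is consumed before amalg/static:

    msvcbuild                      dynamic release build (GC64 on x64, the default)
    msvcbuild nogc64 debug         x64 build with LJ_GC64 disabled, debug symbols
    msvcbuild debug amalg static   amalgamated static debug build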
diff --git a/src/nxbuild.bat b/src/nxbuild.bat
new file mode 100644
index 00000000..91513397
--- /dev/null
+++ b/src/nxbuild.bat
@@ -0,0 +1,165 @@
1@rem Script to build LuaJIT with NintendoSDK + NX Addon.
2@rem Donated to the public domain by Swyter.
3@rem
4@rem To run this script you must open a "Native Tools Command Prompt for VS".
5@rem
6@rem Either the x86 version for NX32, or x64 for the NX64 target.
7@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe)
8@rem must match the cross-compiled target (32 or 64 bits).
9@rem
10@rem Then cd to this directory and run this script.
11@rem
12@rem Recommended invocation:
13@rem
14@rem nxbuild # release build, amalgamated
15@rem nxbuild debug # debug build, amalgamated
16@rem
17@rem Additional command-line options (not generally recommended):
18@rem
19@rem noamalg # (after debug) non-amalgamated build
20
21@if not defined INCLUDE goto :FAIL
22@if not defined NINTENDO_SDK_ROOT goto :FAIL
23@if not defined PLATFORM goto :FAIL
24
25@if "%platform%" == "x86" goto :DO_NX32
26@if "%platform%" == "x64" goto :DO_NX64
27
28@echo Error: Current host platform is %platform%!
29@echo.
30@goto :FAIL
31
32@setlocal
33
34:DO_NX32
35@set DASC=vm_arm.dasc
36@set DASMFLAGS= -D HFABI -D FPU
37@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM
38@set HOST_PTR_SIZE=4
39goto :BEGIN
40
41:DO_NX64
42@set DASC=vm_arm64.dasc
43@set DASMFLAGS= -D ENDIAN_LE
44@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
45@set HOST_PTR_SIZE=8
46
47:BEGIN
48@rem ---- Host compiler ----
49@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE
50@set LJLINK=link /nologo
51@set LJMT=mt /nologo
52@set DASMDIR=..\dynasm
53@set DASM=%DASMDIR%\dynasm.lua
54@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
55
56%LJCOMPILE% host\minilua.c
57@if errorlevel 1 goto :BAD
58%LJLINK% /out:minilua.exe minilua.obj
59@if errorlevel 1 goto :BAD
60if exist minilua.exe.manifest^
61 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
62
63@rem Check that we have the right 32/64 bit host compiler to generate the right virtual machine files.
64@minilua
65@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK
66
67@echo The required host pointer size in bytes (%HOST_PTR_SIZE%) does not match the size reported by minilua (%errorlevel%).
68@echo Check that the script is being run under the correct x86/x64 VS prompt.
69@goto :BAD
70
71:PASSED_PTR_CHECK
72@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI
73minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
74@if errorlevel 1 goto :BAD
75
76if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
77minilua host\genversion.lua
78
79%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c
80@if errorlevel 1 goto :BAD
81%LJLINK% /out:buildvm.exe buildvm*.obj
82@if errorlevel 1 goto :BAD
83if exist buildvm.exe.manifest^
84 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
85
86buildvm -m elfasm -o lj_vm.s
87@if errorlevel 1 goto :BAD
88buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
89@if errorlevel 1 goto :BAD
90buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
91@if errorlevel 1 goto :BAD
92buildvm -m libdef -o lj_libdef.h %ALL_LIB%
93@if errorlevel 1 goto :BAD
94buildvm -m recdef -o lj_recdef.h %ALL_LIB%
95@if errorlevel 1 goto :BAD
96buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
97@if errorlevel 1 goto :BAD
98buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
99@if errorlevel 1 goto :BAD
100
101@rem ---- Cross compiler ----
102@set NXCOMPILER_ROOT="%NINTENDO_SDK_ROOT%\Compilers\NintendoClang"
103@if "%platform%" neq "x64" goto :NX32_CROSSBUILD
104@set LJCOMPILE="%NXCOMPILER_ROOT%\bin\clang" --target=aarch64-nintendo-nx-elf -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
105@set LJLIB="%NXCOMPILER_ROOT%\bin\llvm-ar" rc
106@set TARGETLIB_SUFFIX=nx64
107
108%NXCOMPILER_ROOT%\bin\clang --target=aarch64-nintendo-nx-elf -o lj_vm.o -c lj_vm.s
109goto :DEBUGCHECK
110
111:NX32_CROSSBUILD
112@set LJCOMPILE="%NXCOMPILER_ROOT%\bin\clang" --target=armv7l-nintendo-nx-eabihf -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
113@set LJLIB="%NXCOMPILER_ROOT%\bin\llvm-ar" rc
114@set TARGETLIB_SUFFIX=nx32
115
116%NXCOMPILER_ROOT%\bin\clang --target=armv7l-nintendo-nx-eabihf -o lj_vm.o -c lj_vm.s
117:DEBUGCHECK
118
119@if "%1" neq "debug" goto :NODEBUG
120@shift
121@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0
122@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a
123goto :BUILD
124:NODEBUG
125@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3
126@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a
127:BUILD
128del %TARGETLIB%
129@set LJCOMPILE=%LJCOMPILE% -fPIC
130@if "%1" neq "noamalg" goto :AMALG
131for %%f in (lj_*.c lib_*.c) do (
132 %LJCOMPILE% %%f
133 @if errorlevel 1 goto :BAD
134)
135
136%LJLIB% %TARGETLIB% lj_*.o lib_*.o
137@if errorlevel 1 goto :BAD
138@goto :NOAMALG
139:AMALG
140%LJCOMPILE% ljamalg.c
141@if errorlevel 1 goto :BAD
142%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
143@if errorlevel 1 goto :BAD
144:NOAMALG
145
146@del *.o *.obj *.manifest minilua.exe buildvm.exe
147@echo.
148@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) ===
149
150@goto :END
151:BAD
152@echo.
153@echo *******************************************************
154@echo *** Build FAILED -- Please check the error messages ***
155@echo *******************************************************
156@goto :END
157:FAIL
158@echo To run this script you must open a "Native Tools Command Prompt for VS".
159@echo.
160@echo Either the x86 version for NX32, or x64 for the NX64 target.
161@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe)
162@echo must match the cross-compiled target (32 or 64 bits).
163@echo.
164@echo Keep in mind that NintendoSDK + NX Addon must be installed, too.
165:END
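The pointer-size check in nxbuild.bat above (and the plain `errorlevel 8` tests in the other batch files) relies on the exit code of minilua when it is started without arguments. A sketch of that mechanism, assuming host/minilua.c simply reports sizeof(void *) in that case; the real guard may be phrased differently:

    #include <stdio.h>

    /* Hypothetical reduction of the probe: with nothing to run, exit with the
    ** host pointer size so a batch file can compare ERRORLEVEL against 4 or 8. */
    int main(int argc, char **argv)
    {
      if (argc < 2)
        return (int)sizeof(void *);   /* 4 under an x86 prompt, 8 under x64 */
      printf("usage: %s script.lua\n", argv[0]);
      return 0;
    }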
diff --git a/src/ps4build.bat b/src/ps4build.bat
index 5ae92171..d6f73e5a 100644
--- a/src/ps4build.bat
+++ b/src/ps4build.bat
@@ -2,7 +2,19 @@
2@rem Donated to the public domain. 2@rem Donated to the public domain.
3@rem 3@rem
4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) 4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5@rem or "VS2015 x64 Native Tools Command Prompt".
6@rem
5@rem Then cd to this directory and run this script. 7@rem Then cd to this directory and run this script.
8@rem
9@rem Recommended invocation:
10@rem
11@rem ps4build release build, amalgamated, 64-bit GC
12@rem ps4build debug debug build, amalgamated, 64-bit GC
13@rem
14@rem Additional command-line options (not generally recommended):
15@rem
16@rem gc32 (before debug) 32-bit GC
17@rem noamalg (after debug) non-amalgamated build
6 18
7@if not defined INCLUDE goto :FAIL 19@if not defined INCLUDE goto :FAIL
8@if not defined SCE_ORBIS_SDK_DIR goto :FAIL 20@if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -14,7 +26,15 @@
14@set LJMT=mt /nologo 26@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm 27@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua 28@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 29@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
30@set GC64=
31@set DASC=vm_x64.dasc
32
33@if "%1" neq "gc32" goto :NOGC32
34@shift
35@set GC64=-DLUAJIT_DISABLE_GC64
36@set DASC=vm_x86.dasc
37:NOGC32
18 38
19%LJCOMPILE% host\minilua.c 39%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD 40@if errorlevel 1 goto :BAD
@@ -28,13 +48,14 @@ if exist minilua.exe.manifest^
28@if not errorlevel 8 goto :FAIL 48@if not errorlevel 8 goto :FAIL
29 49
30@set DASMFLAGS=-D P64 -D NO_UNWIND 50@set DASMFLAGS=-D P64 -D NO_UNWIND
31minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 51minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
32@if errorlevel 1 goto :BAD 52@if errorlevel 1 goto :BAD
33 53
34if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) 54if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
35minilua host\genversion.lua 55minilua host\genversion.lua
36 56
37%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c 57%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -DLUAJIT_NO_UNWIND host\buildvm*.c
58
38@if errorlevel 1 goto :BAD 59@if errorlevel 1 goto :BAD
39%LJLINK% /out:buildvm.exe buildvm*.obj 60%LJLINK% /out:buildvm.exe buildvm*.obj
40@if errorlevel 1 goto :BAD 61@if errorlevel 1 goto :BAD
@@ -57,23 +78,23 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
57@if errorlevel 1 goto :BAD 78@if errorlevel 1 goto :BAD
58 79
59@rem ---- Cross compiler ---- 80@rem ---- Cross compiler ----
60@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI 81@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
61@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus 82@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
62@set INCLUDE="" 83@set INCLUDE=""
63 84
64orbis-as -o lj_vm.o lj_vm.s 85"%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-as" -o lj_vm.o lj_vm.s
65 86
66@if "%1" neq "debug" goto :NODEBUG 87@if "%1" neq "debug" goto :NODEBUG
67@shift 88@shift
68@set LJCOMPILE=%LJCOMPILE% -g -O0 89@set LJCOMPILE=%LJCOMPILE% -g -O0
69@set TARGETLIB=libluajitD.a 90@set TARGETLIB=libluajitD_ps4.a
70goto :BUILD 91goto :BUILD
71:NODEBUG 92:NODEBUG
72@set LJCOMPILE=%LJCOMPILE% -O2 93@set LJCOMPILE=%LJCOMPILE% -O2
73@set TARGETLIB=libluajit.a 94@set TARGETLIB=libluajit_ps4.a
74:BUILD 95:BUILD
75del %TARGETLIB% 96del %TARGETLIB%
76@if "%1"=="amalg" goto :AMALG 97@if "%1" neq "noamalg" goto :AMALG
77for %%f in (lj_*.c lib_*.c) do ( 98for %%f in (lj_*.c lib_*.c) do (
78 %LJCOMPILE% %%f 99 %LJCOMPILE% %%f
79 @if errorlevel 1 goto :BAD 100 @if errorlevel 1 goto :BAD
diff --git a/src/ps5build.bat b/src/ps5build.bat
new file mode 100644
index 00000000..30b719d6
--- /dev/null
+++ b/src/ps5build.bat
@@ -0,0 +1,126 @@
1@rem Script to build LuaJIT with the PS5 SDK.
2@rem Donated to the public domain.
3@rem
4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5@rem or "VS20xx x64 Native Tools Command Prompt".
6@rem
7@rem Then cd to this directory and run this script.
8@rem
9@rem Recommended invocation:
10@rem
11@rem ps5build release build, amalgamated, 64-bit GC
12@rem ps5build debug debug build, amalgamated, 64-bit GC
13@rem
14@rem Additional command-line options (not generally recommended):
15@rem
16@rem gc32 (before debug) 32-bit GC
17@rem noamalg (after debug) non-amalgamated build
18
19@if not defined INCLUDE goto :FAIL
20@if not defined SCE_PROSPERO_SDK_DIR goto :FAIL
21
22@setlocal
23@rem ---- Host compiler ----
24@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
25@set LJLINK=link /nologo
26@set LJMT=mt /nologo
27@set DASMDIR=..\dynasm
28@set DASM=%DASMDIR%\dynasm.lua
29@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
30@set GC64=
31@set DASC=vm_x64.dasc
32
33@if "%1" neq "gc32" goto :NOGC32
34@shift
35@set GC64=-DLUAJIT_DISABLE_GC64
36@set DASC=vm_x86.dasc
37:NOGC32
38
39%LJCOMPILE% host\minilua.c
40@if errorlevel 1 goto :BAD
41%LJLINK% /out:minilua.exe minilua.obj
42@if errorlevel 1 goto :BAD
43if exist minilua.exe.manifest^
44 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
45
46@rem Check for 64 bit host compiler.
47@minilua
48@if not errorlevel 8 goto :FAIL
49
50@set DASMFLAGS=-D P64 -D NO_UNWIND
51minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
52@if errorlevel 1 goto :BAD
53
54if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
55minilua host\genversion.lua
56
57%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
58@if errorlevel 1 goto :BAD
59%LJLINK% /out:buildvm.exe buildvm*.obj
60@if errorlevel 1 goto :BAD
61if exist buildvm.exe.manifest^
62 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
63
64buildvm -m elfasm -o lj_vm.s
65@if errorlevel 1 goto :BAD
66buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
67@if errorlevel 1 goto :BAD
68buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
69@if errorlevel 1 goto :BAD
70buildvm -m libdef -o lj_libdef.h %ALL_LIB%
71@if errorlevel 1 goto :BAD
72buildvm -m recdef -o lj_recdef.h %ALL_LIB%
73@if errorlevel 1 goto :BAD
74buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
75@if errorlevel 1 goto :BAD
76buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
77@if errorlevel 1 goto :BAD
78
79@rem ---- Cross compiler ----
80@set LJCOMPILE="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -Wall -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC %GC64%
81@set LJLIB="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-ar" rcus
82@set INCLUDE=""
83
84"%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -o lj_vm.o lj_vm.s
85
86@if "%1" neq "debug" goto :NODEBUG
87@shift
88@set LJCOMPILE=%LJCOMPILE% -g -O0
89@set TARGETLIB=libluajitD_ps5.a
90goto :BUILD
91:NODEBUG
92@set LJCOMPILE=%LJCOMPILE% -O2
93@set TARGETLIB=libluajit_ps5.a
94:BUILD
95del %TARGETLIB%
96@if "%1" neq "noamalg" goto :AMALG
97for %%f in (lj_*.c lib_*.c) do (
98 %LJCOMPILE% %%f
99 @if errorlevel 1 goto :BAD
100)
101
102%LJLIB% %TARGETLIB% lj_*.o lib_*.o
103@if errorlevel 1 goto :BAD
104@goto :NOAMALG
105:AMALG
106%LJCOMPILE% ljamalg.c
107@if errorlevel 1 goto :BAD
108%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
109@if errorlevel 1 goto :BAD
110:NOAMALG
111
112@del *.o *.obj *.manifest minilua.exe buildvm.exe
113@echo.
114@echo === Successfully built LuaJIT for PS5 ===
115
116@goto :END
117:BAD
118@echo.
119@echo *******************************************************
120@echo *** Build FAILED -- Please check the error messages ***
121@echo *******************************************************
122@goto :END
123:FAIL
124@echo To run this script you must open a "Visual Studio .NET Command Prompt"
125@echo (64 bit host compiler). The PS5 Prospero SDK must be installed, too.
126:END
diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat
index 132fed0a..8d176e52 100644
--- a/src/psvitabuild.bat
+++ b/src/psvitabuild.bat
@@ -14,7 +14,7 @@
14@set LJMT=mt /nologo 14@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm 15@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua 16@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
18 18
19%LJCOMPILE% host\minilua.c 19%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD 20@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index d23e57c1..4cad5d83 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -99,6 +99,7 @@
99|.type NODE, Node 99|.type NODE, Node
100|.type NARGS8, int 100|.type NARGS8, int
101|.type TRACE, GCtrace 101|.type TRACE, GCtrace
102|.type SBUF, SBuf
102| 103|
103|//----------------------------------------------------------------------- 104|//-----------------------------------------------------------------------
104| 105|
@@ -372,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx)
372 | st_vmstate CARG2 373 | st_vmstate CARG2
373 | b ->vm_returnc 374 | b ->vm_returnc
374 | 375 |
376 |->vm_unwind_ext: // Complete external unwind.
377#if !LJ_NO_UNWIND
378 | push {r0, r1, r2, lr}
379 | bl extern _Unwind_Complete
380 | ldr r0, [sp]
381 | bl extern _Unwind_DeleteException
382 | pop {r0, r1, r2, lr}
383 | mov r0, r1
384 | bx r2
385#endif
386 |
375 |//----------------------------------------------------------------------- 387 |//-----------------------------------------------------------------------
376 |//-- Grow stack for calls ----------------------------------------------- 388 |//-- Grow stack for calls -----------------------------------------------
377 |//----------------------------------------------------------------------- 389 |//-----------------------------------------------------------------------
@@ -418,13 +430,14 @@ static void build_subroutines(BuildCtx *ctx)
418 | add CARG2, sp, #CFRAME_RESUME 430 | add CARG2, sp, #CFRAME_RESUME
419 | ldrb CARG1, L->status 431 | ldrb CARG1, L->status
420 | str CARG3, SAVE_ERRF 432 | str CARG3, SAVE_ERRF
421 | str CARG2, L->cframe 433 | str L, SAVE_PC // Any value outside of bytecode is ok.
422 | str CARG3, SAVE_CFRAME 434 | str CARG3, SAVE_CFRAME
423 | cmp CARG1, #0 435 | cmp CARG1, #0
424 | str L, SAVE_PC // Any value outside of bytecode is ok. 436 | str CARG2, L->cframe
425 | beq >3 437 | beq >3
426 | 438 |
427 | // Resume after yield (like a return). 439 | // Resume after yield (like a return).
440 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
428 | mov RA, BASE 441 | mov RA, BASE
429 | ldr BASE, L->base 442 | ldr BASE, L->base
430 | ldr CARG1, L->top 443 | ldr CARG1, L->top
@@ -458,14 +471,15 @@ static void build_subroutines(BuildCtx *ctx)
458 | str CARG3, SAVE_NRES 471 | str CARG3, SAVE_NRES
459 | mov L, CARG1 472 | mov L, CARG1
460 | str CARG1, SAVE_L 473 | str CARG1, SAVE_L
461 | mov BASE, CARG2
462 | str sp, L->cframe // Add our C frame to cframe chain.
463 | ldr DISPATCH, L->glref // Setup pointer to dispatch table. 474 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
475 | mov BASE, CARG2
464 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 476 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
465 | str RC, SAVE_CFRAME 477 | str RC, SAVE_CFRAME
466 | add DISPATCH, DISPATCH, #GG_G2DISP 478 | add DISPATCH, DISPATCH, #GG_G2DISP
479 | str sp, L->cframe // Add our C frame to cframe chain.
467 | 480 |
468 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 481 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
482 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
469 | ldr RB, L->base // RB = old base (for vmeta_call). 483 | ldr RB, L->base // RB = old base (for vmeta_call).
470 | ldr CARG1, L->top 484 | ldr CARG1, L->top
471 | mov MASKR8, #255 485 | mov MASKR8, #255
@@ -491,20 +505,21 @@ static void build_subroutines(BuildCtx *ctx)
491 | mov L, CARG1 505 | mov L, CARG1
492 | ldr RA, L:CARG1->stack 506 | ldr RA, L:CARG1->stack
493 | str CARG1, SAVE_L 507 | str CARG1, SAVE_L
508 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
494 | ldr RB, L->top 509 | ldr RB, L->top
495 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 510 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
496 | ldr RC, L->cframe 511 | ldr RC, L->cframe
512 | add DISPATCH, DISPATCH, #GG_G2DISP
497 | sub RA, RA, RB // Compute -savestack(L, L->top). 513 | sub RA, RA, RB // Compute -savestack(L, L->top).
498 | str sp, L->cframe // Add our C frame to cframe chain.
499 | mov RB, #0 514 | mov RB, #0
500 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. 515 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
501 | str RB, SAVE_ERRF // No error function. 516 | str RB, SAVE_ERRF // No error function.
502 | str RC, SAVE_CFRAME 517 | str RC, SAVE_CFRAME
518 | str sp, L->cframe // Add our C frame to cframe chain.
519 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
503 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) 520 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
504 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
505 | movs BASE, CRET1 521 | movs BASE, CRET1
506 | mov PC, #FRAME_CP 522 | mov PC, #FRAME_CP
507 | add DISPATCH, DISPATCH, #GG_G2DISP
508 | bne <3 // Else continue with the call. 523 | bne <3 // Else continue with the call.
509 | b ->vm_leave_cp // No base? Just remove C frame. 524 | b ->vm_leave_cp // No base? Just remove C frame.
510 | 525 |
@@ -614,6 +629,16 @@ static void build_subroutines(BuildCtx *ctx)
614 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 629 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
615 | b ->vm_call_dispatch_f 630 | b ->vm_call_dispatch_f
616 | 631 |
632 |->vmeta_tgetr:
633 | .IOS mov RC, BASE
634 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
635 | // Returns cTValue * or NULL.
636 | .IOS mov BASE, RC
637 | cmp CRET1, #0
638 | ldrdne CARG12, [CRET1]
639 | mvneq CARG2, #~LJ_TNIL
640 | b ->BC_TGETR_Z
641 |
617 |//----------------------------------------------------------------------- 642 |//-----------------------------------------------------------------------
618 | 643 |
619 |->vmeta_tsets1: 644 |->vmeta_tsets1:
@@ -671,6 +696,16 @@ static void build_subroutines(BuildCtx *ctx)
671 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 696 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
672 | b ->vm_call_dispatch_f 697 | b ->vm_call_dispatch_f
673 | 698 |
699 |->vmeta_tsetr:
700 | str BASE, L->base
701 | .IOS mov RC, BASE
702 | mov CARG1, L
703 | str PC, SAVE_PC
704 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
705 | // Returns TValue *.
706 | .IOS mov BASE, RC
707 | b ->BC_TSETR_Z
708 |
674 |//-- Comparison metamethods --------------------------------------------- 709 |//-- Comparison metamethods ---------------------------------------------
675 | 710 |
676 |->vmeta_comp: 711 |->vmeta_comp:
@@ -735,6 +770,17 @@ static void build_subroutines(BuildCtx *ctx)
735 | b <3 770 | b <3
736 |.endif 771 |.endif
737 | 772 |
773 |->vmeta_istype:
774 | sub PC, PC, #4
775 | str BASE, L->base
776 | mov CARG1, L
777 | lsr CARG2, RA, #3
778 | mov CARG3, RC
779 | str PC, SAVE_PC
780 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
781 | .IOS ldr BASE, L->base
782 | b ->cont_nop
783 |
738 |//-- Arithmetic metamethods --------------------------------------------- 784 |//-- Arithmetic metamethods ---------------------------------------------
739 | 785 |
740 |->vmeta_arith_vn: 786 |->vmeta_arith_vn:
@@ -966,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx)
966 | cmp TAB:RB, #0 1012 | cmp TAB:RB, #0
967 | beq ->fff_restv 1013 | beq ->fff_restv
968 | ldr CARG3, TAB:RB->hmask 1014 | ldr CARG3, TAB:RB->hmask
969 | ldr CARG4, STR:RC->hash 1015 | ldr CARG4, STR:RC->sid
970 | ldr NODE:INS, TAB:RB->node 1016 | ldr NODE:INS, TAB:RB->node
971 | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask 1017 | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
972 | add CARG3, CARG3, CARG3, lsl #1 1018 | add CARG3, CARG3, CARG3, lsl #1
973 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 1019 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
974 |3: // Rearranged logic, because we expect _not_ to find the key. 1020 |3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1052,7 +1098,7 @@ static void build_subroutines(BuildCtx *ctx)
1052 | ffgccheck 1098 | ffgccheck
1053 | mov CARG1, L 1099 | mov CARG1, L
1054 | mov CARG2, BASE 1100 | mov CARG2, BASE
1055 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1101 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1056 | // Returns GCstr *. 1102 | // Returns GCstr *.
1057 | ldr BASE, L->base 1103 | ldr BASE, L->base
1058 | mvn CARG2, #~LJ_TSTR 1104 | mvn CARG2, #~LJ_TSTR
@@ -1065,24 +1111,18 @@ static void build_subroutines(BuildCtx *ctx)
1065 | checktab CARG2, ->fff_fallback 1111 | checktab CARG2, ->fff_fallback
1066 | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. 1112 | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
1067 | ldr PC, [BASE, FRAME_PC] 1113 | ldr PC, [BASE, FRAME_PC]
1068 | mov CARG2, CARG1 1114 | add CARG2, BASE, #8
1069 | str BASE, L->base // Add frame since C call can throw. 1115 | sub CARG3, BASE, #8
1070 | mov CARG1, L 1116 | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1071 | str BASE, L->top // Dummy frame length is ok. 1117 | // Returns 1=found, 0=end, -1=error.
1072 | add CARG3, BASE, #8
1073 | str PC, SAVE_PC
1074 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1075 | // Returns 0 at end of traversal.
1076 | .IOS ldr BASE, L->base 1118 | .IOS ldr BASE, L->base
1077 | cmp CRET1, #0 1119 | cmp CRET1, #0
1078 | mvneq CRET2, #~LJ_TNIL 1120 | mov RC, #(2+1)*8
1079 | beq ->fff_restv // End of traversal: return nil. 1121 | bgt ->fff_res // Found key/value.
1080 | ldrd CARG12, [BASE, #8] // Copy key and value to results. 1122 | bmi ->fff_fallback // Invalid key.
1081 | ldrd CARG34, [BASE, #16] 1123 | // End of traversal: return nil.
1082 | mov RC, #(2+1)*8 1124 | mvn CRET2, #~LJ_TNIL
1083 | strd CARG12, [BASE, #-8] 1125 | b ->fff_restv
1084 | strd CARG34, [BASE]
1085 | b ->fff_res
1086 | 1126 |
1087 |.ffunc_1 pairs 1127 |.ffunc_1 pairs
1088 | checktab CARG2, ->fff_fallback 1128 | checktab CARG2, ->fff_fallback
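The rewritten next() fast function above no longer builds a dummy Lua frame: lj_tab_next is called directly with the table, the key slot and an output pair, and its return value picks one of three continuations. A C-level sketch of that contract; the key-then-value layout of the output slots is inferred from the old result copy that this hunk removes:

    #include "lj_obj.h"
    #include "lj_tab.h"

    /* Sketch of a caller consuming the new lj_tab_next() contract:
    **   r > 0   o[0]/o[1] hold the next key and value
    **   r == 0  traversal finished, produce nil
    **   r < 0   invalid key, take the error fallback
    */
    static int next_sketch(GCtab *t, cTValue *key, TValue *o)
    {
      int r = lj_tab_next(t, key, o);
      if (r > 0) return 2;    /* return key and value */
      if (r == 0) return 1;   /* return a single nil */
      return -1;              /* invalid key: let the caller raise the error */
    }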
@@ -1237,9 +1277,10 @@ static void build_subroutines(BuildCtx *ctx)
1237 | ldr CARG3, L:RA->base 1277 | ldr CARG3, L:RA->base
1238 | mv_vmstate CARG2, INTERP 1278 | mv_vmstate CARG2, INTERP
1239 | ldr CARG4, L:RA->top 1279 | ldr CARG4, L:RA->top
1240 | st_vmstate CARG2
1241 | cmp CRET1, #LUA_YIELD 1280 | cmp CRET1, #LUA_YIELD
1242 | ldr BASE, L->base 1281 | ldr BASE, L->base
1282 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
1283 | st_vmstate CARG2
1243 | bhi >8 1284 | bhi >8
1244 | subs RC, CARG4, CARG3 1285 | subs RC, CARG4, CARG3
1245 | ldr CARG1, L->maxstack 1286 | ldr CARG1, L->maxstack
@@ -1507,19 +1548,6 @@ static void build_subroutines(BuildCtx *ctx)
1507 | math_extern2 atan2 1548 | math_extern2 atan2
1508 | math_extern2 fmod 1549 | math_extern2 fmod
1509 | 1550 |
1510 |->ff_math_deg:
1511 |.if FPU
1512 | .ffunc_d math_rad
1513 | vldr d1, CFUNC:CARG3->upvalue[0]
1514 | vmul.f64 d0, d0, d1
1515 | b ->fff_resd
1516 |.else
1517 | .ffunc_n math_rad
1518 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1519 | bl extern __aeabi_dmul
1520 | b ->fff_restv
1521 |.endif
1522 |
1523 |.if HFABI 1551 |.if HFABI
1524 | .ffunc math_ldexp 1552 | .ffunc math_ldexp
1525 | ldr CARG4, [BASE, #4] 1553 | ldr CARG4, [BASE, #4]
@@ -1689,17 +1717,11 @@ static void build_subroutines(BuildCtx *ctx)
1689 |.endif 1717 |.endif
1690 |.endmacro 1718 |.endmacro
1691 | 1719 |
1692 | math_minmax math_min, gt, hi 1720 | math_minmax math_min, gt, pl
1693 | math_minmax math_max, lt, lo 1721 | math_minmax math_max, lt, le
1694 | 1722 |
1695 |//-- String library ----------------------------------------------------- 1723 |//-- String library -----------------------------------------------------
1696 | 1724 |
1697 |.ffunc_1 string_len
1698 | checkstr CARG2, ->fff_fallback
1699 | ldr CARG1, STR:CARG1->len
1700 | mvn CARG2, #~LJ_TISNUM
1701 | b ->fff_restv
1702 |
1703 |.ffunc string_byte // Only handle the 1-arg case here. 1725 |.ffunc string_byte // Only handle the 1-arg case here.
1704 | ldrd CARG12, [BASE] 1726 | ldrd CARG12, [BASE]
1705 | ldr PC, [BASE, FRAME_PC] 1727 | ldr PC, [BASE, FRAME_PC]
@@ -1732,6 +1754,7 @@ static void build_subroutines(BuildCtx *ctx)
1732 | mov CARG1, L 1754 | mov CARG1, L
1733 | str PC, SAVE_PC 1755 | str PC, SAVE_PC
1734 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 1756 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1757 |->fff_resstr:
1735 | // Returns GCstr *. 1758 | // Returns GCstr *.
1736 | ldr BASE, L->base 1759 | ldr BASE, L->base
1737 | mvn CARG2, #~LJ_TSTR 1760 | mvn CARG2, #~LJ_TSTR
@@ -1775,91 +1798,28 @@ static void build_subroutines(BuildCtx *ctx)
1775 | mvn CARG2, #~LJ_TSTR 1798 | mvn CARG2, #~LJ_TSTR
1776 | b ->fff_restv 1799 | b ->fff_restv
1777 | 1800 |
1778 |.ffunc string_rep // Only handle the 1-char case inline. 1801 |.macro ffstring_op, name
1802 | .ffunc string_ .. name
1779 | ffgccheck 1803 | ffgccheck
1780 | ldrd CARG12, [BASE] 1804 | ldr CARG3, [BASE, #4]
1781 | ldrd CARG34, [BASE, #8]
1782 | cmp NARGS8:RC, #16
1783 | bne ->fff_fallback // Exactly 2 arguments
1784 | checktp CARG2, LJ_TSTR
1785 | checktpeq CARG4, LJ_TISNUM
1786 | bne ->fff_fallback
1787 | subs CARG4, CARG3, #1
1788 | ldr CARG2, STR:CARG1->len
1789 | blt ->fff_emptystr // Count <= 0?
1790 | cmp CARG2, #1
1791 | blo ->fff_emptystr // Zero-length string?
1792 | bne ->fff_fallback // Fallback for > 1-char strings.
1793 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1794 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1795 | ldr CARG1, STR:CARG1[1]
1796 | cmp RB, CARG3
1797 | blo ->fff_fallback
1798 |1: // Fill buffer with char.
1799 | strb CARG1, [CARG2, CARG4]
1800 | subs CARG4, CARG4, #1
1801 | bge <1
1802 | b ->fff_newstr
1803 |
1804 |.ffunc string_reverse
1805 | ffgccheck
1806 | ldrd CARG12, [BASE]
1807 | cmp NARGS8:RC, #8
1808 | blo ->fff_fallback
1809 | checkstr CARG2, ->fff_fallback
1810 | ldr CARG3, STR:CARG1->len
1811 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1812 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1813 | mov CARG4, CARG3
1814 | add CARG1, STR:CARG1, #sizeof(GCstr)
1815 | cmp RB, CARG3
1816 | blo ->fff_fallback
1817 |1: // Reverse string copy.
1818 | ldrb RB, [CARG1], #1
1819 | subs CARG4, CARG4, #1
1820 | blt ->fff_newstr
1821 | strb RB, [CARG2, CARG4]
1822 | b <1
1823 |
1824 |.macro ffstring_case, name, lo
1825 | .ffunc name
1826 | ffgccheck
1827 | ldrd CARG12, [BASE]
1828 | cmp NARGS8:RC, #8 1805 | cmp NARGS8:RC, #8
1806 | ldr STR:CARG2, [BASE]
1829 | blo ->fff_fallback 1807 | blo ->fff_fallback
1830 | checkstr CARG2, ->fff_fallback 1808 | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
1831 | ldr CARG3, STR:CARG1->len 1809 | checkstr CARG3, ->fff_fallback
1832 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1810 | ldr CARG4, SBUF:CARG1->b
1833 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1811 | str BASE, L->base
1834 | mov CARG4, #0 1812 | str PC, SAVE_PC
1835 | add CARG1, STR:CARG1, #sizeof(GCstr) 1813 | str L, SBUF:CARG1->L
1836 | cmp RB, CARG3 1814 | str CARG4, SBUF:CARG1->w
1837 | blo ->fff_fallback 1815 | bl extern lj_buf_putstr_ .. name
1838 |1: // ASCII case conversion. 1816 | bl extern lj_buf_tostr
1839 | ldrb RB, [CARG1, CARG4] 1817 | b ->fff_resstr
1840 | cmp CARG4, CARG3
1841 | bhs ->fff_newstr
1842 | sub RC, RB, #lo
1843 | cmp RC, #26
1844 | eorlo RB, RB, #0x20
1845 | strb RB, [CARG2, CARG4]
1846 | add CARG4, CARG4, #1
1847 | b <1
1848 |.endmacro 1818 |.endmacro
1849 | 1819 |
1850 |ffstring_case string_lower, 65 1820 |ffstring_op reverse
1851 |ffstring_case string_upper, 97 1821 |ffstring_op lower
1852 | 1822 |ffstring_op upper
1853 |//-- Table library ------------------------------------------------------
1854 |
1855 |.ffunc_1 table_getn
1856 | checktab CARG2, ->fff_fallback
1857 | .IOS mov RA, BASE
1858 | bl extern lj_tab_len // (GCtab *t)
1859 | // Returns uint32_t (but less than 2^31).
1860 | .IOS mov BASE, RA
1861 | mvn CARG2, #~LJ_TISNUM
1862 | b ->fff_restv
1863 | 1823 |
1864 |//-- Bit library -------------------------------------------------------- 1824 |//-- Bit library --------------------------------------------------------
1865 | 1825 |
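string.reverse/lower/upper now share one stub that defers to the string buffer helpers instead of open-coding each loop. A rough C equivalent of what the stub does with the global temporary buffer; the field accesses mirror the SBUF stores above, while setsbufL and the exact helper prototypes are assumptions read off the extern calls:

    #include "lj_obj.h"
    #include "lj_buf.h"

    /* Editorial sketch of the shared fast path for string.reverse; lower and
    ** upper differ only in which lj_buf_putstr_* helper is called. */
    static GCstr *string_reverse_sketch(lua_State *L, GCstr *s)
    {
      SBuf *sb = &G(L)->tmpbuf;   /* the buffer the stub reaches via DISPATCH_GL(tmpbuf) */
      sb->w = sb->b;              /* reset the write pointer, as the stub does */
      setsbufL(sb, L);            /* assumed macro behind the 'str L, SBUF:CARG1->L' store */
      return lj_buf_tostr(lj_buf_putstr_reverse(sb, s));
    }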
@@ -2145,6 +2105,66 @@ static void build_subroutines(BuildCtx *ctx)
2145 | ldr INS, [PC, #-4] 2105 | ldr INS, [PC, #-4]
2146 | bx CRET1 2106 | bx CRET1
2147 | 2107 |
2108 |->cont_stitch: // Trace stitching.
2109 |.if JIT
2110 | // RA = resultptr, CARG4 = meta base
2111 | ldr RB, SAVE_MULTRES
2112 | ldr INS, [PC, #-4]
2113 | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
2114 | subs RB, RB, #8
2115 | decode_RA8 RC, INS // Call base.
2116 | beq >2
2117 |1: // Move results down.
2118 | ldrd CARG12, [RA]
2119 | add RA, RA, #8
2120 | subs RB, RB, #8
2121 | strd CARG12, [BASE, RC]
2122 | add RC, RC, #8
2123 | bne <1
2124 |2:
2125 | decode_RA8 RA, INS
2126 | decode_RB8 RB, INS
2127 | add RA, RA, RB
2128 |3:
2129 | cmp RA, RC
2130 | mvn CARG2, #~LJ_TNIL
2131 | bhi >9 // More results wanted?
2132 |
2133 | ldrh RA, TRACE:CARG3->traceno
2134 | ldrh RC, TRACE:CARG3->link
2135 | cmp RC, RA
2136 | beq ->cont_nop // Blacklisted.
2137 | cmp RC, #0
2138 | bne =>BC_JLOOP // Jump to stitched trace.
2139 |
2140 | // Stitch a new trace to the previous trace.
2141 | str RA, [DISPATCH, #DISPATCH_J(exitno)]
2142 | str L, [DISPATCH, #DISPATCH_J(L)]
2143 | str BASE, L->base
2144 | sub CARG1, DISPATCH, #-GG_DISP2J
2145 | mov CARG2, PC
2146 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2147 | ldr BASE, L->base
2148 | b ->cont_nop
2149 |
2150 |9: // Fill up results with nil.
2151 | strd CARG12, [BASE, RC]
2152 | add RC, RC, #8
2153 | b <3
2154 |.endif
2155 |
2156 |->vm_profhook: // Dispatch target for profiler hook.
2157#if LJ_HASPROFILE
2158 | mov CARG1, L
2159 | str BASE, L->base
2160 | mov CARG2, PC
2161 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2162 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2163 | ldr BASE, L->base
2164 | sub PC, PC, #4
2165 | b ->cont_nop
2166#endif
2167 |
2148 |//----------------------------------------------------------------------- 2168 |//-----------------------------------------------------------------------
2149 |//-- Trace exit handler ------------------------------------------------- 2169 |//-- Trace exit handler -------------------------------------------------
2150 |//----------------------------------------------------------------------- 2170 |//-----------------------------------------------------------------------
@@ -2169,14 +2189,14 @@ static void build_subroutines(BuildCtx *ctx)
2169 | add CARG1, CARG1, CARG2, asr #6 2189 | add CARG1, CARG1, CARG2, asr #6
2170 | ldr CARG2, [lr, #4] // Load exit stub group offset. 2190 | ldr CARG2, [lr, #4] // Load exit stub group offset.
2171 | sub CARG1, CARG1, lr 2191 | sub CARG1, CARG1, lr
2172 | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] 2192 | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
2173 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 2193 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
2174 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 2194 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
2175 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] 2195 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
2176 | mov CARG4, #0 2196 | mov CARG4, #0
2177 | str L, [DISPATCH, #DISPATCH_J(L)]
2178 | str BASE, L->base 2197 | str BASE, L->base
2179 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] 2198 | str L, [DISPATCH, #DISPATCH_J(L)]
2199 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
2180 | sub CARG1, DISPATCH, #-GG_DISP2J 2200 | sub CARG1, DISPATCH, #-GG_DISP2J
2181 | mov CARG2, sp 2201 | mov CARG2, sp
2182 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 2202 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
@@ -2194,14 +2214,15 @@ static void build_subroutines(BuildCtx *ctx)
2194 |.if JIT 2214 |.if JIT
2195 | ldr L, SAVE_L 2215 | ldr L, SAVE_L
2196 |1: 2216 |1:
2197 | cmp CARG1, #0 2217 | cmn CARG1, #LUA_ERRERR
2198 | blt >3 // Check for error from exit. 2218 | bhs >9 // Check for error from exit.
2199 | lsl RC, CARG1, #3 2219 | lsl RC, CARG1, #3
2200 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] 2220 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2201 | str RC, SAVE_MULTRES 2221 | str RC, SAVE_MULTRES
2202 | mov CARG3, #0 2222 | mov CARG3, #0
2223 | str BASE, L->base
2203 | ldr CARG2, LFUNC:CARG2->field_pc 2224 | ldr CARG2, LFUNC:CARG2->field_pc
2204 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] 2225 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
2205 | mv_vmstate CARG4, INTERP 2226 | mv_vmstate CARG4, INTERP
2206 | ldr KBASE, [CARG2, #PC2PROTO(k)] 2227 | ldr KBASE, [CARG2, #PC2PROTO(k)]
2207 | // Modified copy of ins_next which handles function header dispatch, too. 2228 | // Modified copy of ins_next which handles function header dispatch, too.
@@ -2210,17 +2231,48 @@ static void build_subroutines(BuildCtx *ctx)
2210 | ldr INS, [PC], #4 2231 | ldr INS, [PC], #4
2211 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 2232 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
2212 | st_vmstate CARG4 2233 | st_vmstate CARG4
2234 | cmn CARG1, #17 // Static dispatch?
2235 | beq >5
2236 | cmp OP, #BC_FUNCC+2 // Fast function?
2237 | bhs >4
2238 |2:
2213 | cmp OP, #BC_FUNCF // Function header? 2239 | cmp OP, #BC_FUNCF // Function header?
2214 | ldr OP, [DISPATCH, OP, lsl #2] 2240 | ldr OP, [DISPATCH, OP, lsl #2]
2215 | decode_RA8 RA, INS 2241 | decode_RA8 RA, INS
2216 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. 2242 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
2217 | subhs RC, RC, #8 2243 | subhs RC, RC, #8
2218 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 2244 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
2245 | ldrhs CARG3, [BASE, FRAME_FUNC]
2219 | bx OP 2246 | bx OP
2220 | 2247 |
2221 |3: // Rethrow error from the right C frame. 2248 |4: // Check frame below fast function.
2249 | ldr CARG1, [BASE, FRAME_PC]
2250 | ands CARG2, CARG1, #FRAME_TYPE
2251 | bne <2 // Trace stitching continuation?
2252 | // Otherwise set KBASE for Lua function below fast function.
2253 | ldr CARG3, [CARG1, #-4]
2254 | decode_RA8 CARG1, CARG3
2255 | sub CARG2, BASE, CARG1
2256 | ldr LFUNC:CARG3, [CARG2, #-16]
2257 | ldr CARG3, LFUNC:CARG3->field_pc
2258 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2259 | b <2
2260 |
2261 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2262 | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
2263 | decode_RD RC, INS
2264 | ldr TRACE:CARG1, [CARG1, RC, lsl #2]
2265 | ldr INS, TRACE:CARG1->startins
2266 | decode_OP OP, INS
2267 | decode_RA8 RA, INS
2268 | add OP, DISPATCH, OP, lsl #2
2269 | decode_RD RC, INS
2270 | ldr pc, [OP, #GG_DISP2STATIC]
2271 |
2272 |9: // Rethrow error from the right C frame.
2273 | rsb CARG2, CARG1, #0
2222 | mov CARG1, L 2274 | mov CARG1, L
2223 | bl extern lj_err_run // (lua_State *L) 2275 | bl extern lj_err_trace // (lua_State *L, int errcode)
2224 |.endif 2276 |.endif
2225 | 2277 |
2226 |//----------------------------------------------------------------------- 2278 |//-----------------------------------------------------------------------
@@ -2403,6 +2455,64 @@ static void build_subroutines(BuildCtx *ctx)
2403 |//-- Miscellaneous functions -------------------------------------------- 2455 |//-- Miscellaneous functions --------------------------------------------
2404 |//----------------------------------------------------------------------- 2456 |//-----------------------------------------------------------------------
2405 | 2457 |
2458 |.define NEXT_TAB, TAB:CARG1
2459 |.define NEXT_RES, CARG1
2460 |.define NEXT_IDX, CARG2
2461 |.define NEXT_TMP0, CARG3
2462 |.define NEXT_TMP1, CARG4
2463 |.define NEXT_LIM, r12
2464 |.define NEXT_RES_PTR, sp
2465 |.define NEXT_RES_VAL, [sp]
2466 |.define NEXT_RES_KEY_I, [sp, #8]
2467 |.define NEXT_RES_KEY_IT, [sp, #12]
2468 |
2469 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2470 |// Next idx returned in CRET2.
2471 |->vm_next:
2472 |.if JIT
2473 | ldr NEXT_TMP0, NEXT_TAB->array
2474 | ldr NEXT_LIM, NEXT_TAB->asize
2475 | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3
2476 |1: // Traverse array part.
2477 | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM
2478 | bhs >5
2479 | ldr NEXT_TMP1, [NEXT_TMP0, #4]
2480 | str NEXT_IDX, NEXT_RES_KEY_I
2481 | add NEXT_TMP0, NEXT_TMP0, #8
2482 | add NEXT_IDX, NEXT_IDX, #1
2483 | checktp NEXT_TMP1, LJ_TNIL
2484 | beq <1 // Skip holes in array part.
2485 | ldr NEXT_TMP0, [NEXT_TMP0, #-8]
2486 | mov NEXT_RES, NEXT_RES_PTR
2487 | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too.
2488 | mvn NEXT_TMP0, #~LJ_TISNUM
2489 | str NEXT_TMP0, NEXT_RES_KEY_IT
2490 | bx lr
2491 |
2492 |5: // Traverse hash part.
2493 | ldr NEXT_TMP0, NEXT_TAB->hmask
2494 | ldr NODE:NEXT_RES, NEXT_TAB->node
2495 | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1
2496 | add NEXT_LIM, NEXT_LIM, NEXT_TMP0
2497 | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3
2498 |6:
2499 | cmp NEXT_IDX, NEXT_LIM
2500 | bhi >9
2501 | ldr NEXT_TMP1, NODE:NEXT_RES->val.it
2502 | checktp NEXT_TMP1, LJ_TNIL
2503 | add NEXT_IDX, NEXT_IDX, #1
2504 | bxne lr
2505 | // Skip holes in hash part.
2506 | add NEXT_RES, NEXT_RES, #sizeof(Node)
2507 | b <6
2508 |
2509 |9: // End of iteration. Set the key to nil (not the value).
2510 | mvn NEXT_TMP0, #0
2511 | mov NEXT_RES, NEXT_RES_PTR
2512 | str NEXT_TMP0, NEXT_RES_KEY_IT
2513 | bx lr
2514 |.endif
2515 |
2406 |//----------------------------------------------------------------------- 2516 |//-----------------------------------------------------------------------
2407 |//-- FFI helper functions ----------------------------------------------- 2517 |//-- FFI helper functions -----------------------------------------------
2408 |//----------------------------------------------------------------------- 2518 |//-----------------------------------------------------------------------
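The new vm_next helper above gives the VM a C-callable table iterator: it scans the array part from idx onward, then the hash nodes, skipping nil values, and returns a pointer to a key/value pair along with the next index in the second result register. A pseudo-C restatement of the traversal order only; the real helper returns the array-part result through a stack-built TValue pair and a second register, which plain C cannot express:

    #include "lj_obj.h"
    #include "lj_tab.h"

    /* Pseudo-C sketch of ->vm_next; 'idxp' stands in for the next index that
    ** the assembler version returns in CRET2. */
    static cTValue *vm_next_sketch(GCtab *t, uint32_t idx, uint32_t *idxp)
    {
      for (; idx < t->asize; idx++) {              /* array part, skip holes */
        cTValue *v = arrayslot(t, idx);
        if (!tvisnil(v)) { *idxp = idx+1; return v; }  /* the key is idx itself */
      }
      for (; idx <= t->asize + t->hmask; idx++) {  /* hash part, skip nil values */
        Node *n = &noderef(t->node)[idx - t->asize];
        if (!tvisnil(&n->val)) { *idxp = idx+1; return &n->val; }
      }
      return NULL;  /* end of traversal (the asm hands back a nil key instead) */
    }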
@@ -2479,16 +2589,16 @@ static void build_subroutines(BuildCtx *ctx)
2479 |.endif 2589 |.endif
2480 | mov r11, sp 2590 | mov r11, sp
2481 | sub sp, sp, CARG1 // Readjust stack. 2591 | sub sp, sp, CARG1 // Readjust stack.
2482 | subs CARG2, CARG2, #1 2592 | subs CARG2, CARG2, #4
2483 |.if HFABI 2593 |.if HFABI
2484 | vldm RB, {d0-d7} 2594 | vldm RB, {d0-d7}
2485 |.endif 2595 |.endif
2486 | ldr RB, CCSTATE->func 2596 | ldr RB, CCSTATE->func
2487 | bmi >2 2597 | bmi >2
2488 |1: // Copy stack slots. 2598 |1: // Copy stack slots.
2489 | ldr CARG4, [CARG3, CARG2, lsl #2] 2599 | ldr CARG4, [CARG3, CARG2]
2490 | str CARG4, [sp, CARG2, lsl #2] 2600 | str CARG4, [sp, CARG2]
2491 | subs CARG2, CARG2, #1 2601 | subs CARG2, CARG2, #4
2492 | bpl <1 2602 | bpl <1
2493 |2: 2603 |2:
2494 | ldrd CARG12, CCSTATE->gpr[0] 2604 | ldrd CARG12, CCSTATE->gpr[0]
@@ -2850,6 +2960,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2850 | ins_next 2960 | ins_next
2851 break; 2961 break;
2852 2962
2963 case BC_ISTYPE:
2964 | // RA = src*8, RC = -type
2965 | ldrd CARG12, [BASE, RA]
2966 | ins_next1
2967 | cmn CARG2, RC
2968 | ins_next2
2969 | bne ->vmeta_istype
2970 | ins_next3
2971 break;
2972 case BC_ISNUM:
2973 | // RA = src*8, RC = -(TISNUM-1)
2974 | ldrd CARG12, [BASE, RA]
2975 | ins_next1
2976 | checktp CARG2, LJ_TISNUM
2977 | ins_next2
2978 | bhs ->vmeta_istype
2979 | ins_next3
2980 break;
2981
2853 /* -- Unary ops --------------------------------------------------------- */ 2982 /* -- Unary ops --------------------------------------------------------- */
2854 2983
2855 case BC_MOV: 2984 case BC_MOV:
@@ -3454,10 +3583,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3454 |->BC_TGETS_Z: 3583 |->BC_TGETS_Z:
3455 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 3584 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
3456 | ldr CARG3, TAB:CARG1->hmask 3585 | ldr CARG3, TAB:CARG1->hmask
3457 | ldr CARG4, STR:RC->hash 3586 | ldr CARG4, STR:RC->sid
3458 | ldr NODE:INS, TAB:CARG1->node 3587 | ldr NODE:INS, TAB:CARG1->node
3459 | mov TAB:RB, TAB:CARG1 3588 | mov TAB:RB, TAB:CARG1
3460 | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask 3589 | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
3461 | add CARG3, CARG3, CARG3, lsl #1 3590 | add CARG3, CARG3, CARG3, lsl #1
3462 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 3591 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
3463 |1: 3592 |1:
@@ -3520,6 +3649,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3520 | bne <1 // 'no __index' flag set: done. 3649 | bne <1 // 'no __index' flag set: done.
3521 | b ->vmeta_tgetb 3650 | b ->vmeta_tgetb
3522 break; 3651 break;
3652 case BC_TGETR:
3653 | decode_RB8 RB, INS
3654 | decode_RC8 RC, INS
3655 | // RA = dst*8, RB = table*8, RC = key*8
3656 | ldr TAB:CARG1, [BASE, RB]
3657 | ldr CARG2, [BASE, RC]
3658 | ldr CARG4, TAB:CARG1->array
3659 | ldr CARG3, TAB:CARG1->asize
3660 | add CARG4, CARG4, CARG2, lsl #3
3661 | cmp CARG2, CARG3 // In array part?
3662 | bhs ->vmeta_tgetr
3663 | ldrd CARG12, [CARG4]
3664 |->BC_TGETR_Z:
3665 | ins_next1
3666 | ins_next2
3667 | strd CARG12, [BASE, RA]
3668 | ins_next3
3669 break;
3523 3670
3524 case BC_TSETV: 3671 case BC_TSETV:
3525 | decode_RB8 RB, INS 3672 | decode_RB8 RB, INS
@@ -3583,10 +3730,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3583 |->BC_TSETS_Z: 3730 |->BC_TSETS_Z:
3584 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 3731 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
3585 | ldr CARG3, TAB:CARG1->hmask 3732 | ldr CARG3, TAB:CARG1->hmask
3586 | ldr CARG4, STR:RC->hash 3733 | ldr CARG4, STR:RC->sid
3587 | ldr NODE:INS, TAB:CARG1->node 3734 | ldr NODE:INS, TAB:CARG1->node
3588 | mov TAB:RB, TAB:CARG1 3735 | mov TAB:RB, TAB:CARG1
3589 | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask 3736 | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
3590 | add CARG3, CARG3, CARG3, lsl #1 3737 | add CARG3, CARG3, CARG3, lsl #1
3591 | mov CARG4, #0 3738 | mov CARG4, #0
3592 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 3739 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
@@ -3690,6 +3837,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3690 | barrierback TAB:CARG1, INS, CARG3 3837 | barrierback TAB:CARG1, INS, CARG3
3691 | b <2 3838 | b <2
3692 break; 3839 break;
3840 case BC_TSETR:
3841 | decode_RB8 RB, INS
3842 | decode_RC8 RC, INS
3843 | // RA = src*8, RB = table*8, RC = key*8
3844 | ldr TAB:CARG2, [BASE, RB]
3845 | ldr CARG3, [BASE, RC]
3846 | ldrb INS, TAB:CARG2->marked
3847 | ldr CARG1, TAB:CARG2->array
3848 | ldr CARG4, TAB:CARG2->asize
3849 | tst INS, #LJ_GC_BLACK // isblack(table)
3850 | add CARG1, CARG1, CARG3, lsl #3
3851 | bne >7
3852 |2:
3853 | cmp CARG3, CARG4 // In array part?
3854 | bhs ->vmeta_tsetr
3855 |->BC_TSETR_Z:
3856 | ldrd CARG34, [BASE, RA]
3857 | ins_next1
3858 | ins_next2
3859 | strd CARG34, [CARG1]
3860 | ins_next3
3861 |
3862 |7: // Possible table write barrier for the value. Skip valiswhite check.
3863 | barrierback TAB:CARG2, INS, RB
3864 | b <2
3865 break;
3693 3866
3694 case BC_TSETM: 3867 case BC_TSETM:
3695 | // RA = base*8 (table at base-1), RC = num_const (start index) 3868 | // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -3830,10 +4003,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3830 break; 4003 break;
3831 4004
3832 case BC_ITERN: 4005 case BC_ITERN:
3833 | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
3834 |.if JIT 4006 |.if JIT
3835 | // NYI: add hotloop, record BC_ITERN. 4007 | hotloop
3836 |.endif 4008 |.endif
4009 |->vm_IITERN:
4010 | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
3837 | add RA, BASE, RA 4011 | add RA, BASE, RA
3838 | ldr TAB:RB, [RA, #-16] 4012 | ldr TAB:RB, [RA, #-16]
3839 | ldr CARG1, [RA, #-8] // Get index from control var. 4013 | ldr CARG1, [RA, #-8] // Get index from control var.
@@ -3899,7 +4073,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3899 | ins_next1 4073 | ins_next1
3900 | ins_next2 4074 | ins_next2
3901 | mov CARG1, #0 4075 | mov CARG1, #0
3902 | mvn CARG2, #0x00018000 4076 | mvn CARG2, #~LJ_KEYINDEX
3903 | strd CARG1, [RA, #-8] // Initialize control var. 4077 | strd CARG1, [RA, #-8] // Initialize control var.
3904 |1: 4078 |1:
3905 | ins_next3 4079 | ins_next3
@@ -3908,9 +4082,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3908 | mov OP, #BC_ITERC 4082 | mov OP, #BC_ITERC
3909 | strb CARG1, [PC, #-4] 4083 | strb CARG1, [PC, #-4]
3910 | sub PC, RC, #0x20000 4084 | sub PC, RC, #0x20000
4085 |.if JIT
4086 | ldrb CARG1, [PC]
4087 | cmp CARG1, #BC_ITERN
4088 | bne >6
4089 |.endif
3911 | strb OP, [PC] // Subsumes ins_next1. 4090 | strb OP, [PC] // Subsumes ins_next1.
3912 | ins_next2 4091 | ins_next2
3913 | b <1 4092 | b <1
4093 |.if JIT
4094 |6: // Unpatch JLOOP.
4095 | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
4096 | ldrh CARG2, [PC, #2]
4097 | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
4098 | // Subsumes ins_next1 and ins_next2.
4099 | ldr INS, TRACE:CARG1->startins
4100 | bfi INS, OP, #0, #8
4101 | str INS, [PC], #4
4102 | b <1
4103 |.endif
3914 break; 4104 break;
3915 4105
3916 case BC_VARG: 4106 case BC_VARG:
@@ -4287,7 +4477,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4287 | st_vmstate CARG2 4477 | st_vmstate CARG2
4288 | ldr RA, TRACE:RC->mcode 4478 | ldr RA, TRACE:RC->mcode
4289 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 4479 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
4290 | str L, [DISPATCH, #DISPATCH_GL(jit_L)] 4480 | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
4291 | bx RA 4481 | bx RA
4292 |.endif 4482 |.endif
4293 break; 4483 break;
@@ -4405,6 +4595,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4405 | ldr BASE, L->base 4595 | ldr BASE, L->base
4406 | mv_vmstate CARG3, INTERP 4596 | mv_vmstate CARG3, INTERP
4407 | ldr CRET2, L->top 4597 | ldr CRET2, L->top
4598 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
4408 | lsl RC, CRET1, #3 4599 | lsl RC, CRET1, #3
4409 | st_vmstate CARG3 4600 | st_vmstate CARG3
4410 | ldr PC, [BASE, FRAME_PC] 4601 | ldr PC, [BASE, FRAME_PC]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
new file mode 100644
index 00000000..696affc6
--- /dev/null
+++ b/src/vm_arm64.dasc
@@ -0,0 +1,4222 @@
1|// Low-level VM code for ARM64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch arm64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|// The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
19|//
20|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
21|// x18 is reserved on most platforms. Don't use it, save it or restore it.
22|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
23|// depending on the instruction.
24|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
25|//
26|// x0-x7/v0-v7 hold parameters and results.
27|
28|// Fixed register assignments for the interpreter.
29|
30|// The following must be C callee-save.
31|.define BASE, x19 // Base of current Lua stack frame.
32|.define KBASE, x20 // Constants of current Lua function.
33|.define PC, x21 // Next PC.
34|.define GLREG, x22 // Global state.
35|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
36|.define TISNUM, x24 // Constant LJ_TISNUM << 47.
37|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15.
38|.define TISNIL, x26 // Constant -1LL.
39|.define fp, x29 // Yes, we have to maintain a frame pointer.
40|
41|.define ST_INTERP, w26 // Constant -1.
42|
43|// The following temporaries are not saved across C calls, except for RA/RC.
44|.define RA, x27
45|.define RC, x28
46|.define RB, x17
47|.define RAw, w27
48|.define RCw, w28
49|.define RBw, w17
50|.define INS, x16
51|.define INSw, w16
52|.define ITYPE, x15
53|.define TMP0, x8
54|.define TMP1, x9
55|.define TMP2, x10
56|.define TMP3, x11
57|.define TMP0w, w8
58|.define TMP1w, w9
59|.define TMP2w, w10
60|.define TMP3w, w11
61|
62|// Calling conventions. Also used as temporaries.
63|.define CARG1, x0
64|.define CARG2, x1
65|.define CARG3, x2
66|.define CARG4, x3
67|.define CARG5, x4
68|.define CARG1w, w0
69|.define CARG2w, w1
70|.define CARG3w, w2
71|.define CARG4w, w3
72|.define CARG5w, w4
73|
74|.define FARG1, d0
75|.define FARG2, d1
76|
77|.define CRET1, x0
78|.define CRET1w, w0
79|
80|//-----------------------------------------------------------------------
81|
82|// ARM64e pointer authentication codes (PAC).
83|.if PAUTH
84|.macro sp_auth; pacibsp; .endmacro
85|.macro br_auth, reg; braaz reg; .endmacro
86|.macro blr_auth, reg; blraaz reg; .endmacro
87|.macro ret_auth; retab; .endmacro
88|.else
89|.macro sp_auth; .endmacro
90|.macro br_auth, reg; br reg; .endmacro
91|.macro blr_auth, reg; blr reg; .endmacro
92|.macro ret_auth; ret; .endmacro
93|.endif
94|
95|//-----------------------------------------------------------------------
96|
97|// Stack layout while in interpreter. Must match with lj_frame.h.
98|
99|.define CFRAME_SPACE, 208
100|//----- 16 byte aligned, <-- sp entering interpreter
101|.define SAVE_FP_LR_, 192
102|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves
103|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves
104|// Unused [sp, #44] // 32 bit values
105|.define SAVE_NRES, [sp, #40]
106|.define SAVE_ERRF, [sp, #36]
107|.define SAVE_MULTRES, [sp, #32]
108|.define TMPD, [sp, #24] // 64 bit values
109|.define SAVE_L, [sp, #16]
110|.define SAVE_PC, [sp, #8]
111|.define SAVE_CFRAME, [sp, #0]
112|//----- 16 byte aligned, <-- sp while in interpreter.
113|
114|.define TMPDofs, #24
115|
116|.if WIN
117|// Windows unwind data is suited to r1 stored first.
118|.macro stp_unwind, r1, r2, where
119| stp r1, r2, where
120|.endmacro
121|.macro ldp_unwind, r1, r2, where
122| ldp r1, r2, where
123|.endmacro
124|.macro ldp_unwind, r1, r2, where, post_index
125| ldp r1, r2, where, post_index
126|.endmacro
127|.else
128|// Otherwise store r2 first for compact unwind info (OSX).
129|.macro stp_unwind, r1, r2, where
130| stp r2, r1, where
131|.endmacro
132|.macro ldp_unwind, r1, r2, where
133| ldp r2, r1, where
134|.endmacro
135|.macro ldp_unwind, r1, r2, where, post_index
136| ldp r2, r1, where, post_index
137|.endmacro
138|.endif
139|
140|.macro save_, gpr1, gpr2, fpr1, fpr2
141| stp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
142| stp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
143|.endmacro
144|.macro rest_, gpr1, gpr2, fpr1, fpr2
145| ldp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
146| ldp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
147|.endmacro
148|
149|.macro saveregs
150| sp_auth
151| sub sp, sp, # CFRAME_SPACE
152| stp fp, lr, [sp, # SAVE_FP_LR_]
153| add fp, sp, # SAVE_FP_LR_
154| stp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
155| save_ 21, 22, 8, 9
156| save_ 23, 24, 10, 11
157| save_ 25, 26, 12, 13
158| save_ 27, 28, 14, 15
159|.endmacro
160|.macro restoreregs
161| ldp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
162| rest_ 21, 22, 8, 9
163| rest_ 23, 24, 10, 11
164| rest_ 25, 26, 12, 13
165| rest_ 27, 28, 14, 15
166| ldp fp, lr, [sp, # SAVE_FP_LR_]
167| add sp, sp, # CFRAME_SPACE
168|.endmacro
169|
170|// Type definitions. Some of these are only used for documentation.
171|.type L, lua_State, LREG
172|.type GL, global_State, GLREG
173|.type TVALUE, TValue
174|.type GCOBJ, GCobj
175|.type STR, GCstr
176|.type TAB, GCtab
177|.type LFUNC, GCfuncL
178|.type CFUNC, GCfuncC
179|.type PROTO, GCproto
180|.type UPVAL, GCupval
181|.type NODE, Node
182|.type NARGS8, int
183|.type TRACE, GCtrace
184|.type SBUF, SBuf
185|
186|//-----------------------------------------------------------------------
187|
188|// Trap for not-yet-implemented parts.
189|.macro NYI; brk; .endmacro
190|
191|//-----------------------------------------------------------------------
192|
193|// Access to frame relative to BASE.
194|.define FRAME_FUNC, #-16
195|.define FRAME_PC, #-8
196|
197|// Endian-specific defines.
198|.if ENDIAN_LE
199|.define LO, 0
200|.define OFS_RD, 2
201|.define OFS_RB, 3
202|.define OFS_RA, 1
203|.define OFS_OP, 0
204|.else
205|.define LO, 4
206|.define OFS_RD, 0
207|.define OFS_RB, 0
208|.define OFS_RA, 2
209|.define OFS_OP, 3
210|.endif
211|
212|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
213|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
214|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
215|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro
216|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro
217|
218|// Instruction decode+dispatch.
219|.macro ins_NEXT
220| ldr INSw, [PC], #4
221| add TMP1, GL, INS, uxtb #3
222| decode_RA RA, INS
223| ldr TMP0, [TMP1, #GG_G2DISP]
224| decode_RD RC, INS
225| br_auth TMP0
226|.endmacro
227|
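Editor's note (illustration only, not part of the patch): the decode_* macros above pull the bytecode operand fields out of a 32-bit instruction with ubfx, and ins_NEXT then indexes the dispatch table with the opcode byte (GL + op*8 + GG_G2DISP). A minimal C sketch of the same field layout follows; the accessor names are chosen here for illustration.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t BCIns;

/* Field layout mirrored by the ubfx offsets above:
** OP = bits 0-7, A = bits 8-15, C = bits 16-23, B = bits 24-31, D = bits 16-31.
*/
static uint32_t bc_op(BCIns i) { return i & 0xff; }
static uint32_t bc_a(BCIns i)  { return (i >> 8) & 0xff; }
static uint32_t bc_c(BCIns i)  { return (i >> 16) & 0xff; }
static uint32_t bc_b(BCIns i)  { return (i >> 24) & 0xff; }
static uint32_t bc_d(BCIns i)  { return (i >> 16) & 0xffff; }

int main(void)
{
  BCIns ins = 0x80790229u;  /* arbitrary example encoding */
  printf("op=%u a=%u b=%u c=%u d=%u\n",
         (unsigned)bc_op(ins), (unsigned)bc_a(ins), (unsigned)bc_b(ins),
         (unsigned)bc_c(ins), (unsigned)bc_d(ins));
  /* decode_RC8RD scales an 8-bit operand by 8, the TValue slot size. */
  printf("rc*8=%u\n", (unsigned)(bc_c(ins) << 3));
  return 0;
}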
228|// Instruction footer.
229|.if 1
230| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
231| .define ins_next, ins_NEXT
232| .define ins_next_, ins_NEXT
233|.else
234| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
235| // Affects only certain kinds of benchmarks (and only with -j off).
236| .macro ins_next
237| b ->ins_next
238| .endmacro
239| .macro ins_next_
240| ->ins_next:
241| ins_NEXT
242| .endmacro
243|.endif
244|
245|// Call decode and dispatch.
246|.macro ins_callt
247| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
248| ldr PC, LFUNC:CARG3->pc
249| ldr INSw, [PC], #4
250| add TMP1, GL, INS, uxtb #3
251| decode_RA RA, INS
252| ldr TMP0, [TMP1, #GG_G2DISP]
253| add RA, BASE, RA, lsl #3
254| br_auth TMP0
255|.endmacro
256|
257|.macro ins_call
258| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
259| str PC, [BASE, FRAME_PC]
260| ins_callt
261|.endmacro
262|
263|//-----------------------------------------------------------------------
264|
265|// Macros to check the TValue type and extract the GCobj. Branch on failure.
266|.macro checktp, reg, tp, target
267| asr ITYPE, reg, #47
268| cmn ITYPE, #-tp
269| and reg, reg, #LJ_GCVMASK
270| bne target
271|.endmacro
272|.macro checktp, dst, reg, tp, target
273| asr ITYPE, reg, #47
274| cmn ITYPE, #-tp
275| and dst, reg, #LJ_GCVMASK
276| bne target
277|.endmacro
278|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
279|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
280|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
281|.macro checkint, reg, target
282| cmp TISNUMhi, reg, lsr #32
283| bne target
284|.endmacro
285|.macro checknum, reg, target
286| cmp TISNUMhi, reg, lsr #32
287| bls target
288|.endmacro
289|.macro checknumber, reg, target
290| cmp TISNUMhi, reg, lsr #32
291| blo target
292|.endmacro
293|
294|.macro init_constants
295| movn TISNIL, #0
296| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
297| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
298|.endmacro
299|
300|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
301|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
302|.macro mov_nil, reg; mov reg, TISNIL; .endmacro
303|.macro cmp_nil, reg; cmp reg, TISNIL; .endmacro
304|.macro add_TISNUM, dst, src; add dst, src, TISNUM; .endmacro
305|
306#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
307|
308#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
309|
310|.macro hotcheck, delta
311| lsr CARG1, PC, #1
312| and CARG1, CARG1, #126
313| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT
314| ldrh CARG2w, [GL, CARG1]
315| subs CARG2, CARG2, #delta
316| strh CARG2w, [GL, CARG1]
317|.endmacro
318|
319|.macro hotloop
320| hotcheck HOTCOUNT_LOOP
321| blo ->vm_hotloop
322|.endmacro
323|
324|.macro hotcall
325| hotcheck HOTCOUNT_CALL
326| blo ->vm_hotcall
327|.endmacro
328|
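Editor's note (illustration only): hotcheck hashes the bytecode PC into a small table of 16-bit hot counters next to the dispatch table and subtracts a weight; the blo in hotloop/hotcall fires when the counter underflows. A rough C sketch follows, where HOTCOUNT_SIZE is inferred from the '& 126' byte offset and the weights are assumptions (the real values live in lj_dispatch.h).

#include <stdint.h>
#include <stdbool.h>

#define HOTCOUNT_SIZE  64   /* assumed: 64 two-byte counters, matching '& 126' */

typedef struct {
  uint16_t hotcount[HOTCOUNT_SIZE];   /* lives next to the dispatch table */
} HotCounters;

/* Mirror of the hotcheck macro: ((pc >> 1) & 126) is a byte offset into the
** counter table, i.e. counter index (pc >> 2) & (HOTCOUNT_SIZE-1). The counter
** is decremented by a per-event weight; underflow marks the loop/call as hot.
*/
bool hotcheck(HotCounters *t, uintptr_t pc, uint16_t delta)
{
  uint32_t idx = (uint32_t)(pc >> 2) & (HOTCOUNT_SIZE - 1);
  uint16_t c = t->hotcount[idx];
  t->hotcount[idx] = (uint16_t)(c - delta);
  return c < delta;   /* corresponds to the blo after subs */
}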
329|// Set current VM state.
330|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro
331|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro
332|
333|// Move table write barrier back. Overwrites mark and tmp.
334|.macro barrierback, tab, mark, tmp
335| ldr tmp, GL->gc.grayagain
336| and mark, mark, #~LJ_GC_BLACK // black2gray(tab)
337| str tab, GL->gc.grayagain
338| strb mark, tab->marked
339| str tmp, tab->gclist
340|.endmacro
341|
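Editor's note (illustration only): barrierback is the GC back-barrier for table stores. It turns a black table gray again and links it onto gc.grayagain through its gclist field so the collector re-traverses it. A simplified C sketch with plain pointers instead of GCRef/MRef; LJ_GC_BLACK is quoted from lj_gc.h and should be treated as an assumption.

#include <stdint.h>

#define LJ_GC_BLACK  0x04   /* assumed mark bit, see lj_gc.h */

typedef struct GCtab GCtab;
struct GCtab {
  uint8_t marked;   /* GC color bits */
  GCtab *gclist;    /* gray-list link (a GCRef in the real structs) */
};

typedef struct {
  GCtab *grayagain; /* g->gc.grayagain: objects to re-traverse later */
} GCState;

/* Equivalent of the barrierback macro: black2gray(t), then push t onto the
** grayagain list through its gclist field.
*/
void barrierback(GCState *gc, GCtab *t)
{
  t->marked &= (uint8_t)~LJ_GC_BLACK;  /* black2gray(tab) */
  t->gclist = gc->grayagain;           /* old list head goes into t->gclist */
  gc->grayagain = t;                   /* t becomes the new list head */
}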
342|//-----------------------------------------------------------------------
343
344#if !LJ_DUALNUM
345#error "Only dual-number mode supported for ARM64 target"
346#endif
347
348/* Generate subroutines used by opcodes and other parts of the VM. */
349/* The .code_sub section should be last to help static branch prediction. */
350static void build_subroutines(BuildCtx *ctx)
351{
352 |.code_sub
353 |
354 |//-----------------------------------------------------------------------
355 |//-- Return handling ----------------------------------------------------
356 |//-----------------------------------------------------------------------
357 |
358 |->vm_returnp:
359 | // See vm_return. Also: RB = previous base.
360 | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0?
361 |
362 | // Return from pcall or xpcall fast func.
363 | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
364 | mov_true TMP0
365 | mov BASE, RB
366 | // Prepending may overwrite the pcall frame, so do it at the end.
367 | str TMP0, [RA, #-8]! // Prepend true to results.
368 |
369 |->vm_returnc:
370 | adds RC, RC, #8 // RC = (nresults+1)*8.
371 | mov CRET1, #LUA_YIELD
372 | beq ->vm_unwind_c_eh
373 | str RCw, SAVE_MULTRES
374 | ands CARG1, PC, #FRAME_TYPE
375 | beq ->BC_RET_Z // Handle regular return to Lua.
376 |
377 |->vm_return:
378 | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
379 | // CARG1 = PC & FRAME_TYPE
380 | and RB, PC, #~FRAME_TYPEP
381 | cmp CARG1, #FRAME_C
382 | sub RB, BASE, RB // RB = previous base.
383 | bne ->vm_returnp
384 |
385 | str RB, L->base
386 | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
387 | mv_vmstate TMP0w, C
388 | sub BASE, BASE, #16
389 | subs TMP2, RC, #8
390 | st_vmstate TMP0w
391 | beq >2
392 |1:
393 | subs TMP2, TMP2, #8
394 | ldr TMP0, [RA], #8
395 | str TMP0, [BASE], #8
396 | bne <1
397 |2:
398 | cmp RC, CARG2, lsl #3 // More/less results wanted?
399 | bne >6
400 |3:
401 | str BASE, L->top // Store new top.
402 |
403 |->vm_leave_cp:
404 | ldr RC, SAVE_CFRAME // Restore previous C frame.
405 | mov CRET1, #0 // Ok return status for vm_pcall.
406 | str RC, L->cframe
407 |
408 |->vm_leave_unw:
409 | restoreregs
410 | ret_auth
411 |
412 |6:
413 | bgt >7 // Less results wanted?
414 | // More results wanted. Check stack size and fill up results with nil.
415 | ldr CARG3, L->maxstack
416 | cmp BASE, CARG3
417 | bhs >8
418 | str TISNIL, [BASE], #8
419 | add RC, RC, #8
420 | b <2
421 |
422 |7: // Less results wanted.
423 | cbz CARG2, <3 // LUA_MULTRET+1 case?
424 | sub CARG1, RC, CARG2, lsl #3
425 | sub BASE, BASE, CARG1 // Shrink top.
426 | b <3
427 |
428 |8: // Corner case: need to grow stack for filling up results.
429 | // This can happen if:
430 | // - A C function grows the stack (a lot).
431 | // - The GC shrinks the stack in between.
432 | // - A return back from a lua_call() with (high) nresults adjustment.
433 | str BASE, L->top // Save current top held in BASE (yes).
434 | mov CARG1, L
435 | bl extern lj_state_growstack // (lua_State *L, int n)
436 | ldr BASE, L->top // Need the (realloced) L->top in BASE.
437 | ldrsw CARG2, SAVE_NRES
438 | b <2
439 |
440 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
441 | // (void *cframe, int errcode)
442 | add fp, CARG1, # SAVE_FP_LR_
443 | mov sp, CARG1
444 | mov CRET1, CARG2
445 | ldr L, SAVE_L
446 | ldr GL, L->glref
447 |->vm_unwind_c_eh: // Landing pad for external unwinder.
448 | mv_vmstate TMP0w, C
449 | st_vmstate TMP0w
450 | b ->vm_leave_unw
451 |
452 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
453 | // (void *cframe)
454 | add fp, CARG1, # SAVE_FP_LR_
455 | mov sp, CARG1
456 | ldr L, SAVE_L
457 | init_constants
458 | ldr GL, L->glref // Setup pointer to global state.
459 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
460 | mov RC, #16 // 2 results: false + error message.
461 | ldr BASE, L->base
462 | mov_false TMP0
463 | sub RA, BASE, #8 // Results start at BASE-8.
464 | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
465 | str TMP0, [BASE, #-8] // Prepend false to error message.
466 | st_vmstate ST_INTERP
467 | b ->vm_returnc
468 |
469 |//-----------------------------------------------------------------------
470 |//-- Grow stack for calls -----------------------------------------------
471 |//-----------------------------------------------------------------------
472 |
473 |->vm_growstack_c: // Grow stack for C function.
474 | // CARG1 = L
475 | mov CARG2, #LUA_MINSTACK
476 | b >2
477 |
478 |->vm_growstack_l: // Grow stack for Lua function.
479 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
480 | add RC, BASE, RC
481 | sub RA, RA, BASE
482 | mov CARG1, L
483 | stp BASE, RC, L->base
484 | add PC, PC, #4 // Must point after first instruction.
485 | lsr CARG2, RA, #3
486 |2:
487 | // L->base = new base, L->top = top
488 | str PC, SAVE_PC
489 | bl extern lj_state_growstack // (lua_State *L, int n)
490 | ldp BASE, RC, L->base
491 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
492 | sub NARGS8:RC, RC, BASE
493 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
494 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
495 | ins_callt // Just retry the call.
496 |
497 |//-----------------------------------------------------------------------
498 |//-- Entry points into the assembler VM ---------------------------------
499 |//-----------------------------------------------------------------------
500 |
501 |->vm_resume: // Setup C frame and resume thread.
502 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
503 | saveregs
504 | mov L, CARG1
505 | ldr GL, L->glref // Setup pointer to global state.
506 | mov BASE, CARG2
507 | str L, SAVE_L
508 | mov PC, #FRAME_CP
509 | str wzr, SAVE_NRES
510 | add TMP0, sp, #CFRAME_RESUME
511 | ldrb TMP1w, L->status
512 | str wzr, SAVE_ERRF
513 | str L, SAVE_PC // Any value outside of bytecode is ok.
514 | str xzr, SAVE_CFRAME
515 | str TMP0, L->cframe
516 | cbz TMP1w, >3
517 |
518 | // Resume after yield (like a return).
519 | str L, GL->cur_L
520 | mov RA, BASE
521 | ldp BASE, CARG1, L->base
522 | init_constants
523 | ldr PC, [BASE, FRAME_PC]
524 | strb wzr, L->status
525 | sub RC, CARG1, BASE
526 | ands CARG1, PC, #FRAME_TYPE
527 | add RC, RC, #8
528 | st_vmstate ST_INTERP
529 | str RCw, SAVE_MULTRES
530 | beq ->BC_RET_Z
531 | b ->vm_return
532 |
533 |->vm_pcall: // Setup protected C frame and enter VM.
534 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
535 | saveregs
536 | mov PC, #FRAME_CP
537 | str CARG4w, SAVE_ERRF
538 | b >1
539 |
540 |->vm_call: // Setup C frame and enter VM.
541 | // (lua_State *L, TValue *base, int nres1)
542 | saveregs
543 | mov PC, #FRAME_C
544 |
545 |1: // Entry point for vm_pcall above (PC = ftype).
546 | ldr RC, L:CARG1->cframe
547 | str CARG3w, SAVE_NRES
548 | mov L, CARG1
549 | str CARG1, SAVE_L
550 | ldr GL, L->glref // Setup pointer to global state.
551 | mov BASE, CARG2
552 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
553 | add TMP0, sp, #0
554 | str RC, SAVE_CFRAME
555 | str TMP0, L->cframe // Add our C frame to cframe chain.
556 |
557 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
558 | str L, GL->cur_L
559 | ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
560 | add PC, PC, BASE
561 | init_constants
562 | sub PC, PC, RB // PC = frame delta + frame type
563 | sub NARGS8:RC, CARG1, BASE
564 | st_vmstate ST_INTERP
565 |
566 |->vm_call_dispatch:
567 | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
568 | ldr CARG3, [BASE, FRAME_FUNC]
569 | checkfunc CARG3, ->vmeta_call
570 |
571 |->vm_call_dispatch_f:
572 | ins_call
573 | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
574 |
575 |->vm_cpcall: // Setup protected C frame, call C.
576 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
577 | saveregs
578 | mov L, CARG1
579 | ldr RA, L:CARG1->stack
580 | str CARG1, SAVE_L
581 | ldr GL, L->glref // Setup pointer to global state.
582 | ldr RB, L->top
583 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
584 | ldr RC, L->cframe
585 | sub RA, RA, RB // Compute -savestack(L, L->top).
586 | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
587 | str wzr, SAVE_ERRF // No error function.
588 | add TMP0, sp, #0
589 | str RC, SAVE_CFRAME
590 | str TMP0, L->cframe // Add our C frame to cframe chain.
591 | str L, GL->cur_L
592 | blr_auth CARG4 // (lua_State *L, lua_CFunction func, void *ud)
593 | mov BASE, CRET1
594 | mov PC, #FRAME_CP
595 | cbnz BASE, <3 // Else continue with the call.
596 | b ->vm_leave_cp // No base? Just remove C frame.
597 |
598 |//-----------------------------------------------------------------------
599 |//-- Metamethod handling ------------------------------------------------
600 |//-----------------------------------------------------------------------
601 |
602 |//-- Continuation dispatch ----------------------------------------------
603 |
604 |->cont_dispatch:
605 | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
606 | ldr LFUNC:CARG3, [RB, FRAME_FUNC]
607 | ldr CARG1, [BASE, #-32] // Get continuation.
608 | mov CARG4, BASE
609 | mov BASE, RB // Restore caller BASE.
610 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
611 |.if FFI
612 | cmp CARG1, #1
613 |.endif
614 | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC].
615 | add TMP0, RA, RC
616 | str TISNIL, [TMP0, #-8] // Ensure one valid arg.
617 |.if FFI
618 | bls >1
619 |.endif
620 | ldr CARG3, LFUNC:CARG3->pc
621 | ldr KBASE, [CARG3, #PC2PROTO(k)]
622 | // BASE = base, RA = resultptr, CARG4 = meta base
623 | br_auth CARG1
624 |
625 |.if FFI
626 |1:
627 | beq ->cont_ffi_callback // cont = 1: return from FFI callback.
628 | // cont = 0: tailcall from C function.
629 | sub CARG4, CARG4, #32
630 | sub RC, CARG4, BASE
631 | b ->vm_call_tail
632 |.endif
633 |
634 |->cont_cat: // RA = resultptr, CARG4 = meta base
635 | ldr INSw, [PC, #-4]
636 | sub CARG2, CARG4, #32
637 | ldr TMP0, [RA]
638 | str BASE, L->base
639 | decode_RB RB, INS
640 | decode_RA RA, INS
641 | add TMP1, BASE, RB, lsl #3
642 | subs TMP1, CARG2, TMP1
643 | beq >1
644 | str TMP0, [CARG2]
645 | lsr CARG3, TMP1, #3
646 | b ->BC_CAT_Z
647 |
648 |1:
649 | str TMP0, [BASE, RA, lsl #3]
650 | b ->cont_nop
651 |
652 |//-- Table indexing metamethods -----------------------------------------
653 |
654 |->vmeta_tgets1:
655 | movn CARG4, #~LJ_TSTR
656 | add CARG2, BASE, RB, lsl #3
657 | add CARG4, STR:RC, CARG4, lsl #47
658 | b >2
659 |
660 |->vmeta_tgets:
661 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
662 | str CARG2, GL->tmptv
663 | add CARG2, GL, #offsetof(global_State, tmptv)
664 |2:
665 | add CARG3, sp, TMPDofs
666 | str CARG4, TMPD
667 | b >1
668 |
669 |->vmeta_tgetb: // RB = table, RC = index
670 | add_TISNUM RC, RC
671 | add CARG2, BASE, RB, lsl #3
672 | add CARG3, sp, TMPDofs
673 | str RC, TMPD
674 | b >1
675 |
676 |->vmeta_tgetv: // RB = table, RC = key
677 | add CARG2, BASE, RB, lsl #3
678 | add CARG3, BASE, RC, lsl #3
679 |1:
680 | str BASE, L->base
681 | mov CARG1, L
682 | str PC, SAVE_PC
683 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
684 | // Returns TValue * (finished) or NULL (metamethod).
685 | cbz CRET1, >3
686 | ldr TMP0, [CRET1]
687 | str TMP0, [BASE, RA, lsl #3]
688 | ins_next
689 |
690 |3: // Call __index metamethod.
691 | // BASE = base, L->top = new base, stack = cont/func/t/k
692 | sub TMP1, BASE, #FRAME_CONT
693 | ldr BASE, L->top
694 | mov NARGS8:RC, #16 // 2 args for func(t, k).
695 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
696 | str PC, [BASE, #-24] // [cont|PC]
697 | sub PC, BASE, TMP1
698 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
699 | b ->vm_call_dispatch_f
700 |
701 |->vmeta_tgetr:
702 | sxtw CARG2, TMP1w
703 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
704 | // Returns cTValue * or NULL.
705 | mov_nil TMP0
706 | cbz CRET1, ->BC_TGETR_Z
707 | ldr TMP0, [CRET1]
708 | b ->BC_TGETR_Z
709 |
710 |//-----------------------------------------------------------------------
711 |
712 |->vmeta_tsets1:
713 | movn CARG4, #~LJ_TSTR
714 | add CARG2, BASE, RB, lsl #3
715 | add CARG4, STR:RC, CARG4, lsl #47
716 | b >2
717 |
718 |->vmeta_tsets:
719 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
720 | str CARG2, GL->tmptv
721 | add CARG2, GL, #offsetof(global_State, tmptv)
722 |2:
723 | add CARG3, sp, TMPDofs
724 | str CARG4, TMPD
725 | b >1
726 |
727 |->vmeta_tsetb: // RB = table, RC = index
728 | add_TISNUM RC, RC
729 | add CARG2, BASE, RB, lsl #3
730 | add CARG3, sp, TMPDofs
731 | str RC, TMPD
732 | b >1
733 |
734 |->vmeta_tsetv:
735 | add CARG2, BASE, RB, lsl #3
736 | add CARG3, BASE, RC, lsl #3
737 |1:
738 | str BASE, L->base
739 | mov CARG1, L
740 | str PC, SAVE_PC
741 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
742 | // Returns TValue * (finished) or NULL (metamethod).
743 | ldr TMP0, [BASE, RA, lsl #3]
744 | cbz CRET1, >3
745 | // NOBARRIER: lj_meta_tset ensures the table is not black.
746 | str TMP0, [CRET1]
747 | ins_next
748 |
749 |3: // Call __newindex metamethod.
750 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
751 | sub TMP1, BASE, #FRAME_CONT
752 | ldr BASE, L->top
753 | mov NARGS8:RC, #24 // 3 args for func(t, k, v).
754 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
755 | str TMP0, [BASE, #16] // Copy value to third argument.
756 | str PC, [BASE, #-24] // [cont|PC]
757 | sub PC, BASE, TMP1
758 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
759 | b ->vm_call_dispatch_f
760 |
761 |->vmeta_tsetr:
762 | sxtw CARG3, TMP1w
763 | str BASE, L->base
764 | mov CARG1, L
765 | str PC, SAVE_PC
766 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
767 | // Returns TValue *.
768 | b ->BC_TSETR_Z
769 |
770 |//-- Comparison metamethods ---------------------------------------------
771 |
772 |->vmeta_comp:
773 | add CARG2, BASE, RA, lsl #3
774 | sub PC, PC, #4
775 | add CARG3, BASE, RC, lsl #3
776 | str BASE, L->base
777 | mov CARG1, L
778 | str PC, SAVE_PC
779 | uxtb CARG4w, INSw
780 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
781 | // Returns 0/1 or TValue * (metamethod).
782 |3:
783 | cmp CRET1, #1
784 | bhi ->vmeta_binop
785 |4:
786 | ldrh RBw, [PC, # OFS_RD]
787 | add PC, PC, #4
788 | add RB, PC, RB, lsl #2
789 | sub RB, RB, #0x20000
790 | csel PC, PC, RB, lo
791 |->cont_nop:
792 | ins_next
793 |
794 |->cont_ra: // RA = resultptr
795 | ldr INSw, [PC, #-4]
796 | ldr TMP0, [RA]
797 | decode_RA TMP1, INS
798 | str TMP0, [BASE, TMP1, lsl #3]
799 | b ->cont_nop
800 |
801 |->cont_condt: // RA = resultptr
802 | ldr TMP0, [RA]
803 | mov_true TMP1
804 | cmp TMP1, TMP0 // Branch if result is true.
805 | b <4
806 |
807 |->cont_condf: // RA = resultptr
808 | ldr TMP0, [RA]
809 | mov_false TMP1
810 | cmp TMP0, TMP1 // Branch if result is false.
811 | b <4
812 |
813 |->vmeta_equal:
814 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
815 | and TAB:CARG3, CARG3, #LJ_GCVMASK
816 | sub PC, PC, #4
817 | str BASE, L->base
818 | mov CARG1, L
819 | str PC, SAVE_PC
820 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
821 | // Returns 0/1 or TValue * (metamethod).
822 | b <3
823 |
824 |->vmeta_equal_cd:
825 |.if FFI
826 | sub PC, PC, #4
827 | str BASE, L->base
828 | mov CARG1, L
829 | mov CARG2, INS
830 | str PC, SAVE_PC
831 | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
832 | // Returns 0/1 or TValue * (metamethod).
833 | b <3
834 |.endif
835 |
836 |->vmeta_istype:
837 | sub PC, PC, #4
838 | str BASE, L->base
839 | mov CARG1, L
840 | mov CARG2, RA
841 | mov CARG3, RC
842 | str PC, SAVE_PC
843 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
844 | b ->cont_nop
845 |
846 |//-- Arithmetic metamethods ---------------------------------------------
847 |
848 |->vmeta_arith_vn:
849 | add CARG3, BASE, RB, lsl #3
850 | add CARG4, KBASE, RC, lsl #3
851 | b >1
852 |
853 |->vmeta_arith_nv:
854 | add CARG4, BASE, RB, lsl #3
855 | add CARG3, KBASE, RC, lsl #3
856 | b >1
857 |
858 |->vmeta_unm:
859 | add CARG3, BASE, RC, lsl #3
860 | mov CARG4, CARG3
861 | b >1
862 |
863 |->vmeta_arith_vv:
864 | add CARG3, BASE, RB, lsl #3
865 | add CARG4, BASE, RC, lsl #3
866 |1:
867 | uxtb CARG5w, INSw
868 | add CARG2, BASE, RA, lsl #3
869 | str BASE, L->base
870 | mov CARG1, L
871 | str PC, SAVE_PC
872 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
873 | // Returns NULL (finished) or TValue * (metamethod).
874 | cbz CRET1, ->cont_nop
875 |
876 | // Call metamethod for binary op.
877 |->vmeta_binop:
878 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
879 | sub TMP1, CRET1, BASE
880 | str PC, [CRET1, #-24] // [cont|PC]
881 | add PC, TMP1, #FRAME_CONT
882 | mov BASE, CRET1
883 | mov NARGS8:RC, #16 // 2 args for func(o1, o2).
884 | b ->vm_call_dispatch
885 |
886 |->vmeta_len:
887 | add CARG2, BASE, RC, lsl #3
888#if LJ_52
889 | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types).
890#endif
891 | str BASE, L->base
892 | mov CARG1, L
893 | str PC, SAVE_PC
894 | bl extern lj_meta_len // (lua_State *L, TValue *o)
895 | // Returns NULL (retry) or TValue * (metamethod base).
896#if LJ_52
897 | cbnz CRET1, ->vmeta_binop // Binop call for compatibility.
898 | mov TAB:CARG1, TAB:RC
899 | b ->BC_LEN_Z
900#else
901 | b ->vmeta_binop // Binop call for compatibility.
902#endif
903 |
904 |//-- Call metamethod ----------------------------------------------------
905 |
906 |->vmeta_call: // Resolve and call __call metamethod.
907 | // RB = old base, BASE = new base, RC = nargs*8
908 | mov CARG1, L
909 | str RB, L->base // This is the callers base!
910 | sub CARG2, BASE, #16
911 | str PC, SAVE_PC
912 | add CARG3, BASE, NARGS8:RC
913 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
914 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
915 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
916 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
917 | ins_call
918 |
919 |->vmeta_callt: // Resolve __call for BC_CALLT.
920 | // BASE = old base, RA = new base, RC = nargs*8
921 | mov CARG1, L
922 | str BASE, L->base
923 | sub CARG2, RA, #16
924 | str PC, SAVE_PC
925 | add CARG3, RA, NARGS8:RC
926 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
927 | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here.
928 | ldr PC, [BASE, FRAME_PC]
929 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
930 | and LFUNC:CARG3, TMP1, #LJ_GCVMASK
931 | b ->BC_CALLT2_Z
932 |
933 |//-- Argument coercion for 'for' statement ------------------------------
934 |
935 |->vmeta_for:
936 | mov CARG1, L
937 | str BASE, L->base
938 | mov CARG2, RA
939 | str PC, SAVE_PC
940 | bl extern lj_meta_for // (lua_State *L, TValue *base)
941 | ldr INSw, [PC, #-4]
942 |.if JIT
943 | uxtb TMP0w, INSw
944 |.endif
945 | decode_RA RA, INS
946 | decode_RD RC, INS
947 |.if JIT
948 | cmp TMP0, #BC_JFORI
949 | beq =>BC_JFORI
950 |.endif
951 | b =>BC_FORI
952 |
953 |//-----------------------------------------------------------------------
954 |//-- Fast functions -----------------------------------------------------
955 |//-----------------------------------------------------------------------
956 |
957 |.macro .ffunc, name
958 |->ff_ .. name:
959 |.endmacro
960 |
961 |.macro .ffunc_1, name
962 |->ff_ .. name:
963 | ldr CARG1, [BASE]
964 | cmp NARGS8:RC, #8
965 | blo ->fff_fallback
966 |.endmacro
967 |
968 |.macro .ffunc_2, name
969 |->ff_ .. name:
970 | ldp CARG1, CARG2, [BASE]
971 | cmp NARGS8:RC, #16
972 | blo ->fff_fallback
973 |.endmacro
974 |
975 |.macro .ffunc_n, name
976 | .ffunc name
977 | ldr CARG1, [BASE]
978 | cmp NARGS8:RC, #8
979 | ldr FARG1, [BASE]
980 | blo ->fff_fallback
981 | checknum CARG1, ->fff_fallback
982 |.endmacro
983 |
984 |.macro .ffunc_nn, name
985 | .ffunc name
986 | ldp CARG1, CARG2, [BASE]
987 | cmp NARGS8:RC, #16
988 | ldp FARG1, FARG2, [BASE]
989 | blo ->fff_fallback
990 | checknum CARG1, ->fff_fallback
991 | checknum CARG2, ->fff_fallback
992 |.endmacro
993 |
994 |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
995 |.macro ffgccheck
996 | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total.
997 | cmp CARG1, CARG2
998 | blt >1
999 | bl ->fff_gcstep
1000 |1:
1001 |.endmacro
1002 |
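Editor's note (illustration only): ffgccheck loads gc.total and gc.threshold as a pair (hence the layout note in the comment) and only takes the ->fff_gcstep slow path once the allocation total has reached the threshold. Roughly, in C, under the assumption of 64-bit counters:

#include <stdint.h>

typedef struct { uint64_t total, threshold; } GCTotals;  /* threshold follows total */

/* Rough equivalent of ffgccheck: the fast function only calls the GC step
** helper (->fff_gcstep, which wraps lj_gc_step) once total reaches threshold.
*/
int gc_due(const GCTotals *gc)
{
  return gc->total >= gc->threshold;   /* cmp CARG1, CARG2; blt >1 */
}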
1003 |//-- Base library: checks -----------------------------------------------
1004 |
1005 |.ffunc_1 assert
1006 | ldr PC, [BASE, FRAME_PC]
1007 | mov_false TMP1
1008 | cmp CARG1, TMP1
1009 | bhs ->fff_fallback
1010 | str CARG1, [BASE, #-16]
1011 | sub RB, BASE, #8
1012 | subs RA, NARGS8:RC, #8
1013 | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8.
1014 | cbz RA, ->fff_res // Done if exactly 1 argument.
1015 |1:
1016 | ldr CARG1, [RB, #16]
1017 | sub RA, RA, #8
1018 | str CARG1, [RB], #8
1019 | cbnz RA, <1
1020 | b ->fff_res
1021 |
1022 |.ffunc_1 type
1023 | mov TMP0, #~LJ_TISNUM
1024 | asr ITYPE, CARG1, #47
1025 | cmn ITYPE, #~LJ_TISNUM
1026 | csinv TMP1, TMP0, ITYPE, lo
1027 | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8
1028 | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3]
1029 | b ->fff_restv
1030 |
1031 |//-- Base library: getters and setters ---------------------------------
1032 |
1033 |.ffunc_1 getmetatable
1034 | asr ITYPE, CARG1, #47
1035 | cmn ITYPE, #-LJ_TTAB
1036 | ccmn ITYPE, #-LJ_TUDATA, #4, ne
1037 | and TAB:CARG1, CARG1, #LJ_GCVMASK
1038 | bne >6
1039 |1: // Field metatable must be at same offset for GCtab and GCudata!
1040 | ldr TAB:RB, TAB:CARG1->metatable
1041 |2:
1042 | mov_nil CARG1
1043 | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
1044 | cbz TAB:RB, ->fff_restv
1045 | ldr TMP1w, TAB:RB->hmask
1046 | ldr TMP2w, STR:RC->sid
1047 | ldr NODE:CARG3, TAB:RB->node
1048 | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
1049 | add TMP1, TMP1, TMP1, lsl #1
1050 | movn CARG4, #~LJ_TSTR
1051 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
1052 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
1053 |3: // Rearranged logic, because we expect _not_ to find the key.
1054 | ldp CARG1, TMP0, NODE:CARG3->val
1055 | ldr NODE:CARG3, NODE:CARG3->next
1056 | cmp TMP0, CARG4
1057 | beq >5
1058 | cbnz NODE:CARG3, <3
1059 |4:
1060 | mov CARG1, RB // Use metatable as default result.
1061 | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
1062 | b ->fff_restv
1063 |5:
1064 | cmp_nil TMP0
1065 | bne ->fff_restv
1066 | b <4
1067 |
1068 |6:
1069 | movn TMP0, #~LJ_TISNUM
1070 | cmp ITYPE, TMP0
1071 | csel ITYPE, ITYPE, TMP0, hs
1072 | sub TMP1, GL, ITYPE, lsl #3
1073 | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8]
1074 | b <2
1075 |
1076 |.ffunc_2 setmetatable
1077 | // Fast path: no mt for table yet and not clearing the mt.
1078 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1079 | ldr TAB:TMP0, TAB:TMP1->metatable
1080 | asr ITYPE, CARG2, #47
1081 | ldrb TMP2w, TAB:TMP1->marked
1082 | cmn ITYPE, #-LJ_TTAB
1083 | and TAB:CARG2, CARG2, #LJ_GCVMASK
1084 | ccmp TAB:TMP0, #0, #0, eq
1085 | bne ->fff_fallback
1086 | str TAB:CARG2, TAB:TMP1->metatable
1087 | tbz TMP2w, #2, ->fff_restv // isblack(table)
1088 | barrierback TAB:TMP1, TMP2w, TMP0
1089 | b ->fff_restv
1090 |
1091 |.ffunc rawget
1092 | ldr CARG2, [BASE]
1093 | cmp NARGS8:RC, #16
1094 | blo ->fff_fallback
1095 | checktab CARG2, ->fff_fallback
1096 | mov CARG1, L
1097 | add CARG3, BASE, #8
1098 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1099 | // Returns cTValue *.
1100 | ldr CARG1, [CRET1]
1101 | b ->fff_restv
1102 |
1103 |//-- Base library: conversions ------------------------------------------
1104 |
1105 |.ffunc tonumber
1106 | // Only handles the number case inline (without a base argument).
1107 | ldr CARG1, [BASE]
1108 | cmp NARGS8:RC, #8
1109 | bne ->fff_fallback
1110 | checknumber CARG1, ->fff_fallback
1111 | b ->fff_restv
1112 |
1113 |.ffunc_1 tostring
1114 | // Only handles the string or number case inline.
1115 | asr ITYPE, CARG1, #47
1116 | cmn ITYPE, #-LJ_TSTR
1117 | // A __tostring method in the string base metatable is ignored.
1118 | beq ->fff_restv
1119 | // Handle numbers inline, unless a number base metatable is present.
1120 | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
1121 | str BASE, L->base
1122 | cmn ITYPE, #-LJ_TISNUM
1123 | ccmp TMP1, #0, #0, ls
1124 | str PC, SAVE_PC // Redundant (but a defined value).
1125 | bne ->fff_fallback
1126 | ffgccheck
1127 | mov CARG1, L
1128 | mov CARG2, BASE
1129 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1130 | // Returns GCstr *.
1131 | movn TMP1, #~LJ_TSTR
1132 | ldr BASE, L->base
1133 | add CARG1, CARG1, TMP1, lsl #47
1134 | b ->fff_restv
1135 |
1136 |//-- Base library: iterators -------------------------------------------
1137 |
1138 |.ffunc_1 next
1139 | checktp CARG1, LJ_TTAB, ->fff_fallback
1140 | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
1141 | ldr PC, [BASE, FRAME_PC]
1142 | add CARG2, BASE, #8
1143 | sub CARG3, BASE, #16
1144 | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1145 | // Returns 1=found, 0=end, -1=error.
1146 | mov RC, #(2+1)*8
1147 | tbnz CRET1w, #31, ->fff_fallback // Invalid key.
1148 | cbnz CRET1, ->fff_res // Found key/value.
1149 | // End of traversal: return nil.
1150 | str TISNIL, [BASE, #-16]
1151 | b ->fff_res1
1152 |
1153 |.ffunc_1 pairs
1154 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1155#if LJ_52
1156 | ldr TAB:CARG2, TAB:TMP1->metatable
1157#endif
1158 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1159 | ldr PC, [BASE, FRAME_PC]
1160#if LJ_52
1161 | cbnz TAB:CARG2, ->fff_fallback
1162#endif
1163 | mov RC, #(3+1)*8
1164 | stp CFUNC:CARG4, CARG1, [BASE, #-16]
1165 | str TISNIL, [BASE]
1166 | b ->fff_res
1167 |
1168 |.ffunc_2 ipairs_aux
1169 | checktab CARG1, ->fff_fallback
1170 | checkint CARG2, ->fff_fallback
1171 | ldr TMP1w, TAB:CARG1->asize
1172 | ldr CARG3, TAB:CARG1->array
1173 | ldr TMP0w, TAB:CARG1->hmask
1174 | add CARG2w, CARG2w, #1
1175 | cmp CARG2w, TMP1w
1176 | ldr PC, [BASE, FRAME_PC]
1177 | add_TISNUM TMP2, CARG2
1178 | mov RC, #(0+1)*8
1179 | str TMP2, [BASE, #-16]
1180 | bhs >2 // Not in array part?
1181 | ldr TMP0, [CARG3, CARG2, lsl #3]
1182 |1:
1183 | mov TMP1, #(2+1)*8
1184 | cmp_nil TMP0
1185 | str TMP0, [BASE, #-8]
1186 | csel RC, RC, TMP1, eq
1187 | b ->fff_res
1188 |2: // Check for empty hash part first. Otherwise call C function.
1189 | cbz TMP0w, ->fff_res
1190 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1191 | // Returns cTValue * or NULL.
1192 | cbz CRET1, ->fff_res
1193 | ldr TMP0, [CRET1]
1194 | b <1
1195 |
1196 |.ffunc_1 ipairs
1197 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1198#if LJ_52
1199 | ldr TAB:CARG2, TAB:TMP1->metatable
1200#endif
1201 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1202 | ldr PC, [BASE, FRAME_PC]
1203#if LJ_52
1204 | cbnz TAB:CARG2, ->fff_fallback
1205#endif
1206 | mov RC, #(3+1)*8
1207 | stp CFUNC:CARG4, CARG1, [BASE, #-16]
1208 | str TISNUM, [BASE]
1209 | b ->fff_res
1210 |
1211 |//-- Base library: catch errors ----------------------------------------
1212 |
1213 |.ffunc pcall
1214 | ldr TMP1, L->maxstack
1215 | add TMP2, BASE, NARGS8:RC
1216 | cmp TMP1, TMP2
1217 | blo ->fff_fallback
1218 | cmp NARGS8:RC, #8
1219 | ldrb TMP0w, GL->hookmask
1220 | blo ->fff_fallback
1221 | sub NARGS8:RC, NARGS8:RC, #8
1222 | mov RB, BASE
1223 | add BASE, BASE, #16
1224 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
1225 | add PC, TMP0, #16+FRAME_PCALL
1226 | beq ->vm_call_dispatch
1227 |1:
1228 | add TMP2, BASE, NARGS8:RC
1229 |2:
1230 | ldr TMP0, [TMP2, #-16]
1231 | str TMP0, [TMP2, #-8]!
1232 | cmp TMP2, BASE
1233 | bne <2
1234 | b ->vm_call_dispatch
1235 |
1236 |.ffunc xpcall
1237 | ldr TMP1, L->maxstack
1238 | add TMP2, BASE, NARGS8:RC
1239 | cmp TMP1, TMP2
1240 | blo ->fff_fallback
1241 | ldp CARG1, CARG2, [BASE]
1242 | ldrb TMP0w, GL->hookmask
1243 | subs NARGS8:TMP1, NARGS8:RC, #16
1244 | blo ->fff_fallback
1245 | mov RB, BASE
1246 | asr ITYPE, CARG2, #47
1247 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
1248 | cmn ITYPE, #-LJ_TFUNC
1249 | add PC, TMP0, #24+FRAME_PCALL
1250 | bne ->fff_fallback // Traceback must be a function.
1251 | mov NARGS8:RC, NARGS8:TMP1
1252 | add BASE, BASE, #24
1253 | stp CARG2, CARG1, [RB] // Swap function and traceback.
1254 | cbz NARGS8:RC, ->vm_call_dispatch
1255 | b <1
1256 |
1257 |//-- Coroutine library --------------------------------------------------
1258 |
1259 |.macro coroutine_resume_wrap, resume
1260 |.if resume
1261 |.ffunc_1 coroutine_resume
1262 | checktp CARG1, LJ_TTHREAD, ->fff_fallback
1263 |.else
1264 |.ffunc coroutine_wrap_aux
1265 | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr
1266 | and L:CARG1, CARG1, #LJ_GCVMASK
1267 |.endif
1268 | ldr PC, [BASE, FRAME_PC]
1269 | str BASE, L->base
1270 | ldp RB, CARG2, L:CARG1->base
1271 | ldrb TMP1w, L:CARG1->status
1272 | add TMP0, CARG2, TMP1
1273 | str PC, SAVE_PC
1274 | cmp TMP0, RB
1275 | beq ->fff_fallback
1276 | cmp TMP1, #LUA_YIELD
1277 | add TMP0, CARG2, #8
1278 | csel CARG2, CARG2, TMP0, hs
1279 | ldr CARG4, L:CARG1->maxstack
1280 | add CARG3, CARG2, NARGS8:RC
1281 | ldr RB, L:CARG1->cframe
1282 | ccmp CARG3, CARG4, #2, ls
1283 | ccmp RB, #0, #2, ls
1284 | bhi ->fff_fallback
1285 |.if resume
1286 | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC.
1287 | add BASE, BASE, #8
1288 | sub NARGS8:RC, NARGS8:RC, #8
1289 |.endif
1290 | str CARG3, L:CARG1->top
1291 | str BASE, L->top
1292 | cbz NARGS8:RC, >3
1293 |2: // Move args to coroutine.
1294 | ldr TMP0, [BASE, RB]
1295 | cmp RB, NARGS8:RC
1296 | str TMP0, [CARG2, RB]
1297 | add RB, RB, #8
1298 | bne <2
1299 |3:
1300 | mov CARG3, #0
1301 | mov L:RA, L:CARG1
1302 | mov CARG4, #0
1303 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1304 | // Returns thread status.
1305 |4:
1306 | ldp CARG3, CARG4, L:RA->base
1307 | cmp CRET1, #LUA_YIELD
1308 | ldr BASE, L->base
1309 | str L, GL->cur_L
1310 | st_vmstate ST_INTERP
1311 | bhi >8
1312 | sub RC, CARG4, CARG3
1313 | ldr CARG1, L->maxstack
1314 | add CARG2, BASE, RC
1315 | cbz RC, >6 // No results?
1316 | cmp CARG2, CARG1
1317 | mov RB, #0
1318 | bhi >9 // Need to grow stack?
1319 |
1320 | sub CARG4, RC, #8
1321 | str CARG3, L:RA->top // Clear coroutine stack.
1322 |5: // Move results from coroutine.
1323 | ldr TMP0, [CARG3, RB]
1324 | cmp RB, CARG4
1325 | str TMP0, [BASE, RB]
1326 | add RB, RB, #8
1327 | bne <5
1328 |6:
1329 |.if resume
1330 | mov_true TMP1
1331 | add RC, RC, #16
1332 |7:
1333 | str TMP1, [BASE, #-8] // Prepend true/false to results.
1334 | sub RA, BASE, #8
1335 |.else
1336 | mov RA, BASE
1337 | add RC, RC, #8
1338 |.endif
1339 | ands CARG1, PC, #FRAME_TYPE
1340 | str PC, SAVE_PC
1341 | str RCw, SAVE_MULTRES
1342 | beq ->BC_RET_Z
1343 | b ->vm_return
1344 |
1345 |8: // Coroutine returned with error (at co->top-1).
1346 |.if resume
1347 | ldr TMP0, [CARG4, #-8]!
1348 | mov_false TMP1
1349 | mov RC, #(2+1)*8
1350 | str CARG4, L:RA->top // Remove error from coroutine stack.
1351 | str TMP0, [BASE] // Copy error message.
1352 | b <7
1353 |.else
1354 | mov CARG1, L
1355 | mov CARG2, L:RA
1356 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1357 | // Never returns.
1358 |.endif
1359 |
1360 |9: // Handle stack expansion on return from yield.
1361 | mov CARG1, L
1362 | lsr CARG2, RC, #3
1363 | bl extern lj_state_growstack // (lua_State *L, int n)
1364 | mov CRET1, #0
1365 | b <4
1366 |.endmacro
1367 |
1368 | coroutine_resume_wrap 1 // coroutine.resume
1369 | coroutine_resume_wrap 0 // coroutine.wrap
1370 |
1371 |.ffunc coroutine_yield
1372 | ldr TMP0, L->cframe
1373 | add TMP1, BASE, NARGS8:RC
1374 | mov CRET1, #LUA_YIELD
1375 | stp BASE, TMP1, L->base
1376 | tbz TMP0, #0, ->fff_fallback
1377 | str xzr, L->cframe
1378 | strb CRET1w, L->status
1379 | b ->vm_leave_unw
1380 |
1381 |//-- Math library -------------------------------------------------------
1382 |
1383 |.macro math_round, func, round
1384 | .ffunc math_ .. func
1385 | ldr CARG1, [BASE]
1386 | cmp NARGS8:RC, #8
1387 | ldr d0, [BASE]
1388 | blo ->fff_fallback
1389 | cmp TISNUMhi, CARG1, lsr #32
1390 | beq ->fff_restv
1391 | blo ->fff_fallback
1392 | round d0, d0
1393 | b ->fff_resn
1394 |.endmacro
1395 |
1396 | math_round floor, frintm
1397 | math_round ceil, frintp
1398 |
1399 |.ffunc_1 math_abs
1400 | checknumber CARG1, ->fff_fallback
1401 | and CARG1, CARG1, #U64x(7fffffff,ffffffff)
1402 | bne ->fff_restv
1403 | eor CARG2w, CARG1w, CARG1w, asr #31
1404 | movz CARG3, #0x41e0, lsl #48 // 2^31.
1405 | subs CARG1w, CARG2w, CARG1w, asr #31
1406 | add_TISNUM CARG1, CARG1
1407 | csel CARG1, CARG1, CARG3, pl
1408 | // Fallthrough.
1409 |
1410 |->fff_restv:
1411 | // CARG1 = TValue result.
1412 | ldr PC, [BASE, FRAME_PC]
1413 | str CARG1, [BASE, #-16]
1414 |->fff_res1:
1415 | // PC = return.
1416 | mov RC, #(1+1)*8
1417 |->fff_res:
1418 | // RC = (nresults+1)*8, PC = return.
1419 | ands CARG1, PC, #FRAME_TYPE
1420 | str RCw, SAVE_MULTRES
1421 | sub RA, BASE, #16
1422 | bne ->vm_return
1423 | ldr INSw, [PC, #-4]
1424 | decode_RB RB, INS
1425 |5:
1426 | cmp RC, RB, lsl #3 // More results expected?
1427 | blo >6
1428 | decode_RA TMP1, INS
1429 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1430 | sub BASE, RA, TMP1, lsl #3
1431 | ins_next
1432 |
1433 |6: // Fill up results with nil.
1434 | add TMP1, RA, RC
1435 | add RC, RC, #8
1436 | str TISNIL, [TMP1, #-8]
1437 | b <5
1438 |
1439 |.macro math_extern, func
1440 | .ffunc_n math_ .. func
1441 | bl extern func
1442 | b ->fff_resn
1443 |.endmacro
1444 |
1445 |.macro math_extern2, func
1446 | .ffunc_nn math_ .. func
1447 | bl extern func
1448 | b ->fff_resn
1449 |.endmacro
1450 |
1451 |.ffunc_n math_sqrt
1452 | fsqrt d0, d0
1453 |->fff_resn:
1454 | ldr PC, [BASE, FRAME_PC]
1455 | str d0, [BASE, #-16]
1456 | b ->fff_res1
1457 |
1458 |.ffunc math_log
1459 | ldr CARG1, [BASE]
1460 | cmp NARGS8:RC, #8
1461 | ldr FARG1, [BASE]
1462 | bne ->fff_fallback // Need exactly 1 argument.
1463 | checknum CARG1, ->fff_fallback
1464 | bl extern log
1465 | b ->fff_resn
1466 |
1467 | math_extern log10
1468 | math_extern exp
1469 | math_extern sin
1470 | math_extern cos
1471 | math_extern tan
1472 | math_extern asin
1473 | math_extern acos
1474 | math_extern atan
1475 | math_extern sinh
1476 | math_extern cosh
1477 | math_extern tanh
1478 | math_extern2 pow
1479 | math_extern2 atan2
1480 | math_extern2 fmod
1481 |
1482 |.ffunc_2 math_ldexp
1483 | ldr FARG1, [BASE]
1484 | checknum CARG1, ->fff_fallback
1485 | checkint CARG2, ->fff_fallback
1486 | sxtw CARG1, CARG2w
1487 | bl extern ldexp // (double x, int exp)
1488 | b ->fff_resn
1489 |
1490 |.ffunc_n math_frexp
1491 | add CARG1, sp, TMPDofs
1492 | bl extern frexp
1493 | ldr CARG2w, TMPD
1494 | ldr PC, [BASE, FRAME_PC]
1495 | str d0, [BASE, #-16]
1496 | mov RC, #(2+1)*8
1497 | add_TISNUM CARG2, CARG2
1498 | str CARG2, [BASE, #-8]
1499 | b ->fff_res
1500 |
1501 |.ffunc_n math_modf
1502 | sub CARG1, BASE, #16
1503 | ldr PC, [BASE, FRAME_PC]
1504 | bl extern modf
1505 | mov RC, #(2+1)*8
1506 | str d0, [BASE, #-8]
1507 | b ->fff_res
1508 |
1509 |.macro math_minmax, name, cond, fcond
1510 | .ffunc_1 name
1511 | add RB, BASE, RC
1512 | add RA, BASE, #8
1513 | checkint CARG1, >4
1514 |1: // Handle integers.
1515 | ldr CARG2, [RA]
1516 | cmp RA, RB
1517 | bhs ->fff_restv
1518 | checkint CARG2, >3
1519 | cmp CARG1w, CARG2w
1520 | add RA, RA, #8
1521 | csel CARG1, CARG2, CARG1, cond
1522 | b <1
1523 |3: // Convert intermediate result to number and continue below.
1524 | scvtf d0, CARG1w
1525 | blo ->fff_fallback
1526 | ldr d1, [RA]
1527 | b >6
1528 |
1529 |4:
1530 | ldr d0, [BASE]
1531 | blo ->fff_fallback
1532 |5: // Handle numbers.
1533 | ldr CARG2, [RA]
1534 | ldr d1, [RA]
1535 | cmp RA, RB
1536 | bhs ->fff_resn
1537 | checknum CARG2, >7
1538 |6:
1539 | fcmp d0, d1
1540 | add RA, RA, #8
1541 | fcsel d0, d1, d0, fcond
1542 | b <5
1543 |7: // Convert integer to number and continue above.
1544 | scvtf d1, CARG2w
1545 | blo ->fff_fallback
1546 | b <6
1547 |.endmacro
1548 |
1549 | math_minmax math_min, gt, pl
1550 | math_minmax math_max, lt, le
1551 |
1552 |//-- String library -----------------------------------------------------
1553 |
1554 |.ffunc string_byte // Only handle the 1-arg case here.
1555 | ldp PC, CARG1, [BASE, FRAME_PC]
1556 | cmp NARGS8:RC, #8
1557 | asr ITYPE, CARG1, #47
1558 | ccmn ITYPE, #-LJ_TSTR, #0, eq
1559 | and STR:CARG1, CARG1, #LJ_GCVMASK
1560 | bne ->fff_fallback
1561 | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
1562 | ldr CARG3w, STR:CARG1->len
1563 | add_TISNUM TMP0, TMP0
1564 | str TMP0, [BASE, #-16]
1565 | mov RC, #(0+1)*8
1566 | cbz CARG3, ->fff_res
1567 | b ->fff_res1
1568 |
1569 |.ffunc string_char // Only handle the 1-arg case here.
1570 | ffgccheck
1571 | ldp PC, CARG1, [BASE, FRAME_PC]
1572 | cmp CARG1w, #255
1573 | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument.
1574 | bne ->fff_fallback
1575 | checkint CARG1, ->fff_fallback
1576 | mov CARG3, #1
1577 | // Point to the char inside the integer in the stack slot.
1578 |.if ENDIAN_LE
1579 | mov CARG2, BASE
1580 |.else
1581 | add CARG2, BASE, #7
1582 |.endif
1583 |->fff_newstr:
1584 | // CARG2 = str, CARG3 = len.
1585 | str BASE, L->base
1586 | mov CARG1, L
1587 | str PC, SAVE_PC
1588 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1589 |->fff_resstr:
1590 | // Returns GCstr *.
1591 | ldr BASE, L->base
1592 | movn TMP1, #~LJ_TSTR
1593 | add CARG1, CARG1, TMP1, lsl #47
1594 | b ->fff_restv
1595 |
1596 |.ffunc string_sub
1597 | ffgccheck
1598 | ldr CARG1, [BASE]
1599 | ldr CARG3, [BASE, #16]
1600 | cmp NARGS8:RC, #16
1601 | movn RB, #0
1602 | beq >1
1603 | blo ->fff_fallback
1604 | checkint CARG3, ->fff_fallback
1605 | sxtw RB, CARG3w
1606 |1:
1607 | ldr CARG2, [BASE, #8]
1608 | checkstr CARG1, ->fff_fallback
1609 | ldr TMP1w, STR:CARG1->len
1610 | checkint CARG2, ->fff_fallback
1611 | sxtw CARG2, CARG2w
1612 | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
1613 | add TMP2, RB, TMP1
1614 | cmp RB, #0
1615 | add TMP0, CARG2, TMP1
1616 | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1
1617 | cmp CARG2, #0
1618 | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1
1619 | cmp RB, #0
1620 | csel RB, RB, xzr, ge // if (end < 0) end = 0
1621 | cmp CARG2, #1
1622 | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1
1623 | cmp RB, TMP1
1624 | csel RB, RB, TMP1, le // if (end > len) end = len
1625 | add CARG1, STR:CARG1, #sizeof(GCstr)-1
1626 | subs CARG3, RB, CARG2 // len = end - start
1627 | add CARG2, CARG1, CARG2
1628 | add CARG3, CARG3, #1 // len += 1
1629 | bge ->fff_newstr
1630 | add STR:CARG1, GL, #offsetof(global_State, strempty)
1631 | movn TMP1, #~LJ_TSTR
1632 | add CARG1, CARG1, TMP1, lsl #47
1633 | b ->fff_restv
1634 |
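Editor's note (illustration only): the csinc/csel sequence in string_sub above implements Lua's index normalization for string.sub: negative indices count from the end, then start and end are clamped before the substring length is computed. The same rules in plain C, as a sketch:

#include <stddef.h>

/* Index normalization used by string.sub, matching the inline comments above:
** negative indices get len+1 added, then start is clamped to >= 1 and end to
** [0, len]; an empty range yields the empty string.
*/
void sub_range(size_t len, long start, long end, size_t *ofs, size_t *n)
{
  if (end < 0) end += (long)len + 1;      /* if (end < 0) end += len+1 */
  if (start < 0) start += (long)len + 1;  /* if (start < 0) start += len+1 */
  if (end < 0) end = 0;                   /* if (end < 0) end = 0 */
  if (start < 1) start = 1;               /* if (start < 1) start = 1 */
  if (end > (long)len) end = (long)len;   /* if (end > len) end = len */
  if (end >= start) {
    *ofs = (size_t)(start - 1);           /* offset into the string data */
    *n = (size_t)(end - start + 1);       /* len = end - start + 1 */
  } else {
    *ofs = 0; *n = 0;                     /* falls back to the empty string */
  }
}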
1635 |.macro ffstring_op, name
1636 | .ffunc string_ .. name
1637 | ffgccheck
1638 | ldr CARG2, [BASE]
1639 | cmp NARGS8:RC, #8
1640 | asr ITYPE, CARG2, #47
1641 | ccmn ITYPE, #-LJ_TSTR, #0, hs
1642 | and STR:CARG2, CARG2, #LJ_GCVMASK
1643 | bne ->fff_fallback
1644 | ldr TMP0, GL->tmpbuf.b
1645 | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf)
1646 | str BASE, L->base
1647 | str PC, SAVE_PC
1648 | str L, GL->tmpbuf.L
1649 | str TMP0, GL->tmpbuf.w
1650 | bl extern lj_buf_putstr_ .. name
1651 | bl extern lj_buf_tostr
1652 | b ->fff_resstr
1653 |.endmacro
1654 |
1655 |ffstring_op reverse
1656 |ffstring_op lower
1657 |ffstring_op upper
1658 |
1659 |//-- Bit library --------------------------------------------------------
1660 |
1661 |// FP number to bit conversion. Clobbers CARG1-CARG2, FARG1-FARG2.
1662 |->vm_tobit_fb:
1663 | bls ->fff_fallback
1664 | fmov FARG1, CARG1
1665 | movz CARG2, #0x4338, lsl #48
1666 | fmov FARG2, CARG2
1667 | fadd FARG1, FARG1, FARG2
1668 | fmov CARG1w, s0
1669 | br lr
1670 |
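Editor's note (illustration only): vm_tobit_fb converts a double argument for the bit.* functions with the classic bias trick: adding 2^52 + 2^51 (raw bits 0x4338000000000000) pushes the integer part into the low mantissa bits, which fmov CARG1w, s0 then reads back as a 32-bit result. A hedged C sketch of the same conversion:

#include <stdint.h>
#include <stdio.h>

/* Double -> int32 conversion for bit.* arguments: add the bias 2^52 + 2^51
** so the integer part lands in the low mantissa bits, then read the low
** 32 bits back (the fmov CARG1w, s0 in the code above).
*/
static int32_t tobit(double n)
{
  union { double d; uint64_t u; } bias, r;
  bias.u = 0x4338000000000000ull;   /* 2^52 + 2^51 */
  r.d = n + bias.d;
  return (int32_t)(uint32_t)r.u;    /* low mantissa word, wrapped to 32 bits */
}

int main(void)
{
  printf("%d %d\n", tobit(1.0), tobit(4294967295.0));  /* prints: 1 -1 */
  return 0;
}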
1671 |.macro .ffunc_bit, name
1672 | .ffunc_1 bit_..name
1673 | adr lr, >1
1674 | checkint CARG1, ->vm_tobit_fb
1675 |1:
1676 |.endmacro
1677 |
1678 |.macro .ffunc_bit_op, name, ins
1679 | .ffunc_bit name
1680 | mov RA, #8
1681 | mov TMP0w, CARG1w
1682 | adr lr, >2
1683 |1:
1684 | ldr CARG1, [BASE, RA]
1685 | cmp RA, NARGS8:RC
1686 | add RA, RA, #8
1687 | bge >9
1688 | checkint CARG1, ->vm_tobit_fb
1689 |2:
1690 | ins TMP0w, TMP0w, CARG1w
1691 | b <1
1692 |.endmacro
1693 |
1694 |.ffunc_bit_op band, and
1695 |.ffunc_bit_op bor, orr
1696 |.ffunc_bit_op bxor, eor
1697 |
1698 |.ffunc_bit tobit
1699 | mov TMP0w, CARG1w
1700 |9: // Label reused by .ffunc_bit_op users.
1701 | add_TISNUM CARG1, TMP0
1702 | b ->fff_restv
1703 |
1704 |.ffunc_bit bswap
1705 | rev TMP0w, CARG1w
1706 | add_TISNUM CARG1, TMP0
1707 | b ->fff_restv
1708 |
1709 |.ffunc_bit bnot
1710 | mvn TMP0w, CARG1w
1711 | add_TISNUM CARG1, TMP0
1712 | b ->fff_restv
1713 |
1714 |.macro .ffunc_bit_sh, name, ins, shmod
1715 | .ffunc bit_..name
1716 | ldp TMP0, CARG1, [BASE]
1717 | cmp NARGS8:RC, #16
1718 | blo ->fff_fallback
1719 | adr lr, >1
1720 | checkint CARG1, ->vm_tobit_fb
1721 |1:
1722 |.if shmod == 0
1723 | mov TMP1, CARG1
1724 |.else
1725 | neg TMP1, CARG1
1726 |.endif
1727 | mov CARG1, TMP0
1728 | adr lr, >2
1729 | checkint CARG1, ->vm_tobit_fb
1730 |2:
1731 | ins TMP0w, CARG1w, TMP1w
1732 | add_TISNUM CARG1, TMP0
1733 | b ->fff_restv
1734 |.endmacro
1735 |
1736 |.ffunc_bit_sh lshift, lsl, 0
1737 |.ffunc_bit_sh rshift, lsr, 0
1738 |.ffunc_bit_sh arshift, asr, 0
1739 |.ffunc_bit_sh rol, ror, 1
1740 |.ffunc_bit_sh ror, ror, 0
1741 |
1742 |//-----------------------------------------------------------------------
1743 |
1744 |->fff_fallback: // Call fast function fallback handler.
1745 | // BASE = new base, RC = nargs*8
1746 | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC.
1747 | ldr TMP2, L->maxstack
1748 | add TMP1, BASE, NARGS8:RC
1749 | stp BASE, TMP1, L->base
1750 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1751 | add TMP1, TMP1, #8*LUA_MINSTACK
1752 | ldr CARG3, CFUNC:CARG3->f
1753 | str PC, SAVE_PC // Redundant (but a defined value).
1754 | cmp TMP1, TMP2
1755 | mov CARG1, L
1756 | bhi >5 // Need to grow stack.
1757 | blr_auth CARG3 // (lua_State *L)
1758 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1759 | ldr BASE, L->base
1760 | cmp CRET1w, #0
1761 | lsl RC, CRET1, #3
1762 | sub RA, BASE, #16
1763 | bgt ->fff_res // Returned nresults+1?
1764 |1: // Returned 0 or -1: retry fast path.
1765 | ldr CARG1, L->top
1766 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1767 | sub NARGS8:RC, CARG1, BASE
1768 | bne ->vm_call_tail // Returned -1?
1769 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1770 | ins_callt // Returned 0: retry fast path.
1771 |
1772 |// Reconstruct previous base for vmeta_call during tailcall.
1773 |->vm_call_tail:
1774 | ands TMP0, PC, #FRAME_TYPE
1775 | and TMP1, PC, #~FRAME_TYPEP
1776 | bne >3
1777 | ldrb RAw, [PC, #-4+OFS_RA]
1778 | lsl RA, RA, #3
1779 | add TMP1, RA, #16
1780 |3:
1781 | sub RB, BASE, TMP1
1782 | b ->vm_call_dispatch // Resolve again for tailcall.
1783 |
1784 |5: // Grow stack for fallback handler.
1785 | mov CARG2, #LUA_MINSTACK
1786 | bl extern lj_state_growstack // (lua_State *L, int n)
1787 | ldr BASE, L->base
1788 | cmp CARG1, CARG1 // Set zero-flag to force retry.
1789 | b <1
1790 |
1791 |->fff_gcstep: // Call GC step function.
1792 | // BASE = new base, RC = nargs*8
1793 | sp_auth
1794 | add CARG2, BASE, NARGS8:RC // Calculate L->top.
1795 | mov RA, lr
1796 | stp BASE, CARG2, L->base
1797 | str PC, SAVE_PC // Redundant (but a defined value).
1798 | mov CARG1, L
1799 | bl extern lj_gc_step // (lua_State *L)
1800 | ldp BASE, CARG2, L->base
1801 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1802 | mov lr, RA // Help return address predictor.
1803 | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8.
1804 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1805 | ret_auth
1806 |
1807 |//-----------------------------------------------------------------------
1808 |//-- Special dispatch targets -------------------------------------------
1809 |//-----------------------------------------------------------------------
1810 |
1811 |->vm_record: // Dispatch target for recording phase.
1812 |.if JIT
1813 | ldrb CARG1w, GL->hookmask
1814 | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
1815 | bne >5
1816 | // Decrement the hookcount for consistency, but always do the call.
1817 | ldr CARG2w, GL->hookcount
1818 | tst CARG1, #HOOK_ACTIVE
1819 | bne >1
1820 | sub CARG2w, CARG2w, #1
1821 | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
1822 | beq >1
1823 | str CARG2w, GL->hookcount
1824 | b >1
1825 |.endif
1826 |
1827 |->vm_rethook: // Dispatch target for return hooks.
1828 | ldrb TMP2w, GL->hookmask
1829 | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
1830 |5: // Re-dispatch to static ins.
1831 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
1832 | br_auth TMP0
1833 |
1834 |->vm_inshook: // Dispatch target for instr/line hooks.
1835 | ldrb TMP2w, GL->hookmask
1836 | ldr TMP3w, GL->hookcount
1837 | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
1838 | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
1839 | beq <5
1840 | sub TMP3w, TMP3w, #1
1841 | str TMP3w, GL->hookcount
1842 | cbz TMP3w, >1
1843 | tbz TMP2w, #LUA_HOOKLINE, <5
1844 |1:
1845 | mov CARG1, L
1846 | str BASE, L->base
1847 | mov CARG2, PC
1848 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1849 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
1850 |3:
1851 | ldr BASE, L->base
1852 |4: // Re-dispatch to static ins.
1853 | ldr INSw, [PC, #-4]
1854 | add TMP1, GL, INS, uxtb #3
1855 | decode_RA RA, INS
1856 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
1857 | decode_RD RC, INS
1858 | br_auth TMP0
1859 |
1860 |->cont_hook: // Continue from hook yield.
1861 | ldr CARG1, [CARG4, #-40]
1862 | add PC, PC, #4
1863 | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
1864 | b <4
1865 |
1866 |->vm_hotloop: // Hot loop counter underflow.
1867 |.if JIT
1868 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
1869 | add CARG1, GL, #GG_G2DISP+GG_DISP2J
1870 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1871 | str PC, SAVE_PC
1872 | ldr CARG3, LFUNC:CARG3->pc
1873 | mov CARG2, PC
1874 | str L, [GL, #GL_J(L)]
1875 | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)]
1876 | str BASE, L->base
1877 | add CARG3, BASE, CARG3, lsl #3
1878 | str CARG3, L->top
1879 | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
1880 | b <3
1881 |.endif
1882 |
1883 |->vm_callhook: // Dispatch target for call hooks.
1884 | mov CARG2, PC
1885 |.if JIT
1886 | b >1
1887 |.endif
1888 |
1889 |->vm_hotcall: // Hot call counter underflow.
1890 |.if JIT
1891 | orr CARG2, PC, #1
1892 |1:
1893 |.endif
1894 | add TMP1, BASE, NARGS8:RC
1895 | str PC, SAVE_PC
1896 | mov CARG1, L
1897 | sub RA, RA, BASE
1898 | stp BASE, TMP1, L->base
1899 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
1900 | // Returns ASMFunction.
1901 | ldp BASE, TMP1, L->base
1902 | str xzr, SAVE_PC // Invalidate for subsequent line hook.
1903 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
1904 | add RA, BASE, RA
1905 | sub NARGS8:RC, TMP1, BASE
1906 | ldr INSw, [PC, #-4]
1907 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1908 | br_auth CRET1
1909 |
1910 |->cont_stitch: // Trace stitching.
1911 |.if JIT
1912 | // RA = resultptr, CARG4 = meta base
1913 | ldr RBw, SAVE_MULTRES
1914 | ldr INSw, [PC, #-4]
1915 | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
1916 | subs RB, RB, #8
1917 | decode_RA RC, INS // Call base.
1918 | and CARG3, CARG3, #LJ_GCVMASK
1919 | beq >2
1920 |1: // Move results down.
1921 | ldr CARG1, [RA], #8
1922 | subs RB, RB, #8
1923 | str CARG1, [BASE, RC, lsl #3]
1924 | add RC, RC, #1
1925 | bne <1
1926 |2:
1927 | decode_RA RA, INS
1928 | decode_RB RB, INS
1929 | add RA, RA, RB
1930 |3:
1931 | cmp RA, RC
1932 | bhi >9 // More results wanted?
1933 |
1934 | ldrh RAw, TRACE:CARG3->traceno
1935 | ldrh RCw, TRACE:CARG3->link
1936 | cmp RCw, RAw
1937 | beq ->cont_nop // Blacklisted.
1938 | cmp RCw, #0
1939 | bne =>BC_JLOOP // Jump to stitched trace.
1940 |
1941 | // Stitch a new trace to the previous trace.
1942 | mov CARG1, #GL_J(exitno)
1943 | str RAw, [GL, CARG1]
1944 | mov CARG1, #GL_J(L)
1945 | str L, [GL, CARG1]
1946 | str BASE, L->base
1947 | add CARG1, GL, #GG_G2J
1948 | mov CARG2, PC
1949 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
1950 | ldr BASE, L->base
1951 | b ->cont_nop
1952 |
1953 |9: // Fill up results with nil.
1954 | str TISNIL, [BASE, RC, lsl #3]
1955 | add RC, RC, #1
1956 | b <3
1957 |.endif
1958 |
1959 |->vm_profhook: // Dispatch target for profiler hook.
1960#if LJ_HASPROFILE
1961 | mov CARG1, L
1962 | str BASE, L->base
1963 | mov CARG2, PC
1964 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
1965 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
1966 | ldr BASE, L->base
1967 | sub PC, PC, #4
1968 | b ->cont_nop
1969#endif
1970 |
1971 |//-----------------------------------------------------------------------
1972 |//-- Trace exit handler -------------------------------------------------
1973 |//-----------------------------------------------------------------------
1974 |
1975 |.macro savex_, a, b
1976 | stp d..a, d..b, [sp, #a*8]
1977 | stp x..a, x..b, [sp, #32*8+a*8]
1978 |.endmacro
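|// savex_ spills one FPR pair and one GPR pair into the trace exit frame:
|// FPR slot a goes to [sp, #a*8], GPR slot a to [sp, #(32+a)*8]. vm_exit_handler
|// below uses it to build the ExitState that is passed to lj_trace_exit.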
1979 |
1980 |->vm_exit_handler:
1981 |.if JIT
1982 | sub sp, sp, #(64*8)
1983 | savex_, 0, 1
1984 | savex_, 2, 3
1985 | savex_, 4, 5
1986 | savex_, 6, 7
1987 | savex_, 8, 9
1988 | savex_, 10, 11
1989 | savex_, 12, 13
1990 | savex_, 14, 15
1991 | savex_, 16, 17
1992 | savex_, 18, 19
1993 | savex_, 20, 21
1994 | savex_, 22, 23
1995 | savex_, 24, 25
1996 | savex_, 26, 27
1997 | savex_, 28, 29
1998 | stp d30, d31, [sp, #30*8]
1999 | ldr CARG1, [sp, #64*8] // Load original value of lr.
2000 | add CARG3, sp, #64*8 // Recompute original value of sp.
2001 | mv_vmstate CARG4w, EXIT
2002 | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP.
2003 | sub CARG1, CARG1, lr
2004 | ldr L, GL->cur_L
2005 | lsr CARG1, CARG1, #2
2006 | ldr BASE, GL->jit_base
2007 | sub CARG1, CARG1, #2
2008 | ldr CARG2w, [lr] // Load trace number.
2009 | st_vmstate CARG4w
2010 |.if ENDIAN_BE
2011 | rev32 CARG2, CARG2
2012 |.endif
2013 | str BASE, L->base
2014 | ubfx CARG2w, CARG2w, #5, #16
2015 | str CARG1w, [GL, #GL_J(exitno)]
2016 | str CARG2w, [GL, #GL_J(parent)]
2017 | str L, [GL, #GL_J(L)]
2018 | str xzr, GL->jit_base
2019 | add CARG1, GL, #GG_G2J
2020 | mov CARG2, sp
2021 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
2022 | // Returns MULTRES (unscaled) or negated error code.
2023 | ldr CARG2, L->cframe
2024 | ldr BASE, L->base
2025 | and sp, CARG2, #CFRAME_RAWMASK
2026 | ldr PC, SAVE_PC // Get SAVE_PC.
2027 | str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
2028 | b >1
2029 |.endif
2030 |
2031 |->vm_exit_interp:
2032 | // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
2033 |.if JIT
2034 | ldr L, SAVE_L
2035 |1:
2036 | init_constants
2037 | cmn CARG1w, #LUA_ERRERR
2038 | bhs >9 // Check for error from exit.
2039 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2040 | lsl RC, CARG1, #3
2041 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2042 | str RCw, SAVE_MULTRES
2043 | str BASE, L->base
2044 | ldr CARG2, LFUNC:CARG2->pc
2045 | str xzr, GL->jit_base
2046 | mv_vmstate CARG4w, INTERP
2047 | ldr KBASE, [CARG2, #PC2PROTO(k)]
2048 | // Modified copy of ins_next which handles function header dispatch, too.
2049 | ldrb RBw, [PC, # OFS_OP]
2050 | ldr INSw, [PC], #4
2051 | st_vmstate CARG4w
2052 | cmn CARG1w, #17 // Static dispatch?
2053 | beq >5
2054 | cmp RBw, #BC_FUNCC+2 // Fast function?
2055 | add TMP1, GL, INS, uxtb #3
2056 | bhs >4
2057 |2:
2058 | cmp RBw, #BC_FUNCF // Function header?
2059 | add TMP0, GL, RB, uxtb #3
2060 | ldr RB, [TMP0, #GG_G2DISP]
2061 | decode_RA RA, INS
2062 | lsr TMP0, INS, #16
2063 | csel RC, TMP0, RC, lo
2064 | blo >3
2065 | ldr CARG3, [BASE, FRAME_FUNC]
2066 | sub RC, RC, #8
2067 | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8
2068 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2069 |3:
2070 | br_auth RB
2071 |
2072 |4: // Check frame below fast function.
2073 | ldr CARG1, [BASE, FRAME_PC]
2074 | ands CARG2, CARG1, #FRAME_TYPE
2075 | bne <2 // Trace stitching continuation?
2076 | // Otherwise set KBASE for Lua function below fast function.
2077 | ldr CARG3w, [CARG1, #-4]
2078 | decode_RA CARG1, CARG3
2079 | sub CARG2, BASE, CARG1, lsl #3
2080 | ldr LFUNC:CARG3, [CARG2, #-32]
2081 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2082 | ldr CARG3, LFUNC:CARG3->pc
2083 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2084 | b <2
2085 |
2086 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2087 | ldr RA, [GL, #GL_J(trace)]
2088 | decode_RD RC, INS
2089 | ldr TRACE:RA, [RA, RC, lsl #3]
2090 | ldr INSw, TRACE:RA->startins
2091 | add TMP0, GL, INS, uxtb #3
2092 | decode_RA RA, INS
2093 | ldr RB, [TMP0, #GG_G2DISP+GG_DISP2STATIC]
2094 | decode_RD RC, INS
2095 | br_auth RB
2096 |
2097 |9: // Rethrow error from the right C frame.
2098 | neg CARG2w, CARG1w
2099 | mov CARG1, L
2100 | bl extern lj_err_trace // (lua_State *L, int errcode)
2101 |.endif
2102 |
2103 |//-----------------------------------------------------------------------
2104 |//-- Math helper functions ----------------------------------------------
2105 |//-----------------------------------------------------------------------
2106 |
2107 | // int lj_vm_modi(int dividend, int divisor);
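| // Floored modulo: the remainder takes the sign of the divisor, matching
| // Lua's % semantics, e.g. lj_vm_modi(-5, 3) == 1 and lj_vm_modi(5, -3) == -1.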
2108 |->vm_modi:
2109 | eor CARG4w, CARG1w, CARG2w
2110 | cmp CARG4w, #0
2111 | eor CARG3w, CARG1w, CARG1w, asr #31
2112 | eor CARG4w, CARG2w, CARG2w, asr #31
2113 | sub CARG3w, CARG3w, CARG1w, asr #31
2114 | sub CARG4w, CARG4w, CARG2w, asr #31
2115 | udiv CARG1w, CARG3w, CARG4w
2116 | msub CARG1w, CARG1w, CARG4w, CARG3w
2117 | ccmp CARG1w, #0, #4, mi
2118 | sub CARG3w, CARG1w, CARG4w
2119 | csel CARG1w, CARG1w, CARG3w, eq
2120 | eor CARG3w, CARG1w, CARG2w
2121 | cmp CARG3w, #0
2122 | cneg CARG1w, CARG1w, mi
2123 | ret
2124 |
2125 |//-----------------------------------------------------------------------
2126 |//-- Miscellaneous functions --------------------------------------------
2127 |//-----------------------------------------------------------------------
2128 |
2129 |.define NEXT_TAB, TAB:CARG1
2130 |.define NEXT_RES, CARG1
2131 |.define NEXT_IDX, CARG2w
2132 |.define NEXT_LIM, CARG3w
2133 |.define NEXT_TMP0, TMP0
2134 |.define NEXT_TMP0w, TMP0w
2135 |.define NEXT_TMP1, TMP1
2136 |.define NEXT_TMP1w, TMP1w
2137 |.define NEXT_RES_PTR, sp
2138 |.define NEXT_RES_VAL, [sp]
2139 |.define NEXT_RES_KEY, [sp, #8]
2140 |
2141 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2142 |// Next idx returned in CRET2w.
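|// Walks the array part first (skipping nil holes), then the hash part.
|// The result is written to the two TValue slots at the stack pointer
|// (value at [sp], key at [sp, #8]) and a pointer to them is returned;
|// at the end of the iteration only the key is set to nil.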
2143 |->vm_next:
2144 |.if JIT
2145 | ldr NEXT_LIM, NEXT_TAB->asize
2146 | ldr NEXT_TMP1, NEXT_TAB->array
2147 |1: // Traverse array part.
2148 | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM
2149 | bhs >5 // Index points after array part?
2150 | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3]
2151 | cmn NEXT_TMP0, #-LJ_TNIL
2152 | cinc NEXT_IDX, NEXT_IDX, eq
2153 | beq <1 // Skip holes in array part.
2154 | str NEXT_TMP0, NEXT_RES_VAL
2155 | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16
2156 | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY
2157 | add NEXT_IDX, NEXT_IDX, #1
2158 | mov NEXT_RES, NEXT_RES_PTR
2159 |4:
2160 | ret
2161 |
2162 |5: // Traverse hash part.
2163 | ldr NEXT_TMP1w, NEXT_TAB->hmask
2164 | ldr NODE:NEXT_RES, NEXT_TAB->node
2165 | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1
2166 | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w
2167 | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3
2168 |6:
2169 | cmp NEXT_IDX, NEXT_LIM
2170 | bhi >9
2171 | ldr NEXT_TMP0, NODE:NEXT_RES->val
2172 | cmn NEXT_TMP0, #-LJ_TNIL
2173 | add NEXT_IDX, NEXT_IDX, #1
2174 | bne <4
2175 | // Skip holes in hash part.
2176 | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node)
2177 | b <6
2178 |
2179 |9: // End of iteration. Set the key to nil (not the value).
2180 | movn NEXT_TMP0, #0
2181 | str NEXT_TMP0, NEXT_RES_KEY
2182 | mov NEXT_RES, NEXT_RES_PTR
2183 | ret
2184 |.endif
2185 |
2186 |//-----------------------------------------------------------------------
2187 |//-- FFI helper functions -----------------------------------------------
2188 |//-----------------------------------------------------------------------
2189 |
2190 |// Handler for callback functions.
2191 |// Saveregs already performed. Callback slot number in w9, g in x10.
2192 |->vm_ffi_callback:
2193 |.if FFI
2194 |.type CTSTATE, CTState, PC
2195 | saveregs
2196 | ldr CTSTATE, GL:x10->ctype_state
2197 | mov GL, x10
2198 | add x10, sp, # CFRAME_SPACE
2199 | str w9, CTSTATE->cb.slot
2200 | stp x0, x1, CTSTATE->cb.gpr[0]
2201 | stp d0, d1, CTSTATE->cb.fpr[0]
2202 | stp x2, x3, CTSTATE->cb.gpr[2]
2203 | stp d2, d3, CTSTATE->cb.fpr[2]
2204 | stp x4, x5, CTSTATE->cb.gpr[4]
2205 | stp d4, d5, CTSTATE->cb.fpr[4]
2206 | stp x6, x7, CTSTATE->cb.gpr[6]
2207 | stp d6, d7, CTSTATE->cb.fpr[6]
2208 | str x10, CTSTATE->cb.stack
2209 | mov CARG1, CTSTATE
2210 | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
2211 | mov CARG2, sp
2212 | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
2213 | // Returns lua_State *.
2214 | ldp BASE, RC, L:CRET1->base
2215 | init_constants
2216 | mov L, CRET1
2217 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2218 | sub RC, RC, BASE
2219 | st_vmstate ST_INTERP
2220 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2221 | ins_callt
2222 |.endif
2223 |
2224 |->cont_ffi_callback: // Return from FFI callback.
2225 |.if FFI
2226 | ldr CTSTATE, GL->ctype_state
2227 | stp BASE, CARG4, L->base
2228 | str L, CTSTATE->L
2229 | mov CARG1, CTSTATE
2230 | mov CARG2, RA
2231 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2232 | ldp x0, x1, CTSTATE->cb.gpr[0]
2233 | ldp d0, d1, CTSTATE->cb.fpr[0]
2234 | b ->vm_leave_unw
2235 |.endif
2236 |
2237 |->vm_ffi_call: // Call C function via FFI.
2238 | // Caveat: needs special frame unwinding, see below.
2239 |.if FFI
2240 | .type CCSTATE, CCallState, x19
2241 | sp_auth
2242 | stp_unwind CCSTATE, x20, [sp, #-32]!
2243 | stp fp, lr, [sp, #16]
2244 | add fp, sp, #16
2245 | mov CCSTATE, x0
2246 | ldr TMP0w, CCSTATE:x0->spadj
2247 | ldrb TMP1w, CCSTATE->nsp
2248 | add TMP2, CCSTATE, #offsetof(CCallState, stack)
2249 | subs TMP1, TMP1, #8
2250 | ldr TMP3, CCSTATE->func
2251 | sub sp, sp, TMP0
2252 | bmi >2
2253 |1: // Copy stack slots
2254 | ldr TMP0, [TMP2, TMP1]
2255 | str TMP0, [sp, TMP1]
2256 | subs TMP1, TMP1, #8
2257 | bpl <1
2258 |2:
2259 | ldp x0, x1, CCSTATE->gpr[0]
2260 | ldp d0, d1, CCSTATE->fpr[0]
2261 | ldp x2, x3, CCSTATE->gpr[2]
2262 | ldp d2, d3, CCSTATE->fpr[2]
2263 | ldp x4, x5, CCSTATE->gpr[4]
2264 | ldp d4, d5, CCSTATE->fpr[4]
2265 | ldp x6, x7, CCSTATE->gpr[6]
2266 | ldp d6, d7, CCSTATE->fpr[6]
2267 | ldr x8, CCSTATE->retp
2268 | blr_auth TMP3
2269 | sub sp, fp, #16
2270 | stp x0, x1, CCSTATE->gpr[0]
2271 | stp d0, d1, CCSTATE->fpr[0]
2272 | stp d2, d3, CCSTATE->fpr[2]
2273 | ldp fp, lr, [sp, #16]
2274 | ldp_unwind CCSTATE, x20, [sp], #32
2275 | ret_auth
2276 |.endif
2277 |// Note: vm_ffi_call must be the last function in this object file!
2278 |
2279 |//-----------------------------------------------------------------------
2280}
2281
2282/* Generate the code for a single instruction. */
2283static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2284{
2285 int vk = 0;
2286 |=>defop:
2287
2288 switch (op) {
2289
2290 /* -- Comparison ops ---------------------------------------------------- */
2291
2292 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2293
2294 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2295 | // RA = src1, RC = src2, JMP with RC = target
2296 | ldr CARG1, [BASE, RA, lsl #3]
2297 | ldrh RBw, [PC, # OFS_RD]
2298 | ldr CARG2, [BASE, RC, lsl #3]
2299 | add PC, PC, #4
2300 | add RB, PC, RB, lsl #2
2301 | sub RB, RB, #0x20000
2302 | checkint CARG1, >3
2303 | checkint CARG2, >4
2304 | cmp CARG1w, CARG2w
2305 if (op == BC_ISLT) {
2306 | csel PC, RB, PC, lt
2307 } else if (op == BC_ISGE) {
2308 | csel PC, RB, PC, ge
2309 } else if (op == BC_ISLE) {
2310 | csel PC, RB, PC, le
2311 } else {
2312 | csel PC, RB, PC, gt
2313 }
2314 |1:
2315 | ins_next
2316 |
2317 |3: // RA not int.
2318 | ldr FARG1, [BASE, RA, lsl #3]
2319 | blo ->vmeta_comp
2320 | ldr FARG2, [BASE, RC, lsl #3]
2321 | cmp TISNUMhi, CARG2, lsr #32
2322 | bhi >5
2323 | bne ->vmeta_comp
2324 | // RA number, RC int.
2325 | scvtf FARG2, CARG2w
2326 | b >5
2327 |
2328 |4: // RA int, RC not int
2329 | ldr FARG2, [BASE, RC, lsl #3]
2330 | blo ->vmeta_comp
2331 | // RA int, RC number.
2332 | scvtf FARG1, CARG1w
2333 |
2334 |5: // RA number, RC number
2335 | fcmp FARG1, FARG2
2336 | // To preserve NaN semantics, GE/GT branch on unordered, but LT/LE don't.
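| // (GE/GT take the branch exactly when LT/LE would not, so any source-level
| // comparison involving a NaN operand evaluates to false.)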
2337 if (op == BC_ISLT) {
2338 | csel PC, RB, PC, lo
2339 } else if (op == BC_ISGE) {
2340 | csel PC, RB, PC, hs
2341 } else if (op == BC_ISLE) {
2342 | csel PC, RB, PC, ls
2343 } else {
2344 | csel PC, RB, PC, hi
2345 }
2346 | b <1
2347 break;
2348
2349 case BC_ISEQV: case BC_ISNEV:
2350 vk = op == BC_ISEQV;
2351 | // RA = src1, RC = src2, JMP with RC = target
2352 | ldr CARG1, [BASE, RA, lsl #3]
2353 | add RC, BASE, RC, lsl #3
2354 | ldrh RBw, [PC, # OFS_RD]
2355 | ldr CARG3, [RC]
2356 | add PC, PC, #4
2357 | add RB, PC, RB, lsl #2
2358 | sub RB, RB, #0x20000
2359 | asr ITYPE, CARG3, #47
2360 | cmn ITYPE, #-LJ_TISNUM
2361 if (vk) {
2362 | bls ->BC_ISEQN_Z
2363 } else {
2364 | bls ->BC_ISNEN_Z
2365 }
2366 | // RC is not a number.
2367 | asr TMP0, CARG1, #47
2368 |.if FFI
2369 | // Check if RC or RA is a cdata.
2370 | cmn ITYPE, #-LJ_TCDATA
2371 | ccmn TMP0, #-LJ_TCDATA, #4, ne
2372 | beq ->vmeta_equal_cd
2373 |.endif
2374 | cmp CARG1, CARG3
2375 | bne >2
2376 | // Tag and value are equal.
2377 if (vk) {
2378 |->BC_ISEQV_Z:
2379 | mov PC, RB // Perform branch.
2380 }
2381 |1:
2382 | ins_next
2383 |
2384 |2: // Check if the tags are the same and it's a table or userdata.
2385 | cmp ITYPE, TMP0
2386 | ccmn ITYPE, #-LJ_TISTABUD, #2, eq
2387 if (vk) {
2388 | bhi <1
2389 } else {
2390 | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction.
2391 }
2392 | // Different tables or userdatas. Need to check __eq metamethod.
2393 | // Field metatable must be at same offset for GCtab and GCudata!
2394 | and TAB:CARG2, CARG1, #LJ_GCVMASK
2395 | ldr TAB:TMP2, TAB:CARG2->metatable
2396 if (vk) {
2397 | cbz TAB:TMP2, <1 // No metatable?
2398 | ldrb TMP1w, TAB:TMP2->nomm
2399 | mov CARG4, #0 // ne = 0
2400 | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done.
2401 } else {
2402 | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable?
2403 | ldrb TMP1w, TAB:TMP2->nomm
2404 | mov CARG4, #1 // ne = 1.
2405 | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done.
2406 }
2407 | b ->vmeta_equal
2408 break;
2409
2410 case BC_ISEQS: case BC_ISNES:
2411 vk = op == BC_ISEQS;
2412 | // RA = src, RC = str_const (~), JMP with RC = target
2413 | ldr CARG1, [BASE, RA, lsl #3]
2414 | mvn RC, RC
2415 | ldrh RBw, [PC, # OFS_RD]
2416 | ldr CARG2, [KBASE, RC, lsl #3]
2417 | add PC, PC, #4
2418 | movn TMP0, #~LJ_TSTR
2419 |.if FFI
2420 | asr ITYPE, CARG1, #47
2421 |.endif
2422 | add RB, PC, RB, lsl #2
2423 | add CARG2, CARG2, TMP0, lsl #47
2424 | sub RB, RB, #0x20000
2425 |.if FFI
2426 | cmn ITYPE, #-LJ_TCDATA
2427 | beq ->vmeta_equal_cd
2428 |.endif
2429 | cmp CARG1, CARG2
2430 if (vk) {
2431 | csel PC, RB, PC, eq
2432 } else {
2433 | csel PC, RB, PC, ne
2434 }
2435 | ins_next
2436 break;
2437
2438 case BC_ISEQN: case BC_ISNEN:
2439 vk = op == BC_ISEQN;
2440 | // RA = src, RC = num_const (~), JMP with RC = target
2441 | ldr CARG1, [BASE, RA, lsl #3]
2442 | add RC, KBASE, RC, lsl #3
2443 | ldrh RBw, [PC, # OFS_RD]
2444 | ldr CARG3, [RC]
2445 | add PC, PC, #4
2446 | add RB, PC, RB, lsl #2
2447 | sub RB, RB, #0x20000
2448 if (vk) {
2449 |->BC_ISEQN_Z:
2450 } else {
2451 |->BC_ISNEN_Z:
2452 }
2453 | checkint CARG1, >4
2454 | checkint CARG3, >6
2455 | cmp CARG1w, CARG3w
2456 |1:
2457 if (vk) {
2458 | csel PC, RB, PC, eq
2459 |2:
2460 } else {
2461 |2:
2462 | csel PC, RB, PC, ne
2463 }
2464 |3:
2465 | ins_next
2466 |
2467 |4: // RA not int.
2468 |.if FFI
2469 | blo >7
2470 |.else
2471 | blo <2
2472 |.endif
2473 | ldr FARG1, [BASE, RA, lsl #3]
2474 | ldr FARG2, [RC]
2475 | cmp TISNUMhi, CARG3, lsr #32
2476 | bne >5
2477 | // RA number, RC int.
2478 | scvtf FARG2, CARG3w
2479 |5:
2480 | // RA number, RC number.
2481 | fcmp FARG1, FARG2
2482 | b <1
2483 |
2484 |6: // RA int, RC number
2485 | ldr FARG2, [RC]
2486 | scvtf FARG1, CARG1w
2487 | fcmp FARG1, FARG2
2488 | b <1
2489 |
2490 |.if FFI
2491 |7:
2492 | asr ITYPE, CARG1, #47
2493 | cmn ITYPE, #-LJ_TCDATA
2494 | bne <2
2495 | b ->vmeta_equal_cd
2496 |.endif
2497 break;
2498
2499 case BC_ISEQP: case BC_ISNEP:
2500 vk = op == BC_ISEQP;
2501 | // RA = src, RC = primitive_type (~), JMP with RC = target
2502 | ldr TMP0, [BASE, RA, lsl #3]
2503 | ldrh RBw, [PC, # OFS_RD]
2504 | add PC, PC, #4
2505 | add RC, RC, #1
2506 | add RB, PC, RB, lsl #2
2507 |.if FFI
2508 | asr ITYPE, TMP0, #47
2509 | cmn ITYPE, #-LJ_TCDATA
2510 | beq ->vmeta_equal_cd
2511 | cmn RC, ITYPE
2512 |.else
2513 | cmn RC, TMP0, asr #47
2514 |.endif
2515 | sub RB, RB, #0x20000
2516 if (vk) {
2517 | csel PC, RB, PC, eq
2518 } else {
2519 | csel PC, RB, PC, ne
2520 }
2521 | ins_next
2522 break;
2523
2524 /* -- Unary test and copy ops ------------------------------------------- */
2525
2526 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2527 | // RA = dst or unused, RC = src, JMP with RC = target
2528 | ldrh RBw, [PC, # OFS_RD]
2529 | ldr TMP0, [BASE, RC, lsl #3]
2530 | add PC, PC, #4
2531 | mov_false TMP1
2532 | add RB, PC, RB, lsl #2
2533 | cmp TMP0, TMP1
2534 | sub RB, RB, #0x20000
2535 if (op == BC_ISTC || op == BC_IST) {
2536 if (op == BC_ISTC) {
2537 | csel RA, RA, RC, lo
2538 }
2539 | csel PC, RB, PC, lo
2540 } else {
2541 if (op == BC_ISFC) {
2542 | csel RA, RA, RC, hs
2543 }
2544 | csel PC, RB, PC, hs
2545 }
2546 if (op == BC_ISTC || op == BC_ISFC) {
2547 | str TMP0, [BASE, RA, lsl #3]
2548 }
2549 | ins_next
2550 break;
2551
2552 case BC_ISTYPE:
2553 | // RA = src, RC = -type
2554 | ldr TMP0, [BASE, RA, lsl #3]
2555 | cmn RC, TMP0, asr #47
2556 | bne ->vmeta_istype
2557 | ins_next
2558 break;
2559 case BC_ISNUM:
2560 | // RA = src, RC = -(TISNUM-1)
2561 | ldr TMP0, [BASE, RA]
2562 | checknum TMP0, ->vmeta_istype
2563 | ins_next
2564 break;
2565
2566 /* -- Unary ops --------------------------------------------------------- */
2567
2568 case BC_MOV:
2569 | // RA = dst, RC = src
2570 | ldr TMP0, [BASE, RC, lsl #3]
2571 | str TMP0, [BASE, RA, lsl #3]
2572 | ins_next
2573 break;
2574 case BC_NOT:
2575 | // RA = dst, RC = src
2576 | ldr TMP0, [BASE, RC, lsl #3]
2577 | mov_false TMP1
2578 | mov_true TMP2
2579 | cmp TMP0, TMP1
2580 | csel TMP0, TMP1, TMP2, lo
2581 | str TMP0, [BASE, RA, lsl #3]
2582 | ins_next
2583 break;
2584 case BC_UNM:
2585 | // RA = dst, RC = src
2586 | ldr TMP0, [BASE, RC, lsl #3]
2587 | asr ITYPE, TMP0, #47
2588 | cmn ITYPE, #-LJ_TISNUM
2589 | bhi ->vmeta_unm
2590 | eor TMP0, TMP0, #U64x(80000000,00000000)
2591 | bne >5
2592 | negs TMP0w, TMP0w
2593 | movz CARG3, #0x41e0, lsl #48 // 2^31.
2594 | add_TISNUM TMP0, TMP0
2595 | csel TMP0, TMP0, CARG3, vc
2596 |5:
2597 | str TMP0, [BASE, RA, lsl #3]
2598 | ins_next
2599 break;
2600 case BC_LEN:
2601 | // RA = dst, RC = src
2602 | ldr CARG1, [BASE, RC, lsl #3]
2603 | asr ITYPE, CARG1, #47
2604 | cmn ITYPE, #-LJ_TSTR
2605 | and CARG1, CARG1, #LJ_GCVMASK
2606 | bne >2
2607 | ldr CARG1w, STR:CARG1->len
2608 |1:
2609 | add_TISNUM CARG1, CARG1
2610 | str CARG1, [BASE, RA, lsl #3]
2611 | ins_next
2612 |
2613 |2:
2614 | cmn ITYPE, #-LJ_TTAB
2615 | bne ->vmeta_len
2616#if LJ_52
2617 | ldr TAB:CARG2, TAB:CARG1->metatable
2618 | cbnz TAB:CARG2, >9
2619 |3:
2620#endif
2621 |->BC_LEN_Z:
2622 | bl extern lj_tab_len // (GCtab *t)
2623 | // Returns uint32_t (but less than 2^31).
2624 | b <1
2625 |
2626#if LJ_52
2627 |9:
2628 | ldrb TMP1w, TAB:CARG2->nomm
2629 | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done.
2630 | b ->vmeta_len
2631#endif
2632 break;
2633
2634 /* -- Binary ops -------------------------------------------------------- */
2635
2636 |.macro ins_arithcheck_int, target
2637 | checkint CARG1, target
2638 | checkint CARG2, target
2639 |.endmacro
2640 |
2641 |.macro ins_arithcheck_num, target
2642 | checknum CARG1, target
2643 | checknum CARG2, target
2644 |.endmacro
2645 |
2646 |.macro ins_arithcheck_nzdiv, target
2647 | cbz CARG2w, target
2648 |.endmacro
2649 |
2650 |.macro ins_arithhead
2651 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2652 ||if (vk == 1) {
2653 | and RC, RC, #255
2654 | decode_RB RB, INS
2655 ||} else {
2656 | decode_RB RB, INS
2657 | and RC, RC, #255
2658 ||}
2659 |.endmacro
2660 |
2661 |.macro ins_arithload, reg1, reg2
2662 | // RA = dst, RB = src1, RC = src2 | num_const
2663 ||switch (vk) {
2664 ||case 0:
2665 | ldr reg1, [BASE, RB, lsl #3]
2666 | ldr reg2, [KBASE, RC, lsl #3]
2667 || break;
2668 ||case 1:
2669 | ldr reg1, [KBASE, RC, lsl #3]
2670 | ldr reg2, [BASE, RB, lsl #3]
2671 || break;
2672 ||default:
2673 | ldr reg1, [BASE, RB, lsl #3]
2674 | ldr reg2, [BASE, RC, lsl #3]
2675 || break;
2676 ||}
2677 |.endmacro
2678 |
2679 |.macro ins_arithfallback, ins
2680 ||switch (vk) {
2681 ||case 0:
2682 | ins ->vmeta_arith_vn
2683 || break;
2684 ||case 1:
2685 | ins ->vmeta_arith_nv
2686 || break;
2687 ||default:
2688 | ins ->vmeta_arith_vv
2689 || break;
2690 ||}
2691 |.endmacro
2692 |
2693 |.macro ins_arithmod, res, reg1, reg2
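| // Computes res = reg1 - floor(reg1/reg2)*reg2 (floored FP modulo;
| // frintm rounds the quotient toward -Inf).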
2694 | fdiv d2, reg1, reg2
2695 | frintm d2, d2
2696 | // Cannot use fmsub, because FMA is not enabled by default.
2697 | fmul d2, d2, reg2
2698 | fsub res, reg1, d2
2699 |.endmacro
2700 |
2701 |.macro ins_arithdn, intins, fpins
2702 | ins_arithhead
2703 | ins_arithload CARG1, CARG2
2704 | ins_arithcheck_int >5
2705 |.if "intins" == "smull"
2706 | smull CARG1, CARG1w, CARG2w
2707 | cmp CARG1, CARG1, sxtw
2708 | mov CARG1w, CARG1w
2709 | ins_arithfallback bne
2710 |.elif "intins" == "ins_arithmodi"
2711 | ins_arithfallback ins_arithcheck_nzdiv
2712 | bl ->vm_modi
2713 |.else
2714 | intins CARG1w, CARG1w, CARG2w
2715 | ins_arithfallback bvs
2716 |.endif
2717 | add_TISNUM CARG1, CARG1
2718 | str CARG1, [BASE, RA, lsl #3]
2719 |4:
2720 | ins_next
2721 |
2722 |5: // FP variant.
2723 | ins_arithload FARG1, FARG2
2724 | ins_arithfallback ins_arithcheck_num
2725 | fpins FARG1, FARG1, FARG2
2726 | str FARG1, [BASE, RA, lsl #3]
2727 | b <4
2728 |.endmacro
2729 |
2730 |.macro ins_arithfp, fpins
2731 | ins_arithhead
2732 | ins_arithload CARG1, CARG2
2733 | ins_arithload FARG1, FARG2
2734 | ins_arithfallback ins_arithcheck_num
2735 |.if "fpins" == "fpow"
2736 | bl extern pow
2737 |.else
2738 | fpins FARG1, FARG1, FARG2
2739 |.endif
2740 | str FARG1, [BASE, RA, lsl #3]
2741 | ins_next
2742 |.endmacro
2743
2744 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2745 | ins_arithdn adds, fadd
2746 break;
2747 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2748 | ins_arithdn subs, fsub
2749 break;
2750 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2751 | ins_arithdn smull, fmul
2752 break;
2753 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2754 | ins_arithfp fdiv
2755 break;
2756 case BC_MODVN: case BC_MODNV: case BC_MODVV:
2757 | ins_arithdn ins_arithmodi, ins_arithmod
2758 break;
2759 case BC_POW:
2760 | // NYI: (partial) integer arithmetic.
2761 | ins_arithfp fpow
2762 break;
2763
2764 case BC_CAT:
2765 | decode_RB RB, INS
2766 | and RC, RC, #255
2767 | // RA = dst, RB = src_start, RC = src_end
2768 | str BASE, L->base
2769 | sub CARG3, RC, RB
2770 | add CARG2, BASE, RC, lsl #3
2771 |->BC_CAT_Z:
2772 | // RA = dst, CARG2 = top-1, CARG3 = left
2773 | mov CARG1, L
2774 | str PC, SAVE_PC
2775 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2776 | // Returns NULL (finished) or TValue * (metamethod).
2777 | ldrb RBw, [PC, #-4+OFS_RB]
2778 | ldr BASE, L->base
2779 | cbnz CRET1, ->vmeta_binop
2780 | ldr TMP0, [BASE, RB, lsl #3]
2781 | str TMP0, [BASE, RA, lsl #3] // Copy result to RA.
2782 | ins_next
2783 break;
2784
2785 /* -- Constant ops ------------------------------------------------------ */
2786
2787 case BC_KSTR:
2788 | // RA = dst, RC = str_const (~)
2789 | mvn RC, RC
2790 | ldr TMP0, [KBASE, RC, lsl #3]
2791 | movn TMP1, #~LJ_TSTR
2792 | add TMP0, TMP0, TMP1, lsl #47
2793 | str TMP0, [BASE, RA, lsl #3]
2794 | ins_next
2795 break;
2796 case BC_KCDATA:
2797 |.if FFI
2798 | // RA = dst, RC = cdata_const (~)
2799 | mvn RC, RC
2800 | ldr TMP0, [KBASE, RC, lsl #3]
2801 | movn TMP1, #~LJ_TCDATA
2802 | add TMP0, TMP0, TMP1, lsl #47
2803 | str TMP0, [BASE, RA, lsl #3]
2804 | ins_next
2805 |.endif
2806 break;
2807 case BC_KSHORT:
2808 | // RA = dst, RC = int16_literal
2809 | sxth RCw, RCw
2810 | add_TISNUM TMP0, RC
2811 | str TMP0, [BASE, RA, lsl #3]
2812 | ins_next
2813 break;
2814 case BC_KNUM:
2815 | // RA = dst, RC = num_const
2816 | ldr TMP0, [KBASE, RC, lsl #3]
2817 | str TMP0, [BASE, RA, lsl #3]
2818 | ins_next
2819 break;
2820 case BC_KPRI:
2821 | // RA = dst, RC = primitive_type (~)
2822 | mvn TMP0, RC, lsl #47
2823 | str TMP0, [BASE, RA, lsl #3]
2824 | ins_next
2825 break;
2826 case BC_KNIL:
2827 | // RA = base, RC = end
2828 | add RA, BASE, RA, lsl #3
2829 | add RC, BASE, RC, lsl #3
2830 | str TISNIL, [RA], #8
2831 |1:
2832 | cmp RA, RC
2833 | str TISNIL, [RA], #8
2834 | blt <1
2835 | ins_next_
2836 break;
2837
2838 /* -- Upvalue and function ops ------------------------------------------ */
2839
2840 case BC_UGET:
2841 | // RA = dst, RC = uvnum
2842 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2843 | add RC, RC, #offsetof(GCfuncL, uvptr)/8
2844 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2845 | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3]
2846 | ldr CARG2, UPVAL:CARG2->v
2847 | ldr TMP0, [CARG2]
2848 | str TMP0, [BASE, RA, lsl #3]
2849 | ins_next
2850 break;
2851 case BC_USETV:
2852 | // RA = uvnum, RC = src
2853 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2854 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2855 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2856 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2857 | ldr CARG3, [BASE, RC, lsl #3]
2858 | ldr CARG2, UPVAL:CARG1->v
2859 | ldrb TMP2w, UPVAL:CARG1->marked
2860 | ldrb TMP0w, UPVAL:CARG1->closed
2861 | asr ITYPE, CARG3, #47
2862 | str CARG3, [CARG2]
2863 | add ITYPE, ITYPE, #-LJ_TISGCV
2864 | tst TMP2w, #LJ_GC_BLACK // isblack(uv)
2865 | ccmp TMP0w, #0, #4, ne // && uv->closed
2866 | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v)
2867 | bhi >2
2868 |1:
2869 | ins_next
2870 |
2871 |2: // Check if new value is white.
2872 | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK
2873 | ldrb TMP1w, GCOBJ:CARG3->gch.marked
2874 | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
2875 | beq <1
2876 | // Crossed a write barrier. Move the barrier forward.
2877 | mov CARG1, GL
2878 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2879 | b <1
2880 break;
2881 case BC_USETS:
2882 | // RA = uvnum, RC = str_const (~)
2883 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2884 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2885 | mvn RC, RC
2886 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2887 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2888 | ldr STR:CARG3, [KBASE, RC, lsl #3]
2889 | movn TMP0, #~LJ_TSTR
2890 | ldr CARG2, UPVAL:CARG1->v
2891 | ldrb TMP2w, UPVAL:CARG1->marked
2892 | add TMP0, STR:CARG3, TMP0, lsl #47
2893 | ldrb TMP1w, STR:CARG3->marked
2894 | str TMP0, [CARG2]
2895 | tbnz TMP2w, #2, >2 // isblack(uv)
2896 |1:
2897 | ins_next
2898 |
2899 |2: // Check if string is white and ensure upvalue is closed.
2900 | ldrb TMP0w, UPVAL:CARG1->closed
2901 | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
2902 | ccmp TMP0w, #0, #4, ne
2903 | beq <1
2904 | // Crossed a write barrier. Move the barrier forward.
2905 | mov CARG1, GL
2906 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2907 | b <1
2908 break;
2909 case BC_USETN:
2910 | // RA = uvnum, RC = num_const
2911 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2912 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2913 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2914 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2915 | ldr TMP0, [KBASE, RC, lsl #3]
2916 | ldr CARG2, UPVAL:CARG2->v
2917 | str TMP0, [CARG2]
2918 | ins_next
2919 break;
2920 case BC_USETP:
2921 | // RA = uvnum, RC = primitive_type (~)
2922 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2923 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2924 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2925 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2926 | mvn TMP0, RC, lsl #47
2927 | ldr CARG2, UPVAL:CARG2->v
2928 | str TMP0, [CARG2]
2929 | ins_next
2930 break;
2931
2932 case BC_UCLO:
2933 | // RA = level, RC = target
2934 | ldr CARG3, L->openupval
2935 | add RC, PC, RC, lsl #2
2936 | str BASE, L->base
2937 | sub PC, RC, #0x20000
2938 | cbz CARG3, >1
2939 | mov CARG1, L
2940 | add CARG2, BASE, RA, lsl #3
2941 | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
2942 | ldr BASE, L->base
2943 |1:
2944 | ins_next
2945 break;
2946
2947 case BC_FNEW:
2948 | // RA = dst, RC = proto_const (~) (holding function prototype)
2949 | mvn RC, RC
2950 | str BASE, L->base
2951 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2952 | str PC, SAVE_PC
2953 | ldr CARG2, [KBASE, RC, lsl #3]
2954 | mov CARG1, L
2955 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2956 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2957 | bl extern lj_func_newL_gc
2958 | // Returns GCfuncL *.
2959 | ldr BASE, L->base
2960 | movn TMP0, #~LJ_TFUNC
2961 | add CRET1, CRET1, TMP0, lsl #47
2962 | str CRET1, [BASE, RA, lsl #3]
2963 | ins_next
2964 break;
2965
2966 /* -- Table ops --------------------------------------------------------- */
2967
2968 case BC_TNEW:
2969 case BC_TDUP:
2970 | // RA = dst, RC = (hbits|asize) | tab_const (~)
2971 | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total.
2972 | str BASE, L->base
2973 | str PC, SAVE_PC
2974 | mov CARG1, L
2975 | cmp CARG3, CARG4
2976 | bhs >5
2977 |1:
2978 if (op == BC_TNEW) {
2979 | and CARG2, RC, #0x7ff
2980 | lsr CARG3, RC, #11
2981 | cmp CARG2, #0x7ff
2982 | mov TMP0, #0x801
2983 | csel CARG2, CARG2, TMP0, ne
2984 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2985 | // Returns GCtab *.
2986 } else {
2987 | mvn RC, RC
2988 | ldr CARG2, [KBASE, RC, lsl #3]
2989 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
2990 | // Returns GCtab *.
2991 }
2992 | ldr BASE, L->base
2993 | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48
2994 | str CRET1, [BASE, RA, lsl #3]
2995 | ins_next
2996 |
2997 |5:
2998 | bl extern lj_gc_step_fixtop // (lua_State *L)
2999 | mov CARG1, L
3000 | b <1
3001 break;
3002
3003 case BC_GGET:
3004 | // RA = dst, RC = str_const (~)
3005 case BC_GSET:
3006 | // RA = src, RC = str_const (~)
3007 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
3008 | mvn RC, RC
3009 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3010 | ldr TAB:CARG2, LFUNC:CARG1->env
3011 | ldr STR:RC, [KBASE, RC, lsl #3]
3012 if (op == BC_GGET) {
3013 | b ->BC_TGETS_Z
3014 } else {
3015 | b ->BC_TSETS_Z
3016 }
3017 break;
3018
3019 case BC_TGETV:
3020 | decode_RB RB, INS
3021 | and RC, RC, #255
3022 | // RA = dst, RB = table, RC = key
3023 | ldr CARG2, [BASE, RB, lsl #3]
3024 | ldr TMP1, [BASE, RC, lsl #3]
3025 | checktab CARG2, ->vmeta_tgetv
3026 | checkint TMP1, >9 // Integer key?
3027 | ldr CARG3, TAB:CARG2->array
3028 | ldr CARG1w, TAB:CARG2->asize
3029 | add CARG3, CARG3, TMP1, uxtw #3
3030 | cmp TMP1w, CARG1w // In array part?
3031 | bhs ->vmeta_tgetv
3032 | ldr TMP0, [CARG3]
3033 | cmp_nil TMP0
3034 | beq >5
3035 |1:
3036 | str TMP0, [BASE, RA, lsl #3]
3037 | ins_next
3038 |
3039 |5: // Check for __index if table value is nil.
3040 | ldr TAB:CARG1, TAB:CARG2->metatable
3041 | cbz TAB:CARG1, <1 // No metatable: done.
3042 | ldrb TMP1w, TAB:CARG1->nomm
3043 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
3044 | b ->vmeta_tgetv
3045 |
3046 |9:
3047 | asr ITYPE, TMP1, #47
3048 | cmn ITYPE, #-LJ_TSTR // String key?
3049 | bne ->vmeta_tgetv
3050 | and STR:RC, TMP1, #LJ_GCVMASK
3051 | b ->BC_TGETS_Z
3052 break;
3053 case BC_TGETS:
3054 | decode_RB RB, INS
3055 | and RC, RC, #255
3056 | // RA = dst, RB = table, RC = str_const (~)
3057 | ldr CARG2, [BASE, RB, lsl #3]
3058 | mvn RC, RC
3059 | ldr STR:RC, [KBASE, RC, lsl #3]
3060 | checktab CARG2, ->vmeta_tgets1
3061 |->BC_TGETS_Z:
3062 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
3063 | ldr TMP1w, TAB:CARG2->hmask
3064 | ldr TMP2w, STR:RC->sid
3065 | ldr NODE:CARG3, TAB:CARG2->node
3066 | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
3067 | add TMP1, TMP1, TMP1, lsl #1
3068 | movn CARG4, #~LJ_TSTR
3069 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
3070 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
3071 |1:
3072 | ldp TMP0, CARG1, NODE:CARG3->val
3073 | ldr NODE:CARG3, NODE:CARG3->next
3074 | cmp CARG1, CARG4
3075 | bne >4
3076 | cmp_nil TMP0
3077 | beq >5
3078 |3:
3079 | str TMP0, [BASE, RA, lsl #3]
3080 | ins_next
3081 |
3082 |4: // Follow hash chain.
3083 | cbnz NODE:CARG3, <1
3084 | // End of hash chain: key not found, nil result.
3085 | mov_nil TMP0
3086 |
3087 |5: // Check for __index if table value is nil.
3088 | ldr TAB:CARG1, TAB:CARG2->metatable
3089 | cbz TAB:CARG1, <3 // No metatable: done.
3090 | ldrb TMP1w, TAB:CARG1->nomm
3091 | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done.
3092 | b ->vmeta_tgets
3093 break;
3094 case BC_TGETB:
3095 | decode_RB RB, INS
3096 | and RC, RC, #255
3097 | // RA = dst, RB = table, RC = index
3098 | ldr CARG2, [BASE, RB, lsl #3]
3099 | checktab CARG2, ->vmeta_tgetb
3100 | ldr CARG3, TAB:CARG2->array
3101 | ldr CARG1w, TAB:CARG2->asize
3102 | add CARG3, CARG3, RC, lsl #3
3103 | cmp RCw, CARG1w // In array part?
3104 | bhs ->vmeta_tgetb
3105 | ldr TMP0, [CARG3]
3106 | cmp_nil TMP0
3107 | beq >5
3108 |1:
3109 | str TMP0, [BASE, RA, lsl #3]
3110 | ins_next
3111 |
3112 |5: // Check for __index if table value is nil.
3113 | ldr TAB:CARG1, TAB:CARG2->metatable
3114 | cbz TAB:CARG1, <1 // No metatable: done.
3115 | ldrb TMP1w, TAB:CARG1->nomm
3116 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
3117 | b ->vmeta_tgetb
3118 break;
3119 case BC_TGETR:
3120 | decode_RB RB, INS
3121 | and RC, RC, #255
3122 | // RA = dst, RB = table, RC = key
3123 | ldr CARG1, [BASE, RB, lsl #3]
3124 | ldr TMP1, [BASE, RC, lsl #3]
3125 | and TAB:CARG1, CARG1, #LJ_GCVMASK
3126 | ldr CARG3, TAB:CARG1->array
3127 | ldr TMP2w, TAB:CARG1->asize
3128 | add CARG3, CARG3, TMP1w, uxtw #3
3129 | cmp TMP1w, TMP2w // In array part?
3130 | bhs ->vmeta_tgetr
3131 | ldr TMP0, [CARG3]
3132 |->BC_TGETR_Z:
3133 | str TMP0, [BASE, RA, lsl #3]
3134 | ins_next
3135 break;
3136
3137 case BC_TSETV:
3138 | decode_RB RB, INS
3139 | and RC, RC, #255
3140 | // RA = src, RB = table, RC = key
3141 | ldr CARG2, [BASE, RB, lsl #3]
3142 | ldr TMP1, [BASE, RC, lsl #3]
3143 | checktab CARG2, ->vmeta_tsetv
3144 | checkint TMP1, >9 // Integer key?
3145 | ldr CARG3, TAB:CARG2->array
3146 | ldr CARG1w, TAB:CARG2->asize
3147 | add CARG3, CARG3, TMP1, uxtw #3
3148 | cmp TMP1w, CARG1w // In array part?
3149 | bhs ->vmeta_tsetv
3150 | ldr TMP1, [CARG3]
3151 | ldr TMP0, [BASE, RA, lsl #3]
3152 | ldrb TMP2w, TAB:CARG2->marked
3153 | cmp_nil TMP1 // Previous value is nil?
3154 | beq >5
3155 |1:
3156 | str TMP0, [CARG3]
3157 | tbnz TMP2w, #2, >7 // isblack(table)
3158 |2:
3159 | ins_next
3160 |
3161 |5: // Check for __newindex if previous value is nil.
3162 | ldr TAB:CARG1, TAB:CARG2->metatable
3163 | cbz TAB:CARG1, <1 // No metatable: done.
3164 | ldrb TMP1w, TAB:CARG1->nomm
3165 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
3166 | b ->vmeta_tsetv
3167 |
3168 |7: // Possible table write barrier for the value. Skip valiswhite check.
3169 | barrierback TAB:CARG2, TMP2w, TMP1
3170 | b <2
3171 |
3172 |9:
3173 | asr ITYPE, TMP1, #47
3174 | cmn ITYPE, #-LJ_TSTR // String key?
3175 | bne ->vmeta_tsetv
3176 | and STR:RC, TMP1, #LJ_GCVMASK
3177 | b ->BC_TSETS_Z
3178 break;
3179 case BC_TSETS:
3180 | decode_RB RB, INS
3181 | and RC, RC, #255
3182 | // RA = dst, RB = table, RC = str_const (~)
3183 | ldr CARG2, [BASE, RB, lsl #3]
3184 | mvn RC, RC
3185 | ldr STR:RC, [KBASE, RC, lsl #3]
3186 | checktab CARG2, ->vmeta_tsets1
3187 |->BC_TSETS_Z:
3188 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
3189 | ldr TMP1w, TAB:CARG2->hmask
3190 | ldr TMP2w, STR:RC->sid
3191 | ldr NODE:CARG3, TAB:CARG2->node
3192 | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
3193 | add TMP1, TMP1, TMP1, lsl #1
3194 | movn CARG4, #~LJ_TSTR
3195 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
3196 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
3197 | strb wzr, TAB:CARG2->nomm // Clear metamethod cache.
3198 |1:
3199 | ldp TMP1, CARG1, NODE:CARG3->val
3200 | ldr NODE:TMP3, NODE:CARG3->next
3201 | ldrb TMP2w, TAB:CARG2->marked
3202 | cmp CARG1, CARG4
3203 | bne >5
3204 | ldr TMP0, [BASE, RA, lsl #3]
3205 | cmp_nil TMP1 // Previous value is nil?
3206 | beq >4
3207 |2:
3208 | str TMP0, NODE:CARG3->val
3209 | tbnz TMP2w, #2, >7 // isblack(table)
3210 |3:
3211 | ins_next
3212 |
3213 |4: // Check for __newindex if previous value is nil.
3214 | ldr TAB:CARG1, TAB:CARG2->metatable
3215 | cbz TAB:CARG1, <2 // No metatable: done.
3216 | ldrb TMP1w, TAB:CARG1->nomm
3217 | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done.
3218 | b ->vmeta_tsets
3219 |
3220 |5: // Follow hash chain.
3221 | mov NODE:CARG3, NODE:TMP3
3222 | cbnz NODE:TMP3, <1
3223 | // End of hash chain: key not found, add a new one.
3224 |
3225 | // But check for __newindex first.
3226 | ldr TAB:CARG1, TAB:CARG2->metatable
3227 | cbz TAB:CARG1, >6 // No metatable: continue.
3228 | ldrb TMP1w, TAB:CARG1->nomm
3229 | // 'no __newindex' flag NOT set: check.
3230 | tbz TMP1w, #MM_newindex, ->vmeta_tsets
3231 |6:
3232 | movn TMP1, #~LJ_TSTR
3233 | str PC, SAVE_PC
3234 | add TMP0, STR:RC, TMP1, lsl #47
3235 | str BASE, L->base
3236 | mov CARG1, L
3237 | str TMP0, TMPD
3238 | add CARG3, sp, TMPDofs
3239 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3240 | // Returns TValue *.
3241 | ldr BASE, L->base
3242 | ldr TMP0, [BASE, RA, lsl #3]
3243 | str TMP0, [CRET1]
3244 | b <3 // No 2nd write barrier needed.
3245 |
3246 |7: // Possible table write barrier for the value. Skip valiswhite check.
3247 | barrierback TAB:CARG2, TMP2w, TMP1
3248 | b <3
3249 break;
3250 case BC_TSETB:
3251 | decode_RB RB, INS
3252 | and RC, RC, #255
3253 | // RA = src, RB = table, RC = index
3254 | ldr CARG2, [BASE, RB, lsl #3]
3255 | checktab CARG2, ->vmeta_tsetb
3256 | ldr CARG3, TAB:CARG2->array
3257 | ldr CARG1w, TAB:CARG2->asize
3258 | add CARG3, CARG3, RC, lsl #3
3259 | cmp RCw, CARG1w // In array part?
3260 | bhs ->vmeta_tsetb
3261 | ldr TMP1, [CARG3]
3262 | ldr TMP0, [BASE, RA, lsl #3]
3263 | ldrb TMP2w, TAB:CARG2->marked
3264 | cmp_nil TMP1 // Previous value is nil?
3265 | beq >5
3266 |1:
3267 | str TMP0, [CARG3]
3268 | tbnz TMP2w, #2, >7 // isblack(table)
3269 |2:
3270 | ins_next
3271 |
3272 |5: // Check for __newindex if previous value is nil.
3273 | ldr TAB:CARG1, TAB:CARG2->metatable
3274 | cbz TAB:CARG1, <1 // No metatable: done.
3275 | ldrb TMP1w, TAB:CARG1->nomm
3276 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
3277 | b ->vmeta_tsetb
3278 |
3279 |7: // Possible table write barrier for the value. Skip valiswhite check.
3280 | barrierback TAB:CARG2, TMP2w, TMP1
3281 | b <2
3282 break;
3283 case BC_TSETR:
3284 | decode_RB RB, INS
3285 | and RC, RC, #255
3286 | // RA = src, RB = table, RC = key
3287 | ldr CARG2, [BASE, RB, lsl #3]
3288 | ldr TMP1, [BASE, RC, lsl #3]
3289 | and TAB:CARG2, CARG2, #LJ_GCVMASK
3290 | ldr CARG1, TAB:CARG2->array
3291 | ldrb TMP2w, TAB:CARG2->marked
3292 | ldr CARG4w, TAB:CARG2->asize
3293 | add CARG1, CARG1, TMP1, uxtw #3
3294 | tbnz TMP2w, #2, >7 // isblack(table)
3295 |2:
3296 | cmp TMP1w, CARG4w // In array part?
3297 | bhs ->vmeta_tsetr
3298 |->BC_TSETR_Z:
3299 | ldr TMP0, [BASE, RA, lsl #3]
3300 | str TMP0, [CARG1]
3301 | ins_next
3302 |
3303 |7: // Possible table write barrier for the value. Skip valiswhite check.
3304 | barrierback TAB:CARG2, TMP2w, TMP0
3305 | b <2
3306 break;
3307
3308 case BC_TSETM:
3309 | // RA = base (table at base-1), RC = num_const (start index)
3310 | add RA, BASE, RA, lsl #3
3311 |1:
3312 | ldr RBw, SAVE_MULTRES
3313 | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
3314 | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
3315 | sub RB, RB, #8
3316 | cbz RB, >4 // Nothing to copy?
3317 | and TAB:CARG2, CARG2, #LJ_GCVMASK
3318 | ldr CARG1w, TAB:CARG2->asize
3319 | add CARG3w, TMP1w, RBw, lsr #3
3320 | ldr CARG4, TAB:CARG2->array
3321 | cmp CARG3, CARG1
3322 | add RB, RA, RB
3323 | bhi >5
3324 | add TMP1, CARG4, TMP1w, uxtw #3
3325 | ldrb TMP2w, TAB:CARG2->marked
3326 |3: // Copy result slots to table.
3327 | ldr TMP0, [RA], #8
3328 | str TMP0, [TMP1], #8
3329 | cmp RA, RB
3330 | blo <3
3331 | tbnz TMP2w, #2, >7 // isblack(table)
3332 |4:
3333 | ins_next
3334 |
3335 |5: // Need to resize array part.
3336 | str BASE, L->base
3337 | mov CARG1, L
3338 | str PC, SAVE_PC
3339 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3340 | // Must not reallocate the stack.
3341 | b <1
3342 |
3343 |7: // Possible table write barrier for any value. Skip valiswhite check.
3344 | barrierback TAB:CARG2, TMP2w, TMP1
3345 | b <4
3346 break;
3347
3348 /* -- Calls and vararg handling ----------------------------------------- */
3349
3350 case BC_CALLM:
3351 | // RA = base, (RB = nresults+1,) RC = extra_nargs
3352 | ldr TMP0w, SAVE_MULTRES
3353 | decode_RC8RD NARGS8:RC, RC
3354 | add NARGS8:RC, NARGS8:RC, TMP0
3355 | b ->BC_CALL_Z
3356 break;
3357 case BC_CALL:
3358 | decode_RC8RD NARGS8:RC, RC
3359 | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
3360 |->BC_CALL_Z:
3361 | mov RB, BASE // Save old BASE for vmeta_call.
3362 | add BASE, BASE, RA, lsl #3
3363 | ldr CARG3, [BASE], #16
3364 | sub NARGS8:RC, NARGS8:RC, #8
3365 | checkfunc CARG3, ->vmeta_call
3366 | ins_call
3367 break;
3368
3369 case BC_CALLMT:
3370 | // RA = base, (RB = 0,) RC = extra_nargs
3371 | ldr TMP0w, SAVE_MULTRES
3372 | add NARGS8:RC, TMP0, RC, lsl #3
3373 | b ->BC_CALLT1_Z
3374 break;
3375 case BC_CALLT:
3376 | lsl NARGS8:RC, RC, #3
3377 | // RA = base, (RB = 0,) RC = (nargs+1)*8
3378 |->BC_CALLT1_Z:
3379 | add RA, BASE, RA, lsl #3
3380 | ldr TMP1, [RA], #16
3381 | sub NARGS8:RC, NARGS8:RC, #8
3382 | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
3383 | ldr PC, [BASE, FRAME_PC]
3384 |->BC_CALLT2_Z:
3385 | mov RB, #0
3386 | ldrb TMP2w, LFUNC:CARG3->ffid
3387 | tst PC, #FRAME_TYPE
3388 | bne >7
3389 |1:
3390 | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
3391 | cbz NARGS8:RC, >3
3392 |2:
3393 | ldr TMP0, [RA, RB]
3394 | add TMP1, RB, #8
3395 | cmp TMP1, NARGS8:RC
3396 | str TMP0, [BASE, RB]
3397 | mov RB, TMP1
3398 | bne <2
3399 |3:
3400 | cmp TMP2, #1 // (> FF_C) Calling a fast function?
3401 | bhi >5
3402 |4:
3403 | ins_callt
3404 |
3405 |5: // Tailcall to a fast function with a Lua frame below.
3406 | ldrb RAw, [PC, #-4+OFS_RA]
3407 | sub CARG1, BASE, RA, lsl #3
3408 | ldr LFUNC:CARG1, [CARG1, #-32]
3409 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3410 | ldr CARG1, LFUNC:CARG1->pc
3411 | ldr KBASE, [CARG1, #PC2PROTO(k)]
3412 | b <4
3413 |
3414 |7: // Tailcall from a vararg function.
3415 | eor PC, PC, #FRAME_VARG
3416 | tst PC, #FRAME_TYPEP // Vararg frame below?
3417 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3418 | bne <1
3419 | sub BASE, BASE, PC
3420 | ldr PC, [BASE, FRAME_PC]
3421 | tst PC, #FRAME_TYPE
3422 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3423 | b <1
3424 break;
3425
3426 case BC_ITERC:
3427 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3428 | add RA, BASE, RA, lsl #3
3429 | ldr CARG3, [RA, #-24]
3430 | mov RB, BASE // Save old BASE for vmeta_call.
3431 | ldp CARG1, CARG2, [RA, #-16]
3432 | add BASE, RA, #16
3433 | mov NARGS8:RC, #16 // Iterators get 2 arguments.
3434 | str CARG3, [RA] // Copy callable.
3435 | stp CARG1, CARG2, [RA, #16] // Copy state and control var.
3436 | checkfunc CARG3, ->vmeta_call
3437 | ins_call
3438 break;
3439
3440 case BC_ITERN:
3441 |.if JIT
3442 | hotloop
3443 |.endif
3444 |->vm_IITERN:
3445 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3446 | add RA, BASE, RA, lsl #3
3447 | ldr TAB:RB, [RA, #-16]
3448 | ldrh TMP3w, [PC, # OFS_RD]
3449 | ldr CARG1w, [RA, #-8+LO] // Get index from control var.
3450 | add PC, PC, #4
3451 | add TMP3, PC, TMP3, lsl #2
3452 | and TAB:RB, RB, #LJ_GCVMASK
3453 | sub TMP3, TMP3, #0x20000
3454 | ldr TMP1w, TAB:RB->asize
3455 | ldr CARG2, TAB:RB->array
3456 |1: // Traverse array part.
3457 | subs RC, CARG1, TMP1
3458 | add CARG3, CARG2, CARG1, lsl #3
3459 | bhs >5 // Index points after array part?
3460 | ldr TMP0, [CARG3]
3461 | cmp_nil TMP0
3462 | cinc CARG1, CARG1, eq // Skip holes in array part.
3463 | beq <1
3464 | add_TISNUM CARG1, CARG1
3465 | stp CARG1, TMP0, [RA]
3466 | add CARG1, CARG1, #1
3467 |3:
3468 | str CARG1w, [RA, #-8+LO] // Update control var.
3469 | mov PC, TMP3
3470 |4:
3471 | ins_next
3472 |
3473 |5: // Traverse hash part.
3474 | ldr TMP2w, TAB:RB->hmask
3475 | ldr NODE:RB, TAB:RB->node
3476 |6:
3477 | add CARG1, RC, RC, lsl #1
3478 | cmp RC, TMP2 // End of iteration? Branch to ITERN+1.
3479 | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
3480 | bhi <4
3481 | ldp TMP0, CARG1, NODE:CARG3->val
3482 | cmp_nil TMP0
3483 | add RC, RC, #1
3484 | beq <6 // Skip holes in hash part.
3485 | stp CARG1, TMP0, [RA]
3486 | add CARG1, RC, TMP1
3487 | b <3
3488 break;
3489
3490 case BC_ISNEXT:
3491 | // RA = base, RC = target (points to ITERN)
3492 | add RA, BASE, RA, lsl #3
3493 | ldr CFUNC:CARG1, [RA, #-24]
3494 | add RC, PC, RC, lsl #2
3495 | ldp TAB:CARG3, CARG4, [RA, #-16]
3496 | sub RC, RC, #0x20000
3497 | checkfunc CFUNC:CARG1, >5
3498 | asr TMP0, TAB:CARG3, #47
3499 | ldrb TMP1w, CFUNC:CARG1->ffid
3500 | cmp_nil CARG4
3501 | ccmn TMP0, #-LJ_TTAB, #0, eq
3502 | ccmp TMP1w, #FF_next_N, #0, eq
3503 | bne >5
3504 | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX
3505 | lsl TMP0, TMP0, #32
3506 | str TMP0, [RA, #-8] // Initialize control var.
3507 |1:
3508 | mov PC, RC
3509 | ins_next
3510 |
3511 |5: // Despecialize bytecode if any of the checks fail.
3512 |.if JIT
3513 | ldrb TMP2w, [RC, # OFS_OP]
3514 |.endif
3515 | mov TMP0, #BC_JMP
3516 | mov TMP1, #BC_ITERC
3517 | strb TMP0w, [PC, #-4+OFS_OP]
3518 |.if JIT
3519 | cmp TMP2w, #BC_ITERN
3520 | bne >6
3521 |.endif
3522 | strb TMP1w, [RC, # OFS_OP]
3523 | b <1
3524 |.if JIT
3525 |6: // Unpatch JLOOP.
3526 | ldr RA, [GL, #GL_J(trace)]
3527 | ldrh TMP2w, [RC, # OFS_RD]
3528 | ldr TRACE:RA, [RA, TMP2, lsl #3]
3529 | ldr TMP2w, TRACE:RA->startins
3530 | bfxil TMP2w, TMP1w, #0, #8
3531 | str TMP2w, [RC]
3532 | b <1
3533 |.endif
3534 break;
3535
3536 case BC_VARG:
3537 | decode_RB RB, INS
3538 | and RC, RC, #255
3539 | // RA = base, RB = (nresults+1), RC = numparams
3540 | ldr TMP1, [BASE, FRAME_PC]
3541 | add TMP0, BASE, RC, lsl #3
3542 | add RC, BASE, RA, lsl #3 // RC = destination
3543 | add TMP0, TMP0, #FRAME_VARG
3544 | add TMP2, RC, RB, lsl #3
3545 | sub RA, TMP0, TMP1 // RA = vbase
3546 | // Note: RA may now be even _above_ BASE if nargs was < numparams.
3547 | sub TMP3, BASE, #16 // TMP3 = vtop
3548 | cbz RB, >5
3549 | sub TMP2, TMP2, #16
3550 |1: // Copy vararg slots to destination slots.
3551 | cmp RA, TMP3
3552 | ldr TMP0, [RA], #8
3553 | csinv TMP0, TMP0, xzr, lo // TISNIL = ~xzr
3554 | cmp RC, TMP2
3555 | str TMP0, [RC], #8
3556 | blo <1
3557 |2:
3558 | ins_next
3559 |
3560 |5: // Copy all varargs.
3561 | ldr TMP0, L->maxstack
3562 | subs TMP2, TMP3, RA
3563 | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
3564 | add RB, RB, #8
3565 | add TMP1, RC, TMP2
3566 | str RBw, SAVE_MULTRES
3567 | ble <2 // Nothing to copy.
3568 | cmp TMP1, TMP0
3569 | bhi >7
3570 |6:
3571 | ldr TMP0, [RA], #8
3572 | str TMP0, [RC], #8
3573 | cmp RA, TMP3
3574 | blo <6
3575 | b <2
3576 |
3577 |7: // Grow stack for varargs.
3578 | lsr CARG2, TMP2, #3
3579 | stp BASE, RC, L->base
3580 | mov CARG1, L
3581 | sub RA, RA, BASE // Need delta, because BASE may change.
3582 | str PC, SAVE_PC
3583 | bl extern lj_state_growstack // (lua_State *L, int n)
3584 | ldp BASE, RC, L->base
3585 | add RA, BASE, RA
3586 | sub TMP3, BASE, #16
3587 | b <6
3588 break;
3589
3590 /* -- Returns ----------------------------------------------------------- */
3591
3592 case BC_RETM:
3593 | // RA = results, RC = extra results
3594 | ldr TMP0w, SAVE_MULTRES
3595 | ldr PC, [BASE, FRAME_PC]
3596 | add RA, BASE, RA, lsl #3
3597 | add RC, TMP0, RC, lsl #3
3598 | b ->BC_RETM_Z
3599 break;
3600
3601 case BC_RET:
3602 | // RA = results, RC = nresults+1
3603 | ldr PC, [BASE, FRAME_PC]
3604 | lsl RC, RC, #3
3605 | add RA, BASE, RA, lsl #3
3606 |->BC_RETM_Z:
3607 | str RCw, SAVE_MULTRES
3608 |1:
3609 | ands CARG1, PC, #FRAME_TYPE
3610 | eor CARG2, PC, #FRAME_VARG
3611 | bne ->BC_RETV2_Z
3612 |
3613 |->BC_RET_Z:
3614 | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
3615 | ldr INSw, [PC, #-4]
3616 | subs TMP1, RC, #8
3617 | sub CARG3, BASE, #16
3618 | beq >3
3619 |2:
3620 | ldr TMP0, [RA], #8
3621 | add BASE, BASE, #8
3622 | sub TMP1, TMP1, #8
3623 | str TMP0, [BASE, #-24]
3624 | cbnz TMP1, <2
3625 |3:
3626 | decode_RA RA, INS
3627 | sub CARG4, CARG3, RA, lsl #3
3628 | decode_RB RB, INS
3629 | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
3630 |5:
3631 | cmp RC, RB, lsl #3 // More results expected?
3632 | blo >6
3633 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3634 | mov BASE, CARG4
3635 | ldr CARG2, LFUNC:CARG1->pc
3636 | ldr KBASE, [CARG2, #PC2PROTO(k)]
3637 | ins_next
3638 |
3639 |6: // Fill up results with nil.
3640 | add BASE, BASE, #8
3641 | add RC, RC, #8
3642 | str TISNIL, [BASE, #-24]
3643 | b <5
3644 |
3645 |->BC_RETV1_Z: // Non-standard return case.
3646 | add RA, BASE, RA, lsl #3
3647 |->BC_RETV2_Z:
3648 | tst CARG2, #FRAME_TYPEP
3649 | bne ->vm_return
3650 | // Return from vararg function: relocate BASE down.
3651 | sub BASE, BASE, CARG2
3652 | ldr PC, [BASE, FRAME_PC]
3653 | b <1
3654 break;
3655
3656 case BC_RET0: case BC_RET1:
3657 | // RA = results, RC = nresults+1
3658 | ldr PC, [BASE, FRAME_PC]
3659 | lsl RC, RC, #3
3660 | str RCw, SAVE_MULTRES
3661 | ands CARG1, PC, #FRAME_TYPE
3662 | eor CARG2, PC, #FRAME_VARG
3663 | bne ->BC_RETV1_Z
3664 | ldr INSw, [PC, #-4]
3665 if (op == BC_RET1) {
3666 | ldr TMP0, [BASE, RA, lsl #3]
3667 }
3668 | sub CARG4, BASE, #16
3669 | decode_RA RA, INS
3670 | sub BASE, CARG4, RA, lsl #3
3671 if (op == BC_RET1) {
3672 | str TMP0, [CARG4], #8
3673 }
3674 | decode_RB RB, INS
3675 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
3676 |5:
3677 | cmp RC, RB, lsl #3
3678 | blo >6
3679 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3680 | ldr CARG2, LFUNC:CARG1->pc
3681 | ldr KBASE, [CARG2, #PC2PROTO(k)]
3682 | ins_next
3683 |
3684 |6: // Fill up results with nil.
3685 | add RC, RC, #8
3686 | str TISNIL, [CARG4], #8
3687 | b <5
3688 break;
3689
3690 /* -- Loops and branches ------------------------------------------------ */
3691
3692 |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4]
3693 |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12]
3694 |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
3695 |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28]
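|// Numeric for-loop state: four consecutive slots at RA hold the internal
|// index, the stop value, the step and the externally visible loop index.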
3696
3697 case BC_FORL:
3698 |.if JIT
3699 | hotloop
3700 |.endif
3701 | // Fall through. Assumes BC_IFORL follows.
3702 break;
3703
3704 case BC_JFORI:
3705 case BC_JFORL:
3706#if !LJ_HASJIT
3707 break;
3708#endif
3709 case BC_FORI:
3710 case BC_IFORL:
3711 | // RA = base, RC = target (after end of loop or start of loop)
3712 vk = (op == BC_IFORL || op == BC_JFORL);
3713 | add RA, BASE, RA, lsl #3
3714 | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP
3715 | ldr CARG3, FOR_STEP // CARG3 = STEP
3716 if (op != BC_JFORL) {
3717 | add RC, PC, RC, lsl #2
3718 | sub RC, RC, #0x20000
3719 }
3720 | checkint CARG1, >5
3721 if (!vk) {
3722 | checkint CARG2, ->vmeta_for
3723 | checkint CARG3, ->vmeta_for
3724 | tbnz CARG3w, #31, >4
3725 | cmp CARG1w, CARG2w
3726 } else {
3727 | adds CARG1w, CARG1w, CARG3w
3728 | bvs >2
3729 | add_TISNUM TMP0, CARG1
3730 | tbnz CARG3w, #31, >4
3731 | cmp CARG1w, CARG2w
3732 }
3733 |1:
3734 if (op == BC_FORI) {
3735 | csel PC, RC, PC, gt
3736 } else if (op == BC_JFORI) {
3737 | mov PC, RC
3738 | ldrh RCw, [RC, #-4+OFS_RD]
3739 } else if (op == BC_IFORL) {
3740 | csel PC, RC, PC, le
3741 }
3742 if (vk) {
3743 | str TMP0, FOR_IDX
3744 | str TMP0, FOR_EXT
3745 } else {
3746 | str CARG1, FOR_EXT
3747 }
3748 if (op == BC_JFORI || op == BC_JFORL) {
3749 | ble =>BC_JLOOP
3750 }
3751 |2:
3752 | ins_next
3753 |
3754 |4: // Invert check for negative step.
3755 | cmp CARG2w, CARG1w
3756 | b <1
3757 |
3758 |5: // FP loop.
3759 | ldp d0, d1, FOR_IDX
3760 | blo ->vmeta_for
3761 if (!vk) {
3762 | checknum CARG2, ->vmeta_for
3763 | checknum CARG3, ->vmeta_for
3764 | str d0, FOR_EXT
3765 } else {
3766 | ldr d2, FOR_STEP
3767 | fadd d0, d0, d2
3768 }
3769 | tbnz CARG3, #63, >7
3770 | fcmp d0, d1
3771 |6:
3772 if (vk) {
3773 | str d0, FOR_IDX
3774 | str d0, FOR_EXT
3775 }
3776 if (op == BC_FORI) {
3777 | csel PC, RC, PC, hi
3778 } else if (op == BC_JFORI) {
3779 | ldrh RCw, [RC, #-4+OFS_RD]
3780 | bls =>BC_JLOOP
3781 } else if (op == BC_IFORL) {
3782 | csel PC, RC, PC, ls
3783 } else {
3784 | bls =>BC_JLOOP
3785 }
3786 | b <2
3787 |
3788 |7: // Invert check for negative step.
3789 | fcmp d1, d0
3790 | b <6
3791 break;
3792
3793 case BC_ITERL:
3794 |.if JIT
3795 | hotloop
3796 |.endif
3797 | // Fall through. Assumes BC_IITERL follows.
3798 break;
3799
3800 case BC_JITERL:
3801#if !LJ_HASJIT
3802 break;
3803#endif
3804 case BC_IITERL:
3805 | // RA = base, RC = target
3806 | ldr CARG1, [BASE, RA, lsl #3]
3807 | add TMP1, BASE, RA, lsl #3
3808 | cmp_nil CARG1
3809 | beq >1 // Stop if iterator returned nil.
3810 if (op == BC_JITERL) {
3811 | str CARG1, [TMP1, #-8]
3812 | b =>BC_JLOOP
3813 } else {
3814 | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch.
3815 | sub PC, TMP0, #0x20000
3816 | str CARG1, [TMP1, #-8]
3817 }
3818 |1:
3819 | ins_next
3820 break;
3821
3822 case BC_LOOP:
3823 | // RA = base, RC = target (loop extent)
3824 | // Note: RA/RC are only used by the trace recorder to determine scope/extent.
3825 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
3826 |.if JIT
3827 | hotloop
3828 |.endif
3829 | // Fall through. Assumes BC_ILOOP follows.
3830 break;
3831
3832 case BC_ILOOP:
3833 | // RA = base, RC = target (loop extent)
3834 | ins_next
3835 break;
3836
3837 case BC_JLOOP:
3838 |.if JIT
3839 | // RA = base (ignored), RC = traceno
3840 | ldr CARG1, [GL, #GL_J(trace)]
3841 | st_vmstate wzr // Traces on ARM64 don't store the trace #, so use 0.
3842 | ldr TRACE:RC, [CARG1, RC, lsl #3]
3843 |.if PAUTH
3844 | ldr RA, TRACE:RC->mcauth
3845 |.else
3846 | ldr RA, TRACE:RC->mcode
3847 |.endif
3848 | str BASE, GL->jit_base
3849 | str L, GL->tmpbuf.L
3850 | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace.
3851 |.if PAUTH
3852 | braa RA, RC
3853 |.else
3854 | br RA
3855 |.endif
3856 |.endif
3857 break;
3858
3859 case BC_JMP:
3860 | // RA = base (only used by trace recorder), RC = target
3861 | add RC, PC, RC, lsl #2
3862 | sub PC, RC, #0x20000
3863 | ins_next
3864 break;
3865
3866 /* -- Function headers -------------------------------------------------- */
3867
3868 case BC_FUNCF:
3869 |.if JIT
3870 | hotcall
3871 |.endif
3872 case BC_FUNCV: /* NYI: compiled vararg functions. */
3873 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3874 break;
3875
3876 case BC_JFUNCF:
3877#if !LJ_HASJIT
3878 break;
3879#endif
3880 case BC_IFUNCF:
3881 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3882 | ldr CARG1, L->maxstack
3883 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3884 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3885 | cmp RA, CARG1
3886 | bhi ->vm_growstack_l
3887 |2:
3888 | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters.
3889 | blo >3
3890 if (op == BC_JFUNCF) {
3891 | decode_RD RC, INS
3892 | b =>BC_JLOOP
3893 } else {
3894 | ins_next
3895 }
3896 |
3897 |3: // Clear missing parameters.
3898 | str TISNIL, [BASE, NARGS8:RC]
3899 | add NARGS8:RC, NARGS8:RC, #8
3900 | b <2
3901 break;
3902
3903 case BC_JFUNCV:
3904#if !LJ_HASJIT
3905 break;
3906#endif
3907 | NYI // NYI: compiled vararg functions
3908 break; /* NYI: compiled vararg functions. */
3909
3910 case BC_IFUNCV:
3911 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3912 | ldr CARG1, L->maxstack
3913 | movn TMP0, #~LJ_TFUNC
3914 | add TMP2, BASE, RC
3915 | add LFUNC:CARG3, CARG3, TMP0, lsl #47
3916 | add RA, RA, RC
3917 | sub CARG1, CARG1, #8
3918 | add TMP0, RC, #16+FRAME_VARG
3919 | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
3920 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3921 | cmp RA, CARG1
3922 | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
3923 | bhs ->vm_growstack_l
3924 | sub RC, TMP2, #16
3925 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3926 | mov RA, BASE
3927 | mov BASE, TMP2
3928 | cbz TMP1, >2
3929 |1:
3930 | cmp RA, RC // Fewer args than parameters?
3931 | bhs >3
3932 | ldr TMP0, [RA]
3933 | sub TMP1, TMP1, #1
3934 | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC).
3935 | str TMP0, [TMP2], #8
3936 | cbnz TMP1, <1
3937 |2:
3938 | ins_next
3939 |
3940 |3:
3941 | sub TMP1, TMP1, #1
3942 | str TISNIL, [TMP2], #8
3943 | cbz TMP1, <2
3944 | b <3
3945 break;
3946
3947 case BC_FUNCC:
3948 case BC_FUNCCW:
3949 | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
3950 if (op == BC_FUNCC) {
3951 | ldr CARG4, CFUNC:CARG3->f
3952 } else {
3953 | ldr CARG4, GL->wrapf
3954 }
3955 | add CARG2, RA, NARGS8:RC
3956 | ldr CARG1, L->maxstack
3957 | add RC, BASE, NARGS8:RC
3958 | cmp CARG2, CARG1
3959 | stp BASE, RC, L->base
3960 if (op == BC_FUNCCW) {
3961 | ldr CARG2, CFUNC:CARG3->f
3962 }
3963 | mv_vmstate TMP0w, C
3964 | mov CARG1, L
3965 | bhi ->vm_growstack_c // Need to grow stack.
3966 | st_vmstate TMP0w
3967 | blr_auth CARG4 // (lua_State *L [, lua_CFunction f])
3968 | // Returns nresults.
3969 | ldp BASE, TMP1, L->base
3970 | str L, GL->cur_L
3971 | sbfiz RC, CRET1, #3, #32
3972 | st_vmstate ST_INTERP
3973 | ldr PC, [BASE, FRAME_PC]
3974 | sub RA, TMP1, RC // RA = L->top - nresults*8
3975 | b ->vm_returnc
3976 break;
3977
3978 /* ---------------------------------------------------------------------- */
3979
3980 default:
3981 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3982 exit(2);
3983 break;
3984 }
3985}
3986
3987static int build_backend(BuildCtx *ctx)
3988{
3989 int op;
3990
3991 dasm_growpc(Dst, BC__MAX);
3992
3993 build_subroutines(ctx);
3994
3995 |.code_op
3996 for (op = 0; op < BC__MAX; op++)
3997 build_ins(ctx, (BCOp)op, op);
3998
3999 return BC__MAX;
4000}
4001
4002/* Emit pseudo frame-info for all assembler functions. */
4003static void emit_asm_debug(BuildCtx *ctx)
4004{
4005 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4006 int i;
4007 switch (ctx->mode) {
4008 case BUILD_elfasm:
4009 fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
4010 fprintf(ctx->fp,
4011 ".Lframe0:\n"
4012 "\t.long .LECIE0-.LSCIE0\n"
4013 ".LSCIE0:\n"
4014 "\t.long 0xffffffff\n"
4015 "\t.byte 0x1\n"
4016 "\t.string \"\"\n"
4017 "\t.uleb128 0x1\n"
4018 "\t.sleb128 -8\n"
4019 "\t.byte 30\n" /* Return address is in lr. */
4020 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
4021 "\t.align 3\n"
4022 ".LECIE0:\n\n");
4023 fprintf(ctx->fp,
4024 ".LSFDE0:\n"
4025 "\t.long .LEFDE0-.LASFDE0\n"
4026 ".LASFDE0:\n"
4027 "\t.long .Lframe0\n"
4028 "\t.quad .Lbegin\n"
4029 "\t.quad %d\n"
4030 "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
4031 "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
4032 fcofs);
4033 for (i = 19; i <= 28; i++) /* offset x19-x28 */
4034 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
4035 for (i = 8; i <= 15; i++) /* offset d8-d15 */
4036 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
4037 64+i, i+(3+(28-19+1)-8));
4038 fprintf(ctx->fp,
4039 "\t.align 3\n"
4040 ".LEFDE0:\n\n");
4041#if LJ_HASFFI
4042 fprintf(ctx->fp,
4043 ".LSFDE1:\n"
4044 "\t.long .LEFDE1-.LASFDE1\n"
4045 ".LASFDE1:\n"
4046 "\t.long .Lframe0\n"
4047 "\t.quad lj_vm_ffi_call\n"
4048 "\t.quad %d\n"
4049 "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
4050 "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
4051 "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
4052 "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
4053 "\t.align 3\n"
4054 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4055#endif
4056#if !LJ_NO_UNWIND
4057 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
4058 fprintf(ctx->fp,
4059 ".Lframe1:\n"
4060 "\t.long .LECIE1-.LSCIE1\n"
4061 ".LSCIE1:\n"
4062 "\t.long 0\n"
4063 "\t.byte 0x1\n"
4064 "\t.string \"zPR\"\n"
4065 "\t.uleb128 0x1\n"
4066 "\t.sleb128 -8\n"
4067 "\t.byte 30\n" /* Return address is in lr. */
4068 "\t.uleb128 6\n" /* augmentation length */
4069 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4070 "\t.long lj_err_unwind_dwarf-.\n"
4071 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4072 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
4073 "\t.align 3\n"
4074 ".LECIE1:\n\n");
4075 fprintf(ctx->fp,
4076 ".LSFDE2:\n"
4077 "\t.long .LEFDE2-.LASFDE2\n"
4078 ".LASFDE2:\n"
4079 "\t.long .LASFDE2-.Lframe1\n"
4080 "\t.long .Lbegin-.\n"
4081 "\t.long %d\n"
4082 "\t.uleb128 0\n" /* augmentation length */
4083 "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
4084 "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
4085 fcofs);
4086 for (i = 19; i <= 28; i++) /* offset x19-x28 */
4087 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
4088 for (i = 8; i <= 15; i++) /* offset d8-d15 */
4089 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
4090 64+i, i+(3+(28-19+1)-8));
4091 fprintf(ctx->fp,
4092 "\t.align 3\n"
4093 ".LEFDE2:\n\n");
4094#if LJ_HASFFI
4095 fprintf(ctx->fp,
4096 ".Lframe2:\n"
4097 "\t.long .LECIE2-.LSCIE2\n"
4098 ".LSCIE2:\n"
4099 "\t.long 0\n"
4100 "\t.byte 0x1\n"
4101 "\t.string \"zR\"\n"
4102 "\t.uleb128 0x1\n"
4103 "\t.sleb128 -8\n"
4104 "\t.byte 30\n" /* Return address is in lr. */
4105 "\t.uleb128 1\n" /* augmentation length */
4106 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4107 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
4108 "\t.align 3\n"
4109 ".LECIE2:\n\n");
4110 fprintf(ctx->fp,
4111 ".LSFDE3:\n"
4112 "\t.long .LEFDE3-.LASFDE3\n"
4113 ".LASFDE3:\n"
4114 "\t.long .LASFDE3-.Lframe2\n"
4115 "\t.long lj_vm_ffi_call-.\n"
4116 "\t.long %d\n"
4117 "\t.uleb128 0\n" /* augmentation length */
4118 "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
4119 "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
4120 "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
4121 "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
4122 "\t.align 3\n"
4123 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4124#endif
4125#endif
4126 break;
4127#if !LJ_NO_UNWIND
4128 case BUILD_machasm: {
4129#if LJ_HASFFI
4130 int fcsize = 0;
4131#endif
4132 int j;
4133 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4134 fprintf(ctx->fp,
4135 "EH_frame1:\n"
4136 "\t.set L$set$x,LECIEX-LSCIEX\n"
4137 "\t.long L$set$x\n"
4138 "LSCIEX:\n"
4139 "\t.long 0\n"
4140 "\t.byte 0x1\n"
4141 "\t.ascii \"zPR\\0\"\n"
4142 "\t.uleb128 0x1\n"
4143 "\t.sleb128 -8\n"
4144 "\t.byte 30\n" /* Return address is in lr. */
4145 "\t.uleb128 6\n" /* augmentation length */
4146 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4147 "\t.long _lj_err_unwind_dwarf@GOT-.\n"
4148 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4149 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
4150 "\t.align 3\n"
4151 "LECIEX:\n\n");
4152 for (j = 0; j < ctx->nsym; j++) {
4153 const char *name = ctx->sym[j].name;
4154 int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
4155 if (size == 0) continue;
4156#if LJ_HASFFI
4157 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4158#endif
4159 fprintf(ctx->fp,
4160 "LSFDE%d:\n"
4161 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4162 "\t.long L$set$%d\n"
4163 "LASFDE%d:\n"
4164 "\t.long LASFDE%d-EH_frame1\n"
4165 "\t.long %s-.\n"
4166 "\t.long %d\n"
4167 "\t.uleb128 0\n" /* augmentation length */
4168 "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
4169 "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
4170 j, j, j, j, j, j, j, name, size);
4171 for (i = 19; i <= 28; i++) /* offset x19-x28 */
4172 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
4173 for (i = 8; i <= 15; i++) /* offset d8-d15 */
4174 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
4175 64+i, i+(3+(28-19+1)-8));
4176 fprintf(ctx->fp,
4177 "\t.align 3\n"
4178 "LEFDE%d:\n\n", j);
4179 }
4180#if LJ_HASFFI
4181 if (fcsize) {
4182 fprintf(ctx->fp,
4183 "EH_frame2:\n"
4184 "\t.set L$set$y,LECIEY-LSCIEY\n"
4185 "\t.long L$set$y\n"
4186 "LSCIEY:\n"
4187 "\t.long 0\n"
4188 "\t.byte 0x1\n"
4189 "\t.ascii \"zR\\0\"\n"
4190 "\t.uleb128 0x1\n"
4191 "\t.sleb128 -8\n"
4192 "\t.byte 30\n" /* Return address is in lr. */
4193 "\t.uleb128 1\n" /* augmentation length */
4194 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4195 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
4196 "\t.align 3\n"
4197 "LECIEY:\n\n");
4198 fprintf(ctx->fp,
4199 "LSFDEY:\n"
4200 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4201 "\t.long L$set$yy\n"
4202 "LASFDEY:\n"
4203 "\t.long LASFDEY-EH_frame2\n"
4204 "\t.long _lj_vm_ffi_call-.\n"
4205 "\t.long %d\n"
4206 "\t.uleb128 0\n" /* augmentation length */
4207 "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
4208 "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
4209 "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
4210 "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
4211 "\t.align 3\n"
4212 "LEFDEY:\n\n", fcsize);
4213 }
4214#endif
4215 }
4216 break;
4217#endif
4218 default:
4219 break;
4220 }
4221}
4222
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index c4c0a416..8760a1f6 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,9 @@
1|// Low-level VM code for MIPS CPUs. 1|// Low-level VM code for MIPS CPUs.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// MIPS soft-float support contributed by Djordje Kovacevic and
6|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4| 7|
5|.arch mips 8|.arch mips
6|.section code_op, code_sub 9|.section code_op, code_sub
@@ -18,6 +21,12 @@
18|// Fixed register assignments for the interpreter. 21|// Fixed register assignments for the interpreter.
19|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra 22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20| 23|
24|.macro .FPU, a, b
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
21|// The following must be C callee-save (but BASE is often refetched). 30|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r16 // Base of current Lua stack frame. 31|.define BASE, r16 // Base of current Lua stack frame.
23|.define KBASE, r17 // Constants of current Lua function. 32|.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
25|.define DISPATCH, r19 // Opcode dispatch table. 34|.define DISPATCH, r19 // Opcode dispatch table.
26|.define LREG, r20 // Register holding lua_State (also in SAVE_L). 35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. 36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
28|// NYI: r22 currently unused.
29| 37|
30|.define JGL, r30 // On-trace: global_State + 32768. 38|.define JGL, r30 // On-trace: global_State + 32768.
31| 39|
32|// Constants for type-comparisons, stores and conversions. C callee-save. 40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNUM, r22
33|.define TISNIL, r30 42|.define TISNIL, r30
43|.if FPU
34|.define TOBIT, f30 // 2^52 + 2^51. 44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
35| 46|
36|// The following temporaries are not saved across C calls, except for RA. 47|// The following temporaries are not saved across C calls, except for RA.
37|.define RA, r23 // Callee-save. 48|.define RA, r23 // Callee-save.
@@ -46,7 +57,7 @@
46|.define TMP2, r14 57|.define TMP2, r14
47|.define TMP3, r15 58|.define TMP3, r15
48| 59|
49|// Calling conventions. 60|// MIPS o32 calling convention.
50|.define CFUNCADDR, r25 61|.define CFUNCADDR, r25
51|.define CARG1, r4 62|.define CARG1, r4
52|.define CARG2, r5 63|.define CARG2, r5
@@ -56,13 +67,33 @@
56|.define CRET1, r2 67|.define CRET1, r2
57|.define CRET2, r3 68|.define CRET2, r3
58| 69|
70|.if ENDIAN_LE
71|.define SFRETLO, CRET1
72|.define SFRETHI, CRET2
73|.define SFARG1LO, CARG1
74|.define SFARG1HI, CARG2
75|.define SFARG2LO, CARG3
76|.define SFARG2HI, CARG4
77|.else
78|.define SFRETLO, CRET2
79|.define SFRETHI, CRET1
80|.define SFARG1LO, CARG2
81|.define SFARG1HI, CARG1
82|.define SFARG2LO, CARG4
83|.define SFARG2HI, CARG3
84|.endif
85|
86|.if FPU
59|.define FARG1, f12 87|.define FARG1, f12
60|.define FARG2, f14 88|.define FARG2, f14
61| 89|
62|.define FRET1, f0 90|.define FRET1, f0
63|.define FRET2, f2 91|.define FRET2, f2
92|.endif
64| 93|
65|// Stack layout while in interpreter. Must match with lj_frame.h. 94|// Stack layout while in interpreter. Must match with lj_frame.h.
95|.if FPU // MIPS32 hard-float.
96|
66|.define CFRAME_SPACE, 112 // Delta for sp. 97|.define CFRAME_SPACE, 112 // Delta for sp.
67| 98|
68|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. 99|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
72|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. 103|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
73|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. 104|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
74|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. 105|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
106|
107|.else // MIPS32 soft-float
108|
109|.define CFRAME_SPACE, 64 // Delta for sp.
110|
111|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
112|.define SAVE_NRES, 72(sp)
113|.define SAVE_CFRAME, 68(sp)
114|.define SAVE_L, 64(sp)
115|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
116|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
117|
118|.endif
119|
75|.define SAVE_PC, 20(sp) 120|.define SAVE_PC, 20(sp)
76|.define ARG5, 16(sp) 121|.define ARG5, 16(sp)
77|.define CSAVE_4, 12(sp) 122|.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
83|.define ARG5_OFS, 16 128|.define ARG5_OFS, 16
84|.define SAVE_MULTRES, ARG5 129|.define SAVE_MULTRES, ARG5
85| 130|
131|//-----------------------------------------------------------------------
132|
86|.macro saveregs 133|.macro saveregs
87| addiu sp, sp, -CFRAME_SPACE 134| addiu sp, sp, -CFRAME_SPACE
88| sw ra, SAVE_GPR_+9*4(sp) 135| sw ra, SAVE_GPR_+9*4(sp)
89| sw r30, SAVE_GPR_+8*4(sp) 136| sw r30, SAVE_GPR_+8*4(sp)
90| sdc1 f30, SAVE_FPR_+5*8(sp) 137| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91| sw r23, SAVE_GPR_+7*4(sp) 138| sw r23, SAVE_GPR_+7*4(sp)
92| sw r22, SAVE_GPR_+6*4(sp) 139| sw r22, SAVE_GPR_+6*4(sp)
93| sdc1 f28, SAVE_FPR_+4*8(sp) 140| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94| sw r21, SAVE_GPR_+5*4(sp) 141| sw r21, SAVE_GPR_+5*4(sp)
95| sw r20, SAVE_GPR_+4*4(sp) 142| sw r20, SAVE_GPR_+4*4(sp)
96| sdc1 f26, SAVE_FPR_+3*8(sp) 143| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97| sw r19, SAVE_GPR_+3*4(sp) 144| sw r19, SAVE_GPR_+3*4(sp)
98| sw r18, SAVE_GPR_+2*4(sp) 145| sw r18, SAVE_GPR_+2*4(sp)
99| sdc1 f24, SAVE_FPR_+2*8(sp) 146| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100| sw r17, SAVE_GPR_+1*4(sp) 147| sw r17, SAVE_GPR_+1*4(sp)
101| sw r16, SAVE_GPR_+0*4(sp) 148| sw r16, SAVE_GPR_+0*4(sp)
102| sdc1 f22, SAVE_FPR_+1*8(sp) 149| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
103| sdc1 f20, SAVE_FPR_+0*8(sp) 150| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104|.endmacro 151|.endmacro
105| 152|
106|.macro restoreregs_ret 153|.macro restoreregs_ret
107| lw ra, SAVE_GPR_+9*4(sp) 154| lw ra, SAVE_GPR_+9*4(sp)
108| lw r30, SAVE_GPR_+8*4(sp) 155| lw r30, SAVE_GPR_+8*4(sp)
109| ldc1 f30, SAVE_FPR_+5*8(sp) 156| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110| lw r23, SAVE_GPR_+7*4(sp) 157| lw r23, SAVE_GPR_+7*4(sp)
111| lw r22, SAVE_GPR_+6*4(sp) 158| lw r22, SAVE_GPR_+6*4(sp)
112| ldc1 f28, SAVE_FPR_+4*8(sp) 159| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113| lw r21, SAVE_GPR_+5*4(sp) 160| lw r21, SAVE_GPR_+5*4(sp)
114| lw r20, SAVE_GPR_+4*4(sp) 161| lw r20, SAVE_GPR_+4*4(sp)
115| ldc1 f26, SAVE_FPR_+3*8(sp) 162| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116| lw r19, SAVE_GPR_+3*4(sp) 163| lw r19, SAVE_GPR_+3*4(sp)
117| lw r18, SAVE_GPR_+2*4(sp) 164| lw r18, SAVE_GPR_+2*4(sp)
118| ldc1 f24, SAVE_FPR_+2*8(sp) 165| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119| lw r17, SAVE_GPR_+1*4(sp) 166| lw r17, SAVE_GPR_+1*4(sp)
120| lw r16, SAVE_GPR_+0*4(sp) 167| lw r16, SAVE_GPR_+0*4(sp)
121| ldc1 f22, SAVE_FPR_+1*8(sp) 168| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
122| ldc1 f20, SAVE_FPR_+0*8(sp) 169| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123| jr ra 170| jr ra
124| addiu sp, sp, CFRAME_SPACE 171| addiu sp, sp, CFRAME_SPACE
125|.endmacro 172|.endmacro
@@ -138,11 +185,12 @@
138|.type NODE, Node 185|.type NODE, Node
139|.type NARGS8, int 186|.type NARGS8, int
140|.type TRACE, GCtrace 187|.type TRACE, GCtrace
188|.type SBUF, SBuf
141| 189|
142|//----------------------------------------------------------------------- 190|//-----------------------------------------------------------------------
143| 191|
144|// Trap for not-yet-implemented parts. 192|// Trap for not-yet-implemented parts.
145|.macro NYI; .long 0xf0f0f0f0; .endmacro 193|.macro NYI; .long 0xec1cf0f0; .endmacro
146| 194|
147|// Macros to mark delay slots. 195|// Macros to mark delay slots.
148|.macro ., a; a; .endmacro 196|.macro ., a; a; .endmacro
@@ -152,13 +200,23 @@
152|//----------------------------------------------------------------------- 200|//-----------------------------------------------------------------------
153| 201|
154|// Endian-specific defines. 202|// Endian-specific defines.
155|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) 203|.if ENDIAN_LE
156|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) 204|.define FRAME_PC, -4
157|.define HI, LJ_ENDIAN_SELECT(4,0) 205|.define FRAME_FUNC, -8
158|.define LO, LJ_ENDIAN_SELECT(0,4) 206|.define HI, 4
159|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) 207|.define LO, 0
160|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) 208|.define OFS_RD, 2
161|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) 209|.define OFS_RA, 1
210|.define OFS_OP, 0
211|.else
212|.define FRAME_PC, -8
213|.define FRAME_FUNC, -4
214|.define HI, 0
215|.define LO, 4
216|.define OFS_RD, 0
217|.define OFS_RA, 2
218|.define OFS_OP, 3
219|.endif
162| 220|
163|// Instruction decode. 221|// Instruction decode.
164|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro 222|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
353 |. sll TMP2, TMP2, 3 411 |. sll TMP2, TMP2, 3
354 |1: 412 |1:
355 | addiu TMP1, TMP1, -8 413 | addiu TMP1, TMP1, -8
356 | ldc1 f0, 0(RA) 414 | lw SFRETHI, HI(RA)
415 | lw SFRETLO, LO(RA)
357 | addiu RA, RA, 8 416 | addiu RA, RA, 8
358 | sdc1 f0, 0(BASE) 417 | sw SFRETHI, HI(BASE)
418 | sw SFRETLO, LO(BASE)
359 | bnez TMP1, <1 419 | bnez TMP1, <1
360 |. addiu BASE, BASE, 8 420 |. addiu BASE, BASE, 8
361 | 421 |
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
424 | and sp, CARG1, AT 484 | and sp, CARG1, AT
425 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 485 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
426 | lw L, SAVE_L 486 | lw L, SAVE_L
427 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 487 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
428 | li TISNIL, LJ_TNIL 489 | li TISNIL, LJ_TNIL
429 | lw BASE, L->base 490 | lw BASE, L->base
430 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 491 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
431 | mtc1 TMP3, TOBIT 492 | .FPU mtc1 TMP3, TOBIT
432 | li TMP1, LJ_TFALSE 493 | li TMP1, LJ_TFALSE
433 | li_vmstate INTERP 494 | li_vmstate INTERP
434 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. 495 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
435 | cvt.d.s TOBIT, TOBIT 496 | .FPU cvt.d.s TOBIT, TOBIT
436 | addiu RA, BASE, -8 // Results start at BASE-8. 497 | addiu RA, BASE, -8 // Results start at BASE-8.
437 | addiu DISPATCH, DISPATCH, GG_G2DISP 498 | addiu DISPATCH, DISPATCH, GG_G2DISP
438 | sw TMP1, HI(RA) // Prepend false to error message. 499 | sw TMP1, HI(RA) // Prepend false to error message.
@@ -440,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx)
440 | b ->vm_returnc 501 | b ->vm_returnc
441 |. li RD, 16 // 2 results: false + error message. 502 |. li RD, 16 // 2 results: false + error message.
442 | 503 |
504 |->vm_unwind_stub: // Jump to exit stub from unwinder.
505 | jr CARG1
506 |. move ra, CARG2
507 |
443 |//----------------------------------------------------------------------- 508 |//-----------------------------------------------------------------------
444 |//-- Grow stack for calls ----------------------------------------------- 509 |//-- Grow stack for calls -----------------------------------------------
445 |//----------------------------------------------------------------------- 510 |//-----------------------------------------------------------------------
@@ -486,21 +551,23 @@ static void build_subroutines(BuildCtx *ctx)
486 | addiu DISPATCH, DISPATCH, GG_G2DISP 551 | addiu DISPATCH, DISPATCH, GG_G2DISP
487 | sw r0, SAVE_NRES 552 | sw r0, SAVE_NRES
488 | sw r0, SAVE_ERRF 553 | sw r0, SAVE_ERRF
489 | sw TMP0, L->cframe 554 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
490 | sw r0, SAVE_CFRAME 555 | sw r0, SAVE_CFRAME
491 | beqz TMP1, >3 556 | beqz TMP1, >3
492 |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 557 |. sw TMP0, L->cframe
493 | 558 |
494 | // Resume after yield (like a return). 559 | // Resume after yield (like a return).
560 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
495 | move RA, BASE 561 | move RA, BASE
496 | lw BASE, L->base 562 | lw BASE, L->base
563 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
497 | lw TMP1, L->top 564 | lw TMP1, L->top
498 | lw PC, FRAME_PC(BASE) 565 | lw PC, FRAME_PC(BASE)
499 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 566 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
500 | subu RD, TMP1, BASE 567 | subu RD, TMP1, BASE
501 | mtc1 TMP3, TOBIT 568 | .FPU mtc1 TMP3, TOBIT
502 | sb r0, L->status 569 | sb r0, L->status
503 | cvt.d.s TOBIT, TOBIT 570 | .FPU cvt.d.s TOBIT, TOBIT
504 | li_vmstate INTERP 571 | li_vmstate INTERP
505 | addiu RD, RD, 8 572 | addiu RD, RD, 8
506 | st_vmstate 573 | st_vmstate
@@ -525,25 +592,27 @@ static void build_subroutines(BuildCtx *ctx)
525 | 592 |
526 |1: // Entry point for vm_pcall above (PC = ftype). 593 |1: // Entry point for vm_pcall above (PC = ftype).
527 | lw TMP1, L:CARG1->cframe 594 | lw TMP1, L:CARG1->cframe
528 | sw CARG3, SAVE_NRES
529 | move L, CARG1 595 | move L, CARG1
530 | sw CARG1, SAVE_L 596 | sw CARG3, SAVE_NRES
531 | move BASE, CARG2
532 | sw sp, L->cframe // Add our C frame to cframe chain.
533 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 597 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
598 | sw CARG1, SAVE_L
599 | move BASE, CARG2
600 | addiu DISPATCH, DISPATCH, GG_G2DISP
534 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 601 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | sw TMP1, SAVE_CFRAME 602 | sw TMP1, SAVE_CFRAME
536 | addiu DISPATCH, DISPATCH, GG_G2DISP 603 | sw sp, L->cframe // Add our C frame to cframe chain.
537 | 604 |
538 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 605 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
606 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
539 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 607 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
540 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 608 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
609 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
541 | lw TMP1, L->top 610 | lw TMP1, L->top
542 | mtc1 TMP3, TOBIT 611 | .FPU mtc1 TMP3, TOBIT
543 | addu PC, PC, BASE 612 | addu PC, PC, BASE
544 | subu NARGS8:RC, TMP1, BASE 613 | subu NARGS8:RC, TMP1, BASE
545 | subu PC, PC, TMP2 // PC = frame delta + frame type 614 | subu PC, PC, TMP2 // PC = frame delta + frame type
546 | cvt.d.s TOBIT, TOBIT 615 | .FPU cvt.d.s TOBIT, TOBIT
547 | li_vmstate INTERP 616 | li_vmstate INTERP
548 | li TISNIL, LJ_TNIL 617 | li TISNIL, LJ_TNIL
549 | st_vmstate 618 | st_vmstate
@@ -566,20 +635,21 @@ static void build_subroutines(BuildCtx *ctx)
566 | lw TMP0, L:CARG1->stack 635 | lw TMP0, L:CARG1->stack
567 | sw CARG1, SAVE_L 636 | sw CARG1, SAVE_L
568 | lw TMP1, L->top 637 | lw TMP1, L->top
638 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
569 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 639 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
570 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 640 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
571 | lw TMP1, L->cframe 641 | lw TMP1, L->cframe
572 | sw sp, L->cframe // Add our C frame to cframe chain. 642 | addiu DISPATCH, DISPATCH, GG_G2DISP
573 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 643 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
574 | sw r0, SAVE_ERRF // No error function. 644 | sw r0, SAVE_ERRF // No error function.
575 | move CFUNCADDR, CARG4 645 | sw TMP1, SAVE_CFRAME
646 | sw sp, L->cframe // Add our C frame to cframe chain.
647 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
576 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) 648 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
577 |. sw TMP1, SAVE_CFRAME 649 |. move CFUNCADDR, CARG4
578 | move BASE, CRET1 650 | move BASE, CRET1
579 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
580 | li PC, FRAME_CP
581 | bnez CRET1, <3 // Else continue with the call. 651 | bnez CRET1, <3 // Else continue with the call.
582 |. addiu DISPATCH, DISPATCH, GG_G2DISP 652 |. li PC, FRAME_CP
583 | b ->vm_leave_cp // No base? Just remove C frame. 653 | b ->vm_leave_cp // No base? Just remove C frame.
584 |. nop 654 |. nop
585 | 655 |
@@ -624,7 +694,8 @@ static void build_subroutines(BuildCtx *ctx)
624 |->cont_cat: // RA = resultptr, RB = meta base 694 |->cont_cat: // RA = resultptr, RB = meta base
625 | lw INS, -4(PC) 695 | lw INS, -4(PC)
626 | addiu CARG2, RB, -16 696 | addiu CARG2, RB, -16
627 | ldc1 f0, 0(RA) 697 | lw SFRETHI, HI(RA)
698 | lw SFRETLO, LO(RA)
628 | decode_RB8a MULTRES, INS 699 | decode_RB8a MULTRES, INS
629 | decode_RA8a RA, INS 700 | decode_RA8a RA, INS
630 | decode_RB8b MULTRES 701 | decode_RB8b MULTRES
@@ -632,11 +703,13 @@ static void build_subroutines(BuildCtx *ctx)
632 | addu TMP1, BASE, MULTRES 703 | addu TMP1, BASE, MULTRES
633 | sw BASE, L->base 704 | sw BASE, L->base
634 | subu CARG3, CARG2, TMP1 705 | subu CARG3, CARG2, TMP1
706 | sw SFRETHI, HI(CARG2)
635 | bne TMP1, CARG2, ->BC_CAT_Z 707 | bne TMP1, CARG2, ->BC_CAT_Z
636 |. sdc1 f0, 0(CARG2) 708 |. sw SFRETLO, LO(CARG2)
637 | addu RA, BASE, RA 709 | addu RA, BASE, RA
710 | sw SFRETHI, HI(RA)
638 | b ->cont_nop 711 | b ->cont_nop
639 |. sdc1 f0, 0(RA) 712 |. sw SFRETLO, LO(RA)
640 | 713 |
641 |//-- Table indexing metamethods ----------------------------------------- 714 |//-- Table indexing metamethods -----------------------------------------
642 | 715 |
@@ -659,10 +732,9 @@ static void build_subroutines(BuildCtx *ctx)
659 |. sw TMP1, HI(CARG3) 732 |. sw TMP1, HI(CARG3)
660 | 733 |
661 |->vmeta_tgetb: // TMP0 = index 734 |->vmeta_tgetb: // TMP0 = index
662 | mtc1 TMP0, f0
663 | cvt.d.w f0, f0
664 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 735 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
665 | sdc1 f0, 0(CARG3) 736 | sw TMP0, LO(CARG3)
737 | sw TISNUM, HI(CARG3)
666 | 738 |
667 |->vmeta_tgetv: 739 |->vmeta_tgetv:
668 |1: 740 |1:
@@ -674,9 +746,11 @@ static void build_subroutines(BuildCtx *ctx)
674 | // Returns TValue * (finished) or NULL (metamethod). 746 | // Returns TValue * (finished) or NULL (metamethod).
675 | beqz CRET1, >3 747 | beqz CRET1, >3
676 |. addiu TMP1, BASE, -FRAME_CONT 748 |. addiu TMP1, BASE, -FRAME_CONT
677 | ldc1 f0, 0(CRET1) 749 | lw SFARG1HI, HI(CRET1)
750 | lw SFARG2HI, LO(CRET1)
678 | ins_next1 751 | ins_next1
679 | sdc1 f0, 0(RA) 752 | sw SFARG1HI, HI(RA)
753 | sw SFARG2HI, LO(RA)
680 | ins_next2 754 | ins_next2
681 | 755 |
682 |3: // Call __index metamethod. 756 |3: // Call __index metamethod.
@@ -688,6 +762,17 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 762 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 763 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 764 |
765 |->vmeta_tgetr:
766 | load_got lj_tab_getinth
767 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
768 |. nop
769 | // Returns cTValue * or NULL.
770 | beqz CRET1, ->BC_TGETR_Z
771 |. move SFARG2HI, TISNIL
772 | lw SFARG2HI, HI(CRET1)
773 | b ->BC_TGETR_Z
774 |. lw SFARG2LO, LO(CRET1)
775 |
691 |//----------------------------------------------------------------------- 776 |//-----------------------------------------------------------------------
692 | 777 |
693 |->vmeta_tsets1: 778 |->vmeta_tsets1:
@@ -709,10 +794,9 @@ static void build_subroutines(BuildCtx *ctx)
709 |. sw TMP1, HI(CARG3) 794 |. sw TMP1, HI(CARG3)
710 | 795 |
711 |->vmeta_tsetb: // TMP0 = index 796 |->vmeta_tsetb: // TMP0 = index
712 | mtc1 TMP0, f0
713 | cvt.d.w f0, f0
714 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 797 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
715 | sdc1 f0, 0(CARG3) 798 | sw TMP0, LO(CARG3)
799 | sw TISNUM, HI(CARG3)
716 | 800 |
717 |->vmeta_tsetv: 801 |->vmeta_tsetv:
718 |1: 802 |1:
@@ -722,11 +806,13 @@ static void build_subroutines(BuildCtx *ctx)
722 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 806 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
723 |. move CARG1, L 807 |. move CARG1, L
724 | // Returns TValue * (finished) or NULL (metamethod). 808 | // Returns TValue * (finished) or NULL (metamethod).
809 | lw SFARG1HI, HI(RA)
725 | beqz CRET1, >3 810 | beqz CRET1, >3
726 |. ldc1 f0, 0(RA) 811 |. lw SFARG1LO, LO(RA)
727 | // NOBARRIER: lj_meta_tset ensures the table is not black. 812 | // NOBARRIER: lj_meta_tset ensures the table is not black.
728 | ins_next1 813 | ins_next1
729 | sdc1 f0, 0(CRET1) 814 | sw SFARG1HI, HI(CRET1)
815 | sw SFARG1LO, LO(CRET1)
730 | ins_next2 816 | ins_next2
731 | 817 |
732 |3: // Call __newindex metamethod. 818 |3: // Call __newindex metamethod.
@@ -736,14 +822,27 @@ static void build_subroutines(BuildCtx *ctx)
736 | sw PC, -16+HI(BASE) // [cont|PC] 822 | sw PC, -16+HI(BASE) // [cont|PC]
737 | subu PC, BASE, TMP1 823 | subu PC, BASE, TMP1
738 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 824 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
739 | sdc1 f0, 16(BASE) // Copy value to third argument. 825 | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
826 | sw SFARG1LO, 16+LO(BASE)
740 | b ->vm_call_dispatch_f 827 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 828 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 829 |
830 |->vmeta_tsetr:
831 | load_got lj_tab_setinth
832 | sw BASE, L->base
833 | sw PC, SAVE_PC
834 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
835 |. move CARG1, L
836 | // Returns TValue *.
837 | b ->BC_TSETR_Z
838 |. nop
839 |
743 |//-- Comparison metamethods --------------------------------------------- 840 |//-- Comparison metamethods ---------------------------------------------
744 | 841 |
745 |->vmeta_comp: 842 |->vmeta_comp:
746 | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. 843 | // RA/RD point to o1/o2.
844 | move CARG2, RA
845 | move CARG3, RD
747 | load_got lj_meta_comp 846 | load_got lj_meta_comp
748 | addiu PC, PC, -4 847 | addiu PC, PC, -4
749 | sw BASE, L->base 848 | sw BASE, L->base
@@ -769,11 +868,13 @@ static void build_subroutines(BuildCtx *ctx)
769 | 868 |
770 |->cont_ra: // RA = resultptr 869 |->cont_ra: // RA = resultptr
771 | lbu TMP1, -4+OFS_RA(PC) 870 | lbu TMP1, -4+OFS_RA(PC)
772 | ldc1 f0, 0(RA) 871 | lw SFRETHI, HI(RA)
872 | lw SFRETLO, LO(RA)
773 | sll TMP1, TMP1, 3 873 | sll TMP1, TMP1, 3
774 | addu TMP1, BASE, TMP1 874 | addu TMP1, BASE, TMP1
875 | sw SFRETHI, HI(TMP1)
775 | b ->cont_nop 876 | b ->cont_nop
776 |. sdc1 f0, 0(TMP1) 877 |. sw SFRETLO, LO(TMP1)
777 | 878 |
778 |->cont_condt: // RA = resultptr 879 |->cont_condt: // RA = resultptr
779 | lw TMP0, HI(RA) 880 | lw TMP0, HI(RA)
@@ -788,8 +889,11 @@ static void build_subroutines(BuildCtx *ctx)
788 |. addiu TMP2, AT, -1 // Branch if result is false. 889 |. addiu TMP2, AT, -1 // Branch if result is false.
789 | 890 |
790 |->vmeta_equal: 891 |->vmeta_equal:
791 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. 892 | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
792 | load_got lj_meta_equal 893 | load_got lj_meta_equal
894 | move CARG2, SFARG1LO
895 | move CARG3, SFARG2LO
896 | move CARG4, TMP0
793 | addiu PC, PC, -4 897 | addiu PC, PC, -4
794 | sw BASE, L->base 898 | sw BASE, L->base
795 | sw PC, SAVE_PC 899 | sw PC, SAVE_PC
@@ -813,17 +917,31 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 917 |. nop
814 |.endif 918 |.endif
815 | 919 |
920 |->vmeta_istype:
921 | load_got lj_meta_istype
922 | addiu PC, PC, -4
923 | sw BASE, L->base
924 | srl CARG2, RA, 3
925 | srl CARG3, RD, 3
926 | sw PC, SAVE_PC
927 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
928 |. move CARG1, L
929 | b ->cont_nop
930 |. nop
931 |
816 |//-- Arithmetic metamethods --------------------------------------------- 932 |//-- Arithmetic metamethods ---------------------------------------------
817 | 933 |
818 |->vmeta_unm: 934 |->vmeta_unm:
819 | move CARG4, CARG3 935 | move RC, RB
820 | 936 |
821 |->vmeta_arith: 937 |->vmeta_arith:
822 | load_got lj_meta_arith 938 | load_got lj_meta_arith
823 | decode_OP1 TMP0, INS 939 | decode_OP1 TMP0, INS
824 | sw BASE, L->base 940 | sw BASE, L->base
825 | sw PC, SAVE_PC
826 | move CARG2, RA 941 | move CARG2, RA
942 | sw PC, SAVE_PC
943 | move CARG3, RB
944 | move CARG4, RC
827 | sw TMP0, ARG5 945 | sw TMP0, ARG5
828 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 946 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
829 |. move CARG1, L 947 |. move CARG1, L
@@ -931,40 +1049,52 @@ static void build_subroutines(BuildCtx *ctx)
931 | 1049 |
932 |.macro .ffunc_1, name 1050 |.macro .ffunc_1, name
933 |->ff_ .. name: 1051 |->ff_ .. name:
1052 | lw SFARG1HI, HI(BASE)
934 | beqz NARGS8:RC, ->fff_fallback 1053 | beqz NARGS8:RC, ->fff_fallback
935 |. lw CARG3, HI(BASE) 1054 |. lw SFARG1LO, LO(BASE)
936 | lw CARG1, LO(BASE)
937 |.endmacro 1055 |.endmacro
938 | 1056 |
939 |.macro .ffunc_2, name 1057 |.macro .ffunc_2, name
940 |->ff_ .. name: 1058 |->ff_ .. name:
941 | sltiu AT, NARGS8:RC, 16 1059 | sltiu AT, NARGS8:RC, 16
942 | lw CARG3, HI(BASE) 1060 | lw SFARG1HI, HI(BASE)
943 | bnez AT, ->fff_fallback 1061 | bnez AT, ->fff_fallback
944 |. lw CARG4, 8+HI(BASE) 1062 |. lw SFARG2HI, 8+HI(BASE)
945 | lw CARG1, LO(BASE) 1063 | lw SFARG1LO, LO(BASE)
946 | lw CARG2, 8+LO(BASE) 1064 | lw SFARG2LO, 8+LO(BASE)
947 |.endmacro 1065 |.endmacro
948 | 1066 |
949 |.macro .ffunc_n, name // Caveat: has delay slot! 1067 |.macro .ffunc_n, name // Caveat: has delay slot!
950 |->ff_ .. name: 1068 |->ff_ .. name:
951 | lw CARG3, HI(BASE) 1069 | lw SFARG1HI, HI(BASE)
1070 |.if FPU
1071 | ldc1 FARG1, 0(BASE)
1072 |.else
1073 | lw SFARG1LO, LO(BASE)
1074 |.endif
952 | beqz NARGS8:RC, ->fff_fallback 1075 | beqz NARGS8:RC, ->fff_fallback
953 |. ldc1 FARG1, 0(BASE) 1076 |. sltiu AT, SFARG1HI, LJ_TISNUM
954 | sltiu AT, CARG3, LJ_TISNUM
955 | beqz AT, ->fff_fallback 1077 | beqz AT, ->fff_fallback
956 |.endmacro 1078 |.endmacro
957 | 1079 |
958 |.macro .ffunc_nn, name // Caveat: has delay slot! 1080 |.macro .ffunc_nn, name // Caveat: has delay slot!
959 |->ff_ .. name: 1081 |->ff_ .. name:
960 | sltiu AT, NARGS8:RC, 16 1082 | sltiu AT, NARGS8:RC, 16
961 | lw CARG3, HI(BASE) 1083 | lw SFARG1HI, HI(BASE)
962 | bnez AT, ->fff_fallback 1084 | bnez AT, ->fff_fallback
963 |. lw CARG4, 8+HI(BASE) 1085 |. lw SFARG2HI, 8+HI(BASE)
964 | ldc1 FARG1, 0(BASE) 1086 | sltiu TMP0, SFARG1HI, LJ_TISNUM
965 | ldc1 FARG2, 8(BASE) 1087 |.if FPU
966 | sltiu TMP0, CARG3, LJ_TISNUM 1088 | ldc1 FARG1, 0(BASE)
967 | sltiu TMP1, CARG4, LJ_TISNUM 1089 |.else
1090 | lw SFARG1LO, LO(BASE)
1091 |.endif
1092 | sltiu TMP1, SFARG2HI, LJ_TISNUM
1093 |.if FPU
1094 | ldc1 FARG2, 8(BASE)
1095 |.else
1096 | lw SFARG2LO, 8+LO(BASE)
1097 |.endif
968 | and TMP0, TMP0, TMP1 1098 | and TMP0, TMP0, TMP1
969 | beqz TMP0, ->fff_fallback 1099 | beqz TMP0, ->fff_fallback
970 |.endmacro 1100 |.endmacro
@@ -980,53 +1110,55 @@ static void build_subroutines(BuildCtx *ctx)
980 |//-- Base library: checks ----------------------------------------------- 1110 |//-- Base library: checks -----------------------------------------------
981 | 1111 |
982 |.ffunc_1 assert 1112 |.ffunc_1 assert
983 | sltiu AT, CARG3, LJ_TISTRUECOND 1113 | sltiu AT, SFARG1HI, LJ_TISTRUECOND
984 | beqz AT, ->fff_fallback 1114 | beqz AT, ->fff_fallback
985 |. addiu RA, BASE, -8 1115 |. addiu RA, BASE, -8
986 | lw PC, FRAME_PC(BASE) 1116 | lw PC, FRAME_PC(BASE)
987 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1117 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
988 | addu TMP2, RA, NARGS8:RC 1118 | addu TMP2, RA, NARGS8:RC
989 | sw CARG3, HI(RA) 1119 | sw SFARG1HI, HI(RA)
990 | addiu TMP1, BASE, 8 1120 | addiu TMP1, BASE, 8
991 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 1121 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
992 |. sw CARG1, LO(RA) 1122 |. sw SFARG1LO, LO(RA)
993 |1: 1123 |1:
994 | ldc1 f0, 0(TMP1) 1124 | lw SFRETHI, HI(TMP1)
995 | sdc1 f0, -8(TMP1) 1125 | lw SFRETLO, LO(TMP1)
1126 | sw SFRETHI, -8+HI(TMP1)
1127 | sw SFRETLO, -8+LO(TMP1)
996 | bne TMP1, TMP2, <1 1128 | bne TMP1, TMP2, <1
997 |. addiu TMP1, TMP1, 8 1129 |. addiu TMP1, TMP1, 8
998 | b ->fff_res 1130 | b ->fff_res
999 |. nop 1131 |. nop
1000 | 1132 |
1001 |.ffunc type 1133 |.ffunc type
1002 | lw CARG3, HI(BASE) 1134 | lw SFARG1HI, HI(BASE)
1003 | li TMP1, LJ_TISNUM
1004 | beqz NARGS8:RC, ->fff_fallback 1135 | beqz NARGS8:RC, ->fff_fallback
1005 |. sltiu TMP0, CARG3, LJ_TISNUM 1136 |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1006 | movz TMP1, CARG3, TMP0 1137 | movn SFARG1HI, TISNUM, TMP0
1007 | not TMP1, TMP1 1138 | not TMP1, SFARG1HI
1008 | sll TMP1, TMP1, 3 1139 | sll TMP1, TMP1, 3
1009 | addu TMP1, CFUNC:RB, TMP1 1140 | addu TMP1, CFUNC:RB, TMP1
1010 | b ->fff_resn 1141 | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1011 |. ldc1 FRET1, CFUNC:TMP1->upvalue 1142 | b ->fff_restv
1143 |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1012 | 1144 |
1013 |//-- Base library: getters and setters --------------------------------- 1145 |//-- Base library: getters and setters ---------------------------------
1014 | 1146 |
1015 |.ffunc_1 getmetatable 1147 |.ffunc_1 getmetatable
1016 | li AT, LJ_TTAB 1148 | li AT, LJ_TTAB
1017 | bne CARG3, AT, >6 1149 | bne SFARG1HI, AT, >6
1018 |. li AT, LJ_TUDATA 1150 |. li AT, LJ_TUDATA
1019 |1: // Field metatable must be at same offset for GCtab and GCudata! 1151 |1: // Field metatable must be at same offset for GCtab and GCudata!
1020 | lw TAB:CARG1, TAB:CARG1->metatable 1152 | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1021 |2: 1153 |2:
1022 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1154 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1023 | beqz TAB:CARG1, ->fff_restv 1155 | beqz TAB:SFARG1LO, ->fff_restv
1024 |. li CARG3, LJ_TNIL 1156 |. li SFARG1HI, LJ_TNIL
1025 | lw TMP0, TAB:CARG1->hmask 1157 | lw TMP0, TAB:SFARG1LO->hmask
1026 | li CARG3, LJ_TTAB // Use metatable as default result. 1158 | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1027 | lw TMP1, STR:RC->hash 1159 | lw TMP1, STR:RC->sid
1028 | lw NODE:TMP2, TAB:CARG1->node 1160 | lw NODE:TMP2, TAB:SFARG1LO->node
1029 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1161 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
1030 | sll TMP0, TMP1, 5 1162 | sll TMP0, TMP1, 5
1031 | sll TMP1, TMP1, 3 1163 | sll TMP1, TMP1, 3
1032 | subu TMP1, TMP0, TMP1 1164 | subu TMP1, TMP0, TMP1
@@ -1037,7 +1169,7 @@ static void build_subroutines(BuildCtx *ctx)
1037 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 1169 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1038 | lw NODE:TMP3, NODE:TMP2->next 1170 | lw NODE:TMP3, NODE:TMP2->next
1039 | bne CARG4, AT, >4 1171 | bne CARG4, AT, >4
1040 |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 1172 |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1041 | beq TMP0, STR:RC, >5 1173 | beq TMP0, STR:RC, >5
1042 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) 1174 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1043 |4: 1175 |4:
@@ -1046,36 +1178,35 @@ static void build_subroutines(BuildCtx *ctx)
1046 | b <3 1178 | b <3
1047 |. nop 1179 |. nop
1048 |5: 1180 |5:
1049 | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. 1181 | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1050 |. nop 1182 |. nop
1051 | move CARG3, CARG2 // Return value of mt.__metatable. 1183 | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1052 | b ->fff_restv 1184 | b ->fff_restv
1053 |. move CARG1, TMP1 1185 |. move SFARG1LO, TMP1
1054 | 1186 |
1055 |6: 1187 |6:
1056 | beq CARG3, AT, <1 1188 | beq SFARG1HI, AT, <1
1057 |. sltiu TMP0, CARG3, LJ_TISNUM 1189 |. sltu AT, TISNUM, SFARG1HI
1058 | li TMP1, LJ_TISNUM 1190 | movz SFARG1HI, TISNUM, AT
1059 | movz TMP1, CARG3, TMP0 1191 | not TMP1, SFARG1HI
1060 | not TMP1, TMP1
1061 | sll TMP1, TMP1, 2 1192 | sll TMP1, TMP1, 2
1062 | addu TMP1, DISPATCH, TMP1 1193 | addu TMP1, DISPATCH, TMP1
1063 | b <2 1194 | b <2
1064 |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) 1195 |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1065 | 1196 |
1066 |.ffunc_2 setmetatable 1197 |.ffunc_2 setmetatable
1067 | // Fast path: no mt for table yet and not clearing the mt. 1198 | // Fast path: no mt for table yet and not clearing the mt.
1068 | li AT, LJ_TTAB 1199 | li AT, LJ_TTAB
1069 | bne CARG3, AT, ->fff_fallback 1200 | bne SFARG1HI, AT, ->fff_fallback
1070 |. addiu CARG4, CARG4, -LJ_TTAB 1201 |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
1071 | lw TAB:TMP1, TAB:CARG1->metatable 1202 | lw TAB:TMP1, TAB:SFARG1LO->metatable
1072 | lbu TMP3, TAB:CARG1->marked 1203 | lbu TMP3, TAB:SFARG1LO->marked
1073 | or AT, CARG4, TAB:TMP1 1204 | or AT, SFARG2HI, TAB:TMP1
1074 | bnez AT, ->fff_fallback 1205 | bnez AT, ->fff_fallback
1075 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) 1206 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1076 | beqz AT, ->fff_restv 1207 | beqz AT, ->fff_restv
1077 |. sw TAB:CARG2, TAB:CARG1->metatable 1208 |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
1078 | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv 1209 | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1079 | 1210 |
1080 |.ffunc rawget 1211 |.ffunc rawget
1081 | lw CARG4, HI(BASE) 1212 | lw CARG4, HI(BASE)
@@ -1089,90 +1220,89 @@ static void build_subroutines(BuildCtx *ctx)
1089 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1220 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1090 |. move CARG1, L 1221 |. move CARG1, L
1091 | // Returns cTValue *. 1222 | // Returns cTValue *.
1092 | b ->fff_resn 1223 | lw SFARG1HI, HI(CRET1)
1093 |. ldc1 FRET1, 0(CRET1) 1224 | b ->fff_restv
1225 |. lw SFARG1LO, LO(CRET1)
1094 | 1226 |
1095 |//-- Base library: conversions ------------------------------------------ 1227 |//-- Base library: conversions ------------------------------------------
1096 | 1228 |
1097 |.ffunc tonumber 1229 |.ffunc tonumber
1098 | // Only handles the number case inline (without a base argument). 1230 | // Only handles the number case inline (without a base argument).
1099 | lw CARG1, HI(BASE) 1231 | lw CARG1, HI(BASE)
1100 | xori AT, NARGS8:RC, 8 1232 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1101 | sltiu CARG1, CARG1, LJ_TISNUM 1233 | sltu TMP0, TISNUM, CARG1
1102 | movn CARG1, r0, AT 1234 | or AT, AT, TMP0
1103 | beqz CARG1, ->fff_fallback // Exactly one number argument. 1235 | bnez AT, ->fff_fallback
1104 |. ldc1 FRET1, 0(BASE) 1236 |. lw SFARG1HI, HI(BASE)
1105 | b ->fff_resn 1237 | b ->fff_restv
1106 |. nop 1238 |. lw SFARG1LO, LO(BASE)
1107 | 1239 |
1108 |.ffunc_1 tostring 1240 |.ffunc_1 tostring
1109 | // Only handles the string or number case inline. 1241 | // Only handles the string or number case inline.
1110 | li AT, LJ_TSTR 1242 | li AT, LJ_TSTR
1111 | // A __tostring method in the string base metatable is ignored. 1243 | // A __tostring method in the string base metatable is ignored.
1112 | beq CARG3, AT, ->fff_restv // String key? 1244 | beq SFARG1HI, AT, ->fff_restv // String key?
1113 | // Handle numbers inline, unless a number base metatable is present. 1245 | // Handle numbers inline, unless a number base metatable is present.
1114 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1246 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1115 | sltiu TMP0, CARG3, LJ_TISNUM 1247 | sltu TMP0, TISNUM, SFARG1HI
1116 | sltiu TMP1, TMP1, 1 1248 | or TMP0, TMP0, TMP1
1117 | and TMP0, TMP0, TMP1 1249 | bnez TMP0, ->fff_fallback
1118 | beqz TMP0, ->fff_fallback
1119 |. sw BASE, L->base // Add frame since C call can throw. 1250 |. sw BASE, L->base // Add frame since C call can throw.
1120 | ffgccheck 1251 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1252 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1253 | load_got lj_strfmt_number
1123 | move CARG1, L 1254 | move CARG1, L
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1255 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1125 |. move CARG2, BASE 1256 |. move CARG2, BASE
1126 | // Returns GCstr *. 1257 | // Returns GCstr *.
1127 | li CARG3, LJ_TSTR 1258 | li SFARG1HI, LJ_TSTR
1128 | b ->fff_restv 1259 | b ->fff_restv
1129 |. move CARG1, CRET1 1260 |. move SFARG1LO, CRET1
1130 | 1261 |
1131 |//-- Base library: iterators ------------------------------------------- 1262 |//-- Base library: iterators -------------------------------------------
1132 | 1263 |
1133 |.ffunc next 1264 |.ffunc next
1134 | lw CARG1, HI(BASE) 1265 | lw CARG2, HI(BASE)
1135 | lw TAB:CARG2, LO(BASE) 1266 | lw TAB:CARG1, LO(BASE)
1136 | beqz NARGS8:RC, ->fff_fallback 1267 | beqz NARGS8:RC, ->fff_fallback
1137 |. addu TMP2, BASE, NARGS8:RC 1268 |. addu TMP2, BASE, NARGS8:RC
1138 | li AT, LJ_TTAB 1269 | li AT, LJ_TTAB
1139 | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. 1270 | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil.
1140 | bne CARG1, AT, ->fff_fallback 1271 | bne CARG2, AT, ->fff_fallback
1141 |. lw PC, FRAME_PC(BASE) 1272 |. lw PC, FRAME_PC(BASE)
1142 | load_got lj_tab_next 1273 | load_got lj_tab_next
1143 | sw BASE, L->base // Add frame since C call can throw. 1274 | addiu CARG2, BASE, 8
1144 | sw BASE, L->top // Dummy frame length is ok. 1275 | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1145 | addiu CARG3, BASE, 8 1276 |. addiu CARG3, BASE, -8
1146 | sw PC, SAVE_PC 1277 | // Returns 1=found, 0=end, -1=error.
1147 | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1278 | addiu RA, BASE, -8
1148 |. move CARG1, L 1279 | bgtz CRET1, ->fff_res // Found key/value.
1149 | // Returns 0 at end of traversal. 1280 |. li RD, (2+1)*8
1150 | beqz CRET1, ->fff_restv // End of traversal: return nil. 1281 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1151 |. li CARG3, LJ_TNIL 1282 |. li SFARG1HI, LJ_TNIL
1152 | ldc1 f0, 8(BASE) // Copy key and value to results. 1283 | lw CFUNC:RB, FRAME_FUNC(BASE)
1153 | addiu RA, BASE, -8 1284 | b ->fff_fallback // Invalid key.
1154 | ldc1 f2, 16(BASE) 1285 |. li RC, 2*8
1155 | li RD, (2+1)*8
1156 | sdc1 f0, 0(RA)
1157 | b ->fff_res
1158 |. sdc1 f2, 8(RA)
1159 | 1286 |
1160 |.ffunc_1 pairs 1287 |.ffunc_1 pairs
1161 | li AT, LJ_TTAB 1288 | li AT, LJ_TTAB
1162 | bne CARG3, AT, ->fff_fallback 1289 | bne SFARG1HI, AT, ->fff_fallback
1163 |. lw PC, FRAME_PC(BASE) 1290 |. lw PC, FRAME_PC(BASE)
1164#if LJ_52 1291#if LJ_52
1165 | lw TAB:TMP2, TAB:CARG1->metatable 1292 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1166 | ldc1 f0, CFUNC:RB->upvalue[0] 1293 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1294 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1167 | bnez TAB:TMP2, ->fff_fallback 1295 | bnez TAB:TMP2, ->fff_fallback
1168#else 1296#else
1169 | ldc1 f0, CFUNC:RB->upvalue[0] 1297 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1298 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1170#endif 1299#endif
1171 |. addiu RA, BASE, -8 1300 |. addiu RA, BASE, -8
1172 | sw TISNIL, 8+HI(BASE) 1301 | sw TISNIL, 8+HI(BASE)
1173 | li RD, (3+1)*8 1302 | sw TMP0, HI(RA)
1303 | sw TMP1, LO(RA)
1174 | b ->fff_res 1304 | b ->fff_res
1175 |. sdc1 f0, 0(RA) 1305 |. li RD, (3+1)*8
1176 | 1306 |
1177 |.ffunc ipairs_aux 1307 |.ffunc ipairs_aux
1178 | sltiu AT, NARGS8:RC, 16 1308 | sltiu AT, NARGS8:RC, 16
@@ -1180,35 +1310,32 @@ static void build_subroutines(BuildCtx *ctx)
1180 | lw TAB:CARG1, LO(BASE) 1310 | lw TAB:CARG1, LO(BASE)
1181 | lw CARG4, 8+HI(BASE) 1311 | lw CARG4, 8+HI(BASE)
1182 | bnez AT, ->fff_fallback 1312 | bnez AT, ->fff_fallback
1183 |. ldc1 FARG2, 8(BASE) 1313 |. addiu CARG3, CARG3, -LJ_TTAB
1184 | addiu CARG3, CARG3, -LJ_TTAB 1314 | xor CARG4, CARG4, TISNUM
1185 | sltiu AT, CARG4, LJ_TISNUM 1315 | and AT, CARG3, CARG4
1186 | li TMP0, 1 1316 | bnez AT, ->fff_fallback
1187 | movn AT, r0, CARG3
1188 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1317 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1318 | lw TMP2, 8+LO(BASE)
1192 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1319 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1320 | lw TMP1, TAB:CARG1->array
1195 | mfc1 TMP2, FRET1
1196 | addiu RA, BASE, -8
1197 | add.d FARG2, FARG2, FARG1
1198 | addiu TMP2, TMP2, 1 1321 | addiu TMP2, TMP2, 1
1322 | sw TISNUM, -8+HI(BASE)
1199 | sltu AT, TMP2, TMP0 1323 | sltu AT, TMP2, TMP0
1324 | sw TMP2, -8+LO(BASE)
1325 | beqz AT, >2 // Not in array part?
1326 |. addiu RA, BASE, -8
1200 | sll TMP3, TMP2, 3 1327 | sll TMP3, TMP2, 3
1201 | addu TMP3, TMP1, TMP3 1328 | addu TMP3, TMP1, TMP3
1202 | beqz AT, >2 // Not in array part? 1329 | lw TMP1, HI(TMP3)
1203 |. sdc1 FARG2, 0(RA) 1330 | lw TMP2, LO(TMP3)
1204 | lw TMP2, HI(TMP3)
1205 | ldc1 f0, 0(TMP3)
1206 |1: 1331 |1:
1207 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. 1332 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1208 |. li RD, (0+1)*8 1333 |. li RD, (0+1)*8
1209 | li RD, (2+1)*8 1334 | sw TMP1, 8+HI(RA)
1335 | sw TMP2, 8+LO(RA)
1210 | b ->fff_res 1336 | b ->fff_res
1211 |. sdc1 f0, 8(RA) 1337 |. li RD, (2+1)*8
1338 |
1212 |2: // Check for empty hash part first. Otherwise call C function. 1339 |2: // Check for empty hash part first. Otherwise call C function.
1213 | lw TMP0, TAB:CARG1->hmask 1340 | lw TMP0, TAB:CARG1->hmask
1214 | load_got lj_tab_getinth 1341 | load_got lj_tab_getinth
@@ -1219,27 +1346,30 @@ static void build_subroutines(BuildCtx *ctx)
1219 | // Returns cTValue * or NULL. 1346 | // Returns cTValue * or NULL.
1220 | beqz CRET1, ->fff_res 1347 | beqz CRET1, ->fff_res
1221 |. li RD, (0+1)*8 1348 |. li RD, (0+1)*8
1222 | lw TMP2, HI(CRET1) 1349 | lw TMP1, HI(CRET1)
1223 | b <1 1350 | b <1
1224 |. ldc1 f0, 0(CRET1) 1351 |. lw TMP2, LO(CRET1)
1225 | 1352 |
1226 |.ffunc_1 ipairs 1353 |.ffunc_1 ipairs
1227 | li AT, LJ_TTAB 1354 | li AT, LJ_TTAB
1228 | bne CARG3, AT, ->fff_fallback 1355 | bne SFARG1HI, AT, ->fff_fallback
1229 |. lw PC, FRAME_PC(BASE) 1356 |. lw PC, FRAME_PC(BASE)
1230#if LJ_52 1357#if LJ_52
1231 | lw TAB:TMP2, TAB:CARG1->metatable 1358 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1232 | ldc1 f0, CFUNC:RB->upvalue[0] 1359 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1360 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1233 | bnez TAB:TMP2, ->fff_fallback 1361 | bnez TAB:TMP2, ->fff_fallback
1234#else 1362#else
1235 | ldc1 f0, CFUNC:RB->upvalue[0] 1363 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1364 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1236#endif 1365#endif
1237 |. addiu RA, BASE, -8 1366 |. addiu RA, BASE, -8
1238 | sw r0, 8+HI(BASE) 1367 | sw TISNUM, 8+HI(BASE)
1239 | sw r0, 8+LO(BASE) 1368 | sw r0, 8+LO(BASE)
1240 | li RD, (3+1)*8 1369 | sw TMP0, HI(RA)
1370 | sw TMP1, LO(RA)
1241 | b ->fff_res 1371 | b ->fff_res
1242 |. sdc1 f0, 0(RA) 1372 |. li RD, (3+1)*8
1243 | 1373 |
1244 |//-- Base library: catch errors ---------------------------------------- 1374 |//-- Base library: catch errors ----------------------------------------
1245 | 1375 |
@@ -1267,8 +1397,9 @@ static void build_subroutines(BuildCtx *ctx)
1267 | sltu TMP1, TMP1, TMP2 1397 | sltu TMP1, TMP1, TMP2
1268 | or AT, AT, TMP1 1398 | or AT, AT, TMP1
1269 | bnez AT, ->fff_fallback 1399 | bnez AT, ->fff_fallback
1270 |. ldc1 FARG2, 8(BASE) 1400 |. lw CARG3, 8+LO(BASE)
1271 | ldc1 FARG1, 0(BASE) 1401 | lw CARG1, LO(BASE)
1402 | lw CARG2, HI(BASE)
1272 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1403 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1273 | li AT, LJ_TFUNC 1404 | li AT, LJ_TFUNC
1274 | move TMP2, BASE 1405 | move TMP2, BASE
@@ -1276,9 +1407,11 @@ static void build_subroutines(BuildCtx *ctx)
1276 | addiu BASE, BASE, 16 1407 | addiu BASE, BASE, 16
1277 | // Remember active hook before pcall. 1408 | // Remember active hook before pcall.
1278 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT 1409 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1279 | sdc1 FARG2, 0(TMP2) // Swap function and traceback. 1410 | sw CARG3, LO(TMP2) // Swap function and traceback.
1411 | sw CARG4, HI(TMP2)
1280 | andi TMP3, TMP3, 1 1412 | andi TMP3, TMP3, 1
1281 | sdc1 FARG1, 8(TMP2) 1413 | sw CARG1, 8+LO(TMP2)
1414 | sw CARG2, 8+HI(TMP2)
1282 | addiu PC, TMP3, 16+FRAME_PCALL 1415 | addiu PC, TMP3, 16+FRAME_PCALL
1283 | b ->vm_call_dispatch 1416 | b ->vm_call_dispatch
1284 |. addiu NARGS8:RC, NARGS8:RC, -16 1417 |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1287,7 +1420,10 @@ static void build_subroutines(BuildCtx *ctx)
1287 | 1420 |
1288 |.macro coroutine_resume_wrap, resume 1421 |.macro coroutine_resume_wrap, resume
1289 |.if resume 1422 |.if resume
1290 |.ffunc_1 coroutine_resume 1423 |.ffunc coroutine_resume
1424 | lw CARG3, HI(BASE)
1425 | beqz NARGS8:RC, ->fff_fallback
1426 |. lw CARG1, LO(BASE)
1291 | li AT, LJ_TTHREAD 1427 | li AT, LJ_TTHREAD
1292 | bne CARG3, AT, ->fff_fallback 1428 | bne CARG3, AT, ->fff_fallback
1293 |.else 1429 |.else
@@ -1322,11 +1458,13 @@ static void build_subroutines(BuildCtx *ctx)
1322 | move CARG3, CARG2 1458 | move CARG3, CARG2
1323 | sw BASE, L->top 1459 | sw BASE, L->top
1324 |2: // Move args to coroutine. 1460 |2: // Move args to coroutine.
1325 | ldc1 f0, 0(BASE) 1461 | lw SFRETHI, HI(BASE)
1462 | lw SFRETLO, LO(BASE)
1326 | sltu AT, BASE, TMP1 1463 | sltu AT, BASE, TMP1
1327 | beqz AT, >3 1464 | beqz AT, >3
1328 |. addiu BASE, BASE, 8 1465 |. addiu BASE, BASE, 8
1329 | sdc1 f0, 0(CARG3) 1466 | sw SFRETHI, HI(CARG3)
1467 | sw SFRETLO, LO(CARG3)
1330 | b <2 1468 | b <2
1331 |. addiu CARG3, CARG3, 8 1469 |. addiu CARG3, CARG3, 8
1332 |3: 1470 |3:
@@ -1339,6 +1477,7 @@ static void build_subroutines(BuildCtx *ctx)
1339 | lw TMP3, L:RA->top 1477 | lw TMP3, L:RA->top
1340 | li_vmstate INTERP 1478 | li_vmstate INTERP
1341 | lw BASE, L->base 1479 | lw BASE, L->base
1480 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
1342 | st_vmstate 1481 | st_vmstate
1343 | beqz AT, >8 1482 | beqz AT, >8
1344 |. subu RD, TMP3, TMP2 1483 |. subu RD, TMP3, TMP2
@@ -1351,10 +1490,12 @@ static void build_subroutines(BuildCtx *ctx)
1351 | sw TMP2, L:RA->top // Clear coroutine stack. 1490 | sw TMP2, L:RA->top // Clear coroutine stack.
1352 | move TMP1, BASE 1491 | move TMP1, BASE
1353 |5: // Move results from coroutine. 1492 |5: // Move results from coroutine.
1354 | ldc1 f0, 0(TMP2) 1493 | lw SFRETHI, HI(TMP2)
1494 | lw SFRETLO, LO(TMP2)
1355 | addiu TMP2, TMP2, 8 1495 | addiu TMP2, TMP2, 8
1356 | sltu AT, TMP2, TMP3 1496 | sltu AT, TMP2, TMP3
1357 | sdc1 f0, 0(TMP1) 1497 | sw SFRETHI, HI(TMP1)
1498 | sw SFRETLO, LO(TMP1)
1358 | bnez AT, <5 1499 | bnez AT, <5
1359 |. addiu TMP1, TMP1, 8 1500 |. addiu TMP1, TMP1, 8
1360 |6: 1501 |6:
@@ -1379,12 +1520,14 @@ static void build_subroutines(BuildCtx *ctx)
1379 |.if resume 1520 |.if resume
1380 | addiu TMP3, TMP3, -8 1521 | addiu TMP3, TMP3, -8
1381 | li TMP1, LJ_TFALSE 1522 | li TMP1, LJ_TFALSE
1382 | ldc1 f0, 0(TMP3) 1523 | lw SFRETHI, HI(TMP3)
1524 | lw SFRETLO, LO(TMP3)
1383 | sw TMP3, L:RA->top // Remove error from coroutine stack. 1525 | sw TMP3, L:RA->top // Remove error from coroutine stack.
1384 | li RD, (2+1)*8 1526 | li RD, (2+1)*8
1385 | sw TMP1, -8+HI(BASE) // Prepend false to results. 1527 | sw TMP1, -8+HI(BASE) // Prepend false to results.
1386 | addiu RA, BASE, -8 1528 | addiu RA, BASE, -8
1387 | sdc1 f0, 0(BASE) // Copy error message. 1529 | sw SFRETHI, HI(BASE) // Copy error message.
1530 | sw SFRETLO, LO(BASE)
1388 | b <7 1531 | b <7
1389 |. andi TMP0, PC, FRAME_TYPE 1532 |. andi TMP0, PC, FRAME_TYPE
1390 |.else 1533 |.else
@@ -1420,20 +1563,29 @@ static void build_subroutines(BuildCtx *ctx)
1420 | 1563 |
1421 |//-- Math library ------------------------------------------------------- 1564 |//-- Math library -------------------------------------------------------
1422 | 1565 |
1423 |.ffunc_n math_abs 1566 |.ffunc_1 math_abs
1424 |. abs.d FRET1, FARG1 1567 | bne SFARG1HI, TISNUM, >1
1425 |->fff_resn: 1568 |. sra TMP0, SFARG1LO, 31
1426 | lw PC, FRAME_PC(BASE) 1569 | xor TMP1, SFARG1LO, TMP0
1427 | addiu RA, BASE, -8 1570 | subu SFARG1LO, TMP1, TMP0
1428 | b ->fff_res1 1571 | bgez SFARG1LO, ->fff_restv
1429 |. sdc1 FRET1, -8(BASE) 1572 |. nop
1573 | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1574 | b ->fff_restv
1575 |. li SFARG1LO, 0
1576 |1:
1577 | sltiu AT, SFARG1HI, LJ_TISNUM
1578 | beqz AT, ->fff_fallback
1579 |. sll SFARG1HI, SFARG1HI, 1
1580 | srl SFARG1HI, SFARG1HI, 1
1581 |// fallthrough
1430 | 1582 |
1431 |->fff_restv: 1583 |->fff_restv:
1432 | // CARG3/CARG1 = TValue result. 1584 | // SFARG1LO/SFARG1HI = TValue result.
1433 | lw PC, FRAME_PC(BASE) 1585 | lw PC, FRAME_PC(BASE)
1434 | sw CARG3, -8+HI(BASE) 1586 | sw SFARG1HI, -8+HI(BASE)
1435 | addiu RA, BASE, -8 1587 | addiu RA, BASE, -8
1436 | sw CARG1, -8+LO(BASE) 1588 | sw SFARG1LO, -8+LO(BASE)
1437 |->fff_res1: 1589 |->fff_res1:
1438 | // RA = results, PC = return. 1590 | // RA = results, PC = return.
1439 | li RD, (1+1)*8 1591 | li RD, (1+1)*8
@@ -1462,15 +1614,19 @@ static void build_subroutines(BuildCtx *ctx)
1462 |. sw TISNIL, -8+HI(TMP1) 1614 |. sw TISNIL, -8+HI(TMP1)
1463 | 1615 |
1464 |.macro math_extern, func 1616 |.macro math_extern, func
1465 |->ff_math_ .. func: 1617 | .ffunc math_ .. func
1466 | lw CARG3, HI(BASE) 1618 | lw SFARG1HI, HI(BASE)
1467 | beqz NARGS8:RC, ->fff_fallback 1619 | beqz NARGS8:RC, ->fff_fallback
1468 |. load_got func 1620 |. load_got func
1469 | sltiu AT, CARG3, LJ_TISNUM 1621 | sltiu AT, SFARG1HI, LJ_TISNUM
1470 | beqz AT, ->fff_fallback 1622 | beqz AT, ->fff_fallback
1471 |. nop 1623 |.if FPU
1472 | call_extern
1473 |. ldc1 FARG1, 0(BASE) 1624 |. ldc1 FARG1, 0(BASE)
1625 |.else
1626 |. lw SFARG1LO, LO(BASE)
1627 |.endif
1628 | call_extern
1629 |. nop
1474 | b ->fff_resn 1630 | b ->fff_resn
1475 |. nop 1631 |. nop
1476 |.endmacro 1632 |.endmacro
@@ -1484,10 +1640,22 @@ static void build_subroutines(BuildCtx *ctx)
1484 |. nop 1640 |. nop
1485 |.endmacro 1641 |.endmacro
1486 | 1642 |
1643 |// TODO: Return integer type if result is integer (own sf implementation).
1487 |.macro math_round, func 1644 |.macro math_round, func
1488 | .ffunc_n math_ .. func 1645 |->ff_math_ .. func:
1489 |. nop 1646 | lw SFARG1HI, HI(BASE)
1647 | beqz NARGS8:RC, ->fff_fallback
1648 |. lw SFARG1LO, LO(BASE)
1649 | beq SFARG1HI, TISNUM, ->fff_restv
1650 |. sltu AT, SFARG1HI, TISNUM
1651 | beqz AT, ->fff_fallback
1652 |.if FPU
1653 |. ldc1 FARG1, 0(BASE)
1490 | bal ->vm_ .. func 1654 | bal ->vm_ .. func
1655 |.else
1656 |. load_got func
1657 | call_extern
1658 |.endif
1491 |. nop 1659 |. nop
1492 | b ->fff_resn 1660 | b ->fff_resn
1493 |. nop 1661 |. nop
@@ -1497,15 +1665,19 @@ static void build_subroutines(BuildCtx *ctx)
1497 | math_round ceil 1665 | math_round ceil
1498 | 1666 |
1499 |.ffunc math_log 1667 |.ffunc math_log
1500 | lw CARG3, HI(BASE)
1501 | li AT, 8 1668 | li AT, 8
1502 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1669 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1503 |. load_got log 1670 |. lw SFARG1HI, HI(BASE)
1504 | sltiu AT, CARG3, LJ_TISNUM 1671 | sltiu AT, SFARG1HI, LJ_TISNUM
1505 | beqz AT, ->fff_fallback 1672 | beqz AT, ->fff_fallback
1506 |. nop 1673 |. load_got log
1674 |.if FPU
1507 | call_extern 1675 | call_extern
1508 |. ldc1 FARG1, 0(BASE) 1676 |. ldc1 FARG1, 0(BASE)
1677 |.else
1678 | call_extern
1679 |. lw SFARG1LO, LO(BASE)
1680 |.endif
1509 | b ->fff_resn 1681 | b ->fff_resn
1510 |. nop 1682 |. nop
1511 | 1683 |
@@ -1524,23 +1696,43 @@ static void build_subroutines(BuildCtx *ctx)
1524 | math_extern2 atan2 1696 | math_extern2 atan2
1525 | math_extern2 fmod 1697 | math_extern2 fmod
1526 | 1698 |
1699 |.if FPU
1527 |.ffunc_n math_sqrt 1700 |.ffunc_n math_sqrt
1528 |. sqrt.d FRET1, FARG1 1701 |. sqrt.d FRET1, FARG1
1529 | b ->fff_resn 1702 |// fallthrough to ->fff_resn
1530 |. nop 1703 |.else
1704 | math_extern sqrt
1705 |.endif
1531 | 1706 |
1532 |->ff_math_deg: 1707 |->fff_resn:
1533 |.ffunc_n math_rad 1708 | lw PC, FRAME_PC(BASE)
1534 |. ldc1 FARG2, CFUNC:RB->upvalue[0] 1709 | addiu RA, BASE, -8
1535 | b ->fff_resn 1710 |.if FPU
1536 |. mul.d FRET1, FARG1, FARG2 1711 | b ->fff_res1
1712 |. sdc1 FRET1, -8(BASE)
1713 |.else
1714 | sw SFRETHI, -8+HI(BASE)
1715 | b ->fff_res1
1716 |. sw SFRETLO, -8+LO(BASE)
1717 |.endif
1537 | 1718 |
1538 |.ffunc_nn math_ldexp 1719 |
1539 | cvt.w.d FARG2, FARG2 1720 |.ffunc math_ldexp
1721 | sltiu AT, NARGS8:RC, 16
1722 | lw SFARG1HI, HI(BASE)
1723 | bnez AT, ->fff_fallback
1724 |. lw CARG4, 8+HI(BASE)
1725 | bne CARG4, TISNUM, ->fff_fallback
1540 | load_got ldexp 1726 | load_got ldexp
1541 | mfc1 CARG3, FARG2 1727 |. sltu AT, SFARG1HI, TISNUM
1728 | beqz AT, ->fff_fallback
1729 |.if FPU
1730 |. ldc1 FARG1, 0(BASE)
1731 |.else
1732 |. lw SFARG1LO, LO(BASE)
1733 |.endif
1542 | call_extern 1734 | call_extern
1543 |. nop 1735 |. lw CARG3, 8+LO(BASE)
1544 | b ->fff_resn 1736 | b ->fff_resn
1545 |. nop 1737 |. nop
1546 | 1738 |
@@ -1551,10 +1743,17 @@ static void build_subroutines(BuildCtx *ctx)
1551 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 1743 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1552 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1744 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1553 | addiu RA, BASE, -8 1745 | addiu RA, BASE, -8
1746 |.if FPU
1554 | mtc1 TMP1, FARG2 1747 | mtc1 TMP1, FARG2
1555 | sdc1 FRET1, 0(RA) 1748 | sdc1 FRET1, 0(RA)
1556 | cvt.d.w FARG2, FARG2 1749 | cvt.d.w FARG2, FARG2
1557 | sdc1 FARG2, 8(RA) 1750 | sdc1 FARG2, 8(RA)
1751 |.else
1752 | sw SFRETLO, LO(RA)
1753 | sw SFRETHI, HI(RA)
1754 | sw TMP1, 8+LO(RA)
1755 | sw TISNUM, 8+HI(RA)
1756 |.endif
1558 | b ->fff_res 1757 | b ->fff_res
1559 |. li RD, (2+1)*8 1758 |. li RD, (2+1)*8
1560 | 1759 |
@@ -1564,49 +1763,109 @@ static void build_subroutines(BuildCtx *ctx)
1564 | call_extern 1763 | call_extern
1565 |. addiu CARG3, BASE, -8 1764 |. addiu CARG3, BASE, -8
1566 | addiu RA, BASE, -8 1765 | addiu RA, BASE, -8
1766 |.if FPU
1567 | sdc1 FRET1, 0(BASE) 1767 | sdc1 FRET1, 0(BASE)
1768 |.else
1769 | sw SFRETLO, LO(BASE)
1770 | sw SFRETHI, HI(BASE)
1771 |.endif
1568 | b ->fff_res 1772 | b ->fff_res
1569 |. li RD, (2+1)*8 1773 |. li RD, (2+1)*8
1570 | 1774 |
1571 |.macro math_minmax, name, ismax 1775 |.macro math_minmax, name, intins, ismax
1572 |->ff_ .. name: 1776 | .ffunc_1 name
1573 | lw CARG3, HI(BASE) 1777 | addu TMP3, BASE, NARGS8:RC
1574 | beqz NARGS8:RC, ->fff_fallback 1778 | bne SFARG1HI, TISNUM, >5
1575 |. ldc1 FRET1, 0(BASE) 1779 |. addiu TMP2, BASE, 8
1576 | sltiu AT, CARG3, LJ_TISNUM 1780 |1: // Handle integers.
1781 |. lw SFARG2HI, HI(TMP2)
1782 | beq TMP2, TMP3, ->fff_restv
1783 |. lw SFARG2LO, LO(TMP2)
1784 | bne SFARG2HI, TISNUM, >3
1785 |. slt AT, SFARG1LO, SFARG2LO
1786 | intins SFARG1LO, SFARG2LO, AT
1787 | b <1
1788 |. addiu TMP2, TMP2, 8
1789 |
1790 |3: // Convert intermediate result to number and continue with number loop.
1791 | sltiu AT, SFARG2HI, LJ_TISNUM
1577 | beqz AT, ->fff_fallback 1792 | beqz AT, ->fff_fallback
1578 |. addu TMP2, BASE, NARGS8:RC 1793 |.if FPU
1579 | addiu TMP1, BASE, 8 1794 |. mtc1 SFARG1LO, FRET1
1580 | beq TMP1, TMP2, ->fff_resn 1795 | cvt.d.w FRET1, FRET1
1581 |1: 1796 | b >7
1582 |. lw CARG3, HI(TMP1) 1797 |. ldc1 FARG1, 0(TMP2)
1583 | ldc1 FARG1, 0(TMP1) 1798 |.else
1584 | addiu TMP1, TMP1, 8 1799 |. nop
1585 | sltiu AT, CARG3, LJ_TISNUM 1800 | bal ->vm_sfi2d_1
1801 |. nop
1802 | b >7
1803 |. nop
1804 |.endif
1805 |
1806 |5:
1807 |. sltiu AT, SFARG1HI, LJ_TISNUM
1586 | beqz AT, ->fff_fallback 1808 | beqz AT, ->fff_fallback
1809 |.if FPU
1810 |. ldc1 FRET1, 0(BASE)
1811 |.endif
1812 |
1813 |6: // Handle numbers.
1814 |. lw SFARG2HI, HI(TMP2)
1815 |.if FPU
1816 | beq TMP2, TMP3, ->fff_resn
1817 |.else
1818 | beq TMP2, TMP3, ->fff_restv
1819 |.endif
1820 |. sltiu AT, SFARG2HI, LJ_TISNUM
1821 | beqz AT, >8
1822 |.if FPU
1823 |. ldc1 FARG1, 0(TMP2)
1824 |.else
1825 |. lw SFARG2LO, LO(TMP2)
1826 |.endif
1827 |7:
1828 |.if FPU
1587 |.if ismax 1829 |.if ismax
1588 |. c.olt.d FARG1, FRET1 1830 | c.olt.d FARG1, FRET1
1589 |.else 1831 |.else
1590 |. c.olt.d FRET1, FARG1 1832 | c.olt.d FRET1, FARG1
1591 |.endif 1833 |.endif
1592 | bne TMP1, TMP2, <1 1834 | movf.d FRET1, FARG1
1593 |. movf.d FRET1, FARG1 1835 |.else
1594 | b ->fff_resn 1836 |.if ismax
1837 | bal ->vm_sfcmpogt
1838 |.else
1839 | bal ->vm_sfcmpolt
1840 |.endif
1841 |. nop
1842 | movz SFARG1LO, SFARG2LO, CRET1
1843 | movz SFARG1HI, SFARG2HI, CRET1
1844 |.endif
1845 | b <6
1846 |. addiu TMP2, TMP2, 8
1847 |
1848 |8: // Convert integer to number and continue with number loop.
1849 | bne SFARG2HI, TISNUM, ->fff_fallback
1850 |.if FPU
1851 |. lwc1 FARG1, LO(TMP2)
1852 | b <7
1853 |. cvt.d.w FARG1, FARG1
1854 |.else
1855 |. nop
1856 | bal ->vm_sfi2d_2
1595 |. nop 1857 |. nop
1858 | b <7
1859 |. nop
1860 |.endif
1861 |
1596 |.endmacro 1862 |.endmacro
1597 | 1863 |
1598 | math_minmax math_min, 0 1864 | math_minmax math_min, movz, 0
1599 | math_minmax math_max, 1 1865 | math_minmax math_max, movn, 1
1600 | 1866 |
1601 |//-- String library ----------------------------------------------------- 1867 |//-- String library -----------------------------------------------------
1602 | 1868 |
1603 |.ffunc_1 string_len
1604 | li AT, LJ_TSTR
1605 | bne CARG3, AT, ->fff_fallback
1606 |. nop
1607 | b ->fff_resi
1608 |. lw CRET1, STR:CARG1->len
1609 |
1610 |.ffunc string_byte // Only handle the 1-arg case here. 1869 |.ffunc string_byte // Only handle the 1-arg case here.
1611 | lw CARG3, HI(BASE) 1870 | lw CARG3, HI(BASE)
1612 | lw STR:CARG1, LO(BASE) 1871 | lw STR:CARG1, LO(BASE)
@@ -1616,33 +1875,31 @@ static void build_subroutines(BuildCtx *ctx)
1616 | bnez AT, ->fff_fallback // Need exactly 1 string argument. 1875 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1617 |. nop 1876 |. nop
1618 | lw TMP0, STR:CARG1->len 1877 | lw TMP0, STR:CARG1->len
1619 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1620 | addiu RA, BASE, -8 1878 | addiu RA, BASE, -8
1879 | lw PC, FRAME_PC(BASE)
1621 | sltu RD, r0, TMP0 1880 | sltu RD, r0, TMP0
1622 | mtc1 TMP1, f0 1881 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1623 | addiu RD, RD, 1 1882 | addiu RD, RD, 1
1624 | cvt.d.w f0, f0
1625 | lw PC, FRAME_PC(BASE)
1626 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 1883 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1884 | sw TISNUM, HI(RA)
1627 | b ->fff_res 1885 | b ->fff_res
1628 |. sdc1 f0, 0(RA) 1886 |. sw TMP1, LO(RA)
1629 | 1887 |
1630 |.ffunc string_char // Only handle the 1-arg case here. 1888 |.ffunc string_char // Only handle the 1-arg case here.
1631 | ffgccheck 1889 | ffgccheck
1632 |. nop 1890 |. nop
1633 | lw CARG3, HI(BASE) 1891 | lw CARG3, HI(BASE)
1634 | ldc1 FARG1, 0(BASE) 1892 | lw CARG1, LO(BASE)
1635 | li AT, 8 1893 | li TMP1, 255
1636 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1894 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1637 |. sltiu AT, CARG3, LJ_TISNUM 1895 | xor TMP0, CARG3, TISNUM // Integer.
1638 | beqz AT, ->fff_fallback 1896 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1897 | or AT, AT, TMP0
1898 | or AT, AT, TMP1
1899 | bnez AT, ->fff_fallback
1639 |. li CARG3, 1 1900 |. li CARG3, 1
1640 | cvt.w.d FARG1, FARG1
1641 | addiu CARG2, sp, ARG5_OFS 1901 | addiu CARG2, sp, ARG5_OFS
1642 | sltiu AT, TMP0, 256 1902 | sb CARG1, ARG5
1643 | mfc1 TMP0, FARG1
1644 | beqz AT, ->fff_fallback
1645 |. sw TMP0, ARG5
1646 |->fff_newstr: 1903 |->fff_newstr:
1647 | load_got lj_str_new 1904 | load_got lj_str_new
1648 | sw BASE, L->base 1905 | sw BASE, L->base
@@ -1651,35 +1908,30 @@ static void build_subroutines(BuildCtx *ctx)
1651 |. move CARG1, L 1908 |. move CARG1, L
1652 | // Returns GCstr *. 1909 | // Returns GCstr *.
1653 | lw BASE, L->base 1910 | lw BASE, L->base
1654 | move CARG1, CRET1 1911 |->fff_resstr:
1912 | move SFARG1LO, CRET1
1655 | b ->fff_restv 1913 | b ->fff_restv
1656 |. li CARG3, LJ_TSTR 1914 |. li SFARG1HI, LJ_TSTR
1657 | 1915 |
1658 |.ffunc string_sub 1916 |.ffunc string_sub
1659 | ffgccheck 1917 | ffgccheck
1660 |. nop 1918 |. nop
1661 | addiu AT, NARGS8:RC, -16 1919 | addiu AT, NARGS8:RC, -16
1662 | lw CARG3, 16+HI(BASE) 1920 | lw CARG3, 16+HI(BASE)
1663 | ldc1 f0, 16(BASE)
1664 | lw TMP0, HI(BASE) 1921 | lw TMP0, HI(BASE)
1665 | lw STR:CARG1, LO(BASE) 1922 | lw STR:CARG1, LO(BASE)
1666 | bltz AT, ->fff_fallback 1923 | bltz AT, ->fff_fallback
1667 | lw CARG2, 8+HI(BASE) 1924 |. lw CARG2, 8+HI(BASE)
1668 | ldc1 f2, 8(BASE)
1669 | beqz AT, >1 1925 | beqz AT, >1
1670 |. li CARG4, -1 1926 |. li CARG4, -1
1671 | cvt.w.d f0, f0 1927 | bne CARG3, TISNUM, ->fff_fallback
1672 | sltiu AT, CARG3, LJ_TISNUM 1928 |. lw CARG4, 16+LO(BASE)
1673 | beqz AT, ->fff_fallback
1674 |. mfc1 CARG4, f0
1675 |1: 1929 |1:
1676 | sltiu AT, CARG2, LJ_TISNUM 1930 | bne CARG2, TISNUM, ->fff_fallback
1677 | beqz AT, ->fff_fallback
1678 |. li AT, LJ_TSTR 1931 |. li AT, LJ_TSTR
1679 | cvt.w.d f2, f2
1680 | bne TMP0, AT, ->fff_fallback 1932 | bne TMP0, AT, ->fff_fallback
1681 |. lw CARG2, STR:CARG1->len 1933 |. lw CARG3, 8+LO(BASE)
1682 | mfc1 CARG3, f2 1934 | lw CARG2, STR:CARG1->len
1683 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end 1935 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1684 | slt AT, CARG4, r0 1936 | slt AT, CARG4, r0
1685 | addiu TMP0, CARG2, 1 1937 | addiu TMP0, CARG2, 1
@@ -1701,139 +1953,130 @@ static void build_subroutines(BuildCtx *ctx)
1701 | bgez CARG3, ->fff_newstr 1953 | bgez CARG3, ->fff_newstr
1702 |. addiu CARG3, CARG3, 1 // len++ 1954 |. addiu CARG3, CARG3, 1 // len++
1703 |->fff_emptystr: // Return empty string. 1955 |->fff_emptystr: // Return empty string.
1704 | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) 1956 | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
1705 | b ->fff_restv 1957 | b ->fff_restv
1706 |. li CARG3, LJ_TSTR 1958 |. li SFARG1HI, LJ_TSTR
1707 | 1959 |
1708 |.ffunc string_rep // Only handle the 1-char case inline. 1960 |.macro ffstring_op, name
1709 | ffgccheck 1961 | .ffunc string_ .. name
1710 |. nop
1711 | lw TMP0, HI(BASE)
1712 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1713 | lw CARG4, 8+HI(BASE)
1714 | lw STR:CARG1, LO(BASE)
1715 | addiu TMP0, TMP0, -LJ_TSTR
1716 | ldc1 f0, 8(BASE)
1717 | or AT, AT, TMP0
1718 | bnez AT, ->fff_fallback
1719 |. sltiu AT, CARG4, LJ_TISNUM
1720 | cvt.w.d f0, f0
1721 | beqz AT, ->fff_fallback
1722 |. lw TMP0, STR:CARG1->len
1723 | mfc1 CARG3, f0
1724 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1725 | li AT, 1
1726 | blez CARG3, ->fff_emptystr // Count <= 0?
1727 |. sltu AT, AT, TMP0
1728 | beqz TMP0, ->fff_emptystr // Zero length string?
1729 |. sltu TMP0, TMP1, CARG3
1730 | or AT, AT, TMP0
1731 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1732 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1733 |. lbu TMP0, STR:CARG1[1]
1734 | addu TMP2, CARG2, CARG3
1735 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1736 | addiu TMP2, TMP2, -1
1737 | sltu AT, CARG2, TMP2
1738 | bnez AT, <1
1739 |. sb TMP0, 0(TMP2)
1740 | b ->fff_newstr
1741 |. nop
1742 |
1743 |.ffunc string_reverse
1744 | ffgccheck 1962 | ffgccheck
1745 |. nop 1963 |. nop
1746 | lw CARG3, HI(BASE) 1964 | lw CARG3, HI(BASE)
1747 | lw STR:CARG1, LO(BASE) 1965 | lw STR:CARG2, LO(BASE)
1748 | beqz NARGS8:RC, ->fff_fallback 1966 | beqz NARGS8:RC, ->fff_fallback
1749 |. li AT, LJ_TSTR 1967 |. li AT, LJ_TSTR
1750 | bne CARG3, AT, ->fff_fallback 1968 | bne CARG3, AT, ->fff_fallback
1751 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1969 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
1752 | lw CARG3, STR:CARG1->len 1970 | load_got lj_buf_putstr_ .. name
1753 | addiu CARG1, STR:CARG1, #STR 1971 | lw TMP0, SBUF:CARG1->b
1754 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1972 | sw L, SBUF:CARG1->L
1755 | sltu AT, TMP1, CARG3 1973 | sw BASE, L->base
1756 | bnez AT, ->fff_fallback 1974 | sw TMP0, SBUF:CARG1->w
1757 |. addu TMP3, CARG1, CARG3 1975 | call_intern extern lj_buf_putstr_ .. name
1758 | addu CARG4, CARG2, CARG3 1976 |. sw PC, SAVE_PC
1759 |1: // Reverse string copy. 1977 | load_got lj_buf_tostr
1760 | lbu TMP1, 0(CARG1) 1978 | call_intern lj_buf_tostr
1761 | sltu AT, CARG1, TMP3 1979 |. move SBUF:CARG1, SBUF:CRET1
1762 | beqz AT, ->fff_newstr 1980 | b ->fff_resstr
1763 |. addiu CARG1, CARG1, 1 1981 |. lw BASE, L->base
1764 | addiu CARG4, CARG4, -1
1765 | b <1
1766 | sb TMP1, 0(CARG4)
1767 |
1768 |.macro ffstring_case, name, lo
1769 | .ffunc name
1770 | ffgccheck
1771 |. nop
1772 | lw CARG3, HI(BASE)
1773 | lw STR:CARG1, LO(BASE)
1774 | beqz NARGS8:RC, ->fff_fallback
1775 |. li AT, LJ_TSTR
1776 | bne CARG3, AT, ->fff_fallback
1777 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1778 | lw CARG3, STR:CARG1->len
1779 | addiu CARG1, STR:CARG1, #STR
1780 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1781 | sltu AT, TMP1, CARG3
1782 | bnez AT, ->fff_fallback
1783 |. addu TMP3, CARG1, CARG3
1784 | move CARG4, CARG2
1785 |1: // ASCII case conversion.
1786 | lbu TMP1, 0(CARG1)
1787 | sltu AT, CARG1, TMP3
1788 | beqz AT, ->fff_newstr
1789 |. addiu TMP0, TMP1, -lo
1790 | xori TMP2, TMP1, 0x20
1791 | sltiu AT, TMP0, 26
1792 | movn TMP1, TMP2, AT
1793 | addiu CARG1, CARG1, 1
1794 | sb TMP1, 0(CARG4)
1795 | b <1
1796 |. addiu CARG4, CARG4, 1
1797 |.endmacro 1982 |.endmacro
1798 | 1983 |
1799 |ffstring_case string_lower, 65 1984 |ffstring_op reverse
1800 |ffstring_case string_upper, 97 1985 |ffstring_op lower
1986 |ffstring_op upper
1801 | 1987 |
1802 |//-- Table library ------------------------------------------------------ 1988 |//-- Bit library --------------------------------------------------------
1803 | 1989 |
1804 |.ffunc_1 table_getn 1990 |->vm_tobit_fb:
1805 | li AT, LJ_TTAB 1991 | beqz TMP1, ->fff_fallback
1806 | bne CARG3, AT, ->fff_fallback 1992 |.if FPU
1807 |. load_got lj_tab_len 1993 |. ldc1 FARG1, 0(BASE)
1808 | call_intern lj_tab_len // (GCtab *t) 1994 | add.d FARG1, FARG1, TOBIT
1809 |. nop 1995 | jr ra
1810 | // Returns uint32_t (but less than 2^31). 1996 |. mfc1 CRET1, FARG1
1811 | b ->fff_resi 1997 |.else
1998 |// FP number to bit conversion for soft-float.
1999 |->vm_tobit:
2000 | sll TMP0, SFARG1HI, 1
2001 | lui AT, 0x0020
2002 | addu TMP0, TMP0, AT
2003 | slt AT, TMP0, r0
2004 | movz SFARG1LO, r0, AT
2005 | beqz AT, >2
2006 |. li TMP1, 0x3e0
2007 | not TMP1, TMP1
2008 | sra TMP0, TMP0, 21
2009 | subu TMP0, TMP1, TMP0
2010 | slt AT, TMP0, r0
2011 | bnez AT, >1
2012 |. sll TMP1, SFARG1HI, 11
2013 | lui AT, 0x8000
2014 | or TMP1, TMP1, AT
2015 | srl AT, SFARG1LO, 21
2016 | or TMP1, TMP1, AT
2017 | slt AT, SFARG1HI, r0
2018 | beqz AT, >2
2019 |. srlv SFARG1LO, TMP1, TMP0
2020 | subu SFARG1LO, r0, SFARG1LO
2021 |2:
2022 | jr ra
2023 |. move CRET1, SFARG1LO
2024 |1:
2025 | addiu TMP0, TMP0, 21
2026 | srlv TMP1, SFARG1LO, TMP0
2027 | li AT, 20
2028 | subu TMP0, AT, TMP0
2029 | sll SFARG1LO, SFARG1HI, 12
2030 | sllv AT, SFARG1LO, TMP0
2031 | or SFARG1LO, TMP1, AT
2032 | slt AT, SFARG1HI, r0
2033 | beqz AT, <2
1812 |. nop 2034 |. nop
1813 | 2035 | jr ra
1814 |//-- Bit library -------------------------------------------------------- 2036 |. subu CRET1, r0, SFARG1LO
2037 |.endif
1815 | 2038 |
1816 |.macro .ffunc_bit, name 2039 |.macro .ffunc_bit, name
1817 | .ffunc_n bit_..name 2040 | .ffunc_1 bit_..name
1818 |. add.d FARG1, FARG1, TOBIT 2041 | beq SFARG1HI, TISNUM, >6
1819 | mfc1 CRET1, FARG1 2042 |. move CRET1, SFARG1LO
2043 | bal ->vm_tobit_fb
2044 |. sltu TMP1, SFARG1HI, TISNUM
2045 |6:
1820 |.endmacro 2046 |.endmacro
1821 | 2047 |
1822 |.macro .ffunc_bit_op, name, ins 2048 |.macro .ffunc_bit_op, name, ins
1823 | .ffunc_bit name 2049 | .ffunc_bit name
1824 | addiu TMP1, BASE, 8 2050 | addiu TMP2, BASE, 8
1825 | addu TMP2, BASE, NARGS8:RC 2051 | addu TMP3, BASE, NARGS8:RC
1826 |1: 2052 |1:
1827 | lw CARG4, HI(TMP1) 2053 | lw SFARG1HI, HI(TMP2)
1828 | beq TMP1, TMP2, ->fff_resi 2054 | beq TMP2, TMP3, ->fff_resi
1829 |. ldc1 FARG1, 0(TMP1) 2055 |. lw SFARG1LO, LO(TMP2)
1830 | sltiu AT, CARG4, LJ_TISNUM 2056 |.if FPU
1831 | beqz AT, ->fff_fallback 2057 | bne SFARG1HI, TISNUM, >2
1832 | add.d FARG1, FARG1, TOBIT 2058 |. addiu TMP2, TMP2, 8
1833 | mfc1 CARG2, FARG1
1834 | ins CRET1, CRET1, CARG2
1835 | b <1 2059 | b <1
1836 |. addiu TMP1, TMP1, 8 2060 |. ins CRET1, CRET1, SFARG1LO
2061 |2:
2062 | ldc1 FARG1, -8(TMP2)
2063 | sltu TMP1, SFARG1HI, TISNUM
2064 | beqz TMP1, ->fff_fallback
2065 |. add.d FARG1, FARG1, TOBIT
2066 | mfc1 SFARG1LO, FARG1
2067 | b <1
2068 |. ins CRET1, CRET1, SFARG1LO
2069 |.else
2070 | beq SFARG1HI, TISNUM, >2
2071 |. move CRET2, CRET1
2072 | bal ->vm_tobit_fb
2073 |. sltu TMP1, SFARG1HI, TISNUM
2074 | move SFARG1LO, CRET2
2075 |2:
2076 | ins CRET1, CRET1, SFARG1LO
2077 | b <1
2078 |. addiu TMP2, TMP2, 8
2079 |.endif
1837 |.endmacro 2080 |.endmacro
1838 | 2081 |
1839 |.ffunc_bit_op band, and 2082 |.ffunc_bit_op band, and
@@ -1857,24 +2100,28 @@ static void build_subroutines(BuildCtx *ctx)
1857 |. not CRET1, CRET1 2100 |. not CRET1, CRET1
1858 | 2101 |
1859 |.macro .ffunc_bit_sh, name, ins, shmod 2102 |.macro .ffunc_bit_sh, name, ins, shmod
1860 | .ffunc_nn bit_..name 2103 | .ffunc_2 bit_..name
1861 |. add.d FARG1, FARG1, TOBIT 2104 | beq SFARG1HI, TISNUM, >1
1862 | add.d FARG2, FARG2, TOBIT 2105 |. nop
1863 | mfc1 CARG1, FARG1 2106 | bal ->vm_tobit_fb
1864 | mfc1 CARG2, FARG2 2107 |. sltu TMP1, SFARG1HI, TISNUM
2108 | move SFARG1LO, CRET1
2109 |1:
2110 | bne SFARG2HI, TISNUM, ->fff_fallback
2111 |. nop
1865 |.if shmod == 1 2112 |.if shmod == 1
1866 | li AT, 32 2113 | li AT, 32
1867 | subu TMP0, AT, CARG2 2114 | subu TMP0, AT, SFARG2LO
1868 | sllv CARG2, CARG1, CARG2 2115 | sllv SFARG2LO, SFARG1LO, SFARG2LO
1869 | srlv CARG1, CARG1, TMP0 2116 | srlv SFARG1LO, SFARG1LO, TMP0
1870 |.elif shmod == 2 2117 |.elif shmod == 2
1871 | li AT, 32 2118 | li AT, 32
1872 | subu TMP0, AT, CARG2 2119 | subu TMP0, AT, SFARG2LO
1873 | srlv CARG2, CARG1, CARG2 2120 | srlv SFARG2LO, SFARG1LO, SFARG2LO
1874 | sllv CARG1, CARG1, TMP0 2121 | sllv SFARG1LO, SFARG1LO, TMP0
1875 |.endif 2122 |.endif
1876 | b ->fff_resi 2123 | b ->fff_resi
1877 |. ins CRET1, CARG1, CARG2 2124 |. ins CRET1, SFARG1LO, SFARG2LO
1878 |.endmacro 2125 |.endmacro
1879 | 2126 |
1880 |.ffunc_bit_sh lshift, sllv, 0 2127 |.ffunc_bit_sh lshift, sllv, 0
@@ -1886,9 +2133,11 @@ static void build_subroutines(BuildCtx *ctx)
1886 | 2133 |
1887 |.ffunc_bit tobit 2134 |.ffunc_bit tobit
1888 |->fff_resi: 2135 |->fff_resi:
1889 | mtc1 CRET1, FRET1 2136 | lw PC, FRAME_PC(BASE)
1890 | b ->fff_resn 2137 | addiu RA, BASE, -8
1891 |. cvt.d.w FRET1, FRET1 2138 | sw TISNUM, -8+HI(BASE)
2139 | b ->fff_res1
2140 |. sw CRET1, -8+LO(BASE)
1892 | 2141 |
1893 |//----------------------------------------------------------------------- 2142 |//-----------------------------------------------------------------------
1894 | 2143 |
@@ -2075,19 +2324,96 @@ static void build_subroutines(BuildCtx *ctx)
2075 | jr CRET1 2324 | jr CRET1
2076 |. lw INS, -4(PC) 2325 |. lw INS, -4(PC)
2077 | 2326 |
2327 |->cont_stitch: // Trace stitching.
2328 |.if JIT
2329 | // RA = resultptr, RB = meta base
2330 | lw INS, -4(PC)
2331 | lw TMP2, -24+LO(RB) // Save previous trace.
2332 | decode_RA8a RC, INS
2333 | addiu AT, MULTRES, -8
2334 | decode_RA8b RC
2335 | beqz AT, >2
2336 |. addu RC, BASE, RC // Call base.
2337 |1: // Move results down.
2338 | lw SFRETHI, HI(RA)
2339 | lw SFRETLO, LO(RA)
2340 | addiu AT, AT, -8
2341 | addiu RA, RA, 8
2342 | sw SFRETHI, HI(RC)
2343 | sw SFRETLO, LO(RC)
2344 | bnez AT, <1
2345 |. addiu RC, RC, 8
2346 |2:
2347 | decode_RA8a RA, INS
2348 | decode_RB8a RB, INS
2349 | decode_RA8b RA
2350 | decode_RB8b RB
2351 | addu RA, RA, RB
2352 | addu RA, BASE, RA
2353 |3:
2354 | sltu AT, RC, RA
2355 | bnez AT, >9 // More results wanted?
2356 |. nop
2357 |
2358 | lhu TMP3, TRACE:TMP2->traceno
2359 | lhu RD, TRACE:TMP2->link
2360 | beq RD, TMP3, ->cont_nop // Blacklisted.
2361 |. load_got lj_dispatch_stitch
2362 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2363 |. sll RD, RD, 3
2364 |
2365 | // Stitch a new trace to the previous trace.
2366 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2367 | sw L, DISPATCH_J(L)(DISPATCH)
2368 | sw BASE, L->base
2369 | addiu CARG1, DISPATCH, GG_DISP2J
2370 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2371 |. move CARG2, PC
2372 | b ->cont_nop
2373 |. lw BASE, L->base
2374 |
2375 |9:
2376 | sw TISNIL, HI(RC)
2377 | b <3
2378 |. addiu RC, RC, 8
2379 |.endif
2380 |
2381 |->vm_profhook: // Dispatch target for profiler hook.
2382#if LJ_HASPROFILE
2383 | load_got lj_dispatch_profile
2384 | sw MULTRES, SAVE_MULTRES
2385 | move CARG2, PC
2386 | sw BASE, L->base
2387 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2388 |. move CARG1, L
2389 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2390 | addiu PC, PC, -4
2391 | b ->cont_nop
2392 |. lw BASE, L->base
2393#endif
2394 |
2078 |//----------------------------------------------------------------------- 2395 |//-----------------------------------------------------------------------
2079 |//-- Trace exit handler ------------------------------------------------- 2396 |//-- Trace exit handler -------------------------------------------------
2080 |//----------------------------------------------------------------------- 2397 |//-----------------------------------------------------------------------
2081 | 2398 |
2082 |.macro savex_, a, b 2399 |.macro savex_, a, b
2400 |.if FPU
2083 | sdc1 f..a, 16+a*8(sp) 2401 | sdc1 f..a, 16+a*8(sp)
2084 | sw r..a, 16+32*8+a*4(sp) 2402 | sw r..a, 16+32*8+a*4(sp)
2085 | sw r..b, 16+32*8+b*4(sp) 2403 | sw r..b, 16+32*8+b*4(sp)
2404 |.else
2405 | sw r..a, 16+a*4(sp)
2406 | sw r..b, 16+b*4(sp)
2407 |.endif
2086 |.endmacro 2408 |.endmacro
2087 | 2409 |
2088 |->vm_exit_handler: 2410 |->vm_exit_handler:
2089 |.if JIT 2411 |.if JIT
2412 |.if FPU
2090 | addiu sp, sp, -(16+32*8+32*4) 2413 | addiu sp, sp, -(16+32*8+32*4)
2414 |.else
2415 | addiu sp, sp, -(16+32*4)
2416 |.endif
2091 | savex_ 0, 1 2417 | savex_ 0, 1
2092 | savex_ 2, 3 2418 | savex_ 2, 3
2093 | savex_ 4, 5 2419 | savex_ 4, 5
@@ -2102,25 +2428,34 @@ static void build_subroutines(BuildCtx *ctx)
2102 | savex_ 22, 23 2428 | savex_ 22, 23
2103 | savex_ 24, 25 2429 | savex_ 24, 25
2104 | savex_ 26, 27 2430 | savex_ 26, 27
2431 |.if FPU
2105 | sdc1 f28, 16+28*8(sp) 2432 | sdc1 f28, 16+28*8(sp)
2106 | sw r28, 16+32*8+28*4(sp)
2107 | sdc1 f30, 16+30*8(sp) 2433 | sdc1 f30, 16+30*8(sp)
2434 | sw r28, 16+32*8+28*4(sp)
2108 | sw r30, 16+32*8+30*4(sp) 2435 | sw r30, 16+32*8+30*4(sp)
2109 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. 2436 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2437 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2438 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2439 |.else
2440 | sw r28, 16+28*4(sp)
2441 | sw r30, 16+30*4(sp)
2442 | sw r0, 16+31*4(sp) // Clear RID_TMP.
2443 | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2444 | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2445 |.endif
2110 | li_vmstate EXIT 2446 | li_vmstate EXIT
2111 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2112 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2447 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2113 | lw TMP1, 0(TMP2) // Load exit number. 2448 | lw TMP1, 0(TMP2) // Load exit number.
2114 | st_vmstate 2449 | st_vmstate
2115 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. 2450 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2116 | lw L, DISPATCH_GL(jit_L)(DISPATCH) 2451 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2117 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2118 | load_got lj_trace_exit 2452 | load_got lj_trace_exit
2119 | sw L, DISPATCH_J(L)(DISPATCH) 2453 | sw L, DISPATCH_J(L)(DISPATCH)
2120 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. 2454 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2455 | sw BASE, L->base
2121 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 2456 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2122 | addiu CARG1, DISPATCH, GG_DISP2J 2457 | addiu CARG1, DISPATCH, GG_DISP2J
2123 | sw BASE, L->base 2458 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2124 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) 2459 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2125 |. addiu CARG2, sp, 16 2460 |. addiu CARG2, sp, 16
2126 | // Returns MULTRES (unscaled) or negated error code. 2461 | // Returns MULTRES (unscaled) or negated error code.
@@ -2136,29 +2471,34 @@ static void build_subroutines(BuildCtx *ctx)
2136 |.if JIT 2471 |.if JIT
2137 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. 2472 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2138 | lw L, SAVE_L 2473 | lw L, SAVE_L
2139 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2474 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2475 | sw BASE, L->base
2140 |1: 2476 |1:
2141 | bltz CRET1, >3 // Check for error from exit. 2477 | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit.
2142 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2478 | beqz TMP0, >9
2143 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2479 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2480 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2144 | sll MULTRES, CRET1, 3 2481 | sll MULTRES, CRET1, 3
2145 | li TISNIL, LJ_TNIL 2482 | li TISNIL, LJ_TNIL
2483 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2146 | sw MULTRES, SAVE_MULTRES 2484 | sw MULTRES, SAVE_MULTRES
2147 | mtc1 TMP3, TOBIT 2485 | .FPU mtc1 TMP3, TOBIT
2148 | lw TMP1, LFUNC:TMP1->pc 2486 | lw TMP1, LFUNC:RB->pc
2149 | sw r0, DISPATCH_GL(jit_L)(DISPATCH) 2487 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2150 | lw KBASE, PC2PROTO(k)(TMP1) 2488 | lw KBASE, PC2PROTO(k)(TMP1)
2151 | cvt.d.s TOBIT, TOBIT 2489 | .FPU cvt.d.s TOBIT, TOBIT
2152 | // Modified copy of ins_next which handles function header dispatch, too. 2490 | // Modified copy of ins_next which handles function header dispatch, too.
2153 | lw INS, 0(PC) 2491 | lw INS, 0(PC)
2154 | addiu PC, PC, 4 2492 | addiu CRET1, CRET1, 17 // Static dispatch?
2155 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 2493 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
2156 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2494 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2495 | decode_RD8a RD, INS
2496 | beqz CRET1, >5
2497 |. addiu PC, PC, 4
2157 | decode_OP4a TMP1, INS 2498 | decode_OP4a TMP1, INS
2158 | decode_OP4b TMP1 2499 | decode_OP4b TMP1
2159 | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header?
2160 | addu TMP0, DISPATCH, TMP1 2500 | addu TMP0, DISPATCH, TMP1
2161 | decode_RD8a RD, INS 2501 | sltiu TMP2, TMP1, BC_FUNCF*4
2162 | lw AT, 0(TMP0) 2502 | lw AT, 0(TMP0)
2163 | decode_RA8a RA, INS 2503 | decode_RA8a RA, INS
2164 | beqz TMP2, >2 2504 | beqz TMP2, >2
@@ -2166,13 +2506,46 @@ static void build_subroutines(BuildCtx *ctx)
2166 | jr AT 2506 | jr AT
2167 |. decode_RD8b RD 2507 |. decode_RD8b RD
2168 |2: 2508 |2:
2509 | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
2510 | bnez TMP2, >3
2511 |. lw TMP1, FRAME_PC(BASE)
2512 | // Check frame below fast function.
2513 | andi TMP0, TMP1, FRAME_TYPE
2514 | bnez TMP0, >3 // Trace stitching continuation?
2515 |. nop
2516 | // Otherwise set KBASE for Lua function below fast function.
2517 | lw TMP2, -4(TMP1)
2518 | decode_RA8a TMP0, TMP2
2519 | decode_RA8b TMP0
2520 | subu TMP1, BASE, TMP0
2521 | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
2522 | lw TMP1, LFUNC:TMP2->pc
2523 | lw KBASE, PC2PROTO(k)(TMP1)
2524 |3:
2169 | addiu RC, MULTRES, -8 2525 | addiu RC, MULTRES, -8
2170 | jr AT 2526 | jr AT
2171 |. addu RA, RA, BASE 2527 |. addu RA, RA, BASE
2172 | 2528 |
2173 |3: // Rethrow error from the right C frame. 2529 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2174 | load_got lj_err_run 2530 | lw TMP0, DISPATCH_J(trace)(DISPATCH)
2175 | call_intern lj_err_run // (lua_State *L) 2531 | decode_RD4b RD
2532 | addu TMP0, TMP0, RD
2533 | lw TRACE:TMP2, 0(TMP0)
2534 | lw INS, TRACE:TMP2->startins
2535 | decode_OP4a TMP1, INS
2536 | decode_OP4b TMP1
2537 | addu TMP0, DISPATCH, TMP1
2538 | decode_RD8a RD, INS
2539 | lw AT, GG_DISP2STATIC(TMP0)
2540 | decode_RA8a RA, INS
2541 | decode_RD8b RD
2542 | jr AT
2543 |. decode_RA8b RA
2544 |
2545 |9: // Rethrow error from the right C frame.
2546 | load_got lj_err_trace
2547 | sub CARG2, r0, CRET1
2548 | call_intern lj_err_trace // (lua_State *L, int errcode)
2176 |. move CARG1, L 2549 |. move CARG1, L
2177 |.endif 2550 |.endif
2178 | 2551 |
@@ -2180,8 +2553,9 @@ static void build_subroutines(BuildCtx *ctx)
2180 |//-- Math helper functions ---------------------------------------------- 2553 |//-- Math helper functions ----------------------------------------------
2181 |//----------------------------------------------------------------------- 2554 |//-----------------------------------------------------------------------
2182 | 2555 |
2556 |// Hard-float round to integer.
2183 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2557 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2184 |.macro vm_round, func 2558 |.macro vm_round_hf, func
2185 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2559 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2186 | mtc1 r0, f4 2560 | mtc1 r0, f4
2187 | mtc1 TMP0, f5 2561 | mtc1 TMP0, f5
@@ -2223,6 +2597,12 @@ static void build_subroutines(BuildCtx *ctx)
2223 |. mov.d FRET1, FARG1 2597 |. mov.d FRET1, FARG1
2224 |.endmacro 2598 |.endmacro
2225 | 2599 |
2600 |.macro vm_round, func
2601 |.if FPU
2602 | vm_round_hf, func
2603 |.endif
2604 |.endmacro
2605 |
2226 |->vm_floor: 2606 |->vm_floor:
2227 | vm_round floor 2607 | vm_round floor
2228 |->vm_ceil: 2608 |->vm_ceil:
@@ -2232,10 +2612,286 @@ static void build_subroutines(BuildCtx *ctx)
2232 | vm_round trunc 2612 | vm_round trunc
2233 |.endif 2613 |.endif
2234 | 2614 |
2615 |// Soft-float integer to number conversion.
2616 |.macro sfi2d, AHI, ALO
2617 |.if not FPU
2618 | beqz ALO, >9 // Handle zero first.
2619 |. sra TMP0, ALO, 31
2620 | xor TMP1, ALO, TMP0
2621 | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2622 | clz AHI, TMP1
2623 | andi TMP0, TMP0, 0x800 // Mask sign bit.
2624 | li AT, 0x3ff+31-1
2625 | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2626 | subu AHI, AT, AHI // Exponent - 1 in AHI.
2627 | sll ALO, TMP1, 21
2628 | or AHI, AHI, TMP0 // Sign | Exponent.
2629 | srl TMP1, TMP1, 11
2630 | sll AHI, AHI, 20 // Align left.
2631 | jr ra
2632 |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
2633 |9:
2634 | jr ra
2635 |. li AHI, 0
2636 |.endif
2637 |.endmacro
2638 |
2639 |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
2640 |->vm_sfi2d_1:
2641 | sfi2d SFARG1HI, SFARG1LO
2642 |
2643 |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2644 |->vm_sfi2d_2:
2645 | sfi2d SFARG2HI, SFARG2LO
2646 |
2647 |// Soft-float comparison. Equivalent to c.eq.d.
2648 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2649 |->vm_sfcmpeq:
2650 |.if not FPU
2651 | sll AT, SFARG1HI, 1
2652 | sll TMP0, SFARG2HI, 1
2653 | or CRET1, SFARG1LO, SFARG2LO
2654 | or TMP1, AT, TMP0
2655 | or TMP1, TMP1, CRET1
2656 | beqz TMP1, >8 // Both args +-0: return 1.
2657 |. sltu CRET1, r0, SFARG1LO
2658 | lui TMP1, 0xffe0
2659 | addu AT, AT, CRET1
2660 | sltu CRET1, r0, SFARG2LO
2661 | sltu AT, TMP1, AT
2662 | addu TMP0, TMP0, CRET1
2663 | sltu TMP0, TMP1, TMP0
2664 | or TMP1, AT, TMP0
2665 | bnez TMP1, >9 // Either arg is NaN: return 0;
2666 |. xor TMP0, SFARG1HI, SFARG2HI
2667 | xor TMP1, SFARG1LO, SFARG2LO
2668 | or AT, TMP0, TMP1
2669 | jr ra
2670 |. sltiu CRET1, AT, 1 // Same values: return 1.
2671 |8:
2672 | jr ra
2673 |. li CRET1, 1
2674 |9:
2675 | jr ra
2676 |. li CRET1, 0
2677 |.endif
2678 |
2679 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2680 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2681 |->vm_sfcmpult:
2682 |.if not FPU
2683 | b >1
2684 |. li CRET2, 1
2685 |.endif
2686 |
2687 |->vm_sfcmpolt:
2688 |.if not FPU
2689 | li CRET2, 0
2690 |1:
2691 | sll AT, SFARG1HI, 1
2692 | sll TMP0, SFARG2HI, 1
2693 | or CRET1, SFARG1LO, SFARG2LO
2694 | or TMP1, AT, TMP0
2695 | or TMP1, TMP1, CRET1
2696 | beqz TMP1, >8 // Both args +-0: return 0.
2697 |. sltu CRET1, r0, SFARG1LO
2698 | lui TMP1, 0xffe0
2699 | addu AT, AT, CRET1
2700 | sltu CRET1, r0, SFARG2LO
2701 | sltu AT, TMP1, AT
2702 | addu TMP0, TMP0, CRET1
2703 | sltu TMP0, TMP1, TMP0
2704 | or TMP1, AT, TMP0
2705 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2706 |. and AT, SFARG1HI, SFARG2HI
2707 | bltz AT, >5 // Both args negative?
2708 |. nop
2709 | beq SFARG1HI, SFARG2HI, >8
2710 |. sltu CRET1, SFARG1LO, SFARG2LO
2711 | jr ra
2712 |. slt CRET1, SFARG1HI, SFARG2HI
2713 |5: // Swap conditions if both operands are negative.
2714 | beq SFARG1HI, SFARG2HI, >8
2715 |. sltu CRET1, SFARG2LO, SFARG1LO
2716 | jr ra
2717 |. slt CRET1, SFARG2HI, SFARG1HI
2718 |8:
2719 | jr ra
2720 |. nop
2721 |9:
2722 | jr ra
2723 |. move CRET1, CRET2
2724 |.endif
2725 |
2726 |->vm_sfcmpogt:
2727 |.if not FPU
2728 | sll AT, SFARG2HI, 1
2729 | sll TMP0, SFARG1HI, 1
2730 | or CRET1, SFARG2LO, SFARG1LO
2731 | or TMP1, AT, TMP0
2732 | or TMP1, TMP1, CRET1
2733 | beqz TMP1, >8 // Both args +-0: return 0.
2734 |. sltu CRET1, r0, SFARG2LO
2735 | lui TMP1, 0xffe0
2736 | addu AT, AT, CRET1
2737 | sltu CRET1, r0, SFARG1LO
2738 | sltu AT, TMP1, AT
2739 | addu TMP0, TMP0, CRET1
2740 | sltu TMP0, TMP1, TMP0
2741 | or TMP1, AT, TMP0
2742 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2743 |. and AT, SFARG2HI, SFARG1HI
2744 | bltz AT, >5 // Both args negative?
2745 |. nop
2746 | beq SFARG2HI, SFARG1HI, >8
2747 |. sltu CRET1, SFARG2LO, SFARG1LO
2748 | jr ra
2749 |. slt CRET1, SFARG2HI, SFARG1HI
2750 |5: // Swap conditions if both operands are negative.
2751 | beq SFARG2HI, SFARG1HI, >8
2752 |. sltu CRET1, SFARG1LO, SFARG2LO
2753 | jr ra
2754 |. slt CRET1, SFARG1HI, SFARG2HI
2755 |8:
2756 | jr ra
2757 |. nop
2758 |9:
2759 | jr ra
2760 |. li CRET1, 0
2761 |.endif
2762 |
2763 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2764 |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2765 |->vm_sfcmpolex:
2766 |.if not FPU
2767 | sll AT, SFARG1HI, 1
2768 | sll TMP0, SFARG2HI, 1
2769 | or CRET1, SFARG1LO, SFARG2LO
2770 | or TMP1, AT, TMP0
2771 | or TMP1, TMP1, CRET1
2772 | beqz TMP1, >8 // Both args +-0: return 1.
2773 |. sltu CRET1, r0, SFARG1LO
2774 | lui TMP1, 0xffe0
2775 | addu AT, AT, CRET1
2776 | sltu CRET1, r0, SFARG2LO
2777 | sltu AT, TMP1, AT
2778 | addu TMP0, TMP0, CRET1
2779 | sltu TMP0, TMP1, TMP0
2780 | or TMP1, AT, TMP0
2781 | bnez TMP1, >9 // Either arg is NaN: return 0;
2782 |. and AT, SFARG1HI, SFARG2HI
2783 | xor AT, AT, TMP3
2784 | bltz AT, >5 // Both args negative?
2785 |. nop
2786 | beq SFARG1HI, SFARG2HI, >6
2787 |. sltu CRET1, SFARG2LO, SFARG1LO
2788 | jr ra
2789 |. slt CRET1, SFARG2HI, SFARG1HI
2790 |5: // Swap conditions if both operands are negative.
2791 | beq SFARG1HI, SFARG2HI, >6
2792 |. sltu CRET1, SFARG1LO, SFARG2LO
2793 | slt CRET1, SFARG1HI, SFARG2HI
2794 |6:
2795 | jr ra
2796 |. nop
2797 |8:
2798 | jr ra
2799 |. li CRET1, 1
2800 |9:
2801 | jr ra
2802 |. li CRET1, 0
2803 |.endif
2804 |
2805 |.macro sfmin_max, name, fpcall
2806 |->vm_sf .. name:
2807 |.if JIT and not FPU
2808 | move TMP2, ra
2809 | bal ->fpcall
2810 |. nop
2811 | move TMP0, CRET1
2812 | move SFRETHI, SFARG1HI
2813 | move SFRETLO, SFARG1LO
2814 | move ra, TMP2
2815 | movz SFRETHI, SFARG2HI, TMP0
2816 | jr ra
2817 |. movz SFRETLO, SFARG2LO, TMP0
2818 |.endif
2819 |.endmacro
2820 |
2821 | sfmin_max min, vm_sfcmpolt
2822 | sfmin_max max, vm_sfcmpogt
2823 |
2235 |//----------------------------------------------------------------------- 2824 |//-----------------------------------------------------------------------
2236 |//-- Miscellaneous functions -------------------------------------------- 2825 |//-- Miscellaneous functions --------------------------------------------
2237 |//----------------------------------------------------------------------- 2826 |//-----------------------------------------------------------------------
2238 | 2827 |
2828 |.define NEXT_TAB, TAB:CARG1
2829 |.define NEXT_IDX, CARG2
2830 |.define NEXT_ASIZE, CARG3
2831 |.define NEXT_NIL, CARG4
2832 |.define NEXT_TMP0, r12
2833 |.define NEXT_TMP1, r13
2834 |.define NEXT_TMP2, r14
2835 |.define NEXT_RES_VK, CRET1
2836 |.define NEXT_RES_IDX, CRET2
2837 |.define NEXT_RES_PTR, sp
2838 |.define NEXT_RES_VAL_I, 0(sp)
2839 |.define NEXT_RES_VAL_IT, 4(sp)
2840 |.define NEXT_RES_KEY_I, 8(sp)
2841 |.define NEXT_RES_KEY_IT, 12(sp)
2842 |
2843 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2844 |// Next idx returned in CRET2.
2845 |->vm_next:
2846 |.if JIT and ENDIAN_LE
2847 | lw NEXT_ASIZE, NEXT_TAB->asize
2848 | lw NEXT_TMP0, NEXT_TAB->array
2849 | li NEXT_NIL, LJ_TNIL
2850 |1: // Traverse array part.
2851 | sltu AT, NEXT_IDX, NEXT_ASIZE
2852 | sll NEXT_TMP1, NEXT_IDX, 3
2853 | beqz AT, >5
2854 |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
2855 | lw NEXT_TMP2, 4(NEXT_TMP1)
2856 | sw NEXT_IDX, NEXT_RES_KEY_I
2857 | beq NEXT_TMP2, NEXT_NIL, <1
2858 |. addiu NEXT_IDX, NEXT_IDX, 1
2859 | lw NEXT_TMP0, 0(NEXT_TMP1)
2860 | li AT, LJ_TISNUM
2861 | sw NEXT_TMP2, NEXT_RES_VAL_IT
2862 | sw AT, NEXT_RES_KEY_IT
2863 | sw NEXT_TMP0, NEXT_RES_VAL_I
2864 | move NEXT_RES_VK, NEXT_RES_PTR
2865 | jr ra
2866 |. move NEXT_RES_IDX, NEXT_IDX
2867 |
2868 |5: // Traverse hash part.
2869 | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
2870 | lw NODE:NEXT_RES_VK, NEXT_TAB->node
2871 | sll NEXT_TMP2, NEXT_RES_IDX, 5
2872 | lw NEXT_TMP0, NEXT_TAB->hmask
2873 | sll AT, NEXT_RES_IDX, 3
2874 | subu AT, NEXT_TMP2, AT
2875 | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
2876 |6:
2877 | sltu AT, NEXT_TMP0, NEXT_RES_IDX
2878 | bnez AT, >8
2879 |. nop
2880 | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it
2881 | bne NEXT_TMP2, NEXT_NIL, >9
2882 |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
2883 | // Skip holes in hash part.
2884 | b <6
2885 |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
2886 |
2887 |8: // End of iteration. Set the key to nil (not the value).
2888 | sw NEXT_NIL, NEXT_RES_KEY_IT
2889 | move NEXT_RES_VK, NEXT_RES_PTR
2890 |9:
2891 | jr ra
2892 |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
2893 |.endif
2894 |
2239 |//----------------------------------------------------------------------- 2895 |//-----------------------------------------------------------------------
2240 |//-- FFI helper functions ----------------------------------------------- 2896 |//-- FFI helper functions -----------------------------------------------
2241 |//----------------------------------------------------------------------- 2897 |//-----------------------------------------------------------------------
@@ -2251,10 +2907,10 @@ static void build_subroutines(BuildCtx *ctx)
2251 | sw r1, CTSTATE->cb.slot 2907 | sw r1, CTSTATE->cb.slot
2252 | sw CARG1, CTSTATE->cb.gpr[0] 2908 | sw CARG1, CTSTATE->cb.gpr[0]
2253 | sw CARG2, CTSTATE->cb.gpr[1] 2909 | sw CARG2, CTSTATE->cb.gpr[1]
2254 | sdc1 FARG1, CTSTATE->cb.fpr[0] 2910 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2255 | sw CARG3, CTSTATE->cb.gpr[2] 2911 | sw CARG3, CTSTATE->cb.gpr[2]
2256 | sw CARG4, CTSTATE->cb.gpr[3] 2912 | sw CARG4, CTSTATE->cb.gpr[3]
2257 | sdc1 FARG2, CTSTATE->cb.fpr[1] 2913 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2258 | addiu TMP0, sp, CFRAME_SPACE+16 2914 | addiu TMP0, sp, CFRAME_SPACE+16
2259 | sw TMP0, CTSTATE->cb.stack 2915 | sw TMP0, CTSTATE->cb.stack
2260 | sw r0, SAVE_PC // Any value outside of bytecode is ok. 2916 | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2264,15 +2920,16 @@ static void build_subroutines(BuildCtx *ctx)
2264 | // Returns lua_State *. 2920 | // Returns lua_State *.
2265 | lw BASE, L:CRET1->base 2921 | lw BASE, L:CRET1->base
2266 | lw RC, L:CRET1->top 2922 | lw RC, L:CRET1->top
2923 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2267 | move L, CRET1 2924 | move L, CRET1
2268 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2925 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2269 | lw LFUNC:RB, FRAME_FUNC(BASE) 2926 | lw LFUNC:RB, FRAME_FUNC(BASE)
2270 | mtc1 TMP3, TOBIT 2927 | .FPU mtc1 TMP3, TOBIT
2271 | li_vmstate INTERP 2928 | li_vmstate INTERP
2272 | li TISNIL, LJ_TNIL 2929 | li TISNIL, LJ_TNIL
2273 | subu RC, RC, BASE 2930 | subu RC, RC, BASE
2274 | st_vmstate 2931 | st_vmstate
2275 | cvt.d.s TOBIT, TOBIT 2932 | .FPU cvt.d.s TOBIT, TOBIT
2276 | ins_callt 2933 | ins_callt
2277 |.endif 2934 |.endif
2278 | 2935 |
@@ -2286,11 +2943,11 @@ static void build_subroutines(BuildCtx *ctx)
2286 | move CARG2, RA 2943 | move CARG2, RA
2287 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) 2944 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2288 |. move CARG1, CTSTATE 2945 |. move CARG1, CTSTATE
2946 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2289 | lw CRET1, CTSTATE->cb.gpr[0] 2947 | lw CRET1, CTSTATE->cb.gpr[0]
2290 | ldc1 FRET1, CTSTATE->cb.fpr[0] 2948 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2291 | lw CRET2, CTSTATE->cb.gpr[1]
2292 | b ->vm_leave_unw 2949 | b ->vm_leave_unw
2293 |. ldc1 FRET2, CTSTATE->cb.fpr[1] 2950 |. lw CRET2, CTSTATE->cb.gpr[1]
2294 |.endif 2951 |.endif
2295 | 2952 |
2296 |->vm_ffi_call: // Call C function via FFI. 2953 |->vm_ffi_call: // Call C function via FFI.
@@ -2302,7 +2959,6 @@ static void build_subroutines(BuildCtx *ctx)
2302 | move TMP2, sp 2959 | move TMP2, sp
2303 | subu sp, sp, TMP1 2960 | subu sp, sp, TMP1
2304 | sw ra, -4(TMP2) 2961 | sw ra, -4(TMP2)
2305 | sll CARG2, CARG2, 2
2306 | sw r16, -8(TMP2) 2962 | sw r16, -8(TMP2)
2307 | sw CCSTATE, -12(TMP2) 2963 | sw CCSTATE, -12(TMP2)
2308 | move r16, TMP2 2964 | move r16, TMP2
@@ -2322,8 +2978,8 @@ static void build_subroutines(BuildCtx *ctx)
2322 | lw CARG2, CCSTATE->gpr[1] 2978 | lw CARG2, CCSTATE->gpr[1]
2323 | lw CARG3, CCSTATE->gpr[2] 2979 | lw CARG3, CCSTATE->gpr[2]
2324 | lw CARG4, CCSTATE->gpr[3] 2980 | lw CARG4, CCSTATE->gpr[3]
2325 | ldc1 FARG1, CCSTATE->fpr[0] 2981 | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2326 | ldc1 FARG2, CCSTATE->fpr[1] 2982 | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2327 | jalr CFUNCADDR 2983 | jalr CFUNCADDR
2328 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2984 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2329 | lw CCSTATE:TMP1, -12(r16) 2985 | lw CCSTATE:TMP1, -12(r16)
@@ -2331,8 +2987,13 @@ static void build_subroutines(BuildCtx *ctx)
2331 | lw ra, -4(r16) 2987 | lw ra, -4(r16)
2332 | sw CRET1, CCSTATE:TMP1->gpr[0] 2988 | sw CRET1, CCSTATE:TMP1->gpr[0]
2333 | sw CRET2, CCSTATE:TMP1->gpr[1] 2989 | sw CRET2, CCSTATE:TMP1->gpr[1]
2990 |.if FPU
2334 | sdc1 FRET1, CCSTATE:TMP1->fpr[0] 2991 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2335 | sdc1 FRET2, CCSTATE:TMP1->fpr[1] 2992 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2993 |.else
2994 | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2995 | sw CARG2, CCSTATE:TMP1->gpr[3]
2996 |.endif
2336 | move sp, r16 2997 | move sp, r16
2337 | jr ra 2998 | jr ra
2338 |. move r16, TMP2 2999 |. move r16, TMP2
@@ -2356,82 +3017,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2356 3017
2357 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3018 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2358 | // RA = src1*8, RD = src2*8, JMP with RD = target 3019 | // RA = src1*8, RD = src2*8, JMP with RD = target
2359 | addu CARG2, BASE, RA 3020 |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2360 | addu CARG3, BASE, RD 3021 | addu RA, BASE, RA
2361 | lw TMP0, HI(CARG2) 3022 | addu RD, BASE, RD
2362 | lw TMP1, HI(CARG3) 3023 | lw RAHI, HI(RA)
2363 | ldc1 f0, 0(CARG2) 3024 | lw RDHI, HI(RD)
2364 | ldc1 f2, 0(CARG3)
2365 | sltiu TMP0, TMP0, LJ_TISNUM
2366 | sltiu TMP1, TMP1, LJ_TISNUM
2367 | lhu TMP2, OFS_RD(PC) 3025 | lhu TMP2, OFS_RD(PC)
2368 | and TMP0, TMP0, TMP1
2369 | addiu PC, PC, 4 3026 | addiu PC, PC, 4
2370 | beqz TMP0, ->vmeta_comp 3027 | bne RAHI, TISNUM, >2
2371 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) 3028 |. lw RALO, LO(RA)
2372 | decode_RD4b TMP2 3029 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2373 | addu TMP2, TMP2, TMP1 3030 | lw RDLO, LO(RD)
2374 if (op == BC_ISLT || op == BC_ISGE) { 3031 | bne RDHI, TISNUM, >5
2375 | c.olt.d f0, f2 3032 |. decode_RD4b TMP2
2376 } else { 3033 | slt AT, SFARG1LO, SFARG2LO
2377 | c.ole.d f0, f2 3034 | addu TMP2, TMP2, TMP3
2378 } 3035 | movop TMP2, r0, AT
2379 if (op == BC_ISLT || op == BC_ISLE) {
2380 | movf TMP2, r0
2381 } else {
2382 | movt TMP2, r0
2383 }
2384 | addu PC, PC, TMP2
2385 |1: 3036 |1:
3037 | addu PC, PC, TMP2
2386 | ins_next 3038 | ins_next
3039 |
3040 |2: // RA is not an integer.
3041 | sltiu AT, RAHI, LJ_TISNUM
3042 | beqz AT, ->vmeta_comp
3043 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3044 | sltiu AT, RDHI, LJ_TISNUM
3045 |.if FPU
3046 | ldc1 FRA, 0(RA)
3047 | ldc1 FRD, 0(RD)
3048 |.else
3049 | lw RDLO, LO(RD)
3050 |.endif
3051 | beqz AT, >4
3052 |. decode_RD4b TMP2
3053 |3: // RA and RD are both numbers.
3054 |.if FPU
3055 | fcomp f20, f22
3056 | addu TMP2, TMP2, TMP3
3057 | b <1
3058 |. fmovop TMP2, r0
3059 |.else
3060 | bal sfcomp
3061 |. addu TMP2, TMP2, TMP3
3062 | b <1
3063 |. movop TMP2, r0, CRET1
3064 |.endif
3065 |
3066 |4: // RA is a number, RD is not a number.
3067 | bne RDHI, TISNUM, ->vmeta_comp
3068 | // RA is a number, RD is an integer. Convert RD to a number.
3069 |.if FPU
3070 |. lwc1 FRD, LO(RD)
3071 | b <3
3072 |. cvt.d.w FRD, FRD
3073 |.else
3074 |. nop
3075 |.if "RDHI" == "SFARG1HI"
3076 | bal ->vm_sfi2d_1
3077 |.else
3078 | bal ->vm_sfi2d_2
3079 |.endif
3080 |. nop
3081 | b <3
3082 |. nop
3083 |.endif
3084 |
3085 |5: // RA is an integer, RD is not an integer
3086 | sltiu AT, RDHI, LJ_TISNUM
3087 | beqz AT, ->vmeta_comp
3088 | // RA is an integer, RD is a number. Convert RA to a number.
3089 |.if FPU
3090 |. mtc1 RALO, FRA
3091 | ldc1 FRD, 0(RD)
3092 | b <3
3093 | cvt.d.w FRA, FRA
3094 |.else
3095 |. nop
3096 |.if "RAHI" == "SFARG1HI"
3097 | bal ->vm_sfi2d_1
3098 |.else
3099 | bal ->vm_sfi2d_2
3100 |.endif
3101 |. nop
3102 | b <3
3103 |. nop
3104 |.endif
3105 |.endmacro
3106 |
3107 if (op == BC_ISLT) {
3108 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
3109 } else if (op == BC_ISGE) {
3110 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
3111 } else if (op == BC_ISLE) {
3112 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
3113 } else {
3114 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
3115 }
2387 break; 3116 break;
2388 3117
2389 case BC_ISEQV: case BC_ISNEV: 3118 case BC_ISEQV: case BC_ISNEV:
2390 vk = op == BC_ISEQV; 3119 vk = op == BC_ISEQV;
2391 | // RA = src1*8, RD = src2*8, JMP with RD = target 3120 | // RA = src1*8, RD = src2*8, JMP with RD = target
2392 | addu RA, BASE, RA 3121 | addu RA, BASE, RA
2393 | addiu PC, PC, 4 3122 | addiu PC, PC, 4
2394 | lw TMP0, HI(RA)
2395 | ldc1 f0, 0(RA)
2396 | addu RD, BASE, RD 3123 | addu RD, BASE, RD
3124 | lw SFARG1HI, HI(RA)
2397 | lhu TMP2, -4+OFS_RD(PC) 3125 | lhu TMP2, -4+OFS_RD(PC)
2398 | lw TMP1, HI(RD) 3126 | lw SFARG2HI, HI(RD)
2399 | ldc1 f2, 0(RD)
2400 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3127 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2401 | sltiu AT, TMP0, LJ_TISNUM 3128 | sltu AT, TISNUM, SFARG1HI
2402 | sltiu CARG1, TMP1, LJ_TISNUM 3129 | sltu TMP0, TISNUM, SFARG2HI
2403 | decode_RD4b TMP2 3130 | or AT, AT, TMP0
2404 | and AT, AT, CARG1
2405 | beqz AT, >5
2406 |. addu TMP2, TMP2, TMP3
2407 | c.eq.d f0, f2
2408 if (vk) { 3131 if (vk) {
2409 | movf TMP2, r0 3132 | beqz AT, ->BC_ISEQN_Z
2410 } else { 3133 } else {
2411 | movt TMP2, r0 3134 | beqz AT, ->BC_ISNEN_Z
2412 } 3135 }
2413 |1: 3136 |. decode_RD4b TMP2
2414 | addu PC, PC, TMP2 3137 | // Either or both types are not numbers.
2415 | ins_next 3138 | lw SFARG1LO, LO(RA)
2416 |5: // Either or both types are not numbers. 3139 | lw SFARG2LO, LO(RD)
2417 | lw CARG2, LO(RA) 3140 | addu TMP2, TMP2, TMP3
2418 | lw CARG3, LO(RD)
2419 |.if FFI 3141 |.if FFI
2420 | li TMP3, LJ_TCDATA 3142 | li TMP3, LJ_TCDATA
2421 | beq TMP0, TMP3, ->vmeta_equal_cd 3143 | beq SFARG1HI, TMP3, ->vmeta_equal_cd
2422 |.endif 3144 |.endif
2423 |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? 3145 |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
2424 |.if FFI 3146 |.if FFI
2425 | beq TMP1, TMP3, ->vmeta_equal_cd 3147 | beq SFARG2HI, TMP3, ->vmeta_equal_cd
2426 |.endif 3148 |.endif
2427 |. xor TMP3, CARG2, CARG3 // Same tv? 3149 |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
2428 | xor TMP1, TMP1, TMP0 // Same type? 3150 | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
2429 | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? 3151 | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
2430 | movz TMP3, r0, AT // Ignore tv if primitive. 3152 | movz TMP3, r0, AT // Ignore tv if primitive.
2431 | movn CARG1, r0, TMP1 // Tab/ud and same type? 3153 | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
2432 | or AT, TMP1, TMP3 // Same type && (pri||same tv). 3154 | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
2433 | movz CARG1, r0, AT 3155 | movz TMP0, r0, AT
2434 | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. 3156 | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
2435 if (vk) { 3157 if (vk) {
2436 |. movn TMP2, r0, AT 3158 |. movn TMP2, r0, AT
2437 } else { 3159 } else {
@@ -2439,15 +3161,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2439 } 3161 }
2440 | // Different tables or userdatas. Need to check __eq metamethod. 3162 | // Different tables or userdatas. Need to check __eq metamethod.
2441 | // Field metatable must be at same offset for GCtab and GCudata! 3163 | // Field metatable must be at same offset for GCtab and GCudata!
2442 | lw TAB:TMP1, TAB:CARG2->metatable 3164 | lw TAB:TMP1, TAB:SFARG1LO->metatable
2443 | beqz TAB:TMP1, <1 // No metatable? 3165 | beqz TAB:TMP1, >1 // No metatable?
2444 |. nop 3166 |. nop
2445 | lbu TMP1, TAB:TMP1->nomm 3167 | lbu TMP1, TAB:TMP1->nomm
2446 | andi TMP1, TMP1, 1<<MM_eq 3168 | andi TMP1, TMP1, 1<<MM_eq
2447 | bnez TMP1, <1 // Or 'no __eq' flag set? 3169 | bnez TMP1, >1 // Or 'no __eq' flag set?
2448 |. nop 3170 |. nop
2449 | b ->vmeta_equal // Handle __eq metamethod. 3171 | b ->vmeta_equal // Handle __eq metamethod.
2450 |. li CARG4, 1-vk // ne = 0 or 1. 3172 |. li TMP0, 1-vk // ne = 0 or 1.
3173 |1:
3174 | addu PC, PC, TMP2
3175 | ins_next
2451 break; 3176 break;
2452 3177
2453 case BC_ISEQS: case BC_ISNES: 3178 case BC_ISEQS: case BC_ISNES:
@@ -2484,38 +3209,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2484 vk = op == BC_ISEQN; 3209 vk = op == BC_ISEQN;
2485 | // RA = src*8, RD = num_const*8, JMP with RD = target 3210 | // RA = src*8, RD = num_const*8, JMP with RD = target
2486 | addu RA, BASE, RA 3211 | addu RA, BASE, RA
2487 | addiu PC, PC, 4 3212 | addu RD, KBASE, RD
2488 | lw TMP0, HI(RA) 3213 | lw SFARG1HI, HI(RA)
2489 | ldc1 f0, 0(RA) 3214 | lw SFARG2HI, HI(RD)
2490 | addu RD, KBASE, RD 3215 | lhu TMP2, OFS_RD(PC)
2491 | lhu TMP2, -4+OFS_RD(PC) 3216 | addiu PC, PC, 4
2492 | ldc1 f2, 0(RD)
2493 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3217 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2494 | sltiu AT, TMP0, LJ_TISNUM
2495 | decode_RD4b TMP2 3218 | decode_RD4b TMP2
2496 |.if FFI
2497 | beqz AT, >5
2498 |.else
2499 | beqz AT, >1
2500 |.endif
2501 |. addu TMP2, TMP2, TMP3
2502 | c.eq.d f0, f2
2503 if (vk) { 3219 if (vk) {
2504 | movf TMP2, r0 3220 |->BC_ISEQN_Z:
2505 | addu PC, PC, TMP2 3221 } else {
3222 |->BC_ISNEN_Z:
3223 }
3224 | bne SFARG1HI, TISNUM, >3
3225 |. lw SFARG1LO, LO(RA)
3226 | lw SFARG2LO, LO(RD)
3227 | addu TMP2, TMP2, TMP3
3228 | bne SFARG2HI, TISNUM, >6
3229 |. xor AT, SFARG1LO, SFARG2LO
3230 if (vk) {
3231 | movn TMP2, r0, AT
2506 |1: 3232 |1:
3233 | addu PC, PC, TMP2
3234 |2:
2507 } else { 3235 } else {
2508 | movt TMP2, r0 3236 | movz TMP2, r0, AT
2509 |1: 3237 |1:
3238 |2:
2510 | addu PC, PC, TMP2 3239 | addu PC, PC, TMP2
2511 } 3240 }
2512 | ins_next 3241 | ins_next
3242 |
3243 |3: // RA is not an integer.
3244 | sltiu AT, SFARG1HI, LJ_TISNUM
2513 |.if FFI 3245 |.if FFI
2514 |5: 3246 | beqz AT, >8
2515 | li AT, LJ_TCDATA 3247 |.else
2516 | beq TMP0, AT, ->vmeta_equal_cd 3248 | beqz AT, <2
3249 |.endif
3250 |. addu TMP2, TMP2, TMP3
3251 | sltiu AT, SFARG2HI, LJ_TISNUM
3252 |.if FPU
3253 | ldc1 f20, 0(RA)
3254 | ldc1 f22, 0(RD)
3255 |.endif
3256 | beqz AT, >5
3257 |. lw SFARG2LO, LO(RD)
3258 |4: // RA and RD are both numbers.
3259 |.if FPU
3260 | c.eq.d f20, f22
3261 | b <1
3262 if (vk) {
3263 |. movf TMP2, r0
3264 } else {
3265 |. movt TMP2, r0
3266 }
3267 |.else
3268 | bal ->vm_sfcmpeq
2517 |. nop 3269 |. nop
2518 | b <1 3270 | b <1
3271 if (vk) {
3272 |. movz TMP2, r0, CRET1
3273 } else {
3274 |. movn TMP2, r0, CRET1
3275 }
3276 |.endif
3277 |
3278 |5: // RA is a number, RD is not a number.
3279 |.if FFI
3280 | bne SFARG2HI, TISNUM, >9
3281 |.else
3282 | bne SFARG2HI, TISNUM, <2
3283 |.endif
3284 | // RA is a number, RD is an integer. Convert RD to a number.
3285 |.if FPU
3286 |. lwc1 f22, LO(RD)
3287 | b <4
3288 |. cvt.d.w f22, f22
3289 |.else
3290 |. nop
3291 | bal ->vm_sfi2d_2
3292 |. nop
3293 | b <4
3294 |. nop
3295 |.endif
3296 |
3297 |6: // RA is an integer, RD is not an integer
3298 | sltiu AT, SFARG2HI, LJ_TISNUM
3299 |.if FFI
3300 | beqz AT, >9
3301 |.else
3302 | beqz AT, <2
3303 |.endif
3304 | // RA is an integer, RD is a number. Convert RA to a number.
3305 |.if FPU
3306 |. mtc1 SFARG1LO, f20
3307 | ldc1 f22, 0(RD)
3308 | b <4
3309 | cvt.d.w f20, f20
3310 |.else
3311 |. nop
3312 | bal ->vm_sfi2d_1
3313 |. nop
3314 | b <4
3315 |. nop
3316 |.endif
3317 |
3318 |.if FFI
3319 |8:
3320 | li AT, LJ_TCDATA
3321 | bne SFARG1HI, AT, <2
3322 |. nop
3323 | b ->vmeta_equal_cd
3324 |. nop
3325 |9:
3326 | li AT, LJ_TCDATA
3327 | bne SFARG2HI, AT, <2
3328 |. nop
3329 | b ->vmeta_equal_cd
2519 |. nop 3330 |. nop
2520 |.endif 3331 |.endif
2521 break; 3332 break;
@@ -2567,7 +3378,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2567 | addu PC, PC, TMP2 3378 | addu PC, PC, TMP2
2568 } else { 3379 } else {
2569 | sltiu TMP0, TMP0, LJ_TISTRUECOND 3380 | sltiu TMP0, TMP0, LJ_TISTRUECOND
2570 | ldc1 f0, 0(RD) 3381 | lw SFRETHI, HI(RD)
3382 | lw SFRETLO, LO(RD)
2571 if (op == BC_ISTC) { 3383 if (op == BC_ISTC) {
2572 | beqz TMP0, >1 3384 | beqz TMP0, >1
2573 } else { 3385 } else {
@@ -2577,22 +3389,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2577 | decode_RD4b TMP2 3389 | decode_RD4b TMP2
2578 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3390 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2579 | addu TMP2, TMP2, TMP3 3391 | addu TMP2, TMP2, TMP3
2580 | sdc1 f0, 0(RA) 3392 | sw SFRETHI, HI(RA)
3393 | sw SFRETLO, LO(RA)
2581 | addu PC, PC, TMP2 3394 | addu PC, PC, TMP2
2582 |1: 3395 |1:
2583 } 3396 }
2584 | ins_next 3397 | ins_next
2585 break; 3398 break;
2586 3399
3400 case BC_ISTYPE:
3401 | // RA = src*8, RD = -type*8
3402 | addu TMP2, BASE, RA
3403 | srl TMP1, RD, 3
3404 | lw TMP0, HI(TMP2)
3405 | ins_next1
3406 | addu AT, TMP0, TMP1
3407 | bnez AT, ->vmeta_istype
3408 |. ins_next2
3409 break;
3410 case BC_ISNUM:
3411 | // RA = src*8, RD = -(TISNUM-1)*8
3412 | addu TMP2, BASE, RA
3413 | lw TMP0, HI(TMP2)
3414 | ins_next1
3415 | sltiu AT, TMP0, LJ_TISNUM
3416 | beqz AT, ->vmeta_istype
3417 |. ins_next2
3418 break;
3419
2587 /* -- Unary ops --------------------------------------------------------- */ 3420 /* -- Unary ops --------------------------------------------------------- */
2588 3421
2589 case BC_MOV: 3422 case BC_MOV:
2590 | // RA = dst*8, RD = src*8 3423 | // RA = dst*8, RD = src*8
2591 | addu RD, BASE, RD 3424 | addu RD, BASE, RD
2592 | addu RA, BASE, RA 3425 | addu RA, BASE, RA
2593 | ldc1 f0, 0(RD) 3426 | lw SFRETHI, HI(RD)
3427 | lw SFRETLO, LO(RD)
2594 | ins_next1 3428 | ins_next1
2595 | sdc1 f0, 0(RA) 3429 | sw SFRETHI, HI(RA)
3430 | sw SFRETLO, LO(RA)
2596 | ins_next2 3431 | ins_next2
2597 break; 3432 break;
2598 case BC_NOT: 3433 case BC_NOT:
@@ -2609,16 +3444,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2609 break; 3444 break;
2610 case BC_UNM: 3445 case BC_UNM:
2611 | // RA = dst*8, RD = src*8 3446 | // RA = dst*8, RD = src*8
2612 | addu CARG3, BASE, RD 3447 | addu RB, BASE, RD
3448 | lw SFARG1HI, HI(RB)
2613 | addu RA, BASE, RA 3449 | addu RA, BASE, RA
2614 | lw TMP0, HI(CARG3) 3450 | bne SFARG1HI, TISNUM, >2
2615 | ldc1 f0, 0(CARG3) 3451 |. lw SFARG1LO, LO(RB)
2616 | sltiu AT, TMP0, LJ_TISNUM 3452 | lui TMP1, 0x8000
2617 | beqz AT, ->vmeta_unm 3453 | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
2618 |. neg.d f0, f0 3454 |. negu SFARG1LO, SFARG1LO
3455 |1:
2619 | ins_next1 3456 | ins_next1
2620 | sdc1 f0, 0(RA) 3457 | sw SFARG1HI, HI(RA)
3458 | sw SFARG1LO, LO(RA)
2621 | ins_next2 3459 | ins_next2
3460 |2:
3461 | sltiu AT, SFARG1HI, LJ_TISNUM
3462 | beqz AT, ->vmeta_unm
3463 |. lui TMP1, 0x8000
3464 | b <1
3465 |. xor SFARG1HI, SFARG1HI, TMP1
2622 break; 3466 break;
2623 case BC_LEN: 3467 case BC_LEN:
2624 | // RA = dst*8, RD = src*8 3468 | // RA = dst*8, RD = src*8
@@ -2629,12 +3473,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2629 | li AT, LJ_TSTR 3473 | li AT, LJ_TSTR
2630 | bne TMP0, AT, >2 3474 | bne TMP0, AT, >2
2631 |. li AT, LJ_TTAB 3475 |. li AT, LJ_TTAB
2632 | lw CRET1, STR:CARG1->len 3476 | lw CRET1, STR:CARG1->len
2633 |1: 3477 |1:
2634 | mtc1 CRET1, f0
2635 | cvt.d.w f0, f0
2636 | ins_next1 3478 | ins_next1
2637 | sdc1 f0, 0(RA) 3479 | sw TISNUM, HI(RA)
3480 | sw CRET1, LO(RA)
2638 | ins_next2 3481 | ins_next2
2639 |2: 3482 |2:
2640 | bne TMP0, AT, ->vmeta_len 3483 | bne TMP0, AT, ->vmeta_len
@@ -2665,104 +3508,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2665 3508
2666 /* -- Binary ops -------------------------------------------------------- */ 3509 /* -- Binary ops -------------------------------------------------------- */
2667 3510
2668 |.macro ins_arithpre 3511 |.macro fpmod, a, b, c
3512 | bal ->vm_floor // floor(b/c)
3513 |. div.d FARG1, b, c
3514 | mul.d a, FRET1, c
3515 | sub.d a, b, a // b - floor(b/c)*c
3516 |.endmacro
3517
3518 |.macro sfpmod
3519 | addiu sp, sp, -16
3520 |
3521 | load_got __divdf3
3522 | sw SFARG1HI, HI(sp)
3523 | sw SFARG1LO, LO(sp)
3524 | sw SFARG2HI, 8+HI(sp)
3525 | call_extern
3526 |. sw SFARG2LO, 8+LO(sp)
3527 |
3528 | load_got floor
3529 | move SFARG1HI, SFRETHI
3530 | call_extern
3531 |. move SFARG1LO, SFRETLO
3532 |
3533 | load_got __muldf3
3534 | move SFARG1HI, SFRETHI
3535 | move SFARG1LO, SFRETLO
3536 | lw SFARG2HI, 8+HI(sp)
3537 | call_extern
3538 |. lw SFARG2LO, 8+LO(sp)
3539 |
3540 | load_got __subdf3
3541 | lw SFARG1HI, HI(sp)
3542 | lw SFARG1LO, LO(sp)
3543 | move SFARG2HI, SFRETHI
3544 | call_extern
3545 |. move SFARG2LO, SFRETLO
3546 |
3547 | addiu sp, sp, 16
3548 |.endmacro
3549
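For orientation (not part of the patch): both fpmod above (hard-float, via ->vm_floor and div.d/mul.d/sub.d) and sfpmod (soft-float, via __divdf3, floor, __muldf3 and __subdf3) compute the same floor-division modulo named in the comment, b - floor(b/c)*c. A minimal C sketch:

#include <math.h>

/* Lua's modulo on doubles: b - floor(b/c)*c, the quantity both macros build. */
static double lua_fpmod(double b, double c)
{
  return b - floor(b / c) * c;
}
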
3550 |.macro ins_arithpre, label
2669 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3551 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2670 | decode_RB8a RB, INS
2671 | decode_RB8b RB
2672 | decode_RDtoRC8 RC, RD
2673 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3552 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2674 ||switch (vk) { 3553 ||switch (vk) {
2675 ||case 0: 3554 ||case 0:
2676 | addu CARG3, BASE, RB 3555 | decode_RB8a RB, INS
2677 | addu CARG4, KBASE, RC 3556 | decode_RB8b RB
2678 | lw TMP1, HI(CARG3) 3557 | decode_RDtoRC8 RC, RD
2679 | ldc1 f20, 0(CARG3) 3558 | // RA = dst*8, RB = src1*8, RC = num_const*8
2680 | ldc1 f22, 0(CARG4) 3559 | addu RB, BASE, RB
2681 | sltiu AT, TMP1, LJ_TISNUM 3560 |.if "label" ~= "none"
3561 | b label
3562 |.endif
3563 |. addu RC, KBASE, RC
2682 || break; 3564 || break;
2683 ||case 1: 3565 ||case 1:
2684 | addu CARG4, BASE, RB 3566 | decode_RB8a RC, INS
2685 | addu CARG3, KBASE, RC 3567 | decode_RB8b RC
2686 | lw TMP1, HI(CARG4) 3568 | decode_RDtoRC8 RB, RD
2687 | ldc1 f22, 0(CARG4) 3569 | // RA = dst*8, RB = num_const*8, RC = src1*8
2688 | ldc1 f20, 0(CARG3) 3570 | addu RC, BASE, RC
2689 | sltiu AT, TMP1, LJ_TISNUM 3571 |.if "label" ~= "none"
3572 | b label
3573 |.endif
3574 |. addu RB, KBASE, RB
2690 || break; 3575 || break;
2691 ||default: 3576 ||default:
2692 | addu CARG3, BASE, RB 3577 | decode_RB8a RB, INS
2693 | addu CARG4, BASE, RC 3578 | decode_RB8b RB
2694 | lw TMP1, HI(CARG3) 3579 | decode_RDtoRC8 RC, RD
2695 | lw TMP2, HI(CARG4) 3580 | // RA = dst*8, RB = src1*8, RC = src2*8
2696 | ldc1 f20, 0(CARG3) 3581 | addu RB, BASE, RB
2697 | ldc1 f22, 0(CARG4) 3582 |.if "label" ~= "none"
2698 | sltiu AT, TMP1, LJ_TISNUM 3583 | b label
2699 | sltiu TMP0, TMP2, LJ_TISNUM 3584 |.endif
2700 | and AT, AT, TMP0 3585 |. addu RC, BASE, RC
2701 || break; 3586 || break;
2702 ||} 3587 ||}
2703 | beqz AT, ->vmeta_arith
2704 |. addu RA, BASE, RA
2705 |.endmacro 3588 |.endmacro
2706 | 3589 |
2707 |.macro fpmod, a, b, c 3590 |.macro ins_arith, intins, fpins, fpcall, label
2708 |->BC_MODVN_Z: 3591 | ins_arithpre none
2709 | bal ->vm_floor // floor(b/c)
2710 |. div.d FARG1, b, c
2711 | mul.d a, FRET1, c
2712 | sub.d a, b, a // b - floor(b/c)*c
2713 |.endmacro
2714 | 3592 |
2715 |.macro ins_arith, ins 3593 |.if "label" ~= "none"
2716 | ins_arithpre 3594 |label:
2717 |.if "ins" == "fpmod_" 3595 |.endif
2718 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3596 |
2719 |. nop 3597 | lw SFARG1HI, HI(RB)
3598 | lw SFARG2HI, HI(RC)
3599 |
3600 |.if "intins" ~= "div"
3601 |
3602 | // Check for two integers.
3603 | lw SFARG1LO, LO(RB)
3604 | bne SFARG1HI, TISNUM, >5
3605 |. lw SFARG2LO, LO(RC)
3606 | bne SFARG2HI, TISNUM, >5
3607 |
3608 |.if "intins" == "addu"
3609 |. intins CRET1, SFARG1LO, SFARG2LO
3610 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3611 | xor TMP2, CRET1, SFARG2LO
3612 | and TMP1, TMP1, TMP2
3613 | bltz TMP1, ->vmeta_arith
3614 |. addu RA, BASE, RA
3615 |.elif "intins" == "subu"
3616 |. intins CRET1, SFARG1LO, SFARG2LO
3617 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3618 | xor TMP2, SFARG1LO, SFARG2LO
3619 | and TMP1, TMP1, TMP2
3620 | bltz TMP1, ->vmeta_arith
3621 |. addu RA, BASE, RA
3622 |.elif "intins" == "mult"
3623 |. intins SFARG1LO, SFARG2LO
3624 | mflo CRET1
3625 | mfhi TMP2
3626 | sra TMP1, CRET1, 31
3627 | bne TMP1, TMP2, ->vmeta_arith
3628 |. addu RA, BASE, RA
2720 |.else 3629 |.else
2721 | ins f0, f20, f22 3630 |. load_got lj_vm_modi
3631 | beqz SFARG2LO, ->vmeta_arith
3632 |. addu RA, BASE, RA
3633 |.if ENDIAN_BE
3634 | move CARG1, SFARG1LO
3635 |.endif
3636 | call_extern
3637 |. move CARG2, SFARG2LO
3638 |.endif
3639 |
2722 | ins_next1 3640 | ins_next1
2723 | sdc1 f0, 0(RA) 3641 | sw TISNUM, HI(RA)
3642 | sw CRET1, LO(RA)
3643 |3:
2724 | ins_next2 3644 | ins_next2
3645 |
3646 |.elif not FPU
3647 |
3648 | lw SFARG1LO, LO(RB)
3649 | lw SFARG2LO, LO(RC)
3650 |
2725 |.endif 3651 |.endif
3652 |
3653 |5: // Check for two numbers.
3654 | .FPU ldc1 f20, 0(RB)
3655 | sltiu AT, SFARG1HI, LJ_TISNUM
3656 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3657 | .FPU ldc1 f22, 0(RC)
3658 | and AT, AT, TMP0
3659 | beqz AT, ->vmeta_arith
3660 |. addu RA, BASE, RA
3661 |
3662 |.if FPU
3663 | fpins FRET1, f20, f22
3664 |.elif "fpcall" == "sfpmod"
3665 | sfpmod
3666 |.else
3667 | load_got fpcall
3668 | call_extern
3669 |. nop
3670 |.endif
3671 |
3672 | ins_next1
3673 |.if not FPU
3674 | sw SFRETHI, HI(RA)
3675 |.endif
3676 |.if "intins" ~= "div"
3677 | b <3
3678 |.endif
3679 |.if FPU
3680 |. sdc1 FRET1, 0(RA)
3681 |.else
3682 |. sw SFRETLO, LO(RA)
3683 |.endif
3684 |.if "intins" == "div"
3685 | ins_next2
3686 |.endif
3687 |
2726 |.endmacro 3688 |.endmacro
2727 3689
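For reference, the branch-free overflow tests used by the integer fast paths in ins_arith above (the "((y^a) & (y^b)) < 0" and "((y^a) & (a^b)) < 0" comments) correspond to the following C sketch; the mult path instead compares the high word from mfhi against the sign-extension of the low result.

#include <stdint.h>

static int add_overflows(int32_t a, int32_t b)
{
  int32_t y = (int32_t)((uint32_t)a + (uint32_t)b);
  return ((y ^ a) & (y ^ b)) < 0;  /* addu path: result sign differs from both operands. */
}

static int sub_overflows(int32_t a, int32_t b)
{
  int32_t y = (int32_t)((uint32_t)a - (uint32_t)b);
  return ((y ^ a) & (a ^ b)) < 0;  /* subu path: operand signs differ and result flips. */
}
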
2728 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3690 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2729 | ins_arith add.d 3691 | ins_arith addu, add.d, __adddf3, none
2730 break; 3692 break;
2731 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3693 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2732 | ins_arith sub.d 3694 | ins_arith subu, sub.d, __subdf3, none
2733 break; 3695 break;
2734 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3696 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2735 | ins_arith mul.d 3697 | ins_arith mult, mul.d, __muldf3, none
3698 break;
3699 case BC_DIVVN:
3700 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
2736 break; 3701 break;
2737 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3702 case BC_DIVNV: case BC_DIVVV:
2738 | ins_arith div.d 3703 | ins_arithpre ->BC_DIVVN_Z
2739 break; 3704 break;
2740 case BC_MODVN: 3705 case BC_MODVN:
2741 | ins_arith fpmod 3706 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
2742 break; 3707 break;
2743 case BC_MODNV: case BC_MODVV: 3708 case BC_MODNV: case BC_MODVV:
2744 | ins_arith fpmod_ 3709 | ins_arithpre ->BC_MODVN_Z
2745 break; 3710 break;
2746 case BC_POW: 3711 case BC_POW:
2747 | decode_RB8a RB, INS 3712 | ins_arithpre none
2748 | decode_RB8b RB 3713 | lw SFARG1HI, HI(RB)
2749 | decode_RDtoRC8 RC, RD 3714 | lw SFARG2HI, HI(RC)
2750 | addu CARG3, BASE, RB 3715 | sltiu AT, SFARG1HI, LJ_TISNUM
2751 | addu CARG4, BASE, RC 3716 | sltiu TMP0, SFARG2HI, LJ_TISNUM
2752 | lw TMP1, HI(CARG3)
2753 | lw TMP2, HI(CARG4)
2754 | ldc1 FARG1, 0(CARG3)
2755 | ldc1 FARG2, 0(CARG4)
2756 | sltiu AT, TMP1, LJ_TISNUM
2757 | sltiu TMP0, TMP2, LJ_TISNUM
2758 | and AT, AT, TMP0 3717 | and AT, AT, TMP0
2759 | load_got pow 3718 | load_got pow
2760 | beqz AT, ->vmeta_arith 3719 | beqz AT, ->vmeta_arith
2761 |. addu RA, BASE, RA 3720 |. addu RA, BASE, RA
3721 |.if FPU
3722 | ldc1 FARG1, 0(RB)
3723 | ldc1 FARG2, 0(RC)
3724 |.else
3725 | lw SFARG1LO, LO(RB)
3726 | lw SFARG2LO, LO(RC)
3727 |.endif
2762 | call_extern 3728 | call_extern
2763 |. nop 3729 |. nop
2764 | ins_next1 3730 | ins_next1
3731 |.if FPU
2765 | sdc1 FRET1, 0(RA) 3732 | sdc1 FRET1, 0(RA)
3733 |.else
3734 | sw SFRETHI, HI(RA)
3735 | sw SFRETLO, LO(RA)
3736 |.endif
2766 | ins_next2 3737 | ins_next2
2767 break; 3738 break;
2768 3739
@@ -2785,10 +3756,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2785 | bnez CRET1, ->vmeta_binop 3756 | bnez CRET1, ->vmeta_binop
2786 |. lw BASE, L->base 3757 |. lw BASE, L->base
2787 | addu RB, BASE, MULTRES 3758 | addu RB, BASE, MULTRES
2788 | ldc1 f0, 0(RB) 3759 | lw SFRETHI, HI(RB)
3760 | lw SFRETLO, LO(RB)
2789 | addu RA, BASE, RA 3761 | addu RA, BASE, RA
2790 | ins_next1 3762 | ins_next1
2791 | sdc1 f0, 0(RA) // Copy result from RB to RA. 3763 | sw SFRETHI, HI(RA)
3764 | sw SFRETLO, LO(RA)
2792 | ins_next2 3765 | ins_next2
2793 break; 3766 break;
2794 3767
@@ -2823,20 +3796,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2823 case BC_KSHORT: 3796 case BC_KSHORT:
2824 | // RA = dst*8, RD = int16_literal*8 3797 | // RA = dst*8, RD = int16_literal*8
2825 | sra RD, INS, 16 3798 | sra RD, INS, 16
2826 | mtc1 RD, f0
2827 | addu RA, BASE, RA 3799 | addu RA, BASE, RA
2828 | cvt.d.w f0, f0
2829 | ins_next1 3800 | ins_next1
2830 | sdc1 f0, 0(RA) 3801 | sw TISNUM, HI(RA)
3802 | sw RD, LO(RA)
2831 | ins_next2 3803 | ins_next2
2832 break; 3804 break;
2833 case BC_KNUM: 3805 case BC_KNUM:
2834 | // RA = dst*8, RD = num_const*8 3806 | // RA = dst*8, RD = num_const*8
2835 | addu RD, KBASE, RD 3807 | addu RD, KBASE, RD
2836 | addu RA, BASE, RA 3808 | addu RA, BASE, RA
2837 | ldc1 f0, 0(RD) 3809 | lw SFRETHI, HI(RD)
3810 | lw SFRETLO, LO(RD)
2838 | ins_next1 3811 | ins_next1
2839 | sdc1 f0, 0(RA) 3812 | sw SFRETHI, HI(RA)
3813 | sw SFRETLO, LO(RA)
2840 | ins_next2 3814 | ins_next2
2841 break; 3815 break;
2842 case BC_KPRI: 3816 case BC_KPRI:
@@ -2872,9 +3846,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2872 | lw UPVAL:RB, LFUNC:RD->uvptr 3846 | lw UPVAL:RB, LFUNC:RD->uvptr
2873 | ins_next1 3847 | ins_next1
2874 | lw TMP1, UPVAL:RB->v 3848 | lw TMP1, UPVAL:RB->v
2875 | ldc1 f0, 0(TMP1) 3849 | lw SFRETHI, HI(TMP1)
3850 | lw SFRETLO, LO(TMP1)
2876 | addu RA, BASE, RA 3851 | addu RA, BASE, RA
2877 | sdc1 f0, 0(RA) 3852 | sw SFRETHI, HI(RA)
3853 | sw SFRETLO, LO(RA)
2878 | ins_next2 3854 | ins_next2
2879 break; 3855 break;
2880 case BC_USETV: 3856 case BC_USETV:
@@ -2883,26 +3859,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2883 | srl RA, RA, 1 3859 | srl RA, RA, 1
2884 | addu RD, BASE, RD 3860 | addu RD, BASE, RD
2885 | addu RA, RA, LFUNC:RB 3861 | addu RA, RA, LFUNC:RB
2886 | ldc1 f0, 0(RD)
2887 | lw UPVAL:RB, LFUNC:RA->uvptr 3862 | lw UPVAL:RB, LFUNC:RA->uvptr
3863 | lw SFRETHI, HI(RD)
3864 | lw SFRETLO, LO(RD)
2888 | lbu TMP3, UPVAL:RB->marked 3865 | lbu TMP3, UPVAL:RB->marked
2889 | lw CARG2, UPVAL:RB->v 3866 | lw CARG2, UPVAL:RB->v
2890 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3867 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2891 | lbu TMP0, UPVAL:RB->closed 3868 | lbu TMP0, UPVAL:RB->closed
2892 | lw TMP2, HI(RD) 3869 | sw SFRETHI, HI(CARG2)
2893 | sdc1 f0, 0(CARG2) 3870 | sw SFRETLO, LO(CARG2)
2894 | li AT, LJ_GC_BLACK|1 3871 | li AT, LJ_GC_BLACK|1
2895 | or TMP3, TMP3, TMP0 3872 | or TMP3, TMP3, TMP0
2896 | beq TMP3, AT, >2 // Upvalue is closed and black? 3873 | beq TMP3, AT, >2 // Upvalue is closed and black?
2897 |. addiu TMP2, TMP2, -(LJ_TNUMX+1) 3874 |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
2898 |1: 3875 |1:
2899 | ins_next 3876 | ins_next
2900 | 3877 |
2901 |2: // Check if new value is collectable. 3878 |2: // Check if new value is collectable.
2902 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3879 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
2903 | beqz AT, <1 // tvisgcv(v) 3880 | beqz AT, <1 // tvisgcv(v)
2904 |. lw TMP1, LO(RD) 3881 |. nop
2905 | lbu TMP3, GCOBJ:TMP1->gch.marked 3882 | lbu TMP3, GCOBJ:SFRETLO->gch.marked
2906 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3883 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2907 | beqz TMP3, <1 3884 | beqz TMP3, <1
2908 |. load_got lj_gc_barrieruv 3885 |. load_got lj_gc_barrieruv
@@ -2950,11 +3927,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2950 | srl RA, RA, 1 3927 | srl RA, RA, 1
2951 | addu RD, KBASE, RD 3928 | addu RD, KBASE, RD
2952 | addu RA, RA, LFUNC:RB 3929 | addu RA, RA, LFUNC:RB
2953 | ldc1 f0, 0(RD) 3930 | lw UPVAL:RB, LFUNC:RA->uvptr
2954 | lw UPVAL:RB, LFUNC:RA->uvptr 3931 | lw SFRETHI, HI(RD)
3932 | lw SFRETLO, LO(RD)
3933 | lw TMP1, UPVAL:RB->v
2955 | ins_next1 3934 | ins_next1
2956 | lw TMP1, UPVAL:RB->v 3935 | sw SFRETHI, HI(TMP1)
2957 | sdc1 f0, 0(TMP1) 3936 | sw SFRETLO, LO(TMP1)
2958 | ins_next2 3937 | ins_next2
2959 break; 3938 break;
2960 case BC_USETP: 3939 case BC_USETP:
@@ -2964,10 +3943,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2964 | srl TMP0, RD, 3 3943 | srl TMP0, RD, 3
2965 | addu RA, RA, LFUNC:RB 3944 | addu RA, RA, LFUNC:RB
2966 | not TMP0, TMP0 3945 | not TMP0, TMP0
2967 | lw UPVAL:RB, LFUNC:RA->uvptr 3946 | lw UPVAL:RB, LFUNC:RA->uvptr
2968 | ins_next1 3947 | ins_next1
2969 | lw TMP1, UPVAL:RB->v 3948 | lw TMP1, UPVAL:RB->v
2970 | sw TMP0, HI(TMP1) 3949 | sw TMP0, HI(TMP1)
2971 | ins_next2 3950 | ins_next2
2972 break; 3951 break;
2973 3952
@@ -3003,8 +3982,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3003 | li TMP0, LJ_TFUNC 3982 | li TMP0, LJ_TFUNC
3004 | ins_next1 3983 | ins_next1
3005 | addu RA, BASE, RA 3984 | addu RA, BASE, RA
3006 | sw TMP0, HI(RA)
3007 | sw LFUNC:CRET1, LO(RA) 3985 | sw LFUNC:CRET1, LO(RA)
3986 | sw TMP0, HI(RA)
3008 | ins_next2 3987 | ins_next2
3009 break; 3988 break;
3010 3989
@@ -3085,31 +4064,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3085 | lw TMP2, HI(CARG3) 4064 | lw TMP2, HI(CARG3)
3086 | lw TAB:RB, LO(CARG2) 4065 | lw TAB:RB, LO(CARG2)
3087 | li AT, LJ_TTAB 4066 | li AT, LJ_TTAB
3088 | ldc1 f0, 0(CARG3)
3089 | bne TMP1, AT, ->vmeta_tgetv 4067 | bne TMP1, AT, ->vmeta_tgetv
3090 |. addu RA, BASE, RA 4068 |. addu RA, BASE, RA
3091 | sltiu AT, TMP2, LJ_TISNUM 4069 | bne TMP2, TISNUM, >5
3092 | beqz AT, >5 4070 |. lw RC, LO(CARG3)
3093 |. li AT, LJ_TSTR 4071 | lw TMP0, TAB:RB->asize
3094 |
3095 | // Convert number key to integer, check for integerness and range.
3096 | cvt.w.d f2, f0
3097 | lw TMP0, TAB:RB->asize
3098 | mfc1 TMP2, f2
3099 | cvt.d.w f4, f2
3100 | lw TMP1, TAB:RB->array 4072 | lw TMP1, TAB:RB->array
3101 | c.eq.d f0, f4 4073 | sltu AT, RC, TMP0
3102 | sltu AT, TMP2, TMP0 4074 | sll TMP2, RC, 3
3103 | movf AT, r0
3104 | sll TMP2, TMP2, 3
3105 | beqz AT, ->vmeta_tgetv // Integer key and in array part? 4075 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3106 |. addu TMP2, TMP1, TMP2 4076 |. addu TMP2, TMP1, TMP2
3107 | lw TMP0, HI(TMP2) 4077 | lw SFRETHI, HI(TMP2)
3108 | beq TMP0, TISNIL, >2 4078 | beq SFRETHI, TISNIL, >2
3109 |. ldc1 f0, 0(TMP2) 4079 |. lw SFRETLO, LO(TMP2)
3110 |1: 4080 |1:
3111 | ins_next1 4081 | ins_next1
3112 | sdc1 f0, 0(RA) 4082 | sw SFRETHI, HI(RA)
4083 | sw SFRETLO, LO(RA)
3113 | ins_next2 4084 | ins_next2
3114 | 4085 |
3115 |2: // Check for __index if table value is nil. 4086 |2: // Check for __index if table value is nil.
@@ -3124,8 +4095,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3124 |. nop 4095 |. nop
3125 | 4096 |
3126 |5: 4097 |5:
4098 | li AT, LJ_TSTR
3127 | bne TMP2, AT, ->vmeta_tgetv 4099 | bne TMP2, AT, ->vmeta_tgetv
3128 |. lw STR:RC, LO(CARG3) 4100 |. nop
3129 | b ->BC_TGETS_Z // String key? 4101 | b ->BC_TGETS_Z // String key?
3130 |. nop 4102 |. nop
3131 break; 4103 break;
@@ -3146,9 +4118,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3146 |->BC_TGETS_Z: 4118 |->BC_TGETS_Z:
3147 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 4119 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
3148 | lw TMP0, TAB:RB->hmask 4120 | lw TMP0, TAB:RB->hmask
3149 | lw TMP1, STR:RC->hash 4121 | lw TMP1, STR:RC->sid
3150 | lw NODE:TMP2, TAB:RB->node 4122 | lw NODE:TMP2, TAB:RB->node
3151 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4123 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3152 | sll TMP0, TMP1, 5 4124 | sll TMP0, TMP1, 5
3153 | sll TMP1, TMP1, 3 4125 | sll TMP1, TMP1, 3
3154 | subu TMP1, TMP0, TMP1 4126 | subu TMP1, TMP0, TMP1
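The shift/subtract sequence above (and the matching one in BC_TSETS_Z below, commented "node = tab->node + (idx*32-idx*8)") computes the main hash position without a multiply: with 24-byte nodes, idx*32 - idx*8 equals idx*24. A simplified C sketch, with the structures written loosely rather than using the real lj_obj.h layouts:

#include <stdint.h>

typedef struct Node Node;  /* 24 bytes each in this 32-bit layout. */

static Node *main_position(Node *node, uint32_t hmask, uint32_t sid)
{
  uint32_t idx = sid & hmask;                                /* idx = str->sid & tab->hmask */
  return (Node *)((char *)node + (idx << 5) - (idx << 3));   /* node + idx*24 */
}
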
@@ -3157,18 +4129,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3157 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4129 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3158 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4130 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3159 | lw NODE:TMP1, NODE:TMP2->next 4131 | lw NODE:TMP1, NODE:TMP2->next
3160 | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 4132 | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3161 | addiu CARG1, CARG1, -LJ_TSTR 4133 | addiu CARG1, CARG1, -LJ_TSTR
3162 | xor TMP0, TMP0, STR:RC 4134 | xor TMP0, TMP0, STR:RC
3163 | or AT, CARG1, TMP0 4135 | or AT, CARG1, TMP0
3164 | bnez AT, >4 4136 | bnez AT, >4
3165 |. lw TAB:TMP3, TAB:RB->metatable 4137 |. lw TAB:TMP3, TAB:RB->metatable
3166 | beq CARG2, TISNIL, >5 // Key found, but nil value? 4138 | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
3167 |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) 4139 |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3168 |3: 4140 |3:
3169 | ins_next1 4141 | ins_next1
3170 | sw CARG2, HI(RA) 4142 | sw SFRETHI, HI(RA)
3171 | sw CARG1, LO(RA) 4143 | sw SFRETLO, LO(RA)
3172 | ins_next2 4144 | ins_next2
3173 | 4145 |
3174 |4: // Follow hash chain. 4146 |4: // Follow hash chain.
@@ -3178,7 +4150,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3178 | 4150 |
3179 |5: // Check for __index if table value is nil. 4151 |5: // Check for __index if table value is nil.
3180 | beqz TAB:TMP3, <3 // No metatable: done. 4152 | beqz TAB:TMP3, <3 // No metatable: done.
3181 |. li CARG2, LJ_TNIL 4153 |. li SFRETHI, LJ_TNIL
3182 | lbu TMP0, TAB:TMP3->nomm 4154 | lbu TMP0, TAB:TMP3->nomm
3183 | andi TMP0, TMP0, 1<<MM_index 4155 | andi TMP0, TMP0, 1<<MM_index
3184 | bnez TMP0, <3 // 'no __index' flag set: done. 4156 | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3203,12 +4175,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3203 | sltu AT, TMP0, TMP1 4175 | sltu AT, TMP0, TMP1
3204 | beqz AT, ->vmeta_tgetb 4176 | beqz AT, ->vmeta_tgetb
3205 |. addu RC, TMP2, RC 4177 |. addu RC, TMP2, RC
3206 | lw TMP1, HI(RC) 4178 | lw SFRETHI, HI(RC)
3207 | beq TMP1, TISNIL, >5 4179 | beq SFRETHI, TISNIL, >5
3208 |. ldc1 f0, 0(RC) 4180 |. lw SFRETLO, LO(RC)
3209 |1: 4181 |1:
3210 | ins_next1 4182 | ins_next1
3211 | sdc1 f0, 0(RA) 4183 | sw SFRETHI, HI(RA)
4184 | sw SFRETLO, LO(RA)
3212 | ins_next2 4185 | ins_next2
3213 | 4186 |
3214 |5: // Check for __index if table value is nil. 4187 |5: // Check for __index if table value is nil.
@@ -3219,9 +4192,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3219 | andi TMP1, TMP1, 1<<MM_index 4192 | andi TMP1, TMP1, 1<<MM_index
3220 | bnez TMP1, <1 // 'no __index' flag set: done. 4193 | bnez TMP1, <1 // 'no __index' flag set: done.
3221 |. nop 4194 |. nop
3222 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4195 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3223 |. nop 4196 |. nop
3224 break; 4197 break;
4198 case BC_TGETR:
4199 | // RA = dst*8, RB = table*8, RC = key*8
4200 | decode_RB8a RB, INS
4201 | decode_RB8b RB
4202 | decode_RDtoRC8 RC, RD
4203 | addu RB, BASE, RB
4204 | addu RC, BASE, RC
4205 | lw TAB:CARG1, LO(RB)
4206 | lw CARG2, LO(RC)
4207 | addu RA, BASE, RA
4208 | lw TMP0, TAB:CARG1->asize
4209 | lw TMP1, TAB:CARG1->array
4210 | sltu AT, CARG2, TMP0
4211 | sll TMP2, CARG2, 3
4212 | beqz AT, ->vmeta_tgetr // In array part?
4213 |. addu CRET1, TMP1, TMP2
4214 | lw SFARG2HI, HI(CRET1)
4215 | lw SFARG2LO, LO(CRET1)
4216 |->BC_TGETR_Z:
4217 | ins_next1
4218 | sw SFARG2HI, HI(RA)
4219 | sw SFARG2LO, LO(RA)
4220 | ins_next2
4221 break;
3225 4222
3226 case BC_TSETV: 4223 case BC_TSETV:
3227 | // RA = src*8, RB = table*8, RC = key*8 4224 | // RA = src*8, RB = table*8, RC = key*8
@@ -3234,33 +4231,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3234 | lw TMP2, HI(CARG3) 4231 | lw TMP2, HI(CARG3)
3235 | lw TAB:RB, LO(CARG2) 4232 | lw TAB:RB, LO(CARG2)
3236 | li AT, LJ_TTAB 4233 | li AT, LJ_TTAB
3237 | ldc1 f0, 0(CARG3)
3238 | bne TMP1, AT, ->vmeta_tsetv 4234 | bne TMP1, AT, ->vmeta_tsetv
3239 |. addu RA, BASE, RA 4235 |. addu RA, BASE, RA
3240 | sltiu AT, TMP2, LJ_TISNUM 4236 | bne TMP2, TISNUM, >5
3241 | beqz AT, >5 4237 |. lw RC, LO(CARG3)
3242 |. li AT, LJ_TSTR 4238 | lw TMP0, TAB:RB->asize
3243 |
3244 | // Convert number key to integer, check for integerness and range.
3245 | cvt.w.d f2, f0
3246 | lw TMP0, TAB:RB->asize
3247 | mfc1 TMP2, f2
3248 | cvt.d.w f4, f2
3249 | lw TMP1, TAB:RB->array 4239 | lw TMP1, TAB:RB->array
3250 | c.eq.d f0, f4 4240 | sltu AT, RC, TMP0
3251 | sltu AT, TMP2, TMP0 4241 | sll TMP2, RC, 3
3252 | movf AT, r0
3253 | sll TMP2, TMP2, 3
3254 | beqz AT, ->vmeta_tsetv // Integer key and in array part? 4242 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
3255 |. addu TMP1, TMP1, TMP2 4243 |. addu TMP1, TMP1, TMP2
3256 | lbu TMP3, TAB:RB->marked
3257 | lw TMP0, HI(TMP1) 4244 | lw TMP0, HI(TMP1)
4245 | lbu TMP3, TAB:RB->marked
4246 | lw SFRETHI, HI(RA)
3258 | beq TMP0, TISNIL, >3 4247 | beq TMP0, TISNIL, >3
3259 |. ldc1 f0, 0(RA) 4248 |. lw SFRETLO, LO(RA)
3260 |1: 4249 |1:
3261 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4250 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3262 | bnez AT, >7 4251 | sw SFRETHI, HI(TMP1)
3263 |. sdc1 f0, 0(TMP1) 4252 | bnez AT, >7
4253 |. sw SFRETLO, LO(TMP1)
3264 |2: 4254 |2:
3265 | ins_next 4255 | ins_next
3266 | 4256 |
@@ -3276,8 +4266,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3276 |. nop 4266 |. nop
3277 | 4267 |
3278 |5: 4268 |5:
4269 | li AT, LJ_TSTR
3279 | bne TMP2, AT, ->vmeta_tsetv 4270 | bne TMP2, AT, ->vmeta_tsetv
3280 |. lw STR:RC, LO(CARG3) 4271 |. nop
3281 | b ->BC_TSETS_Z // String key? 4272 | b ->BC_TSETS_Z // String key?
3282 |. nop 4273 |. nop
3283 | 4274 |
@@ -3301,15 +4292,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3301 |->BC_TSETS_Z: 4292 |->BC_TSETS_Z:
3302 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 4293 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
3303 | lw TMP0, TAB:RB->hmask 4294 | lw TMP0, TAB:RB->hmask
3304 | lw TMP1, STR:RC->hash 4295 | lw TMP1, STR:RC->sid
3305 | lw NODE:TMP2, TAB:RB->node 4296 | lw NODE:TMP2, TAB:RB->node
3306 | sb r0, TAB:RB->nomm // Clear metamethod cache. 4297 | sb r0, TAB:RB->nomm // Clear metamethod cache.
3307 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4298 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3308 | sll TMP0, TMP1, 5 4299 | sll TMP0, TMP1, 5
3309 | sll TMP1, TMP1, 3 4300 | sll TMP1, TMP1, 3
3310 | subu TMP1, TMP0, TMP1 4301 | subu TMP1, TMP0, TMP1
3311 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4302 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4303 |.if FPU
3312 | ldc1 f20, 0(RA) 4304 | ldc1 f20, 0(RA)
4305 |.else
4306 | lw SFRETHI, HI(RA)
4307 | lw SFRETLO, LO(RA)
4308 |.endif
3313 |1: 4309 |1:
3314 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4310 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3315 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4311 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3323,8 +4319,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3323 |. lw TAB:TMP0, TAB:RB->metatable 4319 |. lw TAB:TMP0, TAB:RB->metatable
3324 |2: 4320 |2:
3325 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4321 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4322 |.if FPU
3326 | bnez AT, >7 4323 | bnez AT, >7
3327 |. sdc1 f20, NODE:TMP2->val 4324 |. sdc1 f20, NODE:TMP2->val
4325 |.else
4326 | sw SFRETHI, NODE:TMP2->val.u32.hi
4327 | bnez AT, >7
4328 |. sw SFRETLO, NODE:TMP2->val.u32.lo
4329 |.endif
3328 |3: 4330 |3:
3329 | ins_next 4331 | ins_next
3330 | 4332 |
@@ -3362,8 +4364,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3362 |. move CARG1, L 4364 |. move CARG1, L
3363 | // Returns TValue *. 4365 | // Returns TValue *.
3364 | lw BASE, L->base 4366 | lw BASE, L->base
4367 |.if FPU
3365 | b <3 // No 2nd write barrier needed. 4368 | b <3 // No 2nd write barrier needed.
3366 |. sdc1 f20, 0(CRET1) 4369 |. sdc1 f20, 0(CRET1)
4370 |.else
4371 | lw SFARG1HI, HI(RA)
4372 | lw SFARG1LO, LO(RA)
4373 | sw SFARG1HI, HI(CRET1)
4374 | b <3 // No 2nd write barrier needed.
4375 |. sw SFARG1LO, LO(CRET1)
4376 |.endif
3367 | 4377 |
3368 |7: // Possible table write barrier for the value. Skip valiswhite check. 4378 |7: // Possible table write barrier for the value. Skip valiswhite check.
3369 | barrierback TAB:RB, TMP3, TMP0, <3 4379 | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3388,11 +4398,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3388 | lw TMP1, HI(RC) 4398 | lw TMP1, HI(RC)
3389 | lbu TMP3, TAB:RB->marked 4399 | lbu TMP3, TAB:RB->marked
3390 | beq TMP1, TISNIL, >5 4400 | beq TMP1, TISNIL, >5
3391 |. ldc1 f0, 0(RA)
3392 |1: 4401 |1:
4402 |. lw SFRETHI, HI(RA)
4403 | lw SFRETLO, LO(RA)
3393 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4404 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4405 | sw SFRETHI, HI(RC)
3394 | bnez AT, >7 4406 | bnez AT, >7
3395 |. sdc1 f0, 0(RC) 4407 |. sw SFRETLO, LO(RC)
3396 |2: 4408 |2:
3397 | ins_next 4409 | ins_next
3398 | 4410 |
@@ -3404,12 +4416,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3404 | andi TMP1, TMP1, 1<<MM_newindex 4416 | andi TMP1, TMP1, 1<<MM_newindex
3405 | bnez TMP1, <1 // 'no __newindex' flag set: done. 4417 | bnez TMP1, <1 // 'no __newindex' flag set: done.
3406 |. nop 4418 |. nop
3407 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4419 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
3408 |. nop 4420 |. nop
3409 | 4421 |
3410 |7: // Possible table write barrier for the value. Skip valiswhite check. 4422 |7: // Possible table write barrier for the value. Skip valiswhite check.
3411 | barrierback TAB:RB, TMP3, TMP0, <2 4423 | barrierback TAB:RB, TMP3, TMP0, <2
3412 break; 4424 break;
4425 case BC_TSETR:
4426 | // RA = dst*8, RB = table*8, RC = key*8
4427 | decode_RB8a RB, INS
4428 | decode_RB8b RB
4429 | decode_RDtoRC8 RC, RD
4430 | addu CARG1, BASE, RB
4431 | addu CARG3, BASE, RC
4432 | lw TAB:CARG2, LO(CARG1)
4433 | lw CARG3, LO(CARG3)
4434 | lbu TMP3, TAB:CARG2->marked
4435 | lw TMP0, TAB:CARG2->asize
4436 | lw TMP1, TAB:CARG2->array
4437 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4438 | bnez AT, >7
4439 |. addu RA, BASE, RA
4440 |2:
4441 | sltu AT, CARG3, TMP0
4442 | sll TMP2, CARG3, 3
4443 | beqz AT, ->vmeta_tsetr // In array part?
4444 |. addu CRET1, TMP1, TMP2
4445 |->BC_TSETR_Z:
4446 | lw SFARG1HI, HI(RA)
4447 | lw SFARG1LO, LO(RA)
4448 | ins_next1
4449 | sw SFARG1HI, HI(CRET1)
4450 | sw SFARG1LO, LO(CRET1)
4451 | ins_next2
4452 |
4453 |7: // Possible table write barrier for the value. Skip valiswhite check.
4454 | barrierback TAB:CARG2, TMP3, CRET1, <2
4455 break;
3413 4456
3414 case BC_TSETM: 4457 case BC_TSETM:
3415 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4458 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3432,10 +4475,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3432 | addu TMP1, TMP1, CARG1 4475 | addu TMP1, TMP1, CARG1
3433 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4476 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3434 |3: // Copy result slots to table. 4477 |3: // Copy result slots to table.
3435 | ldc1 f0, 0(RA) 4478 | lw SFRETHI, HI(RA)
4479 | lw SFRETLO, LO(RA)
3436 | addiu RA, RA, 8 4480 | addiu RA, RA, 8
3437 | sltu AT, RA, TMP2 4481 | sltu AT, RA, TMP2
3438 | sdc1 f0, 0(TMP1) 4482 | sw SFRETHI, HI(TMP1)
4483 | sw SFRETLO, LO(TMP1)
3439 | bnez AT, <3 4484 | bnez AT, <3
3440 |. addiu TMP1, TMP1, 8 4485 |. addiu TMP1, TMP1, 8
3441 | bnez TMP0, >7 4486 | bnez TMP0, >7
@@ -3510,10 +4555,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3510 | beqz NARGS8:RC, >3 4555 | beqz NARGS8:RC, >3
3511 |. move TMP3, NARGS8:RC 4556 |. move TMP3, NARGS8:RC
3512 |2: 4557 |2:
3513 | ldc1 f0, 0(RA) 4558 | lw SFRETHI, HI(RA)
4559 | lw SFRETLO, LO(RA)
3514 | addiu RA, RA, 8 4560 | addiu RA, RA, 8
3515 | addiu TMP3, TMP3, -8 4561 | addiu TMP3, TMP3, -8
3516 | sdc1 f0, 0(TMP2) 4562 | sw SFRETHI, HI(TMP2)
4563 | sw SFRETLO, LO(TMP2)
3517 | bnez TMP3, <2 4564 | bnez TMP3, <2
3518 |. addiu TMP2, TMP2, 8 4565 |. addiu TMP2, TMP2, 8
3519 |3: 4566 |3:
@@ -3550,12 +4597,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3550 | li AT, LJ_TFUNC 4597 | li AT, LJ_TFUNC
3551 | lw TMP1, -24+HI(BASE) 4598 | lw TMP1, -24+HI(BASE)
3552 | lw LFUNC:RB, -24+LO(BASE) 4599 | lw LFUNC:RB, -24+LO(BASE)
3553 | ldc1 f2, -8(BASE) 4600 | lw SFARG1HI, -16+HI(BASE)
3554 | ldc1 f0, -16(BASE) 4601 | lw SFARG1LO, -16+LO(BASE)
4602 | lw SFARG2HI, -8+HI(BASE)
4603 | lw SFARG2LO, -8+LO(BASE)
3555 | sw TMP1, HI(BASE) // Copy callable. 4604 | sw TMP1, HI(BASE) // Copy callable.
3556 | sw LFUNC:RB, LO(BASE) 4605 | sw LFUNC:RB, LO(BASE)
3557 | sdc1 f2, 16(BASE) // Copy control var. 4606 | sw SFARG1HI, 8+HI(BASE) // Copy state.
3558 | sdc1 f0, 8(BASE) // Copy state. 4607 | sw SFARG1LO, 8+LO(BASE)
4608 | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4609 | sw SFARG2LO, 16+LO(BASE)
3559 | addiu BASE, BASE, 8 4610 | addiu BASE, BASE, 8
3560 | bne TMP1, AT, ->vmeta_call 4611 | bne TMP1, AT, ->vmeta_call
3561 |. li NARGS8:RC, 16 // Iterators get 2 arguments. 4612 |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3563,10 +4614,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3563 break; 4614 break;
3564 4615
3565 case BC_ITERN: 4616 case BC_ITERN:
3566 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) 4617 |.if JIT and ENDIAN_LE
3567 |.if JIT 4618 | hotloop
3568 | // NYI: add hotloop, record BC_ITERN.
3569 |.endif 4619 |.endif
4620 |->vm_IITERN:
4621 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
3570 | addu RA, BASE, RA 4622 | addu RA, BASE, RA
3571 | lw TAB:RB, -16+LO(RA) 4623 | lw TAB:RB, -16+LO(RA)
3572 | lw RC, -8+LO(RA) // Get index from control var. 4624 | lw RC, -8+LO(RA) // Get index from control var.
@@ -3578,20 +4630,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3578 | beqz AT, >5 // Index points after array part? 4630 | beqz AT, >5 // Index points after array part?
3579 |. sll TMP3, RC, 3 4631 |. sll TMP3, RC, 3
3580 | addu TMP3, TMP1, TMP3 4632 | addu TMP3, TMP1, TMP3
3581 | lw TMP2, HI(TMP3) 4633 | lw SFARG1HI, HI(TMP3)
3582 | ldc1 f0, 0(TMP3) 4634 | lw SFARG1LO, LO(TMP3)
3583 | mtc1 RC, f2
3584 | lhu RD, -4+OFS_RD(PC) 4635 | lhu RD, -4+OFS_RD(PC)
3585 | beq TMP2, TISNIL, <1 // Skip holes in array part. 4636 | sw TISNUM, HI(RA)
4637 | sw RC, LO(RA)
4638 | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
3586 |. addiu RC, RC, 1 4639 |. addiu RC, RC, 1
3587 | cvt.d.w f2, f2 4640 | sw SFARG1HI, 8+HI(RA)
4641 | sw SFARG1LO, 8+LO(RA)
3588 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4642 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3589 | sdc1 f0, 8(RA)
3590 | decode_RD4b RD 4643 | decode_RD4b RD
3591 | addu RD, RD, TMP3 4644 | addu RD, RD, TMP3
3592 | sw RC, -8+LO(RA) // Update control var. 4645 | sw RC, -8+LO(RA) // Update control var.
3593 | addu PC, PC, RD 4646 | addu PC, PC, RD
3594 | sdc1 f2, 0(RA)
3595 |3: 4647 |3:
3596 | ins_next 4648 | ins_next
3597 | 4649 |
@@ -3606,18 +4658,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3606 | sll RB, RC, 3 4658 | sll RB, RC, 3
3607 | subu TMP3, TMP3, RB 4659 | subu TMP3, TMP3, RB
3608 | addu NODE:TMP3, TMP3, TMP2 4660 | addu NODE:TMP3, TMP3, TMP2
3609 | lw RB, HI(NODE:TMP3) 4661 | lw SFARG1HI, NODE:TMP3->val.u32.hi
3610 | ldc1 f0, 0(NODE:TMP3) 4662 | lw SFARG1LO, NODE:TMP3->val.u32.lo
3611 | lhu RD, -4+OFS_RD(PC) 4663 | lhu RD, -4+OFS_RD(PC)
3612 | beq RB, TISNIL, <6 // Skip holes in hash part. 4664 | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
3613 |. addiu RC, RC, 1 4665 |. addiu RC, RC, 1
3614 | ldc1 f2, NODE:TMP3->key 4666 | lw SFARG2HI, NODE:TMP3->key.u32.hi
4667 | lw SFARG2LO, NODE:TMP3->key.u32.lo
3615 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4668 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3616 | sdc1 f0, 8(RA) 4669 | sw SFARG1HI, 8+HI(RA)
4670 | sw SFARG1LO, 8+LO(RA)
3617 | addu RC, RC, TMP0 4671 | addu RC, RC, TMP0
3618 | decode_RD4b RD 4672 | decode_RD4b RD
3619 | addu RD, RD, TMP3 4673 | addu RD, RD, TMP3
3620 | sdc1 f2, 0(RA) 4674 | sw SFARG2HI, HI(RA)
4675 | sw SFARG2LO, LO(RA)
3621 | addu PC, PC, RD 4676 | addu PC, PC, RD
3622 | b <3 4677 | b <3
3623 |. sw RC, -8+LO(RA) // Update control var. 4678 |. sw RC, -8+LO(RA) // Update control var.
@@ -3642,9 +4697,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3642 | addiu CARG2, CARG2, -FF_next_N 4697 | addiu CARG2, CARG2, -FF_next_N
3643 | or CARG2, CARG2, CARG3 4698 | or CARG2, CARG2, CARG3
3644 | bnez CARG2, >5 4699 | bnez CARG2, >5
3645 |. lui TMP1, 0xfffe 4700 |. lui TMP1, (LJ_KEYINDEX >> 16)
3646 | addu PC, TMP0, TMP2 4701 | addu PC, TMP0, TMP2
3647 | ori TMP1, TMP1, 0x7fff 4702 | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
3648 | sw r0, -8+LO(RA) // Initialize control var. 4703 | sw r0, -8+LO(RA) // Initialize control var.
3649 | sw TMP1, -8+HI(RA) 4704 | sw TMP1, -8+HI(RA)
3650 |1: 4705 |1:
@@ -3653,9 +4708,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3653 | li TMP3, BC_JMP 4708 | li TMP3, BC_JMP
3654 | li TMP1, BC_ITERC 4709 | li TMP1, BC_ITERC
3655 | sb TMP3, -4+OFS_OP(PC) 4710 | sb TMP3, -4+OFS_OP(PC)
3656 | addu PC, TMP0, TMP2 4711 | addu PC, TMP0, TMP2
4712 |.if JIT
4713 | lb TMP0, OFS_OP(PC)
4714 | li AT, BC_ITERN
4715 | bne TMP0, AT, >6
4716 |. lhu TMP2, OFS_RD(PC)
4717 |.endif
3657 | b <1 4718 | b <1
3658 |. sb TMP1, OFS_OP(PC) 4719 |. sb TMP1, OFS_OP(PC)
4720 |.if JIT
4721 |6: // Unpatch JLOOP.
4722 | lw TMP0, DISPATCH_J(trace)(DISPATCH)
4723 | sll TMP2, TMP2, 2
4724 | addu TMP0, TMP0, TMP2
4725 | lw TRACE:TMP2, 0(TMP0)
4726 | lw TMP0, TRACE:TMP2->startins
4727 | li AT, -256
4728 | and TMP0, TMP0, AT
4729 | or TMP0, TMP0, TMP1
4730 | b <1
4731 |. sw TMP0, 0(PC)
4732 |.endif
3659 break; 4733 break;
3660 4734
3661 case BC_VARG: 4735 case BC_VARG:
@@ -3697,9 +4771,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3697 | bnez AT, >7 4771 | bnez AT, >7
3698 |. addiu MULTRES, TMP1, 8 4772 |. addiu MULTRES, TMP1, 8
3699 |6: 4773 |6:
3700 | ldc1 f0, 0(RC) 4774 | lw SFRETHI, HI(RC)
4775 | lw SFRETLO, LO(RC)
3701 | addiu RC, RC, 8 4776 | addiu RC, RC, 8
3702 | sdc1 f0, 0(RA) 4777 | sw SFRETHI, HI(RA)
4778 | sw SFRETLO, LO(RA)
3703 | sltu AT, RC, TMP3 4779 | sltu AT, RC, TMP3
3704 | bnez AT, <6 // More vararg slots? 4780 | bnez AT, <6 // More vararg slots?
3705 |. addiu RA, RA, 8 4781 |. addiu RA, RA, 8
@@ -3755,10 +4831,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3755 | beqz RC, >3 4831 | beqz RC, >3
3756 |. subu BASE, TMP2, TMP0 4832 |. subu BASE, TMP2, TMP0
3757 |2: 4833 |2:
3758 | ldc1 f0, 0(RA) 4834 | lw SFRETHI, HI(RA)
4835 | lw SFRETLO, LO(RA)
3759 | addiu RA, RA, 8 4836 | addiu RA, RA, 8
3760 | addiu RC, RC, -8 4837 | addiu RC, RC, -8
3761 | sdc1 f0, 0(TMP2) 4838 | sw SFRETHI, HI(TMP2)
4839 | sw SFRETLO, LO(TMP2)
3762 | bnez RC, <2 4840 | bnez RC, <2
3763 |. addiu TMP2, TMP2, 8 4841 |. addiu TMP2, TMP2, 8
3764 |3: 4842 |3:
@@ -3799,14 +4877,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3799 | lw INS, -4(PC) 4877 | lw INS, -4(PC)
3800 | addiu TMP2, BASE, -8 4878 | addiu TMP2, BASE, -8
3801 if (op == BC_RET1) { 4879 if (op == BC_RET1) {
3802 | ldc1 f0, 0(RA) 4880 | lw SFRETHI, HI(RA)
4881 | lw SFRETLO, LO(RA)
3803 } 4882 }
3804 | decode_RB8a RB, INS 4883 | decode_RB8a RB, INS
3805 | decode_RA8a RA, INS 4884 | decode_RA8a RA, INS
3806 | decode_RB8b RB 4885 | decode_RB8b RB
3807 | decode_RA8b RA 4886 | decode_RA8b RA
3808 if (op == BC_RET1) { 4887 if (op == BC_RET1) {
3809 | sdc1 f0, 0(TMP2) 4888 | sw SFRETHI, HI(TMP2)
4889 | sw SFRETLO, LO(TMP2)
3810 } 4890 }
3811 | subu BASE, TMP2, RA 4891 | subu BASE, TMP2, RA
3812 |5: 4892 |5:
@@ -3848,69 +4928,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3848 | // RA = base*8, RD = target (after end of loop or start of loop) 4928 | // RA = base*8, RD = target (after end of loop or start of loop)
3849 vk = (op == BC_IFORL || op == BC_JFORL); 4929 vk = (op == BC_IFORL || op == BC_JFORL);
3850 | addu RA, BASE, RA 4930 | addu RA, BASE, RA
3851 if (vk) { 4931 | lw SFARG1HI, FORL_IDX*8+HI(RA)
3852 | ldc1 f0, FORL_IDX*8(RA) 4932 | lw SFARG1LO, FORL_IDX*8+LO(RA)
3853 | ldc1 f4, FORL_STEP*8(RA)
3854 | ldc1 f2, FORL_STOP*8(RA)
3855 | lw TMP3, FORL_STEP*8+HI(RA)
3856 | add.d f0, f0, f4
3857 | sdc1 f0, FORL_IDX*8(RA)
3858 } else {
3859 | lw TMP1, FORL_IDX*8+HI(RA)
3860 | lw TMP3, FORL_STEP*8+HI(RA)
3861 | lw TMP2, FORL_STOP*8+HI(RA)
3862 | sltiu TMP1, TMP1, LJ_TISNUM
3863 | sltiu TMP0, TMP3, LJ_TISNUM
3864 | sltiu TMP2, TMP2, LJ_TISNUM
3865 | and TMP1, TMP1, TMP0
3866 | and TMP1, TMP1, TMP2
3867 | ldc1 f0, FORL_IDX*8(RA)
3868 | beqz TMP1, ->vmeta_for
3869 |. ldc1 f2, FORL_STOP*8(RA)
3870 }
3871 if (op != BC_JFORL) { 4933 if (op != BC_JFORL) {
3872 | srl RD, RD, 1 4934 | srl RD, RD, 1
3873 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) 4935 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4936 | addu TMP2, RD, TMP2
3874 } 4937 }
3875 | c.le.d 0, f0, f2 4938 if (!vk) {
3876 | c.le.d 1, f2, f0 4939 | lw SFARG2HI, FORL_STOP*8+HI(RA)
3877 | sdc1 f0, FORL_EXT*8(RA) 4940 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4941 | bne SFARG1HI, TISNUM, >5
4942 |. lw SFRETHI, FORL_STEP*8+HI(RA)
4943 | xor AT, SFARG2HI, TISNUM
4944 | lw SFRETLO, FORL_STEP*8+LO(RA)
4945 | xor TMP0, SFRETHI, TISNUM
4946 | or AT, AT, TMP0
4947 | bnez AT, ->vmeta_for
4948 |. slt AT, SFRETLO, r0
4949 | slt CRET1, SFARG2LO, SFARG1LO
4950 | slt TMP1, SFARG1LO, SFARG2LO
4951 | movn CRET1, TMP1, AT
4952 } else {
4953 | bne SFARG1HI, TISNUM, >5
4954 |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4955 | lw SFRETLO, FORL_STOP*8+LO(RA)
4956 | move TMP3, SFARG1LO
4957 | addu SFARG1LO, SFARG1LO, SFARG2LO
4958 | xor TMP0, SFARG1LO, TMP3
4959 | xor TMP1, SFARG1LO, SFARG2LO
4960 | and TMP0, TMP0, TMP1
4961 | slt TMP1, SFARG1LO, SFRETLO
4962 | slt CRET1, SFRETLO, SFARG1LO
4963 | slt AT, SFARG2LO, r0
4964 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4965 | movn CRET1, TMP1, AT
4966 | or CRET1, CRET1, TMP0
4967 }
4968 |1:
4969 if (op == BC_FORI) {
4970 | movz TMP2, r0, CRET1
4971 | addu PC, PC, TMP2
4972 } else if (op == BC_JFORI) {
4973 | addu PC, PC, TMP2
4974 | lhu RD, -4+OFS_RD(PC)
4975 } else if (op == BC_IFORL) {
4976 | movn TMP2, r0, CRET1
4977 | addu PC, PC, TMP2
4978 }
4979 if (vk) {
4980 | sw SFARG1HI, FORL_IDX*8+HI(RA)
4981 | sw SFARG1LO, FORL_IDX*8+LO(RA)
4982 }
4983 | ins_next1
4984 | sw SFARG1HI, FORL_EXT*8+HI(RA)
4985 | sw SFARG1LO, FORL_EXT*8+LO(RA)
4986 |2:
3878 if (op == BC_JFORI) { 4987 if (op == BC_JFORI) {
3879 | li TMP1, 1 4988 | beqz CRET1, =>BC_JLOOP
3880 | li TMP2, 1
3881 | addu TMP0, RD, TMP0
3882 | slt TMP3, TMP3, r0
3883 | movf TMP1, r0, 0
3884 | addu PC, PC, TMP0
3885 | movf TMP2, r0, 1
3886 | lhu RD, -4+OFS_RD(PC)
3887 | movn TMP1, TMP2, TMP3
3888 | bnez TMP1, =>BC_JLOOP
3889 |. decode_RD8b RD 4989 |. decode_RD8b RD
3890 } else if (op == BC_JFORL) { 4990 } else if (op == BC_JFORL) {
3891 | li TMP1, 1 4991 | beqz CRET1, =>BC_JLOOP
3892 | li TMP2, 1 4992 }
3893 | slt TMP3, TMP3, r0 4993 | ins_next2
3894 | movf TMP1, r0, 0 4994 |
3895 | movf TMP2, r0, 1 4995 |5: // FP loop.
3896 | movn TMP1, TMP2, TMP3 4996 |.if FPU
3897 | bnez TMP1, =>BC_JLOOP 4997 if (!vk) {
4998 | ldc1 f0, FORL_IDX*8(RA)
4999 | ldc1 f2, FORL_STOP*8(RA)
5000 | sltiu TMP0, SFARG1HI, LJ_TISNUM
5001 | sltiu TMP1, SFARG2HI, LJ_TISNUM
5002 | sltiu AT, SFRETHI, LJ_TISNUM
5003 | and TMP0, TMP0, TMP1
5004 | and AT, AT, TMP0
5005 | beqz AT, ->vmeta_for
5006 |. slt TMP3, SFRETHI, r0
5007 | c.ole.d 0, f0, f2
5008 | c.ole.d 1, f2, f0
5009 | li CRET1, 1
5010 | movt CRET1, r0, 0
5011 | movt AT, r0, 1
5012 | b <1
5013 |. movn CRET1, AT, TMP3
5014 } else {
5015 | ldc1 f0, FORL_IDX*8(RA)
5016 | ldc1 f4, FORL_STEP*8(RA)
5017 | ldc1 f2, FORL_STOP*8(RA)
5018 | lw SFARG2HI, FORL_STEP*8+HI(RA)
5019 | add.d f0, f0, f4
5020 | c.ole.d 0, f0, f2
5021 | c.ole.d 1, f2, f0
5022 | slt TMP3, SFARG2HI, r0
5023 | li CRET1, 1
5024 | li AT, 1
5025 | movt CRET1, r0, 0
5026 | movt AT, r0, 1
5027 | movn CRET1, AT, TMP3
5028 if (op == BC_IFORL) {
5029 | movn TMP2, r0, CRET1
5030 | addu PC, PC, TMP2
5031 }
5032 | sdc1 f0, FORL_IDX*8(RA)
5033 | ins_next1
5034 | b <2
5035 |. sdc1 f0, FORL_EXT*8(RA)
5036 }
5037 |.else
5038 if (!vk) {
5039 | sltiu TMP0, SFARG1HI, LJ_TISNUM
5040 | sltiu TMP1, SFARG2HI, LJ_TISNUM
5041 | sltiu AT, SFRETHI, LJ_TISNUM
5042 | and TMP0, TMP0, TMP1
5043 | and AT, AT, TMP0
5044 | beqz AT, ->vmeta_for
5045 |. nop
5046 | bal ->vm_sfcmpolex
5047 |. move TMP3, SFRETHI
5048 | b <1
3898 |. nop 5049 |. nop
3899 } else { 5050 } else {
3900 | addu TMP1, RD, TMP0 5051 | lw SFARG2HI, FORL_STEP*8+HI(RA)
3901 | slt TMP3, TMP3, r0 5052 | load_got __adddf3
3902 | move TMP2, TMP1 5053 | call_extern
3903 if (op == BC_FORI) { 5054 |. sw TMP2, ARG5
3904 | movt TMP1, r0, 0 5055 | lw SFARG2HI, FORL_STOP*8+HI(RA)
3905 | movt TMP2, r0, 1 5056 | lw SFARG2LO, FORL_STOP*8+LO(RA)
5057 | move SFARG1HI, SFRETHI
5058 | move SFARG1LO, SFRETLO
5059 | bal ->vm_sfcmpolex
5060 |. lw TMP3, FORL_STEP*8+HI(RA)
5061 if ( op == BC_JFORL ) {
5062 | lhu RD, -4+OFS_RD(PC)
5063 | lw TMP2, ARG5
5064 | b <1
5065 |. decode_RD8b RD
3906 } else { 5066 } else {
3907 | movf TMP1, r0, 0 5067 | b <1
3908 | movf TMP2, r0, 1 5068 |. lw TMP2, ARG5
3909 } 5069 }
3910 | movn TMP1, TMP2, TMP3
3911 | addu PC, PC, TMP1
3912 } 5070 }
3913 | ins_next 5071 |.endif
3914 break; 5072 break;
3915 5073
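The integer fast path of the FORL/FORI family above folds the loop test into a single flag: in the vk (IFORL/JFORL) case, after the step has been added, CRET1 ends up nonzero when the loop must stop, either because the add overflowed or because the index passed the stop value in the direction given by the step's sign. A C sketch of that test, assuming 32-bit two's-complement integers:

#include <stdint.h>

/* Nonzero when an integer numeric for-loop terminates after idx has been
** advanced by step (the post-increment IFORL/JFORL case above). */
static int forl_done(int32_t idx, int32_t step, int32_t stop, int32_t *nidx)
{
  int32_t y = (int32_t)((uint32_t)idx + (uint32_t)step);
  int ovflo = ((y ^ idx) & (y ^ step)) < 0;   /* Same overflow idiom as ins_arith. */
  *nidx = y;
  return ovflo || (step < 0 ? y < stop : stop < y);
}
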
3916 case BC_ITERL: 5074 case BC_ITERL:
@@ -3969,8 +5127,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3969 | sw AT, DISPATCH_GL(vmstate)(DISPATCH) 5127 | sw AT, DISPATCH_GL(vmstate)(DISPATCH)
3970 | lw TRACE:TMP2, 0(TMP1) 5128 | lw TRACE:TMP2, 0(TMP1)
3971 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5129 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3972 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3973 | lw TMP2, TRACE:TMP2->mcode 5130 | lw TMP2, TRACE:TMP2->mcode
5131 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3974 | jr TMP2 5132 | jr TMP2
3975 |. addiu JGL, DISPATCH, GG_DISP2G+32768 5133 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3976 |.endif 5134 |.endif
@@ -4096,6 +5254,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4096 | li_vmstate INTERP 5254 | li_vmstate INTERP
4097 | lw PC, FRAME_PC(BASE) // Fetch PC of caller. 5255 | lw PC, FRAME_PC(BASE) // Fetch PC of caller.
4098 | subu RA, TMP1, RD // RA = L->top - nresults*8 5256 | subu RA, TMP1, RD // RA = L->top - nresults*8
5257 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
4099 | b ->vm_returnc 5258 | b ->vm_returnc
4100 |. st_vmstate 5259 |. st_vmstate
4101 break; 5260 break;
@@ -4158,8 +5317,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4158 fcofs, CFRAME_SIZE); 5317 fcofs, CFRAME_SIZE);
4159 for (i = 23; i >= 16; i--) 5318 for (i = 23; i >= 16; i--)
4160 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5319 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5320#if !LJ_SOFTFP
4161 for (i = 30; i >= 20; i -= 2) 5321 for (i = 30; i >= 20; i -= 2)
4162 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5322 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5323#endif
4163 fprintf(ctx->fp, 5324 fprintf(ctx->fp,
4164 "\t.align 2\n" 5325 "\t.align 2\n"
4165 ".LEFDE0:\n\n"); 5326 ".LEFDE0:\n\n");
@@ -4211,8 +5372,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4211 fcofs, CFRAME_SIZE); 5372 fcofs, CFRAME_SIZE);
4212 for (i = 23; i >= 16; i--) 5373 for (i = 23; i >= 16; i--)
4213 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5374 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5375#if !LJ_SOFTFP
4214 for (i = 30; i >= 20; i -= 2) 5376 for (i = 30; i >= 20; i -= 2)
4215 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5377 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5378#endif
4216 fprintf(ctx->fp, 5379 fprintf(ctx->fp,
4217 "\t.align 2\n" 5380 "\t.align 2\n"
4218 ".LEFDE2:\n\n"); 5381 ".LEFDE2:\n\n");
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
new file mode 100644
index 00000000..a8d20413
--- /dev/null
+++ b/src/vm_mips64.dasc
@@ -0,0 +1,5565 @@
1|// Low-level VM code for MIPS64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6|// Sponsored by Cisco Systems, Inc.
7|
8|.arch mips64
9|.section code_op, code_sub
10|
11|.actionlist build_actionlist
12|.globals GLOB_
13|.globalnames globnames
14|.externnames extnames
15|
16|// Note: The ragged indentation of the instructions is intentional.
17|// The starting columns indicate data dependencies.
18|
19|//-----------------------------------------------------------------------
20|
21|// Fixed register assignments for the interpreter.
22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
23|
24|.macro .FPU, a, b
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
30|// The following must be C callee-save (but BASE is often refetched).
31|.define BASE, r16 // Base of current Lua stack frame.
32|.define KBASE, r17 // Constants of current Lua function.
33|.define PC, r18 // Next PC.
34|.define DISPATCH, r19 // Opcode dispatch table.
35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
37|
38|.define JGL, r30 // On-trace: global_State + 32768.
39|
40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNIL, r30
42|.define TISNUM, r22
43|.if FPU
44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
46|
47|// The following temporaries are not saved across C calls, except for RA.
48|.define RA, r23 // Callee-save.
49|.define RB, r8
50|.define RC, r9
51|.define RD, r10
52|.define INS, r11
53|
54|.define AT, r1 // Assembler temporary.
55|.define TMP0, r12
56|.define TMP1, r13
57|.define TMP2, r14
58|.define TMP3, r15
59|
60|// MIPS n64 calling convention.
61|.define CFUNCADDR, r25
62|.define CARG1, r4
63|.define CARG2, r5
64|.define CARG3, r6
65|.define CARG4, r7
66|.define CARG5, r8
67|.define CARG6, r9
68|.define CARG7, r10
69|.define CARG8, r11
70|
71|.define CRET1, r2
72|.define CRET2, r3
73|
74|.if FPU
75|.define FARG1, f12
76|.define FARG2, f13
77|.define FARG3, f14
78|.define FARG4, f15
79|.define FARG5, f16
80|.define FARG6, f17
81|.define FARG7, f18
82|.define FARG8, f19
83|
84|.define FRET1, f0
85|.define FRET2, f2
86|
87|.define FTMP0, f20
88|.define FTMP1, f21
89|.define FTMP2, f22
90|.endif
91|
92|// Stack layout while in interpreter. Must match with lj_frame.h.
93|.if FPU // MIPS64 hard-float.
94|
95|.define CFRAME_SPACE, 192 // Delta for sp.
96|
97|//----- 16 byte aligned, <-- sp entering interpreter
98|.define SAVE_ERRF, 188(sp) // 32 bit values.
99|.define SAVE_NRES, 184(sp)
100|.define SAVE_CFRAME, 176(sp) // 64 bit values.
101|.define SAVE_L, 168(sp)
102|.define SAVE_PC, 160(sp)
103|//----- 16 byte aligned
104|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves.
105|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves.
106|
107|.else // MIPS64 soft-float
108|
109|.define CFRAME_SPACE, 128 // Delta for sp.
110|
111|//----- 16 byte aligned, <-- sp entering interpreter
112|.define SAVE_ERRF, 124(sp) // 32 bit values.
113|.define SAVE_NRES, 120(sp)
114|.define SAVE_CFRAME, 112(sp) // 64 bit values.
115|.define SAVE_L, 104(sp)
116|.define SAVE_PC, 96(sp)
117|//----- 16 byte aligned
118|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves.
119|
120|.endif
121|
122|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code.
123|.define TMPD, 0(sp)
124|//----- 16 byte aligned
125|
126|.define TMPD_OFS, 0
127|
128|.define SAVE_MULTRES, TMPD
129|
130|//-----------------------------------------------------------------------
131|
132|.macro saveregs
133| daddiu sp, sp, -CFRAME_SPACE
134| sd ra, SAVE_GPR_+9*8(sp)
135| sd r30, SAVE_GPR_+8*8(sp)
136| .FPU sdc1 f31, SAVE_FPR_+7*8(sp)
137| sd r23, SAVE_GPR_+7*8(sp)
138| .FPU sdc1 f30, SAVE_FPR_+6*8(sp)
139| sd r22, SAVE_GPR_+6*8(sp)
140| .FPU sdc1 f29, SAVE_FPR_+5*8(sp)
141| sd r21, SAVE_GPR_+5*8(sp)
142| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
143| sd r20, SAVE_GPR_+4*8(sp)
144| .FPU sdc1 f27, SAVE_FPR_+3*8(sp)
145| sd r19, SAVE_GPR_+3*8(sp)
146| .FPU sdc1 f26, SAVE_FPR_+2*8(sp)
147| sd r18, SAVE_GPR_+2*8(sp)
148| .FPU sdc1 f25, SAVE_FPR_+1*8(sp)
149| sd r17, SAVE_GPR_+1*8(sp)
150| .FPU sdc1 f24, SAVE_FPR_+0*8(sp)
151| sd r16, SAVE_GPR_+0*8(sp)
152|.endmacro
153|
154|.macro restoreregs_ret
155| ld ra, SAVE_GPR_+9*8(sp)
156| ld r30, SAVE_GPR_+8*8(sp)
157| ld r23, SAVE_GPR_+7*8(sp)
158| .FPU ldc1 f31, SAVE_FPR_+7*8(sp)
159| ld r22, SAVE_GPR_+6*8(sp)
160| .FPU ldc1 f30, SAVE_FPR_+6*8(sp)
161| ld r21, SAVE_GPR_+5*8(sp)
162| .FPU ldc1 f29, SAVE_FPR_+5*8(sp)
163| ld r20, SAVE_GPR_+4*8(sp)
164| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
165| ld r19, SAVE_GPR_+3*8(sp)
166| .FPU ldc1 f27, SAVE_FPR_+3*8(sp)
167| ld r18, SAVE_GPR_+2*8(sp)
168| .FPU ldc1 f26, SAVE_FPR_+2*8(sp)
169| ld r17, SAVE_GPR_+1*8(sp)
170| .FPU ldc1 f25, SAVE_FPR_+1*8(sp)
171| ld r16, SAVE_GPR_+0*8(sp)
172| .FPU ldc1 f24, SAVE_FPR_+0*8(sp)
173| jr ra
174| daddiu sp, sp, CFRAME_SPACE
175|.endmacro
176|
177|// Type definitions. Some of these are only used for documentation.
178|.type L, lua_State, LREG
179|.type GL, global_State
180|.type TVALUE, TValue
181|.type GCOBJ, GCobj
182|.type STR, GCstr
183|.type TAB, GCtab
184|.type LFUNC, GCfuncL
185|.type CFUNC, GCfuncC
186|.type PROTO, GCproto
187|.type UPVAL, GCupval
188|.type NODE, Node
189|.type NARGS8, int
190|.type TRACE, GCtrace
191|.type SBUF, SBuf
192|
193|//-----------------------------------------------------------------------
194|
195|// Trap for not-yet-implemented parts.
196|.macro NYI; .long 0xec1cf0f0; .endmacro
197|
198|// Macros to mark delay slots.
199|.macro ., a; a; .endmacro
200|.macro ., a,b; a,b; .endmacro
201|.macro ., a,b,c; a,b,c; .endmacro
202|.macro ., a,b,c,d; a,b,c,d; .endmacro
203|
204|.define FRAME_PC, -8
205|.define FRAME_FUNC, -16
206|
207|//-----------------------------------------------------------------------
208|
209|// Endian-specific defines.
210|.if ENDIAN_LE
211|.define HI, 4
212|.define LO, 0
213|.define OFS_RD, 2
214|.define OFS_RA, 1
215|.define OFS_OP, 0
216|.else
217|.define HI, 0
218|.define LO, 4
219|.define OFS_RD, 0
220|.define OFS_RA, 2
221|.define OFS_OP, 3
222|.endif
223|
224|// Instruction decode.
225|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
226|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro
227|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro
228|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro
229|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro
230|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro
231|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro
232|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro
233|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro
234|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro
235|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro
236|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro
237|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro
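|// Bytecode fields (see lj_bc.h): OP = bits 0-7, A = bits 8-15, C = bits 16-23,
|// B = bits 24-31, D = bits 16-31. The *8a/*8b pairs extract a field pre-scaled
|// by 8, ready to index 8-byte stack slots or dispatch table entries.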
238|
239|// Instruction fetch.
240|.macro ins_NEXT1
241| lw INS, 0(PC)
242| daddiu PC, PC, 4
243|.endmacro
244|// Instruction decode+dispatch.
245|.macro ins_NEXT2
246| decode_OP8a TMP1, INS
247| decode_OP8b TMP1
248| daddu TMP0, DISPATCH, TMP1
249| decode_RD8a RD, INS
250| ld AT, 0(TMP0)
251| decode_RA8a RA, INS
252| decode_RD8b RD
253| jr AT
254| decode_RA8b RA
255|.endmacro
256|.macro ins_NEXT
257| ins_NEXT1
258| ins_NEXT2
259|.endmacro
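|// ins_NEXT loads the next instruction, indexes the dispatch table at
|// DISPATCH + op*8 and jumps to the handler; RA/RD decoding is interleaved,
|// with the last decode placed in the branch delay slot.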
260|
261|// Instruction footer.
262|.if 1
263 | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
264| .define ins_next, ins_NEXT
265| .define ins_next_, ins_NEXT
266| .define ins_next1, ins_NEXT1
267| .define ins_next2, ins_NEXT2
268|.else
269| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
270| // Affects only certain kinds of benchmarks (and only with -j off).
271| .macro ins_next
272| b ->ins_next
273| .endmacro
274| .macro ins_next1
275| .endmacro
276| .macro ins_next2
277| b ->ins_next
278| .endmacro
279| .macro ins_next_
280| ->ins_next:
281| ins_NEXT
282| .endmacro
283|.endif
284|
285|// Call decode and dispatch.
286|.macro ins_callt
287| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
288| ld PC, LFUNC:RB->pc
289| lw INS, 0(PC)
290| daddiu PC, PC, 4
291| decode_OP8a TMP1, INS
292| decode_RA8a RA, INS
293| decode_OP8b TMP1
294| decode_RA8b RA
295| daddu TMP0, DISPATCH, TMP1
296| ld TMP0, 0(TMP0)
297| jr TMP0
298| daddu RA, RA, BASE
299|.endmacro
300|
301|.macro ins_call
302| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
303| sd PC, FRAME_PC(BASE)
304| ins_callt
305|.endmacro
306|
307|//-----------------------------------------------------------------------
308|
309|.macro branch_RD
310| srl TMP0, RD, 1
311| lui AT, (-(BCBIAS_J*4 >> 16) & 65535)
312| addu TMP0, TMP0, AT
313| daddu PC, PC, TMP0
314|.endmacro
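|// branch_RD: RD holds the biased jump target pre-scaled by 8, so the taken
|// branch effectively does PC += (RD >> 1) - BCBIAS_J*4.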
315|
316|// Assumes DISPATCH is relative to GL.
317#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
318#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
319#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch))
320#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
321|
322#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
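|// All of these are byte offsets relative to the DISPATCH register, which
|// points at the dispatch table inside GG_State: DISPATCH_GL/DISPATCH_J reach
|// global_State/jit_State fields, DISPATCH_GOT indexes the embedded GOT used
|// by load_got, and PC2PROTO addresses GCproto fields from a bytecode pointer
|// (the prototype header immediately precedes its bytecode).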
323|
324|.macro load_got, func
325| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH)
326|.endmacro
327|// The commented-out 'bal' variant below would be much faster. Sadly, there's no easy way to force the required code layout.
328|// .macro call_intern, func; bal extern func; .endmacro
329|.macro call_intern, func; jalr CFUNCADDR; .endmacro
330|.macro call_extern; jalr CFUNCADDR; .endmacro
331|.macro jmp_extern; jr CFUNCADDR; .endmacro
332|
333|.macro hotcheck, delta, target
334| dsrl TMP1, PC, 1
335| andi TMP1, TMP1, 126
336| daddu TMP1, TMP1, DISPATCH
337| lhu TMP2, GG_DISP2HOT(TMP1)
338| addiu TMP2, TMP2, -delta
339| bltz TMP2, target
340|. sh TMP2, GG_DISP2HOT(TMP1)
341|.endmacro
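|// hotcheck hashes PC into the hot-count table at GG_DISP2HOT (64 halfword
|// counters, indexed by (PC>>1) & 126), decrements the counter by delta and
|// branches to target on underflow; the store in the delay slot writes it back.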
342|
343|.macro hotloop
344| hotcheck HOTCOUNT_LOOP, ->vm_hotloop
345|.endmacro
346|
347|.macro hotcall
348| hotcheck HOTCOUNT_CALL, ->vm_hotcall
349|.endmacro
350|
351|// Set current VM state. Uses TMP0.
352|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
353|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
354|
355|// Move table write barrier back. Overwrites mark and tmp.
356|.macro barrierback, tab, mark, tmp, target
357| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
358| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab)
359| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
360| sb mark, tab->marked
361| b target
362|. sd tmp, tab->gclist
363|.endmacro
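|// I.e. the table is re-grayed and pushed onto gc.grayagain (the old list head
|// goes to tab->gclist), so the GC will traverse it again.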
364|
365|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg.
366|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro
367|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro
368|
369|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst.
370|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro
371|
372|// Extract (negative) type tag.
373|.macro gettp, dst, src; dsra dst, src, 47; .endmacro
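|// TValue layout used here: the low 47 bits hold the payload (e.g. a GCobj
|// pointer), the upper 17 bits the type tag. gettp shifts arithmetically, so
|// the tag comes back sign-extended and can be checked by adding -LJ_Txxx and
|// testing for zero (see checktp below).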
374|
375|// Macros to check the TValue type and extract the GCobj. Branch on failure.
376|.macro checktp, reg, tp, target
377| gettp AT, reg
378| daddiu AT, AT, tp
379| bnez AT, target
380|. cleartp reg
381|.endmacro
382|.macro checktp, dst, reg, tp, target
383| gettp AT, reg
384| daddiu AT, AT, tp
385| bnez AT, target
386|. cleartp dst, reg
387|.endmacro
388|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
389|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
390|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
391|.macro checkint, reg, target // Caveat: has delay slot!
392| gettp AT, reg
393| bne AT, TISNUM, target
394|.endmacro
395|.macro checknum, reg, target // Caveat: has delay slot!
396| gettp AT, reg
397| sltiu AT, AT, LJ_TISNUM
398| beqz AT, target
399|.endmacro
400|
401|.macro mov_false, reg
402| lu reg, 0x8000
403| dsll reg, reg, 32
404| not reg, reg
405|.endmacro
406|.macro mov_true, reg
407| li reg, 0x0001
408| dsll reg, reg, 48
409| not reg, reg
410|.endmacro
411|
412|//-----------------------------------------------------------------------
413
414/* Generate subroutines used by opcodes and other parts of the VM. */
415/* The .code_sub section should be last to help static branch prediction. */
416static void build_subroutines(BuildCtx *ctx)
417{
418 |.code_sub
419 |
420 |//-----------------------------------------------------------------------
421 |//-- Return handling ----------------------------------------------------
422 |//-----------------------------------------------------------------------
423 |
424 |->vm_returnp:
425 | // See vm_return. Also: TMP2 = previous base.
426 | andi AT, PC, FRAME_P
427 | beqz AT, ->cont_dispatch
428 |
429 | // Return from pcall or xpcall fast func.
430 |. mov_true TMP1
431 | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
432 | move BASE, TMP2 // Restore caller base.
433 | // Prepending may overwrite the pcall frame, so do it at the end.
434 | sd TMP1, -8(RA) // Prepend true to results.
435 | daddiu RA, RA, -8
436 |
437 |->vm_returnc:
438 | addiu RD, RD, 8 // RD = (nresults+1)*8.
439 | andi TMP0, PC, FRAME_TYPE
440 | beqz RD, ->vm_unwind_c_eh
441 |. li CRET1, LUA_YIELD
442 | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
443 |. move MULTRES, RD
444 |
445 |->vm_return:
446 | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
447 | // TMP0 = PC & FRAME_TYPE
448 | li TMP2, -8
449 | xori AT, TMP0, FRAME_C
450 | and TMP2, PC, TMP2
451 | bnez AT, ->vm_returnp
452 | dsubu TMP2, BASE, TMP2 // TMP2 = previous base.
453 |
454 | addiu TMP1, RD, -8
455 | sd TMP2, L->base
456 | li_vmstate C
457 | lw TMP2, SAVE_NRES
458 | daddiu BASE, BASE, -16
459 | st_vmstate
460 | beqz TMP1, >2
461 |. sll TMP2, TMP2, 3
462 |1:
463 | addiu TMP1, TMP1, -8
464 | ld CRET1, 0(RA)
465 | daddiu RA, RA, 8
466 | sd CRET1, 0(BASE)
467 | bnez TMP1, <1
468 |. daddiu BASE, BASE, 8
469 |
470 |2:
471 | bne TMP2, RD, >6
472 |3:
473 |. sd BASE, L->top // Store new top.
474 |
475 |->vm_leave_cp:
476 | ld TMP0, SAVE_CFRAME // Restore previous C frame.
477 | move CRET1, r0 // Ok return status for vm_pcall.
478 | sd TMP0, L->cframe
479 |
480 |->vm_leave_unw:
481 | restoreregs_ret
482 |
483 |6:
484 | ld TMP1, L->maxstack
485 | slt AT, TMP2, RD
486 | bnez AT, >7 // Fewer results wanted?
487 | // More results wanted. Check stack size and fill up results with nil.
488 |. slt AT, BASE, TMP1
489 | beqz AT, >8
490 |. nop
491 | sd TISNIL, 0(BASE)
492 | addiu RD, RD, 8
493 | b <2
494 |. daddiu BASE, BASE, 8
495 |
496 |7: // Fewer results wanted.
497 | subu TMP0, RD, TMP2
498 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it.
499 |.if MIPSR6
500 | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
501 | seleqz BASE, BASE, TMP2
502 | b <3
503 |. or BASE, BASE, TMP0
504 |.else
505 | b <3
506 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case?
507 |.endif
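 |// MIPS R6 removed the movn/movz conditional moves; the selnez/seleqz + or
 |// sequences in the .if MIPSR6 branches throughout this file compute the
 |// equivalent select.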
508 |
509 |8: // Corner case: need to grow stack for filling up results.
510 | // This can happen if:
511 | // - A C function grows the stack (a lot).
512 | // - The GC shrinks the stack in between.
513 | // - A return from a lua_call() with a (high) nresults adjustment.
514 | load_got lj_state_growstack
515 | move MULTRES, RD
516 | srl CARG2, TMP2, 3
517 | call_intern lj_state_growstack // (lua_State *L, int n)
518 |. move CARG1, L
519 | lw TMP2, SAVE_NRES
520 | ld BASE, L->top // Need the (realloced) L->top in BASE.
521 | move RD, MULTRES
522 | b <2
523 |. sll TMP2, TMP2, 3
524 |
525 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
526 | // (void *cframe, int errcode)
527 | move sp, CARG1
528 | move CRET1, CARG2
529 |->vm_unwind_c_eh: // Landing pad for external unwinder.
530 | ld L, SAVE_L
531 | li TMP0, ~LJ_VMST_C
532 | ld GL:TMP1, L->glref
533 | b ->vm_leave_unw
534 |. sw TMP0, GL:TMP1->vmstate
535 |
536 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
537 | // (void *cframe)
538 | li AT, -4
539 | and sp, CARG1, AT
540 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
541 | ld L, SAVE_L
542 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
543 | li TISNIL, LJ_TNIL
544 | li TISNUM, LJ_TISNUM
545 | ld BASE, L->base
546 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
547 | .FPU mtc1 TMP3, TOBIT
548 | mov_false TMP1
549 | li_vmstate INTERP
550 | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame.
551 | .FPU cvt.d.s TOBIT, TOBIT
552 | daddiu RA, BASE, -8 // Results start at BASE-8.
553 | daddiu DISPATCH, DISPATCH, GG_G2DISP
554 | sd TMP1, 0(RA) // Prepend false to error message.
555 | st_vmstate
556 | b ->vm_returnc
557 |. li RD, 16 // 2 results: false + error message.
558 |
559 |->vm_unwind_stub: // Jump to exit stub from unwinder.
560 | jr CARG1
561 |. move ra, CARG2
562 |
563 |//-----------------------------------------------------------------------
564 |//-- Grow stack for calls -----------------------------------------------
565 |//-----------------------------------------------------------------------
566 |
567 |->vm_growstack_c: // Grow stack for C function.
568 | b >2
569 |. li CARG2, LUA_MINSTACK
570 |
571 |->vm_growstack_l: // Grow stack for Lua function.
572 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
573 | daddu RC, BASE, RC
574 | dsubu RA, RA, BASE
575 | sd BASE, L->base
576 | daddiu PC, PC, 4 // Must point after first instruction.
577 | sd RC, L->top
578 | srl CARG2, RA, 3
579 |2:
580 | // L->base = new base, L->top = top
581 | load_got lj_state_growstack
582 | sd PC, SAVE_PC
583 | call_intern lj_state_growstack // (lua_State *L, int n)
584 |. move CARG1, L
585 | ld BASE, L->base
586 | ld RC, L->top
587 | ld LFUNC:RB, FRAME_FUNC(BASE)
588 | dsubu RC, RC, BASE
589 | cleartp LFUNC:RB
590 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
591 | ins_callt // Just retry the call.
592 |
593 |//-----------------------------------------------------------------------
594 |//-- Entry points into the assembler VM ---------------------------------
595 |//-----------------------------------------------------------------------
596 |
597 |->vm_resume: // Setup C frame and resume thread.
598 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
599 | saveregs
600 | move L, CARG1
601 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
602 | move BASE, CARG2
603 | lbu TMP1, L->status
604 | sd L, SAVE_L
605 | li PC, FRAME_CP
606 | daddiu TMP0, sp, CFRAME_RESUME
607 | daddiu DISPATCH, DISPATCH, GG_G2DISP
608 | sw r0, SAVE_NRES
609 | sw r0, SAVE_ERRF
610 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
611 | sd r0, SAVE_CFRAME
612 | beqz TMP1, >3
613 |. sd TMP0, L->cframe
614 |
615 | // Resume after yield (like a return).
616 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
617 | move RA, BASE
618 | ld BASE, L->base
619 | ld TMP1, L->top
620 | ld PC, FRAME_PC(BASE)
621 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
622 | dsubu RD, TMP1, BASE
623 | .FPU mtc1 TMP3, TOBIT
624 | sb r0, L->status
625 | .FPU cvt.d.s TOBIT, TOBIT
626 | li_vmstate INTERP
627 | daddiu RD, RD, 8
628 | st_vmstate
629 | move MULTRES, RD
630 | andi TMP0, PC, FRAME_TYPE
631 | li TISNIL, LJ_TNIL
632 | beqz TMP0, ->BC_RET_Z
633 |. li TISNUM, LJ_TISNUM
634 | b ->vm_return
635 |. nop
636 |
637 |->vm_pcall: // Setup protected C frame and enter VM.
638 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
639 | saveregs
640 | sw CARG4, SAVE_ERRF
641 | b >1
642 |. li PC, FRAME_CP
643 |
644 |->vm_call: // Setup C frame and enter VM.
645 | // (lua_State *L, TValue *base, int nres1)
646 | saveregs
647 | li PC, FRAME_C
648 |
649 |1: // Entry point for vm_pcall above (PC = ftype).
650 | ld TMP1, L:CARG1->cframe
651 | move L, CARG1
652 | sw CARG3, SAVE_NRES
653 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
654 | sd CARG1, SAVE_L
655 | move BASE, CARG2
656 | daddiu DISPATCH, DISPATCH, GG_G2DISP
657 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
658 | sd TMP1, SAVE_CFRAME
659 | sd sp, L->cframe // Add our C frame to cframe chain.
660 |
661 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
662 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
663 | ld TMP2, L->base // TMP2 = old base (used in vmeta_call).
664 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
665 | ld TMP1, L->top
666 | .FPU mtc1 TMP3, TOBIT
667 | daddu PC, PC, BASE
668 | dsubu NARGS8:RC, TMP1, BASE
669 | li TISNUM, LJ_TISNUM
670 | dsubu PC, PC, TMP2 // PC = frame delta + frame type
671 | .FPU cvt.d.s TOBIT, TOBIT
672 | li_vmstate INTERP
673 | li TISNIL, LJ_TNIL
674 | st_vmstate
675 |
676 |->vm_call_dispatch:
677 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
678 | ld LFUNC:RB, FRAME_FUNC(BASE)
679 | checkfunc LFUNC:RB, ->vmeta_call
680 |
681 |->vm_call_dispatch_f:
682 | ins_call
683 | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
684 |
685 |->vm_cpcall: // Setup protected C frame, call C.
686 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
687 | saveregs
688 | move L, CARG1
689 | ld TMP0, L:CARG1->stack
690 | sd CARG1, SAVE_L
691 | ld TMP1, L->top
692 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
693 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
694 | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
695 | ld TMP1, L->cframe
696 | daddiu DISPATCH, DISPATCH, GG_G2DISP
697 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
698 | sw r0, SAVE_ERRF // No error function.
699 | sd TMP1, SAVE_CFRAME
700 | sd sp, L->cframe // Add our C frame to cframe chain.
701 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
702 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
703 |. move CFUNCADDR, CARG4
704 | move BASE, CRET1
705 | bnez CRET1, <3 // Else continue with the call.
706 |. li PC, FRAME_CP
707 | b ->vm_leave_cp // No base? Just remove C frame.
708 |. nop
709 |
710 |//-----------------------------------------------------------------------
711 |//-- Metamethod handling ------------------------------------------------
712 |//-----------------------------------------------------------------------
713 |
714 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
715 |// stack, so BASE doesn't need to be reloaded across these calls.
716 |
717 |//-- Continuation dispatch ----------------------------------------------
718 |
719 |->cont_dispatch:
720 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
721 | ld TMP0, -32(BASE) // Continuation.
722 | move RB, BASE
723 | move BASE, TMP2 // Restore caller BASE.
724 | ld LFUNC:TMP1, FRAME_FUNC(TMP2)
725 |.if FFI
726 | sltiu AT, TMP0, 2
727 |.endif
728 | ld PC, -24(RB) // Restore PC from [cont|PC].
729 | cleartp LFUNC:TMP1
730 | daddu TMP2, RA, RD
731 |.if FFI
732 | bnez AT, >1
733 |.endif
734 |. sd TISNIL, -8(TMP2) // Ensure one valid arg.
735 | ld TMP1, LFUNC:TMP1->pc
736 | // BASE = base, RA = resultptr, RB = meta base
737 | jr TMP0 // Jump to continuation.
738 |. ld KBASE, PC2PROTO(k)(TMP1)
739 |
740 |.if FFI
741 |1:
742 | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
743 | // cont = 0: tailcall from C function.
744 |. daddiu TMP1, RB, -32
745 | b ->vm_call_tail
746 |. dsubu RC, TMP1, BASE
747 |.endif
748 |
749 |->cont_cat: // RA = resultptr, RB = meta base
750 | lw INS, -4(PC)
751 | daddiu CARG2, RB, -32
752 | ld CRET1, 0(RA)
753 | decode_RB8a MULTRES, INS
754 | decode_RA8a RA, INS
755 | decode_RB8b MULTRES
756 | decode_RA8b RA
757 | daddu TMP1, BASE, MULTRES
758 | sd BASE, L->base
759 | dsubu CARG3, CARG2, TMP1
760 | bne TMP1, CARG2, ->BC_CAT_Z
761 |. sd CRET1, 0(CARG2)
762 | daddu RA, BASE, RA
763 | b ->cont_nop
764 |. sd CRET1, 0(RA)
765 |
766 |//-- Table indexing metamethods -----------------------------------------
767 |
768 |->vmeta_tgets1:
769 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
770 | li TMP0, LJ_TSTR
771 | settp STR:RC, TMP0
772 | b >1
773 |. sd STR:RC, 0(CARG3)
774 |
775 |->vmeta_tgets:
776 | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
777 | li TMP0, LJ_TTAB
778 | li TMP1, LJ_TSTR
779 | settp TAB:RB, TMP0
780 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
781 | sd TAB:RB, 0(CARG2)
782 | settp STR:RC, TMP1
783 | b >1
784 |. sd STR:RC, 0(CARG3)
785 |
786 |->vmeta_tgetb: // TMP0 = index
787 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
788 | settp TMP0, TISNUM
789 | sd TMP0, 0(CARG3)
790 |
791 |->vmeta_tgetv:
792 |1:
793 | load_got lj_meta_tget
794 | sd BASE, L->base
795 | sd PC, SAVE_PC
796 | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
797 |. move CARG1, L
798 | // Returns TValue * (finished) or NULL (metamethod).
799 | beqz CRET1, >3
800 |. daddiu TMP1, BASE, -FRAME_CONT
801 | ld CARG1, 0(CRET1)
802 | ins_next1
803 | sd CARG1, 0(RA)
804 | ins_next2
805 |
806 |3: // Call __index metamethod.
807 | // BASE = base, L->top = new base, stack = cont/func/t/k
808 | ld BASE, L->top
809 | sd PC, -24(BASE) // [cont|PC]
810 | dsubu PC, BASE, TMP1
811 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
812 | cleartp LFUNC:RB
813 | b ->vm_call_dispatch_f
814 |. li NARGS8:RC, 16 // 2 args for func(t, k).
815 |
816 |->vmeta_tgetr:
817 | load_got lj_tab_getinth
818 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
819 |. nop
820 | // Returns cTValue * or NULL.
821 | beqz CRET1, ->BC_TGETR_Z
822 |. move CARG2, TISNIL
823 | b ->BC_TGETR_Z
824 |. ld CARG2, 0(CRET1)
825 |
826 |//-----------------------------------------------------------------------
827 |
828 |->vmeta_tsets1:
829 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
830 | li TMP0, LJ_TSTR
831 | settp STR:RC, TMP0
832 | b >1
833 |. sd STR:RC, 0(CARG3)
834 |
835 |->vmeta_tsets:
836 | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
837 | li TMP0, LJ_TTAB
838 | li TMP1, LJ_TSTR
839 | settp TAB:RB, TMP0
840 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
841 | sd TAB:RB, 0(CARG2)
842 | settp STR:RC, TMP1
843 | b >1
844 |. sd STR:RC, 0(CARG3)
845 |
846 |->vmeta_tsetb: // TMP0 = index
847 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
848 | settp TMP0, TISNUM
849 | sd TMP0, 0(CARG3)
850 |
851 |->vmeta_tsetv:
852 |1:
853 | load_got lj_meta_tset
854 | sd BASE, L->base
855 | sd PC, SAVE_PC
856 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
857 |. move CARG1, L
858 | // Returns TValue * (finished) or NULL (metamethod).
859 | beqz CRET1, >3
860 |. ld CARG1, 0(RA)
861 | // NOBARRIER: lj_meta_tset ensures the table is not black.
862 | ins_next1
863 | sd CARG1, 0(CRET1)
864 | ins_next2
865 |
866 |3: // Call __newindex metamethod.
867 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
868 | daddiu TMP1, BASE, -FRAME_CONT
869 | ld BASE, L->top
870 | sd PC, -24(BASE) // [cont|PC]
871 | dsubu PC, BASE, TMP1
872 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
873 | cleartp LFUNC:RB
874 | sd CARG1, 16(BASE) // Copy value to third argument.
875 | b ->vm_call_dispatch_f
876 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
877 |
878 |->vmeta_tsetr:
879 | load_got lj_tab_setinth
880 | sd BASE, L->base
881 | sd PC, SAVE_PC
882 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
883 |. move CARG1, L
884 | // Returns TValue *.
885 | b ->BC_TSETR_Z
886 |. nop
887 |
888 |//-- Comparison metamethods ---------------------------------------------
889 |
890 |->vmeta_comp:
891 | // RA/RD point to o1/o2.
892 | move CARG2, RA
893 | move CARG3, RD
894 | load_got lj_meta_comp
895 | daddiu PC, PC, -4
896 | sd BASE, L->base
897 | sd PC, SAVE_PC
898 | decode_OP1 CARG4, INS
899 | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
900 |. move CARG1, L
901 | // Returns 0/1 or TValue * (metamethod).
902 |3:
903 | sltiu AT, CRET1, 2
904 | beqz AT, ->vmeta_binop
905 | negu TMP2, CRET1
906 |4:
907 | lhu RD, OFS_RD(PC)
908 | daddiu PC, PC, 4
909 | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
910 | sll RD, RD, 2
911 | addu RD, RD, TMP1
912 | and RD, RD, TMP2
913 | daddu PC, PC, RD
914 |->cont_nop:
915 | ins_next
916 |
917 |->cont_ra: // RA = resultptr
918 | lbu TMP1, -4+OFS_RA(PC)
919 | ld CRET1, 0(RA)
920 | sll TMP1, TMP1, 3
921 | daddu TMP1, BASE, TMP1
922 | b ->cont_nop
923 |. sd CRET1, 0(TMP1)
924 |
925 |->cont_condt: // RA = resultptr
926 | ld TMP0, 0(RA)
927 | gettp TMP0, TMP0
928 | sltiu AT, TMP0, LJ_TISTRUECOND
929 | b <4
930 |. negu TMP2, AT // Branch if result is true.
931 |
932 |->cont_condf: // RA = resultptr
933 | ld TMP0, 0(RA)
934 | gettp TMP0, TMP0
935 | sltiu AT, TMP0, LJ_TISTRUECOND
936 | b <4
937 |. addiu TMP2, AT, -1 // Branch if result is false.
938 |
939 |->vmeta_equal:
940 | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
941 | load_got lj_meta_equal
942 | cleartp LFUNC:CARG3, CARG2
943 | cleartp LFUNC:CARG2, CARG1
944 | move CARG4, TMP0
945 | daddiu PC, PC, -4
946 | sd BASE, L->base
947 | sd PC, SAVE_PC
948 | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
949 |. move CARG1, L
950 | // Returns 0/1 or TValue * (metamethod).
951 | b <3
952 |. nop
953 |
954 |->vmeta_equal_cd:
955 |.if FFI
956 | load_got lj_meta_equal_cd
957 | move CARG2, INS
958 | daddiu PC, PC, -4
959 | sd BASE, L->base
960 | sd PC, SAVE_PC
961 | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op)
962 |. move CARG1, L
963 | // Returns 0/1 or TValue * (metamethod).
964 | b <3
965 |. nop
966 |.endif
967 |
968 |->vmeta_istype:
969 | load_got lj_meta_istype
970 | daddiu PC, PC, -4
971 | sd BASE, L->base
972 | srl CARG2, RA, 3
973 | srl CARG3, RD, 3
974 | sd PC, SAVE_PC
975 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
976 |. move CARG1, L
977 | b ->cont_nop
978 |. nop
979 |
980 |//-- Arithmetic metamethods ---------------------------------------------
981 |
982 |->vmeta_unm:
983 | move RC, RB
984 |
985 |->vmeta_arith:
986 | load_got lj_meta_arith
987 | sd BASE, L->base
988 | move CARG2, RA
989 | sd PC, SAVE_PC
990 | move CARG3, RB
991 | move CARG4, RC
992 | decode_OP1 CARG5, INS // CARG5 == RB.
993 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
994 |. move CARG1, L
995 | // Returns NULL (finished) or TValue * (metamethod).
996 | beqz CRET1, ->cont_nop
997 |. nop
998 |
999 | // Call metamethod for binary op.
1000 |->vmeta_binop:
1001 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
1002 | dsubu TMP1, CRET1, BASE
1003 | sd PC, -24(CRET1) // [cont|PC]
1004 | move TMP2, BASE
1005 | daddiu PC, TMP1, FRAME_CONT
1006 | move BASE, CRET1
1007 | b ->vm_call_dispatch
1008 |. li NARGS8:RC, 16 // 2 args for func(o1, o2).
1009 |
1010 |->vmeta_len:
1011 | // CARG2 already set by BC_LEN.
1012#if LJ_52
1013 | move MULTRES, CARG1
1014#endif
1015 | load_got lj_meta_len
1016 | sd BASE, L->base
1017 | sd PC, SAVE_PC
1018 | call_intern lj_meta_len // (lua_State *L, TValue *o)
1019 |. move CARG1, L
1020 | // Returns NULL (retry) or TValue * (metamethod base).
1021#if LJ_52
1022 | bnez CRET1, ->vmeta_binop // Binop call for compatibility.
1023 |. nop
1024 | b ->BC_LEN_Z
1025 |. move CARG1, MULTRES
1026#else
1027 | b ->vmeta_binop // Binop call for compatibility.
1028 |. nop
1029#endif
1030 |
1031 |//-- Call metamethod ----------------------------------------------------
1032 |
1033 |->vmeta_call: // Resolve and call __call metamethod.
1034 | // TMP2 = old base, BASE = new base, RC = nargs*8
1035 | load_got lj_meta_call
1036 | sd TMP2, L->base // This is the caller's base!
1037 | daddiu CARG2, BASE, -16
1038 | sd PC, SAVE_PC
1039 | daddu CARG3, BASE, RC
1040 | move MULTRES, NARGS8:RC
1041 | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1042 |. move CARG1, L
1043 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
1044 | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
1045 | cleartp LFUNC:RB
1046 | ins_call
1047 |
1048 |->vmeta_callt: // Resolve __call for BC_CALLT.
1049 | // BASE = old base, RA = new base, RC = nargs*8
1050 | load_got lj_meta_call
1051 | sd BASE, L->base
1052 | daddiu CARG2, RA, -16
1053 | sd PC, SAVE_PC
1054 | daddu CARG3, RA, RC
1055 | move MULTRES, NARGS8:RC
1056 | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1057 |. move CARG1, L
1058 | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
1059 | ld TMP1, FRAME_PC(BASE)
1060 | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
1061 | b ->BC_CALLT_Z
1062 |. cleartp LFUNC:CARG3, RB
1063 |
1064 |//-- Argument coercion for 'for' statement ------------------------------
1065 |
1066 |->vmeta_for:
1067 | load_got lj_meta_for
1068 | sd BASE, L->base
1069 | move CARG2, RA
1070 | sd PC, SAVE_PC
1071 | move MULTRES, INS
1072 | call_intern lj_meta_for // (lua_State *L, TValue *base)
1073 |. move CARG1, L
1074 |.if JIT
1075 | decode_OP1 TMP0, MULTRES
1076 | li AT, BC_JFORI
1077 |.endif
1078 | decode_RA8a RA, MULTRES
1079 | decode_RD8a RD, MULTRES
1080 | decode_RA8b RA
1081 |.if JIT
1082 | beq TMP0, AT, =>BC_JFORI
1083 |. decode_RD8b RD
1084 | b =>BC_FORI
1085 |. nop
1086 |.else
1087 | b =>BC_FORI
1088 |. decode_RD8b RD
1089 |.endif
1090 |
1091 |//-----------------------------------------------------------------------
1092 |//-- Fast functions -----------------------------------------------------
1093 |//-----------------------------------------------------------------------
1094 |
1095 |.macro .ffunc, name
1096 |->ff_ .. name:
1097 |.endmacro
1098 |
1099 |.macro .ffunc_1, name
1100 |->ff_ .. name:
1101 | beqz NARGS8:RC, ->fff_fallback
1102 |. ld CARG1, 0(BASE)
1103 |.endmacro
1104 |
1105 |.macro .ffunc_2, name
1106 |->ff_ .. name:
1107 | sltiu AT, NARGS8:RC, 16
1108 | ld CARG1, 0(BASE)
1109 | bnez AT, ->fff_fallback
1110 |. ld CARG2, 8(BASE)
1111 |.endmacro
1112 |
1113 |.macro .ffunc_n, name // Caveat: has delay slot!
1114 |->ff_ .. name:
1115 | ld CARG1, 0(BASE)
1116 | beqz NARGS8:RC, ->fff_fallback
1117 | // Either ldc1 or the 1st instruction of checknum is in the delay slot.
1118 | .FPU ldc1 FARG1, 0(BASE)
1119 | checknum CARG1, ->fff_fallback
1120 |.endmacro
1121 |
1122 |.macro .ffunc_nn, name // Caveat: has delay slot!
1123 |->ff_ .. name:
1124 | ld CARG1, 0(BASE)
1125 | sltiu AT, NARGS8:RC, 16
1126 | ld CARG2, 8(BASE)
1127 | bnez AT, ->fff_fallback
1128 |. gettp TMP0, CARG1
1129 | gettp TMP1, CARG2
1130 | sltiu TMP0, TMP0, LJ_TISNUM
1131 | sltiu TMP1, TMP1, LJ_TISNUM
1132 | .FPU ldc1 FARG1, 0(BASE)
1133 | and TMP0, TMP0, TMP1
1134 | .FPU ldc1 FARG2, 8(BASE)
1135 | beqz TMP0, ->fff_fallback
1136 |.endmacro
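 |// Fast-function prologues: NARGS8:RC holds nargs*8 on entry, so .ffunc_1/_2
 |// fall back when fewer than 1/2 arguments are present; the _n/_nn variants
 |// additionally require number arguments and preload FARG1/FARG2 on FPU builds.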
1137 |
1138 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
1139 |// MIPSR6: no delay slot, but a forbidden slot.
1140 |.macro ffgccheck
1141 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
1142 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
1143 | dsubu AT, TMP0, TMP1
1144 |.if MIPSR6
1145 | bgezalc AT, ->fff_gcstep
1146 |.else
1147 | bgezal AT, ->fff_gcstep
1148 |.endif
1149 |.endmacro
1150 |
1151 |//-- Base library: checks -----------------------------------------------
1152 |.ffunc_1 assert
1153 | gettp AT, CARG1
1154 | sltiu AT, AT, LJ_TISTRUECOND
1155 | beqz AT, ->fff_fallback
1156 |. daddiu RA, BASE, -16
1157 | ld PC, FRAME_PC(BASE)
1158 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1159 | daddu TMP2, RA, RD
1160 | daddiu TMP1, BASE, 8
1161 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
1162 |. sd CARG1, 0(RA)
1163 |1:
1164 | ld CRET1, 0(TMP1)
1165 | sd CRET1, -16(TMP1)
1166 | bne TMP1, TMP2, <1
1167 |. daddiu TMP1, TMP1, 8
1168 | b ->fff_res
1169 |. nop
1170 |
1171 |.ffunc_1 type
1172 | gettp TMP0, CARG1
1173 | sltu TMP1, TISNUM, TMP0
1174 | not TMP2, TMP0
1175 | li TMP3, ~LJ_TISNUM
1176 |.if MIPSR6
1177 | selnez TMP2, TMP2, TMP1
1178 | seleqz TMP3, TMP3, TMP1
1179 | or TMP2, TMP2, TMP3
1180 |.else
1181 | movz TMP2, TMP3, TMP1
1182 |.endif
1183 | dsll TMP2, TMP2, 3
1184 | daddu TMP2, CFUNC:RB, TMP2
1185 | b ->fff_restv
1186 |. ld CARG1, CFUNC:TMP2->upvalue
1187 |
1188 |//-- Base library: getters and setters ---------------------------------
1189 |
1190 |.ffunc_1 getmetatable
1191 | gettp TMP2, CARG1
1192 | daddiu TMP0, TMP2, -LJ_TTAB
1193 | daddiu TMP1, TMP2, -LJ_TUDATA
1194 |.if MIPSR6
1195 | selnez TMP0, TMP1, TMP0
1196 |.else
1197 | movn TMP0, TMP1, TMP0
1198 |.endif
1199 | bnez TMP0, >6
1200 |. cleartp TAB:CARG1
1201 |1: // Field metatable must be at same offset for GCtab and GCudata!
1202 | ld TAB:RB, TAB:CARG1->metatable
1203 |2:
1204 | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1205 | beqz TAB:RB, ->fff_restv
1206 |. li CARG1, LJ_TNIL
1207 | lw TMP0, TAB:RB->hmask
1208 | lw TMP1, STR:RC->sid
1209 | ld NODE:TMP2, TAB:RB->node
1210 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
1211 | dsll TMP0, TMP1, 5
1212 | dsll TMP1, TMP1, 3
1213 | dsubu TMP1, TMP0, TMP1
1214 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
1215 | li CARG4, LJ_TSTR
1216 | settp STR:RC, CARG4 // Tagged key to look for.
1217 |3: // Rearranged logic, because we expect _not_ to find the key.
1218 | ld TMP0, NODE:TMP2->key
1219 | ld CARG1, NODE:TMP2->val
1220 | ld NODE:TMP2, NODE:TMP2->next
1221 | beq RC, TMP0, >5
1222 |. li AT, LJ_TTAB
1223 | bnez NODE:TMP2, <3
1224 |. nop
1225 |4:
1226 | move CARG1, RB
1227 | b ->fff_restv // Not found, keep default result.
1228 |. settp CARG1, AT
1229 |5:
1230 | bne CARG1, TISNIL, ->fff_restv
1231 |. nop
1232 | b <4 // Ditto for nil value.
1233 |. nop
1234 |
1235 |6:
1236 | sltiu AT, TMP2, LJ_TISNUM
1237 |.if MIPSR6
1238 | selnez TMP0, TISNUM, AT
1239 | seleqz AT, TMP2, AT
1240 | or TMP2, TMP0, AT
1241 |.else
1242 | movn TMP2, TISNUM, AT
1243 |.endif
1244 | dsll TMP2, TMP2, 3
1245 | dsubu TMP0, DISPATCH, TMP2
1246 | b <2
1247 |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0)
1248 |
1249 |.ffunc_2 setmetatable
1250 | // Fast path: no mt for table yet and not clearing the mt.
1251 | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1252 | gettp TMP3, CARG2
1253 | ld TAB:TMP0, TAB:TMP1->metatable
1254 | lbu TMP2, TAB:TMP1->marked
1255 | daddiu AT, TMP3, -LJ_TTAB
1256 | cleartp TAB:CARG2
1257 | or AT, AT, TAB:TMP0
1258 | bnez AT, ->fff_fallback
1259 |. andi AT, TMP2, LJ_GC_BLACK // isblack(table)
1260 | beqz AT, ->fff_restv
1261 |. sd TAB:CARG2, TAB:TMP1->metatable
1262 | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
1263 |
1264 |.ffunc rawget
1265 | ld CARG2, 0(BASE)
1266 | sltiu AT, NARGS8:RC, 16
1267 | load_got lj_tab_get
1268 | gettp TMP0, CARG2
1269 | cleartp CARG2
1270 | daddiu TMP0, TMP0, -LJ_TTAB
1271 | or AT, AT, TMP0
1272 | bnez AT, ->fff_fallback
1273 |. daddiu CARG3, BASE, 8
1274 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1275 |. move CARG1, L
1276 | b ->fff_restv
1277 |. ld CARG1, 0(CRET1)
1278 |
1279 |//-- Base library: conversions ------------------------------------------
1280 |
1281 |.ffunc tonumber
1282 | // Only handles the number case inline (without a base argument).
1283 | ld CARG1, 0(BASE)
1284 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1285 | gettp TMP1, CARG1
1286 | sltu TMP0, TISNUM, TMP1
1287 | or AT, AT, TMP0
1288 | bnez AT, ->fff_fallback
1289 |. nop
1290 | b ->fff_restv
1291 |. nop
1292 |
1293 |.ffunc_1 tostring
1294 | // Only handles the string or number case inline.
1295 | gettp TMP0, CARG1
1296 | daddiu AT, TMP0, -LJ_TSTR
1297 | // A __tostring method in the string base metatable is ignored.
1298 | beqz AT, ->fff_restv // String key?
1299 | // Handle numbers inline, unless a number base metatable is present.
1300 |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1301 | sltu TMP0, TISNUM, TMP0
1302 | or TMP0, TMP0, TMP1
1303 | bnez TMP0, ->fff_fallback
1304 |. sd BASE, L->base // Add frame since C call can throw.
1305 |.if MIPSR6
1306 | sd PC, SAVE_PC // Redundant (but a defined value).
1307 | ffgccheck
1308 |.else
1309 | ffgccheck
1310 |. sd PC, SAVE_PC // Redundant (but a defined value).
1311 |.endif
1312 | load_got lj_strfmt_number
1313 | move CARG1, L
1314 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1315 |. move CARG2, BASE
1316 | // Returns GCstr *.
1317 | li AT, LJ_TSTR
1318 | settp CRET1, AT
1319 | b ->fff_restv
1320 |. move CARG1, CRET1
1321 |
1322 |//-- Base library: iterators -------------------------------------------
1323 |
1324 |.ffunc_1 next
1325 | checktp CARG1, -LJ_TTAB, ->fff_fallback
1326 | daddu TMP2, BASE, NARGS8:RC
1327 | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil.
1328 | load_got lj_tab_next
1329 | ld PC, FRAME_PC(BASE)
1330 | daddiu CARG2, BASE, 8
1331 | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1332 |. daddiu CARG3, BASE, -16
1333 | // Returns 1=found, 0=end, -1=error.
1334 | daddiu RA, BASE, -16
1335 | bgtz CRET1, ->fff_res // Found key/value.
1336 |. li RD, (2+1)*8
1337 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1338 |. move CARG1, TISNIL
1339 | ld CFUNC:RB, FRAME_FUNC(BASE)
1340 | cleartp CFUNC:RB
1341 | b ->fff_fallback // Invalid key.
1342 |. li RC, 2*8
1343 |
1344 |.ffunc_1 pairs
1345 | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1346 | ld PC, FRAME_PC(BASE)
1347#if LJ_52
1348 | ld TAB:TMP2, TAB:TMP1->metatable
1349 | ld TMP0, CFUNC:RB->upvalue[0]
1350 | bnez TAB:TMP2, ->fff_fallback
1351#else
1352 | ld TMP0, CFUNC:RB->upvalue[0]
1353#endif
1354 |. daddiu RA, BASE, -16
1355 | sd TISNIL, 0(BASE)
1356 | sd CARG1, -8(BASE)
1357 | sd TMP0, 0(RA)
1358 | b ->fff_res
1359 |. li RD, (3+1)*8
1360 |
1361 |.ffunc_2 ipairs_aux
1362 | checktab CARG1, ->fff_fallback
1363 | checkint CARG2, ->fff_fallback
1364 |. lw TMP0, TAB:CARG1->asize
1365 | ld TMP1, TAB:CARG1->array
1366 | ld PC, FRAME_PC(BASE)
1367 | sextw TMP2, CARG2
1368 | addiu TMP2, TMP2, 1
1369 | sltu AT, TMP2, TMP0
1370 | daddiu RA, BASE, -16
1371 | zextw TMP0, TMP2
1372 | settp TMP0, TISNUM
1373 | beqz AT, >2 // Not in array part?
1374 |. sd TMP0, 0(RA)
1375 | dsll TMP3, TMP2, 3
1376 | daddu TMP3, TMP1, TMP3
1377 | ld TMP1, 0(TMP3)
1378 |1:
1379 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1380 |. li RD, (0+1)*8
1381 | sd TMP1, -8(BASE)
1382 | b ->fff_res
1383 |. li RD, (2+1)*8
1384 |2: // Check for empty hash part first. Otherwise call C function.
1385 | lw TMP0, TAB:CARG1->hmask
1386 | load_got lj_tab_getinth
1387 | beqz TMP0, ->fff_res
1388 |. li RD, (0+1)*8
1389 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
1390 |. move CARG2, TMP2
1391 | // Returns cTValue * or NULL.
1392 | beqz CRET1, ->fff_res
1393 |. li RD, (0+1)*8
1394 | b <1
1395 |. ld TMP1, 0(CRET1)
1396 |
1397 |.ffunc_1 ipairs
1398 | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1399 | ld PC, FRAME_PC(BASE)
1400#if LJ_52
1401 | ld TAB:TMP2, TAB:TMP1->metatable
1402 | ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
1403 | bnez TAB:TMP2, ->fff_fallback
1404#else
1405 | ld TMP0, CFUNC:RB->upvalue[0]
1406#endif
1407 | daddiu RA, BASE, -16
1408 | dsll AT, TISNUM, 47
1409 | sd CARG1, -8(BASE)
1410 | sd AT, 0(BASE)
1411 | sd CFUNC:TMP0, 0(RA)
1412 | b ->fff_res
1413 |. li RD, (3+1)*8
1414 |
1415 |//-- Base library: catch errors ----------------------------------------
1416 |
1417 |.ffunc pcall
1418 | ld TMP1, L->maxstack
1419 | daddu TMP2, BASE, NARGS8:RC
1420 | sltu AT, TMP1, TMP2
1421 | bnez AT, ->fff_fallback
1422 |. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1423 | daddiu NARGS8:RC, NARGS8:RC, -8
1424 | bltz NARGS8:RC, ->fff_fallback
1425 |. move TMP2, BASE
1426 | daddiu BASE, BASE, 16
1427 | // Remember active hook before pcall.
1428 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1429 | andi TMP3, TMP3, 1
1430 | daddiu PC, TMP3, 16+FRAME_PCALL
1431 | beqz NARGS8:RC, ->vm_call_dispatch
1432 |1:
1433 |. daddu TMP0, BASE, NARGS8:RC
1434 |2:
1435 | ld TMP1, -16(TMP0)
1436 | sd TMP1, -8(TMP0)
1437 | daddiu TMP0, TMP0, -8
1438 | bne TMP0, BASE, <2
1439 |. nop
1440 | b ->vm_call_dispatch
1441 |. nop
1442 |
1443 |.ffunc xpcall
1444 | ld TMP1, L->maxstack
1445 | daddu TMP2, BASE, NARGS8:RC
1446 | sltu AT, TMP1, TMP2
1447 | bnez AT, ->fff_fallback
1448 |. ld CARG1, 0(BASE)
1449 | daddiu NARGS8:TMP0, NARGS8:RC, -16
1450 | ld CARG2, 8(BASE)
1451 | bltz NARGS8:TMP0, ->fff_fallback
1452 |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1453 | gettp AT, CARG2
1454 | daddiu AT, AT, -LJ_TFUNC
1455 | bnez AT, ->fff_fallback // Traceback must be a function.
1456 |. move TMP2, BASE
1457 | move NARGS8:RC, NARGS8:TMP0
1458 | daddiu BASE, BASE, 24
1459 | // Remember active hook before pcall.
1460 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1461 | sd CARG2, 0(TMP2) // Swap function and traceback.
1462 | andi TMP3, TMP3, 1
1463 | sd CARG1, 8(TMP2)
1464 | beqz NARGS8:RC, ->vm_call_dispatch
1465 |. daddiu PC, TMP3, 24+FRAME_PCALL
1466 | b <1
1467 |. nop
1468 |
1469 |//-- Coroutine library --------------------------------------------------
1470 |
1471 |.macro coroutine_resume_wrap, resume
1472 |.if resume
1473 |.ffunc_1 coroutine_resume
1474 | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
1475 |.else
1476 |.ffunc coroutine_wrap_aux
1477 | ld L:CARG1, CFUNC:RB->upvalue[0].gcr
1478 | cleartp L:CARG1
1479 |.endif
1480 | lbu TMP0, L:CARG1->status
1481 | ld TMP1, L:CARG1->cframe
1482 | ld CARG2, L:CARG1->top
1483 | ld TMP2, L:CARG1->base
1484 | addiu AT, TMP0, -LUA_YIELD
1485 | daddu CARG3, CARG2, TMP0
1486 | daddiu TMP3, CARG2, 8
1487 |.if MIPSR6
1488 | seleqz CARG2, CARG2, AT
1489 | selnez TMP3, TMP3, AT
1490 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1491 |. or CARG2, TMP3, CARG2
1492 |.else
1493 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1494 |. movn CARG2, TMP3, AT
1495 |.endif
1496 | xor TMP2, TMP2, CARG3
1497 | bnez TMP1, ->fff_fallback // cframe != 0?
1498 |. or AT, TMP2, TMP0
1499 | ld TMP0, L:CARG1->maxstack
1500 | beqz AT, ->fff_fallback // base == top && st == 0?
1501 |. ld PC, FRAME_PC(BASE)
1502 | daddu TMP2, CARG2, NARGS8:RC
1503 | sltu AT, TMP0, TMP2
1504 | bnez AT, ->fff_fallback // Stack overflow?
1505 |. sd PC, SAVE_PC
1506 | sd BASE, L->base
1507 |1:
1508 |.if resume
1509 | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC.
1510 | daddiu NARGS8:RC, NARGS8:RC, -8
1511 | daddiu TMP2, TMP2, -8
1512 |.endif
1513 | sd TMP2, L:CARG1->top
1514 | daddu TMP1, BASE, NARGS8:RC
1515 | move CARG3, CARG2
1516 | sd BASE, L->top
1517 |2: // Move args to coroutine.
1518 | ld CRET1, 0(BASE)
1519 | sltu AT, BASE, TMP1
1520 | beqz AT, >3
1521 |. daddiu BASE, BASE, 8
1522 | sd CRET1, 0(CARG3)
1523 | b <2
1524 |. daddiu CARG3, CARG3, 8
1525 |3:
1526 | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1527 |. move L:RA, L:CARG1
1528 | // Returns thread status.
1529 |4:
1530 | ld TMP2, L:RA->base
1531 | sltiu AT, CRET1, LUA_YIELD+1
1532 | ld TMP3, L:RA->top
1533 | li_vmstate INTERP
1534 | ld BASE, L->base
1535 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
1536 | st_vmstate
1537 | beqz AT, >8
1538 |. dsubu RD, TMP3, TMP2
1539 | ld TMP0, L->maxstack
1540 | beqz RD, >6 // No results?
1541 |. daddu TMP1, BASE, RD
1542 | sltu AT, TMP0, TMP1
1543 | bnez AT, >9 // Need to grow stack?
1544 |. daddu TMP3, TMP2, RD
1545 | sd TMP2, L:RA->top // Clear coroutine stack.
1546 | move TMP1, BASE
1547 |5: // Move results from coroutine.
1548 | ld CRET1, 0(TMP2)
1549 | daddiu TMP2, TMP2, 8
1550 | sltu AT, TMP2, TMP3
1551 | sd CRET1, 0(TMP1)
1552 | bnez AT, <5
1553 |. daddiu TMP1, TMP1, 8
1554 |6:
1555 | andi TMP0, PC, FRAME_TYPE
1556 |.if resume
1557 | mov_true TMP1
1558 | daddiu RA, BASE, -8
1559 | sd TMP1, -8(BASE) // Prepend true to results.
1560 | daddiu RD, RD, 16
1561 |.else
1562 | move RA, BASE
1563 | daddiu RD, RD, 8
1564 |.endif
1565 |7:
1566 | sd PC, SAVE_PC
1567 | beqz TMP0, ->BC_RET_Z
1568 |. move MULTRES, RD
1569 | b ->vm_return
1570 |. nop
1571 |
1572 |8: // Coroutine returned with error (at co->top-1).
1573 |.if resume
1574 | daddiu TMP3, TMP3, -8
1575 | mov_false TMP1
1576 | ld CRET1, 0(TMP3)
1577 | sd TMP3, L:RA->top // Remove error from coroutine stack.
1578 | li RD, (2+1)*8
1579 | sd TMP1, -8(BASE) // Prepend false to results.
1580 | daddiu RA, BASE, -8
1581 | sd CRET1, 0(BASE) // Copy error message.
1582 | b <7
1583 |. andi TMP0, PC, FRAME_TYPE
1584 |.else
1585 | load_got lj_ffh_coroutine_wrap_err
1586 | move CARG2, L:RA
1587 | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1588 |. move CARG1, L
1589 |.endif
1590 |
1591 |9: // Handle stack expansion on return from yield.
1592 | load_got lj_state_growstack
1593 | srl CARG2, RD, 3
1594 | call_intern lj_state_growstack // (lua_State *L, int n)
1595 |. move CARG1, L
1596 | b <4
1597 |. li CRET1, 0
1598 |.endmacro
1599 |
1600 | coroutine_resume_wrap 1 // coroutine.resume
1601 | coroutine_resume_wrap 0 // coroutine.wrap
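 |// The two instances differ in how the thread is obtained and how errors
 |// surface: coroutine.resume prepends a true/false status to the results
 |// (see 6:/8: above), while coroutine.wrap re-raises the error in the caller
 |// via lj_ffh_coroutine_wrap_err.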
1602 |
1603 |.ffunc coroutine_yield
1604 | ld TMP0, L->cframe
1605 | daddu TMP1, BASE, NARGS8:RC
1606 | sd BASE, L->base
1607 | andi TMP0, TMP0, CFRAME_RESUME
1608 | sd TMP1, L->top
1609 | beqz TMP0, ->fff_fallback
1610 |. li CRET1, LUA_YIELD
1611 | sd r0, L->cframe
1612 | b ->vm_leave_unw
1613 |. sb CRET1, L->status
1614 |
1615 |//-- Math library -------------------------------------------------------
1616 |
1617 |.ffunc_1 math_abs
1618 | gettp CARG2, CARG1
1619 | daddiu AT, CARG2, -LJ_TISNUM
1620 | bnez AT, >1
1621 |. sextw TMP1, CARG1
1622 | sra TMP0, TMP1, 31 // Extract sign.
1623 | xor TMP1, TMP1, TMP0
1624 | dsubu CARG1, TMP1, TMP0
1625 | dsll TMP3, CARG1, 32
1626 | bgez TMP3, ->fff_restv
1627 |. settp CARG1, TISNUM
1628 | li CARG1, 0x41e0 // 2^31 as a double.
1629 | b ->fff_restv
1630 |. dsll CARG1, CARG1, 48
1631 |1:
1632 | sltiu AT, CARG2, LJ_TISNUM
1633 | beqz AT, ->fff_fallback
1634 |. dextm CARG1, CARG1, 0, 30
1635 |// fallthrough
1636 |
1637 |->fff_restv:
1638 | // CARG1 = TValue result.
1639 | ld PC, FRAME_PC(BASE)
1640 | daddiu RA, BASE, -16
1641 | sd CARG1, -16(BASE)
1642 |->fff_res1:
1643 | // RA = results, PC = return.
1644 | li RD, (1+1)*8
1645 |->fff_res:
1646 | // RA = results, RD = (nresults+1)*8, PC = return.
1647 | andi TMP0, PC, FRAME_TYPE
1648 | bnez TMP0, ->vm_return
1649 |. move MULTRES, RD
1650 | lw INS, -4(PC)
1651 | decode_RB8a RB, INS
1652 | decode_RB8b RB
1653 |5:
1654 | sltu AT, RD, RB
1655 | bnez AT, >6 // More results expected?
1656 |. decode_RA8a TMP0, INS
1657 | decode_RA8b TMP0
1658 | ins_next1
1659 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1660 | dsubu BASE, RA, TMP0
1661 | ins_next2
1662 |
1663 |6: // Fill up results with nil.
1664 | daddu TMP1, RA, RD
1665 | daddiu RD, RD, 8
1666 | b <5
1667 |. sd TISNIL, -8(TMP1)
1668 |
1669 |.macro math_extern, func
1670 | .ffunc_n math_ .. func
1671 | load_got func
1672 | call_extern
1673 |. nop
1674 | b ->fff_resn
1675 |. nop
1676 |.endmacro
1677 |
1678 |.macro math_extern2, func
1679 | .ffunc_nn math_ .. func
1680 |. load_got func
1681 | call_extern
1682 |. nop
1683 | b ->fff_resn
1684 |. nop
1685 |.endmacro
1686 |
1687 |// TODO: Return integer type if result is integer (own sf implementation).
1688 |.macro math_round, func
1689 |->ff_math_ .. func:
1690 | ld CARG1, 0(BASE)
1691 | beqz NARGS8:RC, ->fff_fallback
1692 |. gettp TMP0, CARG1
1693 | beq TMP0, TISNUM, ->fff_restv
1694 |. sltu AT, TMP0, TISNUM
1695 | beqz AT, ->fff_fallback
1696 |.if FPU
1697 |. ldc1 FARG1, 0(BASE)
1698 | bal ->vm_ .. func
1699 |. nop
1700 |.else
1701 |. load_got func
1702 | call_extern
1703 |. nop
1704 |.endif
1705 | b ->fff_resn
1706 |. nop
1707 |.endmacro
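 |// Integer arguments are already rounded and returned unchanged; number
 |// arguments use ->vm_floor/->vm_ceil on FPU builds or the libm call on
 |// soft-float; everything else falls back.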
1708 |
1709 | math_round floor
1710 | math_round ceil
1711 |
1712 |.ffunc math_log
1713 | li AT, 8
1714 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1715 |. ld CARG1, 0(BASE)
1716 | checknum CARG1, ->fff_fallback
1717 |. load_got log
1718 |.if FPU
1719 | call_extern
1720 |. ldc1 FARG1, 0(BASE)
1721 |.else
1722 | call_extern
1723 |. nop
1724 |.endif
1725 | b ->fff_resn
1726 |. nop
1727 |
1728 | math_extern log10
1729 | math_extern exp
1730 | math_extern sin
1731 | math_extern cos
1732 | math_extern tan
1733 | math_extern asin
1734 | math_extern acos
1735 | math_extern atan
1736 | math_extern sinh
1737 | math_extern cosh
1738 | math_extern tanh
1739 | math_extern2 pow
1740 | math_extern2 atan2
1741 | math_extern2 fmod
1742 |
1743 |.if FPU
1744 |.ffunc_n math_sqrt
1745 |. sqrt.d FRET1, FARG1
1746 |// fallthrough to ->fff_resn
1747 |.else
1748 | math_extern sqrt
1749 |.endif
1750 |
1751 |->fff_resn:
1752 | ld PC, FRAME_PC(BASE)
1753 | daddiu RA, BASE, -16
1754 | b ->fff_res1
1755 |.if FPU
1756 |. sdc1 FRET1, 0(RA)
1757 |.else
1758 |. sd CRET1, 0(RA)
1759 |.endif
1760 |
1761 |
1762 |.ffunc_2 math_ldexp
1763 | checknum CARG1, ->fff_fallback
1764 | checkint CARG2, ->fff_fallback
1765 |. load_got ldexp
1766 | .FPU ldc1 FARG1, 0(BASE)
1767 | call_extern
1768 |. lw CARG2, 8+LO(BASE)
1769 | b ->fff_resn
1770 |. nop
1771 |
1772 |.ffunc_n math_frexp
1773 | load_got frexp
1774 | ld PC, FRAME_PC(BASE)
1775 | call_extern
1776 |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
1777 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1778 | daddiu RA, BASE, -16
1779 |.if FPU
1780 | mtc1 TMP1, FARG2
1781 | sdc1 FRET1, 0(RA)
1782 | cvt.d.w FARG2, FARG2
1783 | sdc1 FARG2, 8(RA)
1784 |.else
1785 | sd CRET1, 0(RA)
1786 | zextw TMP1, TMP1
1787 | settp TMP1, TISNUM
1788 | sd TMP1, 8(RA)
1789 |.endif
1790 | b ->fff_res
1791 |. li RD, (2+1)*8
1792 |
1793 |.ffunc_n math_modf
1794 | load_got modf
1795 | ld PC, FRAME_PC(BASE)
1796 | call_extern
1797 |. daddiu CARG2, BASE, -16
1798 | daddiu RA, BASE, -16
1799 |.if FPU
1800 | sdc1 FRET1, -8(BASE)
1801 |.else
1802 | sd CRET1, -8(BASE)
1803 |.endif
1804 | b ->fff_res
1805 |. li RD, (2+1)*8
1806 |
1807 |.macro math_minmax, name, intins, intinsc, fpins
1808 | .ffunc_1 name
1809 | daddu TMP3, BASE, NARGS8:RC
1810 | checkint CARG1, >5
1811 |. daddiu TMP2, BASE, 8
1812 |1: // Handle integers.
1813 | beq TMP2, TMP3, ->fff_restv
1814 |. ld CARG2, 0(TMP2)
1815 | checkint CARG2, >3
1816 |. sextw CARG1, CARG1
1817 | lw CARG2, LO(TMP2)
1818 |. slt AT, CARG1, CARG2
1819 |.if MIPSR6
1820 | intins TMP1, CARG2, AT
1821 | intinsc CARG1, CARG1, AT
1822 | or CARG1, CARG1, TMP1
1823 |.else
1824 | intins CARG1, CARG2, AT
1825 |.endif
1826 | daddiu TMP2, TMP2, 8
1827 | zextw CARG1, CARG1
1828 | b <1
1829 |. settp CARG1, TISNUM
1830 |
1831 |3: // Convert intermediate result to number and continue with number loop.
1832 | checknum CARG2, ->fff_fallback
1833 |.if FPU
1834 |. mtc1 CARG1, FRET1
1835 | cvt.d.w FRET1, FRET1
1836 | b >7
1837 |. ldc1 FARG1, 0(TMP2)
1838 |.else
1839 |. nop
1840 | bal ->vm_sfi2d_1
1841 |. nop
1842 | b >7
1843 |. nop
1844 |.endif
1845 |
1846 |5:
1847 | .FPU ldc1 FRET1, 0(BASE)
1848 | checknum CARG1, ->fff_fallback
1849 |6: // Handle numbers.
1850 |. ld CARG2, 0(TMP2)
1851 | beq TMP2, TMP3, ->fff_resn
1852 |.if FPU
1853 | ldc1 FARG1, 0(TMP2)
1854 |.else
1855 | move CRET1, CARG1
1856 |.endif
1857 | checknum CARG2, >8
1858 |. nop
1859 |7:
1860 |.if FPU
1861 |.if MIPSR6
1862 | fpins FRET1, FRET1, FARG1
1863 |.else
1864 |.if fpins // ismax
1865 | c.olt.d FARG1, FRET1
1866 |.else
1867 | c.olt.d FRET1, FARG1
1868 |.endif
1869 | movf.d FRET1, FARG1
1870 |.endif
1871 |.else
1872 |.if fpins // ismax
1873 | bal ->vm_sfcmpogt
1874 |.else
1875 | bal ->vm_sfcmpolt
1876 |.endif
1877 |. nop
1878 |.if MIPSR6
1879 | seleqz AT, CARG2, CRET1
1880 | selnez CARG1, CARG1, CRET1
1881 | or CARG1, CARG1, AT
1882 |.else
1883 | movz CARG1, CARG2, CRET1
1884 |.endif
1885 |.endif
1886 | b <6
1887 |. daddiu TMP2, TMP2, 8
1888 |
1889 |8: // Convert integer to number and continue with number loop.
1890 | checkint CARG2, ->fff_fallback
1891 |.if FPU
1892 |. lwc1 FARG1, LO(TMP2)
1893 | b <7
1894 |. cvt.d.w FARG1, FARG1
1895 |.else
1896 |. lw CARG2, LO(TMP2)
1897 | bal ->vm_sfi2d_2
1898 |. nop
1899 | b <7
1900 |. nop
1901 |.endif
1902 |
1903 |.endmacro
1904 |
1905 |.if MIPSR6
1906 | math_minmax math_min, seleqz, selnez, min.d
1907 | math_minmax math_max, selnez, seleqz, max.d
1908 |.else
1909 | math_minmax math_min, movz, _, 0
1910 | math_minmax math_max, movn, _, 1
1911 |.endif
1912 |
1913 |//-- String library -----------------------------------------------------
1914 |
1915 |.ffunc string_byte // Only handle the 1-arg case here.
1916 | ld CARG1, 0(BASE)
1917 | gettp TMP0, CARG1
1918 | xori AT, NARGS8:RC, 8
1919 | daddiu TMP0, TMP0, -LJ_TSTR
1920 | or AT, AT, TMP0
1921 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1922 |. cleartp STR:CARG1
1923 | lw TMP0, STR:CARG1->len
1924 | daddiu RA, BASE, -16
1925 | ld PC, FRAME_PC(BASE)
1926 | sltu RD, r0, TMP0
1927 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1928 | addiu RD, RD, 1
1929 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1930 | settp TMP1, TISNUM
1931 | b ->fff_res
1932 |. sd TMP1, 0(RA)
1933 |
1934 |.ffunc string_char // Only handle the 1-arg case here.
1935 | ffgccheck
1936 |.if not MIPSR6
1937 |. nop
1938 |.endif
1939 | ld CARG1, 0(BASE)
1940 | gettp TMP0, CARG1
1941 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1942 | daddiu TMP0, TMP0, -LJ_TISNUM // Integer.
1943 | li TMP1, 255
1944 | sextw CARG1, CARG1
1945 | or AT, AT, TMP0
1946 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1947 | or AT, AT, TMP1
1948 | bnez AT, ->fff_fallback
1949 |. li CARG3, 1
1950 | daddiu CARG2, sp, TMPD_OFS
1951 | sb CARG1, TMPD
1952 |->fff_newstr:
1953 | load_got lj_str_new
1954 | sd BASE, L->base
1955 | sd PC, SAVE_PC
1956 | call_intern lj_str_new // (lua_State *L, char *str, size_t l)
1957 |. move CARG1, L
1958 | // Returns GCstr *.
1959 | ld BASE, L->base
1960 |->fff_resstr:
1961 | li AT, LJ_TSTR
1962 | settp CRET1, AT
1963 | b ->fff_restv
1964 |. move CARG1, CRET1
1965 |
1966 |.ffunc string_sub
1967 | ffgccheck
1968 |.if not MIPSR6
1969 |. nop
1970 |.endif
1971 | addiu AT, NARGS8:RC, -16
1972 | ld TMP0, 0(BASE)
1973 | bltz AT, ->fff_fallback
1974 |. gettp TMP3, TMP0
1975 | cleartp STR:CARG1, TMP0
1976 | ld CARG2, 8(BASE)
1977 | beqz AT, >1
1978 |. li CARG4, -1
1979 | ld CARG3, 16(BASE)
1980 | checkint CARG3, ->fff_fallback
1981 |. sextw CARG4, CARG3
1982 |1:
1983 | checkint CARG2, ->fff_fallback
1984 |. li AT, LJ_TSTR
1985 | bne TMP3, AT, ->fff_fallback
1986 |. sextw CARG3, CARG2
1987 | lw CARG2, STR:CARG1->len
1988 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1989 | slt AT, CARG4, r0
1990 | addiu TMP0, CARG2, 1
1991 | addu TMP1, CARG4, TMP0
1992 | slt TMP3, CARG3, r0
1993 |.if MIPSR6
1994 | seleqz CARG4, CARG4, AT
1995 | selnez TMP1, TMP1, AT
1996 | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1
1997 |.else
1998 | movn CARG4, TMP1, AT // if (end < 0) end += len+1
1999 |.endif
2000 | addu TMP1, CARG3, TMP0
2001 |.if MIPSR6
2002 | selnez TMP1, TMP1, TMP3
2003 | seleqz CARG3, CARG3, TMP3
2004 | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1
2005 | li TMP2, 1
2006 | slt AT, CARG4, r0
2007 | slt TMP3, r0, CARG3
2008 | seleqz CARG4, CARG4, AT // if (end < 0) end = 0
2009 | selnez CARG3, CARG3, TMP3
2010 | seleqz TMP2, TMP2, TMP3
2011 | or CARG3, TMP2, CARG3 // if (start < 1) start = 1
2012 | slt AT, CARG2, CARG4
2013 | seleqz CARG4, CARG4, AT
2014 | selnez CARG2, CARG2, AT
2015 | or CARG4, CARG2, CARG4 // if (end > len) end = len
2016 |.else
2017 | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1
2018 | li TMP2, 1
2019 | slt AT, CARG4, r0
2020 | slt TMP3, r0, CARG3
2021 | movn CARG4, r0, AT // if (end < 0) end = 0
2022 | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1
2023 | slt AT, CARG2, CARG4
2024 | movn CARG4, CARG2, AT // if (end > len) end = len
2025 |.endif
2026 | daddu CARG2, STR:CARG1, CARG3
2027 | subu CARG3, CARG4, CARG3 // len = end - start
2028 | daddiu CARG2, CARG2, sizeof(GCstr)-1
2029 | bgez CARG3, ->fff_newstr
2030 |. addiu CARG3, CARG3, 1 // len++
2031 |->fff_emptystr: // Return empty string.
2032 | li AT, LJ_TSTR
2033 | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
2034 | b ->fff_restv
2035 |. settp CARG1, AT
2036 |
2037 |.macro ffstring_op, name
2038 | .ffunc string_ .. name
2039 | ffgccheck
2040 |. nop
2041 | beqz NARGS8:RC, ->fff_fallback
2042 |. ld CARG2, 0(BASE)
2043 | checkstr STR:CARG2, ->fff_fallback
2044 | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
2045 | load_got lj_buf_putstr_ .. name
2046 | ld TMP0, SBUF:CARG1->b
2047 | sd L, SBUF:CARG1->L
2048 | sd BASE, L->base
2049 | sd TMP0, SBUF:CARG1->w
2050 | call_intern extern lj_buf_putstr_ .. name
2051 |. sd PC, SAVE_PC
2052 | load_got lj_buf_tostr
2053 | call_intern lj_buf_tostr
2054 |. move SBUF:CARG1, SBUF:CRET1
2055 | b ->fff_resstr
2056 |. ld BASE, L->base
2057 |.endmacro
2058 |
2059 |ffstring_op reverse
2060 |ffstring_op lower
2061 |ffstring_op upper
2062 |
2063 |//-- Bit library --------------------------------------------------------
2064 |
2065 |->vm_tobit_fb:
2066 | beqz TMP1, ->fff_fallback
2067 |.if FPU
2068 |. ldc1 FARG1, 0(BASE)
2069 | add.d FARG1, FARG1, TOBIT
2070 | mfc1 CRET1, FARG1
2071 | jr ra
2072 |. zextw CRET1, CRET1
2073 |.else
2074 |// FP number to bit conversion for soft-float.
2075 |->vm_tobit:
2076 | dsll TMP0, CARG1, 1
2077 | li CARG3, 1076
2078 | dsrl AT, TMP0, 53
2079 | dsubu CARG3, CARG3, AT
2080 | sltiu AT, CARG3, 54
2081 | beqz AT, >1
2082 |. dextm TMP0, TMP0, 0, 20
2083 | dinsu TMP0, AT, 21, 21
2084 | slt AT, CARG1, r0
2085 | dsrlv CRET1, TMP0, CARG3
2086 | dsubu TMP0, r0, CRET1
2087 |.if MIPSR6
2088 | selnez TMP0, TMP0, AT
2089 | seleqz CRET1, CRET1, AT
2090 | or CRET1, CRET1, TMP0
2091 |.else
2092 | movn CRET1, TMP0, AT
2093 |.endif
2094 | jr ra
2095 |. zextw CRET1, CRET1
2096 |1:
2097 | jr ra
2098 |. move CRET1, r0
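 |// Doubling the value puts the exponent into bits 53-63; 1076 - exp
 |// (= 1023 bias + 52 mantissa bits + 1 for the doubling) is the right shift
 |// that aligns the integer part at bit 0. The implicit mantissa bit is
 |// inserted at bit 53, the result is negated for negative inputs, and any
 |// exponent needing a shift outside 0..53 yields 0.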
2099 |
2100 |// FP number to int conversion with a check for soft-float.
2101 |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
2102 |->vm_tointg:
2103 |.if JIT
2104 | dsll CRET2, CARG1, 1
2105 | beqz CRET2, >2
2106 |. li TMP0, 1076
2107 | dsrl AT, CRET2, 53
2108 | dsubu TMP0, TMP0, AT
2109 | sltiu AT, TMP0, 54
2110 | beqz AT, >1
2111 |. dextm CRET2, CRET2, 0, 20
2112 | dinsu CRET2, AT, 21, 21
2113 | slt AT, CARG1, r0
2114 | dsrlv CRET1, CRET2, TMP0
2115 | dsubu CARG1, r0, CRET1
2116 |.if MIPSR6
2117 | seleqz CRET1, CRET1, AT
2118 | selnez CARG1, CARG1, AT
2119 | or CRET1, CRET1, CARG1
2120 |.else
2121 | movn CRET1, CARG1, AT
2122 |.endif
2123 | li CARG1, 64
2124 | subu TMP0, CARG1, TMP0
2125 | dsllv CRET2, CRET2, TMP0 // Integer check.
2126 | sextw AT, CRET1
2127 | xor AT, CRET1, AT // Range check.
2128 |.if MIPSR6
2129 | seleqz AT, AT, CRET2
2130 | selnez CRET2, CRET2, CRET2
2131 | jr ra
2132 |. or CRET2, AT, CRET2
2133 |.else
2134 | jr ra
2135 |. movz CRET2, AT, CRET2
2136 |.endif
2137 |1:
2138 | jr ra
2139 |. li CRET2, 1
2140 |2:
2141 | jr ra
2142 |. move CRET1, r0
2143 |.endif
2144 |.endif
2145 |
2146 |.macro .ffunc_bit, name
2147 | .ffunc_1 bit_..name
2148 | gettp TMP0, CARG1
2149 | beq TMP0, TISNUM, >6
2150 |. zextw CRET1, CARG1
2151 | bal ->vm_tobit_fb
2152 |. sltiu TMP1, TMP0, LJ_TISNUM
2153 |6:
2154 |.endmacro
2155 |
2156 |.macro .ffunc_bit_op, name, bins
2157 | .ffunc_bit name
2158 | daddiu TMP2, BASE, 8
2159 | daddu TMP3, BASE, NARGS8:RC
2160 |1:
2161 | beq TMP2, TMP3, ->fff_resi
2162 |. ld CARG1, 0(TMP2)
2163 | gettp TMP0, CARG1
2164 |.if FPU
2165 | bne TMP0, TISNUM, >2
2166 |. daddiu TMP2, TMP2, 8
2167 | zextw CARG1, CARG1
2168 | b <1
2169 |. bins CRET1, CRET1, CARG1
2170 |2:
2171 | ldc1 FARG1, -8(TMP2)
2172 | sltiu AT, TMP0, LJ_TISNUM
2173 | beqz AT, ->fff_fallback
2174 |. add.d FARG1, FARG1, TOBIT
2175 | mfc1 CARG1, FARG1
2176 | zextw CARG1, CARG1
2177 | b <1
2178 |. bins CRET1, CRET1, CARG1
2179 |.else
2180 | beq TMP0, TISNUM, >2
2181 |. move CRET2, CRET1
2182 | bal ->vm_tobit_fb
2183 |. sltiu TMP1, TMP0, LJ_TISNUM
2184 | move CARG1, CRET2
2185 |2:
2186 | zextw CARG1, CARG1
2187 | bins CRET1, CRET1, CARG1
2188 | b <1
2189 |. daddiu TMP2, TMP2, 8
2190 |.endif
2191 |.endmacro
2192 |
2193 |.ffunc_bit_op band, and
2194 |.ffunc_bit_op bor, or
2195 |.ffunc_bit_op bxor, xor
2196 |
2197 |.ffunc_bit bswap
2198 | dsrl TMP0, CRET1, 8
2199 | dsrl TMP1, CRET1, 24
2200 | andi TMP2, TMP0, 0xff00
2201 | dins TMP1, CRET1, 24, 31
2202 | dins TMP2, TMP0, 16, 23
2203 | b ->fff_resi
2204 |. or CRET1, TMP1, TMP2
2205 |
2206 |.ffunc_bit bnot
2207 | not CRET1, CRET1
2208 | b ->fff_resi
2209 |. zextw CRET1, CRET1
2210 |
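|// Shift/rotate op: value in CARG1 (converted if needed), count in CARG2 must be an
|// integer. shmod == 1 negates the count, so rol becomes a rotate-right by -n.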
2211 |.macro .ffunc_bit_sh, name, shins, shmod
2212 | .ffunc_2 bit_..name
2213 | gettp TMP0, CARG1
2214 | beq TMP0, TISNUM, >1
2215 |. nop
2216 | bal ->vm_tobit_fb
2217 |. sltiu TMP1, TMP0, LJ_TISNUM
2218 | move CARG1, CRET1
2219 |1:
2220 | gettp TMP0, CARG2
2221 | bne TMP0, TISNUM, ->fff_fallback
2222 |. zextw CARG2, CARG2
2223 | sextw CARG1, CARG1
2224 |.if shmod == 1
2225 | negu CARG2, CARG2
2226 |.endif
2227 | shins CRET1, CARG1, CARG2
2228 | b ->fff_resi
2229 |. zextw CRET1, CRET1
2230 |.endmacro
2231 |
2232 |.ffunc_bit_sh lshift, sllv, 0
2233 |.ffunc_bit_sh rshift, srlv, 0
2234 |.ffunc_bit_sh arshift, srav, 0
2235 |.ffunc_bit_sh rol, rotrv, 1
2236 |.ffunc_bit_sh ror, rotrv, 0
2237 |
2238 |.ffunc_bit tobit
2239 |->fff_resi:
2240 | ld PC, FRAME_PC(BASE)
2241 | daddiu RA, BASE, -16
2242 | settp CRET1, TISNUM
2243 | b ->fff_res1
2244 |. sd CRET1, -16(BASE)
2245 |
2246 |//-----------------------------------------------------------------------
2247 |->fff_fallback: // Call fast function fallback handler.
2248 | // BASE = new base, RB = CFUNC, RC = nargs*8
2249 | ld TMP3, CFUNC:RB->f
2250 | daddu TMP1, BASE, NARGS8:RC
2251 | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC.
2252 | daddiu TMP0, TMP1, 8*LUA_MINSTACK
2253 | ld TMP2, L->maxstack
2254 | sd PC, SAVE_PC // Redundant (but a defined value).
2255 | sltu AT, TMP2, TMP0
2256 | sd BASE, L->base
2257 | sd TMP1, L->top
2258 | bnez AT, >5 // Need to grow stack.
2259 |. move CFUNCADDR, TMP3
2260 | jalr TMP3 // (lua_State *L)
2261 |. move CARG1, L
2262 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2263 | ld BASE, L->base
2264 | sll RD, CRET1, 3
2265 | bgtz CRET1, ->fff_res // Returned nresults+1?
2266 |. daddiu RA, BASE, -16
2267 |1: // Returned 0 or -1: retry fast path.
2268 | ld LFUNC:RB, FRAME_FUNC(BASE)
2269 | ld TMP0, L->top
2270 | cleartp LFUNC:RB
2271 | bnez CRET1, ->vm_call_tail // Returned -1?
2272 |. dsubu NARGS8:RC, TMP0, BASE
2273 | ins_callt // Returned 0: retry fast path.
2274 |
2275 |// Reconstruct previous base for vmeta_call during tailcall.
2276 |->vm_call_tail:
2277 | andi TMP0, PC, FRAME_TYPE
2278 | li AT, -4
2279 | bnez TMP0, >3
2280 |. and TMP1, PC, AT
2281 | lbu TMP1, OFS_RA(PC)
2282 | sll TMP1, TMP1, 3
2283 | addiu TMP1, TMP1, 16
2284 |3:
2285 | b ->vm_call_dispatch // Resolve again for tailcall.
2286 |. dsubu TMP2, BASE, TMP1
2287 |
2288 |5: // Grow stack for fallback handler.
2289 | load_got lj_state_growstack
2290 | li CARG2, LUA_MINSTACK
2291 | call_intern lj_state_growstack // (lua_State *L, int n)
2292 |. move CARG1, L
2293 | ld BASE, L->base
2294 | b <1
2295 |. li CRET1, 0 // Force retry.
2296 |
2297 |->fff_gcstep: // Call GC step function.
2298 | // BASE = new base, RC = nargs*8
2299 | move MULTRES, ra
2300 | load_got lj_gc_step
2301 | sd BASE, L->base
2302 | daddu TMP0, BASE, NARGS8:RC
2303 | sd PC, SAVE_PC // Redundant (but a defined value).
2304 | sd TMP0, L->top
2305 | call_intern lj_gc_step // (lua_State *L)
2306 |. move CARG1, L
2307 | ld BASE, L->base
2308 | move ra, MULTRES
2309 | ld TMP0, L->top
2310 | ld CFUNC:RB, FRAME_FUNC(BASE)
2311 | cleartp CFUNC:RB
2312 | jr ra
2313 |. dsubu NARGS8:RC, TMP0, BASE
2314 |
2315 |//-----------------------------------------------------------------------
2316 |//-- Special dispatch targets -------------------------------------------
2317 |//-----------------------------------------------------------------------
2318 |
2319 |->vm_record: // Dispatch target for recording phase.
2320 |.if JIT
2321 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2322 | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent.
2323 | bnez AT, >5
2324 | // Decrement the hookcount for consistency, but always do the call.
2325 |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2326 | andi AT, TMP3, HOOK_ACTIVE
2327 | bnez AT, >1
2328 |. addiu TMP2, TMP2, -1
2329 | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2330 | beqz AT, >1
2331 |. nop
2332 | b >1
2333 |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2334 |.endif
2335 |
2336 |->vm_rethook: // Dispatch target for return hooks.
2337 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2338 | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
2339 | beqz AT, >1
2340 |5: // Re-dispatch to static ins.
2341 |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*8.
2342 | jr AT
2343 |. nop
2344 |
2345 |->vm_inshook: // Dispatch target for instr/line hooks.
2346 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2347 | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2348 | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
2349 | bnez AT, <5
2350 |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2351 | beqz AT, <5
2352 |. addiu TMP2, TMP2, -1
2353 | beqz TMP2, >1
2354 |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2355 | andi AT, TMP3, LUA_MASKLINE
2356 | beqz AT, <5
2357 |1:
2358 |. load_got lj_dispatch_ins
2359 | sw MULTRES, SAVE_MULTRES
2360 | move CARG2, PC
2361 | sd BASE, L->base
2362 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2363 | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2364 |. move CARG1, L
2365 |3:
2366 | ld BASE, L->base
2367 |4: // Re-dispatch to static ins.
2368 | lw INS, -4(PC)
2369 | decode_OP8a TMP1, INS
2370 | decode_OP8b TMP1
2371 | daddu TMP0, DISPATCH, TMP1
2372 | decode_RD8a RD, INS
2373 | ld AT, GG_DISP2STATIC(TMP0)
2374 | decode_RA8a RA, INS
2375 | decode_RD8b RD
2376 | jr AT
2377 | decode_RA8b RA
2378 |
2379 |->cont_hook: // Continue from hook yield.
2380 | daddiu PC, PC, 4
2381 | b <4
2382 |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins.
2383 |
2384 |->vm_hotloop: // Hot loop counter underflow.
2385 |.if JIT
2386 | ld LFUNC:TMP1, FRAME_FUNC(BASE)
2387 | daddiu CARG1, DISPATCH, GG_DISP2J
2388 | cleartp LFUNC:TMP1
2389 | sd PC, SAVE_PC
2390 | ld TMP1, LFUNC:TMP1->pc
2391 | move CARG2, PC
2392 | sd L, DISPATCH_J(L)(DISPATCH)
2393 | lbu TMP1, PC2PROTO(framesize)(TMP1)
2394 | load_got lj_trace_hot
2395 | sd BASE, L->base
2396 | dsll TMP1, TMP1, 3
2397 | daddu TMP1, BASE, TMP1
2398 | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc)
2399 |. sd TMP1, L->top
2400 | b <3
2401 |. nop
2402 |.endif
2403 |
2404 |
2405 |->vm_callhook: // Dispatch target for call hooks.
2406 |.if JIT
2407 | b >1
2408 |.endif
2409 |. move CARG2, PC
2410 |
2411 |->vm_hotcall: // Hot call counter underflow.
2412 |.if JIT
2413 | ori CARG2, PC, 1
2414 |1:
2415 |.endif
2416 | load_got lj_dispatch_call
2417 | daddu TMP0, BASE, RC
2418 | sd PC, SAVE_PC
2419 | sd BASE, L->base
2420 | dsubu RA, RA, BASE
2421 | sd TMP0, L->top
2422 | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2423 |. move CARG1, L
2424 | // Returns ASMFunction.
2425 | ld BASE, L->base
2426 | ld TMP0, L->top
2427 | sd r0, SAVE_PC // Invalidate for subsequent line hook.
2428 | dsubu NARGS8:RC, TMP0, BASE
2429 | daddu RA, BASE, RA
2430 | ld LFUNC:RB, FRAME_FUNC(BASE)
2431 | cleartp LFUNC:RB
2432 | jr CRET1
2433 |. lw INS, -4(PC)
2434 |
2435 |->cont_stitch: // Trace stitching.
2436 |.if JIT
2437 | // RA = resultptr, RB = meta base
2438 | lw INS, -4(PC)
2439 | ld TRACE:TMP2, -40(RB) // Save previous trace.
2440 | decode_RA8a RC, INS
2441 | daddiu AT, MULTRES, -8
2442 | cleartp TRACE:TMP2
2443 | decode_RA8b RC
2444 | beqz AT, >2
2445 |. daddu RC, BASE, RC // Call base.
2446 |1: // Move results down.
2447 | ld CARG1, 0(RA)
2448 | daddiu AT, AT, -8
2449 | daddiu RA, RA, 8
2450 | sd CARG1, 0(RC)
2451 | bnez AT, <1
2452 |. daddiu RC, RC, 8
2453 |2:
2454 | decode_RA8a RA, INS
2455 | decode_RB8a RB, INS
2456 | decode_RA8b RA
2457 | decode_RB8b RB
2458 | daddu RA, RA, RB
2459 | daddu RA, BASE, RA
2460 |3:
2461 | sltu AT, RC, RA
2462 | bnez AT, >9 // More results wanted?
2463 |. nop
2464 |
2465 | lhu TMP3, TRACE:TMP2->traceno
2466 | lhu RD, TRACE:TMP2->link
2467 | beq RD, TMP3, ->cont_nop // Blacklisted.
2468 |. load_got lj_dispatch_stitch
2469 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2470 |. sll RD, RD, 3
2471 |
2472 | // Stitch a new trace to the previous trace.
2473 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2474 | sd L, DISPATCH_J(L)(DISPATCH)
2475 | sd BASE, L->base
2476 | daddiu CARG1, DISPATCH, GG_DISP2J
2477 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2478 |. move CARG2, PC
2479 | b ->cont_nop
2480 |. ld BASE, L->base
2481 |
2482 |9:
2483 | sd TISNIL, 0(RC)
2484 | b <3
2485 |. daddiu RC, RC, 8
2486 |.endif
2487 |
2488 |->vm_profhook: // Dispatch target for profiler hook.
2489#if LJ_HASPROFILE
2490 | load_got lj_dispatch_profile
2491 | sw MULTRES, SAVE_MULTRES
2492 | move CARG2, PC
2493 | sd BASE, L->base
2494 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2495 |. move CARG1, L
2496 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2497 | daddiu PC, PC, -4
2498 | b ->cont_nop
2499 |. ld BASE, L->base
2500#endif
2501 |
2502 |//-----------------------------------------------------------------------
2503 |//-- Trace exit handler -------------------------------------------------
2504 |//-----------------------------------------------------------------------
2505 |
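|// Store a GPR pair (and the corresponding FPR pair, if present) into the on-stack
|// register save area used by the trace exit handler.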
2506 |.macro savex_, a, b
2507 |.if FPU
2508 | sdc1 f..a, a*8(sp)
2509 | sdc1 f..b, b*8(sp)
2510 | sd r..a, 32*8+a*8(sp)
2511 | sd r..b, 32*8+b*8(sp)
2512 |.else
2513 | sd r..a, a*8(sp)
2514 | sd r..b, b*8(sp)
2515 |.endif
2516 |.endmacro
2517 |
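|// Trace exit: spill all registers to an on-stack ExitState, record the exit and
|// parent trace numbers, call lj_trace_exit, then re-enter the interpreter below.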
2518 |->vm_exit_handler:
2519 |.if JIT
2520 |.if FPU
2521 | daddiu sp, sp, -(32*8+32*8)
2522 |.else
2523 | daddiu sp, sp, -(32*8)
2524 |.endif
2525 | savex_ 0, 1
2526 | savex_ 2, 3
2527 | savex_ 4, 5
2528 | savex_ 6, 7
2529 | savex_ 8, 9
2530 | savex_ 10, 11
2531 | savex_ 12, 13
2532 | savex_ 14, 15
2533 | savex_ 16, 17
2534 | savex_ 18, 19
2535 | savex_ 20, 21
2536 | savex_ 22, 23
2537 | savex_ 24, 25
2538 | savex_ 26, 27
2539 | savex_ 28, 30
2540 |.if FPU
2541 | sdc1 f29, 29*8(sp)
2542 | sdc1 f31, 31*8(sp)
2543 | sd r0, 32*8+31*8(sp) // Clear RID_TMP.
2544 | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp.
2545 | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP
2546 |.else
2547 | sd r0, 31*8(sp) // Clear RID_TMP.
2548 | daddiu TMP2, sp, 32*8 // Recompute original value of sp.
2549 | sd TMP2, 29*8(sp) // Store sp in RID_SP
2550 |.endif
2551 | li_vmstate EXIT
2552 | daddiu DISPATCH, JGL, -GG_DISP2G-32768
2553 | lw TMP1, 0(TMP2) // Load exit number.
2554 | st_vmstate
2555 | ld L, DISPATCH_GL(cur_L)(DISPATCH)
2556 | ld BASE, DISPATCH_GL(jit_base)(DISPATCH)
2557 | load_got lj_trace_exit
2558 | sd L, DISPATCH_J(L)(DISPATCH)
2559 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2560 | sd BASE, L->base
2561 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2562 | daddiu CARG1, DISPATCH, GG_DISP2J
2563 | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2564 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2565 |. move CARG2, sp
2566 | // Returns MULTRES (unscaled) or negated error code.
2567 | ld TMP1, L->cframe
2568 | li AT, -4
2569 | ld BASE, L->base
2570 | and sp, TMP1, AT
2571 | ld PC, SAVE_PC // Get SAVE_PC.
2572 | b >1
2573 |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield).
2574 |.endif
2575 |->vm_exit_interp:
2576 |.if JIT
2577 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2578 | ld L, SAVE_L
2579 | daddiu DISPATCH, JGL, -GG_DISP2G-32768
2580 | sd BASE, L->base
2581 |1:
2582 | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit.
2583 | beqz TMP0, >9
2584 |. ld LFUNC:RB, FRAME_FUNC(BASE)
2585 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2586 | dsll MULTRES, CRET1, 3
2587 | cleartp LFUNC:RB
2588 | sw MULTRES, SAVE_MULTRES
2589 | li TISNIL, LJ_TNIL
2590 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2591 | .FPU mtc1 TMP3, TOBIT
2592 | ld TMP1, LFUNC:RB->pc
2593 | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2594 | ld KBASE, PC2PROTO(k)(TMP1)
2595 | .FPU cvt.d.s TOBIT, TOBIT
2596 | // Modified copy of ins_next which handles function header dispatch, too.
2597 | lw INS, 0(PC)
2598 | addiu CRET1, CRET1, 17 // Static dispatch?
2599 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
2600 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2601 | decode_RD8a RD, INS
2602 | beqz CRET1, >5
2603 |. daddiu PC, PC, 4
2604 | decode_OP8a TMP1, INS
2605 | decode_OP8b TMP1
2606 | daddu TMP0, DISPATCH, TMP1
2607 | sltiu TMP2, TMP1, BC_FUNCF*8
2608 | ld AT, 0(TMP0)
2609 | decode_RA8a RA, INS
2610 | beqz TMP2, >2
2611 |. decode_RA8b RA
2612 | jr AT
2613 |. decode_RD8b RD
2614 |2:
2615 | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
2616 | bnez TMP2, >3
2617 |. ld TMP1, FRAME_PC(BASE)
2618 | // Check frame below fast function.
2619 | andi TMP0, TMP1, FRAME_TYPE
2620 | bnez TMP0, >3 // Trace stitching continuation?
2621 |. nop
2622 | // Otherwise set KBASE for Lua function below fast function.
2623 | lw TMP2, -4(TMP1)
2624 | decode_RA8a TMP0, TMP2
2625 | decode_RA8b TMP0
2626 | dsubu TMP1, BASE, TMP0
2627 | ld LFUNC:TMP2, -32(TMP1)
2628 | cleartp LFUNC:TMP2
2629 | ld TMP1, LFUNC:TMP2->pc
2630 | ld KBASE, PC2PROTO(k)(TMP1)
2631 |3:
2632 | daddiu RC, MULTRES, -8
2633 | jr AT
2634 |. daddu RA, RA, BASE
2635 |
2636 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2637 | ld TMP0, DISPATCH_J(trace)(DISPATCH)
2638 | decode_RD8b RD
2639 | daddu TMP0, TMP0, RD
2640 | ld TRACE:TMP2, 0(TMP0)
2641 | lw INS, TRACE:TMP2->startins
2642 | decode_OP8a TMP1, INS
2643 | decode_OP8b TMP1
2644 | daddu TMP0, DISPATCH, TMP1
2645 | decode_RD8a RD, INS
2646 | ld AT, GG_DISP2STATIC(TMP0)
2647 | decode_RA8a RA, INS
2648 | decode_RD8b RD
2649 | jr AT
2650 |. decode_RA8b RA
2651 |
2652 |9: // Rethrow error from the right C frame.
2653 | load_got lj_err_trace
2654 | sub CARG2, r0, CRET1
2655 | call_intern lj_err_trace // (lua_State *L, int errcode)
2656 |. move CARG1, L
2657 |.endif
2658 |
2659 |//-----------------------------------------------------------------------
2660 |//-- Math helper functions ----------------------------------------------
2661 |//-----------------------------------------------------------------------
2662 |
2663 |// Hard-float round to integer.
2664 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2665 |// MIPSR6: Modifies FTMP1, too.
2666 |.macro vm_round_hf, func
2667 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2668 | dsll TMP0, TMP0, 32
2669 | dmtc1 TMP0, f4
2670 | abs.d FRET2, FARG1 // |x|
2671 | dmfc1 AT, FARG1
2672 |.if MIPSR6
2673 | cmp.lt.d FTMP1, FRET2, f4
2674 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2675 | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52.
2676 |.else
2677 | c.olt.d 0, FRET2, f4
2678 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2679 | bc1f 0, >1 // Truncate only if |x| < 2^52.
2680 |.endif
2681 |. sub.d FRET1, FRET1, f4
2682 | slt AT, AT, r0
2683 |.if "func" == "ceil"
2684 | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0.
2685 |.else
2686 | lui TMP0, 0x3ff0 // Hiword of +1 (double).
2687 |.endif
2688 |.if "func" == "trunc"
2689 | dsll TMP0, TMP0, 32
2690 | dmtc1 TMP0, f4
2691 |.if MIPSR6
2692 | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result?
2693 | sub.d FRET2, FRET1, f4
2694 | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1.
2695 | dmtc1 AT, FRET1
2696 | neg.d FRET2, FTMP1
2697 | jr ra
2698 |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in.
2699 |.else
2700 | c.olt.d 0, FRET2, FRET1 // |x| < result?
2701 | sub.d FRET2, FRET1, f4
2702 | movt.d FRET1, FRET2, 0 // If yes, subtract +1.
2703 | neg.d FRET2, FRET1
2704 | jr ra
2705 |. movn.d FRET1, FRET2, AT // Merge sign bit back in.
2706 |.endif
2707 |.else
2708 | neg.d FRET2, FRET1
2709 | dsll TMP0, TMP0, 32
2710 | dmtc1 TMP0, f4
2711 |.if MIPSR6
2712 | dmtc1 AT, FTMP1
2713 | sel.d FTMP1, FRET1, FRET2
2714 |.if "func" == "ceil"
2715 | cmp.lt.d FRET1, FTMP1, FARG1 // x > result?
2716 |.else
2717 | cmp.lt.d FRET1, FARG1, FTMP1 // x < result?
2718 |.endif
2719 | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1.
2720 | jr ra
2721 |. sel.d FRET1, FTMP1, FRET2
2722 |.else
2723 | movn.d FRET1, FRET2, AT // Merge sign bit back in.
2724 |.if "func" == "ceil"
2725 | c.olt.d 0, FRET1, FARG1 // x > result?
2726 |.else
2727 | c.olt.d 0, FARG1, FRET1 // x < result?
2728 |.endif
2729 | sub.d FRET2, FRET1, f4 // If yes, subtract +-1.
2730 | jr ra
2731 |. movt.d FRET1, FRET2, 0
2732 |.endif
2733 |.endif
2734 |1:
2735 | jr ra
2736 |. mov.d FRET1, FARG1
2737 |.endmacro
2738 |
2739 |.macro vm_round, func
2740 |.if FPU
2741 | vm_round_hf, func
2742 |.endif
2743 |.endmacro
2744 |
2745 |->vm_floor:
2746 | vm_round floor
2747 |->vm_ceil:
2748 | vm_round ceil
2749 |->vm_trunc:
2750 |.if JIT
2751 | vm_round trunc
2752 |.endif
2753 |
2754 |// Soft-float integer to number conversion.
2755 |.macro sfi2d, ARG
2756 |.if not FPU
2757 | beqz ARG, >9 // Handle zero first.
2758 |. sra TMP0, ARG, 31
2759 | xor TMP1, ARG, TMP0
2760 | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2761 | dclz ARG, TMP1
2762 | addiu ARG, ARG, -11
2763 | li AT, 0x3ff+63-11-1
2764 | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1.
2765 | subu ARG, AT, ARG // Exponent - 1.
2766 | ins ARG, TMP0, 11, 11 // Sign | Exponent.
2767 | dsll ARG, ARG, 52 // Align left.
2768 | jr ra
2769 |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent.
2770 |9:
2771 | jr ra
2772 |. nop
2773 |.endif
2774 |.endmacro
2775 |
2776 |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1.
2777 |->vm_sfi2d_1:
2778 | sfi2d CARG1
2779 |
2780 |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1.
2781 |->vm_sfi2d_2:
2782 | sfi2d CARG2
2783 |
2784 |// Soft-float comparison. Equivalent to c.eq.d.
2785 |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
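|// NaN check used below: (x << 1) drops the sign bit; any value above
|// 0xffe0000000000000 (2*Inf) has an all-ones exponent and a non-zero mantissa.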
2786 |->vm_sfcmpeq:
2787 |.if not FPU
2788 | dsll AT, CARG1, 1
2789 | dsll TMP0, CARG2, 1
2790 | or TMP1, AT, TMP0
2791 | beqz TMP1, >8 // Both args +-0: return 1.
2792 |. lui TMP1, 0xffe0
2793 | dsll TMP1, TMP1, 32
2794 | sltu AT, TMP1, AT
2795 | sltu TMP0, TMP1, TMP0
2796 | or TMP1, AT, TMP0
2797 | bnez TMP1, >9 // Either arg is NaN: return 0;
2798 |. xor AT, CARG1, CARG2
2799 | jr ra
2800 |. sltiu CRET1, AT, 1 // Same values: return 1.
2801 |8:
2802 | jr ra
2803 |. li CRET1, 1
2804 |9:
2805 | jr ra
2806 |. li CRET1, 0
2807 |.endif
2808 |
2809 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2810 |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2811 |->vm_sfcmpult:
2812 |.if not FPU
2813 | b >1
2814 |. li CRET2, 1
2815 |.endif
2816 |
2817 |->vm_sfcmpolt:
2818 |.if not FPU
2819 | li CRET2, 0
2820 |1:
2821 | dsll AT, CARG1, 1
2822 | dsll TMP0, CARG2, 1
2823 | or TMP1, AT, TMP0
2824 | beqz TMP1, >8 // Both args +-0: return 0.
2825 |. lui TMP1, 0xffe0
2826 | dsll TMP1, TMP1, 32
2827 | sltu AT, TMP1, AT
2828 | sltu TMP0, TMP1, TMP0
2829 | or TMP1, AT, TMP0
2830 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2831 |. and AT, CARG1, CARG2
2832 | bltz AT, >5 // Both args negative?
2833 |. nop
2834 | jr ra
2835 |. slt CRET1, CARG1, CARG2
2836 |5: // Swap conditions if both operands are negative.
2837 | jr ra
2838 |. slt CRET1, CARG2, CARG1
2839 |8:
2840 | jr ra
2841 |. li CRET1, 0
2842 |9:
2843 | jr ra
2844 |. move CRET1, CRET2
2845 |.endif
2846 |
2847 |->vm_sfcmpogt:
2848 |.if not FPU
2849 | dsll AT, CARG2, 1
2850 | dsll TMP0, CARG1, 1
2851 | or TMP1, AT, TMP0
2852 | beqz TMP1, >8 // Both args +-0: return 0.
2853 |. lui TMP1, 0xffe0
2854 | dsll TMP1, TMP1, 32
2855 | sltu AT, TMP1, AT
2856 | sltu TMP0, TMP1, TMP0
2857 | or TMP1, AT, TMP0
2858 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2859 |. and AT, CARG2, CARG1
2860 | bltz AT, >5 // Both args negative?
2861 |. nop
2862 | jr ra
2863 |. slt CRET1, CARG2, CARG1
2864 |5: // Swap conditions if both operands are negative.
2865 | jr ra
2866 |. slt CRET1, CARG1, CARG2
2867 |8:
2868 | jr ra
2869 |. li CRET1, 0
2870 |9:
2871 | jr ra
2872 |. li CRET1, 0
2873 |.endif
2874 |
2875 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2876 |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2877 |->vm_sfcmpolex:
2878 |.if not FPU
2879 | dsll AT, CARG1, 1
2880 | dsll TMP0, CARG2, 1
2881 | or TMP1, AT, TMP0
2882 | beqz TMP1, >8 // Both args +-0: return 1.
2883 |. lui TMP1, 0xffe0
2884 | dsll TMP1, TMP1, 32
2885 | sltu AT, TMP1, AT
2886 | sltu TMP0, TMP1, TMP0
2887 | or TMP1, AT, TMP0
2888 | bnez TMP1, >9 // Either arg is NaN: return 0;
2889 |. and AT, CARG1, CARG2
2890 | xor AT, AT, TMP3
2891 | bltz AT, >5 // Both args negative?
2892 |. nop
2893 | jr ra
2894 |. slt CRET1, CARG2, CARG1
2895 |5: // Swap conditions if both operands are negative.
2896 | jr ra
2897 |. slt CRET1, CARG1, CARG2
2898 |8:
2899 | jr ra
2900 |. li CRET1, 1
2901 |9:
2902 | jr ra
2903 |. li CRET1, 0
2904 |.endif
2905 |
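|// Soft-float min/max for the JIT: compare CARG1/CARG2 via fpcall, then select
|// CARG1 if the comparison holds, CARG2 otherwise.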
2906 |.macro sfmin_max, name, fpcall
2907 |->vm_sf .. name:
2908 |.if JIT and not FPU
2909 | move TMP2, ra
2910 | bal ->fpcall
2911 |. nop
2912 | move ra, TMP2
2913 | move TMP0, CRET1
2914 | move CRET1, CARG1
2915 |.if MIPSR6
2916 | selnez CRET1, CRET1, TMP0
2917 | seleqz TMP0, CARG2, TMP0
2918 | jr ra
2919 |. or CRET1, CRET1, TMP0
2920 |.else
2921 | jr ra
2922 |. movz CRET1, CARG2, TMP0
2923 |.endif
2924 |.endif
2925 |.endmacro
2926 |
2927 | sfmin_max min, vm_sfcmpolt
2928 | sfmin_max max, vm_sfcmpogt
2929 |
2930 |//-----------------------------------------------------------------------
2931 |//-- Miscellaneous functions --------------------------------------------
2932 |//-----------------------------------------------------------------------
2933 |
2934 |.define NEXT_TAB, TAB:CARG1
2935 |.define NEXT_IDX, CARG2
2936 |.define NEXT_ASIZE, CARG3
2937 |.define NEXT_NIL, CARG4
2938 |.define NEXT_TMP0, r12
2939 |.define NEXT_TMP1, r13
2940 |.define NEXT_TMP2, r14
2941 |.define NEXT_RES_VK, CRET1
2942 |.define NEXT_RES_IDX, CRET2
2943 |.define NEXT_RES_PTR, sp
2944 |.define NEXT_RES_VAL, 0(sp)
2945 |.define NEXT_RES_KEY, 8(sp)
2946 |
2947 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2948 |// Next idx returned in CRET2.
2949 |->vm_next:
2950 |.if JIT and ENDIAN_LE
2951 | lw NEXT_ASIZE, NEXT_TAB->asize
2952 | ld NEXT_TMP0, NEXT_TAB->array
2953 | li NEXT_NIL, LJ_TNIL
2954 |1: // Traverse array part.
2955 | sltu AT, NEXT_IDX, NEXT_ASIZE
2956 | sll NEXT_TMP1, NEXT_IDX, 3
2957 | beqz AT, >5
2958 |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
2959 | li AT, LJ_TISNUM
2960 | ld NEXT_TMP2, 0(NEXT_TMP1)
2961 | dsll AT, AT, 47
2962 | or NEXT_TMP1, NEXT_IDX, AT
2963 | beq NEXT_TMP2, NEXT_NIL, <1
2964 |. addiu NEXT_IDX, NEXT_IDX, 1
2965 | sd NEXT_TMP2, NEXT_RES_VAL
2966 | sd NEXT_TMP1, NEXT_RES_KEY
2967 | move NEXT_RES_VK, NEXT_RES_PTR
2968 | jr ra
2969 |. move NEXT_RES_IDX, NEXT_IDX
2970 |
2971 |5: // Traverse hash part.
2972 | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
2973 | ld NODE:NEXT_RES_VK, NEXT_TAB->node
2974 | sll NEXT_TMP2, NEXT_RES_IDX, 5
2975 | lw NEXT_TMP0, NEXT_TAB->hmask
2976 | sll AT, NEXT_RES_IDX, 3
2977 | subu AT, NEXT_TMP2, AT
2978 | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
2979 |6:
2980 | sltu AT, NEXT_TMP0, NEXT_RES_IDX
2981 | bnez AT, >8
2982 |. nop
2983 | ld NEXT_TMP2, NODE:NEXT_RES_VK->val
2984 | bne NEXT_TMP2, NEXT_NIL, >9
2985 |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
2986 | // Skip holes in hash part.
2987 | b <6
2988 |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
2989 |
2990 |8: // End of iteration. Set the key to nil (not the value).
2991 | sd NEXT_NIL, NEXT_RES_KEY
2992 | move NEXT_RES_VK, NEXT_RES_PTR
2993 |9:
2994 | jr ra
2995 |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
2996 |.endif
2997 |
2998 |//-----------------------------------------------------------------------
2999 |//-- FFI helper functions -----------------------------------------------
3000 |//-----------------------------------------------------------------------
3001 |
3002 |// Handler for callback functions. Callback slot number in r1, g in r2.
3003 |->vm_ffi_callback:
3004 |.if FFI
3005 |.type CTSTATE, CTState, PC
3006 | saveregs
3007 | ld CTSTATE, GL:r2->ctype_state
3008 | daddiu DISPATCH, r2, GG_G2DISP
3009 | load_got lj_ccallback_enter
3010 | sw r1, CTSTATE->cb.slot
3011 | sd CARG1, CTSTATE->cb.gpr[0]
3012 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
3013 | sd CARG2, CTSTATE->cb.gpr[1]
3014 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
3015 | sd CARG3, CTSTATE->cb.gpr[2]
3016 | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2]
3017 | sd CARG4, CTSTATE->cb.gpr[3]
3018 | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3]
3019 | sd CARG5, CTSTATE->cb.gpr[4]
3020 | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4]
3021 | sd CARG6, CTSTATE->cb.gpr[5]
3022 | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5]
3023 | sd CARG7, CTSTATE->cb.gpr[6]
3024 | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6]
3025 | sd CARG8, CTSTATE->cb.gpr[7]
3026 | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7]
3027 | daddiu TMP0, sp, CFRAME_SPACE
3028 | sd TMP0, CTSTATE->cb.stack
3029 | sd r0, SAVE_PC // Any value outside of bytecode is ok.
3030 | move CARG2, sp
3031 | call_intern lj_ccallback_enter // (CTState *cts, void *cf)
3032 |. move CARG1, CTSTATE
3033 | // Returns lua_State *.
3034 | ld BASE, L:CRET1->base
3035 | ld RC, L:CRET1->top
3036 | move L, CRET1
3037 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
3038 | ld LFUNC:RB, FRAME_FUNC(BASE)
3039 | .FPU mtc1 TMP3, TOBIT
3040 | li TISNIL, LJ_TNIL
3041 | li TISNUM, LJ_TISNUM
3042 | li_vmstate INTERP
3043 | subu RC, RC, BASE
3044 | cleartp LFUNC:RB
3045 | st_vmstate
3046 | .FPU cvt.d.s TOBIT, TOBIT
3047 | ins_callt
3048 |.endif
3049 |
3050 |->cont_ffi_callback: // Return from FFI callback.
3051 |.if FFI
3052 | load_got lj_ccallback_leave
3053 | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH)
3054 | sd BASE, L->base
3055 | sd RB, L->top
3056 | sd L, CTSTATE->L
3057 | move CARG2, RA
3058 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
3059 |. move CARG1, CTSTATE
3060 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
3061 | ld CRET1, CTSTATE->cb.gpr[0]
3062 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
3063 | b ->vm_leave_unw
3064 |. ld CRET2, CTSTATE->cb.gpr[1]
3065 |.endif
3066 |
3067 |->vm_ffi_call: // Call C function via FFI.
3068 | // Caveat: needs special frame unwinding, see below.
3069 |.if FFI
3070 | .type CCSTATE, CCallState, CARG1
3071 | lw TMP1, CCSTATE->spadj
3072 | lbu CARG2, CCSTATE->nsp
3073 | move TMP2, sp
3074 | dsubu sp, sp, TMP1
3075 | sd ra, -8(TMP2)
3076 | sd r16, -16(TMP2)
3077 | sd CCSTATE, -24(TMP2)
3078 | move r16, TMP2
3079 | daddiu TMP1, CCSTATE, offsetof(CCallState, stack)
3080 | move TMP2, sp
3081 | beqz CARG2, >2
3082 |. daddu TMP3, TMP1, CARG2
3083 |1:
3084 | ld TMP0, 0(TMP1)
3085 | daddiu TMP1, TMP1, 8
3086 | sltu AT, TMP1, TMP3
3087 | sd TMP0, 0(TMP2)
3088 | bnez AT, <1
3089 |. daddiu TMP2, TMP2, 8
3090 |2:
3091 | ld CFUNCADDR, CCSTATE->func
3092 | .FPU ldc1 FARG1, CCSTATE->gpr[0]
3093 | ld CARG2, CCSTATE->gpr[1]
3094 | .FPU ldc1 FARG2, CCSTATE->gpr[1]
3095 | ld CARG3, CCSTATE->gpr[2]
3096 | .FPU ldc1 FARG3, CCSTATE->gpr[2]
3097 | ld CARG4, CCSTATE->gpr[3]
3098 | .FPU ldc1 FARG4, CCSTATE->gpr[3]
3099 | ld CARG5, CCSTATE->gpr[4]
3100 | .FPU ldc1 FARG5, CCSTATE->gpr[4]
3101 | ld CARG6, CCSTATE->gpr[5]
3102 | .FPU ldc1 FARG6, CCSTATE->gpr[5]
3103 | ld CARG7, CCSTATE->gpr[6]
3104 | .FPU ldc1 FARG7, CCSTATE->gpr[6]
3105 | ld CARG8, CCSTATE->gpr[7]
3106 | .FPU ldc1 FARG8, CCSTATE->gpr[7]
3107 | jalr CFUNCADDR
3108 |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
3109 | ld CCSTATE:TMP1, -24(r16)
3110 | ld TMP2, -16(r16)
3111 | ld ra, -8(r16)
3112 | sd CRET1, CCSTATE:TMP1->gpr[0]
3113 | sd CRET2, CCSTATE:TMP1->gpr[1]
3114 |.if FPU
3115 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
3116 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
3117 |.else
3118 | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float.
3119 |.endif
3120 | move sp, r16
3121 | jr ra
3122 |. move r16, TMP2
3123 |.endif
3124 |// Note: vm_ffi_call must be the last function in this object file!
3125 |
3126 |//-----------------------------------------------------------------------
3127}
3128
3129/* Generate the code for a single instruction. */
3130static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3131{
3132 int vk = 0;
3133 |=>defop:
3134
3135 switch (op) {
3136
3137 /* -- Comparison ops ---------------------------------------------------- */
3138
3139 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3140
3141 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3142 | // RA = src1*8, RD = src2*8, JMP with RD = target
3143 |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp
3144 | daddu RA, BASE, RA
3145 | daddu RD, BASE, RD
3146 | ld ARGRA, 0(RA)
3147 | ld ARGRD, 0(RD)
3148 | lhu TMP2, OFS_RD(PC)
3149 | gettp CARG3, ARGRA
3150 | gettp CARG4, ARGRD
3151 | bne CARG3, TISNUM, >2
3152 |. daddiu PC, PC, 4
3153 | bne CARG4, TISNUM, >5
3154 |. decode_RD4b TMP2
3155 | sextw ARGRA, ARGRA
3156 | sextw ARGRD, ARGRD
3157 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3158 | slt AT, CARG1, CARG2
3159 | addu TMP2, TMP2, TMP3
3160 |.if MIPSR6
3161 | movop TMP2, TMP2, AT
3162 |.else
3163 | movop TMP2, r0, AT
3164 |.endif
3165 |1:
3166 | daddu PC, PC, TMP2
3167 | ins_next
3168 |
3169 |2: // RA is not an integer.
3170 | sltiu AT, CARG3, LJ_TISNUM
3171 | beqz AT, ->vmeta_comp
3172 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3173 | sltiu AT, CARG4, LJ_TISNUM
3174 | beqz AT, >4
3175 |. decode_RD4b TMP2
3176 |.if FPU
3177 | ldc1 FRA, 0(RA)
3178 | ldc1 FRD, 0(RD)
3179 |.endif
3180 |3: // RA and RD are both numbers.
3181 |.if FPU
3182 |.if MIPSR6
3183 | fcomp FTMP0, FTMP0, FTMP2
3184 | addu TMP2, TMP2, TMP3
3185 | mfc1 TMP3, FTMP0
3186 | b <1
3187 |. fmovop TMP2, TMP2, TMP3
3188 |.else
3189 | fcomp FTMP0, FTMP2
3190 | addu TMP2, TMP2, TMP3
3191 | b <1
3192 |. fmovop TMP2, r0
3193 |.endif
3194 |.else
3195 | bal sfcomp
3196 |. addu TMP2, TMP2, TMP3
3197 | b <1
3198 |.if MIPSR6
3199 |. movop TMP2, TMP2, CRET1
3200 |.else
3201 |. movop TMP2, r0, CRET1
3202 |.endif
3203 |.endif
3204 |
3205 |4: // RA is a number, RD is not a number.
3206 | bne CARG4, TISNUM, ->vmeta_comp
3207 | // RA is a number, RD is an integer. Convert RD to a number.
3208 |.if FPU
3209 |. lwc1 FRD, LO(RD)
3210 | ldc1 FRA, 0(RA)
3211 | b <3
3212 |. cvt.d.w FRD, FRD
3213 |.else
3214 |.if "ARGRD" == "CARG1"
3215 |. sextw CARG1, CARG1
3216 | bal ->vm_sfi2d_1
3217 |. nop
3218 |.else
3219 |. sextw CARG2, CARG2
3220 | bal ->vm_sfi2d_2
3221 |. nop
3222 |.endif
3223 | b <3
3224 |. nop
3225 |.endif
3226 |
3227 |5: // RA is an integer, RD is not an integer
3228 | sltiu AT, CARG4, LJ_TISNUM
3229 | beqz AT, ->vmeta_comp
3230 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3231 | // RA is an integer, RD is a number. Convert RA to a number.
3232 |.if FPU
3233 | lwc1 FRA, LO(RA)
3234 | ldc1 FRD, 0(RD)
3235 | b <3
3236 |. cvt.d.w FRA, FRA
3237 |.else
3238 |.if "ARGRA" == "CARG1"
3239 | bal ->vm_sfi2d_1
3240 |. sextw CARG1, CARG1
3241 |.else
3242 | bal ->vm_sfi2d_2
3243 |. sextw CARG2, CARG2
3244 |.endif
3245 | b <3
3246 |. nop
3247 |.endif
3248 |.endmacro
3249 |
3250 |.if MIPSR6
3251 if (op == BC_ISLT) {
3252 | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
3253 } else if (op == BC_ISGE) {
3254 | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
3255 } else if (op == BC_ISLE) {
3256 | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
3257 } else {
3258 | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
3259 }
3260 |.else
3261 if (op == BC_ISLT) {
3262 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
3263 } else if (op == BC_ISGE) {
3264 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
3265 } else if (op == BC_ISLE) {
3266 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
3267 } else {
3268 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
3269 }
3270 |.endif
3271 break;
3272
3273 case BC_ISEQV: case BC_ISNEV:
3274 vk = op == BC_ISEQV;
3275 | // RA = src1*8, RD = src2*8, JMP with RD = target
3276 | daddu RA, BASE, RA
3277 | daddiu PC, PC, 4
3278 | daddu RD, BASE, RD
3279 | ld CARG1, 0(RA)
3280 | lhu TMP2, -4+OFS_RD(PC)
3281 | ld CARG2, 0(RD)
3282 | gettp CARG3, CARG1
3283 | gettp CARG4, CARG2
3284 | sltu AT, TISNUM, CARG3
3285 | sltu TMP1, TISNUM, CARG4
3286 | or AT, AT, TMP1
3287 if (vk) {
3288 | beqz AT, ->BC_ISEQN_Z
3289 } else {
3290 | beqz AT, ->BC_ISNEN_Z
3291 }
3292 | // Either or both types are not numbers.
3293 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3294 |.if FFI
3295 |. li AT, LJ_TCDATA
3296 | beq CARG3, AT, ->vmeta_equal_cd
3297 |.endif
3298 | decode_RD4b TMP2
3299 |.if FFI
3300 | beq CARG4, AT, ->vmeta_equal_cd
3301 |. nop
3302 |.endif
3303 | bne CARG1, CARG2, >2
3304 |. addu TMP2, TMP2, TMP3
3305 | // Tag and value are equal.
3306 if (vk) {
3307 |->BC_ISEQV_Z:
3308 | daddu PC, PC, TMP2
3309 }
3310 |1:
3311 | ins_next
3312 |
3313 |2: // Check if the tags are the same and it's a table or userdata.
3314 | xor AT, CARG3, CARG4 // Same type?
3315 | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata?
3316 |.if MIPSR6
3317 | seleqz TMP0, TMP0, AT
3318 |.else
3319 | movn TMP0, r0, AT
3320 |.endif
3321 if (vk) {
3322 | beqz TMP0, <1
3323 } else {
3324 | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
3325 }
3326 | // Different tables or userdatas. Need to check __eq metamethod.
3327 | // Field metatable must be at same offset for GCtab and GCudata!
3328 |. cleartp TAB:TMP1, CARG1
3329 | ld TAB:TMP3, TAB:TMP1->metatable
3330 if (vk) {
3331 | beqz TAB:TMP3, <1 // No metatable?
3332 |. nop
3333 | lbu TMP3, TAB:TMP3->nomm
3334 | andi TMP3, TMP3, 1<<MM_eq
3335 | bnez TMP3, >1 // Or 'no __eq' flag set?
3336 } else {
3337 | beqz TAB:TMP3, ->BC_ISEQV_Z // No metatable?
3338 |. nop
3339 | lbu TMP3, TAB:TMP3->nomm
3340 | andi TMP3, TMP3, 1<<MM_eq
3341 | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
3342 }
3343 |. nop
3344 | b ->vmeta_equal // Handle __eq metamethod.
3345 |. li TMP0, 1-vk // ne = 0 or 1.
3346 break;
3347
3348 case BC_ISEQS: case BC_ISNES:
3349 vk = op == BC_ISEQS;
3350 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
3351 | daddu RA, BASE, RA
3352 | daddiu PC, PC, 4
3353 | ld CARG1, 0(RA)
3354 | dsubu RD, KBASE, RD
3355 | lhu TMP2, -4+OFS_RD(PC)
3356 | ld CARG2, -8(RD) // KBASE-8-str_const*8
3357 |.if FFI
3358 | gettp TMP0, CARG1
3359 | li AT, LJ_TCDATA
3360 |.endif
3361 | li TMP1, LJ_TSTR
3362 | decode_RD4b TMP2
3363 |.if FFI
3364 | beq TMP0, AT, ->vmeta_equal_cd
3365 |.endif
3366 |. settp CARG2, TMP1
3367 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3368 | xor TMP1, CARG1, CARG2
3369 | addu TMP2, TMP2, TMP3
3370 |.if MIPSR6
3371 if (vk) {
3372 | seleqz TMP2, TMP2, TMP1
3373 } else {
3374 | selnez TMP2, TMP2, TMP1
3375 }
3376 |.else
3377 if (vk) {
3378 | movn TMP2, r0, TMP1
3379 } else {
3380 | movz TMP2, r0, TMP1
3381 }
3382 |.endif
3383 | daddu PC, PC, TMP2
3384 | ins_next
3385 break;
3386
3387 case BC_ISEQN: case BC_ISNEN:
3388 vk = op == BC_ISEQN;
3389 | // RA = src*8, RD = num_const*8, JMP with RD = target
3390 | daddu RA, BASE, RA
3391 | daddu RD, KBASE, RD
3392 | ld CARG1, 0(RA)
3393 | ld CARG2, 0(RD)
3394 | lhu TMP2, OFS_RD(PC)
3395 | gettp CARG3, CARG1
3396 | gettp CARG4, CARG2
3397 | daddiu PC, PC, 4
3398 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3399 if (vk) {
3400 |->BC_ISEQN_Z:
3401 } else {
3402 |->BC_ISNEN_Z:
3403 }
3404 | bne CARG3, TISNUM, >3
3405 |. decode_RD4b TMP2
3406 | bne CARG4, TISNUM, >6
3407 |. addu TMP2, TMP2, TMP3
3408 | xor AT, CARG1, CARG2
3409 |.if MIPSR6
3410 if (vk) {
3411 | seleqz TMP2, TMP2, AT
3412 |1:
3413 | daddu PC, PC, TMP2
3414 |2:
3415 } else {
3416 | selnez TMP2, TMP2, AT
3417 |1:
3418 |2:
3419 | daddu PC, PC, TMP2
3420 }
3421 |.else
3422 if (vk) {
3423 | movn TMP2, r0, AT
3424 |1:
3425 | daddu PC, PC, TMP2
3426 |2:
3427 } else {
3428 | movz TMP2, r0, AT
3429 |1:
3430 |2:
3431 | daddu PC, PC, TMP2
3432 }
3433 |.endif
3434 | ins_next
3435 |
3436 |3: // RA is not an integer.
3437 | sltu AT, CARG3, TISNUM
3438 |.if FFI
3439 | beqz AT, >8
3440 |.else
3441 | beqz AT, <2
3442 |.endif
3443 |. addu TMP2, TMP2, TMP3
3444 | sltu AT, CARG4, TISNUM
3445 |.if FPU
3446 | ldc1 FTMP0, 0(RA)
3447 | ldc1 FTMP2, 0(RD)
3448 |.endif
3449 | beqz AT, >5
3450 |. nop
3451 |4: // RA and RD are both numbers.
3452 |.if FPU
3453 |.if MIPSR6
3454 | cmp.eq.d FTMP0, FTMP0, FTMP2
3455 | dmfc1 TMP1, FTMP0
3456 | b <1
3457 if (vk) {
3458 |. selnez TMP2, TMP2, TMP1
3459 } else {
3460 |. seleqz TMP2, TMP2, TMP1
3461 }
3462 |.else
3463 | c.eq.d FTMP0, FTMP2
3464 | b <1
3465 if (vk) {
3466 |. movf TMP2, r0
3467 } else {
3468 |. movt TMP2, r0
3469 }
3470 |.endif
3471 |.else
3472 | bal ->vm_sfcmpeq
3473 |. nop
3474 | b <1
3475 |.if MIPSR6
3476 if (vk) {
3477 |. selnez TMP2, TMP2, CRET1
3478 } else {
3479 |. seleqz TMP2, TMP2, CRET1
3480 }
3481 |.else
3482 if (vk) {
3483 |. movz TMP2, r0, CRET1
3484 } else {
3485 |. movn TMP2, r0, CRET1
3486 }
3487 |.endif
3488 |.endif
3489 |
3490 |5: // RA is a number, RD is not a number.
3491 |.if FFI
3492 | bne CARG4, TISNUM, >9
3493 |.else
3494 | bne CARG4, TISNUM, <2
3495 |.endif
3496 | // RA is a number, RD is an integer. Convert RD to a number.
3497 |.if FPU
3498 |. lwc1 FTMP2, LO(RD)
3499 | b <4
3500 |. cvt.d.w FTMP2, FTMP2
3501 |.else
3502 |. sextw CARG2, CARG2
3503 | bal ->vm_sfi2d_2
3504 |. nop
3505 | b <4
3506 |. nop
3507 |.endif
3508 |
3509 |6: // RA is an integer, RD is not an integer
3510 | sltu AT, CARG4, TISNUM
3511 |.if FFI
3512 | beqz AT, >9
3513 |.else
3514 | beqz AT, <2
3515 |.endif
3516 | // RA is an integer, RD is a number. Convert RA to a number.
3517 |.if FPU
3518 |. lwc1 FTMP0, LO(RA)
3519 | ldc1 FTMP2, 0(RD)
3520 | b <4
3521 |. cvt.d.w FTMP0, FTMP0
3522 |.else
3523 |. sextw CARG1, CARG1
3524 | bal ->vm_sfi2d_1
3525 |. nop
3526 | b <4
3527 |. nop
3528 |.endif
3529 |
3530 |.if FFI
3531 |8:
3532 | li AT, LJ_TCDATA
3533 | bne CARG3, AT, <2
3534 |. nop
3535 | b ->vmeta_equal_cd
3536 |. nop
3537 |9:
3538 | li AT, LJ_TCDATA
3539 | bne CARG4, AT, <2
3540 |. nop
3541 | b ->vmeta_equal_cd
3542 |. nop
3543 |.endif
3544 break;
3545
3546 case BC_ISEQP: case BC_ISNEP:
3547 vk = op == BC_ISEQP;
3548 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
3549 | daddu RA, BASE, RA
3550 | srl TMP1, RD, 3
3551 | ld TMP0, 0(RA)
3552 | lhu TMP2, OFS_RD(PC)
3553 | not TMP1, TMP1
3554 | gettp TMP0, TMP0
3555 | daddiu PC, PC, 4
3556 |.if FFI
3557 | li AT, LJ_TCDATA
3558 | beq TMP0, AT, ->vmeta_equal_cd
3559 |.endif
3560 |. xor TMP0, TMP0, TMP1
3561 | decode_RD4b TMP2
3562 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3563 | addu TMP2, TMP2, TMP3
3564 |.if MIPSR6
3565 if (vk) {
3566 | seleqz TMP2, TMP2, TMP0
3567 } else {
3568 | selnez TMP2, TMP2, TMP0
3569 }
3570 |.else
3571 if (vk) {
3572 | movn TMP2, r0, TMP0
3573 } else {
3574 | movz TMP2, r0, TMP0
3575 }
3576 |.endif
3577 | daddu PC, PC, TMP2
3578 | ins_next
3579 break;
3580
3581 /* -- Unary test and copy ops ------------------------------------------- */
3582
3583 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3584 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
3585 | daddu RD, BASE, RD
3586 | lhu TMP2, OFS_RD(PC)
3587 | ld TMP0, 0(RD)
3588 | daddiu PC, PC, 4
3589 | gettp TMP0, TMP0
3590 | sltiu TMP0, TMP0, LJ_TISTRUECOND
3591 if (op == BC_IST || op == BC_ISF) {
3592 | decode_RD4b TMP2
3593 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3594 | addu TMP2, TMP2, TMP3
3595 |.if MIPSR6
3596 if (op == BC_IST) {
3597 | selnez TMP2, TMP2, TMP0
3598 } else {
3599 | seleqz TMP2, TMP2, TMP0
3600 }
3601 |.else
3602 if (op == BC_IST) {
3603 | movz TMP2, r0, TMP0
3604 } else {
3605 | movn TMP2, r0, TMP0
3606 }
3607 |.endif
3608 | daddu PC, PC, TMP2
3609 } else {
3610 | ld CRET1, 0(RD)
3611 if (op == BC_ISTC) {
3612 | beqz TMP0, >1
3613 } else {
3614 | bnez TMP0, >1
3615 }
3616 |. daddu RA, BASE, RA
3617 | decode_RD4b TMP2
3618 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3619 | addu TMP2, TMP2, TMP3
3620 | sd CRET1, 0(RA)
3621 | daddu PC, PC, TMP2
3622 |1:
3623 }
3624 | ins_next
3625 break;
3626
3627 case BC_ISTYPE:
3628 | // RA = src*8, RD = -type*8
3629 | daddu TMP2, BASE, RA
3630 | srl TMP1, RD, 3
3631 | ld TMP0, 0(TMP2)
3632 | ins_next1
3633 | gettp TMP0, TMP0
3634 | daddu AT, TMP0, TMP1
3635 | bnez AT, ->vmeta_istype
3636 |. ins_next2
3637 break;
3638 case BC_ISNUM:
3639 | // RA = src*8, RD = -(TISNUM-1)*8
3640 | daddu TMP2, BASE, RA
3641 | ld TMP0, 0(TMP2)
3642 | ins_next1
3643 | checknum TMP0, ->vmeta_istype
3644 |. ins_next2
3645 break;
3646
3647 /* -- Unary ops --------------------------------------------------------- */
3648
3649 case BC_MOV:
3650 | // RA = dst*8, RD = src*8
3651 | daddu RD, BASE, RD
3652 | daddu RA, BASE, RA
3653 | ld CRET1, 0(RD)
3654 | ins_next1
3655 | sd CRET1, 0(RA)
3656 | ins_next2
3657 break;
3658 case BC_NOT:
3659 | // RA = dst*8, RD = src*8
3660 | daddu RD, BASE, RD
3661 | daddu RA, BASE, RA
3662 | ld TMP0, 0(RD)
3663 | li AT, LJ_TTRUE
3664 | gettp TMP0, TMP0
3665 | sltu TMP0, AT, TMP0
3666 | addiu TMP0, TMP0, 1
3667 | dsll TMP0, TMP0, 47
3668 | not TMP0, TMP0
3669 | ins_next1
3670 | sd TMP0, 0(RA)
3671 | ins_next2
3672 break;
3673 case BC_UNM:
3674 | // RA = dst*8, RD = src*8
3675 | daddu RB, BASE, RD
3676 | ld CARG1, 0(RB)
3677 | daddu RA, BASE, RA
3678 | gettp CARG3, CARG1
3679 | bne CARG3, TISNUM, >2
3680 |. lui TMP1, 0x8000
3681 | sextw CARG1, CARG1
3682 | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
3683 |. negu CARG1, CARG1
3684 | zextw CARG1, CARG1
3685 | settp CARG1, TISNUM
3686 |1:
3687 | ins_next1
3688 | sd CARG1, 0(RA)
3689 | ins_next2
3690 |2:
3691 | sltiu AT, CARG3, LJ_TISNUM
3692 | beqz AT, ->vmeta_unm
3693 |. dsll TMP1, TMP1, 32
3694 | b <1
3695 |. xor CARG1, CARG1, TMP1
3696 break;
3697 case BC_LEN:
3698 | // RA = dst*8, RD = src*8
3699 | daddu CARG2, BASE, RD
3700 | daddu RA, BASE, RA
3701 | ld TMP0, 0(CARG2)
3702 | gettp TMP1, TMP0
3703 | daddiu AT, TMP1, -LJ_TSTR
3704 | bnez AT, >2
3705 |. cleartp STR:CARG1, TMP0
3706 | lw CRET1, STR:CARG1->len
3707 |1:
3708 | settp CRET1, TISNUM
3709 | ins_next1
3710 | sd CRET1, 0(RA)
3711 | ins_next2
3712 |2:
3713 | daddiu AT, TMP1, -LJ_TTAB
3714 | bnez AT, ->vmeta_len
3715 |. nop
3716#if LJ_52
3717 | ld TAB:TMP2, TAB:CARG1->metatable
3718 | bnez TAB:TMP2, >9
3719 |. nop
3720 |3:
3721#endif
3722 |->BC_LEN_Z:
3723 | load_got lj_tab_len
3724 | call_intern lj_tab_len // (GCtab *t)
3725 |. nop
3726 | // Returns uint32_t (but less than 2^31).
3727 | b <1
3728 |. nop
3729#if LJ_52
3730 |9:
3731 | lbu TMP0, TAB:TMP2->nomm
3732 | andi TMP0, TMP0, 1<<MM_len
3733 | bnez TMP0, <3 // 'no __len' flag set: done.
3734 |. nop
3735 | b ->vmeta_len
3736 |. nop
3737#endif
3738 break;
3739
3740 /* -- Binary ops -------------------------------------------------------- */
3741
3742 |.macro fpmod, a, b, c
3743 | bal ->vm_floor // floor(b/c)
3744 |. div.d FARG1, b, c
3745 | mul.d a, FRET1, c
3746 | sub.d a, b, a // b - floor(b/c)*c
3747 |.endmacro
3748
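|// Soft-float CARG1 mod CARG2 via __divdf3, floor, __muldf3 and __subdf3, with the
|// original operands kept at 0(sp)/8(sp) across the calls.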
3749 |.macro sfpmod
3750 | daddiu sp, sp, -16
3751 |
3752 | load_got __divdf3
3753 | sd CARG1, 0(sp)
3754 | call_extern
3755 |. sd CARG2, 8(sp)
3756 |
3757 | load_got floor
3758 | call_extern
3759 |. move CARG1, CRET1
3760 |
3761 | load_got __muldf3
3762 | move CARG1, CRET1
3763 | call_extern
3764 |. ld CARG2, 8(sp)
3765 |
3766 | load_got __subdf3
3767 | ld CARG1, 0(sp)
3768 | call_extern
3769 |. move CARG2, CRET1
3770 |
3771 | daddiu sp, sp, 16
3772 |.endmacro
3773
3774 |.macro ins_arithpre, label
3775 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3776 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3777 ||switch (vk) {
3778 ||case 0:
3779 | decode_RB8a RB, INS
3780 | decode_RB8b RB
3781 | decode_RDtoRC8 RC, RD
3782 | // RA = dst*8, RB = src1*8, RC = num_const*8
3783 | daddu RB, BASE, RB
3784 |.if "label" ~= "none"
3785 | b label
3786 |.endif
3787 |. daddu RC, KBASE, RC
3788 || break;
3789 ||case 1:
3790 | decode_RB8a RC, INS
3791 | decode_RB8b RC
3792 | decode_RDtoRC8 RB, RD
3793 | // RA = dst*8, RB = num_const*8, RC = src1*8
3794 | daddu RC, BASE, RC
3795 |.if "label" ~= "none"
3796 | b label
3797 |.endif
3798 |. daddu RB, KBASE, RB
3799 || break;
3800 ||default:
3801 | decode_RB8a RB, INS
3802 | decode_RB8b RB
3803 | decode_RDtoRC8 RC, RD
3804 | // RA = dst*8, RB = src1*8, RC = src2*8
3805 | daddu RB, BASE, RB
3806 |.if "label" ~= "none"
3807 | b label
3808 |.endif
3809 |. daddu RC, BASE, RC
3810 || break;
3811 ||}
3812 |.endmacro
3813 |
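|// Generic arithmetic op: try the integer path first (with overflow checks), fall
|// back to FP math (or a soft-float helper call) for two numbers, else ->vmeta_arith.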
3814 |.macro ins_arith, intins, fpins, fpcall, label
3815 | ins_arithpre none
3816 |
3817 |.if "label" ~= "none"
3818 |label:
3819 |.endif
3820 |
3821 |// Operand loads and type tags, also used by the number path at label 5.
3822 | ld CARG1, 0(RB)
3823 | ld CARG2, 0(RC)
3824 | gettp TMP0, CARG1
3825 | gettp TMP1, CARG2
3826 |
3827 |.if "intins" ~= "div"
3828 |
3829 | // Check for two integers.
3830 | sextw CARG3, CARG1
3831 | bne TMP0, TISNUM, >5
3832 |. sextw CARG4, CARG2
3833 | bne TMP1, TISNUM, >5
3834 |
3835 |.if "intins" == "addu"
3836 |. intins CRET1, CARG3, CARG4
3837 | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
3838 | xor TMP2, CRET1, CARG4
3839 | and TMP1, TMP1, TMP2
3840 | bltz TMP1, ->vmeta_arith
3841 |. daddu RA, BASE, RA
3842 |.elif "intins" == "subu"
3843 |. intins CRET1, CARG3, CARG4
3844 | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
3845 | xor TMP2, CARG3, CARG4
3846 | and TMP1, TMP1, TMP2
3847 | bltz TMP1, ->vmeta_arith
3848 |. daddu RA, BASE, RA
3849 |.elif "intins" == "mult"
3850 |.if MIPSR6
3851 |. nop
3852 | mul CRET1, CARG3, CARG4
3853 | muh TMP2, CARG3, CARG4
3854 |.else
3855 |. intins CARG3, CARG4
3856 | mflo CRET1
3857 | mfhi TMP2
3858 |.endif
3859 | sra TMP1, CRET1, 31
3860 | bne TMP1, TMP2, ->vmeta_arith
3861 |. daddu RA, BASE, RA
3862 |.else
3863 |. load_got lj_vm_modi
3864 | beqz CARG4, ->vmeta_arith
3865 |. daddu RA, BASE, RA
3866 | move CARG1, CARG3
3867 | call_extern
3868 |. move CARG2, CARG4
3869 |.endif
3870 |
3871 | zextw CRET1, CRET1
3872 | settp CRET1, TISNUM
3873 | ins_next1
3874 | sd CRET1, 0(RA)
3875 |3:
3876 | ins_next2
3877 |
3878 |.endif
3879 |
3880 |5: // Check for two numbers.
3881 | .FPU ldc1 FTMP0, 0(RB)
3882 | sltu AT, TMP0, TISNUM
3883 | sltu TMP0, TMP1, TISNUM
3884 | .FPU ldc1 FTMP2, 0(RC)
3885 | and AT, AT, TMP0
3886 | beqz AT, ->vmeta_arith
3887 |. daddu RA, BASE, RA
3888 |
3889 |.if FPU
3890 | fpins FRET1, FTMP0, FTMP2
3891 |.elif "fpcall" == "sfpmod"
3892 | sfpmod
3893 |.else
3894 | load_got fpcall
3895 | call_extern
3896 |. nop
3897 |.endif
3898 |
3899 | ins_next1
3900 |.if "intins" ~= "div"
3901 | b <3
3902 |.endif
3903 |.if FPU
3904 |. sdc1 FRET1, 0(RA)
3905 |.else
3906 |. sd CRET1, 0(RA)
3907 |.endif
3908 |.if "intins" == "div"
3909 | ins_next2
3910 |.endif
3911 |
3912 |.endmacro
3913
3914 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3915 | ins_arith addu, add.d, __adddf3, none
3916 break;
3917 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3918 | ins_arith subu, sub.d, __subdf3, none
3919 break;
3920 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3921 | ins_arith mult, mul.d, __muldf3, none
3922 break;
3923 case BC_DIVVN:
3924 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3925 break;
3926 case BC_DIVNV: case BC_DIVVV:
3927 | ins_arithpre ->BC_DIVVN_Z
3928 break;
3929 case BC_MODVN:
3930 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
3931 break;
3932 case BC_MODNV: case BC_MODVV:
3933 | ins_arithpre ->BC_MODVN_Z
3934 break;
3935 case BC_POW:
3936 | ins_arithpre none
3937 | ld CARG1, 0(RB)
3938 | ld CARG2, 0(RC)
3939 | gettp TMP0, CARG1
3940 | gettp TMP1, CARG2
3941 | sltiu TMP0, TMP0, LJ_TISNUM
3942 | sltiu TMP1, TMP1, LJ_TISNUM
3943 | and AT, TMP0, TMP1
3944 | load_got pow
3945 | beqz AT, ->vmeta_arith
3946 |. daddu RA, BASE, RA
3947 |.if FPU
3948 | ldc1 FARG1, 0(RB)
3949 | ldc1 FARG2, 0(RC)
3950 |.endif
3951 | call_extern
3952 |. nop
3953 | ins_next1
3954 |.if FPU
3955 | sdc1 FRET1, 0(RA)
3956 |.else
3957 | sd CRET1, 0(RA)
3958 |.endif
3959 | ins_next2
3960 break;
3961
3962 case BC_CAT:
3963 | // RA = dst*8, RB = src_start*8, RC = src_end*8
3964 | decode_RB8a RB, INS
3965 | decode_RB8b RB
3966 | decode_RDtoRC8 RC, RD
3967 | dsubu CARG3, RC, RB
3968 | sd BASE, L->base
3969 | daddu CARG2, BASE, RC
3970 | move MULTRES, RB
3971 |->BC_CAT_Z:
3972 | load_got lj_meta_cat
3973 | srl CARG3, CARG3, 3
3974 | sd PC, SAVE_PC
3975 | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left)
3976 |. move CARG1, L
3977 | // Returns NULL (finished) or TValue * (metamethod).
3978 | bnez CRET1, ->vmeta_binop
3979 |. ld BASE, L->base
3980 | daddu RB, BASE, MULTRES
3981 | ld CRET1, 0(RB)
3982 | daddu RA, BASE, RA
3983 | ins_next1
3984 | sd CRET1, 0(RA)
3985 | ins_next2
3986 break;
3987
3988 /* -- Constant ops ------------------------------------------------------ */
3989
3990 case BC_KSTR:
3991 | // RA = dst*8, RD = str_const*8 (~)
3992 | dsubu TMP1, KBASE, RD
3993 | ins_next1
3994 | li TMP2, LJ_TSTR
3995 | ld TMP0, -8(TMP1) // KBASE-8-str_const*8
3996 | daddu RA, BASE, RA
3997 | settp TMP0, TMP2
3998 | sd TMP0, 0(RA)
3999 | ins_next2
4000 break;
4001 case BC_KCDATA:
4002 |.if FFI
4003 | // RA = dst*8, RD = cdata_const*8 (~)
4004 | dsubu TMP1, KBASE, RD
4005 | ins_next1
4006 | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8
4007 | li TMP2, LJ_TCDATA
4008 | daddu RA, BASE, RA
4009 | settp TMP0, TMP2
4010 | sd TMP0, 0(RA)
4011 | ins_next2
4012 |.endif
4013 break;
4014 case BC_KSHORT:
4015 | // RA = dst*8, RD = int16_literal*8
4016 | sra RD, INS, 16
4017 | daddu RA, BASE, RA
4018 | zextw RD, RD
4019 | ins_next1
4020 | settp RD, TISNUM
4021 | sd RD, 0(RA)
4022 | ins_next2
4023 break;
4024 case BC_KNUM:
4025 | // RA = dst*8, RD = num_const*8
4026 | daddu RD, KBASE, RD
4027 | daddu RA, BASE, RA
4028 | ld CRET1, 0(RD)
4029 | ins_next1
4030 | sd CRET1, 0(RA)
4031 | ins_next2
4032 break;
4033 case BC_KPRI:
4034 | // RA = dst*8, RD = primitive_type*8 (~)
4035 | daddu RA, BASE, RA
4036 | dsll TMP0, RD, 44
4037 | not TMP0, TMP0
4038 | ins_next1
4039 | sd TMP0, 0(RA)
4040 | ins_next2
4041 break;
4042 case BC_KNIL:
4043 | // RA = base*8, RD = end*8
4044 | daddu RA, BASE, RA
4045 | sd TISNIL, 0(RA)
4046 | daddiu RA, RA, 8
4047 | daddu RD, BASE, RD
4048 |1:
4049 | sd TISNIL, 0(RA)
4050 | slt AT, RA, RD
4051 | bnez AT, <1
4052 |. daddiu RA, RA, 8
4053 | ins_next_
4054 break;
4055
4056 /* -- Upvalue and function ops ------------------------------------------ */
4057
4058 case BC_UGET:
4059 | // RA = dst*8, RD = uvnum*8
4060 | ld LFUNC:RB, FRAME_FUNC(BASE)
4061 | daddu RA, BASE, RA
4062 | cleartp LFUNC:RB
4063 | daddu RD, RD, LFUNC:RB
4064 | ld UPVAL:RB, LFUNC:RD->uvptr
4065 | ins_next1
4066 | ld TMP1, UPVAL:RB->v
4067 | ld CRET1, 0(TMP1)
4068 | sd CRET1, 0(RA)
4069 | ins_next2
4070 break;
4071 case BC_USETV:
4072 | // RA = uvnum*8, RD = src*8
4073 | ld LFUNC:RB, FRAME_FUNC(BASE)
4074 | daddu RD, BASE, RD
4075 | cleartp LFUNC:RB
4076 | daddu RA, RA, LFUNC:RB
4077 | ld UPVAL:RB, LFUNC:RA->uvptr
4078 | ld CRET1, 0(RD)
4079 | lbu TMP3, UPVAL:RB->marked
4080 | ld CARG2, UPVAL:RB->v
4081 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
4082 | lbu TMP0, UPVAL:RB->closed
4083 | gettp TMP2, CRET1
4084 | sd CRET1, 0(CARG2)
4085 | li AT, LJ_GC_BLACK|1
4086 | or TMP3, TMP3, TMP0
4087 | beq TMP3, AT, >2 // Upvalue is closed and black?
4088 |. daddiu TMP2, TMP2, -(LJ_TNUMX+1)
4089 |1:
4090 | ins_next
4091 |
4092 |2: // Check if new value is collectable.
4093 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
4094 | beqz AT, <1 // tvisgcv(v)
4095 |. cleartp GCOBJ:CRET1, CRET1
4096 | lbu TMP3, GCOBJ:CRET1->gch.marked
4097 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
4098 | beqz TMP3, <1
4099 |. load_got lj_gc_barrieruv
4100 | // Crossed a write barrier. Move the barrier forward.
4101 | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
4102 |. daddiu CARG1, DISPATCH, GG_DISP2G
4103 | b <1
4104 |. nop
4105 break;
4106 case BC_USETS:
4107 | // RA = uvnum*8, RD = str_const*8 (~)
4108 | ld LFUNC:RB, FRAME_FUNC(BASE)
4109 | dsubu TMP1, KBASE, RD
4110 | cleartp LFUNC:RB
4111 | daddu RA, RA, LFUNC:RB
4112 | ld UPVAL:RB, LFUNC:RA->uvptr
4113 | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
4114 | lbu TMP2, UPVAL:RB->marked
4115 | ld CARG2, UPVAL:RB->v
4116 | lbu TMP3, STR:TMP1->marked
4117 | andi AT, TMP2, LJ_GC_BLACK // isblack(uv)
4118 | lbu TMP2, UPVAL:RB->closed
4119 | li TMP0, LJ_TSTR
4120 | settp TMP1, TMP0
4121 | bnez AT, >2
4122 |. sd TMP1, 0(CARG2)
4123 |1:
4124 | ins_next
4125 |
4126 |2: // Check if string is white and ensure upvalue is closed.
4127 | beqz TMP2, <1
4128 |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str)
4129 | beqz AT, <1
4130 |. load_got lj_gc_barrieruv
4131 | // Crossed a write barrier. Move the barrier forward.
4132 | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
4133 |. daddiu CARG1, DISPATCH, GG_DISP2G
4134 | b <1
4135 |. nop
4136 break;
4137 case BC_USETN:
4138 | // RA = uvnum*8, RD = num_const*8
4139 | ld LFUNC:RB, FRAME_FUNC(BASE)
4140 | daddu RD, KBASE, RD
4141 | cleartp LFUNC:RB
4142 | daddu RA, RA, LFUNC:RB
4143 | ld UPVAL:RB, LFUNC:RA->uvptr
4144 | ld CRET1, 0(RD)
4145 | ld TMP1, UPVAL:RB->v
4146 | ins_next1
4147 | sd CRET1, 0(TMP1)
4148 | ins_next2
4149 break;
4150 case BC_USETP:
4151 | // RA = uvnum*8, RD = primitive_type*8 (~)
4152 | ld LFUNC:RB, FRAME_FUNC(BASE)
4153 | dsll TMP0, RD, 44
4154 | cleartp LFUNC:RB
4155 | daddu RA, RA, LFUNC:RB
4156 | not TMP0, TMP0
4157 | ld UPVAL:RB, LFUNC:RA->uvptr
4158 | ins_next1
4159 | ld TMP1, UPVAL:RB->v
4160 | sd TMP0, 0(TMP1)
4161 | ins_next2
4162 break;
4163
4164 case BC_UCLO:
4165 | // RA = level*8, RD = target
4166 | ld TMP2, L->openupval
4167 | branch_RD // Do this first since RD is not saved.
4168 | load_got lj_func_closeuv
4169 | sd BASE, L->base
4170 | beqz TMP2, >1
4171 |. move CARG1, L
4172 | call_intern lj_func_closeuv // (lua_State *L, TValue *level)
4173 |. daddu CARG2, BASE, RA
4174 | ld BASE, L->base
4175 |1:
4176 | ins_next
4177 break;
4178
4179 case BC_FNEW:
4180 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
4181 | load_got lj_func_newL_gc
4182 | dsubu TMP1, KBASE, RD
4183 | ld CARG3, FRAME_FUNC(BASE)
4184 | ld CARG2, -8(TMP1) // KBASE-8-proto_const*8
4185 | sd BASE, L->base
4186 | sd PC, SAVE_PC
4187 | cleartp CARG3
4188 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
4189 | call_intern lj_func_newL_gc
4190 |. move CARG1, L
4191 | // Returns GCfuncL *.
4192 | li TMP0, LJ_TFUNC
4193 | ld BASE, L->base
4194 | ins_next1
4195 | settp CRET1, TMP0
4196 | daddu RA, BASE, RA
4197 | sd CRET1, 0(RA)
4198 | ins_next2
4199 break;
4200
4201 /* -- Table ops --------------------------------------------------------- */
4202
4203 case BC_TNEW:
4204 case BC_TDUP:
4205 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
4206 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
4207 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
4208 | sd BASE, L->base
4209 | sd PC, SAVE_PC
4210 | sltu AT, TMP0, TMP1
4211 | beqz AT, >5
4212 |1:
4213 if (op == BC_TNEW) {
4214 | load_got lj_tab_new
4215 | srl CARG2, RD, 3
4216 | andi CARG2, CARG2, 0x7ff
4217 | li TMP0, 0x801
4218 | addiu AT, CARG2, -0x7ff
4219 | srl CARG3, RD, 14
4220 |.if MIPSR6
4221 | seleqz TMP0, TMP0, AT
4222 | selnez CARG2, CARG2, AT
4223 | or CARG2, CARG2, TMP0
4224 |.else
4225 | movz CARG2, TMP0, AT
4226 |.endif
4227 | // (lua_State *L, int32_t asize, uint32_t hbits)
4228 | call_intern lj_tab_new
4229 |. move CARG1, L
4230 | // Returns Table *.
4231 } else {
4232 | load_got lj_tab_dup
4233 | dsubu TMP1, KBASE, RD
4234 | move CARG1, L
4235 | call_intern lj_tab_dup // (lua_State *L, Table *kt)
4236 |. ld CARG2, -8(TMP1) // KBASE-8-tab_const*8
4237 | // Returns Table *.
4238 }
4239 | li TMP0, LJ_TTAB
4240 | ld BASE, L->base
4241 | ins_next1
4242 | daddu RA, BASE, RA
4243 | settp CRET1, TMP0
4244 | sd CRET1, 0(RA)
4245 | ins_next2
4246 |5:
4247 | load_got lj_gc_step_fixtop
4248 | move MULTRES, RD
4249 | call_intern lj_gc_step_fixtop // (lua_State *L)
4250 |. move CARG1, L
4251 | b <1
4252 |. move RD, MULTRES
4253 break;
4254
4255 case BC_GGET:
4256 | // RA = dst*8, RD = str_const*8 (~)
4257 case BC_GSET:
4258 | // RA = src*8, RD = str_const*8 (~)
4259 | ld LFUNC:TMP2, FRAME_FUNC(BASE)
4260 | dsubu TMP1, KBASE, RD
4261 | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8
4262 | cleartp LFUNC:TMP2
4263 | ld TAB:RB, LFUNC:TMP2->env
4264 if (op == BC_GGET) {
4265 | b ->BC_TGETS_Z
4266 } else {
4267 | b ->BC_TSETS_Z
4268 }
4269 |. daddu RA, BASE, RA
4270 break;
4271
4272 case BC_TGETV:
4273 | // RA = dst*8, RB = table*8, RC = key*8
4274 | decode_RB8a RB, INS
4275 | decode_RB8b RB
4276 | decode_RDtoRC8 RC, RD
4277 | daddu CARG2, BASE, RB
4278 | daddu CARG3, BASE, RC
4279 | ld TAB:RB, 0(CARG2)
4280 | ld TMP2, 0(CARG3)
4281 | daddu RA, BASE, RA
4282 | checktab TAB:RB, ->vmeta_tgetv
4283 | gettp TMP3, TMP2
4284 | bne TMP3, TISNUM, >5 // Integer key?
4285 |. lw TMP0, TAB:RB->asize
4286 | sextw TMP2, TMP2
4287 | ld TMP1, TAB:RB->array
4288 | sltu AT, TMP2, TMP0
4289 | sll TMP2, TMP2, 3
4290 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
4291 |. daddu TMP2, TMP1, TMP2
4292 | ld AT, 0(TMP2)
4293 | beq AT, TISNIL, >2
4294 |. ld CRET1, 0(TMP2)
4295 |1:
4296 | ins_next1
4297 | sd CRET1, 0(RA)
4298 | ins_next2
4299 |
4300 |2: // Check for __index if table value is nil.
4301 | ld TAB:TMP2, TAB:RB->metatable
4302 | beqz TAB:TMP2, <1 // No metatable: done.
4303 |. nop
4304 | lbu TMP0, TAB:TMP2->nomm
4305 | andi TMP0, TMP0, 1<<MM_index
4306 | bnez TMP0, <1 // 'no __index' flag set: done.
4307 |. nop
4308 | b ->vmeta_tgetv
4309 |. nop
4310 |
4311 |5:
4312 | li AT, LJ_TSTR
4313 | bne TMP3, AT, ->vmeta_tgetv
4314 |. cleartp RC, TMP2
4315 | b ->BC_TGETS_Z // String key?
4316 |. nop
4317 break;
4318 case BC_TGETS:
4319 | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
4320 | decode_RB8a RB, INS
4321 | decode_RB8b RB
4322 | decode_RC8a RC, INS
4323 | daddu CARG2, BASE, RB
4324 | decode_RC8b RC
4325 | ld TAB:RB, 0(CARG2)
4326 | dsubu CARG3, KBASE, RC
4327 | daddu RA, BASE, RA
4328 | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8
4329 | checktab TAB:RB, ->vmeta_tgets1
4330 |->BC_TGETS_Z:
4331 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
4332 | lw TMP0, TAB:RB->hmask
4333 | lw TMP1, STR:RC->sid
4334 | ld NODE:TMP2, TAB:RB->node
4335 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
4336 | sll TMP0, TMP1, 5
4337 | sll TMP1, TMP1, 3
4338 | subu TMP1, TMP0, TMP1
4339 | li TMP3, LJ_TSTR
4340 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4341 | settp STR:RC, TMP3 // Tagged key to look for.
4342 |1:
4343 | ld CARG1, NODE:TMP2->key
4344 | ld CRET1, NODE:TMP2->val
4345 | ld NODE:TMP1, NODE:TMP2->next
4346 | bne CARG1, RC, >4
4347 |. ld TAB:TMP3, TAB:RB->metatable
4348 | beq CRET1, TISNIL, >5 // Key found, but nil value?
4349 |. nop
4350 |3:
4351 | ins_next1
4352 | sd CRET1, 0(RA)
4353 | ins_next2
4354 |
4355 |4: // Follow hash chain.
4356 | bnez NODE:TMP1, <1
4357 |. move NODE:TMP2, NODE:TMP1
4358 | // End of hash chain: key not found, nil result.
4359 |
4360 |5: // Check for __index if table value is nil.
4361 | beqz TAB:TMP3, <3 // No metatable: done.
4362 |. move CRET1, TISNIL
4363 | lbu TMP0, TAB:TMP3->nomm
4364 | andi TMP0, TMP0, 1<<MM_index
4365 | bnez TMP0, <3 // 'no __index' flag set: done.
4366 |. nop
4367 | b ->vmeta_tgets
4368 |. nop
4369 break;
4370 case BC_TGETB:
4371 | // RA = dst*8, RB = table*8, RC = index*8
4372 | decode_RB8a RB, INS
4373 | decode_RB8b RB
4374 | daddu CARG2, BASE, RB
4375 | decode_RDtoRC8 RC, RD
4376 | ld TAB:RB, 0(CARG2)
4377 | daddu RA, BASE, RA
4378 | srl TMP0, RC, 3
4379 | checktab TAB:RB, ->vmeta_tgetb
4380 | lw TMP1, TAB:RB->asize
4381 | ld TMP2, TAB:RB->array
4382 | sltu AT, TMP0, TMP1
4383 | beqz AT, ->vmeta_tgetb
4384 |. daddu RC, TMP2, RC
4385 | ld AT, 0(RC)
4386 | beq AT, TISNIL, >5
4387 |. ld CRET1, 0(RC)
4388 |1:
4389 | ins_next1
4390 | sd CRET1, 0(RA)
4391 | ins_next2
4392 |
4393 |5: // Check for __index if table value is nil.
4394 | ld TAB:TMP2, TAB:RB->metatable
4395 | beqz TAB:TMP2, <1 // No metatable: done.
4396 |. nop
4397 | lbu TMP1, TAB:TMP2->nomm
4398 | andi TMP1, TMP1, 1<<MM_index
4399 | bnez TMP1, <1 // 'no __index' flag set: done.
4400 |. nop
4401 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
4402 |. nop
4403 break;
4404 case BC_TGETR:
4405 | // RA = dst*8, RB = table*8, RC = key*8
4406 | decode_RB8a RB, INS
4407 | decode_RB8b RB
4408 | decode_RDtoRC8 RC, RD
4409 | daddu RB, BASE, RB
4410 | daddu RC, BASE, RC
4411 | ld TAB:CARG1, 0(RB)
4412 | lw CARG2, LO(RC)
4413 | daddu RA, BASE, RA
4414 | cleartp TAB:CARG1
4415 | lw TMP0, TAB:CARG1->asize
4416 | ld TMP1, TAB:CARG1->array
4417 | sltu AT, CARG2, TMP0
4418 | sll TMP2, CARG2, 3
4419 | beqz AT, ->vmeta_tgetr // In array part?
4420 |. daddu CRET1, TMP1, TMP2
4421 | ld CARG2, 0(CRET1)
4422 |->BC_TGETR_Z:
4423 | ins_next1
4424 | sd CARG2, 0(RA)
4425 | ins_next2
4426 break;
4427
4428 case BC_TSETV:
4429 | // RA = src*8, RB = table*8, RC = key*8
4430 | decode_RB8a RB, INS
4431 | decode_RB8b RB
4432 | decode_RDtoRC8 RC, RD
4433 | daddu CARG2, BASE, RB
4434 | daddu CARG3, BASE, RC
4435 | ld RB, 0(CARG2)
4436 | ld TMP2, 0(CARG3)
4437 | daddu RA, BASE, RA
4438 | checktab RB, ->vmeta_tsetv
4439 | checkint TMP2, >5
4440 |. sextw RC, TMP2
4441 | lw TMP0, TAB:RB->asize
4442 | ld TMP1, TAB:RB->array
4443 | sltu AT, RC, TMP0
4444 | sll TMP2, RC, 3
4445 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
4446 |. daddu TMP1, TMP1, TMP2
4447 | ld TMP0, 0(TMP1)
4448 | lbu TMP3, TAB:RB->marked
4449 | beq TMP0, TISNIL, >3
4450 |. ld CRET1, 0(RA)
4451 |1:
4452 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4453 | bnez AT, >7
4454 |. sd CRET1, 0(TMP1)
4455 |2:
4456 | ins_next
4457 |
4458 |3: // Check for __newindex if previous value is nil.
4459 | ld TAB:TMP2, TAB:RB->metatable
4460 | beqz TAB:TMP2, <1 // No metatable: done.
4461 |. nop
4462 | lbu TMP2, TAB:TMP2->nomm
4463 | andi TMP2, TMP2, 1<<MM_newindex
4464 | bnez TMP2, <1 // 'no __newindex' flag set: done.
4465 |. nop
4466 | b ->vmeta_tsetv
4467 |. nop
4468 |
4469 |5:
4470 | gettp AT, TMP2
4471 | daddiu AT, AT, -LJ_TSTR
4472 | bnez AT, ->vmeta_tsetv
4473 |. nop
4474 | b ->BC_TSETS_Z // String key?
4475 |. cleartp STR:RC, TMP2
4476 |
4477 |7: // Possible table write barrier for the value. Skip valiswhite check.
4478 | barrierback TAB:RB, TMP3, TMP0, <2
4479 break;
4480 case BC_TSETS:
4481 | // RA = src*8, RB = table*8, RC = str_const*8 (~)
4482 | decode_RB8a RB, INS
4483 | decode_RB8b RB
4484 | daddu CARG2, BASE, RB
4485 | decode_RC8a RC, INS
4486 | ld TAB:RB, 0(CARG2)
4487 | decode_RC8b RC
4488 | dsubu CARG3, KBASE, RC
4489 | ld RC, -8(CARG3) // KBASE-8-str_const*8
4490 | daddu RA, BASE, RA
4491 | cleartp STR:RC
4492 | checktab TAB:RB, ->vmeta_tsets1
4493 |->BC_TSETS_Z:
4494 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
4495 | lw TMP0, TAB:RB->hmask
4496 | lw TMP1, STR:RC->sid
4497 | ld NODE:TMP2, TAB:RB->node
4498 | sb r0, TAB:RB->nomm // Clear metamethod cache.
4499 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
4500 | sll TMP0, TMP1, 5
4501 | sll TMP1, TMP1, 3
4502 | subu TMP1, TMP0, TMP1
4503 | li TMP3, LJ_TSTR
4504 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4505 | settp STR:RC, TMP3 // Tagged key to look for.
4506 |.if FPU
4507 | ldc1 FTMP0, 0(RA)
4508 |.else
4509 | ld CRET1, 0(RA)
4510 |.endif
4511 |1:
4512 | ld TMP0, NODE:TMP2->key
4513 | ld CARG2, NODE:TMP2->val
4514 | ld NODE:TMP1, NODE:TMP2->next
4515 | bne TMP0, RC, >5
4516 |. lbu TMP3, TAB:RB->marked
4517 | beq CARG2, TISNIL, >4 // Key found, but nil value?
4518 |. ld TAB:TMP0, TAB:RB->metatable
4519 |2:
4520 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4521 | bnez AT, >7
4522 |.if FPU
4523 |. sdc1 FTMP0, NODE:TMP2->val
4524 |.else
4525 |. sd CRET1, NODE:TMP2->val
4526 |.endif
4527 |3:
4528 | ins_next
4529 |
4530 |4: // Check for __newindex if previous value is nil.
4531 | beqz TAB:TMP0, <2 // No metatable: done.
4532 |. nop
4533 | lbu TMP0, TAB:TMP0->nomm
4534 | andi TMP0, TMP0, 1<<MM_newindex
4535 | bnez TMP0, <2 // 'no __newindex' flag set: done.
4536 |. nop
4537 | b ->vmeta_tsets
4538 |. nop
4539 |
4540 |5: // Follow hash chain.
4541 | bnez NODE:TMP1, <1
4542 |. move NODE:TMP2, NODE:TMP1
4543 | // End of hash chain: key not found, add a new one.
4544 |
4545 | // But check for __newindex first.
4546 | ld TAB:TMP2, TAB:RB->metatable
4547 | beqz TAB:TMP2, >6 // No metatable: continue.
4548 |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
4549 | lbu TMP0, TAB:TMP2->nomm
4550 | andi TMP0, TMP0, 1<<MM_newindex
4551 | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4552 |6:
4553 | load_got lj_tab_newkey
4554 | sd RC, 0(CARG3)
4555 | sd BASE, L->base
4556 | move CARG2, TAB:RB
4557 | sd PC, SAVE_PC
4558 | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4559 |. move CARG1, L
4560 | // Returns TValue *.
4561 | ld BASE, L->base
4562 |.if FPU
4563 | b <3 // No 2nd write barrier needed.
4564 |. sdc1 FTMP0, 0(CRET1)
4565 |.else
4566 | ld CARG1, 0(RA)
4567 | b <3 // No 2nd write barrier needed.
4568 |. sd CARG1, 0(CRET1)
4569 |.endif
4570 |
4571 |7: // Possible table write barrier for the value. Skip valiswhite check.
4572 | barrierback TAB:RB, TMP3, TMP0, <3
4573 break;
4574 case BC_TSETB:
4575 | // RA = src*8, RB = table*8, RC = index*8
4576 | decode_RB8a RB, INS
4577 | decode_RB8b RB
4578 | daddu CARG2, BASE, RB
4579 | decode_RDtoRC8 RC, RD
4580 | ld TAB:RB, 0(CARG2)
4581 | daddu RA, BASE, RA
4582 | srl TMP0, RC, 3
4583 | checktab RB, ->vmeta_tsetb
4584 | lw TMP1, TAB:RB->asize
4585 | ld TMP2, TAB:RB->array
4586 | sltu AT, TMP0, TMP1
4587 | beqz AT, ->vmeta_tsetb
4588 |. daddu RC, TMP2, RC
4589 | ld TMP1, 0(RC)
4590 | lbu TMP3, TAB:RB->marked
4591 | beq TMP1, TISNIL, >5
4592 |1:
4593 |. ld CRET1, 0(RA)
4594 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4595 | bnez AT, >7
4596 |. sd CRET1, 0(RC)
4597 |2:
4598 | ins_next
4599 |
4600 |5: // Check for __newindex if previous value is nil.
4601 | ld TAB:TMP2, TAB:RB->metatable
4602 | beqz TAB:TMP2, <1 // No metatable: done.
4603 |. nop
4604 | lbu TMP1, TAB:TMP2->nomm
4605 | andi TMP1, TMP1, 1<<MM_newindex
4606 | bnez TMP1, <1 // 'no __newindex' flag set: done.
4607 |. nop
4608 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
4609 |. nop
4610 |
4611 |7: // Possible table write barrier for the value. Skip valiswhite check.
4612 | barrierback TAB:RB, TMP3, TMP0, <2
4613 break;
4614 case BC_TSETR:
4615 | // RA = dst*8, RB = table*8, RC = key*8
4616 | decode_RB8a RB, INS
4617 | decode_RB8b RB
4618 | decode_RDtoRC8 RC, RD
4619 | daddu CARG1, BASE, RB
4620 | daddu CARG3, BASE, RC
4621 | ld TAB:CARG2, 0(CARG1)
4622 | lw CARG3, LO(CARG3)
4623 | cleartp TAB:CARG2
4624 | lbu TMP3, TAB:CARG2->marked
4625 | lw TMP0, TAB:CARG2->asize
4626 | ld TMP1, TAB:CARG2->array
4627 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4628 | bnez AT, >7
4629 |. daddu RA, BASE, RA
4630 |2:
4631 | sltu AT, CARG3, TMP0
4632 | sll TMP2, CARG3, 3
4633 | beqz AT, ->vmeta_tsetr // In array part?
4634 |. daddu CRET1, TMP1, TMP2
4635 |->BC_TSETR_Z:
4636 | ld CARG1, 0(RA)
4637 | ins_next1
4638 | sd CARG1, 0(CRET1)
4639 | ins_next2
4640 |
4641 |7: // Possible table write barrier for the value. Skip valiswhite check.
4642 | barrierback TAB:CARG2, TMP3, CRET1, <2
4643 break;
4644
4645 case BC_TSETM:
4646 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
4647 | daddu RA, BASE, RA
4648 |1:
4649 | daddu TMP3, KBASE, RD
4650 | ld TAB:CARG2, -8(RA) // Guaranteed to be a table.
4651 | addiu TMP0, MULTRES, -8
4652 | lw TMP3, LO(TMP3) // Integer constant is in lo-word.
4653 | beqz TMP0, >4 // Nothing to copy?
4654 |. srl CARG3, TMP0, 3
4655 | cleartp CARG2
4656 | addu CARG3, CARG3, TMP3
4657 | lw TMP2, TAB:CARG2->asize
4658 | sll TMP1, TMP3, 3
4659 | lbu TMP3, TAB:CARG2->marked
4660 | ld CARG1, TAB:CARG2->array
4661 | sltu AT, TMP2, CARG3
4662 | bnez AT, >5
4663 |. daddu TMP2, RA, TMP0
4664 | daddu TMP1, TMP1, CARG1
4665 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4666 |3: // Copy result slots to table.
4667 | ld CRET1, 0(RA)
4668 | daddiu RA, RA, 8
4669 | sltu AT, RA, TMP2
4670 | sd CRET1, 0(TMP1)
4671 | bnez AT, <3
4672 |. daddiu TMP1, TMP1, 8
4673 | bnez TMP0, >7
4674 |. nop
4675 |4:
4676 | ins_next
4677 |
4678 |5: // Need to resize array part.
4679 | load_got lj_tab_reasize
4680 | sd BASE, L->base
4681 | sd PC, SAVE_PC
4682 | move BASE, RD
4683 | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
4684 |. move CARG1, L
4685 | // Must not reallocate the stack.
4686 | move RD, BASE
4687 | b <1
4688 |. ld BASE, L->base // Reload BASE for lack of a saved register.
4689 |
4690 |7: // Possible table write barrier for any value. Skip valiswhite check.
4691 | barrierback TAB:CARG2, TMP3, TMP0, <4
4692 break;
4693
4694 /* -- Calls and vararg handling ----------------------------------------- */
4695
4696 case BC_CALLM:
4697 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
4698 | decode_RDtoRC8 NARGS8:RC, RD
4699 | b ->BC_CALL_Z
4700 |. addu NARGS8:RC, NARGS8:RC, MULTRES
4701 break;
4702 case BC_CALL:
4703 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
4704 | decode_RDtoRC8 NARGS8:RC, RD
4705 |->BC_CALL_Z:
4706 | move TMP2, BASE
4707 | daddu BASE, BASE, RA
4708 | ld LFUNC:RB, 0(BASE)
4709 | daddiu BASE, BASE, 16
4710 | addiu NARGS8:RC, NARGS8:RC, -8
4711 | checkfunc RB, ->vmeta_call
4712 | ins_call
4713 break;
4714
4715 case BC_CALLMT:
4716 | // RA = base*8, (RB = 0,) RC = extra_nargs*8
4717 | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD.
4718 | // Fall through. Assumes BC_CALLT follows.
4719 break;
4720 case BC_CALLT:
4721 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
4722 | daddu RA, BASE, RA
4723 | ld RB, 0(RA)
4724 | move NARGS8:RC, RD
4725 | ld TMP1, FRAME_PC(BASE)
4726 | daddiu RA, RA, 16
4727 | addiu NARGS8:RC, NARGS8:RC, -8
4728 | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt
4729 |->BC_CALLT_Z:
4730 | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
4731 | lbu TMP3, LFUNC:CARG3->ffid
4732 | bnez TMP0, >7
4733 |. xori TMP2, TMP1, FRAME_VARG
4734 |1:
4735 | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
4736 | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function?
4737 | move TMP2, BASE
4738 | move RB, CARG3
4739 | beqz NARGS8:RC, >3
4740 |. move TMP3, NARGS8:RC
4741 |2:
4742 | ld CRET1, 0(RA)
4743 | daddiu RA, RA, 8
4744 | addiu TMP3, TMP3, -8
4745 | sd CRET1, 0(TMP2)
4746 | bnez TMP3, <2
4747 |. daddiu TMP2, TMP2, 8
4748 |3:
4749 | or TMP0, TMP0, AT
4750 | beqz TMP0, >5
4751 |. nop
4752 |4:
4753 | ins_callt
4754 |
4755 |5: // Tailcall to a fast function with a Lua frame below.
4756 | lw INS, -4(TMP1)
4757 | decode_RA8a RA, INS
4758 | decode_RA8b RA
4759 | dsubu TMP1, BASE, RA
4760 | ld TMP1, -32(TMP1)
4761 | cleartp LFUNC:TMP1
4762 | ld TMP1, LFUNC:TMP1->pc
4763 | b <4
4764 |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
4765 |
4766 |7: // Tailcall from a vararg function.
4767 | andi AT, TMP2, FRAME_TYPEP
4768 | bnez AT, <1 // Vararg frame below?
4769 |. dsubu TMP2, BASE, TMP2 // Relocate BASE down.
4770 | move BASE, TMP2
4771 | ld TMP1, FRAME_PC(TMP2)
4772 | b <1
4773 |. andi TMP0, TMP1, FRAME_TYPE
4774 break;
4775
4776 case BC_ITERC:
4777 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
4778 | move TMP2, BASE // Save old BASE for vmeta_call.
4779 | daddu BASE, BASE, RA
4780 | ld RB, -24(BASE)
4781 | ld CARG1, -16(BASE)
4782 | ld CARG2, -8(BASE)
4783 | li NARGS8:RC, 16 // Iterators get 2 arguments.
4784 | sd RB, 0(BASE) // Copy callable.
4785 | sd CARG1, 16(BASE) // Copy state.
4786 | sd CARG2, 24(BASE) // Copy control var.
4787 | daddiu BASE, BASE, 16
4788 | checkfunc RB, ->vmeta_call
4789 | ins_call
4790 break;
4791
4792 case BC_ITERN:
4793 |.if JIT and ENDIAN_LE
4794 | hotloop
4795 |.endif
4796 |->vm_IITERN:
4797 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
4798 | daddu RA, BASE, RA
4799 | ld TAB:RB, -16(RA)
4800 | lw RC, -8+LO(RA) // Get index from control var.
4801 | cleartp TAB:RB
4802 | daddiu PC, PC, 4
4803 | lw TMP0, TAB:RB->asize
4804 | ld TMP1, TAB:RB->array
4805 | dsll CARG3, TISNUM, 47
4806 |1: // Traverse array part.
4807 | sltu AT, RC, TMP0
4808 | beqz AT, >5 // Index points after array part?
4809 |. sll TMP3, RC, 3
4810 | daddu TMP3, TMP1, TMP3
4811 | ld CARG1, 0(TMP3)
4812 | lhu RD, -4+OFS_RD(PC)
4813 | or TMP2, RC, CARG3
4814 | beq CARG1, TISNIL, <1 // Skip holes in array part.
4815 |. addiu RC, RC, 1
4816 | sd TMP2, 0(RA)
4817 | sd CARG1, 8(RA)
4818 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4819 | decode_RD4b RD
4820 | daddu RD, RD, TMP3
4821 | sw RC, -8+LO(RA) // Update control var.
4822 | daddu PC, PC, RD
4823 |3:
4824 | ins_next
4825 |
4826 |5: // Traverse hash part.
4827 | lw TMP1, TAB:RB->hmask
4828 | subu RC, RC, TMP0
4829 | ld TMP2, TAB:RB->node
4830 |6:
4831 | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1.
4832 | bnez AT, <3
4833 |. sll TMP3, RC, 5
4834 | sll RB, RC, 3
4835 | subu TMP3, TMP3, RB
4836 | daddu NODE:TMP3, TMP3, TMP2
4837 | ld CARG1, 0(NODE:TMP3)
4838 | lhu RD, -4+OFS_RD(PC)
4839 | beq CARG1, TISNIL, <6 // Skip holes in hash part.
4840 |. addiu RC, RC, 1
4841 | ld CARG2, NODE:TMP3->key
4842 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4843 | sd CARG1, 8(RA)
4844 | addu RC, RC, TMP0
4845 | decode_RD4b RD
4846 | addu RD, RD, TMP3
4847 | sd CARG2, 0(RA)
4848 | daddu PC, PC, RD
4849 | b <3
4850 |. sw RC, -8+LO(RA) // Update control var.
4851 break;
4852
4853 case BC_ISNEXT:
4854 | // RA = base*8, RD = target (points to ITERN)
4855 | daddu RA, BASE, RA
4856 | srl TMP0, RD, 1
4857 | ld CFUNC:CARG1, -24(RA)
4858 | daddu TMP0, PC, TMP0
4859 | ld CARG2, -16(RA)
4860 | ld CARG3, -8(RA)
4861 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4862 | checkfunc CFUNC:CARG1, >5
4863 | gettp CARG2, CARG2
4864 | daddiu CARG2, CARG2, -LJ_TTAB
4865 | lbu TMP1, CFUNC:CARG1->ffid
4866 | daddiu CARG3, CARG3, -LJ_TNIL
4867 | or AT, CARG2, CARG3
4868 | daddiu TMP1, TMP1, -FF_next_N
4869 | or AT, AT, TMP1
4870 | bnez AT, >5
4871 |. lui TMP1, (LJ_KEYINDEX >> 16)
4872 | daddu PC, TMP0, TMP2
4873 | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
4874 | dsll TMP1, TMP1, 32
4875 | sd TMP1, -8(RA)
4876 |1:
4877 | ins_next
4878 |5: // Despecialize bytecode if any of the checks fail.
4879 | li TMP3, BC_JMP
4880 | li TMP1, BC_ITERC
4881 | sb TMP3, -4+OFS_OP(PC)
4882 | daddu PC, TMP0, TMP2
4883 |.if JIT
4884 | lb TMP0, OFS_OP(PC)
4885 | li AT, BC_ITERN
4886 | bne TMP0, AT, >6
4887 |. lhu TMP2, OFS_RD(PC)
4888 |.endif
4889 | b <1
4890 |. sb TMP1, OFS_OP(PC)
4891 |.if JIT
4892 |6: // Unpatch JLOOP.
4893 | ld TMP0, DISPATCH_J(trace)(DISPATCH)
4894 | sll TMP2, TMP2, 3
4895 | daddu TMP0, TMP0, TMP2
4896 | ld TRACE:TMP2, 0(TMP0)
4897 | lw TMP0, TRACE:TMP2->startins
4898 | li AT, -256
4899 | and TMP0, TMP0, AT
4900 | or TMP0, TMP0, TMP1
4901 | b <1
4902 |. sw TMP0, 0(PC)
4903 |.endif
4904 break;
4905
4906 case BC_VARG:
4907 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
4908 | ld TMP0, FRAME_PC(BASE)
4909 | decode_RDtoRC8 RC, RD
4910 | decode_RB8a RB, INS
4911 | daddu RC, BASE, RC
4912 | decode_RB8b RB
4913 | daddu RA, BASE, RA
4914 | daddiu RC, RC, FRAME_VARG
4915 | daddu TMP2, RA, RB
4916 | daddiu TMP3, BASE, -16 // TMP3 = vtop
4917 | dsubu RC, RC, TMP0 // RC = vbase
4918 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
4919 | beqz RB, >5 // Copy all varargs?
4920 |. dsubu TMP1, TMP3, RC
4921 | daddiu TMP2, TMP2, -16
4922 |1: // Copy vararg slots to destination slots.
4923 | ld CARG1, 0(RC)
4924 | sltu AT, RC, TMP3
4925 | daddiu RC, RC, 8
4926 |.if MIPSR6
4927 | selnez CARG1, CARG1, AT
4928 | seleqz AT, TISNIL, AT
4929 | or CARG1, CARG1, AT
4930 |.else
4931 | movz CARG1, TISNIL, AT
4932 |.endif
4933 | sd CARG1, 0(RA)
4934 | sltu AT, RA, TMP2
4935 | bnez AT, <1
4936 |. daddiu RA, RA, 8
4937 |3:
4938 | ins_next
4939 |
4940 |5: // Copy all varargs.
4941 | ld TMP0, L->maxstack
4942 | blez TMP1, <3 // No vararg slots?
4943 |. li MULTRES, 8 // MULTRES = (0+1)*8
4944 | daddu TMP2, RA, TMP1
4945 | sltu AT, TMP0, TMP2
4946 | bnez AT, >7
4947 |. daddiu MULTRES, TMP1, 8
4948 |6:
4949 | ld CRET1, 0(RC)
4950 | daddiu RC, RC, 8
4951 | sd CRET1, 0(RA)
4952 | sltu AT, RC, TMP3
4953 | bnez AT, <6 // More vararg slots?
4954 |. daddiu RA, RA, 8
4955 | b <3
4956 |. nop
4957 |
4958 |7: // Grow stack for varargs.
4959 | load_got lj_state_growstack
4960 | sd RA, L->top
4961 | dsubu RA, RA, BASE
4962 | sd BASE, L->base
4963 | dsubu BASE, RC, BASE // Need delta, because BASE may change.
4964 | sd PC, SAVE_PC
4965 | srl CARG2, TMP1, 3
4966 | call_intern lj_state_growstack // (lua_State *L, int n)
4967 |. move CARG1, L
4968 | move RC, BASE
4969 | ld BASE, L->base
4970 | daddu RA, BASE, RA
4971 | daddu RC, BASE, RC
4972 | b <6
4973 |. daddiu TMP3, BASE, -16
4974 break;
4975
4976 /* -- Returns ----------------------------------------------------------- */
4977
4978 case BC_RETM:
4979 | // RA = results*8, RD = extra_nresults*8
4980 | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
4981 | // Fall through. Assumes BC_RET follows.
4982 break;
4983
4984 case BC_RET:
4985 | // RA = results*8, RD = (nresults+1)*8
4986 | ld PC, FRAME_PC(BASE)
4987 | daddu RA, BASE, RA
4988 | move MULTRES, RD
4989 |1:
4990 | andi TMP0, PC, FRAME_TYPE
4991 | bnez TMP0, ->BC_RETV_Z
4992 |. xori TMP1, PC, FRAME_VARG
4993 |
4994 |->BC_RET_Z:
4995 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
4996 | lw INS, -4(PC)
4997 | daddiu TMP2, BASE, -16
4998 | daddiu RC, RD, -8
4999 | decode_RA8a TMP0, INS
5000 | decode_RB8a RB, INS
5001 | decode_RA8b TMP0
5002 | decode_RB8b RB
5003 | daddu TMP3, TMP2, RB
5004 | beqz RC, >3
5005 |. dsubu BASE, TMP2, TMP0
5006 |2:
5007 | ld CRET1, 0(RA)
5008 | daddiu RA, RA, 8
5009 | daddiu RC, RC, -8
5010 | sd CRET1, 0(TMP2)
5011 | bnez RC, <2
5012 |. daddiu TMP2, TMP2, 8
5013 |3:
5014 | daddiu TMP3, TMP3, -8
5015 |5:
5016 | sltu AT, TMP2, TMP3
5017 | bnez AT, >6
5018 |. ld LFUNC:TMP1, FRAME_FUNC(BASE)
5019 | ins_next1
5020 | cleartp LFUNC:TMP1
5021 | ld TMP1, LFUNC:TMP1->pc
5022 | ld KBASE, PC2PROTO(k)(TMP1)
5023 | ins_next2
5024 |
5025 |6: // Fill up results with nil.
5026 | sd TISNIL, 0(TMP2)
5027 | b <5
5028 |. daddiu TMP2, TMP2, 8
5029 |
5030 |->BC_RETV_Z: // Non-standard return case.
5031 | andi TMP2, TMP1, FRAME_TYPEP
5032 | bnez TMP2, ->vm_return
5033 |. nop
5034 | // Return from vararg function: relocate BASE down.
5035 | dsubu BASE, BASE, TMP1
5036 | b <1
5037 |. ld PC, FRAME_PC(BASE)
5038 break;
5039
5040 case BC_RET0: case BC_RET1:
5041 | // RA = results*8, RD = (nresults+1)*8
5042 | ld PC, FRAME_PC(BASE)
5043 | daddu RA, BASE, RA
5044 | move MULTRES, RD
5045 | andi TMP0, PC, FRAME_TYPE
5046 | bnez TMP0, ->BC_RETV_Z
5047 |. xori TMP1, PC, FRAME_VARG
5048 | lw INS, -4(PC)
5049 | daddiu TMP2, BASE, -16
5050 if (op == BC_RET1) {
5051 | ld CRET1, 0(RA)
5052 }
5053 | decode_RB8a RB, INS
5054 | decode_RA8a RA, INS
5055 | decode_RB8b RB
5056 | decode_RA8b RA
5057 | dsubu BASE, TMP2, RA
5058 if (op == BC_RET1) {
5059 | sd CRET1, 0(TMP2)
5060 }
5061 |5:
5062 | sltu AT, RD, RB
5063 | bnez AT, >6
5064 |. ld TMP1, FRAME_FUNC(BASE)
5065 | ins_next1
5066 | cleartp LFUNC:TMP1
5067 | ld TMP1, LFUNC:TMP1->pc
5068 | ld KBASE, PC2PROTO(k)(TMP1)
5069 | ins_next2
5070 |
5071 |6: // Fill up results with nil.
5072 | daddiu TMP2, TMP2, 8
5073 | daddiu RD, RD, 8
5074 | b <5
5075 if (op == BC_RET1) {
5076 |. sd TISNIL, 0(TMP2)
5077 } else {
5078 |. sd TISNIL, -8(TMP2)
5079 }
5080 break;
5081
5082 /* -- Loops and branches ------------------------------------------------ */
5083
5084 case BC_FORL:
5085 |.if JIT
5086 | hotloop
5087 |.endif
5088 | // Fall through. Assumes BC_IFORL follows.
5089 break;
5090
5091 case BC_JFORI:
5092 case BC_JFORL:
5093#if !LJ_HASJIT
5094 break;
5095#endif
5096 case BC_FORI:
5097 case BC_IFORL:
5098 | // RA = base*8, RD = target (after end of loop or start of loop)
5099 vk = (op == BC_IFORL || op == BC_JFORL);
5100 | daddu RA, BASE, RA
5101 | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type
5102 | gettp CARG3, CARG1
5103 if (op != BC_JFORL) {
5104 | srl RD, RD, 1
5105 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
5106 | daddu TMP2, RD, TMP2
5107 }
5108 if (!vk) {
5109 | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type
5110 | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type
5111 | gettp CARG4, CARG2
5112 | bne CARG3, TISNUM, >5
5113 |. gettp CRET2, CRET1
5114 | bne CARG4, TISNUM, ->vmeta_for
5115 |. sextw CARG3, CARG1
5116 | bne CRET2, TISNUM, ->vmeta_for
5117 |. sextw CARG2, CARG2
5118 | dext AT, CRET1, 31, 0
5119 | slt CRET1, CARG2, CARG3
5120 | slt TMP1, CARG3, CARG2
5121 |.if MIPSR6
5122 | selnez TMP1, TMP1, AT
5123 | seleqz CRET1, CRET1, AT
5124 | or CRET1, CRET1, TMP1
5125 |.else
5126 | movn CRET1, TMP1, AT
5127 |.endif
5128 } else {
5129 | bne CARG3, TISNUM, >5
5130 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type
5131 | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type
5132 | sextw TMP3, CARG1
5133 | sextw CARG2, CARG2
5134 | sextw CRET1, CRET1
5135 | addu CARG1, TMP3, CARG2
5136 | xor TMP0, CARG1, TMP3
5137 | xor TMP1, CARG1, CARG2
5138 | and TMP0, TMP0, TMP1
5139 | slt TMP1, CARG1, CRET1
5140 | slt CRET1, CRET1, CARG1
5141 | slt AT, CARG2, r0
5142 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
5143 |.if MIPSR6
5144 | selnez TMP1, TMP1, AT
5145 | seleqz CRET1, CRET1, AT
5146 | or CRET1, CRET1, TMP1
5147 |.else
5148 | movn CRET1, TMP1, AT
5149 |.endif
5150 | or CRET1, CRET1, TMP0
5151 | zextw CARG1, CARG1
5152 | settp CARG1, TISNUM
5153 }
5154 |1:
5155 if (op == BC_FORI) {
5156 |.if MIPSR6
5157 | selnez TMP2, TMP2, CRET1
5158 |.else
5159 | movz TMP2, r0, CRET1
5160 |.endif
5161 | daddu PC, PC, TMP2
5162 } else if (op == BC_JFORI) {
5163 | daddu PC, PC, TMP2
5164 | lhu RD, -4+OFS_RD(PC)
5165 } else if (op == BC_IFORL) {
5166 |.if MIPSR6
5167 | seleqz TMP2, TMP2, CRET1
5168 |.else
5169 | movn TMP2, r0, CRET1
5170 |.endif
5171 | daddu PC, PC, TMP2
5172 }
5173 if (vk) {
5174 | sd CARG1, FORL_IDX*8(RA)
5175 }
5176 | ins_next1
5177 | sd CARG1, FORL_EXT*8(RA)
5178 |2:
5179 if (op == BC_JFORI) {
5180 | beqz CRET1, =>BC_JLOOP
5181 |. decode_RD8b RD
5182 } else if (op == BC_JFORL) {
5183 | beqz CRET1, =>BC_JLOOP
5184 }
5185 | ins_next2
5186 |
5187 |5: // FP loop.
5188 |.if FPU
5189 if (!vk) {
5190 | ldc1 f0, FORL_IDX*8(RA)
5191 | ldc1 f2, FORL_STOP*8(RA)
5192 | sltiu TMP0, CARG3, LJ_TISNUM
5193 | sltiu TMP1, CARG4, LJ_TISNUM
5194 | sltiu AT, CRET2, LJ_TISNUM
5195 | ld TMP3, FORL_STEP*8(RA)
5196 | and TMP0, TMP0, TMP1
5197 | and AT, AT, TMP0
5198 | beqz AT, ->vmeta_for
5199 |. slt TMP3, TMP3, r0
5200 |.if MIPSR6
5201 | dmtc1 TMP3, FTMP2
5202 | cmp.lt.d FTMP0, f0, f2
5203 | cmp.lt.d FTMP1, f2, f0
5204 | sel.d FTMP2, FTMP1, FTMP0
5205 | b <1
5206 |. dmfc1 CRET1, FTMP2
5207 |.else
5208 | c.ole.d 0, f0, f2
5209 | c.ole.d 1, f2, f0
5210 | li CRET1, 1
5211 | movt CRET1, r0, 0
5212 | movt AT, r0, 1
5213 | b <1
5214 |. movn CRET1, AT, TMP3
5215 |.endif
5216 } else {
5217 | ldc1 f0, FORL_IDX*8(RA)
5218 | ldc1 f4, FORL_STEP*8(RA)
5219 | ldc1 f2, FORL_STOP*8(RA)
5220 | ld TMP3, FORL_STEP*8(RA)
5221 | add.d f0, f0, f4
5222 |.if MIPSR6
5223 | slt TMP3, TMP3, r0
5224 | dmtc1 TMP3, FTMP2
5225 | cmp.lt.d FTMP0, f0, f2
5226 | cmp.lt.d FTMP1, f2, f0
5227 | sel.d FTMP2, FTMP1, FTMP0
5228 | dmfc1 CRET1, FTMP2
5229 if (op == BC_IFORL) {
5230 | seleqz TMP2, TMP2, CRET1
5231 | daddu PC, PC, TMP2
5232 }
5233 |.else
5234 | c.ole.d 0, f0, f2
5235 | c.ole.d 1, f2, f0
5236 | slt TMP3, TMP3, r0
5237 | li CRET1, 1
5238 | li AT, 1
5239 | movt CRET1, r0, 0
5240 | movt AT, r0, 1
5241 | movn CRET1, AT, TMP3
5242 if (op == BC_IFORL) {
5243 | movn TMP2, r0, CRET1
5244 | daddu PC, PC, TMP2
5245 }
5246 |.endif
5247 | sdc1 f0, FORL_IDX*8(RA)
5248 | ins_next1
5249 | b <2
5250 |. sdc1 f0, FORL_EXT*8(RA)
5251 }
5252 |.else
5253 if (!vk) {
5254 | sltiu TMP0, CARG3, LJ_TISNUM
5255 | sltiu TMP1, CARG4, LJ_TISNUM
5256 | sltiu AT, CRET2, LJ_TISNUM
5257 | and TMP0, TMP0, TMP1
5258 | and AT, AT, TMP0
5259 | beqz AT, ->vmeta_for
5260 |. nop
5261 | bal ->vm_sfcmpolex
5262 |. lw TMP3, FORL_STEP*8+HI(RA)
5263 | b <1
5264 |. nop
5265 } else {
5266 | load_got __adddf3
5267 | call_extern
5268 |. sw TMP2, TMPD
5269 | ld CARG2, FORL_STOP*8(RA)
5270 | move CARG1, CRET1
5271 if (op == BC_JFORL) {
5272 | lhu RD, -4+OFS_RD(PC)
5273 | decode_RD8b RD
5274 }
5275 | bal ->vm_sfcmpolex
5276 |. lw TMP3, FORL_STEP*8+HI(RA)
5277 | b <1
5278 |. lw TMP2, TMPD
5279 }
5280 |.endif
5281 break;
5282
5283 case BC_ITERL:
5284 |.if JIT
5285 | hotloop
5286 |.endif
5287 | // Fall through. Assumes BC_IITERL follows.
5288 break;
5289
5290 case BC_JITERL:
5291#if !LJ_HASJIT
5292 break;
5293#endif
5294 case BC_IITERL:
5295 | // RA = base*8, RD = target
5296 | daddu RA, BASE, RA
5297 | ld TMP1, 0(RA)
5298 | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
5299 |. nop
5300 if (op == BC_JITERL) {
5301 | b =>BC_JLOOP
5302 |. sd TMP1, -8(RA)
5303 } else {
5304 | branch_RD // Otherwise save control var + branch.
5305 | sd TMP1, -8(RA)
5306 }
5307 |1:
5308 | ins_next
5309 break;
5310
5311 case BC_LOOP:
5312 | // RA = base*8, RD = target (loop extent)
5313 | // Note: RA/RD are only used by the trace recorder to determine scope/extent.
5314 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
5315 |.if JIT
5316 | hotloop
5317 |.endif
5318 | // Fall through. Assumes BC_ILOOP follows.
5319 break;
5320
5321 case BC_ILOOP:
5322 | // RA = base*8, RD = target (loop extent)
5323 | ins_next
5324 break;
5325
5326 case BC_JLOOP:
5327 |.if JIT
5328 | // RA = base*8 (ignored), RD = traceno*8
5329 | ld TMP1, DISPATCH_J(trace)(DISPATCH)
5330 | li AT, 0
5331 | daddu TMP1, TMP1, RD
5332 | // Traces on MIPS don't store the trace number, so use 0.
5333 | sd AT, DISPATCH_GL(vmstate)(DISPATCH)
5334 | ld TRACE:TMP2, 0(TMP1)
5335 | sd BASE, DISPATCH_GL(jit_base)(DISPATCH)
5336 | ld TMP2, TRACE:TMP2->mcode
5337 | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
5338 | jr TMP2
5339 |. daddiu JGL, DISPATCH, GG_DISP2G+32768
5340 |.endif
5341 break;
5342
5343 case BC_JMP:
5344 | // RA = base*8 (only used by trace recorder), RD = target
5345 | branch_RD
5346 | ins_next
5347 break;
5348
5349 /* -- Function headers -------------------------------------------------- */
5350
5351 case BC_FUNCF:
5352 |.if JIT
5353 | hotcall
5354 |.endif
5355 case BC_FUNCV: /* NYI: compiled vararg functions. */
5356 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
5357 break;
5358
5359 case BC_JFUNCF:
5360#if !LJ_HASJIT
5361 break;
5362#endif
5363 case BC_IFUNCF:
5364 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5365 | ld TMP2, L->maxstack
5366 | lbu TMP1, -4+PC2PROTO(numparams)(PC)
5367 | ld KBASE, -4+PC2PROTO(k)(PC)
5368 | sltu AT, TMP2, RA
5369 | bnez AT, ->vm_growstack_l
5370 |. sll TMP1, TMP1, 3
5371 if (op != BC_JFUNCF) {
5372 | ins_next1
5373 }
5374 |2:
5375 | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters.
5376 | bnez AT, >3
5377 |. daddu AT, BASE, NARGS8:RC
5378 if (op == BC_JFUNCF) {
5379 | decode_RD8a RD, INS
5380 | b =>BC_JLOOP
5381 |. decode_RD8b RD
5382 } else {
5383 | ins_next2
5384 }
5385 |
5386 |3: // Clear missing parameters.
5387 | sd TISNIL, 0(AT)
5388 | b <2
5389 |. addiu NARGS8:RC, NARGS8:RC, 8
5390 break;
5391
5392 case BC_JFUNCV:
5393#if !LJ_HASJIT
5394 break;
5395#endif
5396 | NYI // NYI: compiled vararg functions
5397 break; /* NYI: compiled vararg functions. */
5398
5399 case BC_IFUNCV:
5400 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5401 | li TMP0, LJ_TFUNC
5402 | daddu TMP1, BASE, RC
5403 | ld TMP2, L->maxstack
5404 | settp LFUNC:RB, TMP0
5405 | daddu TMP0, RA, RC
5406 | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
5407 | daddiu TMP2, TMP2, -8
5408 | daddiu TMP3, RC, 16+FRAME_VARG
5409 | sltu AT, TMP0, TMP2
5410 | ld KBASE, -4+PC2PROTO(k)(PC)
5411 | beqz AT, ->vm_growstack_l
5412 |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
5413 | lbu TMP2, -4+PC2PROTO(numparams)(PC)
5414 | move RA, BASE
5415 | move RC, TMP1
5416 | ins_next1
5417 | beqz TMP2, >3
5418 |. daddiu BASE, TMP1, 16
5419 |1:
5420 | ld TMP0, 0(RA)
5421 | sltu AT, RA, RC // Less args than parameters?
5422 | move CARG1, TMP0
5423 |.if MIPSR6
5424 | selnez TMP0, TMP0, AT
5425 | seleqz TMP3, TISNIL, AT
5426 | or TMP0, TMP0, TMP3
5427 | seleqz TMP3, CARG1, AT
5428 | selnez CARG1, TISNIL, AT
5429 | or CARG1, CARG1, TMP3
5430 |.else
5431 | movz TMP0, TISNIL, AT // Clear missing parameters.
5432 | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC).
5433 |.endif
5434 | addiu TMP2, TMP2, -1
5435 | sd TMP0, 16(TMP1)
5436 | daddiu TMP1, TMP1, 8
5437 | sd CARG1, 0(RA)
5438 | bnez TMP2, <1
5439 |. daddiu RA, RA, 8
5440 |3:
5441 | ins_next2
5442 break;
5443
5444 case BC_FUNCC:
5445 case BC_FUNCCW:
5446 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
5447 if (op == BC_FUNCC) {
5448 | ld CFUNCADDR, CFUNC:RB->f
5449 } else {
5450 | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH)
5451 }
5452 | daddu TMP1, RA, NARGS8:RC
5453 | ld TMP2, L->maxstack
5454 | daddu RC, BASE, NARGS8:RC
5455 | sd BASE, L->base
5456 | sltu AT, TMP2, TMP1
5457 | sd RC, L->top
5458 | li_vmstate C
5459 if (op == BC_FUNCCW) {
5460 | ld CARG2, CFUNC:RB->f
5461 }
5462 | bnez AT, ->vm_growstack_c // Need to grow stack.
5463 |. move CARG1, L
5464 | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f])
5465 |. st_vmstate
5466 | // Returns nresults.
5467 | ld BASE, L->base
5468 | sll RD, CRET1, 3
5469 | ld TMP1, L->top
5470 | li_vmstate INTERP
5471 | ld PC, FRAME_PC(BASE) // Fetch PC of caller.
5472 | dsubu RA, TMP1, RD // RA = L->top - nresults*8
5473 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
5474 | b ->vm_returnc
5475 |. st_vmstate
5476 break;
5477
5478 /* ---------------------------------------------------------------------- */
5479
5480 default:
5481 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
5482 exit(2);
5483 break;
5484 }
5485}
5486
5487static int build_backend(BuildCtx *ctx)
5488{
5489 int op;
5490
5491 dasm_growpc(Dst, BC__MAX);
5492
5493 build_subroutines(ctx);
5494
5495 |.code_op
5496 for (op = 0; op < BC__MAX; op++)
5497 build_ins(ctx, (BCOp)op, op);
5498
5499 return BC__MAX;
5500}
5501
5502/* Emit pseudo frame-info for all assembler functions. */
5503static void emit_asm_debug(BuildCtx *ctx)
5504{
5505 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
5506 int i;
5507 switch (ctx->mode) {
5508 case BUILD_elfasm:
5509 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
5510 fprintf(ctx->fp,
5511 ".Lframe0:\n"
5512 "\t.4byte .LECIE0-.LSCIE0\n"
5513 ".LSCIE0:\n"
5514 "\t.4byte 0xffffffff\n"
5515 "\t.byte 0x1\n"
5516 "\t.string \"\"\n"
5517 "\t.uleb128 0x1\n"
5518 "\t.sleb128 -4\n"
5519 "\t.byte 31\n"
5520 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n"
5521 "\t.align 2\n"
5522 ".LECIE0:\n\n");
5523 fprintf(ctx->fp,
5524 ".LSFDE0:\n"
5525 "\t.4byte .LEFDE0-.LASFDE0\n"
5526 ".LASFDE0:\n"
5527 "\t.4byte .Lframe0\n"
5528 "\t.8byte .Lbegin\n"
5529 "\t.8byte %d\n"
5530 "\t.byte 0xe\n\t.uleb128 %d\n"
5531 "\t.byte 0x9f\n\t.sleb128 2*5\n"
5532 "\t.byte 0x9e\n\t.sleb128 2*6\n",
5533 fcofs, CFRAME_SIZE);
5534 for (i = 23; i >= 16; i--)
5535 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i));
5536#if !LJ_SOFTFP
5537 for (i = 31; i >= 24; i--)
5538 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i));
5539#endif
5540 fprintf(ctx->fp,
5541 "\t.align 2\n"
5542 ".LEFDE0:\n\n");
5543#if LJ_HASFFI
5544 fprintf(ctx->fp,
5545 ".LSFDE1:\n"
5546 "\t.4byte .LEFDE1-.LASFDE1\n"
5547 ".LASFDE1:\n"
5548 "\t.4byte .Lframe0\n"
5549 "\t.4byte lj_vm_ffi_call\n"
5550 "\t.4byte %d\n"
5551 "\t.byte 0x9f\n\t.uleb128 2*1\n"
5552 "\t.byte 0x90\n\t.uleb128 2*2\n"
5553 "\t.byte 0xd\n\t.uleb128 0x10\n"
5554 "\t.align 2\n"
5555 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
5556#endif
5557#if !LJ_NO_UNWIND
5558 /* NYI */
5559#endif
5560 break;
5561 default:
5562 break;
5563 }
5564}
5565
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index d6792f2c..abcc03e5 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
1|// Low-level VM code for PowerPC CPUs. 1|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4| 4|
@@ -18,7 +18,6 @@
18|// DynASM defines used by the PPC port: 18|// DynASM defines used by the PPC port:
19|// 19|//
20|// P64 64 bit pointers (only for GPR64 testing). 20|// P64 64 bit pointers (only for GPR64 testing).
21|// Note: a full PPC64 _LP64 port is not planned.
22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). 21|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
23|// Affects reg saves, stack layout, carry/overflow/dot flags etc. 22|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). 23|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -103,6 +102,18 @@
103|// Fixed register assignments for the interpreter. 102|// Fixed register assignments for the interpreter.
104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) 103|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
105| 104|
105|.macro .FPU, a, b
106|.if FPU
107| a, b
108|.endif
109|.endmacro
110|
111|.macro .FPU, a, b, c
112|.if FPU
113| a, b, c
114|.endif
115|.endmacro
116|
106|// The following must be C callee-save (but BASE is often refetched). 117|// The following must be C callee-save (but BASE is often refetched).
107|.define BASE, r14 // Base of current Lua stack frame. 118|.define BASE, r14 // Base of current Lua stack frame.
108|.define KBASE, r15 // Constants of current Lua function. 119|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +127,10 @@
116|.define TISNUM, r22 127|.define TISNUM, r22
117|.define TISNIL, r23 128|.define TISNIL, r23
118|.define ZERO, r24 129|.define ZERO, r24
130|.if FPU
119|.define TOBIT, f30 // 2^52 + 2^51. 131|.define TOBIT, f30 // 2^52 + 2^51.
120|.define TONUM, f31 // 2^52 + 2^51 + 2^31. 132|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
133|.endif
121| 134|
122|// The following temporaries are not saved across C calls, except for RA. 135|// The following temporaries are not saved across C calls, except for RA.
123|.define RA, r20 // Callee-save. 136|.define RA, r20 // Callee-save.
@@ -133,6 +146,7 @@
133| 146|
134|// Saved temporaries. 147|// Saved temporaries.
135|.define SAVE0, r21 148|.define SAVE0, r21
149|.define SAVE1, r25
136| 150|
137|// Calling conventions. 151|// Calling conventions.
138|.define CARG1, r3 152|.define CARG1, r3
@@ -141,8 +155,10 @@
141|.define CARG4, r6 // Overlaps TMP3. 155|.define CARG4, r6 // Overlaps TMP3.
142|.define CARG5, r7 // Overlaps INS. 156|.define CARG5, r7 // Overlaps INS.
143| 157|
158|.if FPU
144|.define FARG1, f1 159|.define FARG1, f1
145|.define FARG2, f2 160|.define FARG2, f2
161|.endif
146| 162|
147|.define CRET1, r3 163|.define CRET1, r3
148|.define CRET2, r4 164|.define CRET2, r4
@@ -213,10 +229,16 @@
213|.endif 229|.endif
214|.else 230|.else
215| 231|
232|.if FPU
216|.define SAVE_LR, 276(sp) 233|.define SAVE_LR, 276(sp)
217|.define CFRAME_SPACE, 272 // Delta for sp. 234|.define CFRAME_SPACE, 272 // Delta for sp.
218|// Back chain for sp: 272(sp) <-- sp entering interpreter 235|// Back chain for sp: 272(sp) <-- sp entering interpreter
219|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. 236|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
237|.else
238|.define SAVE_LR, 132(sp)
239|.define CFRAME_SPACE, 128 // Delta for sp.
240|// Back chain for sp: 128(sp) <-- sp entering interpreter
241|.endif
220|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. 242|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
221|.define SAVE_CR, 52(sp) // 32 bit CR save. 243|.define SAVE_CR, 52(sp) // 32 bit CR save.
222|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 244|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +248,25 @@
226|.define SAVE_PC, 32(sp) 248|.define SAVE_PC, 32(sp)
227|.define SAVE_MULTRES, 28(sp) 249|.define SAVE_MULTRES, 28(sp)
228|.define UNUSED1, 24(sp) 250|.define UNUSED1, 24(sp)
251|.if FPU
229|.define TMPD_LO, 20(sp) 252|.define TMPD_LO, 20(sp)
230|.define TMPD_HI, 16(sp) 253|.define TMPD_HI, 16(sp)
231|.define TONUM_LO, 12(sp) 254|.define TONUM_LO, 12(sp)
232|.define TONUM_HI, 8(sp) 255|.define TONUM_HI, 8(sp)
256|.else
257|.define SFSAVE_4, 20(sp)
258|.define SFSAVE_3, 16(sp)
259|.define SFSAVE_2, 12(sp)
260|.define SFSAVE_1, 8(sp)
261|.endif
233|// Next frame lr: 4(sp) 262|// Next frame lr: 4(sp)
234|// Back chain for sp: 0(sp) <-- sp while in interpreter 263|// Back chain for sp: 0(sp) <-- sp while in interpreter
235| 264|
265|.if FPU
236|.define TMPD_BLO, 23(sp) 266|.define TMPD_BLO, 23(sp)
237|.define TMPD, TMPD_HI 267|.define TMPD, TMPD_HI
238|.define TONUM_D, TONUM_HI 268|.define TONUM_D, TONUM_HI
269|.endif
239| 270|
240|.endif 271|.endif
241| 272|
@@ -245,7 +276,7 @@
245|.else 276|.else
246| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 277| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
247|.endif 278|.endif
248| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 279| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
249|.endmacro 280|.endmacro
250|.macro rest_, reg 281|.macro rest_, reg
251|.if GPR64 282|.if GPR64
@@ -253,7 +284,7 @@
253|.else 284|.else
254| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) 285| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
255|.endif 286|.endif
256| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 287| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
257|.endmacro 288|.endmacro
258| 289|
259|.macro saveregs 290|.macro saveregs
@@ -316,19 +347,14 @@
316|.type NODE, Node 347|.type NODE, Node
317|.type NARGS8, int 348|.type NARGS8, int
318|.type TRACE, GCtrace 349|.type TRACE, GCtrace
350|.type SBUF, SBuf
319| 351|
320|//----------------------------------------------------------------------- 352|//-----------------------------------------------------------------------
321| 353|
322|// These basic macros should really be part of DynASM.
323|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
324|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
325|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
326|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
327|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
328|
329|// Trap for not-yet-implemented parts. 354|// Trap for not-yet-implemented parts.
330|.macro NYI; tw 4, sp, sp; .endmacro 355|.macro NYI; tw 4, sp, sp; .endmacro
331| 356|
357|.if FPU
332|// int/FP conversions. 358|// int/FP conversions.
333|.macro tonum_i, freg, reg 359|.macro tonum_i, freg, reg
334| xoris reg, reg, 0x8000 360| xoris reg, reg, 0x8000
@@ -352,6 +378,7 @@
352|.macro toint, reg, freg 378|.macro toint, reg, freg
353| toint reg, freg, freg 379| toint reg, freg, freg
354|.endmacro 380|.endmacro
381|.endif
355| 382|
356|//----------------------------------------------------------------------- 383|//-----------------------------------------------------------------------
357| 384|
@@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx)
539 | beq >2 566 | beq >2
540 |1: 567 |1:
541 | addic. TMP1, TMP1, -8 568 | addic. TMP1, TMP1, -8
569 |.if FPU
542 | lfd f0, 0(RA) 570 | lfd f0, 0(RA)
571 |.else
572 | lwz CARG1, 0(RA)
573 | lwz CARG2, 4(RA)
574 |.endif
543 | addi RA, RA, 8 575 | addi RA, RA, 8
576 |.if FPU
544 | stfd f0, 0(BASE) 577 | stfd f0, 0(BASE)
578 |.else
579 | stw CARG1, 0(BASE)
580 | stw CARG2, 4(BASE)
581 |.endif
545 | addi BASE, BASE, 8 582 | addi BASE, BASE, 8
546 | bney <1 583 | bney <1
547 | 584 |
@@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx)
619 | .toc ld TOCREG, SAVE_TOC 656 | .toc ld TOCREG, SAVE_TOC
620 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 657 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
621 | lp BASE, L->base 658 | lp BASE, L->base
622 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 659 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
623 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 660 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
624 | li ZERO, 0 661 | li ZERO, 0
625 | stw TMP3, TMPD 662 | .FPU stw TMP3, TMPD
626 | li TMP1, LJ_TFALSE 663 | li TMP1, LJ_TFALSE
627 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 664 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
628 | li TISNIL, LJ_TNIL 665 | li TISNIL, LJ_TNIL
629 | li_vmstate INTERP 666 | li_vmstate INTERP
630 | lfs TOBIT, TMPD 667 | .FPU lfs TOBIT, TMPD
631 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 668 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
632 | la RA, -8(BASE) // Results start at BASE-8. 669 | la RA, -8(BASE) // Results start at BASE-8.
633 | stw TMP3, TMPD 670 | .FPU stw TMP3, TMPD
634 | addi DISPATCH, DISPATCH, GG_G2DISP 671 | addi DISPATCH, DISPATCH, GG_G2DISP
635 | stw TMP1, 0(RA) // Prepend false to error message. 672 | stw TMP1, 0(RA) // Prepend false to error message.
636 | li RD, 16 // 2 results: false + error message. 673 | li RD, 16 // 2 results: false + error message.
637 | st_vmstate 674 | st_vmstate
638 | lfs TONUM, TMPD 675 | .FPU lfs TONUM, TMPD
639 | b ->vm_returnc 676 | b ->vm_returnc
640 | 677 |
641 |//----------------------------------------------------------------------- 678 |//-----------------------------------------------------------------------
@@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx)
684 | stw CARG3, SAVE_NRES 721 | stw CARG3, SAVE_NRES
685 | cmplwi TMP1, 0 722 | cmplwi TMP1, 0
686 | stw CARG3, SAVE_ERRF 723 | stw CARG3, SAVE_ERRF
687 | stp TMP0, L->cframe
688 | stp CARG3, SAVE_CFRAME 724 | stp CARG3, SAVE_CFRAME
689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 725 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
726 | stp TMP0, L->cframe
690 | beq >3 727 | beq >3
691 | 728 |
692 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
693 | mr RA, BASE 731 | mr RA, BASE
694 | lp BASE, L->base 732 | lp BASE, L->base
695 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 733 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
696 | lp TMP1, L->top 734 | lp TMP1, L->top
697 | lwz PC, FRAME_PC(BASE) 735 | lwz PC, FRAME_PC(BASE)
698 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 736 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
699 | stb CARG3, L->status 737 | stb CARG3, L->status
700 | stw TMP3, TMPD 738 | .FPU stw TMP3, TMPD
701 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 739 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
702 | lfs TOBIT, TMPD 740 | .FPU lfs TOBIT, TMPD
703 | sub RD, TMP1, BASE 741 | sub RD, TMP1, BASE
704 | stw TMP3, TMPD 742 | .FPU stw TMP3, TMPD
705 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 743 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
706 | addi RD, RD, 8 744 | addi RD, RD, 8
707 | stw TMP0, TONUM_HI 745 | .FPU stw TMP0, TONUM_HI
708 | li_vmstate INTERP 746 | li_vmstate INTERP
709 | li ZERO, 0 747 | li ZERO, 0
710 | st_vmstate 748 | st_vmstate
711 | andix. TMP0, PC, FRAME_TYPE 749 | andix. TMP0, PC, FRAME_TYPE
712 | mr MULTRES, RD 750 | mr MULTRES, RD
713 | lfs TONUM, TMPD 751 | .FPU lfs TONUM, TMPD
714 | li TISNIL, LJ_TNIL 752 | li TISNIL, LJ_TNIL
715 | beq ->BC_RET_Z 753 | beq ->BC_RET_Z
716 | b ->vm_return 754 | b ->vm_return
@@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx)
729 | 767 |
730 |1: // Entry point for vm_pcall above (PC = ftype). 768 |1: // Entry point for vm_pcall above (PC = ftype).
731 | lp TMP1, L:CARG1->cframe 769 | lp TMP1, L:CARG1->cframe
732 | stw CARG3, SAVE_NRES
733 | mr L, CARG1 770 | mr L, CARG1
734 | stw CARG1, SAVE_L 771 | stw CARG3, SAVE_NRES
735 | mr BASE, CARG2
736 | stp sp, L->cframe // Add our C frame to cframe chain.
737 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 772 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
773 | stw CARG1, SAVE_L
774 | mr BASE, CARG2
775 | addi DISPATCH, DISPATCH, GG_G2DISP
738 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 776 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
739 | stp TMP1, SAVE_CFRAME 777 | stp TMP1, SAVE_CFRAME
740 | addi DISPATCH, DISPATCH, GG_G2DISP 778 | stp sp, L->cframe // Add our C frame to cframe chain.
741 | 779 |
742 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 780 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
781 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
743 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 782 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
744 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 783 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
745 | lp TMP1, L->top 784 | lp TMP1, L->top
746 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 785 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
747 | add PC, PC, BASE 786 | add PC, PC, BASE
748 | stw TMP3, TMPD 787 | .FPU stw TMP3, TMPD
749 | li ZERO, 0 788 | li ZERO, 0
750 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 789 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
751 | lfs TOBIT, TMPD 790 | .FPU lfs TOBIT, TMPD
752 | sub PC, PC, TMP2 // PC = frame delta + frame type 791 | sub PC, PC, TMP2 // PC = frame delta + frame type
753 | stw TMP3, TMPD 792 | .FPU stw TMP3, TMPD
754 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 793 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
755 | sub NARGS8:RC, TMP1, BASE 794 | sub NARGS8:RC, TMP1, BASE
756 | stw TMP0, TONUM_HI 795 | .FPU stw TMP0, TONUM_HI
757 | li_vmstate INTERP 796 | li_vmstate INTERP
758 | lfs TONUM, TMPD 797 | .FPU lfs TONUM, TMPD
759 | li TISNIL, LJ_TNIL 798 | li TISNIL, LJ_TNIL
760 | st_vmstate 799 | st_vmstate
761 | 800 |
@@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx)
776 | lwz TMP0, L:CARG1->stack 815 | lwz TMP0, L:CARG1->stack
777 | stw CARG1, SAVE_L 816 | stw CARG1, SAVE_L
778 | lp TMP1, L->top 817 | lp TMP1, L->top
818 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
779 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 819 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
780 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 820 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
781 | lp TMP1, L->cframe 821 | lp TMP1, L->cframe
782 | stp sp, L->cframe // Add our C frame to cframe chain. 822 | addi DISPATCH, DISPATCH, GG_G2DISP
783 | .toc lp CARG4, 0(CARG4) 823 | .toc lp CARG4, 0(CARG4)
784 | li TMP2, 0 824 | li TMP2, 0
785 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 825 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
786 | stw TMP2, SAVE_ERRF // No error function. 826 | stw TMP2, SAVE_ERRF // No error function.
787 | stp TMP1, SAVE_CFRAME 827 | stp TMP1, SAVE_CFRAME
828 | stp sp, L->cframe // Add our C frame to cframe chain.
829 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
788 | mtctr CARG4 830 | mtctr CARG4
789 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 831 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
790 |.if PPE 832 |.if PPE
@@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
793 |.else 835 |.else
794 | mr. BASE, CRET1 836 | mr. BASE, CRET1
795 |.endif 837 |.endif
796 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 838 | li PC, FRAME_CP
797 | li PC, FRAME_CP
798 | addi DISPATCH, DISPATCH, GG_G2DISP
799 | bne <3 // Else continue with the call. 839 | bne <3 // Else continue with the call.
800 | b ->vm_leave_cp // No base? Just remove C frame. 840 | b ->vm_leave_cp // No base? Just remove C frame.
801 | 841 |
@@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx)
842 | lwz INS, -4(PC) 882 | lwz INS, -4(PC)
843 | subi CARG2, RB, 16 883 | subi CARG2, RB, 16
844 | decode_RB8 SAVE0, INS 884 | decode_RB8 SAVE0, INS
885 |.if FPU
845 | lfd f0, 0(RA) 886 | lfd f0, 0(RA)
887 |.else
888 | lwz TMP2, 0(RA)
889 | lwz TMP3, 4(RA)
890 |.endif
846 | add TMP1, BASE, SAVE0 891 | add TMP1, BASE, SAVE0
847 | stp BASE, L->base 892 | stp BASE, L->base
848 | cmplw TMP1, CARG2 893 | cmplw TMP1, CARG2
849 | sub CARG3, CARG2, TMP1 894 | sub CARG3, CARG2, TMP1
850 | decode_RA8 RA, INS 895 | decode_RA8 RA, INS
896 |.if FPU
851 | stfd f0, 0(CARG2) 897 | stfd f0, 0(CARG2)
898 |.else
899 | stw TMP2, 0(CARG2)
900 | stw TMP3, 4(CARG2)
901 |.endif
852 | bney ->BC_CAT_Z 902 | bney ->BC_CAT_Z
903 |.if FPU
853 | stfdx f0, BASE, RA 904 | stfdx f0, BASE, RA
905 |.else
906 | stwux TMP2, RA, BASE
907 | stw TMP3, 4(RA)
908 |.endif
854 | b ->cont_nop 909 | b ->cont_nop
855 | 910 |
856 |//-- Table indexing metamethods ----------------------------------------- 911 |//-- Table indexing metamethods -----------------------------------------
@@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx)
903 | // Returns TValue * (finished) or NULL (metamethod). 958 | // Returns TValue * (finished) or NULL (metamethod).
904 | cmplwi CRET1, 0 959 | cmplwi CRET1, 0
905 | beq >3 960 | beq >3
961 |.if FPU
906 | lfd f0, 0(CRET1) 962 | lfd f0, 0(CRET1)
963 |.else
964 | lwz TMP0, 0(CRET1)
965 | lwz TMP1, 4(CRET1)
966 |.endif
907 | ins_next1 967 | ins_next1
968 |.if FPU
908 | stfdx f0, BASE, RA 969 | stfdx f0, BASE, RA
970 |.else
971 | stwux TMP0, RA, BASE
972 | stw TMP1, 4(RA)
973 |.endif
909 | ins_next2 974 | ins_next2
910 | 975 |
911 |3: // Call __index metamethod. 976 |3: // Call __index metamethod.
@@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx)
918 | li NARGS8:RC, 16 // 2 args for func(t, k). 983 | li NARGS8:RC, 16 // 2 args for func(t, k).
919 | b ->vm_call_dispatch_f 984 | b ->vm_call_dispatch_f
920 | 985 |
986 |->vmeta_tgetr:
987 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
988 | // Returns cTValue * or NULL.
989 | cmplwi CRET1, 0
990 | beq >1
991 |.if FPU
992 | lfd f14, 0(CRET1)
993 |.else
994 | lwz SAVE0, 0(CRET1)
995 | lwz SAVE1, 4(CRET1)
996 |.endif
997 | b ->BC_TGETR_Z
998 |1:
999 | stwx TISNIL, BASE, RA
1000 | b ->cont_nop
1001 |
921 |//----------------------------------------------------------------------- 1002 |//-----------------------------------------------------------------------
922 | 1003 |
923 |->vmeta_tsets1: 1004 |->vmeta_tsets1:
@@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx)
967 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 1048 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
968 | // Returns TValue * (finished) or NULL (metamethod). 1049 | // Returns TValue * (finished) or NULL (metamethod).
969 | cmplwi CRET1, 0 1050 | cmplwi CRET1, 0
1051 |.if FPU
970 | lfdx f0, BASE, RA 1052 | lfdx f0, BASE, RA
1053 |.else
1054 | lwzux TMP2, RA, BASE
1055 | lwz TMP3, 4(RA)
1056 |.endif
971 | beq >3 1057 | beq >3
972 | // NOBARRIER: lj_meta_tset ensures the table is not black. 1058 | // NOBARRIER: lj_meta_tset ensures the table is not black.
973 | ins_next1 1059 | ins_next1
1060 |.if FPU
974 | stfd f0, 0(CRET1) 1061 | stfd f0, 0(CRET1)
1062 |.else
1063 | stw TMP2, 0(CRET1)
1064 | stw TMP3, 4(CRET1)
1065 |.endif
975 | ins_next2 1066 | ins_next2
976 | 1067 |
977 |3: // Call __newindex metamethod. 1068 |3: // Call __newindex metamethod.
@@ -982,9 +1073,28 @@ static void build_subroutines(BuildCtx *ctx)
982 | add PC, TMP1, BASE 1073 | add PC, TMP1, BASE
983 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1074 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
984 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 1075 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
1076 |.if FPU
985 | stfd f0, 16(BASE) // Copy value to third argument. 1077 | stfd f0, 16(BASE) // Copy value to third argument.
1078 |.else
1079 | stw TMP2, 16(BASE)
1080 | stw TMP3, 20(BASE)
1081 |.endif
986 | b ->vm_call_dispatch_f 1082 | b ->vm_call_dispatch_f
987 | 1083 |
1084 |->vmeta_tsetr:
1085 | stp BASE, L->base
1086 | mr CARG1, L
1087 | stw PC, SAVE_PC
1088 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1089 | // Returns TValue *.
1090 |.if FPU
1091 | stfd f14, 0(CRET1)
1092 |.else
1093 | stw SAVE0, 0(CRET1)
1094 | stw SAVE1, 4(CRET1)
1095 |.endif
1096 | b ->cont_nop
1097 |
988 |//-- Comparison metamethods --------------------------------------------- 1098 |//-- Comparison metamethods ---------------------------------------------
989 | 1099 |
990 |->vmeta_comp: 1100 |->vmeta_comp:
@@ -1021,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
1021 | 1131 |
1022 |->cont_ra: // RA = resultptr 1132 |->cont_ra: // RA = resultptr
1023 | lwz INS, -4(PC) 1133 | lwz INS, -4(PC)
1134 |.if FPU
1024 | lfd f0, 0(RA) 1135 | lfd f0, 0(RA)
1136 |.else
1137 | lwz CARG1, 0(RA)
1138 | lwz CARG2, 4(RA)
1139 |.endif
1025 | decode_RA8 TMP1, INS 1140 | decode_RA8 TMP1, INS
1141 |.if FPU
1026 | stfdx f0, BASE, TMP1 1142 | stfdx f0, BASE, TMP1
1143 |.else
1144 | stwux CARG1, TMP1, BASE
1145 | stw CARG2, 4(TMP1)
1146 |.endif
1027 | b ->cont_nop 1147 | b ->cont_nop
1028 | 1148 |
1029 |->cont_condt: // RA = resultptr 1149 |->cont_condt: // RA = resultptr
@@ -1063,6 +1183,16 @@ static void build_subroutines(BuildCtx *ctx)
1063 | b <3 1183 | b <3
1064 |.endif 1184 |.endif
1065 | 1185 |
1186 |->vmeta_istype:
1187 | subi PC, PC, 4
1188 | stp BASE, L->base
1189 | srwi CARG2, RA, 3
1190 | mr CARG1, L
1191 | srwi CARG3, RD, 3
1192 | stw PC, SAVE_PC
1193 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1194 | b ->cont_nop
1195 |
1066 |//-- Arithmetic metamethods --------------------------------------------- 1196 |//-- Arithmetic metamethods ---------------------------------------------
1067 | 1197 |
1068 |->vmeta_arith_nv: 1198 |->vmeta_arith_nv:
@@ -1219,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
1219 |.macro .ffunc_n, name 1349 |.macro .ffunc_n, name
1220 |->ff_ .. name: 1350 |->ff_ .. name:
1221 | cmplwi NARGS8:RC, 8 1351 | cmplwi NARGS8:RC, 8
1222 | lwz CARG3, 0(BASE) 1352 | lwz CARG1, 0(BASE)
1353 |.if FPU
1223 | lfd FARG1, 0(BASE) 1354 | lfd FARG1, 0(BASE)
1355 |.else
1356 | lwz CARG2, 4(BASE)
1357 |.endif
1224 | blt ->fff_fallback 1358 | blt ->fff_fallback
1225 | checknum CARG3; bge ->fff_fallback 1359 | checknum CARG1; bge ->fff_fallback
1226 |.endmacro 1360 |.endmacro
1227 | 1361 |
1228 |.macro .ffunc_nn, name 1362 |.macro .ffunc_nn, name
1229 |->ff_ .. name: 1363 |->ff_ .. name:
1230 | cmplwi NARGS8:RC, 16 1364 | cmplwi NARGS8:RC, 16
1231 | lwz CARG3, 0(BASE) 1365 | lwz CARG1, 0(BASE)
1366 |.if FPU
1232 | lfd FARG1, 0(BASE) 1367 | lfd FARG1, 0(BASE)
1233 | lwz CARG4, 8(BASE) 1368 | lwz CARG3, 8(BASE)
1234 | lfd FARG2, 8(BASE) 1369 | lfd FARG2, 8(BASE)
1370 |.else
1371 | lwz CARG2, 4(BASE)
1372 | lwz CARG3, 8(BASE)
1373 | lwz CARG4, 12(BASE)
1374 |.endif
1235 | blt ->fff_fallback 1375 | blt ->fff_fallback
1376 | checknum CARG1; bge ->fff_fallback
1236 | checknum CARG3; bge ->fff_fallback 1377 | checknum CARG3; bge ->fff_fallback
1237 | checknum CARG4; bge ->fff_fallback
1238 |.endmacro 1378 |.endmacro
1239 | 1379 |
1240 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 1380 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1255,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
1255 | bge cr1, ->fff_fallback 1395 | bge cr1, ->fff_fallback
1256 | stw CARG3, 0(RA) 1396 | stw CARG3, 0(RA)
1257 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1397 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1398 | addi TMP1, BASE, 8
1399 | add TMP2, RA, NARGS8:RC
1258 | stw CARG1, 4(RA) 1400 | stw CARG1, 4(RA)
1259 | beq ->fff_res // Done if exactly 1 argument. 1401 | beq ->fff_res // Done if exactly 1 argument.
1260 | li TMP1, 8
1261 | subi RC, RC, 8
1262 |1: 1402 |1:
1263 | cmplw TMP1, RC 1403 | cmplw TMP1, TMP2
1264 | lfdx f0, BASE, TMP1 1404 |.if FPU
1265 | stfdx f0, RA, TMP1 1405 | lfd f0, 0(TMP1)
1406 | stfd f0, 0(TMP1)
1407 |.else
1408 | lwz CARG1, 0(TMP1)
1409 | lwz CARG2, 4(TMP1)
1410 | stw CARG1, -8(TMP1)
1411 | stw CARG2, -4(TMP1)
1412 |.endif
1266 | addi TMP1, TMP1, 8 1413 | addi TMP1, TMP1, 8
1267 | bney <1 1414 | bney <1
1268 | b ->fff_res 1415 | b ->fff_res
@@ -1277,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
1277 | orc TMP1, TMP2, TMP0 1424 | orc TMP1, TMP2, TMP0
1278 | addi TMP1, TMP1, ~LJ_TISNUM+1 1425 | addi TMP1, TMP1, ~LJ_TISNUM+1
1279 | slwi TMP1, TMP1, 3 1426 | slwi TMP1, TMP1, 3
1427 |.if FPU
1280 | la TMP2, CFUNC:RB->upvalue 1428 | la TMP2, CFUNC:RB->upvalue
1281 | lfdx FARG1, TMP2, TMP1 1429 | lfdx FARG1, TMP2, TMP1
1430 |.else
1431 | add TMP1, CFUNC:RB, TMP1
1432 | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
1433 | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
1434 |.endif
1282 | b ->fff_resn 1435 | b ->fff_resn
1283 | 1436 |
1284 |//-- Base library: getters and setters --------------------------------- 1437 |//-- Base library: getters and setters ---------------------------------
@@ -1294,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx)
1294 | beq ->fff_restv 1447 | beq ->fff_restv
1295 | lwz TMP0, TAB:CARG1->hmask 1448 | lwz TMP0, TAB:CARG1->hmask
1296 | li CARG3, LJ_TTAB // Use metatable as default result. 1449 | li CARG3, LJ_TTAB // Use metatable as default result.
1297 | lwz TMP1, STR:RC->hash 1450 | lwz TMP1, STR:RC->sid
1298 | lwz NODE:TMP2, TAB:CARG1->node 1451 | lwz NODE:TMP2, TAB:CARG1->node
1299 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1452 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
1300 | slwi TMP0, TMP1, 5 1453 | slwi TMP0, TMP1, 5
1301 | slwi TMP1, TMP1, 3 1454 | slwi TMP1, TMP1, 3
1302 | sub TMP1, TMP0, TMP1 1455 | sub TMP1, TMP0, TMP1
@@ -1356,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
1356 | mr CARG1, L 1509 | mr CARG1, L
1357 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1510 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1358 | // Returns cTValue *. 1511 | // Returns cTValue *.
1512 |.if FPU
1359 | lfd FARG1, 0(CRET1) 1513 | lfd FARG1, 0(CRET1)
1514 |.else
1515 | lwz CARG2, 4(CRET1)
1516 | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
1517 |.endif
1360 | b ->fff_resn 1518 | b ->fff_resn
1361 | 1519 |
1362 |//-- Base library: conversions ------------------------------------------ 1520 |//-- Base library: conversions ------------------------------------------
@@ -1365,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
1365 | // Only handles the number case inline (without a base argument). 1523 | // Only handles the number case inline (without a base argument).
1366 | cmplwi NARGS8:RC, 8 1524 | cmplwi NARGS8:RC, 8
1367 | lwz CARG1, 0(BASE) 1525 | lwz CARG1, 0(BASE)
1526 |.if FPU
1368 | lfd FARG1, 0(BASE) 1527 | lfd FARG1, 0(BASE)
1528 |.else
1529 | lwz CARG2, 4(BASE)
1530 |.endif
1369 | bne ->fff_fallback // Exactly one argument. 1531 | bne ->fff_fallback // Exactly one argument.
1370 | checknum CARG1; bgt ->fff_fallback 1532 | checknum CARG1; bgt ->fff_fallback
1371 | b ->fff_resn 1533 | b ->fff_resn
@@ -1387,9 +1549,9 @@ static void build_subroutines(BuildCtx *ctx)
1387 | mr CARG1, L 1549 | mr CARG1, L
1388 | mr CARG2, BASE 1550 | mr CARG2, BASE
1389 |.if DUALNUM 1551 |.if DUALNUM
1390 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1552 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1391 |.else 1553 |.else
1392 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1554 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1393 |.endif 1555 |.endif
1394 | // Returns GCstr *. 1556 | // Returns GCstr *.
1395 | li CARG3, LJ_TSTR 1557 | li CARG3, LJ_TSTR
@@ -1397,32 +1559,24 @@ static void build_subroutines(BuildCtx *ctx)
1397 | 1559 |
1398 |//-- Base library: iterators ------------------------------------------- 1560 |//-- Base library: iterators -------------------------------------------
1399 | 1561 |
1400 |.ffunc next 1562 |.ffunc_1 next
1401 | cmplwi NARGS8:RC, 8
1402 | lwz CARG1, 0(BASE)
1403 | lwz TAB:CARG2, 4(BASE)
1404 | blt ->fff_fallback
1405 | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. 1563 | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
1406 | checktab CARG1 1564 | checktab CARG3
1407 | lwz PC, FRAME_PC(BASE) 1565 | lwz PC, FRAME_PC(BASE)
1408 | bne ->fff_fallback 1566 | bne ->fff_fallback
1409 | stp BASE, L->base // Add frame since C call can throw. 1567 | la CARG2, 8(BASE)
1410 | mr CARG1, L 1568 | la CARG3, -8(BASE)
1411 | stp BASE, L->top // Dummy frame length is ok. 1569 | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1412 | la CARG3, 8(BASE) 1570 | // Returns 1=found, 0=end, -1=error.
1413 | stw PC, SAVE_PC 1571 | cmpwi CRET1, 0
1414 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1415 | // Returns 0 at end of traversal.
1416 | cmplwi CRET1, 0
1417 | li CARG3, LJ_TNIL
1418 | beq ->fff_restv // End of traversal: return nil.
1419 | lfd f0, 8(BASE) // Copy key and value to results.
1420 | la RA, -8(BASE) 1572 | la RA, -8(BASE)
1421 | lfd f1, 16(BASE)
1422 | stfd f0, 0(RA)
1423 | li RD, (2+1)*8 1573 | li RD, (2+1)*8
1424 | stfd f1, 8(RA) 1574 | bgt ->fff_res // Found key/value.
1425 | b ->fff_res 1575 | li CARG3, LJ_TNIL
1576 | beq ->fff_restv // End of traversal: return nil.
1577 | lwz CFUNC:RB, FRAME_FUNC(BASE)
1578 | li NARGS8:RC, 2*8
1579 | b ->fff_fallback // Invalid key.
1426 | 1580 |
1427 |.ffunc_1 pairs 1581 |.ffunc_1 pairs
1428 | checktab CARG3 1582 | checktab CARG3
@@ -1430,17 +1584,32 @@ static void build_subroutines(BuildCtx *ctx)
1430 | bne ->fff_fallback 1584 | bne ->fff_fallback
1431#if LJ_52 1585#if LJ_52
1432 | lwz TAB:TMP2, TAB:CARG1->metatable 1586 | lwz TAB:TMP2, TAB:CARG1->metatable
1587 |.if FPU
1433 | lfd f0, CFUNC:RB->upvalue[0] 1588 | lfd f0, CFUNC:RB->upvalue[0]
1589 |.else
1590 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1591 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1592 |.endif
1434 | cmplwi TAB:TMP2, 0 1593 | cmplwi TAB:TMP2, 0
1435 | la RA, -8(BASE) 1594 | la RA, -8(BASE)
1436 | bne ->fff_fallback 1595 | bne ->fff_fallback
1437#else 1596#else
1597 |.if FPU
1438 | lfd f0, CFUNC:RB->upvalue[0] 1598 | lfd f0, CFUNC:RB->upvalue[0]
1599 |.else
1600 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1601 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1602 |.endif
1439 | la RA, -8(BASE) 1603 | la RA, -8(BASE)
1440#endif 1604#endif
1441 | stw TISNIL, 8(BASE) 1605 | stw TISNIL, 8(BASE)
1442 | li RD, (3+1)*8 1606 | li RD, (3+1)*8
1607 |.if FPU
1443 | stfd f0, 0(RA) 1608 | stfd f0, 0(RA)
1609 |.else
1610 | stw TMP0, 0(RA)
1611 | stw TMP1, 4(RA)
1612 |.endif
1444 | b ->fff_res 1613 | b ->fff_res
1445 | 1614 |
1446 |.ffunc ipairs_aux 1615 |.ffunc ipairs_aux
@@ -1486,14 +1655,24 @@ static void build_subroutines(BuildCtx *ctx)
1486 | stfd FARG2, 0(RA) 1655 | stfd FARG2, 0(RA)
1487 |.endif 1656 |.endif
1488 | ble >2 // Not in array part? 1657 | ble >2 // Not in array part?
1658 |.if FPU
1489 | lwzx TMP2, TMP1, TMP3 1659 | lwzx TMP2, TMP1, TMP3
1490 | lfdx f0, TMP1, TMP3 1660 | lfdx f0, TMP1, TMP3
1661 |.else
1662 | lwzux TMP2, TMP1, TMP3
1663 | lwz TMP3, 4(TMP1)
1664 |.endif
1491 |1: 1665 |1:
1492 | checknil TMP2 1666 | checknil TMP2
1493 | li RD, (0+1)*8 1667 | li RD, (0+1)*8
1494 | beq ->fff_res // End of iteration, return 0 results. 1668 | beq ->fff_res // End of iteration, return 0 results.
1495 | li RD, (2+1)*8 1669 | li RD, (2+1)*8
1670 |.if FPU
1496 | stfd f0, 8(RA) 1671 | stfd f0, 8(RA)
1672 |.else
1673 | stw TMP2, 8(RA)
1674 | stw TMP3, 12(RA)
1675 |.endif
1497 | b ->fff_res 1676 | b ->fff_res
1498 |2: // Check for empty hash part first. Otherwise call C function. 1677 |2: // Check for empty hash part first. Otherwise call C function.
1499 | lwz TMP0, TAB:CARG1->hmask 1678 | lwz TMP0, TAB:CARG1->hmask
@@ -1507,7 +1686,11 @@ static void build_subroutines(BuildCtx *ctx)
1507 | li RD, (0+1)*8 1686 | li RD, (0+1)*8
1508 | beq ->fff_res 1687 | beq ->fff_res
1509 | lwz TMP2, 0(CRET1) 1688 | lwz TMP2, 0(CRET1)
1689 |.if FPU
1510 | lfd f0, 0(CRET1) 1690 | lfd f0, 0(CRET1)
1691 |.else
1692 | lwz TMP3, 4(CRET1)
1693 |.endif
1511 | b <1 1694 | b <1
1512 | 1695 |
1513 |.ffunc_1 ipairs 1696 |.ffunc_1 ipairs
@@ -1516,12 +1699,22 @@ static void build_subroutines(BuildCtx *ctx)
1516 | bne ->fff_fallback 1699 | bne ->fff_fallback
1517#if LJ_52 1700#if LJ_52
1518 | lwz TAB:TMP2, TAB:CARG1->metatable 1701 | lwz TAB:TMP2, TAB:CARG1->metatable
1702 |.if FPU
1519 | lfd f0, CFUNC:RB->upvalue[0] 1703 | lfd f0, CFUNC:RB->upvalue[0]
1704 |.else
1705 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1706 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1707 |.endif
1520 | cmplwi TAB:TMP2, 0 1708 | cmplwi TAB:TMP2, 0
1521 | la RA, -8(BASE) 1709 | la RA, -8(BASE)
1522 | bne ->fff_fallback 1710 | bne ->fff_fallback
1523#else 1711#else
1712 |.if FPU
1524 | lfd f0, CFUNC:RB->upvalue[0] 1713 | lfd f0, CFUNC:RB->upvalue[0]
1714 |.else
1715 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1716 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1717 |.endif
1525 | la RA, -8(BASE) 1718 | la RA, -8(BASE)
1526#endif 1719#endif
1527 |.if DUALNUM 1720 |.if DUALNUM
@@ -1531,7 +1724,12 @@ static void build_subroutines(BuildCtx *ctx)
1531 |.endif 1724 |.endif
1532 | stw ZERO, 12(BASE) 1725 | stw ZERO, 12(BASE)
1533 | li RD, (3+1)*8 1726 | li RD, (3+1)*8
1727 |.if FPU
1534 | stfd f0, 0(RA) 1728 | stfd f0, 0(RA)
1729 |.else
1730 | stw TMP0, 0(RA)
1731 | stw TMP1, 4(RA)
1732 |.endif
1535 | b ->fff_res 1733 | b ->fff_res
1536 | 1734 |
1537 |//-- Base library: catch errors ---------------------------------------- 1735 |//-- Base library: catch errors ----------------------------------------
@@ -1556,21 +1754,35 @@ static void build_subroutines(BuildCtx *ctx)
1556 | lwz TMP1, L->maxstack 1754 | lwz TMP1, L->maxstack
1557 | add TMP2, BASE, NARGS8:RC 1755 | add TMP2, BASE, NARGS8:RC
1558 | cmplwi NARGS8:RC, 16 1756 | cmplwi NARGS8:RC, 16
1559 | lwz CARG4, 8(BASE) 1757 | lwz CARG3, 8(BASE)
1560 | cmplw cr1, TMP1, TMP2 1758 | cmplw cr1, TMP1, TMP2
1759 |.if FPU
1561 | lfd FARG2, 8(BASE) 1760 | lfd FARG2, 8(BASE)
1562 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 1761 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
1563 | lfd FARG1, 0(BASE) 1762 | lfd FARG1, 0(BASE)
1763 |.else
1764 | lwz CARG1, 0(BASE)
1765 | lwz CARG2, 4(BASE)
1766 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
1767 | lwz CARG4, 12(BASE)
1768 |.endif
1564 | blt ->fff_fallback 1769 | blt ->fff_fallback
1565 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1770 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1566 | mr TMP2, BASE 1771 | mr TMP2, BASE
1567 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1772 | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
1568 | la BASE, 16(BASE) 1773 | la BASE, 16(BASE)
1569 | // Remember active hook before pcall. 1774 | // Remember active hook before pcall.
1570 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1775 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1776 |.if FPU
1571 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1777 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1572 | subi NARGS8:RC, NARGS8:RC, 16
1573 | stfd FARG1, 8(TMP2) 1778 | stfd FARG1, 8(TMP2)
1779 |.else
1780 | stw CARG3, 0(TMP2)
1781 | stw CARG4, 4(TMP2)
1782 | stw CARG1, 8(TMP2)
1783 | stw CARG2, 12(TMP2)
1784 |.endif
1785 | subi NARGS8:RC, NARGS8:RC, 16
1574 | addi PC, TMP1, 16+FRAME_PCALL 1786 | addi PC, TMP1, 16+FRAME_PCALL
1575 | b ->vm_call_dispatch 1787 | b ->vm_call_dispatch
1576 | 1788 |
@@ -1613,9 +1825,21 @@ static void build_subroutines(BuildCtx *ctx)
1613 | stp BASE, L->top 1825 | stp BASE, L->top
1614 |2: // Move args to coroutine. 1826 |2: // Move args to coroutine.
1615 | cmpw TMP1, NARGS8:RC 1827 | cmpw TMP1, NARGS8:RC
1828 |.if FPU
1616 | lfdx f0, BASE, TMP1 1829 | lfdx f0, BASE, TMP1
1830 |.else
1831 | add CARG3, BASE, TMP1
1832 | lwz TMP2, 0(CARG3)
1833 | lwz TMP3, 4(CARG3)
1834 |.endif
1617 | beq >3 1835 | beq >3
1836 |.if FPU
1618 | stfdx f0, CARG2, TMP1 1837 | stfdx f0, CARG2, TMP1
1838 |.else
1839 | add CARG3, CARG2, TMP1
1840 | stw TMP2, 0(CARG3)
1841 | stw TMP3, 4(CARG3)
1842 |.endif
1619 | addi TMP1, TMP1, 8 1843 | addi TMP1, TMP1, 8
1620 | b <2 1844 | b <2
1621 |3: 1845 |3:
@@ -1630,6 +1854,7 @@ static void build_subroutines(BuildCtx *ctx)
1630 | lp TMP3, L:SAVE0->top 1854 | lp TMP3, L:SAVE0->top
1631 | li_vmstate INTERP 1855 | li_vmstate INTERP
1632 | lp BASE, L->base 1856 | lp BASE, L->base
1857 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
1633 | st_vmstate 1858 | st_vmstate
1634 | bgt >8 1859 | bgt >8
1635 | sub RD, TMP3, TMP2 1860 | sub RD, TMP3, TMP2
@@ -1645,8 +1870,17 @@ static void build_subroutines(BuildCtx *ctx)
1645 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1870 | stp TMP2, L:SAVE0->top // Clear coroutine stack.
1646 |5: // Move results from coroutine. 1871 |5: // Move results from coroutine.
1647 | cmplw TMP1, TMP3 1872 | cmplw TMP1, TMP3
1873 |.if FPU
1648 | lfdx f0, TMP2, TMP1 1874 | lfdx f0, TMP2, TMP1
1649 | stfdx f0, BASE, TMP1 1875 | stfdx f0, BASE, TMP1
1876 |.else
1877 | add CARG3, TMP2, TMP1
1878 | lwz CARG1, 0(CARG3)
1879 | lwz CARG2, 4(CARG3)
1880 | add CARG3, BASE, TMP1
1881 | stw CARG1, 0(CARG3)
1882 | stw CARG2, 4(CARG3)
1883 |.endif
1650 | addi TMP1, TMP1, 8 1884 | addi TMP1, TMP1, 8
1651 | bne <5 1885 | bne <5
1652 |6: 1886 |6:
@@ -1671,12 +1905,22 @@ static void build_subroutines(BuildCtx *ctx)
1671 | andix. TMP0, PC, FRAME_TYPE 1905 | andix. TMP0, PC, FRAME_TYPE
1672 | la TMP3, -8(TMP3) 1906 | la TMP3, -8(TMP3)
1673 | li TMP1, LJ_TFALSE 1907 | li TMP1, LJ_TFALSE
1908 |.if FPU
1674 | lfd f0, 0(TMP3) 1909 | lfd f0, 0(TMP3)
1910 |.else
1911 | lwz CARG1, 0(TMP3)
1912 | lwz CARG2, 4(TMP3)
1913 |.endif
1675 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1914 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
1676 | li RD, (2+1)*8 1915 | li RD, (2+1)*8
1677 | stw TMP1, -8(BASE) // Prepend false to results. 1916 | stw TMP1, -8(BASE) // Prepend false to results.
1678 | la RA, -8(BASE) 1917 | la RA, -8(BASE)
1918 |.if FPU
1679 | stfd f0, 0(BASE) // Copy error message. 1919 | stfd f0, 0(BASE) // Copy error message.
1920 |.else
1921 | stw CARG1, 0(BASE) // Copy error message.
1922 | stw CARG2, 4(BASE)
1923 |.endif
1680 | b <7 1924 | b <7
1681 |.else 1925 |.else
1682 | mr CARG1, L 1926 | mr CARG1, L
@@ -1855,7 +2099,12 @@ static void build_subroutines(BuildCtx *ctx)
1855 | lus CARG1, 0x8000 // -(2^31). 2099 | lus CARG1, 0x8000 // -(2^31).
1856 | beqy ->fff_resi 2100 | beqy ->fff_resi
1857 |5: 2101 |5:
2102 |.if FPU
1858 | lfd FARG1, 0(BASE) 2103 | lfd FARG1, 0(BASE)
2104 |.else
2105 | lwz CARG1, 0(BASE)
2106 | lwz CARG2, 4(BASE)
2107 |.endif
1859 | blex func 2108 | blex func
1860 | b ->fff_resn 2109 | b ->fff_resn
1861 |.endmacro 2110 |.endmacro
@@ -1879,10 +2128,14 @@ static void build_subroutines(BuildCtx *ctx)
1879 | 2128 |
1880 |.ffunc math_log 2129 |.ffunc math_log
1881 | cmplwi NARGS8:RC, 8 2130 | cmplwi NARGS8:RC, 8
1882 | lwz CARG3, 0(BASE) 2131 | lwz CARG1, 0(BASE)
1883 | lfd FARG1, 0(BASE)
1884 | bne ->fff_fallback // Need exactly 1 argument. 2132 | bne ->fff_fallback // Need exactly 1 argument.
1885 | checknum CARG3; bge ->fff_fallback 2133 | checknum CARG1; bge ->fff_fallback
2134 |.if FPU
2135 | lfd FARG1, 0(BASE)
2136 |.else
2137 | lwz CARG2, 4(BASE)
2138 |.endif
1886 | blex log 2139 | blex log
1887 | b ->fff_resn 2140 | b ->fff_resn
1888 | 2141 |
@@ -1901,26 +2154,27 @@ static void build_subroutines(BuildCtx *ctx)
1901 | math_extern2 atan2 2154 | math_extern2 atan2
1902 | math_extern2 fmod 2155 | math_extern2 fmod
1903 | 2156 |
1904 |->ff_math_deg:
1905 |.ffunc_n math_rad
1906 | lfd FARG2, CFUNC:RB->upvalue[0]
1907 | fmul FARG1, FARG1, FARG2
1908 | b ->fff_resn
1909 |
1910 |.if DUALNUM 2157 |.if DUALNUM
1911 |.ffunc math_ldexp 2158 |.ffunc math_ldexp
1912 | cmplwi NARGS8:RC, 16 2159 | cmplwi NARGS8:RC, 16
1913 | lwz CARG3, 0(BASE) 2160 | lwz TMP0, 0(BASE)
2161 |.if FPU
1914 | lfd FARG1, 0(BASE) 2162 | lfd FARG1, 0(BASE)
1915 | lwz CARG4, 8(BASE) 2163 |.else
2164 | lwz CARG1, 0(BASE)
2165 | lwz CARG2, 4(BASE)
2166 |.endif
2167 | lwz TMP1, 8(BASE)
1916 |.if GPR64 2168 |.if GPR64
1917 | lwz CARG2, 12(BASE) 2169 | lwz CARG2, 12(BASE)
1918 |.else 2170 |.elif FPU
1919 | lwz CARG1, 12(BASE) 2171 | lwz CARG1, 12(BASE)
2172 |.else
2173 | lwz CARG3, 12(BASE)
1920 |.endif 2174 |.endif
1921 | blt ->fff_fallback 2175 | blt ->fff_fallback
1922 | checknum CARG3; bge ->fff_fallback 2176 | checknum TMP0; bge ->fff_fallback
1923 | checknum CARG4; bne ->fff_fallback 2177 | checknum TMP1; bne ->fff_fallback
1924 |.else 2178 |.else
1925 |.ffunc_nn math_ldexp 2179 |.ffunc_nn math_ldexp
1926 |.if GPR64 2180 |.if GPR64
@@ -1935,8 +2189,10 @@ static void build_subroutines(BuildCtx *ctx)
1935 |.ffunc_n math_frexp 2189 |.ffunc_n math_frexp
1936 |.if GPR64 2190 |.if GPR64
1937 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 2191 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1938 |.else 2192 |.elif FPU
1939 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 2193 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
2194 |.else
2195 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1940 |.endif 2196 |.endif
1941 | lwz PC, FRAME_PC(BASE) 2197 | lwz PC, FRAME_PC(BASE)
1942 | blex frexp 2198 | blex frexp
@@ -1945,7 +2201,12 @@ static void build_subroutines(BuildCtx *ctx)
1945 |.if not DUALNUM 2201 |.if not DUALNUM
1946 | tonum_i FARG2, TMP1 2202 | tonum_i FARG2, TMP1
1947 |.endif 2203 |.endif
2204 |.if FPU
1948 | stfd FARG1, 0(RA) 2205 | stfd FARG1, 0(RA)
2206 |.else
2207 | stw CRET1, 0(RA)
2208 | stw CRET2, 4(RA)
2209 |.endif
1949 | li RD, (2+1)*8 2210 | li RD, (2+1)*8
1950 |.if DUALNUM 2211 |.if DUALNUM
1951 | stw TISNUM, 8(RA) 2212 | stw TISNUM, 8(RA)
@@ -1958,13 +2219,20 @@ static void build_subroutines(BuildCtx *ctx)
1958 |.ffunc_n math_modf 2219 |.ffunc_n math_modf
1959 |.if GPR64 2220 |.if GPR64
1960 | la CARG2, -8(BASE) 2221 | la CARG2, -8(BASE)
1961 |.else 2222 |.elif FPU
1962 | la CARG1, -8(BASE) 2223 | la CARG1, -8(BASE)
2224 |.else
2225 | la CARG3, -8(BASE)
1963 |.endif 2226 |.endif
1964 | lwz PC, FRAME_PC(BASE) 2227 | lwz PC, FRAME_PC(BASE)
1965 | blex modf 2228 | blex modf
1966 | la RA, -8(BASE) 2229 | la RA, -8(BASE)
2230 |.if FPU
1967 | stfd FARG1, 0(BASE) 2231 | stfd FARG1, 0(BASE)
2232 |.else
2233 | stw CRET1, 0(BASE)
2234 | stw CRET2, 4(BASE)
2235 |.endif
1968 | li RD, (2+1)*8 2236 | li RD, (2+1)*8
1969 | b ->fff_res 2237 | b ->fff_res
1970 | 2238 |
@@ -1972,13 +2240,13 @@ static void build_subroutines(BuildCtx *ctx)
1972 |.if DUALNUM 2240 |.if DUALNUM
1973 | .ffunc_1 name 2241 | .ffunc_1 name
1974 | checknum CARG3 2242 | checknum CARG3
1975 | addi TMP1, BASE, 8 2243 | addi SAVE0, BASE, 8
1976 | add TMP2, BASE, NARGS8:RC 2244 | add SAVE1, BASE, NARGS8:RC
1977 | bne >4 2245 | bne >4
1978 |1: // Handle integers. 2246 |1: // Handle integers.
1979 | lwz CARG4, 0(TMP1) 2247 | lwz CARG4, 0(SAVE0)
1980 | cmplw cr1, TMP1, TMP2 2248 | cmplw cr1, SAVE0, SAVE1
1981 | lwz CARG2, 4(TMP1) 2249 | lwz CARG2, 4(SAVE0)
1982 | bge cr1, ->fff_resi 2250 | bge cr1, ->fff_resi
1983 | checknum CARG4 2251 | checknum CARG4
1984 | xoris TMP0, CARG1, 0x8000 2252 | xoris TMP0, CARG1, 0x8000
@@ -1995,36 +2263,76 @@ static void build_subroutines(BuildCtx *ctx)
1995 |.if GPR64 2263 |.if GPR64
1996 | rldicl CARG1, CARG1, 0, 32 2264 | rldicl CARG1, CARG1, 0, 32
1997 |.endif 2265 |.endif
1998 | addi TMP1, TMP1, 8 2266 | addi SAVE0, SAVE0, 8
1999 | b <1 2267 | b <1
2000 |3: 2268 |3:
2001 | bge ->fff_fallback 2269 | bge ->fff_fallback
2002 | // Convert intermediate result to number and continue below. 2270 | // Convert intermediate result to number and continue below.
2271 |.if FPU
2003 | tonum_i FARG1, CARG1 2272 | tonum_i FARG1, CARG1
2004 | lfd FARG2, 0(TMP1) 2273 | lfd FARG2, 0(SAVE0)
2274 |.else
2275 | mr CARG2, CARG1
2276 | bl ->vm_sfi2d_1
2277 | lwz CARG3, 0(SAVE0)
2278 | lwz CARG4, 4(SAVE0)
2279 |.endif
2005 | b >6 2280 | b >6
2006 |4: 2281 |4:
2282 |.if FPU
2007 | lfd FARG1, 0(BASE) 2283 | lfd FARG1, 0(BASE)
2284 |.else
2285 | lwz CARG1, 0(BASE)
2286 | lwz CARG2, 4(BASE)
2287 |.endif
2008 | bge ->fff_fallback 2288 | bge ->fff_fallback
2009 |5: // Handle numbers. 2289 |5: // Handle numbers.
2010 | lwz CARG4, 0(TMP1) 2290 | lwz CARG3, 0(SAVE0)
2011 | cmplw cr1, TMP1, TMP2 2291 | cmplw cr1, SAVE0, SAVE1
2012 | lfd FARG2, 0(TMP1) 2292 |.if FPU
2293 | lfd FARG2, 0(SAVE0)
2294 |.else
2295 | lwz CARG4, 4(SAVE0)
2296 |.endif
2013 | bge cr1, ->fff_resn 2297 | bge cr1, ->fff_resn
2014 | checknum CARG4; bge >7 2298 | checknum CARG3; bge >7
2015 |6: 2299 |6:
2016 | fsub f0, FARG1, FARG2 2300 | addi SAVE0, SAVE0, 8
2017 | addi TMP1, TMP1, 8 2301 |.if FPU
2018 |.if ismax 2302 |.if ismax
2303 | fsub f0, FARG1, FARG2
2304 |.else
2305 | fsub f0, FARG2, FARG1
2306 |.endif
2019 | fsel FARG1, f0, FARG1, FARG2 2307 | fsel FARG1, f0, FARG1, FARG2
2020 |.else 2308 |.else
2021 | fsel FARG1, f0, FARG2, FARG1 2309 | stw CARG1, SFSAVE_1
2310 | stw CARG2, SFSAVE_2
2311 | stw CARG3, SFSAVE_3
2312 | stw CARG4, SFSAVE_4
2313 | blex __ledf2
2314 | cmpwi CRET1, 0
2315 |.if ismax
2316 | blt >8
2317 |.else
2318 | bge >8
2319 |.endif
2320 | lwz CARG1, SFSAVE_1
2321 | lwz CARG2, SFSAVE_2
2322 | b <5
2323 |8:
2324 | lwz CARG1, SFSAVE_3
2325 | lwz CARG2, SFSAVE_4
2022 |.endif 2326 |.endif
2023 | b <5 2327 | b <5
2024 |7: // Convert integer to number and continue above. 2328 |7: // Convert integer to number and continue above.
2025 | lwz CARG2, 4(TMP1) 2329 | lwz CARG3, 4(SAVE0)
2026 | bne ->fff_fallback 2330 | bne ->fff_fallback
2027 | tonum_i FARG2, CARG2 2331 |.if FPU
2332 | tonum_i FARG2, CARG3
2333 |.else
2334 | bl ->vm_sfi2d_2
2335 |.endif
2028 | b <6 2336 | b <6
2029 |.else 2337 |.else
2030 | .ffunc_n name 2338 | .ffunc_n name
@@ -2036,13 +2344,13 @@ static void build_subroutines(BuildCtx *ctx)
2036 | checknum CARG2 2344 | checknum CARG2
2037 | bge cr1, ->fff_resn 2345 | bge cr1, ->fff_resn
2038 | bge ->fff_fallback 2346 | bge ->fff_fallback
2039 | fsub f0, FARG1, FARG2
2040 | addi TMP1, TMP1, 8
2041 |.if ismax 2347 |.if ismax
2042 | fsel FARG1, f0, FARG1, FARG2 2348 | fsub f0, FARG1, FARG2
2043 |.else 2349 |.else
2044 | fsel FARG1, f0, FARG2, FARG1 2350 | fsub f0, FARG2, FARG1
2045 |.endif 2351 |.endif
2352 | addi TMP1, TMP1, 8
2353 | fsel FARG1, f0, FARG1, FARG2
2046 | b <1 2354 | b <1
2047 |.endif 2355 |.endif
2048 |.endmacro 2356 |.endmacro
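The soft-float branch above folds math.min/math.max through libgcc's __ledf2 comparison instead of the fsub/fsel pair. A standalone C model of the convention that branch relies on (helper names are illustrative only, and the __ledf2 behaviour is stated as an assumption):

#include <stddef.h>

/* Assumed model of libgcc's __ledf2 result as tested by the
 * "blex __ledf2 / cmpwi CRET1, 0" sequence: negative if a < b, zero if
 * a == b, positive if a > b or if either operand is NaN. */
static int ledf2_model(double a, double b)
{
  if (a < b) return -1;
  if (a == b) return 0;
  return 1;
}

/* Illustrative fold mirroring the branch structure above: for max the code
 * switches to the other operand when the accumulator compares less
 * (blt >8); for min, when it compares greater-or-equal (bge >8). */
static double fold_minmax(const double *v, size_t n, int ismax)
{
  double acc = v[0];
  size_t i;
  for (i = 1; i < n; i++) {
    int c = ledf2_model(acc, v[i]);
    if (ismax ? (c < 0) : (c >= 0))
      acc = v[i];
  }
  return acc;
}
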
@@ -2052,11 +2360,6 @@ static void build_subroutines(BuildCtx *ctx)
2052 | 2360 |
2053 |//-- String library ----------------------------------------------------- 2361 |//-- String library -----------------------------------------------------
2054 | 2362 |
2055 |.ffunc_1 string_len
2056 | checkstr CARG3; bne ->fff_fallback
2057 | lwz CRET1, STR:CARG1->len
2058 | b ->fff_resi
2059 |
2060 |.ffunc string_byte // Only handle the 1-arg case here. 2363 |.ffunc string_byte // Only handle the 1-arg case here.
2061 | cmplwi NARGS8:RC, 8 2364 | cmplwi NARGS8:RC, 8
2062 | lwz CARG3, 0(BASE) 2365 | lwz CARG3, 0(BASE)
@@ -2111,6 +2414,7 @@ static void build_subroutines(BuildCtx *ctx)
2111 | stp BASE, L->base 2414 | stp BASE, L->base
2112 | stw PC, SAVE_PC 2415 | stw PC, SAVE_PC
2113 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2416 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2417 |->fff_resstr:
2114 | // Returns GCstr *. 2418 | // Returns GCstr *.
2115 | lp BASE, L->base 2419 | lp BASE, L->base
2116 | li CARG3, LJ_TSTR 2420 | li CARG3, LJ_TSTR
@@ -2188,114 +2492,29 @@ static void build_subroutines(BuildCtx *ctx)
2188 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2492 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2189 | b <3 2493 | b <3
2190 | 2494 |
2191 |.ffunc string_rep // Only handle the 1-char case inline. 2495 |.macro ffstring_op, name
2192 | ffgccheck 2496 | .ffunc string_ .. name
2193 | cmplwi NARGS8:RC, 16
2194 | lwz TMP0, 0(BASE)
2195 | lwz STR:CARG1, 4(BASE)
2196 | lwz CARG4, 8(BASE)
2197 |.if DUALNUM
2198 | lwz CARG3, 12(BASE)
2199 |.else
2200 | lfd FARG2, 8(BASE)
2201 |.endif
2202 | bne ->fff_fallback // Exactly 2 arguments.
2203 | checkstr TMP0; bne ->fff_fallback
2204 |.if DUALNUM
2205 | checknum CARG4; bne ->fff_fallback
2206 |.else
2207 | checknum CARG4; bge ->fff_fallback
2208 | toint CARG3, FARG2
2209 |.endif
2210 | lwz TMP0, STR:CARG1->len
2211 | cmpwi CARG3, 0
2212 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2213 | ble >2 // Count <= 0? (or non-int)
2214 | cmplwi TMP0, 1
2215 | subi TMP2, CARG3, 1
2216 | blt >2 // Zero length string?
2217 | cmplw cr1, TMP1, CARG3
2218 | bne ->fff_fallback // Fallback for > 1-char strings.
2219 | lbz TMP0, STR:CARG1[1]
2220 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2221 | blt cr1, ->fff_fallback
2222 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2223 | cmplwi TMP2, 0
2224 | stbx TMP0, CARG2, TMP2
2225 | subi TMP2, TMP2, 1
2226 | bne <1
2227 | b ->fff_newstr
2228 |2: // Return empty string.
2229 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2230 | li CARG3, LJ_TSTR
2231 | b ->fff_restv
2232 |
2233 |.ffunc string_reverse
2234 | ffgccheck
2235 | cmplwi NARGS8:RC, 8
2236 | lwz CARG3, 0(BASE)
2237 | lwz STR:CARG1, 4(BASE)
2238 | blt ->fff_fallback
2239 | checkstr CARG3
2240 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2241 | bne ->fff_fallback
2242 | lwz CARG3, STR:CARG1->len
2243 | la CARG1, #STR(STR:CARG1)
2244 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2245 | li TMP2, 0
2246 | cmplw TMP1, CARG3
2247 | subi TMP3, CARG3, 1
2248 | blt ->fff_fallback
2249 |1: // Reverse string copy.
2250 | cmpwi TMP3, 0
2251 | lbzx TMP1, CARG1, TMP2
2252 | blty ->fff_newstr
2253 | stbx TMP1, CARG2, TMP3
2254 | subi TMP3, TMP3, 1
2255 | addi TMP2, TMP2, 1
2256 | b <1
2257 |
2258 |.macro ffstring_case, name, lo
2259 | .ffunc name
2260 | ffgccheck 2497 | ffgccheck
2261 | cmplwi NARGS8:RC, 8 2498 | cmplwi NARGS8:RC, 8
2262 | lwz CARG3, 0(BASE) 2499 | lwz CARG3, 0(BASE)
2263 | lwz STR:CARG1, 4(BASE) 2500 | lwz STR:CARG2, 4(BASE)
2264 | blt ->fff_fallback 2501 | blt ->fff_fallback
2265 | checkstr CARG3 2502 | checkstr CARG3
2266 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2503 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2267 | bne ->fff_fallback 2504 | bne ->fff_fallback
2268 | lwz CARG3, STR:CARG1->len 2505 | lwz TMP0, SBUF:CARG1->b
2269 | la CARG1, #STR(STR:CARG1) 2506 | stw L, SBUF:CARG1->L
2270 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2507 | stp BASE, L->base
2271 | cmplw TMP1, CARG3 2508 | stw PC, SAVE_PC
2272 | li TMP2, 0 2509 | stw TMP0, SBUF:CARG1->w
2273 | blt ->fff_fallback 2510 | bl extern lj_buf_putstr_ .. name
2274 |1: // ASCII case conversion. 2511 | bl extern lj_buf_tostr
2275 | cmplw TMP2, CARG3 2512 | b ->fff_resstr
2276 | lbzx TMP1, CARG1, TMP2
2277 | bgey ->fff_newstr
2278 | subi TMP0, TMP1, lo
2279 | xori TMP3, TMP1, 0x20
2280 | addic TMP0, TMP0, -26
2281 | subfe TMP3, TMP3, TMP3
2282 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2283 | xor TMP1, TMP1, TMP3
2284 | stbx TMP1, CARG2, TMP2
2285 | addi TMP2, TMP2, 1
2286 | b <1
2287 |.endmacro 2513 |.endmacro
2288 | 2514 |
2289 |ffstring_case string_lower, 65 2515 |ffstring_op reverse
2290 |ffstring_case string_upper, 97 2516 |ffstring_op lower
2291 | 2517 |ffstring_op upper
2292 |//-- Table library ------------------------------------------------------
2293 |
2294 |.ffunc_1 table_getn
2295 | checktab CARG3; bne ->fff_fallback
2296 | bl extern lj_tab_len // (GCtab *t)
2297 | // Returns uint32_t (but less than 2^31).
2298 | b ->fff_resi
2299 | 2518 |
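The ffstring_op macro above replaces the old per-character loops for string.reverse/lower/upper with calls into the new string buffer helpers. A rough C sketch of the equivalent call sequence, assuming the lj_buf.h signatures named in the diff (the wrapper function itself is illustrative):

#include "lj_obj.h"
#include "lj_buf.h"

/* Sketch (assumption): what the macro body amounts to for string.lower.
 * The asm resets the global temporary buffer (DISPATCH_GL(tmpbuf)) inline;
 * lj_buf_tmp_ is assumed to do the same within the LuaJIT source tree. */
static GCstr *ff_string_lower_sketch(lua_State *L, GCstr *s)
{
  SBuf *sb = lj_buf_tmp_(L);
  return lj_buf_tostr(lj_buf_putstr_lower(sb, s));
}
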
2300 |//-- Bit library -------------------------------------------------------- 2519 |//-- Bit library --------------------------------------------------------
2301 | 2520 |
@@ -2313,28 +2532,37 @@ static void build_subroutines(BuildCtx *ctx)
2313 | 2532 |
2314 |.macro .ffunc_bit_op, name, ins 2533 |.macro .ffunc_bit_op, name, ins
2315 | .ffunc_bit name 2534 | .ffunc_bit name
2316 | addi TMP1, BASE, 8 2535 | addi SAVE0, BASE, 8
2317 | add TMP2, BASE, NARGS8:RC 2536 | add SAVE1, BASE, NARGS8:RC
2318 |1: 2537 |1:
2319 | lwz CARG4, 0(TMP1) 2538 | lwz CARG4, 0(SAVE0)
2320 | cmplw cr1, TMP1, TMP2 2539 | cmplw cr1, SAVE0, SAVE1
2321 |.if DUALNUM 2540 |.if DUALNUM
2322 | lwz CARG2, 4(TMP1) 2541 | lwz CARG2, 4(SAVE0)
2323 |.else 2542 |.else
2324 | lfd FARG1, 0(TMP1) 2543 | lfd FARG1, 0(SAVE0)
2325 |.endif 2544 |.endif
2326 | bgey cr1, ->fff_resi 2545 | bgey cr1, ->fff_resi
2327 | checknum CARG4 2546 | checknum CARG4
2328 |.if DUALNUM 2547 |.if DUALNUM
2548 |.if FPU
2329 | bnel ->fff_bitop_fb 2549 | bnel ->fff_bitop_fb
2330 |.else 2550 |.else
2551 | beq >3
2552 | stw CARG1, SFSAVE_1
2553 | bl ->fff_bitop_fb
2554 | mr CARG2, CARG1
2555 | lwz CARG1, SFSAVE_1
2556 |3:
2557 |.endif
2558 |.else
2331 | fadd FARG1, FARG1, TOBIT 2559 | fadd FARG1, FARG1, TOBIT
2332 | bge ->fff_fallback 2560 | bge ->fff_fallback
2333 | stfd FARG1, TMPD 2561 | stfd FARG1, TMPD
2334 | lwz CARG2, TMPD_LO 2562 | lwz CARG2, TMPD_LO
2335 |.endif 2563 |.endif
2336 | ins CARG1, CARG1, CARG2 2564 | ins CARG1, CARG1, CARG2
2337 | addi TMP1, TMP1, 8 2565 | addi SAVE0, SAVE0, 8
2338 | b <1 2566 | b <1
2339 |.endmacro 2567 |.endmacro
2340 | 2568 |
@@ -2356,7 +2584,14 @@ static void build_subroutines(BuildCtx *ctx)
2356 |.macro .ffunc_bit_sh, name, ins, shmod 2584 |.macro .ffunc_bit_sh, name, ins, shmod
2357 |.if DUALNUM 2585 |.if DUALNUM
2358 | .ffunc_2 bit_..name 2586 | .ffunc_2 bit_..name
2587 |.if FPU
2359 | checknum CARG3; bnel ->fff_tobit_fb 2588 | checknum CARG3; bnel ->fff_tobit_fb
2589 |.else
2590 | checknum CARG3; beq >1
2591 | bl ->fff_tobit_fb
2592 | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
2593 |1:
2594 |.endif
2360 | // Note: no inline conversion from number for 2nd argument! 2595 | // Note: no inline conversion from number for 2nd argument!
2361 | checknum CARG4; bne ->fff_fallback 2596 | checknum CARG4; bne ->fff_fallback
2362 |.else 2597 |.else
@@ -2393,27 +2628,77 @@ static void build_subroutines(BuildCtx *ctx)
2393 |->fff_resn: 2628 |->fff_resn:
2394 | lwz PC, FRAME_PC(BASE) 2629 | lwz PC, FRAME_PC(BASE)
2395 | la RA, -8(BASE) 2630 | la RA, -8(BASE)
2631 |.if FPU
2396 | stfd FARG1, -8(BASE) 2632 | stfd FARG1, -8(BASE)
2633 |.else
2634 | stw CARG1, -8(BASE)
2635 | stw CARG2, -4(BASE)
2636 |.endif
2397 | b ->fff_res1 2637 | b ->fff_res1
2398 | 2638 |
2399 |// Fallback FP number to bit conversion. 2639 |// Fallback FP number to bit conversion.
2400 |->fff_tobit_fb: 2640 |->fff_tobit_fb:
2401 |.if DUALNUM 2641 |.if DUALNUM
2642 |.if FPU
2402 | lfd FARG1, 0(BASE) 2643 | lfd FARG1, 0(BASE)
2403 | bgt ->fff_fallback 2644 | bgt ->fff_fallback
2404 | fadd FARG1, FARG1, TOBIT 2645 | fadd FARG1, FARG1, TOBIT
2405 | stfd FARG1, TMPD 2646 | stfd FARG1, TMPD
2406 | lwz CARG1, TMPD_LO 2647 | lwz CARG1, TMPD_LO
2407 | blr 2648 | blr
2649 |.else
2650 | bgt ->fff_fallback
2651 | mr CARG2, CARG1
2652 | mr CARG1, CARG3
2653 |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
2654 |->vm_tobit:
2655 | slwi TMP2, CARG1, 1
2656 | addis TMP2, TMP2, 0x0020
2657 | cmpwi TMP2, 0
2658 | bge >2
2659 | li TMP1, 0x3e0
2660 | srawi TMP2, TMP2, 21
2661 | not TMP1, TMP1
2662 | sub. TMP2, TMP1, TMP2
2663 | cmpwi cr7, CARG1, 0
2664 | blt >1
2665 | slwi TMP1, CARG1, 11
2666 | srwi TMP0, CARG2, 21
2667 | oris TMP1, TMP1, 0x8000
2668 | or TMP1, TMP1, TMP0
2669 | srw CARG1, TMP1, TMP2
2670 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2671 | neg CARG1, CARG1
2672 | blr
2673 |1:
2674 | addi TMP2, TMP2, 21
2675 | srw TMP1, CARG2, TMP2
2676 | slwi CARG2, CARG1, 12
2677 | subfic TMP2, TMP2, 20
2678 | slw TMP0, CARG2, TMP2
2679 | or CARG1, TMP1, TMP0
2680 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2681 | neg CARG1, CARG1
2682 | blr
2683 |2:
2684 | li CARG1, 0
2685 | blr
2686 |.endif
2408 |.endif 2687 |.endif
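
The new ->vm_tobit path rebuilds, from the sign, exponent and mantissa words, the 32-bit pattern that the FPU build obtains by biasing the argument with the TOBIT constant. A standalone C sketch of that FPU-side trick, for reference (the function name is illustrative):

#include <stdint.h>
#include <string.h>

/* Sketch (assumption): the FPU build adds TOBIT = 2^52 + 2^51 to the
 * number, which leaves its (rounded) integer value in the low 32 bits of
 * the double's mantissa; the soft-float code above derives the integer
 * directly from the exponent and mantissa words instead. */
static uint32_t tobit_via_bias(double n)
{
  double biased = n + 6755399441055744.0;  /* 2^52 + 2^51 */
  uint64_t bits;
  memcpy(&bits, &biased, sizeof(bits));    /* reinterpret the IEEE-754 bits */
  return (uint32_t)bits;                   /* low 32 mantissa bits */
}
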
2409 |->fff_bitop_fb: 2688 |->fff_bitop_fb:
2410 |.if DUALNUM 2689 |.if DUALNUM
2411 | lfd FARG1, 0(TMP1) 2690 |.if FPU
2691 | lfd FARG1, 0(SAVE0)
2412 | bgt ->fff_fallback 2692 | bgt ->fff_fallback
2413 | fadd FARG1, FARG1, TOBIT 2693 | fadd FARG1, FARG1, TOBIT
2414 | stfd FARG1, TMPD 2694 | stfd FARG1, TMPD
2415 | lwz CARG2, TMPD_LO 2695 | lwz CARG2, TMPD_LO
2416 | blr 2696 | blr
2697 |.else
2698 | bgt ->fff_fallback
2699 | mr CARG1, CARG4
2700 | b ->vm_tobit
2701 |.endif
2417 |.endif 2702 |.endif
2418 | 2703 |
2419 |//----------------------------------------------------------------------- 2704 |//-----------------------------------------------------------------------
@@ -2597,15 +2882,88 @@ static void build_subroutines(BuildCtx *ctx)
2597 | mtctr CRET1 2882 | mtctr CRET1
2598 | bctr 2883 | bctr
2599 | 2884 |
2885 |->cont_stitch: // Trace stitching.
2886 |.if JIT
2887 | // RA = resultptr, RB = meta base
2888 | lwz INS, -4(PC)
2889 | lwz TRACE:TMP2, -20(RB) // Save previous trace.
2890 | addic. TMP1, MULTRES, -8
2891 | decode_RA8 RC, INS // Call base.
2892 | beq >2
2893 |1: // Move results down.
2894 |.if FPU
2895 | lfd f0, 0(RA)
2896 |.else
2897 | lwz CARG1, 0(RA)
2898 | lwz CARG2, 4(RA)
2899 |.endif
2900 | addic. TMP1, TMP1, -8
2901 | addi RA, RA, 8
2902 |.if FPU
2903 | stfdx f0, BASE, RC
2904 |.else
2905 | add CARG3, BASE, RC
2906 | stw CARG1, 0(CARG3)
2907 | stw CARG2, 4(CARG3)
2908 |.endif
2909 | addi RC, RC, 8
2910 | bne <1
2911 |2:
2912 | decode_RA8 RA, INS
2913 | decode_RB8 RB, INS
2914 | add RA, RA, RB
2915 |3:
2916 | cmplw RA, RC
2917 | bgt >9 // More results wanted?
2918 |
2919 | lhz TMP3, TRACE:TMP2->traceno
2920 | lhz RD, TRACE:TMP2->link
2921 | cmpw RD, TMP3
2922 | cmpwi cr1, RD, 0
2923 | beq ->cont_nop // Blacklisted.
2924 | slwi RD, RD, 3
2925 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2926 |
2927 | // Stitch a new trace to the previous trace.
2928 | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
2929 | stp L, DISPATCH_J(L)(DISPATCH)
2930 | stp BASE, L->base
2931 | addi CARG1, DISPATCH, GG_DISP2J
2932 | mr CARG2, PC
2933 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2934 | lp BASE, L->base
2935 | b ->cont_nop
2936 |
2937 |9:
2938 | stwx TISNIL, BASE, RC
2939 | addi RC, RC, 8
2940 | b <3
2941 |.endif
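
A rough C model of the result shuffle that ->cont_stitch performs before deciding whether to jump to an already stitched trace or call lj_dispatch_stitch; types follow lj_obj.h, the helper itself is illustrative:

#include "lj_obj.h"

/* Illustrative model (assumption) of the copy loops labelled 1: and 9:
 * above: move the MULTRES results down to the stitching call's base slot,
 * then pad with nil until the expected number of result slots is filled. */
static void stitch_copy_results(TValue *dst, const TValue *res,
                                uint32_t nres, uint32_t nexpected)
{
  uint32_t i;
  for (i = 0; i < nres; i++)
    dst[i] = res[i];       /* loop 1: move results down */
  for (; i < nexpected; i++)
    setnilV(&dst[i]);      /* loop 9: more results wanted -> fill with nil */
}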
2942 |
2943 |->vm_profhook: // Dispatch target for profiler hook.
2944#if LJ_HASPROFILE
2945 | mr CARG1, L
2946 | stw MULTRES, SAVE_MULTRES
2947 | mr CARG2, PC
2948 | stp BASE, L->base
2949 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2950 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2951 | lp BASE, L->base
2952 | subi PC, PC, 4
2953 | b ->cont_nop
2954#endif
2955 |
2600 |//----------------------------------------------------------------------- 2956 |//-----------------------------------------------------------------------
2601 |//-- Trace exit handler ------------------------------------------------- 2957 |//-- Trace exit handler -------------------------------------------------
2602 |//----------------------------------------------------------------------- 2958 |//-----------------------------------------------------------------------
2603 | 2959 |
2604 |.macro savex_, a, b, c, d 2960 |.macro savex_, a, b, c, d
2961 |.if FPU
2605 | stfd f..a, 16+a*8(sp) 2962 | stfd f..a, 16+a*8(sp)
2606 | stfd f..b, 16+b*8(sp) 2963 | stfd f..b, 16+b*8(sp)
2607 | stfd f..c, 16+c*8(sp) 2964 | stfd f..c, 16+c*8(sp)
2608 | stfd f..d, 16+d*8(sp) 2965 | stfd f..d, 16+d*8(sp)
2966 |.endif
2609 |.endmacro 2967 |.endmacro
2610 | 2968 |
2611 |->vm_exit_handler: 2969 |->vm_exit_handler:
@@ -2631,16 +2989,16 @@ static void build_subroutines(BuildCtx *ctx)
2631 | savex_ 20,21,22,23 2989 | savex_ 20,21,22,23
2632 | lhz CARG4, 2(CARG3) // Load trace number. 2990 | lhz CARG4, 2(CARG3) // Load trace number.
2633 | savex_ 24,25,26,27 2991 | savex_ 24,25,26,27
2634 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 2992 | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
2635 | savex_ 28,29,30,31 2993 | savex_ 28,29,30,31
2636 | sub CARG3, TMP0, CARG3 // Compute exit number. 2994 | sub CARG3, TMP0, CARG3 // Compute exit number.
2637 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 2995 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2638 | srwi CARG3, CARG3, 2 2996 | srwi CARG3, CARG3, 2
2639 | stw L, DISPATCH_J(L)(DISPATCH) 2997 | stp L, DISPATCH_J(L)(DISPATCH)
2640 | subi CARG3, CARG3, 2 2998 | subi CARG3, CARG3, 2
2641 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
2642 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2643 | stp BASE, L->base 2999 | stp BASE, L->base
3000 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
3001 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
2644 | addi CARG1, DISPATCH, GG_DISP2J 3002 | addi CARG1, DISPATCH, GG_DISP2J
2645 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 3003 | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
2646 | addi CARG2, sp, 16 3004 | addi CARG2, sp, 16
@@ -2664,33 +3022,37 @@ static void build_subroutines(BuildCtx *ctx)
2664 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 3022 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
2665 | lwz L, SAVE_L 3023 | lwz L, SAVE_L
2666 | addi DISPATCH, JGL, -GG_DISP2G-32768 3024 | addi DISPATCH, JGL, -GG_DISP2G-32768
3025 | stp BASE, L->base
2667 |1: 3026 |1:
2668 | cmpwi CARG1, 0 3027 | li TMP2, -LUA_ERRERR
2669 | blt >3 // Check for error from exit. 3028 | cmplw CARG1, TMP2
2670 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 3029 | bge >9 // Check for error from exit.
3030 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2671 | slwi MULTRES, CARG1, 3 3031 | slwi MULTRES, CARG1, 3
2672 | li TMP2, 0 3032 | li TMP2, 0
2673 | stw MULTRES, SAVE_MULTRES 3033 | stw MULTRES, SAVE_MULTRES
2674 | lwz TMP1, LFUNC:TMP1->pc 3034 | lwz TMP1, LFUNC:RB->pc
2675 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 3035 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2676 | lwz KBASE, PC2PROTO(k)(TMP1) 3036 | lwz KBASE, PC2PROTO(k)(TMP1)
2677 | // Setup type comparison constants. 3037 | // Setup type comparison constants.
2678 | li TISNUM, LJ_TISNUM 3038 | li TISNUM, LJ_TISNUM
2679 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3039 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2680 | stw TMP3, TMPD 3040 | .FPU stw TMP3, TMPD
2681 | li ZERO, 0 3041 | li ZERO, 0
2682 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3042 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2683 | lfs TOBIT, TMPD 3043 | .FPU lfs TOBIT, TMPD
2684 | stw TMP3, TMPD 3044 | .FPU stw TMP3, TMPD
2685 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3045 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2686 | li TISNIL, LJ_TNIL 3046 | li TISNIL, LJ_TNIL
2687 | stw TMP0, TONUM_HI 3047 | .FPU stw TMP0, TONUM_HI
2688 | lfs TONUM, TMPD 3048 | .FPU lfs TONUM, TMPD
2689 | // Modified copy of ins_next which handles function header dispatch, too. 3049 | // Modified copy of ins_next which handles function header dispatch, too.
2690 | lwz INS, 0(PC) 3050 | lwz INS, 0(PC)
2691 | addi PC, PC, 4 3051 | addi PC, PC, 4
2692 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. 3052 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1.
2693 | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 3053 | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
3054 | cmpwi CARG1, -17 // Static dispatch?
3055 | beq >5
2694 | decode_OPP TMP1, INS 3056 | decode_OPP TMP1, INS
2695 | decode_RA8 RA, INS 3057 | decode_RA8 RA, INS
2696 | lpx TMP0, DISPATCH, TMP1 3058 | lpx TMP0, DISPATCH, TMP1
@@ -2702,20 +3064,78 @@ static void build_subroutines(BuildCtx *ctx)
2702 | decode_RC8 RC, INS 3064 | decode_RC8 RC, INS
2703 | bctr 3065 | bctr
2704 |2: 3066 |2:
3067 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
3068 | blt >3
3069 | // Check frame below fast function.
3070 | lwz TMP1, FRAME_PC(BASE)
3071 | andix. TMP0, TMP1, FRAME_TYPE
3072 | bney >3 // Trace stitching continuation?
3073 | // Otherwise set KBASE for Lua function below fast function.
3074 | lwz TMP2, -4(TMP1)
3075 | decode_RA8 TMP0, TMP2
3076 | sub TMP1, BASE, TMP0
3077 | lwz LFUNC:TMP2, -12(TMP1)
3078 | lwz TMP1, LFUNC:TMP2->pc
3079 | lwz KBASE, PC2PROTO(k)(TMP1)
3080 |3:
2705 | subi RC, MULTRES, 8 3081 | subi RC, MULTRES, 8
2706 | add RA, RA, BASE 3082 | add RA, RA, BASE
2707 | bctr 3083 | bctr
2708 | 3084 |
2709 |3: // Rethrow error from the right C frame. 3085 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
3086 | lwz TMP1, DISPATCH_J(trace)(DISPATCH)
3087 | decode_RD4 RD, INS
3088 | lwzx TRACE:TMP1, TMP1, RD
3089 | lwz INS, TRACE:TMP1->startins
3090 | decode_OPP TMP1, INS
3091 | addi TMP1, TMP1, GG_DISP2STATIC
3092 | lpx TMP0, DISPATCH, TMP1
3093 | mtctr TMP0
3094 | decode_RB8 RB, INS
3095 | decode_RD8 RD, INS
3096 | decode_RA8 RA, INS
3097 | decode_RC8 RC, INS
3098 | bctr
3099 |
3100 |9: // Rethrow error from the right C frame.
3101 | neg CARG2, CARG1
2710 | mr CARG1, L 3102 | mr CARG1, L
2711 | bl extern lj_err_run // (lua_State *L) 3103 | bl extern lj_err_trace // (lua_State *L, int errcode)
2712 |.endif 3104 |.endif
2713 | 3105 |
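The exit-to-interpreter path now separates a MULTRES value from a negated error code with one unsigned compare against -LUA_ERRERR before rethrowing via lj_err_trace. A minimal C rendering of that test (variable and function names are illustrative):

#include <stdint.h>
#include "lua.h"   /* LUA_ERRERR */

/* Sketch of the "li TMP2, -LUA_ERRERR / cmplw CARG1, TMP2 / bge >9" test:
 * CARG1 carries either MULTRES (a small byte count) or a negated error
 * code; viewed as unsigned, every negated error code compares >= the
 * unsigned value of -LUA_ERRERR, so a single compare picks the branch that
 * rethrows with lj_err_trace(L, -(int32_t)v). */
static int is_exit_error(uint32_t v)
{
  return v >= (uint32_t)-LUA_ERRERR;
}
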
2714 |//----------------------------------------------------------------------- 3106 |//-----------------------------------------------------------------------
2715 |//-- Math helper functions ---------------------------------------------- 3107 |//-- Math helper functions ----------------------------------------------
2716 |//----------------------------------------------------------------------- 3108 |//-----------------------------------------------------------------------
2717 | 3109 |
2718 |// NYI: Use internal implementations of floor, ceil, trunc. 3110 |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
3111 |
3112 |.macro sfi2d, AHI, ALO
3113 |.if not FPU
3114 | mr. AHI, ALO
3115 | bclr 12, 2 // Handle zero first.
3116 | srawi TMP0, ALO, 31
3117 | xor TMP1, ALO, TMP0
3118 | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
3119 | cntlzw AHI, TMP1
3120 | andix. TMP0, TMP0, 0x800 // Mask sign bit.
3121 | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
3122 | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
3123 | slwi ALO, TMP1, 21
3124 | or AHI, AHI, TMP0 // Sign | Exponent.
3125 | srwi TMP1, TMP1, 11
3126 | slwi AHI, AHI, 20 // Align left.
3127 | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
3128 | blr
3129 |.endif
3130 |.endmacro
3131 |
3132 |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
3133 |->vm_sfi2d_1:
3134 | sfi2d CARG1, CARG2
3135 |
3136 |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
3137 |->vm_sfi2d_2:
3138 | sfi2d CARG3, CARG4
2719 | 3139 |
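The sfi2d macro hand-assembles the IEEE-754 encoding of an int32 for the soft-float comparisons that use it. A standalone C model of the same construction (the function name is illustrative; __builtin_clz stands in for cntlzw):

#include <stdint.h>

/* Model (assumption) of the sfi2d macro: produce the hi/lo words of the
 * IEEE-754 double equal to the 32-bit integer k, without FPU help. As in
 * the asm, the absolute value is aligned left and its leading 1 bit is
 * added into the exponent field, which also bumps the exponent by one. */
static void sfi2d_model(int32_t k, uint32_t *hi, uint32_t *lo)
{
  uint32_t sign = k < 0 ? 0x800u : 0u;            /* sign bit, pre-shift */
  uint32_t m = k < 0 ? 0u - (uint32_t)k : (uint32_t)k;
  uint32_t lz, exp1;
  if (m == 0) { *hi = 0; *lo = 0; return; }       /* handle zero first */
  lz = (uint32_t)__builtin_clz(m);                /* cntlzw */
  m <<= lz;                                       /* leading 1 at bit 31 */
  exp1 = 0x3ffu + 31u - 1u - lz;                  /* biased exponent - 1 */
  *lo = m << 21;                                  /* remaining low mantissa bits */
  *hi = ((sign | exp1) << 20) + (m >> 11);        /* sign|exp, mantissa carry */
}
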
2720 |->vm_modi: 3140 |->vm_modi:
2721 | divwo. TMP0, CARG1, CARG2 3141 | divwo. TMP0, CARG1, CARG2
@@ -2770,6 +3190,11 @@ static void build_subroutines(BuildCtx *ctx)
2770 | blr 3190 | blr
2771 |.endif 3191 |.endif
2772 | 3192 |
3193 |->vm_next:
3194 |.if JIT
3195 | NYI // On big-endian.
3196 |.endif
3197 |
2773 |//----------------------------------------------------------------------- 3198 |//-----------------------------------------------------------------------
2774 |//-- FFI helper functions ----------------------------------------------- 3199 |//-- FFI helper functions -----------------------------------------------
2775 |//----------------------------------------------------------------------- 3200 |//-----------------------------------------------------------------------
@@ -2783,21 +3208,21 @@ static void build_subroutines(BuildCtx *ctx)
2783 | addi DISPATCH, r12, GG_G2DISP 3208 | addi DISPATCH, r12, GG_G2DISP
2784 | stw r11, CTSTATE->cb.slot 3209 | stw r11, CTSTATE->cb.slot
2785 | stw r3, CTSTATE->cb.gpr[0] 3210 | stw r3, CTSTATE->cb.gpr[0]
2786 | stfd f1, CTSTATE->cb.fpr[0] 3211 | .FPU stfd f1, CTSTATE->cb.fpr[0]
2787 | stw r4, CTSTATE->cb.gpr[1] 3212 | stw r4, CTSTATE->cb.gpr[1]
2788 | stfd f2, CTSTATE->cb.fpr[1] 3213 | .FPU stfd f2, CTSTATE->cb.fpr[1]
2789 | stw r5, CTSTATE->cb.gpr[2] 3214 | stw r5, CTSTATE->cb.gpr[2]
2790 | stfd f3, CTSTATE->cb.fpr[2] 3215 | .FPU stfd f3, CTSTATE->cb.fpr[2]
2791 | stw r6, CTSTATE->cb.gpr[3] 3216 | stw r6, CTSTATE->cb.gpr[3]
2792 | stfd f4, CTSTATE->cb.fpr[3] 3217 | .FPU stfd f4, CTSTATE->cb.fpr[3]
2793 | stw r7, CTSTATE->cb.gpr[4] 3218 | stw r7, CTSTATE->cb.gpr[4]
2794 | stfd f5, CTSTATE->cb.fpr[4] 3219 | .FPU stfd f5, CTSTATE->cb.fpr[4]
2795 | stw r8, CTSTATE->cb.gpr[5] 3220 | stw r8, CTSTATE->cb.gpr[5]
2796 | stfd f6, CTSTATE->cb.fpr[5] 3221 | .FPU stfd f6, CTSTATE->cb.fpr[5]
2797 | stw r9, CTSTATE->cb.gpr[6] 3222 | stw r9, CTSTATE->cb.gpr[6]
2798 | stfd f7, CTSTATE->cb.fpr[6] 3223 | .FPU stfd f7, CTSTATE->cb.fpr[6]
2799 | stw r10, CTSTATE->cb.gpr[7] 3224 | stw r10, CTSTATE->cb.gpr[7]
2800 | stfd f8, CTSTATE->cb.fpr[7] 3225 | .FPU stfd f8, CTSTATE->cb.fpr[7]
2801 | addi TMP0, sp, CFRAME_SPACE+8 3226 | addi TMP0, sp, CFRAME_SPACE+8
2802 | stw TMP0, CTSTATE->cb.stack 3227 | stw TMP0, CTSTATE->cb.stack
2803 | mr CARG1, CTSTATE 3228 | mr CARG1, CTSTATE
@@ -2808,21 +3233,21 @@ static void build_subroutines(BuildCtx *ctx)
2808 | lp BASE, L:CRET1->base 3233 | lp BASE, L:CRET1->base
2809 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 3234 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2810 | lp RC, L:CRET1->top 3235 | lp RC, L:CRET1->top
2811 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3236 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2812 | li ZERO, 0 3237 | li ZERO, 0
2813 | mr L, CRET1 3238 | mr L, CRET1
2814 | stw TMP3, TMPD 3239 | .FPU stw TMP3, TMPD
2815 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3240 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2816 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3241 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2817 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3242 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2818 | stw TMP0, TONUM_HI 3243 | .FPU stw TMP0, TONUM_HI
2819 | li TISNIL, LJ_TNIL 3244 | li TISNIL, LJ_TNIL
2820 | li_vmstate INTERP 3245 | li_vmstate INTERP
2821 | lfs TOBIT, TMPD 3246 | .FPU lfs TOBIT, TMPD
2822 | stw TMP3, TMPD 3247 | .FPU stw TMP3, TMPD
2823 | sub RC, RC, BASE 3248 | sub RC, RC, BASE
2824 | st_vmstate 3249 | st_vmstate
2825 | lfs TONUM, TMPD 3250 | .FPU lfs TONUM, TMPD
2826 | ins_callt 3251 | ins_callt
2827 |.endif 3252 |.endif
2828 | 3253 |
@@ -2836,7 +3261,7 @@ static void build_subroutines(BuildCtx *ctx)
2836 | mr CARG2, RA 3261 | mr CARG2, RA
2837 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 3262 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2838 | lwz CRET1, CTSTATE->cb.gpr[0] 3263 | lwz CRET1, CTSTATE->cb.gpr[0]
2839 | lfd FARG1, CTSTATE->cb.fpr[0] 3264 | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
2840 | lwz CRET2, CTSTATE->cb.gpr[1] 3265 | lwz CRET2, CTSTATE->cb.gpr[1]
2841 | b ->vm_leave_unw 3266 | b ->vm_leave_unw
2842 |.endif 3267 |.endif
@@ -2853,14 +3278,13 @@ static void build_subroutines(BuildCtx *ctx)
2853 | stw TMP0, 4(sp) 3278 | stw TMP0, 4(sp)
2854 | cmpwi cr1, CARG3, 0 3279 | cmpwi cr1, CARG3, 0
2855 | mr TMP2, sp 3280 | mr TMP2, sp
2856 | addic. CARG2, CARG2, -1 3281 | addic. CARG2, CARG2, -4
2857 | stwux sp, sp, TMP1 3282 | stwux sp, sp, TMP1
2858 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. 3283 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
2859 | stw r14, -4(TMP2) 3284 | stw r14, -4(TMP2)
2860 | stw CCSTATE, -8(TMP2) 3285 | stw CCSTATE, -8(TMP2)
2861 | mr r14, TMP2 3286 | mr r14, TMP2
2862 | la TMP1, CCSTATE->stack 3287 | la TMP1, CCSTATE->stack
2863 | slwi CARG2, CARG2, 2
2864 | blty >2 3288 | blty >2
2865 | la TMP2, 8(sp) 3289 | la TMP2, 8(sp)
2866 |1: 3290 |1:
@@ -2870,14 +3294,14 @@ static void build_subroutines(BuildCtx *ctx)
2870 | bge <1 3294 | bge <1
2871 |2: 3295 |2:
2872 | bney cr1, >3 3296 | bney cr1, >3
2873 | lfd f1, CCSTATE->fpr[0] 3297 | .FPU lfd f1, CCSTATE->fpr[0]
2874 | lfd f2, CCSTATE->fpr[1] 3298 | .FPU lfd f2, CCSTATE->fpr[1]
2875 | lfd f3, CCSTATE->fpr[2] 3299 | .FPU lfd f3, CCSTATE->fpr[2]
2876 | lfd f4, CCSTATE->fpr[3] 3300 | .FPU lfd f4, CCSTATE->fpr[3]
2877 | lfd f5, CCSTATE->fpr[4] 3301 | .FPU lfd f5, CCSTATE->fpr[4]
2878 | lfd f6, CCSTATE->fpr[5] 3302 | .FPU lfd f6, CCSTATE->fpr[5]
2879 | lfd f7, CCSTATE->fpr[6] 3303 | .FPU lfd f7, CCSTATE->fpr[6]
2880 | lfd f8, CCSTATE->fpr[7] 3304 | .FPU lfd f8, CCSTATE->fpr[7]
2881 |3: 3305 |3:
2882 | lp TMP0, CCSTATE->func 3306 | lp TMP0, CCSTATE->func
2883 | lwz CARG2, CCSTATE->gpr[1] 3307 | lwz CARG2, CCSTATE->gpr[1]
@@ -2894,7 +3318,7 @@ static void build_subroutines(BuildCtx *ctx)
2894 | lwz TMP2, -4(r14) 3318 | lwz TMP2, -4(r14)
2895 | lwz TMP0, 4(r14) 3319 | lwz TMP0, 4(r14)
2896 | stw CARG1, CCSTATE:TMP1->gpr[0] 3320 | stw CARG1, CCSTATE:TMP1->gpr[0]
2897 | stfd FARG1, CCSTATE:TMP1->fpr[0] 3321 | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
2898 | stw CARG2, CCSTATE:TMP1->gpr[1] 3322 | stw CARG2, CCSTATE:TMP1->gpr[1]
2899 | mtlr TMP0 3323 | mtlr TMP0
2900 | stw CARG3, CCSTATE:TMP1->gpr[2] 3324 | stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2923,19 +3347,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2923 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3347 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2924 | // RA = src1*8, RD = src2*8, JMP with RD = target 3348 | // RA = src1*8, RD = src2*8, JMP with RD = target
2925 |.if DUALNUM 3349 |.if DUALNUM
2926 | lwzux TMP0, RA, BASE 3350 | lwzux CARG1, RA, BASE
2927 | addi PC, PC, 4 3351 | addi PC, PC, 4
2928 | lwz CARG2, 4(RA) 3352 | lwz CARG2, 4(RA)
2929 | lwzux TMP1, RD, BASE 3353 | lwzux CARG3, RD, BASE
2930 | lwz TMP2, -4(PC) 3354 | lwz TMP2, -4(PC)
2931 | checknum cr0, TMP0 3355 | checknum cr0, CARG1
2932 | lwz CARG3, 4(RD) 3356 | lwz CARG4, 4(RD)
2933 | decode_RD4 TMP2, TMP2 3357 | decode_RD4 TMP2, TMP2
2934 | checknum cr1, TMP1 3358 | checknum cr1, CARG3
2935 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3359 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
2936 | bne cr0, >7 3360 | bne cr0, >7
2937 | bne cr1, >8 3361 | bne cr1, >8
2938 | cmpw CARG2, CARG3 3362 | cmpw CARG2, CARG4
2939 if (op == BC_ISLT) { 3363 if (op == BC_ISLT) {
2940 | bge >2 3364 | bge >2
2941 } else if (op == BC_ISGE) { 3365 } else if (op == BC_ISGE) {
@@ -2946,28 +3370,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2946 | ble >2 3370 | ble >2
2947 } 3371 }
2948 |1: 3372 |1:
2949 | add PC, PC, TMP2 3373 | add PC, PC, SAVE0
2950 |2: 3374 |2:
2951 | ins_next 3375 | ins_next
2952 | 3376 |
2953 |7: // RA is not an integer. 3377 |7: // RA is not an integer.
2954 | bgt cr0, ->vmeta_comp 3378 | bgt cr0, ->vmeta_comp
2955 | // RA is a number. 3379 | // RA is a number.
2956 | lfd f0, 0(RA) 3380 | .FPU lfd f0, 0(RA)
2957 | bgt cr1, ->vmeta_comp 3381 | bgt cr1, ->vmeta_comp
2958 | blt cr1, >4 3382 | blt cr1, >4
2959 | // RA is a number, RD is an integer. 3383 | // RA is a number, RD is an integer.
2960 | tonum_i f1, CARG3 3384 |.if FPU
3385 | tonum_i f1, CARG4
3386 |.else
3387 | bl ->vm_sfi2d_2
3388 |.endif
2961 | b >5 3389 | b >5
2962 | 3390 |
2963 |8: // RA is an integer, RD is not an integer. 3391 |8: // RA is an integer, RD is not an integer.
2964 | bgt cr1, ->vmeta_comp 3392 | bgt cr1, ->vmeta_comp
2965 | // RA is an integer, RD is a number. 3393 | // RA is an integer, RD is a number.
3394 |.if FPU
2966 | tonum_i f0, CARG2 3395 | tonum_i f0, CARG2
3396 |.else
3397 | bl ->vm_sfi2d_1
3398 |.endif
2967 |4: 3399 |4:
2968 | lfd f1, 0(RD) 3400 | .FPU lfd f1, 0(RD)
2969 |5: 3401 |5:
3402 |.if FPU
2970 | fcmpu cr0, f0, f1 3403 | fcmpu cr0, f0, f1
3404 |.else
3405 | blex __ledf2
3406 | cmpwi CRET1, 0
3407 |.endif
2971 if (op == BC_ISLT) { 3408 if (op == BC_ISLT) {
2972 | bge <2 3409 | bge <2
2973 } else if (op == BC_ISGE) { 3410 } else if (op == BC_ISGE) {
@@ -3015,42 +3452,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3015 vk = op == BC_ISEQV; 3452 vk = op == BC_ISEQV;
3016 | // RA = src1*8, RD = src2*8, JMP with RD = target 3453 | // RA = src1*8, RD = src2*8, JMP with RD = target
3017 |.if DUALNUM 3454 |.if DUALNUM
3018 | lwzux TMP0, RA, BASE 3455 | lwzux CARG1, RA, BASE
3019 | addi PC, PC, 4 3456 | addi PC, PC, 4
3020 | lwz CARG2, 4(RA) 3457 | lwz CARG2, 4(RA)
3021 | lwzux TMP1, RD, BASE 3458 | lwzux CARG3, RD, BASE
3022 | checknum cr0, TMP0 3459 | checknum cr0, CARG1
3023 | lwz TMP2, -4(PC) 3460 | lwz SAVE0, -4(PC)
3024 | checknum cr1, TMP1 3461 | checknum cr1, CARG3
3025 | decode_RD4 TMP2, TMP2 3462 | decode_RD4 SAVE0, SAVE0
3026 | lwz CARG3, 4(RD) 3463 | lwz CARG4, 4(RD)
3027 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3464 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
3028 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3465 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3029 if (vk) { 3466 if (vk) {
3030 | ble cr7, ->BC_ISEQN_Z 3467 | ble cr7, ->BC_ISEQN_Z
3031 } else { 3468 } else {
3032 | ble cr7, ->BC_ISNEN_Z 3469 | ble cr7, ->BC_ISNEN_Z
3033 } 3470 }
3034 |.else 3471 |.else
3035 | lwzux TMP0, RA, BASE 3472 | lwzux CARG1, RA, BASE
3036 | lwz TMP2, 0(PC) 3473 | lwz SAVE0, 0(PC)
3037 | lfd f0, 0(RA) 3474 | lfd f0, 0(RA)
3038 | addi PC, PC, 4 3475 | addi PC, PC, 4
3039 | lwzux TMP1, RD, BASE 3476 | lwzux CARG3, RD, BASE
3040 | checknum cr0, TMP0 3477 | checknum cr0, CARG1
3041 | decode_RD4 TMP2, TMP2 3478 | decode_RD4 SAVE0, SAVE0
3042 | lfd f1, 0(RD) 3479 | lfd f1, 0(RD)
3043 | checknum cr1, TMP1 3480 | checknum cr1, CARG3
3044 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3481 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3045 | bge cr0, >5 3482 | bge cr0, >5
3046 | bge cr1, >5 3483 | bge cr1, >5
3047 | fcmpu cr0, f0, f1 3484 | fcmpu cr0, f0, f1
3048 if (vk) { 3485 if (vk) {
3049 | bne >1 3486 | bne >1
3050 | add PC, PC, TMP2 3487 | add PC, PC, SAVE0
3051 } else { 3488 } else {
3052 | beq >1 3489 | beq >1
3053 | add PC, PC, TMP2 3490 | add PC, PC, SAVE0
3054 } 3491 }
3055 |1: 3492 |1:
3056 | ins_next 3493 | ins_next
@@ -3058,36 +3495,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3058 |5: // Either or both types are not numbers. 3495 |5: // Either or both types are not numbers.
3059 |.if not DUALNUM 3496 |.if not DUALNUM
3060 | lwz CARG2, 4(RA) 3497 | lwz CARG2, 4(RA)
3061 | lwz CARG3, 4(RD) 3498 | lwz CARG4, 4(RD)
3062 |.endif 3499 |.endif
3063 |.if FFI 3500 |.if FFI
3064 | cmpwi cr7, TMP0, LJ_TCDATA 3501 | cmpwi cr7, CARG1, LJ_TCDATA
3065 | cmpwi cr5, TMP1, LJ_TCDATA 3502 | cmpwi cr5, CARG3, LJ_TCDATA
3066 |.endif 3503 |.endif
3067 | not TMP3, TMP0 3504 | not TMP2, CARG1
3068 | cmplw TMP0, TMP1 3505 | cmplw CARG1, CARG3
3069 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3506 | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
3070 |.if FFI 3507 |.if FFI
3071 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3508 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
3072 |.endif 3509 |.endif
3073 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3510 | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
3074 |.if FFI 3511 |.if FFI
3075 | beq cr7, ->vmeta_equal_cd 3512 | beq cr7, ->vmeta_equal_cd
3076 |.endif 3513 |.endif
3077 | cmplw cr5, CARG2, CARG3 3514 | cmplw cr5, CARG2, CARG4
3078 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3515 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
3079 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3516 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
3080 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3517 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
3081 | mr SAVE0, PC 3518 | mr SAVE1, PC
3082 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3519 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
3083 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3520 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
3084 if (vk) { 3521 if (vk) {
3085 | bne cr0, >6 3522 | bne cr0, >6
3086 | add PC, PC, TMP2 3523 | add PC, PC, SAVE0
3087 |6: 3524 |6:
3088 } else { 3525 } else {
3089 | beq cr0, >6 3526 | beq cr0, >6
3090 | add PC, PC, TMP2 3527 | add PC, PC, SAVE0
3091 |6: 3528 |6:
3092 } 3529 }
3093 |.if DUALNUM 3530 |.if DUALNUM
@@ -3102,6 +3539,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3102 | 3539 |
3103 | // Different tables or userdatas. Need to check __eq metamethod. 3540 | // Different tables or userdatas. Need to check __eq metamethod.
3104 | // Field metatable must be at same offset for GCtab and GCudata! 3541 | // Field metatable must be at same offset for GCtab and GCudata!
3542 | mr CARG3, CARG4
3105 | lwz TAB:TMP2, TAB:CARG2->metatable 3543 | lwz TAB:TMP2, TAB:CARG2->metatable
3106 | li CARG4, 1-vk // ne = 0 or 1. 3544 | li CARG4, 1-vk // ne = 0 or 1.
3107 | cmplwi TAB:TMP2, 0 3545 | cmplwi TAB:TMP2, 0
@@ -3109,7 +3547,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3109 | lbz TMP2, TAB:TMP2->nomm 3547 | lbz TMP2, TAB:TMP2->nomm
3110 | andix. TMP2, TMP2, 1<<MM_eq 3548 | andix. TMP2, TMP2, 1<<MM_eq
3111 | bne <1 // Or 'no __eq' flag set? 3549 | bne <1 // Or 'no __eq' flag set?
3112 | mr PC, SAVE0 // Restore old PC. 3550 | mr PC, SAVE1 // Restore old PC.
3113 | b ->vmeta_equal // Handle __eq metamethod. 3551 | b ->vmeta_equal // Handle __eq metamethod.
3114 break; 3552 break;
3115 3553
@@ -3150,16 +3588,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3150 vk = op == BC_ISEQN; 3588 vk = op == BC_ISEQN;
3151 | // RA = src*8, RD = num_const*8, JMP with RD = target 3589 | // RA = src*8, RD = num_const*8, JMP with RD = target
3152 |.if DUALNUM 3590 |.if DUALNUM
3153 | lwzux TMP0, RA, BASE 3591 | lwzux CARG1, RA, BASE
3154 | addi PC, PC, 4 3592 | addi PC, PC, 4
3155 | lwz CARG2, 4(RA) 3593 | lwz CARG2, 4(RA)
3156 | lwzux TMP1, RD, KBASE 3594 | lwzux CARG3, RD, KBASE
3157 | checknum cr0, TMP0 3595 | checknum cr0, CARG1
3158 | lwz TMP2, -4(PC) 3596 | lwz SAVE0, -4(PC)
3159 | checknum cr1, TMP1 3597 | checknum cr1, CARG3
3160 | decode_RD4 TMP2, TMP2 3598 | decode_RD4 SAVE0, SAVE0
3161 | lwz CARG3, 4(RD) 3599 | lwz CARG4, 4(RD)
3162 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3600 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3163 if (vk) { 3601 if (vk) {
3164 |->BC_ISEQN_Z: 3602 |->BC_ISEQN_Z:
3165 } else { 3603 } else {
@@ -3167,7 +3605,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3167 } 3605 }
3168 | bne cr0, >7 3606 | bne cr0, >7
3169 | bne cr1, >8 3607 | bne cr1, >8
3170 | cmpw CARG2, CARG3 3608 | cmpw CARG2, CARG4
3171 |4: 3609 |4:
3172 |.else 3610 |.else
3173 if (vk) { 3611 if (vk) {
@@ -3175,20 +3613,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3175 } else { 3613 } else {
3176 |->BC_ISNEN_Z: // Dummy label. 3614 |->BC_ISNEN_Z: // Dummy label.
3177 } 3615 }
3178 | lwzx TMP0, BASE, RA 3616 | lwzx CARG1, BASE, RA
3179 | addi PC, PC, 4 3617 | addi PC, PC, 4
3180 | lfdx f0, BASE, RA 3618 | lfdx f0, BASE, RA
3181 | lwz TMP2, -4(PC) 3619 | lwz SAVE0, -4(PC)
3182 | lfdx f1, KBASE, RD 3620 | lfdx f1, KBASE, RD
3183 | decode_RD4 TMP2, TMP2 3621 | decode_RD4 SAVE0, SAVE0
3184 | checknum TMP0 3622 | checknum CARG1
3185 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3623 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3186 | bge >3 3624 | bge >3
3187 | fcmpu cr0, f0, f1 3625 | fcmpu cr0, f0, f1
3188 |.endif 3626 |.endif
3189 if (vk) { 3627 if (vk) {
3190 | bne >1 3628 | bne >1
3191 | add PC, PC, TMP2 3629 | add PC, PC, SAVE0
3192 |1: 3630 |1:
3193 |.if not FFI 3631 |.if not FFI
3194 |3: 3632 |3:
@@ -3199,13 +3637,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3199 |.if not FFI 3637 |.if not FFI
3200 |3: 3638 |3:
3201 |.endif 3639 |.endif
3202 | add PC, PC, TMP2 3640 | add PC, PC, SAVE0
3203 |2: 3641 |2:
3204 } 3642 }
3205 | ins_next 3643 | ins_next
3206 |.if FFI 3644 |.if FFI
3207 |3: 3645 |3:
3208 | cmpwi TMP0, LJ_TCDATA 3646 | cmpwi CARG1, LJ_TCDATA
3209 | beq ->vmeta_equal_cd 3647 | beq ->vmeta_equal_cd
3210 | b <1 3648 | b <1
3211 |.endif 3649 |.endif
@@ -3213,18 +3651,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3213 |7: // RA is not an integer. 3651 |7: // RA is not an integer.
3214 | bge cr0, <3 3652 | bge cr0, <3
3215 | // RA is a number. 3653 | // RA is a number.
3216 | lfd f0, 0(RA) 3654 | .FPU lfd f0, 0(RA)
3217 | blt cr1, >1 3655 | blt cr1, >1
3218 | // RA is a number, RD is an integer. 3656 | // RA is a number, RD is an integer.
3219 | tonum_i f1, CARG3 3657 |.if FPU
3658 | tonum_i f1, CARG4
3659 |.else
3660 | bl ->vm_sfi2d_2
3661 |.endif
3220 | b >2 3662 | b >2
3221 | 3663 |
3222 |8: // RA is an integer, RD is a number. 3664 |8: // RA is an integer, RD is a number.
3665 |.if FPU
3223 | tonum_i f0, CARG2 3666 | tonum_i f0, CARG2
3667 |.else
3668 | bl ->vm_sfi2d_1
3669 |.endif
3224 |1: 3670 |1:
3225 | lfd f1, 0(RD) 3671 | .FPU lfd f1, 0(RD)
3226 |2: 3672 |2:
3673 |.if FPU
3227 | fcmpu cr0, f0, f1 3674 | fcmpu cr0, f0, f1
3675 |.else
3676 | blex __ledf2
3677 | cmpwi CRET1, 0
3678 |.endif
3228 | b <4 3679 | b <4
3229 |.endif 3680 |.endif
3230 break; 3681 break;
@@ -3279,7 +3730,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3279 | add PC, PC, TMP2 3730 | add PC, PC, TMP2
3280 } else { 3731 } else {
3281 | li TMP1, LJ_TFALSE 3732 | li TMP1, LJ_TFALSE
3733 |.if FPU
3282 | lfdx f0, BASE, RD 3734 | lfdx f0, BASE, RD
3735 |.else
3736 | lwzux CARG1, RD, BASE
3737 | lwz CARG2, 4(RD)
3738 |.endif
3283 | cmplw TMP0, TMP1 3739 | cmplw TMP0, TMP1
3284 if (op == BC_ISTC) { 3740 if (op == BC_ISTC) {
3285 | bge >1 3741 | bge >1
@@ -3288,20 +3744,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3288 } 3744 }
3289 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3745 | addis PC, PC, -(BCBIAS_J*4 >> 16)
3290 | decode_RD4 TMP2, INS 3746 | decode_RD4 TMP2, INS
3747 |.if FPU
3291 | stfdx f0, BASE, RA 3748 | stfdx f0, BASE, RA
3749 |.else
3750 | stwux CARG1, RA, BASE
3751 | stw CARG2, 4(RA)
3752 |.endif
3292 | add PC, PC, TMP2 3753 | add PC, PC, TMP2
3293 |1: 3754 |1:
3294 } 3755 }
3295 | ins_next 3756 | ins_next
3296 break; 3757 break;
3297 3758
3759 case BC_ISTYPE:
3760 | // RA = src*8, RD = -type*8
3761 | lwzx TMP0, BASE, RA
3762 | srwi TMP1, RD, 3
3763 | ins_next1
3764 |.if not PPE and not GPR64
3765 | add. TMP0, TMP0, TMP1
3766 |.else
3767 | neg TMP1, TMP1
3768 | cmpw TMP0, TMP1
3769 |.endif
3770 | bne ->vmeta_istype
3771 | ins_next2
3772 break;
3773 case BC_ISNUM:
3774 | // RA = src*8, RD = -(TISNUM-1)*8
3775 | lwzx TMP0, BASE, RA
3776 | ins_next1
3777 | checknum TMP0
3778 | bge ->vmeta_istype
3779 | ins_next2
3780 break;
3781
3298 /* -- Unary ops --------------------------------------------------------- */ 3782 /* -- Unary ops --------------------------------------------------------- */
3299 3783
3300 case BC_MOV: 3784 case BC_MOV:
3301 | // RA = dst*8, RD = src*8 3785 | // RA = dst*8, RD = src*8
3302 | ins_next1 3786 | ins_next1
3787 |.if FPU
3303 | lfdx f0, BASE, RD 3788 | lfdx f0, BASE, RD
3304 | stfdx f0, BASE, RA 3789 | stfdx f0, BASE, RA
3790 |.else
3791 | lwzux TMP0, RD, BASE
3792 | lwz TMP1, 4(RD)
3793 | stwux TMP0, RA, BASE
3794 | stw TMP1, 4(RA)
3795 |.endif
3305 | ins_next2 3796 | ins_next2
3306 break; 3797 break;
3307 case BC_NOT: 3798 case BC_NOT:
@@ -3403,44 +3894,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3403 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3894 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3404 ||switch (vk) { 3895 ||switch (vk) {
3405 ||case 0: 3896 ||case 0:
3406 | lwzx TMP1, BASE, RB 3897 | lwzx CARG1, BASE, RB
3407 | .if DUALNUM 3898 | .if DUALNUM
3408 | lwzx TMP2, KBASE, RC 3899 | lwzx CARG3, KBASE, RC
3409 | .endif 3900 | .endif
3901 | .if FPU
3410 | lfdx f14, BASE, RB 3902 | lfdx f14, BASE, RB
3411 | lfdx f15, KBASE, RC 3903 | lfdx f15, KBASE, RC
3904 | .else
3905 | add TMP1, BASE, RB
3906 | add TMP2, KBASE, RC
3907 | lwz CARG2, 4(TMP1)
3908 | lwz CARG4, 4(TMP2)
3909 | .endif
3412 | .if DUALNUM 3910 | .if DUALNUM
3413 | checknum cr0, TMP1 3911 | checknum cr0, CARG1
3414 | checknum cr1, TMP2 3912 | checknum cr1, CARG3
3415 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3913 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3416 | bge ->vmeta_arith_vn 3914 | bge ->vmeta_arith_vn
3417 | .else 3915 | .else
3418 | checknum TMP1; bge ->vmeta_arith_vn 3916 | checknum CARG1; bge ->vmeta_arith_vn
3419 | .endif 3917 | .endif
3420 || break; 3918 || break;
3421 ||case 1: 3919 ||case 1:
3422 | lwzx TMP1, BASE, RB 3920 | lwzx CARG1, BASE, RB
3423 | .if DUALNUM 3921 | .if DUALNUM
3424 | lwzx TMP2, KBASE, RC 3922 | lwzx CARG3, KBASE, RC
3425 | .endif 3923 | .endif
3924 | .if FPU
3426 | lfdx f15, BASE, RB 3925 | lfdx f15, BASE, RB
3427 | lfdx f14, KBASE, RC 3926 | lfdx f14, KBASE, RC
3927 | .else
3928 | add TMP1, BASE, RB
3929 | add TMP2, KBASE, RC
3930 | lwz CARG2, 4(TMP1)
3931 | lwz CARG4, 4(TMP2)
3932 | .endif
3428 | .if DUALNUM 3933 | .if DUALNUM
3429 | checknum cr0, TMP1 3934 | checknum cr0, CARG1
3430 | checknum cr1, TMP2 3935 | checknum cr1, CARG3
3431 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3936 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3432 | bge ->vmeta_arith_nv 3937 | bge ->vmeta_arith_nv
3433 | .else 3938 | .else
3434 | checknum TMP1; bge ->vmeta_arith_nv 3939 | checknum CARG1; bge ->vmeta_arith_nv
3435 | .endif 3940 | .endif
3436 || break; 3941 || break;
3437 ||default: 3942 ||default:
3438 | lwzx TMP1, BASE, RB 3943 | lwzx CARG1, BASE, RB
3439 | lwzx TMP2, BASE, RC 3944 | lwzx CARG3, BASE, RC
3945 | .if FPU
3440 | lfdx f14, BASE, RB 3946 | lfdx f14, BASE, RB
3441 | lfdx f15, BASE, RC 3947 | lfdx f15, BASE, RC
3442 | checknum cr0, TMP1 3948 | .else
3443 | checknum cr1, TMP2 3949 | add TMP1, BASE, RB
3950 | add TMP2, BASE, RC
3951 | lwz CARG2, 4(TMP1)
3952 | lwz CARG4, 4(TMP2)
3953 | .endif
3954 | checknum cr0, CARG1
3955 | checknum cr1, CARG3
3444 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3956 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3445 | bge ->vmeta_arith_vv 3957 | bge ->vmeta_arith_vv
3446 || break; 3958 || break;
@@ -3474,48 +3986,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3474 | fsub a, b, a // b - floor(b/c)*c 3986 | fsub a, b, a // b - floor(b/c)*c
3475 |.endmacro 3987 |.endmacro
3476 | 3988 |
3989 |.macro sfpmod
3990 |->BC_MODVN_Z:
3991 | stw CARG1, SFSAVE_1
3992 | stw CARG2, SFSAVE_2
3993 | mr SAVE0, CARG3
3994 | mr SAVE1, CARG4
3995 | blex __divdf3
3996 | blex floor
3997 | mr CARG3, SAVE0
3998 | mr CARG4, SAVE1
3999 | blex __muldf3
4000 | mr CARG3, CRET1
4001 | mr CARG4, CRET2
4002 | lwz CARG1, SFSAVE_1
4003 | lwz CARG2, SFSAVE_2
4004 | blex __subdf3
4005 |.endmacro
4006 |
3477 |.macro ins_arithfp, fpins 4007 |.macro ins_arithfp, fpins
3478 | ins_arithpre 4008 | ins_arithpre
3479 |.if "fpins" == "fpmod_" 4009 |.if "fpins" == "fpmod_"
3480 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4010 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3481 |.else 4011 |.elif FPU
3482 | fpins f0, f14, f15 4012 | fpins f0, f14, f15
3483 | ins_next1 4013 | ins_next1
3484 | stfdx f0, BASE, RA 4014 | stfdx f0, BASE, RA
3485 | ins_next2 4015 | ins_next2
4016 |.else
4017 | blex __divdf3 // Only soft-float div uses this macro.
4018 | ins_next1
4019 | stwux CRET1, RA, BASE
4020 | stw CRET2, 4(RA)
4021 | ins_next2
3486 |.endif 4022 |.endif
3487 |.endmacro 4023 |.endmacro
3488 | 4024 |
3489 |.macro ins_arithdn, intins, fpins 4025 |.macro ins_arithdn, intins, fpins, fpcall
3490 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 4026 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3491 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4027 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3492 ||switch (vk) { 4028 ||switch (vk) {
3493 ||case 0: 4029 ||case 0:
3494 | lwzux TMP1, RB, BASE 4030 | lwzux CARG1, RB, BASE
3495 | lwzux TMP2, RC, KBASE 4031 | lwzux CARG3, RC, KBASE
3496 | lwz CARG1, 4(RB) 4032 | lwz CARG2, 4(RB)
3497 | checknum cr0, TMP1 4033 | checknum cr0, CARG1
3498 | lwz CARG2, 4(RC) 4034 | lwz CARG4, 4(RC)
4035 | checknum cr1, CARG3
3499 || break; 4036 || break;
3500 ||case 1: 4037 ||case 1:
3501 | lwzux TMP1, RB, BASE 4038 | lwzux CARG3, RB, BASE
3502 | lwzux TMP2, RC, KBASE 4039 | lwzux CARG1, RC, KBASE
3503 | lwz CARG2, 4(RB) 4040 | lwz CARG4, 4(RB)
3504 | checknum cr0, TMP1 4041 | checknum cr0, CARG3
3505 | lwz CARG1, 4(RC) 4042 | lwz CARG2, 4(RC)
4043 | checknum cr1, CARG1
3506 || break; 4044 || break;
3507 ||default: 4045 ||default:
3508 | lwzux TMP1, RB, BASE 4046 | lwzux CARG1, RB, BASE
3509 | lwzux TMP2, RC, BASE 4047 | lwzux CARG3, RC, BASE
3510 | lwz CARG1, 4(RB) 4048 | lwz CARG2, 4(RB)
3511 | checknum cr0, TMP1 4049 | checknum cr0, CARG1
3512 | lwz CARG2, 4(RC) 4050 | lwz CARG4, 4(RC)
4051 | checknum cr1, CARG3
3513 || break; 4052 || break;
3514 ||} 4053 ||}
3515 | checknum cr1, TMP2
3516 | bne >5 4054 | bne >5
3517 | bne cr1, >5 4055 | bne cr1, >5
3518 | intins CARG1, CARG1, CARG2 4056 |.if "intins" == "intmod"
4057 | mr CARG1, CARG2
4058 | mr CARG2, CARG4
4059 |.endif
4060 | intins CARG1, CARG2, CARG4
3519 | bso >4 4061 | bso >4
3520 |1: 4062 |1:
3521 | ins_next1 4063 | ins_next1
@@ -3527,29 +4069,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3527 | checkov TMP0, <1 // Ignore unrelated overflow. 4069 | checkov TMP0, <1 // Ignore unrelated overflow.
3528 | ins_arithfallback b 4070 | ins_arithfallback b
3529 |5: // FP variant. 4071 |5: // FP variant.
4072 |.if FPU
3530 ||if (vk == 1) { 4073 ||if (vk == 1) {
3531 | lfd f15, 0(RB) 4074 | lfd f15, 0(RB)
3532 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3533 | lfd f14, 0(RC) 4075 | lfd f14, 0(RC)
3534 ||} else { 4076 ||} else {
3535 | lfd f14, 0(RB) 4077 | lfd f14, 0(RB)
3536 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3537 | lfd f15, 0(RC) 4078 | lfd f15, 0(RC)
3538 ||} 4079 ||}
4080 |.endif
4081 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3539 | ins_arithfallback bge 4082 | ins_arithfallback bge
3540 |.if "fpins" == "fpmod_" 4083 |.if "fpins" == "fpmod_"
3541 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4084 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3542 |.else 4085 |.else
4086 |.if FPU
3543 | fpins f0, f14, f15 4087 | fpins f0, f14, f15
3544 | ins_next1
3545 | stfdx f0, BASE, RA 4088 | stfdx f0, BASE, RA
4089 |.else
4090 |.if "fpcall" == "sfpmod"
4091 | sfpmod
4092 |.else
4093 | blex fpcall
4094 |.endif
4095 | stwux CRET1, RA, BASE
4096 | stw CRET2, 4(RA)
4097 |.endif
4098 | ins_next1
3546 | b <2 4099 | b <2
3547 |.endif 4100 |.endif
3548 |.endmacro 4101 |.endmacro
3549 | 4102 |
3550 |.macro ins_arith, intins, fpins 4103 |.macro ins_arith, intins, fpins, fpcall
3551 |.if DUALNUM 4104 |.if DUALNUM
3552 | ins_arithdn intins, fpins 4105 | ins_arithdn intins, fpins, fpcall
3553 |.else 4106 |.else
3554 | ins_arithfp fpins 4107 | ins_arithfp fpins
3555 |.endif 4108 |.endif
@@ -3564,9 +4117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3564 | addo. TMP0, TMP0, TMP1 4117 | addo. TMP0, TMP0, TMP1
3565 | add y, a, b 4118 | add y, a, b
3566 |.endmacro 4119 |.endmacro
3567 | ins_arith addo32., fadd 4120 | ins_arith addo32., fadd, __adddf3
3568 |.else 4121 |.else
3569 | ins_arith addo., fadd 4122 | ins_arith addo., fadd, __adddf3
3570 |.endif 4123 |.endif
3571 break; 4124 break;
3572 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4125 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3578,36 +4131,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3578 | subo. TMP0, TMP0, TMP1 4131 | subo. TMP0, TMP0, TMP1
3579 | sub y, a, b 4132 | sub y, a, b
3580 |.endmacro 4133 |.endmacro
3581 | ins_arith subo32., fsub 4134 | ins_arith subo32., fsub, __subdf3
3582 |.else 4135 |.else
3583 | ins_arith subo., fsub 4136 | ins_arith subo., fsub, __subdf3
3584 |.endif 4137 |.endif
3585 break; 4138 break;
3586 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4139 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3587 | ins_arith mullwo., fmul 4140 | ins_arith mullwo., fmul, __muldf3
3588 break; 4141 break;
3589 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4142 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3590 | ins_arithfp fdiv 4143 | ins_arithfp fdiv
3591 break; 4144 break;
3592 case BC_MODVN: 4145 case BC_MODVN:
3593 | ins_arith intmod, fpmod 4146 | ins_arith intmod, fpmod, sfpmod
3594 break; 4147 break;
3595 case BC_MODNV: case BC_MODVV: 4148 case BC_MODNV: case BC_MODVV:
3596 | ins_arith intmod, fpmod_ 4149 | ins_arith intmod, fpmod_, sfpmod
3597 break; 4150 break;
3598 case BC_POW: 4151 case BC_POW:
3599 | // NYI: (partial) integer arithmetic. 4152 | // NYI: (partial) integer arithmetic.
3600 | lwzx TMP1, BASE, RB 4153 | lwzx CARG1, BASE, RB
4154 | lwzx CARG3, BASE, RC
4155 |.if FPU
3601 | lfdx FARG1, BASE, RB 4156 | lfdx FARG1, BASE, RB
3602 | lwzx TMP2, BASE, RC
3603 | lfdx FARG2, BASE, RC 4157 | lfdx FARG2, BASE, RC
3604 | checknum cr0, TMP1 4158 |.else
3605 | checknum cr1, TMP2 4159 | add TMP1, BASE, RB
4160 | add TMP2, BASE, RC
4161 | lwz CARG2, 4(TMP1)
4162 | lwz CARG4, 4(TMP2)
4163 |.endif
4164 | checknum cr0, CARG1
4165 | checknum cr1, CARG3
3606 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4166 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3607 | bge ->vmeta_arith_vv 4167 | bge ->vmeta_arith_vv
3608 | blex pow 4168 | blex pow
3609 | ins_next1 4169 | ins_next1
4170 |.if FPU
3610 | stfdx FARG1, BASE, RA 4171 | stfdx FARG1, BASE, RA
4172 |.else
4173 | stwux CARG1, RA, BASE
4174 | stw CARG2, 4(RA)
4175 |.endif
3611 | ins_next2 4176 | ins_next2
3612 break; 4177 break;
3613 4178
@@ -3627,8 +4192,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3627 | lp BASE, L->base 4192 | lp BASE, L->base
3628 | bne ->vmeta_binop 4193 | bne ->vmeta_binop
3629 | ins_next1 4194 | ins_next1
4195 |.if FPU
3630 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 4196 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
3631 | stfdx f0, BASE, RA 4197 | stfdx f0, BASE, RA
4198 |.else
4199 | lwzux TMP0, SAVE0, BASE
4200 | lwz TMP1, 4(SAVE0)
4201 | stwux TMP0, RA, BASE
4202 | stw TMP1, 4(RA)
4203 |.endif
3632 | ins_next2 4204 | ins_next2
3633 break; 4205 break;
3634 4206
@@ -3691,8 +4263,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3691 case BC_KNUM: 4263 case BC_KNUM:
3692 | // RA = dst*8, RD = num_const*8 4264 | // RA = dst*8, RD = num_const*8
3693 | ins_next1 4265 | ins_next1
4266 |.if FPU
3694 | lfdx f0, KBASE, RD 4267 | lfdx f0, KBASE, RD
3695 | stfdx f0, BASE, RA 4268 | stfdx f0, BASE, RA
4269 |.else
4270 | lwzux TMP0, RD, KBASE
4271 | lwz TMP1, 4(RD)
4272 | stwux TMP0, RA, BASE
4273 | stw TMP1, 4(RA)
4274 |.endif
3696 | ins_next2 4275 | ins_next2
3697 break; 4276 break;
3698 case BC_KPRI: 4277 case BC_KPRI:
@@ -3725,8 +4304,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3725 | lwzx UPVAL:RB, LFUNC:RB, RD 4304 | lwzx UPVAL:RB, LFUNC:RB, RD
3726 | ins_next1 4305 | ins_next1
3727 | lwz TMP1, UPVAL:RB->v 4306 | lwz TMP1, UPVAL:RB->v
4307 |.if FPU
3728 | lfd f0, 0(TMP1) 4308 | lfd f0, 0(TMP1)
3729 | stfdx f0, BASE, RA 4309 | stfdx f0, BASE, RA
4310 |.else
4311 | lwz TMP2, 0(TMP1)
4312 | lwz TMP3, 4(TMP1)
4313 | stwux TMP2, RA, BASE
4314 | stw TMP3, 4(RA)
4315 |.endif
3730 | ins_next2 4316 | ins_next2
3731 break; 4317 break;
3732 case BC_USETV: 4318 case BC_USETV:
@@ -3734,14 +4320,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3734 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4320 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3735 | srwi RA, RA, 1 4321 | srwi RA, RA, 1
3736 | addi RA, RA, offsetof(GCfuncL, uvptr) 4322 | addi RA, RA, offsetof(GCfuncL, uvptr)
4323 |.if FPU
3737 | lfdux f0, RD, BASE 4324 | lfdux f0, RD, BASE
4325 |.else
4326 | lwzux CARG1, RD, BASE
4327 | lwz CARG3, 4(RD)
4328 |.endif
3738 | lwzx UPVAL:RB, LFUNC:RB, RA 4329 | lwzx UPVAL:RB, LFUNC:RB, RA
3739 | lbz TMP3, UPVAL:RB->marked 4330 | lbz TMP3, UPVAL:RB->marked
3740 | lwz CARG2, UPVAL:RB->v 4331 | lwz CARG2, UPVAL:RB->v
3741 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 4332 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3742 | lbz TMP0, UPVAL:RB->closed 4333 | lbz TMP0, UPVAL:RB->closed
3743 | lwz TMP2, 0(RD) 4334 | lwz TMP2, 0(RD)
4335 |.if FPU
3744 | stfd f0, 0(CARG2) 4336 | stfd f0, 0(CARG2)
4337 |.else
4338 | stw CARG1, 0(CARG2)
4339 | stw CARG3, 4(CARG2)
4340 |.endif
3745 | cmplwi cr1, TMP0, 0 4341 | cmplwi cr1, TMP0, 0
3746 | lwz TMP1, 4(RD) 4342 | lwz TMP1, 4(RD)
3747 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4343 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3797,11 +4393,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3797 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4393 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3798 | srwi RA, RA, 1 4394 | srwi RA, RA, 1
3799 | addi RA, RA, offsetof(GCfuncL, uvptr) 4395 | addi RA, RA, offsetof(GCfuncL, uvptr)
4396 |.if FPU
3800 | lfdx f0, KBASE, RD 4397 | lfdx f0, KBASE, RD
4398 |.else
4399 | lwzux TMP2, RD, KBASE
4400 | lwz TMP3, 4(RD)
4401 |.endif
3801 | lwzx UPVAL:RB, LFUNC:RB, RA 4402 | lwzx UPVAL:RB, LFUNC:RB, RA
3802 | ins_next1 4403 | ins_next1
3803 | lwz TMP1, UPVAL:RB->v 4404 | lwz TMP1, UPVAL:RB->v
4405 |.if FPU
3804 | stfd f0, 0(TMP1) 4406 | stfd f0, 0(TMP1)
4407 |.else
4408 | stw TMP2, 0(TMP1)
4409 | stw TMP3, 4(TMP1)
4410 |.endif
3805 | ins_next2 4411 | ins_next2
3806 break; 4412 break;
3807 case BC_USETP: 4413 case BC_USETP:
@@ -3949,11 +4555,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3949 |.endif 4555 |.endif
3950 | ble ->vmeta_tgetv // Integer key and in array part? 4556 | ble ->vmeta_tgetv // Integer key and in array part?
3951 | lwzx TMP0, TMP1, TMP2 4557 | lwzx TMP0, TMP1, TMP2
4558 |.if FPU
3952 | lfdx f14, TMP1, TMP2 4559 | lfdx f14, TMP1, TMP2
4560 |.else
4561 | lwzux SAVE0, TMP1, TMP2
4562 | lwz SAVE1, 4(TMP1)
4563 |.endif
3953 | checknil TMP0; beq >2 4564 | checknil TMP0; beq >2
3954 |1: 4565 |1:
3955 | ins_next1 4566 | ins_next1
4567 |.if FPU
3956 | stfdx f14, BASE, RA 4568 | stfdx f14, BASE, RA
4569 |.else
4570 | stwux SAVE0, RA, BASE
4571 | stw SAVE1, 4(RA)
4572 |.endif
3957 | ins_next2 4573 | ins_next2
3958 | 4574 |
3959 |2: // Check for __index if table value is nil. 4575 |2: // Check for __index if table value is nil.
@@ -3984,9 +4600,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3984 |->BC_TGETS_Z: 4600 |->BC_TGETS_Z:
3985 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 4601 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
3986 | lwz TMP0, TAB:RB->hmask 4602 | lwz TMP0, TAB:RB->hmask
3987 | lwz TMP1, STR:RC->hash 4603 | lwz TMP1, STR:RC->sid
3988 | lwz NODE:TMP2, TAB:RB->node 4604 | lwz NODE:TMP2, TAB:RB->node
3989 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4605 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3990 | slwi TMP0, TMP1, 5 4606 | slwi TMP0, TMP1, 5
3991 | slwi TMP1, TMP1, 3 4607 | slwi TMP1, TMP1, 3
3992 | sub TMP1, TMP0, TMP1 4608 | sub TMP1, TMP0, TMP1
@@ -4029,12 +4645,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4029 | lwz TMP1, TAB:RB->asize 4645 | lwz TMP1, TAB:RB->asize
4030 | lwz TMP2, TAB:RB->array 4646 | lwz TMP2, TAB:RB->array
4031 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4647 | cmplw TMP0, TMP1; bge ->vmeta_tgetb
4648 |.if FPU
4032 | lwzx TMP1, TMP2, RC 4649 | lwzx TMP1, TMP2, RC
4033 | lfdx f0, TMP2, RC 4650 | lfdx f0, TMP2, RC
4651 |.else
4652 | lwzux TMP1, TMP2, RC
4653 | lwz TMP3, 4(TMP2)
4654 |.endif
4034 | checknil TMP1; beq >5 4655 | checknil TMP1; beq >5
4035 |1: 4656 |1:
4036 | ins_next1 4657 | ins_next1
4658 |.if FPU
4037 | stfdx f0, BASE, RA 4659 | stfdx f0, BASE, RA
4660 |.else
4661 | stwux TMP1, RA, BASE
4662 | stw TMP3, 4(RA)
4663 |.endif
4038 | ins_next2 4664 | ins_next2
4039 | 4665 |
4040 |5: // Check for __index if table value is nil. 4666 |5: // Check for __index if table value is nil.
@@ -4046,6 +4672,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4046 | bne <1 // 'no __index' flag set: done. 4672 | bne <1 // 'no __index' flag set: done.
4047 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4673 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4048 break; 4674 break;
4675 case BC_TGETR:
4676 | // RA = dst*8, RB = table*8, RC = key*8
4677 | add RB, BASE, RB
4678 | lwz TAB:CARG1, 4(RB)
4679 |.if DUALNUM
4680 | add RC, BASE, RC
4681 | lwz TMP0, TAB:CARG1->asize
4682 | lwz CARG2, 4(RC)
4683 | lwz TMP1, TAB:CARG1->array
4684 |.else
4685 | lfdx f0, BASE, RC
4686 | lwz TMP0, TAB:CARG1->asize
4687 | toint CARG2, f0
4688 | lwz TMP1, TAB:CARG1->array
4689 |.endif
4690 | cmplw TMP0, CARG2
4691 | slwi TMP2, CARG2, 3
4692 | ble ->vmeta_tgetr // In array part?
4693 |.if FPU
4694 | lfdx f14, TMP1, TMP2
4695 |.else
4696 | lwzux SAVE0, TMP2, TMP1
4697 | lwz SAVE1, 4(TMP2)
4698 |.endif
4699 |->BC_TGETR_Z:
4700 | ins_next1
4701 |.if FPU
4702 | stfdx f14, BASE, RA
4703 |.else
4704 | stwux SAVE0, RA, BASE
4705 | stw SAVE1, 4(RA)
4706 |.endif
4707 | ins_next2
4708 break;
4049 4709
4050 case BC_TSETV: 4710 case BC_TSETV:
4051 | // RA = src*8, RB = table*8, RC = key*8 4711 | // RA = src*8, RB = table*8, RC = key*8
@@ -4084,11 +4744,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4084 | ble ->vmeta_tsetv // Integer key and in array part? 4744 | ble ->vmeta_tsetv // Integer key and in array part?
4085 | lwzx TMP2, TMP1, TMP0 4745 | lwzx TMP2, TMP1, TMP0
4086 | lbz TMP3, TAB:RB->marked 4746 | lbz TMP3, TAB:RB->marked
4747 |.if FPU
4087 | lfdx f14, BASE, RA 4748 | lfdx f14, BASE, RA
4749 |.else
4750 | add SAVE1, BASE, RA
4751 | lwz SAVE0, 0(SAVE1)
4752 | lwz SAVE1, 4(SAVE1)
4753 |.endif
4088 | checknil TMP2; beq >3 4754 | checknil TMP2; beq >3
4089 |1: 4755 |1:
4090 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4756 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4757 |.if FPU
4091 | stfdx f14, TMP1, TMP0 4758 | stfdx f14, TMP1, TMP0
4759 |.else
4760 | stwux SAVE0, TMP1, TMP0
4761 | stw SAVE1, 4(TMP1)
4762 |.endif
4092 | bne >7 4763 | bne >7
4093 |2: 4764 |2:
4094 | ins_next 4765 | ins_next
@@ -4125,11 +4796,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4125 |->BC_TSETS_Z: 4796 |->BC_TSETS_Z:
4126 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 4797 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
4127 | lwz TMP0, TAB:RB->hmask 4798 | lwz TMP0, TAB:RB->hmask
4128 | lwz TMP1, STR:RC->hash 4799 | lwz TMP1, STR:RC->sid
4129 | lwz NODE:TMP2, TAB:RB->node 4800 | lwz NODE:TMP2, TAB:RB->node
4130 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4801 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
4131 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4802 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
4803 |.if FPU
4132 | lfdx f14, BASE, RA 4804 | lfdx f14, BASE, RA
4805 |.else
4806 | add CARG2, BASE, RA
4807 | lwz SAVE0, 0(CARG2)
4808 | lwz SAVE1, 4(CARG2)
4809 |.endif
4133 | slwi TMP0, TMP1, 5 4810 | slwi TMP0, TMP1, 5
4134 | slwi TMP1, TMP1, 3 4811 | slwi TMP1, TMP1, 3
4135 | sub TMP1, TMP0, TMP1 4812 | sub TMP1, TMP0, TMP1
@@ -4145,7 +4822,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4145 | checknil CARG2; beq >4 // Key found, but nil value? 4822 | checknil CARG2; beq >4 // Key found, but nil value?
4146 |2: 4823 |2:
4147 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4824 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4825 |.if FPU
4148 | stfd f14, NODE:TMP2->val 4826 | stfd f14, NODE:TMP2->val
4827 |.else
4828 | stw SAVE0, NODE:TMP2->val.u32.hi
4829 | stw SAVE1, NODE:TMP2->val.u32.lo
4830 |.endif
4149 | bne >7 4831 | bne >7
4150 |3: 4832 |3:
4151 | ins_next 4833 | ins_next
@@ -4184,7 +4866,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4184 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4866 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4185 | // Returns TValue *. 4867 | // Returns TValue *.
4186 | lp BASE, L->base 4868 | lp BASE, L->base
4869 |.if FPU
4187 | stfd f14, 0(CRET1) 4870 | stfd f14, 0(CRET1)
4871 |.else
4872 | stw SAVE0, 0(CRET1)
4873 | stw SAVE1, 4(CRET1)
4874 |.endif
4188 | b <3 // No 2nd write barrier needed. 4875 | b <3 // No 2nd write barrier needed.
4189 | 4876 |
4190 |7: // Possible table write barrier for the value. Skip valiswhite check. 4877 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4201,13 +4888,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4201 | lwz TMP2, TAB:RB->array 4888 | lwz TMP2, TAB:RB->array
4202 | lbz TMP3, TAB:RB->marked 4889 | lbz TMP3, TAB:RB->marked
4203 | cmplw TMP0, TMP1 4890 | cmplw TMP0, TMP1
4891 |.if FPU
4204 | lfdx f14, BASE, RA 4892 | lfdx f14, BASE, RA
4893 |.else
4894 | add CARG2, BASE, RA
4895 | lwz SAVE0, 0(CARG2)
4896 | lwz SAVE1, 4(CARG2)
4897 |.endif
4205 | bge ->vmeta_tsetb 4898 | bge ->vmeta_tsetb
4206 | lwzx TMP1, TMP2, RC 4899 | lwzx TMP1, TMP2, RC
4207 | checknil TMP1; beq >5 4900 | checknil TMP1; beq >5
4208 |1: 4901 |1:
4209 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4902 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4903 |.if FPU
4210 | stfdx f14, TMP2, RC 4904 | stfdx f14, TMP2, RC
4905 |.else
4906 | stwux SAVE0, RC, TMP2
4907 | stw SAVE1, 4(RC)
4908 |.endif
4211 | bne >7 4909 | bne >7
4212 |2: 4910 |2:
4213 | ins_next 4911 | ins_next
@@ -4225,6 +4923,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4225 | barrierback TAB:RB, TMP3, TMP0 4923 | barrierback TAB:RB, TMP3, TMP0
4226 | b <2 4924 | b <2
4227 break; 4925 break;
4926 case BC_TSETR:
4927 | // RA = dst*8, RB = table*8, RC = key*8
4928 | add RB, BASE, RB
4929 | lwz TAB:CARG2, 4(RB)
4930 |.if DUALNUM
4931 | add RC, BASE, RC
4932 | lbz TMP3, TAB:CARG2->marked
4933 | lwz TMP0, TAB:CARG2->asize
4934 | lwz CARG3, 4(RC)
4935 | lwz TMP1, TAB:CARG2->array
4936 |.else
4937 | lfdx f0, BASE, RC
4938 | lbz TMP3, TAB:CARG2->marked
4939 | lwz TMP0, TAB:CARG2->asize
4940 | toint CARG3, f0
4941 | lwz TMP1, TAB:CARG2->array
4942 |.endif
4943 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4944 | bne >7
4945 |2:
4946 | cmplw TMP0, CARG3
4947 | slwi TMP2, CARG3, 3
4948 |.if FPU
4949 | lfdx f14, BASE, RA
4950 |.else
4951 | lwzux SAVE0, RA, BASE
4952 | lwz SAVE1, 4(RA)
4953 |.endif
4954 | ble ->vmeta_tsetr // In array part?
4955 | ins_next1
4956 |.if FPU
4957 | stfdx f14, TMP1, TMP2
4958 |.else
4959 | stwux SAVE0, TMP1, TMP2
4960 | stw SAVE1, 4(TMP1)
4961 |.endif
4962 | ins_next2
4963 |
4964 |7: // Possible table write barrier for the value. Skip valiswhite check.
4965 | barrierback TAB:CARG2, TMP3, TMP2
4966 | b <2
4967 break;
4968
4228 4969
4229 case BC_TSETM: 4970 case BC_TSETM:
4230 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4971 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4247,10 +4988,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4247 | add TMP1, TMP1, TMP0 4988 | add TMP1, TMP1, TMP0
4248 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4989 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4249 |3: // Copy result slots to table. 4990 |3: // Copy result slots to table.
4991 |.if FPU
4250 | lfd f0, 0(RA) 4992 | lfd f0, 0(RA)
4993 |.else
4994 | lwz SAVE0, 0(RA)
4995 | lwz SAVE1, 4(RA)
4996 |.endif
4251 | addi RA, RA, 8 4997 | addi RA, RA, 8
4252 | cmpw cr1, RA, TMP2 4998 | cmpw cr1, RA, TMP2
4999 |.if FPU
4253 | stfd f0, 0(TMP1) 5000 | stfd f0, 0(TMP1)
5001 |.else
5002 | stw SAVE0, 0(TMP1)
5003 | stw SAVE1, 4(TMP1)
5004 |.endif
4254 | addi TMP1, TMP1, 8 5005 | addi TMP1, TMP1, 8
4255 | blt cr1, <3 5006 | blt cr1, <3
4256 | bne >7 5007 | bne >7
@@ -4317,9 +5068,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4317 | beq cr1, >3 5068 | beq cr1, >3
4318 |2: 5069 |2:
4319 | addi TMP3, TMP2, 8 5070 | addi TMP3, TMP2, 8
5071 |.if FPU
4320 | lfdx f0, RA, TMP2 5072 | lfdx f0, RA, TMP2
5073 |.else
5074 | add CARG3, RA, TMP2
5075 | lwz CARG1, 0(CARG3)
5076 | lwz CARG2, 4(CARG3)
5077 |.endif
4321 | cmplw cr1, TMP3, NARGS8:RC 5078 | cmplw cr1, TMP3, NARGS8:RC
5079 |.if FPU
4322 | stfdx f0, BASE, TMP2 5080 | stfdx f0, BASE, TMP2
5081 |.else
5082 | stwux CARG1, TMP2, BASE
5083 | stw CARG2, 4(TMP2)
5084 |.endif
4323 | mr TMP2, TMP3 5085 | mr TMP2, TMP3
4324 | bne cr1, <2 5086 | bne cr1, <2
4325 |3: 5087 |3:
@@ -4352,14 +5114,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4352 | add BASE, BASE, RA 5114 | add BASE, BASE, RA
4353 | lwz TMP1, -24(BASE) 5115 | lwz TMP1, -24(BASE)
4354 | lwz LFUNC:RB, -20(BASE) 5116 | lwz LFUNC:RB, -20(BASE)
5117 |.if FPU
4355 | lfd f1, -8(BASE) 5118 | lfd f1, -8(BASE)
4356 | lfd f0, -16(BASE) 5119 | lfd f0, -16(BASE)
5120 |.else
5121 | lwz CARG1, -8(BASE)
5122 | lwz CARG2, -4(BASE)
5123 | lwz CARG3, -16(BASE)
5124 | lwz CARG4, -12(BASE)
5125 |.endif
4357 | stw TMP1, 0(BASE) // Copy callable. 5126 | stw TMP1, 0(BASE) // Copy callable.
4358 | stw LFUNC:RB, 4(BASE) 5127 | stw LFUNC:RB, 4(BASE)
4359 | checkfunc TMP1 5128 | checkfunc TMP1
4360 | stfd f1, 16(BASE) // Copy control var.
4361 | li NARGS8:RC, 16 // Iterators get 2 arguments. 5129 | li NARGS8:RC, 16 // Iterators get 2 arguments.
5130 |.if FPU
5131 | stfd f1, 16(BASE) // Copy control var.
4362 | stfdu f0, 8(BASE) // Copy state. 5132 | stfdu f0, 8(BASE) // Copy state.
5133 |.else
5134 | stw CARG1, 16(BASE) // Copy control var.
5135 | stw CARG2, 20(BASE)
5136 | stwu CARG3, 8(BASE) // Copy state.
5137 | stw CARG4, 4(BASE)
5138 |.endif
4363 | bne ->vmeta_call 5139 | bne ->vmeta_call
4364 | ins_call 5140 | ins_call
4365 break; 5141 break;
@@ -4367,8 +5143,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4367 case BC_ITERN: 5143 case BC_ITERN:
4368 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) 5144 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
4369 |.if JIT 5145 |.if JIT
4370 | // NYI: add hotloop, record BC_ITERN. 5146 | // NYI on big-endian
4371 |.endif 5147 |.endif
5148 |->vm_IITERN:
4372 | add RA, BASE, RA 5149 | add RA, BASE, RA
4373 | lwz TAB:RB, -12(RA) 5150 | lwz TAB:RB, -12(RA)
4374 | lwz RC, -4(RA) // Get index from control var. 5151 | lwz RC, -4(RA) // Get index from control var.
@@ -4380,7 +5157,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4380 | slwi TMP3, RC, 3 5157 | slwi TMP3, RC, 3
4381 | bge >5 // Index points after array part? 5158 | bge >5 // Index points after array part?
4382 | lwzx TMP2, TMP1, TMP3 5159 | lwzx TMP2, TMP1, TMP3
5160 |.if FPU
4383 | lfdx f0, TMP1, TMP3 5161 | lfdx f0, TMP1, TMP3
5162 |.else
5163 | lwzux CARG1, TMP3, TMP1
5164 | lwz CARG2, 4(TMP3)
5165 |.endif
4384 | checknil TMP2 5166 | checknil TMP2
4385 | lwz INS, -4(PC) 5167 | lwz INS, -4(PC)
4386 | beq >4 5168 | beq >4
@@ -4392,7 +5174,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4392 |.endif 5174 |.endif
4393 | addi RC, RC, 1 5175 | addi RC, RC, 1
4394 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 5176 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
5177 |.if FPU
4395 | stfd f0, 8(RA) 5178 | stfd f0, 8(RA)
5179 |.else
5180 | stw CARG1, 8(RA)
5181 | stw CARG2, 12(RA)
5182 |.endif
4396 | decode_RD4 TMP1, INS 5183 | decode_RD4 TMP1, INS
4397 | stw RC, -4(RA) // Update control var. 5184 | stw RC, -4(RA) // Update control var.
4398 | add PC, TMP1, TMP3 5185 | add PC, TMP1, TMP3
@@ -4417,17 +5204,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4417 | slwi RB, RC, 3 5204 | slwi RB, RC, 3
4418 | sub TMP3, TMP3, RB 5205 | sub TMP3, TMP3, RB
4419 | lwzx RB, TMP2, TMP3 5206 | lwzx RB, TMP2, TMP3
5207 |.if FPU
4420 | lfdx f0, TMP2, TMP3 5208 | lfdx f0, TMP2, TMP3
5209 |.else
5210 | add CARG3, TMP2, TMP3
5211 | lwz CARG1, 0(CARG3)
5212 | lwz CARG2, 4(CARG3)
5213 |.endif
4421 | add NODE:TMP3, TMP2, TMP3 5214 | add NODE:TMP3, TMP2, TMP3
4422 | checknil RB 5215 | checknil RB
4423 | lwz INS, -4(PC) 5216 | lwz INS, -4(PC)
4424 | beq >7 5217 | beq >7
5218 |.if FPU
4425 | lfd f1, NODE:TMP3->key 5219 | lfd f1, NODE:TMP3->key
5220 |.else
5221 | lwz CARG3, NODE:TMP3->key.u32.hi
5222 | lwz CARG4, NODE:TMP3->key.u32.lo
5223 |.endif
4426 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 5224 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
5225 |.if FPU
4427 | stfd f0, 8(RA) 5226 | stfd f0, 8(RA)
5227 |.else
5228 | stw CARG1, 8(RA)
5229 | stw CARG2, 12(RA)
5230 |.endif
4428 | add RC, RC, TMP0 5231 | add RC, RC, TMP0
4429 | decode_RD4 TMP1, INS 5232 | decode_RD4 TMP1, INS
5233 |.if FPU
4430 | stfd f1, 0(RA) 5234 | stfd f1, 0(RA)
5235 |.else
5236 | stw CARG3, 0(RA)
5237 | stw CARG4, 4(RA)
5238 |.endif
4431 | addi RC, RC, 1 5239 | addi RC, RC, 1
4432 | add PC, TMP1, TMP2 5240 | add PC, TMP1, TMP2
4433 | stw RC, -4(RA) // Update control var. 5241 | stw RC, -4(RA) // Update control var.
@@ -4456,8 +5264,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4456 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 5264 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4457 | add TMP3, PC, TMP0 5265 | add TMP3, PC, TMP0
4458 | bne cr0, >5 5266 | bne cr0, >5
4459 | lus TMP1, 0xfffe 5267 | lus TMP1, (LJ_KEYINDEX >> 16)
4460 | ori TMP1, TMP1, 0x7fff 5268 | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
4461 | stw ZERO, -4(RA) // Initialize control var. 5269 | stw ZERO, -4(RA) // Initialize control var.
4462 | stw TMP1, -8(RA) 5270 | stw TMP1, -8(RA)
4463 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) 5271 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
@@ -4468,6 +5276,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4468 | li TMP1, BC_ITERC 5276 | li TMP1, BC_ITERC
4469 | stb TMP0, -1(PC) 5277 | stb TMP0, -1(PC)
4470 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) 5278 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
5279 | // NYI on big-endian: unpatch JLOOP.
4471 | stb TMP1, 3(PC) 5280 | stb TMP1, 3(PC)
4472 | b <1 5281 | b <1
4473 break; 5282 break;
@@ -4493,9 +5302,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4493 | subi TMP2, TMP2, 16 5302 | subi TMP2, TMP2, 16
4494 | ble >2 // No vararg slots? 5303 | ble >2 // No vararg slots?
4495 |1: // Copy vararg slots to destination slots. 5304 |1: // Copy vararg slots to destination slots.
5305 |.if FPU
4496 | lfd f0, 0(RC) 5306 | lfd f0, 0(RC)
5307 |.else
5308 | lwz CARG1, 0(RC)
5309 | lwz CARG2, 4(RC)
5310 |.endif
4497 | addi RC, RC, 8 5311 | addi RC, RC, 8
5312 |.if FPU
4498 | stfd f0, 0(RA) 5313 | stfd f0, 0(RA)
5314 |.else
5315 | stw CARG1, 0(RA)
5316 | stw CARG2, 4(RA)
5317 |.endif
4499 | cmplw RA, TMP2 5318 | cmplw RA, TMP2
4500 | cmplw cr1, RC, TMP3 5319 | cmplw cr1, RC, TMP3
4501 | bge >3 // All destination slots filled? 5320 | bge >3 // All destination slots filled?
@@ -4518,9 +5337,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4518 | addi MULTRES, TMP1, 8 5337 | addi MULTRES, TMP1, 8
4519 | bgt >7 5338 | bgt >7
4520 |6: 5339 |6:
5340 |.if FPU
4521 | lfd f0, 0(RC) 5341 | lfd f0, 0(RC)
5342 |.else
5343 | lwz CARG1, 0(RC)
5344 | lwz CARG2, 4(RC)
5345 |.endif
4522 | addi RC, RC, 8 5346 | addi RC, RC, 8
5347 |.if FPU
4523 | stfd f0, 0(RA) 5348 | stfd f0, 0(RA)
5349 |.else
5350 | stw CARG1, 0(RA)
5351 | stw CARG2, 4(RA)
5352 |.endif
4524 | cmplw RC, TMP3 5353 | cmplw RC, TMP3
4525 | addi RA, RA, 8 5354 | addi RA, RA, 8
4526 | blt <6 // More vararg slots? 5355 | blt <6 // More vararg slots?
@@ -4571,14 +5400,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4571 | li TMP1, 0 5400 | li TMP1, 0
4572 |2: 5401 |2:
4573 | addi TMP3, TMP1, 8 5402 | addi TMP3, TMP1, 8
5403 |.if FPU
4574 | lfdx f0, RA, TMP1 5404 | lfdx f0, RA, TMP1
5405 |.else
5406 | add CARG3, RA, TMP1
5407 | lwz CARG1, 0(CARG3)
5408 | lwz CARG2, 4(CARG3)
5409 |.endif
4575 | cmpw TMP3, RC 5410 | cmpw TMP3, RC
5411 |.if FPU
4576 | stfdx f0, TMP2, TMP1 5412 | stfdx f0, TMP2, TMP1
5413 |.else
5414 | add CARG3, TMP2, TMP1
5415 | stw CARG1, 0(CARG3)
5416 | stw CARG2, 4(CARG3)
5417 |.endif
4577 | beq >3 5418 | beq >3
4578 | addi TMP1, TMP3, 8 5419 | addi TMP1, TMP3, 8
5420 |.if FPU
4579 | lfdx f1, RA, TMP3 5421 | lfdx f1, RA, TMP3
5422 |.else
5423 | add CARG3, RA, TMP3
5424 | lwz CARG1, 0(CARG3)
5425 | lwz CARG2, 4(CARG3)
5426 |.endif
4580 | cmpw TMP1, RC 5427 | cmpw TMP1, RC
5428 |.if FPU
4581 | stfdx f1, TMP2, TMP3 5429 | stfdx f1, TMP2, TMP3
5430 |.else
5431 | add CARG3, TMP2, TMP3
5432 | stw CARG1, 0(CARG3)
5433 | stw CARG2, 4(CARG3)
5434 |.endif
4582 | bne <2 5435 | bne <2
4583 |3: 5436 |3:
4584 |5: 5437 |5:
@@ -4620,8 +5473,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4620 | subi TMP2, BASE, 8 5473 | subi TMP2, BASE, 8
4621 | decode_RB8 RB, INS 5474 | decode_RB8 RB, INS
4622 if (op == BC_RET1) { 5475 if (op == BC_RET1) {
5476 |.if FPU
4623 | lfd f0, 0(RA) 5477 | lfd f0, 0(RA)
4624 | stfd f0, 0(TMP2) 5478 | stfd f0, 0(TMP2)
5479 |.else
5480 | lwz CARG1, 0(RA)
5481 | lwz CARG2, 4(RA)
5482 | stw CARG1, 0(TMP2)
5483 | stw CARG2, 4(TMP2)
5484 |.endif
4625 } 5485 }
4626 |5: 5486 |5:
4627 | cmplw RB, RD 5487 | cmplw RB, RD
@@ -4682,11 +5542,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4682 |4: 5542 |4:
4683 | stw CARG1, FORL_IDX*8+4(RA) 5543 | stw CARG1, FORL_IDX*8+4(RA)
4684 } else { 5544 } else {
4685 | lwz TMP3, FORL_STEP*8(RA) 5545 | lwz SAVE0, FORL_STEP*8(RA)
4686 | lwz CARG3, FORL_STEP*8+4(RA) 5546 | lwz CARG3, FORL_STEP*8+4(RA)
4687 | lwz TMP2, FORL_STOP*8(RA) 5547 | lwz TMP2, FORL_STOP*8(RA)
4688 | lwz CARG2, FORL_STOP*8+4(RA) 5548 | lwz CARG2, FORL_STOP*8+4(RA)
4689 | cmplw cr7, TMP3, TISNUM 5549 | cmplw cr7, SAVE0, TISNUM
4690 | cmplw cr1, TMP2, TISNUM 5550 | cmplw cr1, TMP2, TISNUM
4691 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 5551 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4692 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 5552 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4729,41 +5589,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4729 if (vk) { 5589 if (vk) {
4730 |.if DUALNUM 5590 |.if DUALNUM
4731 |9: // FP loop. 5591 |9: // FP loop.
5592 |.if FPU
4732 | lfd f1, FORL_IDX*8(RA) 5593 | lfd f1, FORL_IDX*8(RA)
4733 |.else 5594 |.else
5595 | lwz CARG1, FORL_IDX*8(RA)
5596 | lwz CARG2, FORL_IDX*8+4(RA)
5597 |.endif
5598 |.else
4734 | lfdux f1, RA, BASE 5599 | lfdux f1, RA, BASE
4735 |.endif 5600 |.endif
5601 |.if FPU
4736 | lfd f3, FORL_STEP*8(RA) 5602 | lfd f3, FORL_STEP*8(RA)
4737 | lfd f2, FORL_STOP*8(RA) 5603 | lfd f2, FORL_STOP*8(RA)
4738 | lwz TMP3, FORL_STEP*8(RA)
4739 | fadd f1, f1, f3 5604 | fadd f1, f1, f3
4740 | stfd f1, FORL_IDX*8(RA) 5605 | stfd f1, FORL_IDX*8(RA)
5606 |.else
5607 | lwz CARG3, FORL_STEP*8(RA)
5608 | lwz CARG4, FORL_STEP*8+4(RA)
5609 | mr SAVE1, RD
5610 | blex __adddf3
5611 | mr RD, SAVE1
5612 | stw CRET1, FORL_IDX*8(RA)
5613 | stw CRET2, FORL_IDX*8+4(RA)
5614 | lwz CARG3, FORL_STOP*8(RA)
5615 | lwz CARG4, FORL_STOP*8+4(RA)
5616 |.endif
5617 | lwz SAVE0, FORL_STEP*8(RA)
4741 } else { 5618 } else {
4742 |.if DUALNUM 5619 |.if DUALNUM
4743 |9: // FP loop. 5620 |9: // FP loop.
4744 |.else 5621 |.else
4745 | lwzux TMP1, RA, BASE 5622 | lwzux TMP1, RA, BASE
4746 | lwz TMP3, FORL_STEP*8(RA) 5623 | lwz SAVE0, FORL_STEP*8(RA)
4747 | lwz TMP2, FORL_STOP*8(RA) 5624 | lwz TMP2, FORL_STOP*8(RA)
4748 | cmplw cr0, TMP1, TISNUM 5625 | cmplw cr0, TMP1, TISNUM
4749 | cmplw cr7, TMP3, TISNUM 5626 | cmplw cr7, SAVE0, TISNUM
4750 | cmplw cr1, TMP2, TISNUM 5627 | cmplw cr1, TMP2, TISNUM
4751 |.endif 5628 |.endif
5629 |.if FPU
4752 | lfd f1, FORL_IDX*8(RA) 5630 | lfd f1, FORL_IDX*8(RA)
5631 |.else
5632 | lwz CARG1, FORL_IDX*8(RA)
5633 | lwz CARG2, FORL_IDX*8+4(RA)
5634 |.endif
4753 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 5635 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
4754 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 5636 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
5637 |.if FPU
4755 | lfd f2, FORL_STOP*8(RA) 5638 | lfd f2, FORL_STOP*8(RA)
5639 |.else
5640 | lwz CARG3, FORL_STOP*8(RA)
5641 | lwz CARG4, FORL_STOP*8+4(RA)
5642 |.endif
4756 | bge ->vmeta_for 5643 | bge ->vmeta_for
4757 } 5644 }
4758 | cmpwi cr6, TMP3, 0 5645 | cmpwi cr6, SAVE0, 0
4759 if (op != BC_JFORL) { 5646 if (op != BC_JFORL) {
4760 | srwi RD, RD, 1 5647 | srwi RD, RD, 1
4761 } 5648 }
5649 |.if FPU
4762 | stfd f1, FORL_EXT*8(RA) 5650 | stfd f1, FORL_EXT*8(RA)
5651 |.else
5652 | stw CARG1, FORL_EXT*8(RA)
5653 | stw CARG2, FORL_EXT*8+4(RA)
5654 |.endif
4763 if (op != BC_JFORL) { 5655 if (op != BC_JFORL) {
4764 | add RD, PC, RD 5656 | add RD, PC, RD
4765 } 5657 }
5658 |.if FPU
4766 | fcmpu cr0, f1, f2 5659 | fcmpu cr0, f1, f2
5660 |.else
5661 | mr SAVE1, RD
5662 | blex __ledf2
5663 | cmpwi CRET1, 0
5664 | mr RD, SAVE1
5665 |.endif
4767 if (op == BC_JFORI) { 5666 if (op == BC_JFORI) {
4768 | addis PC, RD, -(BCBIAS_J*4 >> 16) 5667 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4769 } 5668 }
@@ -4866,8 +5765,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4866 | lp TMP2, TRACE:TMP2->mcode 5765 | lp TMP2, TRACE:TMP2->mcode
4867 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5766 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4868 | mtctr TMP2 5767 | mtctr TMP2
4869 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4870 | addi JGL, DISPATCH, GG_DISP2G+32768 5768 | addi JGL, DISPATCH, GG_DISP2G+32768
5769 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4871 | bctr 5770 | bctr
4872 |.endif 5771 |.endif
4873 break; 5772 break;
@@ -5002,6 +5901,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5002 | lp TMP1, L->top 5901 | lp TMP1, L->top
5003 | li_vmstate INTERP 5902 | li_vmstate INTERP
5004 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 5903 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
5904 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
5005 | sub RA, TMP1, RD // RA = L->top - nresults*8 5905 | sub RA, TMP1, RD // RA = L->top - nresults*8
5006 | st_vmstate 5906 | st_vmstate
5007 | b ->vm_returnc 5907 | b ->vm_returnc
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
deleted file mode 100644
index ea33c08b..00000000
--- a/src/vm_ppcspe.dasc
+++ /dev/null
@@ -1,3699 +0,0 @@
1|// Low-level VM code for PowerPC/e500 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch ppc
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|// The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// Fixed register assignments for the interpreter.
19|// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr
20|
21|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r14 // Base of current Lua stack frame.
23|.define KBASE, r15 // Constants of current Lua function.
24|.define PC, r16 // Next PC.
25|.define DISPATCH, r17 // Opcode dispatch table.
26|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
28|
29|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save.
30|.define TISNUM, r22
31|.define TISSTR, r23
32|.define TISTAB, r24
33|.define TISFUNC, r25
34|.define TISNIL, r26
35|.define TOBIT, r27
36|.define ZERO, TOBIT // Zero in lo word.
37|
38|// The following temporaries are not saved across C calls, except for RA.
39|.define RA, r20 // Callee-save.
40|.define RB, r10
41|.define RC, r11
42|.define RD, r12
43|.define INS, r7 // Overlaps CARG5.
44|
45|.define TMP0, r0
46|.define TMP1, r8
47|.define TMP2, r9
48|.define TMP3, r6 // Overlaps CARG4.
49|
50|// Saved temporaries.
51|.define SAVE0, r21
52|
53|// Calling conventions.
54|.define CARG1, r3
55|.define CARG2, r4
56|.define CARG3, r5
57|.define CARG4, r6 // Overlaps TMP3.
58|.define CARG5, r7 // Overlaps INS.
59|
60|.define CRET1, r3
61|.define CRET2, r4
62|
63|// Stack layout while in interpreter. Must match with lj_frame.h.
64|.define SAVE_LR, 188(sp)
65|.define CFRAME_SPACE, 184 // Delta for sp.
66|// Back chain for sp: 184(sp) <-- sp entering interpreter
67|.define SAVE_r31, 176(sp) // 64 bit register saves.
68|.define SAVE_r30, 168(sp)
69|.define SAVE_r29, 160(sp)
70|.define SAVE_r28, 152(sp)
71|.define SAVE_r27, 144(sp)
72|.define SAVE_r26, 136(sp)
73|.define SAVE_r25, 128(sp)
74|.define SAVE_r24, 120(sp)
75|.define SAVE_r23, 112(sp)
76|.define SAVE_r22, 104(sp)
77|.define SAVE_r21, 96(sp)
78|.define SAVE_r20, 88(sp)
79|.define SAVE_r19, 80(sp)
80|.define SAVE_r18, 72(sp)
81|.define SAVE_r17, 64(sp)
82|.define SAVE_r16, 56(sp)
83|.define SAVE_r15, 48(sp)
84|.define SAVE_r14, 40(sp)
85|.define SAVE_CR, 36(sp)
86|.define UNUSED1, 32(sp)
87|.define SAVE_ERRF, 28(sp) // 32 bit C frame info.
88|.define SAVE_NRES, 24(sp)
89|.define SAVE_CFRAME, 20(sp)
90|.define SAVE_L, 16(sp)
91|.define SAVE_PC, 12(sp)
92|.define SAVE_MULTRES, 8(sp)
93|// Next frame lr: 4(sp)
94|// Back chain for sp: 0(sp) <-- sp while in interpreter
95|
96|.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro
97|.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro
98|
99|.macro saveregs
100| stwu sp, -CFRAME_SPACE(sp)
101| save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19
102| mflr r0; mfcr r12
103| save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25
104| stw r0, SAVE_LR; stw r12, SAVE_CR
105| save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31
106|.endmacro
107|
108|.macro restoreregs
109| lwz r0, SAVE_LR; lwz r12, SAVE_CR
110| rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19
111| mtlr r0; mtcrf 0x38, r12
112| rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25
113| rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31
114| addi sp, sp, CFRAME_SPACE
115|.endmacro
116|
117|// Type definitions. Some of these are only used for documentation.
118|.type L, lua_State, LREG
119|.type GL, global_State
120|.type TVALUE, TValue
121|.type GCOBJ, GCobj
122|.type STR, GCstr
123|.type TAB, GCtab
124|.type LFUNC, GCfuncL
125|.type CFUNC, GCfuncC
126|.type PROTO, GCproto
127|.type UPVAL, GCupval
128|.type NODE, Node
129|.type NARGS8, int
130|.type TRACE, GCtrace
131|
132|//-----------------------------------------------------------------------
133|
134|// These basic macros should really be part of DynASM.
135|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
136|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
137|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
138|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
139|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
140|
141|// Trap for not-yet-implemented parts.
142|.macro NYI; tw 4, sp, sp; .endmacro
143|
144|//-----------------------------------------------------------------------
145|
146|// Access to frame relative to BASE.
147|.define FRAME_PC, -8
148|.define FRAME_FUNC, -4
149|
150|// Instruction decode.
151|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
152|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
153|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
154|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
155|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
156|
157|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
158|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
159|
160|// Instruction fetch.
161|.macro ins_NEXT1
162| lwz INS, 0(PC)
163| addi PC, PC, 4
164|.endmacro
165|// Instruction decode+dispatch.
166|.macro ins_NEXT2
167| decode_OP4 TMP1, INS
168| decode_RB8 RB, INS
169| decode_RD8 RD, INS
170| lwzx TMP0, DISPATCH, TMP1
171| decode_RA8 RA, INS
172| decode_RC8 RC, INS
173| mtctr TMP0
174| bctr
175|.endmacro
176|.macro ins_NEXT
177| ins_NEXT1
178| ins_NEXT2
179|.endmacro
180|
181|// Instruction footer.
182|.if 1
183| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
184| .define ins_next, ins_NEXT
185| .define ins_next_, ins_NEXT
186| .define ins_next1, ins_NEXT1
187| .define ins_next2, ins_NEXT2
188|.else
189| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
190| // Affects only certain kinds of benchmarks (and only with -j off).
191| .macro ins_next
192| b ->ins_next
193| .endmacro
194| .macro ins_next1
195| .endmacro
196| .macro ins_next2
197| b ->ins_next
198| .endmacro
199| .macro ins_next_
200| ->ins_next:
201| ins_NEXT
202| .endmacro
203|.endif
204|
205|// Call decode and dispatch.
206|.macro ins_callt
207| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
208| lwz PC, LFUNC:RB->pc
209| lwz INS, 0(PC)
210| addi PC, PC, 4
211| decode_OP4 TMP1, INS
212| decode_RA8 RA, INS
213| lwzx TMP0, DISPATCH, TMP1
214| add RA, RA, BASE
215| mtctr TMP0
216| bctr
217|.endmacro
218|
219|.macro ins_call
220| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
221| stw PC, FRAME_PC(BASE)
222| ins_callt
223|.endmacro
224|
225|//-----------------------------------------------------------------------
226|
227|// Macros to test operand types.
228|.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro
229|.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro
230|.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro
231|.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro
232|.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro
233|.macro checkok, label; blt label; .endmacro
234|.macro checkfail, label; bge label; .endmacro
235|.macro checkanyfail, label; bns label; .endmacro
236|.macro checkallok, label; bso label; .endmacro
237|
238|.macro branch_RD
239| srwi TMP0, RD, 1
240| add PC, PC, TMP0
241| addis PC, PC, -(BCBIAS_J*4 >> 16)
242|.endmacro
243|
244|// Assumes DISPATCH is relative to GL.
245#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
246#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
247|
248#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
249|
250|.macro hotloop
251| NYI
252|.endmacro
253|
254|.macro hotcall
255| NYI
256|.endmacro
257|
258|// Set current VM state. Uses TMP0.
259|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
260|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
261|
262|// Move table write barrier back. Overwrites mark and tmp.
263|.macro barrierback, tab, mark, tmp
264| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
265| // Assumes LJ_GC_BLACK is 0x04.
266| rlwinm mark, mark, 0, 30, 28 // black2gray(tab)
267| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
268| stb mark, tab->marked
269| stw tmp, tab->gclist
270|.endmacro
271|
272|//-----------------------------------------------------------------------
273
274/* Generate subroutines used by opcodes and other parts of the VM. */
275/* The .code_sub section should be last to help static branch prediction. */
276static void build_subroutines(BuildCtx *ctx)
277{
278 |.code_sub
279 |
280 |//-----------------------------------------------------------------------
281 |//-- Return handling ----------------------------------------------------
282 |//-----------------------------------------------------------------------
283 |
284 |->vm_returnp:
285 | // See vm_return. Also: TMP2 = previous base.
286 | andi. TMP0, PC, FRAME_P
287 | evsplati TMP1, LJ_TTRUE
288 | beq ->cont_dispatch
289 |
290 | // Return from pcall or xpcall fast func.
291 | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
292 | mr BASE, TMP2 // Restore caller base.
293 | // Prepending may overwrite the pcall frame, so do it at the end.
294 | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
295 |
296 |->vm_returnc:
297 | addi RD, RD, 8 // RD = (nresults+1)*8.
298 | andi. TMP0, PC, FRAME_TYPE
299 | cmpwi cr1, RD, 0
300 | li CRET1, LUA_YIELD
301 | beq cr1, ->vm_unwind_c_eh
302 | mr MULTRES, RD
303 | beq ->BC_RET_Z // Handle regular return to Lua.
304 |
305 |->vm_return:
306 | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
307 | // TMP0 = PC & FRAME_TYPE
308 | cmpwi TMP0, FRAME_C
309 | rlwinm TMP2, PC, 0, 0, 28
310 | li_vmstate C
311 | sub TMP2, BASE, TMP2 // TMP2 = previous base.
312 | bne ->vm_returnp
313 |
314 | addic. TMP1, RD, -8
315 | stw TMP2, L->base
316 | lwz TMP2, SAVE_NRES
317 | subi BASE, BASE, 8
318 | st_vmstate
319 | slwi TMP2, TMP2, 3
320 | beq >2
321 |1:
322 | addic. TMP1, TMP1, -8
323 | evldd TMP0, 0(RA)
324 | addi RA, RA, 8
325 | evstdd TMP0, 0(BASE)
326 | addi BASE, BASE, 8
327 | bne <1
328 |
329 |2:
330 | cmpw TMP2, RD // More/less results wanted?
331 | bne >6
332 |3:
333 | stw BASE, L->top // Store new top.
334 |
335 |->vm_leave_cp:
336 | lwz TMP0, SAVE_CFRAME // Restore previous C frame.
337 | li CRET1, 0 // Ok return status for vm_pcall.
338 | stw TMP0, L->cframe
339 |
340 |->vm_leave_unw:
341 | restoreregs
342 | blr
343 |
344 |6:
345 | ble >7 // Less results wanted?
346 | // More results wanted. Check stack size and fill up results with nil.
347 | lwz TMP1, L->maxstack
348 | cmplw BASE, TMP1
349 | bge >8
350 | evstdd TISNIL, 0(BASE)
351 | addi RD, RD, 8
352 | addi BASE, BASE, 8
353 | b <2
354 |
355 |7: // Less results wanted.
356 | sub TMP0, RD, TMP2
357 | cmpwi TMP2, 0 // LUA_MULTRET+1 case?
358 | sub TMP0, BASE, TMP0 // Subtract the difference.
359 | iseleq BASE, BASE, TMP0 // Either keep top or shrink it.
360 | b <3
361 |
362 |8: // Corner case: need to grow stack for filling up results.
363 | // This can happen if:
364 | // - A C function grows the stack (a lot).
365 | // - The GC shrinks the stack in between.
366 | // - A return from a lua_call() with (high) nresults adjustment.
367 | stw BASE, L->top // Save current top held in BASE (yes).
368 | mr SAVE0, RD
369 | mr CARG2, TMP2
370 | mr CARG1, L
371 | bl extern lj_state_growstack // (lua_State *L, int n)
372 | lwz TMP2, SAVE_NRES
373 | mr RD, SAVE0
374 | slwi TMP2, TMP2, 3
375 | lwz BASE, L->top // Need the (realloced) L->top in BASE.
376 | b <2
377 |
378 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
379 | // (void *cframe, int errcode)
380 | mr sp, CARG1
381 | mr CRET1, CARG2
382 |->vm_unwind_c_eh: // Landing pad for external unwinder.
383 | lwz L, SAVE_L
384 | li TMP0, ~LJ_VMST_C
385 | lwz GL:TMP1, L->glref
386 | stw TMP0, GL:TMP1->vmstate
387 | b ->vm_leave_unw
388 |
389 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
390 | // (void *cframe)
391 | rlwinm sp, CARG1, 0, 0, 29
392 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
393 | lwz L, SAVE_L
394 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
395 | evsplati TISFUNC, LJ_TFUNC
396 | lus TOBIT, 0x4338
397 | evsplati TISTAB, LJ_TTAB
398 | li TMP0, 0
399 | lwz BASE, L->base
400 | evmergelo TOBIT, TOBIT, TMP0
401 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
402 | evsplati TISSTR, LJ_TSTR
403 | li TMP1, LJ_TFALSE
404 | evsplati TISNIL, LJ_TNIL
405 | li_vmstate INTERP
406 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
407 | la RA, -8(BASE) // Results start at BASE-8.
408 | addi DISPATCH, DISPATCH, GG_G2DISP
409 | stw TMP1, 0(RA) // Prepend false to error message.
410 | li RD, 16 // 2 results: false + error message.
411 | st_vmstate
412 | b ->vm_returnc
413 |
414 |//-----------------------------------------------------------------------
415 |//-- Grow stack for calls -----------------------------------------------
416 |//-----------------------------------------------------------------------
417 |
418 |->vm_growstack_c: // Grow stack for C function.
419 | li CARG2, LUA_MINSTACK
420 | b >2
421 |
422 |->vm_growstack_l: // Grow stack for Lua function.
423 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
424 | add RC, BASE, RC
425 | sub RA, RA, BASE
426 | stw BASE, L->base
427 | addi PC, PC, 4 // Must point after first instruction.
428 | stw RC, L->top
429 | srwi CARG2, RA, 3
430 |2:
431 | // L->base = new base, L->top = top
432 | stw PC, SAVE_PC
433 | mr CARG1, L
434 | bl extern lj_state_growstack // (lua_State *L, int n)
435 | lwz BASE, L->base
436 | lwz RC, L->top
437 | lwz LFUNC:RB, FRAME_FUNC(BASE)
438 | sub RC, RC, BASE
439 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
440 | ins_callt // Just retry the call.
441 |
442 |//-----------------------------------------------------------------------
443 |//-- Entry points into the assembler VM ---------------------------------
444 |//-----------------------------------------------------------------------
445 |
446 |->vm_resume: // Setup C frame and resume thread.
447 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
448 | saveregs
449 | mr L, CARG1
450 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
451 | mr BASE, CARG2
452 | lbz TMP1, L->status
453 | stw L, SAVE_L
454 | li PC, FRAME_CP
455 | addi TMP0, sp, CFRAME_RESUME
456 | addi DISPATCH, DISPATCH, GG_G2DISP
457 | stw CARG3, SAVE_NRES
458 | cmplwi TMP1, 0
459 | stw CARG3, SAVE_ERRF
460 | stw TMP0, L->cframe
461 | stw CARG3, SAVE_CFRAME
462 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
463 | beq >3
464 |
465 | // Resume after yield (like a return).
466 | mr RA, BASE
467 | lwz BASE, L->base
468 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
469 | lwz TMP1, L->top
470 | evsplati TISFUNC, LJ_TFUNC
471 | lus TOBIT, 0x4338
472 | evsplati TISTAB, LJ_TTAB
473 | lwz PC, FRAME_PC(BASE)
474 | li TMP2, 0
475 | evsplati TISSTR, LJ_TSTR
476 | sub RD, TMP1, BASE
477 | evmergelo TOBIT, TOBIT, TMP2
478 | stb CARG3, L->status
479 | andi. TMP0, PC, FRAME_TYPE
480 | li_vmstate INTERP
481 | addi RD, RD, 8
482 | evsplati TISNIL, LJ_TNIL
483 | mr MULTRES, RD
484 | st_vmstate
485 | beq ->BC_RET_Z
486 | b ->vm_return
487 |
488 |->vm_pcall: // Setup protected C frame and enter VM.
489 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
490 | saveregs
491 | li PC, FRAME_CP
492 | stw CARG4, SAVE_ERRF
493 | b >1
494 |
495 |->vm_call: // Setup C frame and enter VM.
496 | // (lua_State *L, TValue *base, int nres1)
497 | saveregs
498 | li PC, FRAME_C
499 |
500 |1: // Entry point for vm_pcall above (PC = ftype).
501 | lwz TMP1, L:CARG1->cframe
502 | stw CARG3, SAVE_NRES
503 | mr L, CARG1
504 | stw CARG1, SAVE_L
505 | mr BASE, CARG2
506 | stw sp, L->cframe // Add our C frame to cframe chain.
507 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
508 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
509 | stw TMP1, SAVE_CFRAME
510 | addi DISPATCH, DISPATCH, GG_G2DISP
511 |
512 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
513 | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call).
514 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
515 | lwz TMP1, L->top
516 | evsplati TISFUNC, LJ_TFUNC
517 | add PC, PC, BASE
518 | evsplati TISTAB, LJ_TTAB
519 | lus TOBIT, 0x4338
520 | li TMP0, 0
521 | sub PC, PC, TMP2 // PC = frame delta + frame type
522 | evsplati TISSTR, LJ_TSTR
523 | sub NARGS8:RC, TMP1, BASE
524 | evmergelo TOBIT, TOBIT, TMP0
525 | li_vmstate INTERP
526 | evsplati TISNIL, LJ_TNIL
527 | st_vmstate
528 |
529 |->vm_call_dispatch:
530 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
531 | li TMP0, -8
532 | evlddx LFUNC:RB, BASE, TMP0
533 | checkfunc LFUNC:RB
534 | checkfail ->vmeta_call
535 |
536 |->vm_call_dispatch_f:
537 | ins_call
538 | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
539 |
540 |->vm_cpcall: // Setup protected C frame, call C.
541 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
542 | saveregs
543 | mr L, CARG1
544 | lwz TMP0, L:CARG1->stack
545 | stw CARG1, SAVE_L
546 | lwz TMP1, L->top
547 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
548 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
549 | lwz TMP1, L->cframe
550 | stw sp, L->cframe // Add our C frame to cframe chain.
551 | li TMP2, 0
552 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
553 | stw TMP2, SAVE_ERRF // No error function.
554 | stw TMP1, SAVE_CFRAME
555 | mtctr CARG4
556 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
557 | mr. BASE, CRET1
558 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
559 | li PC, FRAME_CP
560 | addi DISPATCH, DISPATCH, GG_G2DISP
561 | bne <3 // Else continue with the call.
562 | b ->vm_leave_cp // No base? Just remove C frame.
563 |
564 |//-----------------------------------------------------------------------
565 |//-- Metamethod handling ------------------------------------------------
566 |//-----------------------------------------------------------------------
567 |
568 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
569 |// stack, so BASE doesn't need to be reloaded across these calls.
570 |
571 |//-- Continuation dispatch ----------------------------------------------
572 |
573 |->cont_dispatch:
574 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
575 | lwz TMP0, -12(BASE) // Continuation.
576 | mr RB, BASE
577 | mr BASE, TMP2 // Restore caller BASE.
578 | lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
579 | cmplwi TMP0, 0
580 | lwz PC, -16(RB) // Restore PC from [cont|PC].
581 | beq >1
582 | subi TMP2, RD, 8
583 | lwz TMP1, LFUNC:TMP1->pc
584 | evstddx TISNIL, RA, TMP2 // Ensure one valid arg.
585 | lwz KBASE, PC2PROTO(k)(TMP1)
586 | // BASE = base, RA = resultptr, RB = meta base
587 | mtctr TMP0
588 | bctr // Jump to continuation.
589 |
590 |1: // Tail call from C function.
591 | subi TMP1, RB, 16
592 | sub RC, TMP1, BASE
593 | b ->vm_call_tail
594 |
595 |->cont_cat: // RA = resultptr, RB = meta base
596 | lwz INS, -4(PC)
597 | subi CARG2, RB, 16
598 | decode_RB8 SAVE0, INS
599 | evldd TMP0, 0(RA)
600 | add TMP1, BASE, SAVE0
601 | stw BASE, L->base
602 | cmplw TMP1, CARG2
603 | sub CARG3, CARG2, TMP1
604 | decode_RA8 RA, INS
605 | evstdd TMP0, 0(CARG2)
606 | bne ->BC_CAT_Z
607 | evstddx TMP0, BASE, RA
608 | b ->cont_nop
609 |
610 |//-- Table indexing metamethods -----------------------------------------
611 |
612 |->vmeta_tgets1:
613 | evmergelo STR:RC, TISSTR, STR:RC
614 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
615 | decode_RB8 RB, INS
616 | evstdd STR:RC, 0(CARG3)
617 | add CARG2, BASE, RB
618 | b >1
619 |
620 |->vmeta_tgets:
621 | evmergelo TAB:RB, TISTAB, TAB:RB
622 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
623 | evmergelo STR:RC, TISSTR, STR:RC
624 | evstdd TAB:RB, 0(CARG2)
625 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
626 | evstdd STR:RC, 0(CARG3)
627 | b >1
628 |
629 |->vmeta_tgetb: // TMP0 = index
630 | efdcfsi TMP0, TMP0
631 | decode_RB8 RB, INS
632 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
633 | add CARG2, BASE, RB
634 | evstdd TMP0, 0(CARG3)
635 | b >1
636 |
637 |->vmeta_tgetv:
638 | decode_RB8 RB, INS
639 | decode_RC8 RC, INS
640 | add CARG2, BASE, RB
641 | add CARG3, BASE, RC
642 |1:
643 | stw BASE, L->base
644 | mr CARG1, L
645 | stw PC, SAVE_PC
646 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
647 | // Returns TValue * (finished) or NULL (metamethod).
648 | cmplwi CRET1, 0
649 | beq >3
650 | evldd TMP0, 0(CRET1)
651 | evstddx TMP0, BASE, RA
652 | ins_next
653 |
654 |3: // Call __index metamethod.
655 | // BASE = base, L->top = new base, stack = cont/func/t/k
656 | subfic TMP1, BASE, FRAME_CONT
657 | lwz BASE, L->top
658 | stw PC, -16(BASE) // [cont|PC]
659 | add PC, TMP1, BASE
660 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
661 | li NARGS8:RC, 16 // 2 args for func(t, k).
662 | b ->vm_call_dispatch_f
663 |
664 |//-----------------------------------------------------------------------
665 |
666 |->vmeta_tsets1:
667 | evmergelo STR:RC, TISSTR, STR:RC
668 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
669 | decode_RB8 RB, INS
670 | evstdd STR:RC, 0(CARG3)
671 | add CARG2, BASE, RB
672 | b >1
673 |
674 |->vmeta_tsets:
675 | evmergelo TAB:RB, TISTAB, TAB:RB
676 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
677 | evmergelo STR:RC, TISSTR, STR:RC
678 | evstdd TAB:RB, 0(CARG2)
679 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
680 | evstdd STR:RC, 0(CARG3)
681 | b >1
682 |
683 |->vmeta_tsetb: // TMP0 = index
684 | efdcfsi TMP0, TMP0
685 | decode_RB8 RB, INS
686 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
687 | add CARG2, BASE, RB
688 | evstdd TMP0, 0(CARG3)
689 | b >1
690 |
691 |->vmeta_tsetv:
692 | decode_RB8 RB, INS
693 | decode_RC8 RC, INS
694 | add CARG2, BASE, RB
695 | add CARG3, BASE, RC
696 |1:
697 | stw BASE, L->base
698 | mr CARG1, L
699 | stw PC, SAVE_PC
700 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
701 | // Returns TValue * (finished) or NULL (metamethod).
702 | cmplwi CRET1, 0
703 | evlddx TMP0, BASE, RA
704 | beq >3
705 | // NOBARRIER: lj_meta_tset ensures the table is not black.
706 | evstdd TMP0, 0(CRET1)
707 | ins_next
708 |
709 |3: // Call __newindex metamethod.
710 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
711 | subfic TMP1, BASE, FRAME_CONT
712 | lwz BASE, L->top
713 | stw PC, -16(BASE) // [cont|PC]
714 | add PC, TMP1, BASE
715 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
716 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
717 | evstdd TMP0, 16(BASE) // Copy value to third argument.
718 | b ->vm_call_dispatch_f
719 |
720 |//-- Comparison metamethods ---------------------------------------------
721 |
722 |->vmeta_comp:
723 | mr CARG1, L
724 | subi PC, PC, 4
725 | add CARG2, BASE, RA
726 | stw PC, SAVE_PC
727 | add CARG3, BASE, RD
728 | stw BASE, L->base
729 | decode_OP1 CARG4, INS
730 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
731 | // Returns 0/1 or TValue * (metamethod).
732 |3:
733 | cmplwi CRET1, 1
734 | bgt ->vmeta_binop
735 |4:
736 | lwz INS, 0(PC)
737 | addi PC, PC, 4
738 | decode_RD4 TMP2, INS
739 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
740 | add TMP2, TMP2, TMP3
741 | isellt PC, PC, TMP2
742 |->cont_nop:
743 | ins_next
744 |
745 |->cont_ra: // RA = resultptr
746 | lwz INS, -4(PC)
747 | evldd TMP0, 0(RA)
748 | decode_RA8 TMP1, INS
749 | evstddx TMP0, BASE, TMP1
750 | b ->cont_nop
751 |
752 |->cont_condt: // RA = resultptr
753 | lwz TMP0, 0(RA)
754 | li TMP1, LJ_TTRUE
755 | cmplw TMP1, TMP0 // Branch if result is true.
756 | b <4
757 |
758 |->cont_condf: // RA = resultptr
759 | lwz TMP0, 0(RA)
760 | li TMP1, LJ_TFALSE
761 | cmplw TMP0, TMP1 // Branch if result is false.
762 | b <4
763 |
764 |->vmeta_equal:
765 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
766 | subi PC, PC, 4
767 | stw BASE, L->base
768 | mr CARG1, L
769 | stw PC, SAVE_PC
770 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
771 | // Returns 0/1 or TValue * (metamethod).
772 | b <3
773 |
774 |//-- Arithmetic metamethods ---------------------------------------------
775 |
776 |->vmeta_arith_vn:
777 | add CARG3, BASE, RB
778 | add CARG4, KBASE, RC
779 | b >1
780 |
781 |->vmeta_arith_nv:
782 | add CARG3, KBASE, RC
783 | add CARG4, BASE, RB
784 | b >1
785 |
786 |->vmeta_unm:
787 | add CARG3, BASE, RD
788 | mr CARG4, CARG3
789 | b >1
790 |
791 |->vmeta_arith_vv:
792 | add CARG3, BASE, RB
793 | add CARG4, BASE, RC
794 |1:
795 | add CARG2, BASE, RA
796 | stw BASE, L->base
797 | mr CARG1, L
798 | stw PC, SAVE_PC
799 | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS.
800 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
801 | // Returns NULL (finished) or TValue * (metamethod).
802 | cmplwi CRET1, 0
803 | beq ->cont_nop
804 |
805 | // Call metamethod for binary op.
806 |->vmeta_binop:
807 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
808 | sub TMP1, CRET1, BASE
809 | stw PC, -16(CRET1) // [cont|PC]
810 | mr TMP2, BASE
811 | addi PC, TMP1, FRAME_CONT
812 | mr BASE, CRET1
813 | li NARGS8:RC, 16 // 2 args for func(o1, o2).
814 | b ->vm_call_dispatch
815 |
816 |->vmeta_len:
817#if LJ_52
818 | mr SAVE0, CARG1
819#endif
820 | add CARG2, BASE, RD
821 | stw BASE, L->base
822 | mr CARG1, L
823 | stw PC, SAVE_PC
824 | bl extern lj_meta_len // (lua_State *L, TValue *o)
825 | // Returns NULL (retry) or TValue * (metamethod base).
826#if LJ_52
827 | cmplwi CRET1, 0
828 | bne ->vmeta_binop // Binop call for compatibility.
829 | mr CARG1, SAVE0
830 | b ->BC_LEN_Z
831#else
832 | b ->vmeta_binop // Binop call for compatibility.
833#endif
834 |
835 |//-- Call metamethod ----------------------------------------------------
836 |
837 |->vmeta_call: // Resolve and call __call metamethod.
838 | // TMP2 = old base, BASE = new base, RC = nargs*8
839 | mr CARG1, L
840 | stw TMP2, L->base // This is the caller's base!
841 | subi CARG2, BASE, 8
842 | stw PC, SAVE_PC
843 | add CARG3, BASE, RC
844 | mr SAVE0, NARGS8:RC
845 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
846 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
847 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
848 | ins_call
849 |
850 |->vmeta_callt: // Resolve __call for BC_CALLT.
851 | // BASE = old base, RA = new base, RC = nargs*8
852 | mr CARG1, L
853 | stw BASE, L->base
854 | subi CARG2, RA, 8
855 | stw PC, SAVE_PC
856 | add CARG3, RA, RC
857 | mr SAVE0, NARGS8:RC
858 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
859 | lwz TMP1, FRAME_PC(BASE)
860 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
861 | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
862 | b ->BC_CALLT_Z
863 |
864 |//-- Argument coercion for 'for' statement ------------------------------
865 |
866 |->vmeta_for:
867 | mr CARG1, L
868 | stw BASE, L->base
869 | mr CARG2, RA
870 | stw PC, SAVE_PC
871 | mr SAVE0, INS
872 | bl extern lj_meta_for // (lua_State *L, TValue *base)
873 |.if JIT
874 | decode_OP1 TMP0, SAVE0
875 |.endif
876 | decode_RA8 RA, SAVE0
877 |.if JIT
878 | cmpwi TMP0, BC_JFORI
879 |.endif
880 | decode_RD8 RD, SAVE0
881 |.if JIT
882 | beq =>BC_JFORI
883 |.endif
884 | b =>BC_FORI
885 |
886 |//-----------------------------------------------------------------------
887 |//-- Fast functions -----------------------------------------------------
888 |//-----------------------------------------------------------------------
889 |
890 |.macro .ffunc, name
891 |->ff_ .. name:
892 |.endmacro
893 |
894 |.macro .ffunc_1, name
895 |->ff_ .. name:
896 | cmplwi NARGS8:RC, 8
897 | evldd CARG1, 0(BASE)
898 | blt ->fff_fallback
899 |.endmacro
900 |
901 |.macro .ffunc_2, name
902 |->ff_ .. name:
903 | cmplwi NARGS8:RC, 16
904 | evldd CARG1, 0(BASE)
905 | evldd CARG2, 8(BASE)
906 | blt ->fff_fallback
907 |.endmacro
908 |
909 |.macro .ffunc_n, name
910 | .ffunc_1 name
911 | checknum CARG1
912 | checkfail ->fff_fallback
913 |.endmacro
914 |
915 |.macro .ffunc_nn, name
916 | .ffunc_2 name
917 | evmergehi TMP0, CARG1, CARG2
918 | checknum TMP0
919 | checkanyfail ->fff_fallback
920 |.endmacro
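 |// Informal summary of the helpers above: NARGS8:RC carries nargs*8, so
 |// .ffunc_1/.ffunc_2 merely check for at least one/two arguments and preload
 |// them into CARG1/CARG2; the _n/_nn variants additionally require numbers
 |// and branch to ->fff_fallback otherwise.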
921 |
922 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
923 |.macro ffgccheck
924 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
925 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
926 | cmplw TMP0, TMP1
927 | bgel ->fff_gcstep
928 |.endmacro
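 |// Approximate C equivalent (sketch):
 |//   if (g->gc.total >= g->gc.threshold) lj_gc_step(L);
 |// The call goes through ->fff_gcstep, which preserves BASE and recomputes
 |// NARGS8:RC afterwards.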
929 |
930 |//-- Base library: checks -----------------------------------------------
931 |
932 |.ffunc assert
933 | cmplwi NARGS8:RC, 8
934 | evldd TMP0, 0(BASE)
935 | blt ->fff_fallback
936 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
937 | la RA, -8(BASE)
938 | evcmpltu cr1, TMP0, TMP1
939 | lwz PC, FRAME_PC(BASE)
940 | bge cr1, ->fff_fallback
941 | evstdd TMP0, 0(RA)
942 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
943 | beq ->fff_res // Done if exactly 1 argument.
944 | li TMP1, 8
945 | subi RC, RC, 8
946 |1:
947 | cmplw TMP1, RC
948 | evlddx TMP0, BASE, TMP1
949 | evstddx TMP0, RA, TMP1
950 | addi TMP1, TMP1, 8
951 | bne <1
952 | b ->fff_res
953 |
954 |.ffunc type
955 | cmplwi NARGS8:RC, 8
956 | lwz CARG1, 0(BASE)
957 | blt ->fff_fallback
958 | li TMP2, ~LJ_TNUMX
959 | cmplw CARG1, TISNUM
960 | not TMP1, CARG1
961 | isellt TMP1, TMP2, TMP1
962 | slwi TMP1, TMP1, 3
963 | la TMP2, CFUNC:RB->upvalue
964 | evlddx STR:CRET1, TMP2, TMP1
965 | b ->fff_restv
966 |
967 |//-- Base library: getters and setters ---------------------------------
968 |
969 |.ffunc_1 getmetatable
970 | checktab CARG1
971 | evmergehi TMP1, CARG1, CARG1
972 | checkfail >6
973 |1: // Field metatable must be at same offset for GCtab and GCudata!
974 | lwz TAB:RB, TAB:CARG1->metatable
975 |2:
976 | evmr CRET1, TISNIL
977 | cmplwi TAB:RB, 0
978 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
979 | beq ->fff_restv
980 | lwz TMP0, TAB:RB->hmask
981 | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result.
982 | lwz TMP1, STR:RC->hash
983 | lwz NODE:TMP2, TAB:RB->node
984 | evmergelo STR:RC, TISSTR, STR:RC
985 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
986 | slwi TMP0, TMP1, 5
987 | slwi TMP1, TMP1, 3
988 | sub TMP1, TMP0, TMP1
989 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
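 |  // idx*32-idx*8 == idx*24, which presumably matches sizeof(Node) here.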
990 |3: // Rearranged logic, because we expect _not_ to find the key.
991 | evldd TMP0, NODE:TMP2->key
992 | evldd TMP1, NODE:TMP2->val
993 | evcmpeq TMP0, STR:RC
994 | lwz NODE:TMP2, NODE:TMP2->next
995 | checkallok >5
996 | cmplwi NODE:TMP2, 0
997 | beq ->fff_restv // Not found, keep default result.
998 | b <3
999 |5:
1000 | checknil TMP1
1001 | checkok ->fff_restv // Ditto for nil value.
1002 | evmr CRET1, TMP1 // Return value of mt.__metatable.
1003 | b ->fff_restv
1004 |
1005 |6:
1006 | cmpwi TMP1, LJ_TUDATA
1007 | not TMP1, TMP1
1008 | beq <1
1009 | checknum CARG1
1010 | slwi TMP1, TMP1, 2
1011 | li TMP2, 4*~LJ_TNUMX
1012 | isellt TMP1, TMP2, TMP1
1013 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
1014 | lwzx TAB:RB, TMP2, TMP1
1015 | b <2
1016 |
1017 |.ffunc_2 setmetatable
1018 | // Fast path: no mt for table yet and not clearing the mt.
1019 | evmergehi TMP0, TAB:CARG1, TAB:CARG2
1020 | checktab TMP0
1021 | checkanyfail ->fff_fallback
1022 | lwz TAB:TMP1, TAB:CARG1->metatable
1023 | cmplwi TAB:TMP1, 0
1024 | lbz TMP3, TAB:CARG1->marked
1025 | bne ->fff_fallback
1026 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
1027 | stw TAB:CARG2, TAB:CARG1->metatable
1028 | beq ->fff_restv
1029 | barrierback TAB:CARG1, TMP3, TMP0
1030 | b ->fff_restv
1031 |
1032 |.ffunc rawget
1033 | cmplwi NARGS8:RC, 16
1034 | evldd CARG2, 0(BASE)
1035 | blt ->fff_fallback
1036 | checktab CARG2
1037 | la CARG3, 8(BASE)
1038 | checkfail ->fff_fallback
1039 | mr CARG1, L
1040 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1041 | // Returns cTValue *.
1042 | evldd CRET1, 0(CRET1)
1043 | b ->fff_restv
1044 |
1045 |//-- Base library: conversions ------------------------------------------
1046 |
1047 |.ffunc tonumber
1048 | // Only handles the number case inline (without a base argument).
1049 | cmplwi NARGS8:RC, 8
1050 | evldd CARG1, 0(BASE)
1051 | bne ->fff_fallback // Exactly one argument.
1052 | checknum CARG1
1053 | checkok ->fff_restv
1054 | b ->fff_fallback
1055 |
1056 |.ffunc_1 tostring
1057 | // Only handles the string or number case inline.
1058 | checkstr CARG1
1059 | // A __tostring method in the string base metatable is ignored.
1060 | checkok ->fff_restv // String key?
1061 | // Handle numbers inline, unless a number base metatable is present.
1062 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1063 | checknum CARG1
1064 | cmplwi cr1, TMP0, 0
1065 | stw BASE, L->base // Add frame since C call can throw.
1066 | crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq
1067 | stw PC, SAVE_PC // Redundant (but a defined value).
1068 | bne ->fff_fallback
1069 | ffgccheck
1070 | mr CARG1, L
1071 | mr CARG2, BASE
1072 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
1073 | // Returns GCstr *.
1074 | evmergelo STR:CRET1, TISSTR, STR:CRET1
1075 | b ->fff_restv
1076 |
1077 |//-- Base library: iterators -------------------------------------------
1078 |
1079 |.ffunc next
1080 | cmplwi NARGS8:RC, 8
1081 | evldd CARG2, 0(BASE)
1082 | blt ->fff_fallback
1083 | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
1084 | checktab TAB:CARG2
1085 | lwz PC, FRAME_PC(BASE)
1086 | checkfail ->fff_fallback
1087 | stw BASE, L->base // Add frame since C call can throw.
1088 | mr CARG1, L
1089 | stw BASE, L->top // Dummy frame length is ok.
1090 | la CARG3, 8(BASE)
1091 | stw PC, SAVE_PC
1092 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1093 | // Returns 0 at end of traversal.
1094 | cmplwi CRET1, 0
1095 | evmr CRET1, TISNIL
1096 | beq ->fff_restv // End of traversal: return nil.
1097 | evldd TMP0, 8(BASE) // Copy key and value to results.
1098 | la RA, -8(BASE)
1099 | evldd TMP1, 16(BASE)
1100 | evstdd TMP0, 0(RA)
1101 | li RD, (2+1)*8
1102 | evstdd TMP1, 8(RA)
1103 | b ->fff_res
1104 |
1105 |.ffunc_1 pairs
1106 | checktab TAB:CARG1
1107 | lwz PC, FRAME_PC(BASE)
1108 | checkfail ->fff_fallback
1109#if LJ_52
1110 | lwz TAB:TMP2, TAB:CARG1->metatable
1111 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1112 | cmplwi TAB:TMP2, 0
1113 | la RA, -8(BASE)
1114 | bne ->fff_fallback
1115#else
1116 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1117 | la RA, -8(BASE)
1118#endif
1119 | evstdd TISNIL, 8(BASE)
1120 | li RD, (3+1)*8
1121 | evstdd CFUNC:TMP0, 0(RA)
1122 | b ->fff_res
1123 |
1124 |.ffunc_2 ipairs_aux
1125 | checktab TAB:CARG1
1126 | lwz PC, FRAME_PC(BASE)
1127 | checkfail ->fff_fallback
1128 | checknum CARG2
1129 | lus TMP3, 0x3ff0
1130 | checkfail ->fff_fallback
1131 | efdctsi TMP2, CARG2
1132 | lwz TMP0, TAB:CARG1->asize
1133 | evmergelo TMP3, TMP3, ZERO
1134 | lwz TMP1, TAB:CARG1->array
1135 | efdadd CARG2, CARG2, TMP3
1136 | addi TMP2, TMP2, 1
1137 | la RA, -8(BASE)
1138 | cmplw TMP0, TMP2
1139 | slwi TMP3, TMP2, 3
1140 | evstdd CARG2, 0(RA)
1141 | ble >2 // Not in array part?
1142 | evlddx TMP1, TMP1, TMP3
1143 |1:
1144 | checknil TMP1
1145 | li RD, (0+1)*8
1146 | checkok ->fff_res // End of iteration, return 0 results.
1147 | li RD, (2+1)*8
1148 | evstdd TMP1, 8(RA)
1149 | b ->fff_res
1150 |2: // Check for empty hash part first. Otherwise call C function.
1151 | lwz TMP0, TAB:CARG1->hmask
1152 | cmplwi TMP0, 0
1153 | li RD, (0+1)*8
1154 | beq ->fff_res
1155 | mr CARG2, TMP2
1156 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1157 | // Returns cTValue * or NULL.
1158 | cmplwi CRET1, 0
1159 | li RD, (0+1)*8
1160 | beq ->fff_res
1161 | evldd TMP1, 0(CRET1)
1162 | b <1
1163 |
1164 |.ffunc_1 ipairs
1165 | checktab TAB:CARG1
1166 | lwz PC, FRAME_PC(BASE)
1167 | checkfail ->fff_fallback
1168#if LJ_52
1169 | lwz TAB:TMP2, TAB:CARG1->metatable
1170 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1171 | cmplwi TAB:TMP2, 0
1172 | la RA, -8(BASE)
1173 | bne ->fff_fallback
1174#else
1175 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1176 | la RA, -8(BASE)
1177#endif
1178 | evsplati TMP1, 0
1179 | li RD, (3+1)*8
1180 | evstdd TMP1, 8(BASE)
1181 | evstdd CFUNC:TMP0, 0(RA)
1182 | b ->fff_res
1183 |
1184 |//-- Base library: catch errors ----------------------------------------
1185 |
1186 |.ffunc pcall
1187 | lwz TMP1, L->maxstack
1188 | add TMP2, BASE, NARGS8:RC
1189 | cmplwi NARGS8:RC, 8
1190 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1191 | cmplw cr1, TMP1, TMP2
1192 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
1193 | blt ->fff_fallback
1194 | mr TMP2, BASE
1195 | la BASE, 8(BASE)
1196 | // Remember active hook before pcall.
1197 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31
1198 | subi NARGS8:RC, NARGS8:RC, 8
1199 | addi PC, TMP3, 8+FRAME_PCALL
1200 | b ->vm_call_dispatch
1201 |
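 |// Hedged note on the frame link in pcall above and xpcall below: the
 |// HOOK_ACTIVE bit is rotated down to bit 0 and added into the frame type,
 |// so a pcall entered with an active hook presumably gets the FRAME_PCALLH
 |// variant and the hook state can be restored on return.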
1202 |.ffunc_2 xpcall
1203 | lwz TMP1, L->maxstack
1204 | add TMP2, BASE, NARGS8:RC
1205 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1206 | mr TMP2, BASE
1207 | cmplw TMP1, TMP2
1208 | blt ->fff_fallback
1209 | checkfunc CARG2 // Traceback must be a function.
1210 | checkfail ->fff_fallback
1211 | la BASE, 16(BASE)
1212 | // Remember active hook before pcall.
1213 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31
1214 | evstdd CARG2, 0(TMP2) // Swap function and traceback.
1215 | subi NARGS8:RC, NARGS8:RC, 16
1216 | evstdd CARG1, 8(TMP2)
1217 | addi PC, TMP3, 16+FRAME_PCALL
1218 | b ->vm_call_dispatch
1219 |
1220 |//-- Coroutine library --------------------------------------------------
1221 |
1222 |.macro coroutine_resume_wrap, resume
1223 |.if resume
1224 |.ffunc_1 coroutine_resume
1225 | evmergehi TMP0, L:CARG1, L:CARG1
1226 |.else
1227 |.ffunc coroutine_wrap_aux
1228 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr
1229 |.endif
1230 |.if resume
1231 | cmpwi TMP0, LJ_TTHREAD
1232 | bne ->fff_fallback
1233 |.endif
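 |// The CR gymnastics below implement (informally) the resumability test:
 |//   if (co->status > LUA_YIELD || co->cframe != NULL ||
 |//       (co->status != LUA_YIELD && co->base == co->top) ||
 |//       co->top + nargs would overflow co->maxstack)  goto ->fff_fallback;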
1234 | lbz TMP0, L:CARG1->status
1235 | lwz TMP1, L:CARG1->cframe
1236 | lwz CARG2, L:CARG1->top
1237 | cmplwi cr0, TMP0, LUA_YIELD
1238 | lwz TMP2, L:CARG1->base
1239 | cmplwi cr1, TMP1, 0
1240 | lwz TMP0, L:CARG1->maxstack
1241 | cmplw cr7, CARG2, TMP2
1242 | lwz PC, FRAME_PC(BASE)
1243 | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0
1244 | add TMP2, CARG2, NARGS8:RC
1245 | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD
1246 | cmplw cr1, TMP2, TMP0
1247 | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
1248 | stw PC, SAVE_PC
1249 | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov
1250 | stw BASE, L->base
1251 | blt cr6, ->fff_fallback
1252 |1:
1253 |.if resume
1254 | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
1255 | subi NARGS8:RC, NARGS8:RC, 8
1256 | subi TMP2, TMP2, 8
1257 |.endif
1258 | stw TMP2, L:CARG1->top
1259 | li TMP1, 0
1260 | stw BASE, L->top
1261 |2: // Move args to coroutine.
1262 | cmpw TMP1, NARGS8:RC
1263 | evlddx TMP0, BASE, TMP1
1264 | beq >3
1265 | evstddx TMP0, CARG2, TMP1
1266 | addi TMP1, TMP1, 8
1267 | b <2
1268 |3:
1269 | li CARG3, 0
1270 | mr L:SAVE0, L:CARG1
1271 | li CARG4, 0
1272 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1273 | // Returns thread status.
1274 |4:
1275 | lwz TMP2, L:SAVE0->base
1276 | cmplwi CRET1, LUA_YIELD
1277 | lwz TMP3, L:SAVE0->top
1278 | li_vmstate INTERP
1279 | lwz BASE, L->base
1280 | st_vmstate
1281 | bgt >8
1282 | sub RD, TMP3, TMP2
1283 | lwz TMP0, L->maxstack
1284 | cmplwi RD, 0
1285 | add TMP1, BASE, RD
1286 | beq >6 // No results?
1287 | cmplw TMP1, TMP0
1288 | li TMP1, 0
1289 | bgt >9 // Need to grow stack?
1290 |
1291 | subi TMP3, RD, 8
1292 | stw TMP2, L:SAVE0->top // Clear coroutine stack.
1293 |5: // Move results from coroutine.
1294 | cmplw TMP1, TMP3
1295 | evlddx TMP0, TMP2, TMP1
1296 | evstddx TMP0, BASE, TMP1
1297 | addi TMP1, TMP1, 8
1298 | bne <5
1299 |6:
1300 | andi. TMP0, PC, FRAME_TYPE
1301 |.if resume
1302 | li TMP1, LJ_TTRUE
1303 | la RA, -8(BASE)
1304 | stw TMP1, -8(BASE) // Prepend true to results.
1305 | addi RD, RD, 16
1306 |.else
1307 | mr RA, BASE
1308 | addi RD, RD, 8
1309 |.endif
1310 |7:
1311 | stw PC, SAVE_PC
1312 | mr MULTRES, RD
1313 | beq ->BC_RET_Z
1314 | b ->vm_return
1315 |
1316 |8: // Coroutine returned with error (at co->top-1).
1317 |.if resume
1318 | andi. TMP0, PC, FRAME_TYPE
1319 | la TMP3, -8(TMP3)
1320 | li TMP1, LJ_TFALSE
1321 | evldd TMP0, 0(TMP3)
1322 | stw TMP3, L:SAVE0->top // Remove error from coroutine stack.
1323 | li RD, (2+1)*8
1324 | stw TMP1, -8(BASE) // Prepend false to results.
1325 | la RA, -8(BASE)
1326 | evstdd TMP0, 0(BASE) // Copy error message.
1327 | b <7
1328 |.else
1329 | mr CARG1, L
1330 | mr CARG2, L:SAVE0
1331 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1332 |.endif
1333 |
1334 |9: // Handle stack expansion on return from yield.
1335 | mr CARG1, L
1336 | srwi CARG2, RD, 3
1337 | bl extern lj_state_growstack // (lua_State *L, int n)
1338 | li CRET1, 0
1339 | b <4
1340 |.endmacro
1341 |
1342 | coroutine_resume_wrap 1 // coroutine.resume
1343 | coroutine_resume_wrap 0 // coroutine.wrap
1344 |
1345 |.ffunc coroutine_yield
1346 | lwz TMP0, L->cframe
1347 | add TMP1, BASE, NARGS8:RC
1348 | stw BASE, L->base
1349 | andi. TMP0, TMP0, CFRAME_RESUME
1350 | stw TMP1, L->top
1351 | li CRET1, LUA_YIELD
1352 | beq ->fff_fallback
1353 | stw ZERO, L->cframe
1354 | stb CRET1, L->status
1355 | b ->vm_leave_unw
1356 |
1357 |//-- Math library -------------------------------------------------------
1358 |
1359 |.ffunc_n math_abs
1360 | efdabs CRET1, CARG1
1361 | // Fallthrough.
1362 |
1363 |->fff_restv:
1364 | // CRET1 = TValue result.
1365 | lwz PC, FRAME_PC(BASE)
1366 | la RA, -8(BASE)
1367 | evstdd CRET1, 0(RA)
1368 |->fff_res1:
1369 | // RA = results, PC = return.
1370 | li RD, (1+1)*8
1371 |->fff_res:
1372 | // RA = results, RD = (nresults+1)*8, PC = return.
1373 | andi. TMP0, PC, FRAME_TYPE
1374 | mr MULTRES, RD
1375 | bne ->vm_return
1376 | lwz INS, -4(PC)
1377 | decode_RB8 RB, INS
1378 |5:
1379 | cmplw RB, RD // More results expected?
1380 | decode_RA8 TMP0, INS
1381 | bgt >6
1382 | ins_next1
1383 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1384 | sub BASE, RA, TMP0
1385 | ins_next2
1386 |
1387 |6: // Fill up results with nil.
1388 | subi TMP1, RD, 8
1389 | addi RD, RD, 8
1390 | evstddx TISNIL, RA, TMP1
1391 | b <5
1392 |
1393 |.macro math_extern, func
1394 | .ffunc math_ .. func
1395 | cmplwi NARGS8:RC, 8
1396 | evldd CARG2, 0(BASE)
1397 | blt ->fff_fallback
1398 | checknum CARG2
1399 | evmergehi CARG1, CARG2, CARG2
1400 | checkfail ->fff_fallback
1401 | bl extern func@plt
1402 | evmergelo CRET1, CRET1, CRET2
1403 | b ->fff_restv
1404 |.endmacro
1405 |
1406 |.macro math_extern2, func
1407 | .ffunc math_ .. func
1408 | cmplwi NARGS8:RC, 16
1409 | evldd CARG2, 0(BASE)
1410 | evldd CARG4, 8(BASE)
1411 | blt ->fff_fallback
1412 | evmergehi CARG1, CARG4, CARG2
1413 | checknum CARG1
1414 | evmergehi CARG3, CARG4, CARG4
1415 | checkanyfail ->fff_fallback
1416 | bl extern func@plt
1417 | evmergelo CRET1, CRET1, CRET2
1418 | b ->fff_restv
1419 |.endmacro
1420 |
1421 |.macro math_round, func
1422 | .ffunc math_ .. func
1423 | cmplwi NARGS8:RC, 8
1424 | evldd CARG2, 0(BASE)
1425 | blt ->fff_fallback
1426 | checknum CARG2
1427 | evmergehi CARG1, CARG2, CARG2
1428 | checkfail ->fff_fallback
1429 | lwz PC, FRAME_PC(BASE)
1430 | bl ->vm_..func.._hilo;
1431 | la RA, -8(BASE)
1432 | evstdd CRET2, 0(RA)
1433 | b ->fff_res1
1434 |.endmacro
1435 |
1436 | math_round floor
1437 | math_round ceil
1438 |
1439 | math_extern sqrt
1440 |
1441 |.ffunc math_log
1442 | cmplwi NARGS8:RC, 8
1443 | evldd CARG2, 0(BASE)
1444 | bne ->fff_fallback // Need exactly 1 argument.
1445 | checknum CARG2
1446 | evmergehi CARG1, CARG2, CARG2
1447 | checkfail ->fff_fallback
1448 | bl extern log@plt
1449 | evmergelo CRET1, CRET1, CRET2
1450 | b ->fff_restv
1451 |
1452 | math_extern log10
1453 | math_extern exp
1454 | math_extern sin
1455 | math_extern cos
1456 | math_extern tan
1457 | math_extern asin
1458 | math_extern acos
1459 | math_extern atan
1460 | math_extern sinh
1461 | math_extern cosh
1462 | math_extern tanh
1463 | math_extern2 pow
1464 | math_extern2 atan2
1465 | math_extern2 fmod
1466 |
1467 |->ff_math_deg:
1468 |.ffunc_n math_rad
1469 | evldd CARG2, CFUNC:RB->upvalue[0]
1470 | efdmul CRET1, CARG1, CARG2
1471 | b ->fff_restv
1472 |
1473 |.ffunc math_ldexp
1474 | cmplwi NARGS8:RC, 16
1475 | evldd CARG2, 0(BASE)
1476 | evldd CARG4, 8(BASE)
1477 | blt ->fff_fallback
1478 | evmergehi CARG1, CARG4, CARG2
1479 | checknum CARG1
1480 | checkanyfail ->fff_fallback
1481 | efdctsi CARG3, CARG4
1482 | bl extern ldexp@plt
1483 | evmergelo CRET1, CRET1, CRET2
1484 | b ->fff_restv
1485 |
1486 |.ffunc math_frexp
1487 | cmplwi NARGS8:RC, 8
1488 | evldd CARG2, 0(BASE)
1489 | blt ->fff_fallback
1490 | checknum CARG2
1491 | evmergehi CARG1, CARG2, CARG2
1492 | checkfail ->fff_fallback
1493 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1494 | lwz PC, FRAME_PC(BASE)
1495 | bl extern frexp@plt
1496 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1497 | evmergelo CRET1, CRET1, CRET2
1498 | efdcfsi CRET2, TMP1
1499 | la RA, -8(BASE)
1500 | evstdd CRET1, 0(RA)
1501 | li RD, (2+1)*8
1502 | evstdd CRET2, 8(RA)
1503 | b ->fff_res
1504 |
1505 |.ffunc math_modf
1506 | cmplwi NARGS8:RC, 8
1507 | evldd CARG2, 0(BASE)
1508 | blt ->fff_fallback
1509 | checknum CARG2
1510 | evmergehi CARG1, CARG2, CARG2
1511 | checkfail ->fff_fallback
1512 | la CARG3, -8(BASE)
1513 | lwz PC, FRAME_PC(BASE)
1514 | bl extern modf@plt
1515 | evmergelo CRET1, CRET1, CRET2
1516 | la RA, -8(BASE)
1517 | evstdd CRET1, 0(BASE)
1518 | li RD, (2+1)*8
1519 | b ->fff_res
1520 |
1521 |.macro math_minmax, name, cmpop
1522 | .ffunc_1 name
1523 | checknum CARG1
1524 | li TMP1, 8
1525 | checkfail ->fff_fallback
1526 |1:
1527 | evlddx CARG2, BASE, TMP1
1528 | cmplw cr1, TMP1, NARGS8:RC
1529 | checknum CARG2
1530 | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1.
1531 | checkfail ->fff_fallback
1532 | cmpop CARG2, CARG1
1533 | addi TMP1, TMP1, 8
1534 | crmove 4*cr0+lt, 4*cr0+gt
1535 | evsel CARG1, CARG2, CARG1
1536 | b <1
1537 |.endmacro
1538 |
1539 | math_minmax math_min, efdtstlt
1540 | math_minmax math_max, efdtstgt
1541 |
1542 |//-- String library -----------------------------------------------------
1543 |
1544 |.ffunc_1 string_len
1545 | checkstr STR:CARG1
1546 | checkfail ->fff_fallback
1547 | lwz TMP0, STR:CARG1->len
1548 | efdcfsi CRET1, TMP0
1549 | b ->fff_restv
1550 |
1551 |.ffunc string_byte // Only handle the 1-arg case here.
1552 | cmplwi NARGS8:RC, 8
1553 | evldd STR:CARG1, 0(BASE)
1554 | bne ->fff_fallback // Need exactly 1 argument.
1555 | checkstr STR:CARG1
1556 | la RA, -8(BASE)
1557 | checkfail ->fff_fallback
1558 | lwz TMP0, STR:CARG1->len
1559 | li RD, (0+1)*8
1560 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1561 | li TMP2, (1+1)*8
1562 | cmplwi TMP0, 0
1563 | lwz PC, FRAME_PC(BASE)
1564 | efdcfsi CRET1, TMP1
1565 | iseleq RD, RD, TMP2
1566 | evstdd CRET1, 0(RA)
1567 | b ->fff_res
1568 |
1569 |.ffunc string_char // Only handle the 1-arg case here.
1570 | ffgccheck
1571 | cmplwi NARGS8:RC, 8
1572 | evldd CARG1, 0(BASE)
1573 | bne ->fff_fallback // Exactly 1 argument.
1574 | checknum CARG1
1575 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1576 | checkfail ->fff_fallback
1577 | efdctsiz TMP0, CARG1
1578 | li CARG3, 1
1579 | cmplwi TMP0, 255
1580 | stb TMP0, 0(CARG2)
1581 | bgt ->fff_fallback
1582 |->fff_newstr:
1583 | mr CARG1, L
1584 | stw BASE, L->base
1585 | stw PC, SAVE_PC
1586 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1587 | // Returns GCstr *.
1588 | lwz BASE, L->base
1589 | evmergelo STR:CRET1, TISSTR, STR:CRET1
1590 | b ->fff_restv
1591 |
1592 |.ffunc string_sub
1593 | ffgccheck
1594 | cmplwi NARGS8:RC, 16
1595 | evldd CARG3, 16(BASE)
1596 | evldd STR:CARG1, 0(BASE)
1597 | blt ->fff_fallback
1598 | evldd CARG2, 8(BASE)
1599 | li TMP2, -1
1600 | beq >1
1601 | checknum CARG3
1602 | checkfail ->fff_fallback
1603 | efdctsiz TMP2, CARG3
1604 |1:
1605 | checknum CARG2
1606 | checkfail ->fff_fallback
1607 | checkstr STR:CARG1
1608 | efdctsiz TMP1, CARG2
1609 | checkfail ->fff_fallback
1610 | lwz TMP0, STR:CARG1->len
1611 | cmplw TMP0, TMP2 // len < end? (unsigned compare)
1612 | add TMP3, TMP2, TMP0
1613 | blt >5
1614 |2:
1615 | cmpwi TMP1, 0 // start <= 0?
1616 | add TMP3, TMP1, TMP0
1617 | ble >7
1618 |3:
1619 | sub. CARG3, TMP2, TMP1
1620 | addi CARG2, STR:CARG1, #STR-1
1621 | addi CARG3, CARG3, 1
1622 | add CARG2, CARG2, TMP1
1623 | isellt CARG3, r0, CARG3
1624 | b ->fff_newstr
1625 |
1626 |5: // Negative end or overflow.
1627 | cmpw TMP0, TMP2
1628 | addi TMP3, TMP3, 1
1629 | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1
1630 | b <2
1631 |
1632 |7: // Negative start or underflow.
1633 | cmpwi cr1, TMP3, 0
1634 | iseleq TMP1, r0, TMP3
1635 | isel TMP1, r0, TMP1, 4*cr1+lt
1636 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
1637 | b <3
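 |// Worked example for the fix-ups above (informal): with len(s)==5,
 |// string.sub(s, -3) takes the '7:' case, so start becomes -3+5+1 = 3; the
 |// default end of -1 takes the '5:' case and becomes -1+5+1 = 5, giving the
 |// byte range 3..5 as Lua's negative-index semantics require.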
1638 |
1639 |.ffunc string_rep // Only handle the 1-char case inline.
1640 | ffgccheck
1641 | cmplwi NARGS8:RC, 16
1642 | evldd CARG1, 0(BASE)
1643 | evldd CARG2, 8(BASE)
1644 | bne ->fff_fallback // Exactly 2 arguments.
1645 | checknum CARG2
1646 | checkfail ->fff_fallback
1647 | checkstr STR:CARG1
1648 | efdctsiz CARG3, CARG2
1649 | checkfail ->fff_fallback
1650 | lwz TMP0, STR:CARG1->len
1651 | cmpwi CARG3, 0
1652 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1653 | ble >2 // Count <= 0? (or non-int)
1654 | cmplwi TMP0, 1
1655 | subi TMP2, CARG3, 1
1656 | blt >2 // Zero length string?
1657 | cmplw cr1, TMP1, CARG3
1658 | bne ->fff_fallback // Fallback for > 1-char strings.
1659 | lbz TMP0, STR:CARG1[1]
1660 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1661 | blt cr1, ->fff_fallback
1662 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1663 | cmplwi TMP2, 0
1664 | stbx TMP0, CARG2, TMP2
1665 | subi TMP2, TMP2, 1
1666 | bne <1
1667 | b ->fff_newstr
1668 |2: // Return empty string.
1669 | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH)
1670 | evmergelo CRET1, TISSTR, STR:CRET1
1671 | b ->fff_restv
1672 |
1673 |.ffunc string_reverse
1674 | ffgccheck
1675 | cmplwi NARGS8:RC, 8
1676 | evldd CARG1, 0(BASE)
1677 | blt ->fff_fallback
1678 | checkstr STR:CARG1
1679 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1680 | checkfail ->fff_fallback
1681 | lwz CARG3, STR:CARG1->len
1682 | la CARG1, #STR(STR:CARG1)
1683 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1684 | li TMP2, 0
1685 | cmplw TMP1, CARG3
1686 | subi TMP3, CARG3, 1
1687 | blt ->fff_fallback
1688 |1: // Reverse string copy.
1689 | cmpwi TMP3, 0
1690 | lbzx TMP1, CARG1, TMP2
1691 | blt ->fff_newstr
1692 | stbx TMP1, CARG2, TMP3
1693 | subi TMP3, TMP3, 1
1694 | addi TMP2, TMP2, 1
1695 | b <1
1696 |
1697 |.macro ffstring_case, name, lo
1698 | .ffunc name
1699 | ffgccheck
1700 | cmplwi NARGS8:RC, 8
1701 | evldd CARG1, 0(BASE)
1702 | blt ->fff_fallback
1703 | checkstr STR:CARG1
1704 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1705 | checkfail ->fff_fallback
1706 | lwz CARG3, STR:CARG1->len
1707 | la CARG1, #STR(STR:CARG1)
1708 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1709 | cmplw TMP1, CARG3
1710 | li TMP2, 0
1711 | blt ->fff_fallback
1712 |1: // ASCII case conversion.
1713 | cmplw TMP2, CARG3
1714 | lbzx TMP1, CARG1, TMP2
1715 | bge ->fff_newstr
1716 | subi TMP0, TMP1, lo
1717 | xori TMP3, TMP1, 0x20
1718 | cmplwi TMP0, 26
1719 | isellt TMP1, TMP3, TMP1
1720 | stbx TMP1, CARG2, TMP2
1721 | addi TMP2, TMP2, 1
1722 | b <1
1723 |.endmacro
1724 |
1725 |ffstring_case string_lower, 65
1726 |ffstring_case string_upper, 97
1727 |
1728 |//-- Table library ------------------------------------------------------
1729 |
1730 |.ffunc_1 table_getn
1731 | checktab CARG1
1732 | checkfail ->fff_fallback
1733 | bl extern lj_tab_len // (GCtab *t)
1734 | // Returns uint32_t (but less than 2^31).
1735 | efdcfsi CRET1, CRET1
1736 | b ->fff_restv
1737 |
1738 |//-- Bit library --------------------------------------------------------
1739 |
1740 |.macro .ffunc_bit, name
1741 | .ffunc_n bit_..name
1742 | efdadd CARG1, CARG1, TOBIT
1743 |.endmacro
1744 |
1745 |.ffunc_bit tobit
1746 |->fff_resbit:
1747 | efdcfsi CRET1, CARG1
1748 | b ->fff_restv
1749 |
1750 |.macro .ffunc_bit_op, name, ins
1751 | .ffunc_bit name
1752 | li TMP1, 8
1753 |1:
1754 | evlddx CARG2, BASE, TMP1
1755 | cmplw cr1, TMP1, NARGS8:RC
1756 | checknum CARG2
1757 | bge cr1, ->fff_resbit
1758 | checkfail ->fff_fallback
1759 | efdadd CARG2, CARG2, TOBIT
1760 | ins CARG1, CARG1, CARG2
1761 | addi TMP1, TMP1, 8
1762 | b <1
1763 |.endmacro
1764 |
1765 |.ffunc_bit_op band, and
1766 |.ffunc_bit_op bor, or
1767 |.ffunc_bit_op bxor, xor
1768 |
1769 |.ffunc_bit bswap
1770 | rotlwi TMP0, CARG1, 8
1771 | rlwimi TMP0, CARG1, 24, 0, 7
1772 | rlwimi TMP0, CARG1, 24, 16, 23
1773 | efdcfsi CRET1, TMP0
1774 | b ->fff_restv
1775 |
1776 |.ffunc_bit bnot
1777 | not TMP0, CARG1
1778 | efdcfsi CRET1, TMP0
1779 | b ->fff_restv
1780 |
1781 |.macro .ffunc_bit_sh, name, ins, shmod
1782 | .ffunc_nn bit_..name
1783 | efdadd CARG2, CARG2, TOBIT
1784 | efdadd CARG1, CARG1, TOBIT
1785 |.if shmod == 1
1786 | rlwinm CARG2, CARG2, 0, 27, 31
1787 |.elif shmod == 2
1788 | neg CARG2, CARG2
1789 |.endif
1790 | ins TMP0, CARG1, CARG2
1791 | efdcfsi CRET1, TMP0
1792 | b ->fff_restv
1793 |.endmacro
1794 |
1795 |.ffunc_bit_sh lshift, slw, 1
1796 |.ffunc_bit_sh rshift, srw, 1
1797 |.ffunc_bit_sh arshift, sraw, 1
1798 |.ffunc_bit_sh rol, rotlw, 0
1799 |.ffunc_bit_sh ror, rotlw, 2
1800 |
1801 |//-----------------------------------------------------------------------
1802 |
1803 |->fff_fallback: // Call fast function fallback handler.
1804 | // BASE = new base, RB = CFUNC, RC = nargs*8
1805 | lwz TMP3, CFUNC:RB->f
1806 | add TMP1, BASE, NARGS8:RC
1807 | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
1808 | addi TMP0, TMP1, 8*LUA_MINSTACK
1809 | lwz TMP2, L->maxstack
1810 | stw PC, SAVE_PC // Redundant (but a defined value).
1811 | cmplw TMP0, TMP2
1812 | stw BASE, L->base
1813 | stw TMP1, L->top
1814 | mr CARG1, L
1815 | bgt >5 // Need to grow stack.
1816 | mtctr TMP3
1817 | bctrl // (lua_State *L)
1818 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1819 | lwz BASE, L->base
1820 | cmpwi CRET1, 0
1821 | slwi RD, CRET1, 3
1822 | la RA, -8(BASE)
1823 | bgt ->fff_res // Returned nresults+1?
1824 |1: // Returned 0 or -1: retry fast path.
1825 | lwz TMP0, L->top
1826 | lwz LFUNC:RB, FRAME_FUNC(BASE)
1827 | sub NARGS8:RC, TMP0, BASE
1828 | bne ->vm_call_tail // Returned -1?
1829 | ins_callt // Returned 0: retry fast path.
1830 |
1831 |// Reconstruct previous base for vmeta_call during tailcall.
1832 |->vm_call_tail:
1833 | andi. TMP0, PC, FRAME_TYPE
1834 | rlwinm TMP1, PC, 0, 0, 28
1835 | bne >3
1836 | lwz INS, -4(PC)
1837 | decode_RA8 TMP1, INS
1838 | addi TMP1, TMP1, 8
1839 |3:
1840 | sub TMP2, BASE, TMP1
1841 | b ->vm_call_dispatch // Resolve again for tailcall.
1842 |
1843 |5: // Grow stack for fallback handler.
1844 | li CARG2, LUA_MINSTACK
1845 | bl extern lj_state_growstack // (lua_State *L, int n)
1846 | lwz BASE, L->base
1847 | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry.
1848 | b <1
1849 |
1850 |->fff_gcstep: // Call GC step function.
1851 | // BASE = new base, RC = nargs*8
1852 | mflr SAVE0
1853 | stw BASE, L->base
1854 | add TMP0, BASE, NARGS8:RC
1855 | stw PC, SAVE_PC // Redundant (but a defined value).
1856 | stw TMP0, L->top
1857 | mr CARG1, L
1858 | bl extern lj_gc_step // (lua_State *L)
1859 | lwz BASE, L->base
1860 | mtlr SAVE0
1861 | lwz TMP0, L->top
1862 | sub NARGS8:RC, TMP0, BASE
1863 | lwz CFUNC:RB, FRAME_FUNC(BASE)
1864 | blr
1865 |
1866 |//-----------------------------------------------------------------------
1867 |//-- Special dispatch targets -------------------------------------------
1868 |//-----------------------------------------------------------------------
1869 |
1870 |->vm_record: // Dispatch target for recording phase.
1871 |.if JIT
1872 | NYI
1873 |.endif
1874 |
1875 |->vm_rethook: // Dispatch target for return hooks.
1876 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1877 | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
1878 | beq >1
1879 |5: // Re-dispatch to static ins.
1880 | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP4 TMP1, INS.
1881 | lwzx TMP0, DISPATCH, TMP1
1882 | mtctr TMP0
1883 | bctr
1884 |
1885 |->vm_inshook: // Dispatch target for instr/line hooks.
1886 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1887 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
1888 | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
1889 | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0
1890 | bne <5
1891 |
1892 | cmpwi cr1, TMP0, 0
1893 | addic. TMP2, TMP2, -1
1894 | beq cr1, <5
1895 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
1896 | beq >1
1897 | bge cr1, <5
1898 |1:
1899 | mr CARG1, L
1900 | stw MULTRES, SAVE_MULTRES
1901 | mr CARG2, PC
1902 | stw BASE, L->base
1903 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1904 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
1905 |3:
1906 | lwz BASE, L->base
1907 |4: // Re-dispatch to static ins.
1908 | lwz INS, -4(PC)
1909 | decode_OP4 TMP1, INS
1910 | decode_RB8 RB, INS
1911 | addi TMP1, TMP1, GG_DISP2STATIC
1912 | decode_RD8 RD, INS
1913 | lwzx TMP0, DISPATCH, TMP1
1914 | decode_RA8 RA, INS
1915 | decode_RC8 RC, INS
1916 | mtctr TMP0
1917 | bctr
1918 |
1919 |->cont_hook: // Continue from hook yield.
1920 | addi PC, PC, 4
1921 | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
1922 | b <4
1923 |
1924 |->vm_hotloop: // Hot loop counter underflow.
1925 |.if JIT
1926 | NYI
1927 |.endif
1928 |
1929 |->vm_callhook: // Dispatch target for call hooks.
1930 | mr CARG2, PC
1931 |.if JIT
1932 | b >1
1933 |.endif
1934 |
1935 |->vm_hotcall: // Hot call counter underflow.
1936 |.if JIT
1937 | ori CARG2, PC, 1
1938 |1:
1939 |.endif
1940 | add TMP0, BASE, RC
1941 | stw PC, SAVE_PC
1942 | mr CARG1, L
1943 | stw BASE, L->base
1944 | sub RA, RA, BASE
1945 | stw TMP0, L->top
1946 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
1947 | // Returns ASMFunction.
1948 | lwz BASE, L->base
1949 | lwz TMP0, L->top
1950 | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
1951 | sub NARGS8:RC, TMP0, BASE
1952 | add RA, BASE, RA
1953 | lwz LFUNC:RB, FRAME_FUNC(BASE)
1954 | mtctr CRET1
1955 | bctr
1956 |
1957 |//-----------------------------------------------------------------------
1958 |//-- Trace exit handler -------------------------------------------------
1959 |//-----------------------------------------------------------------------
1960 |
1961 |->vm_exit_handler:
1962 |.if JIT
1963 | NYI
1964 |.endif
1965 |->vm_exit_interp:
1966 |.if JIT
1967 | NYI
1968 |.endif
1969 |
1970 |//-----------------------------------------------------------------------
1971 |//-- Math helper functions ----------------------------------------------
1972 |//-----------------------------------------------------------------------
1973 |
1974 |// FP value rounding. Called by math.floor/math.ceil fast functions
1975 |// and from JIT code.
1976 |//
1977 |// This can be inlined if the CPU has the frin/friz/frip/frim instructions.
1978 |// The alternative hard-float approaches have a deep dependency chain.
1979 |// The resulting latency is at least 3x-7x the double-precision FP latency
1980 |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles.
1981 |//
1982 |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy).
1983 |// However it relies on a fast way to transfer the FP value to GPRs
1984 |// (e500v2: 0cy for lo-word, 1cy for hi-word).
1985 |//
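 |// C-flavoured sketch of the mask trick for floor (informal, mode 0 below):
 |//   exp = ((hi >> 20) & 0x7ff) - 1023;          // unbiased exponent
 |//   if (exp >= 0 && exp <= 51) {
 |//     lomask = -1 << (52-exp);                   // kept bits, low word
 |//     himask = (int32_t)0xfff00000 >> exp;       // kept bits, high word
 |//     frac = (hi & ~himask) | (lo & ~lomask);    // the discarded fraction
 |//     hi &= himask; lo &= lomask;                // truncate toward zero
 |//     if (x < 0 && frac) hi:lo -= himask:lomask; // one step down -> floor
 |//   }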
1986 |.macro vm_round, name, mode
1987 | // Used temporaries: TMP0, TMP1, TMP2, TMP3.
1988 |->name.._efd: // Input: CARG2, output: CRET2
1989 | evmergehi CARG1, CARG2, CARG2
1990 |->name.._hilo:
1991 | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
1992 | rlwinm TMP2, CARG1, 12, 21, 31
1993 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
1994 | li TMP1, -1
1995 | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51?
1996 | subfic TMP0, TMP2, 52
1997 | bgt cr1, >1
1998 | lus TMP3, 0xfff0
1999 | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp)
2000 | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp
2001 |.if mode == 2 // trunc(x):
2002 | evmergelo TMP0, TMP1, TMP0
2003 | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask
2004 |.else
2005 | andc TMP2, CARG2, TMP0
2006 | andc TMP3, CARG1, TMP1
2007 | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask)
2008 | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31
2009 |.if mode == 0 // floor(x):
2010 | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0)
2011 |.else // ceil(x):
2012 | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0)
2013 |.endif
2014 | and CARG2, CARG2, TMP0 // lo &= lomask
2015 | and CARG1, CARG1, TMP1 // hi &= himask
2016 | subc TMP0, CARG2, TMP0
2017 | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask
2018 | sube TMP1, CARG1, TMP1
2019 | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry
2020 | evmergelo CRET2, TMP1, TMP0
2021 |.endif
2022 | blr
2023 |1:
2024 | bgtlr // Already done if >=2^52, +-inf or nan.
2025 |.if mode == 2 // trunc(x):
2026 | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x)
2027 | li TMP0, 0
2028 | evmergelo CRET2, TMP1, TMP0
2029 |.else
2030 | rlwinm TMP2, CARG1, 0, 1, 31
2031 | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31
2032 | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo
2033 | lus TMP1, 0x3ff0
2034 |.if mode == 0 // floor(x):
2035 | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0)
2036 |.else // ceil(x):
2037 | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0)
2038 |.endif
2039 | li TMP0, 0
2040 | iseleq TMP1, r0, TMP1
2041 | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0)
2042 | evmergelo CRET2, CARG1, TMP0
2043 |.endif
2044 | blr
2045 |.endmacro
2046 |
2047 |->vm_floor:
2048 | mflr CARG3
2049 | evmergelo CARG2, CARG1, CARG2
2050 | bl ->vm_floor_hilo
2051 | mtlr CARG3
2052 | evmergehi CRET1, CRET2, CRET2
2053 | blr
2054 |
2055 | vm_round vm_floor, 0
2056 | vm_round vm_ceil, 1
2057 |.if JIT
2058 | vm_round vm_trunc, 2
2059 |.else
2060 |->vm_trunc_efd:
2061 |->vm_trunc_hilo:
2062 |.endif
2063 |
2064 |//-----------------------------------------------------------------------
2065 |//-- Miscellaneous functions --------------------------------------------
2066 |//-----------------------------------------------------------------------
2067 |
2068 |//-----------------------------------------------------------------------
2069 |//-- FFI helper functions -----------------------------------------------
2070 |//-----------------------------------------------------------------------
2071 |
2072 |->vm_ffi_call:
2073 |.if FFI
2074 | NYI
2075 |.endif
2076 |
2077 |//-----------------------------------------------------------------------
2078}
2079
2080/* Generate the code for a single instruction. */
2081static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2082{
2083 int vk = 0;
2084 |=>defop:
2085
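  /* Informal note: this generator runs at build time; the C control flow
  ** below only selects which DynASM lines are emitted for each opcode and
  ** is never executed by the VM itself.
  */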
2086 switch (op) {
2087
2088 /* -- Comparison ops ---------------------------------------------------- */
2089
2090 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2091
2092 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2093 | // RA = src1*8, RD = src2*8, JMP with RD = target
2094 | evlddx TMP0, BASE, RA
2095 | addi PC, PC, 4
2096 | evlddx TMP1, BASE, RD
2097 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2098 | lwz TMP2, -4(PC)
2099 | evmergehi RB, TMP0, TMP1
2100 | decode_RD4 TMP2, TMP2
2101 | checknum RB
2102 | add TMP2, TMP2, TMP3
2103 | checkanyfail ->vmeta_comp
2104 | efdcmplt TMP0, TMP1
2105 if (op == BC_ISLE || op == BC_ISGT) {
2106 | efdcmpeq cr1, TMP0, TMP1
2107 | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2108 }
2109 if (op == BC_ISLT || op == BC_ISLE) {
2110 | iselgt PC, TMP2, PC
2111 } else {
2112 | iselgt PC, PC, TMP2
2113 }
2114 | ins_next
2115 break;
2116
2117 case BC_ISEQV: case BC_ISNEV:
2118 vk = op == BC_ISEQV;
2119 | // RA = src1*8, RD = src2*8, JMP with RD = target
2120 | evlddx CARG2, BASE, RA
2121 | addi PC, PC, 4
2122 | evlddx CARG3, BASE, RD
2123 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2124 | lwz TMP2, -4(PC)
2125 | evmergehi RB, CARG2, CARG3
2126 | decode_RD4 TMP2, TMP2
2127 | checknum RB
2128 | add TMP2, TMP2, TMP3
2129 | checkanyfail >5
2130 | efdcmpeq CARG2, CARG3
2131 if (vk) {
2132 | iselgt PC, TMP2, PC
2133 } else {
2134 | iselgt PC, PC, TMP2
2135 }
2136 |1:
2137 | ins_next
2138 |
2139 |5: // Either or both types are not numbers.
2140 | evcmpeq CARG2, CARG3
2141 | not TMP3, RB
2142 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
2143 | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type.
2144 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
2145 | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive.
2146 | mr SAVE0, PC
2147 if (vk) {
2148 | isel PC, TMP2, PC, 4*cr7+gt
2149 } else {
2150 | isel TMP2, PC, TMP2, 4*cr7+gt
2151 }
2152 | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2.
2153 if (vk) {
2154 | isel PC, TMP2, PC, 4*cr0+so
2155 } else {
2156 | isel PC, PC, TMP2, 4*cr0+so
2157 }
2158 | blt cr7, <1 // Done if 1 or 2.
2159 | blt cr6, <1 // Done if not tab/ud.
2160 |
2161 | // Different tables or userdatas. Need to check __eq metamethod.
2162 | // Field metatable must be at same offset for GCtab and GCudata!
2163 | lwz TAB:TMP2, TAB:CARG2->metatable
2164 | li CARG4, 1-vk // ne = 0 or 1.
2165 | cmplwi TAB:TMP2, 0
2166 | beq <1 // No metatable?
2167 | lbz TMP2, TAB:TMP2->nomm
2168 | andi. TMP2, TMP2, 1<<MM_eq
2169 | bne <1 // Or 'no __eq' flag set?
2170 | mr PC, SAVE0 // Restore old PC.
2171 | b ->vmeta_equal // Handle __eq metamethod.
2172 break;
2173
2174 case BC_ISEQS: case BC_ISNES:
2175 vk = op == BC_ISEQS;
2176 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
2177 | evlddx TMP0, BASE, RA
2178 | srwi RD, RD, 1
2179 | lwz INS, 0(PC)
2180 | subfic RD, RD, -4
2181 | addi PC, PC, 4
2182 | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4
2183 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2184 | decode_RD4 TMP2, INS
2185 | evmergelo STR:TMP1, TISSTR, STR:TMP1
2186 | add TMP2, TMP2, TMP3
2187 | evcmpeq TMP0, STR:TMP1
2188 if (vk) {
2189 | isel PC, TMP2, PC, 4*cr0+so
2190 } else {
2191 | isel PC, PC, TMP2, 4*cr0+so
2192 }
2193 | ins_next
2194 break;
2195
2196 case BC_ISEQN: case BC_ISNEN:
2197 vk = op == BC_ISEQN;
2198 | // RA = src*8, RD = num_const*8, JMP with RD = target
2199 | evlddx TMP0, BASE, RA
2200 | addi PC, PC, 4
2201 | evlddx TMP1, KBASE, RD
2202 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2203 | lwz INS, -4(PC)
2204 | checknum TMP0
2205 | checkfail >5
2206 | efdcmpeq TMP0, TMP1
2207 |1:
2208 | decode_RD4 TMP2, INS
2209 | add TMP2, TMP2, TMP3
2210 if (vk) {
2211 | iselgt PC, TMP2, PC
2212 |5:
2213 } else {
2214 | iselgt PC, PC, TMP2
2215 }
2216 |3:
2217 | ins_next
2218 if (!vk) {
2219 |5:
2220 | decode_RD4 TMP2, INS
2221 | add PC, TMP2, TMP3
2222 | b <3
2223 }
2224 break;
2225
2226 case BC_ISEQP: case BC_ISNEP:
2227 vk = op == BC_ISEQP;
2228 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
2229 | lwzx TMP0, BASE, RA
2230 | srwi TMP1, RD, 3
2231 | lwz INS, 0(PC)
2232 | addi PC, PC, 4
2233 | not TMP1, TMP1
2234 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2235 | cmplw TMP0, TMP1
2236 | decode_RD4 TMP2, INS
2237 | add TMP2, TMP2, TMP3
2238 if (vk) {
2239 | iseleq PC, TMP2, PC
2240 } else {
2241 | iseleq PC, PC, TMP2
2242 }
2243 | ins_next
2244 break;
2245
2246 /* -- Unary test and copy ops ------------------------------------------- */
2247
2248 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2249 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
2250 | evlddx TMP0, BASE, RD
2251 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
2252 | lwz INS, 0(PC)
2253 | evcmpltu TMP0, TMP1
2254 | addi PC, PC, 4
2255 if (op == BC_IST || op == BC_ISF) {
2256 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2257 | decode_RD4 TMP2, INS
2258 | add TMP2, TMP2, TMP3
2259 if (op == BC_IST) {
2260 | isellt PC, TMP2, PC
2261 } else {
2262 | isellt PC, PC, TMP2
2263 }
2264 } else {
2265 if (op == BC_ISTC) {
2266 | checkfail >1
2267 } else {
2268 | checkok >1
2269 }
2270 | addis PC, PC, -(BCBIAS_J*4 >> 16)
2271 | decode_RD4 TMP2, INS
2272 | evstddx TMP0, BASE, RA
2273 | add PC, PC, TMP2
2274 |1:
2275 }
2276 | ins_next
2277 break;
2278
2279 /* -- Unary ops --------------------------------------------------------- */
2280
2281 case BC_MOV:
2282 | // RA = dst*8, RD = src*8
2283 | ins_next1
2284 | evlddx TMP0, BASE, RD
2285 | evstddx TMP0, BASE, RA
2286 | ins_next2
2287 break;
2288 case BC_NOT:
2289 | // RA = dst*8, RD = src*8
2290 | ins_next1
2291 | lwzx TMP0, BASE, RD
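| // Carry trick: TMP0 becomes LJ_TTRUE if the source is nil or false, else LJ_TFALSE.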
2292 | subfic TMP1, TMP0, LJ_TTRUE
2293 | adde TMP0, TMP0, TMP1
2294 | stwx TMP0, BASE, RA
2295 | ins_next2
2296 break;
2297 case BC_UNM:
2298 | // RA = dst*8, RD = src*8
2299 | evlddx TMP0, BASE, RD
2300 | checknum TMP0
2301 | checkfail ->vmeta_unm
2302 | efdneg TMP0, TMP0
2303 | ins_next1
2304 | evstddx TMP0, BASE, RA
2305 | ins_next2
2306 break;
2307 case BC_LEN:
2308 | // RA = dst*8, RD = src*8
2309 | evlddx CARG1, BASE, RD
2310 | checkstr CARG1
2311 | checkfail >2
2312 | lwz CRET1, STR:CARG1->len
2313 |1:
2314 | ins_next1
2315 | efdcfsi TMP0, CRET1
2316 | evstddx TMP0, BASE, RA
2317 | ins_next2
2318 |2:
2319 | checktab CARG1
2320 | checkfail ->vmeta_len
2321#if LJ_52
2322 | lwz TAB:TMP2, TAB:CARG1->metatable
2323 | cmplwi TAB:TMP2, 0
2324 | bne >9
2325 |3:
2326#endif
2327 |->BC_LEN_Z:
2328 | bl extern lj_tab_len // (GCtab *t)
2329 | // Returns uint32_t (but less than 2^31).
2330 | b <1
2331#if LJ_52
2332 |9:
2333 | lbz TMP0, TAB:TMP2->nomm
2334 | andi. TMP0, TMP0, 1<<MM_len
2335 | bne <3 // 'no __len' flag set: done.
2336 | b ->vmeta_len
2337#endif
2338 break;
2339
2340 /* -- Binary ops -------------------------------------------------------- */
2341
2342 |.macro ins_arithpre, t0, t1
2343 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2344 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2345 ||switch (vk) {
2346 ||case 0:
2347 | evlddx t0, BASE, RB
2348 | checknum t0
2349 | evlddx t1, KBASE, RC
2350 | checkfail ->vmeta_arith_vn
2351 || break;
2352 ||case 1:
2353 | evlddx t1, BASE, RB
2354 | checknum t1
2355 | evlddx t0, KBASE, RC
2356 | checkfail ->vmeta_arith_nv
2357 || break;
2358 ||default:
2359 | evlddx t0, BASE, RB
2360 | evlddx t1, BASE, RC
2361 | evmergehi TMP2, t0, t1
2362 | checknum TMP2
2363 | checkanyfail ->vmeta_arith_vv
2364 || break;
2365 ||}
2366 |.endmacro
2367 |
2368 |.macro ins_arith, ins
2369 | ins_arithpre TMP0, TMP1
2370 | ins_next1
2371 | ins TMP0, TMP0, TMP1
2372 | evstddx TMP0, BASE, RA
2373 | ins_next2
2374 |.endmacro
2375
2376 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2377 | ins_arith efdadd
2378 break;
2379 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2380 | ins_arith efdsub
2381 break;
2382 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2383 | ins_arith efdmul
2384 break;
2385 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2386 | ins_arith efddiv
2387 break;
2388 case BC_MODVN:
2389 | ins_arithpre RD, SAVE0
2390 |->BC_MODVN_Z:
2391 | efddiv CARG2, RD, SAVE0
2392 | bl ->vm_floor_efd // floor(b/c)
2393 | efdmul TMP0, CRET2, SAVE0
2394 | ins_next1
2395 | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c
2396 | evstddx TMP0, BASE, RA
2397 | ins_next2
2398 break;
2399 case BC_MODNV: case BC_MODVV:
2400 | ins_arithpre RD, SAVE0
2401 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2402 break;
2403 case BC_POW:
2404 | evlddx CARG2, BASE, RB
2405 | evlddx CARG4, BASE, RC
2406 | evmergehi CARG1, CARG4, CARG2
2407 | checknum CARG1
2408 | evmergehi CARG3, CARG4, CARG4
2409 | checkanyfail ->vmeta_arith_vv
2410 | bl extern pow@plt
2411 | evmergelo CRET2, CRET1, CRET2
2412 | evstddx CRET2, BASE, RA
2413 | ins_next
2414 break;
2415
2416 case BC_CAT:
2417 | // RA = dst*8, RB = src_start*8, RC = src_end*8
2418 | sub CARG3, RC, RB
2419 | stw BASE, L->base
2420 | add CARG2, BASE, RC
2421 | mr SAVE0, RB
2422 |->BC_CAT_Z:
2423 | stw PC, SAVE_PC
2424 | mr CARG1, L
2425 | srwi CARG3, CARG3, 3
2426 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2427 | // Returns NULL (finished) or TValue * (metamethod).
2428 | cmplwi CRET1, 0
2429 | lwz BASE, L->base
2430 | bne ->vmeta_binop
2431 | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA.
2432 | evstddx TMP0, BASE, RA
2433 | ins_next
2434 break;
2435
2436 /* -- Constant ops ------------------------------------------------------ */
2437
2438 case BC_KSTR:
2439 | // RA = dst*8, RD = str_const*8 (~)
2440 | ins_next1
2441 | srwi TMP1, RD, 1
2442 | subfic TMP1, TMP1, -4
2443 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
2444 | evmergelo TMP0, TISSTR, TMP0
2445 | evstddx TMP0, BASE, RA
2446 | ins_next2
2447 break;
2448 case BC_KCDATA:
2449 |.if FFI
2450 | // RA = dst*8, RD = cdata_const*8 (~)
2451 | ins_next1
2452 | srwi TMP1, RD, 1
2453 | subfic TMP1, TMP1, -4
2454 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
2455 | li TMP2, LJ_TCDATA
2456 | evmergelo TMP0, TMP2, TMP0
2457 | evstddx TMP0, BASE, RA
2458 | ins_next2
2459 |.endif
2460 break;
2461 case BC_KSHORT:
2462 | // RA = dst*8, RD = int16_literal*8
2463 | srwi TMP1, RD, 3
2464 | extsh TMP1, TMP1
2465 | ins_next1
2466 | efdcfsi TMP0, TMP1
2467 | evstddx TMP0, BASE, RA
2468 | ins_next2
2469 break;
2470 case BC_KNUM:
2471 | // RA = dst*8, RD = num_const*8
2472 | evlddx TMP0, KBASE, RD
2473 | ins_next1
2474 | evstddx TMP0, BASE, RA
2475 | ins_next2
2476 break;
2477 case BC_KPRI:
2478 | // RA = dst*8, RD = primitive_type*8 (~)
2479 | srwi TMP1, RD, 3
2480 | not TMP0, TMP1
2481 | ins_next1
2482 | stwx TMP0, BASE, RA
2483 | ins_next2
2484 break;
2485 case BC_KNIL:
2486 | // RA = base*8, RD = end*8
2487 | evstddx TISNIL, BASE, RA
2488 | addi RA, RA, 8
2489 |1:
2490 | evstddx TISNIL, BASE, RA
2491 | cmpw RA, RD
2492 | addi RA, RA, 8
2493 | blt <1
2494 | ins_next_
2495 break;
2496
2497 /* -- Upvalue and function ops ------------------------------------------ */
2498
2499 case BC_UGET:
2500 | // RA = dst*8, RD = uvnum*8
2501 | ins_next1
2502 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2503 | srwi RD, RD, 1
2504 | addi RD, RD, offsetof(GCfuncL, uvptr)
2505 | lwzx UPVAL:RB, LFUNC:RB, RD
2506 | lwz TMP1, UPVAL:RB->v
2507 | evldd TMP0, 0(TMP1)
2508 | evstddx TMP0, BASE, RA
2509 | ins_next2
2510 break;
2511 case BC_USETV:
2512 | // RA = uvnum*8, RD = src*8
2513 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2514 | srwi RA, RA, 1
2515 | addi RA, RA, offsetof(GCfuncL, uvptr)
2516 | evlddx TMP1, BASE, RD
2517 | lwzx UPVAL:RB, LFUNC:RB, RA
2518 | lbz TMP3, UPVAL:RB->marked
2519 | lwz CARG2, UPVAL:RB->v
2520 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2521 | lbz TMP0, UPVAL:RB->closed
2522 | evmergehi TMP2, TMP1, TMP1
2523 | evstdd TMP1, 0(CARG2)
2524 | cmplwi cr1, TMP0, 0
2525 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2526 | subi TMP2, TMP2, (LJ_TISNUM+1)
2527 | bne >2 // Upvalue is closed and black?
2528 |1:
2529 | ins_next
2530 |
2531 |2: // Check if new value is collectable.
2532 | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
2533 | bge <1 // tvisgcv(v)
2534 | lbz TMP3, GCOBJ:TMP1->gch.marked
2535 | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2536 | la CARG1, GG_DISP2G(DISPATCH)
2537 | // Crossed a write barrier. Move the barrier forward.
2538 | beq <1
2539 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2540 | b <1
2541 break;
2542 case BC_USETS:
2543 | // RA = uvnum*8, RD = str_const*8 (~)
2544 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2545 | srwi TMP1, RD, 1
2546 | srwi RA, RA, 1
2547 | subfic TMP1, TMP1, -4
2548 | addi RA, RA, offsetof(GCfuncL, uvptr)
2549 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
2550 | lwzx UPVAL:RB, LFUNC:RB, RA
2551 | evmergelo STR:TMP1, TISSTR, STR:TMP1
2552 | lbz TMP3, UPVAL:RB->marked
2553 | lwz CARG2, UPVAL:RB->v
2554 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2555 | lbz TMP3, STR:TMP1->marked
2556 | lbz TMP2, UPVAL:RB->closed
2557 | evstdd STR:TMP1, 0(CARG2)
2558 | bne >2
2559 |1:
2560 | ins_next
2561 |
2562 |2: // Check if string is white and ensure upvalue is closed.
2563 | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str)
2564 | cmplwi cr1, TMP2, 0
2565 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2566 | la CARG1, GG_DISP2G(DISPATCH)
2567 | // Crossed a write barrier. Move the barrier forward.
2568 | beq <1
2569 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2570 | b <1
2571 break;
2572 case BC_USETN:
2573 | // RA = uvnum*8, RD = num_const*8
2574 | ins_next1
2575 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2576 | srwi RA, RA, 1
2577 | addi RA, RA, offsetof(GCfuncL, uvptr)
2578 | evlddx TMP0, KBASE, RD
2579 | lwzx UPVAL:RB, LFUNC:RB, RA
2580 | lwz TMP1, UPVAL:RB->v
2581 | evstdd TMP0, 0(TMP1)
2582 | ins_next2
2583 break;
2584 case BC_USETP:
2585 | // RA = uvnum*8, RD = primitive_type*8 (~)
2586 | ins_next1
2587 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2588 | srwi RA, RA, 1
2589 | addi RA, RA, offsetof(GCfuncL, uvptr)
2590 | srwi TMP0, RD, 3
2591 | lwzx UPVAL:RB, LFUNC:RB, RA
2592 | not TMP0, TMP0
2593 | lwz TMP1, UPVAL:RB->v
2594 | stw TMP0, 0(TMP1)
2595 | ins_next2
2596 break;
2597
2598 case BC_UCLO:
2599 | // RA = level*8, RD = target
2600 | lwz TMP1, L->openupval
2601 | branch_RD // Do this first since RD is not saved.
2602 | stw BASE, L->base
2603 | cmplwi TMP1, 0
2604 | mr CARG1, L
2605 | beq >1
2606 | add CARG2, BASE, RA
2607 | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
2608 | lwz BASE, L->base
2609 |1:
2610 | ins_next
2611 break;
2612
2613 case BC_FNEW:
2614 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
2615 | srwi TMP1, RD, 1
2616 | stw BASE, L->base
2617 | subfic TMP1, TMP1, -4
2618 | stw PC, SAVE_PC
2619 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4
2620 | mr CARG1, L
2621 | lwz CARG3, FRAME_FUNC(BASE)
2622 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2623 | bl extern lj_func_newL_gc
2624 | // Returns GCfuncL *.
2625 | lwz BASE, L->base
2626 | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1
2627 | evstddx LFUNC:CRET1, BASE, RA
2628 | ins_next
2629 break;
2630
2631 /* -- Table ops --------------------------------------------------------- */
2632
2633 case BC_TNEW:
2634 case BC_TDUP:
2635 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
2636 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
2637 | mr CARG1, L
2638 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
2639 | stw BASE, L->base
2640 | cmplw TMP0, TMP1
2641 | stw PC, SAVE_PC
2642 | bge >5
2643 |1:
2644 if (op == BC_TNEW) {
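| // Decode D: CARG2 = asize (low 11 bits), CARG3 = hbits; asize 0x7ff is escaped to 0x801.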
2645 | rlwinm CARG2, RD, 29, 21, 31
2646 | rlwinm CARG3, RD, 18, 27, 31
2647 | cmpwi CARG2, 0x7ff
2648 | li TMP1, 0x801
2649 | iseleq CARG2, TMP1, CARG2
2650 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2651 | // Returns Table *.
2652 } else {
2653 | srwi TMP1, RD, 1
2654 | subfic TMP1, TMP1, -4
2655 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4
2656 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
2657 | // Returns Table *.
2658 }
2659 | lwz BASE, L->base
2660 | evmergelo TAB:CRET1, TISTAB, TAB:CRET1
2661 | evstddx TAB:CRET1, BASE, RA
2662 | ins_next
2663 |5:
2664 | mr SAVE0, RD
2665 | bl extern lj_gc_step_fixtop // (lua_State *L)
2666 | mr RD, SAVE0
2667 | mr CARG1, L
2668 | b <1
2669 break;
2670
2671 case BC_GGET:
2672 | // RA = dst*8, RD = str_const*8 (~)
2673 case BC_GSET:
2674 | // RA = src*8, RD = str_const*8 (~)
2675 | lwz LFUNC:TMP2, FRAME_FUNC(BASE)
2676 | srwi TMP1, RD, 1
2677 | lwz TAB:RB, LFUNC:TMP2->env
2678 | subfic TMP1, TMP1, -4
2679 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2680 if (op == BC_GGET) {
2681 | b ->BC_TGETS_Z
2682 } else {
2683 | b ->BC_TSETS_Z
2684 }
2685 break;
2686
2687 case BC_TGETV:
2688 | // RA = dst*8, RB = table*8, RC = key*8
2689 | evlddx TAB:RB, BASE, RB
2690 | evlddx RC, BASE, RC
2691 | checktab TAB:RB
2692 | checkfail ->vmeta_tgetv
2693 | checknum RC
2694 | checkfail >5
2695 | // Convert number key to integer
2696 | efdctsi TMP2, RC
2697 | lwz TMP0, TAB:RB->asize
2698 | efdcfsi TMP1, TMP2
2699 | cmplw cr0, TMP0, TMP2
2700 | efdcmpeq cr1, RC, TMP1
2701 | lwz TMP1, TAB:RB->array
2702 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2703 | slwi TMP2, TMP2, 3
2704 | ble ->vmeta_tgetv // Integer key and in array part?
2705 | evlddx TMP1, TMP1, TMP2
2706 | checknil TMP1
2707 | checkok >2
2708 |1:
2709 | evstddx TMP1, BASE, RA
2710 | ins_next
2711 |
2712 |2: // Check for __index if table value is nil.
2713 | lwz TAB:TMP2, TAB:RB->metatable
2714 | cmplwi TAB:TMP2, 0
2715 | beq <1 // No metatable: done.
2716 | lbz TMP0, TAB:TMP2->nomm
2717 | andi. TMP0, TMP0, 1<<MM_index
2718 | bne <1 // 'no __index' flag set: done.
2719 | b ->vmeta_tgetv
2720 |
2721 |5:
2722 | checkstr STR:RC // String key?
2723 | checkok ->BC_TGETS_Z
2724 | b ->vmeta_tgetv
2725 break;
2726 case BC_TGETS:
2727 | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
2728 | evlddx TAB:RB, BASE, RB
2729 | srwi TMP1, RC, 1
2730 | checktab TAB:RB
2731 | subfic TMP1, TMP1, -4
2732 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2733 | checkfail ->vmeta_tgets1
2734 |->BC_TGETS_Z:
2735 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
2736 | lwz TMP0, TAB:RB->hmask
2737 | lwz TMP1, STR:RC->hash
2738 | lwz NODE:TMP2, TAB:RB->node
2739 | evmergelo STR:RC, TISSTR, STR:RC
2740 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2741 | slwi TMP0, TMP1, 5
2742 | slwi TMP1, TMP1, 3
2743 | sub TMP1, TMP0, TMP1
2744 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
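| // idx*32 - idx*8 = idx*24 = idx * sizeof(Node) on this 32-bit target.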
2745 |1:
2746 | evldd TMP0, NODE:TMP2->key
2747 | evldd TMP1, NODE:TMP2->val
2748 | evcmpeq TMP0, STR:RC
2749 | checkanyfail >4
2750 | checknil TMP1
2751 | checkok >5 // Key found, but nil value?
2752 |3:
2753 | evstddx TMP1, BASE, RA
2754 | ins_next
2755 |
2756 |4: // Follow hash chain.
2757 | lwz NODE:TMP2, NODE:TMP2->next
2758 | cmplwi NODE:TMP2, 0
2759 | bne <1
2760 | // End of hash chain: key not found, nil result.
2761 | evmr TMP1, TISNIL
2762 |
2763 |5: // Check for __index if table value is nil.
2764 | lwz TAB:TMP2, TAB:RB->metatable
2765 | cmplwi TAB:TMP2, 0
2766 | beq <3 // No metatable: done.
2767 | lbz TMP0, TAB:TMP2->nomm
2768 | andi. TMP0, TMP0, 1<<MM_index
2769 | bne <3 // 'no __index' flag set: done.
2770 | b ->vmeta_tgets
2771 break;
2772 case BC_TGETB:
2773 | // RA = dst*8, RB = table*8, RC = index*8
2774 | evlddx TAB:RB, BASE, RB
2775 | srwi TMP0, RC, 3
2776 | checktab TAB:RB
2777 | checkfail ->vmeta_tgetb
2778 | lwz TMP1, TAB:RB->asize
2779 | lwz TMP2, TAB:RB->array
2780 | cmplw TMP0, TMP1
2781 | bge ->vmeta_tgetb
2782 | evlddx TMP1, TMP2, RC
2783 | checknil TMP1
2784 | checkok >5
2785 |1:
2786 | ins_next1
2787 | evstddx TMP1, BASE, RA
2788 | ins_next2
2789 |
2790 |5: // Check for __index if table value is nil.
2791 | lwz TAB:TMP2, TAB:RB->metatable
2792 | cmplwi TAB:TMP2, 0
2793 | beq <1 // No metatable: done.
2794 | lbz TMP2, TAB:TMP2->nomm
2795 | andi. TMP2, TMP2, 1<<MM_index
2796 | bne <1 // 'no __index' flag set: done.
2797 | b ->vmeta_tgetb // Caveat: preserve TMP0!
2798 break;
2799
2800 case BC_TSETV:
2801 | // RA = src*8, RB = table*8, RC = key*8
2802 | evlddx TAB:RB, BASE, RB
2803 | evlddx RC, BASE, RC
2804 | checktab TAB:RB
2805 | checkfail ->vmeta_tsetv
2806 | checknum RC
2807 | checkfail >5
2808 | // Convert number key to integer
2809 | efdctsi TMP2, RC
2810 | evlddx SAVE0, BASE, RA
2811 | lwz TMP0, TAB:RB->asize
2812 | efdcfsi TMP1, TMP2
2813 | cmplw cr0, TMP0, TMP2
2814 | efdcmpeq cr1, RC, TMP1
2815 | lwz TMP1, TAB:RB->array
2816 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2817 | slwi TMP0, TMP2, 3
2818 | ble ->vmeta_tsetv // Integer key and in array part?
2819 | lbz TMP3, TAB:RB->marked
2820 | evlddx TMP2, TMP1, TMP0
2821 | checknil TMP2
2822 | checkok >3
2823 |1:
2824 | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
2825 | evstddx SAVE0, TMP1, TMP0
2826 | bne >7
2827 |2:
2828 | ins_next
2829 |
2830 |3: // Check for __newindex if previous value is nil.
2831 | lwz TAB:TMP2, TAB:RB->metatable
2832 | cmplwi TAB:TMP2, 0
2833 | beq <1 // No metatable: done.
2834 | lbz TMP2, TAB:TMP2->nomm
2835 | andi. TMP2, TMP2, 1<<MM_newindex
2836 | bne <1 // 'no __newindex' flag set: done.
2837 | b ->vmeta_tsetv
2838 |
2839 |5:
2840 | checkstr STR:RC // String key?
2841 | checkok ->BC_TSETS_Z
2842 | b ->vmeta_tsetv
2843 |
2844 |7: // Possible table write barrier for the value. Skip valiswhite check.
2845 | barrierback TAB:RB, TMP3, TMP0
2846 | b <2
2847 break;
2848 case BC_TSETS:
2849 | // RA = src*8, RB = table*8, RC = str_const*8 (~)
2850 | evlddx TAB:RB, BASE, RB
2851 | srwi TMP1, RC, 1
2852 | checktab TAB:RB
2853 | subfic TMP1, TMP1, -4
2854 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2855 | checkfail ->vmeta_tsets1
2856 |->BC_TSETS_Z:
2857 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
2858 | lwz TMP0, TAB:RB->hmask
2859 | lwz TMP1, STR:RC->hash
2860 | lwz NODE:TMP2, TAB:RB->node
2861 | evmergelo STR:RC, TISSTR, STR:RC
2862 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
2863 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2864 | evlddx SAVE0, BASE, RA
2865 | slwi TMP0, TMP1, 5
2866 | slwi TMP1, TMP1, 3
2867 | sub TMP1, TMP0, TMP1
2868 | lbz TMP3, TAB:RB->marked
2869 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
2870 |1:
2871 | evldd TMP0, NODE:TMP2->key
2872 | evldd TMP1, NODE:TMP2->val
2873 | evcmpeq TMP0, STR:RC
2874 | checkanyfail >5
2875 | checknil TMP1
2876 | checkok >4 // Key found, but nil value?
2877 |2:
2878 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2879 | evstdd SAVE0, NODE:TMP2->val
2880 | bne >7
2881 |3:
2882 | ins_next
2883 |
2884 |4: // Check for __newindex if previous value is nil.
2885 | lwz TAB:TMP1, TAB:RB->metatable
2886 | cmplwi TAB:TMP1, 0
2887 | beq <2 // No metatable: done.
2888 | lbz TMP0, TAB:TMP1->nomm
2889 | andi. TMP0, TMP0, 1<<MM_newindex
2890 | bne <2 // 'no __newindex' flag set: done.
2891 | b ->vmeta_tsets
2892 |
2893 |5: // Follow hash chain.
2894 | lwz NODE:TMP2, NODE:TMP2->next
2895 | cmplwi NODE:TMP2, 0
2896 | bne <1
2897 | // End of hash chain: key not found, add a new one.
2898 |
2899 | // But check for __newindex first.
2900 | lwz TAB:TMP1, TAB:RB->metatable
2901 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
2902 | stw PC, SAVE_PC
2903 | mr CARG1, L
2904 | cmplwi TAB:TMP1, 0
2905 | stw BASE, L->base
2906 | beq >6 // No metatable: continue.
2907 | lbz TMP0, TAB:TMP1->nomm
2908 | andi. TMP0, TMP0, 1<<MM_newindex
2909 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
2910 |6:
2911 | mr CARG2, TAB:RB
2912 | evstdd STR:RC, 0(CARG3)
2913 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
2914 | // Returns TValue *.
2915 | lwz BASE, L->base
2916 | evstdd SAVE0, 0(CRET1)
2917 | b <3 // No 2nd write barrier needed.
2918 |
2919 |7: // Possible table write barrier for the value. Skip valiswhite check.
2920 | barrierback TAB:RB, TMP3, TMP0
2921 | b <3
2922 break;
2923 case BC_TSETB:
2924 | // RA = src*8, RB = table*8, RC = index*8
2925 | evlddx TAB:RB, BASE, RB
2926 | srwi TMP0, RC, 3
2927 | checktab TAB:RB
2928 | checkfail ->vmeta_tsetb
2929 | lwz TMP1, TAB:RB->asize
2930 | lwz TMP2, TAB:RB->array
2931 | lbz TMP3, TAB:RB->marked
2932 | cmplw TMP0, TMP1
2933 | evlddx SAVE0, BASE, RA
2934 | bge ->vmeta_tsetb
2935 | evlddx TMP1, TMP2, RC
2936 | checknil TMP1
2937 | checkok >5
2938 |1:
2939 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2940 | evstddx SAVE0, TMP2, RC
2941 | bne >7
2942 |2:
2943 | ins_next
2944 |
2945 |5: // Check for __newindex if previous value is nil.
2946 | lwz TAB:TMP1, TAB:RB->metatable
2947 | cmplwi TAB:TMP1, 0
2948 | beq <1 // No metatable: done.
2949 | lbz TMP1, TAB:TMP1->nomm
2950 | andi. TMP1, TMP1, 1<<MM_newindex
2951 | bne <1 // 'no __newindex' flag set: done.
2952 | b ->vmeta_tsetb // Caveat: preserve TMP0!
2953 |
2954 |7: // Possible table write barrier for the value. Skip valiswhite check.
2955 | barrierback TAB:RB, TMP3, TMP0
2956 | b <2
2957 break;
2958
2959 case BC_TSETM:
2960 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
2961 | add RA, BASE, RA
2962 |1:
2963 | add TMP3, KBASE, RD
2964 | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
2965 | addic. TMP0, MULTRES, -8
2966 | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
2967 | srwi CARG3, TMP0, 3
2968 | beq >4 // Nothing to copy?
2969 | add CARG3, CARG3, TMP3
2970 | lwz TMP2, TAB:CARG2->asize
2971 | slwi TMP1, TMP3, 3
2972 | lbz TMP3, TAB:CARG2->marked
2973 | cmplw CARG3, TMP2
2974 | add TMP2, RA, TMP0
2975 | lwz TMP0, TAB:CARG2->array
2976 | bgt >5
2977 | add TMP1, TMP1, TMP0
2978 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2979 |3: // Copy result slots to table.
2980 | evldd TMP0, 0(RA)
2981 | addi RA, RA, 8
2982 | cmpw cr1, RA, TMP2
2983 | evstdd TMP0, 0(TMP1)
2984 | addi TMP1, TMP1, 8
2985 | blt cr1, <3
2986 | bne >7
2987 |4:
2988 | ins_next
2989 |
2990 |5: // Need to resize array part.
2991 | stw BASE, L->base
2992 | mr CARG1, L
2993 | stw PC, SAVE_PC
2994 | mr SAVE0, RD
2995 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
2996 | // Must not reallocate the stack.
2997 | mr RD, SAVE0
2998 | b <1
2999 |
3000 |7: // Possible table write barrier for any value. Skip valiswhite check.
3001 | barrierback TAB:CARG2, TMP3, TMP0
3002 | b <4
3003 break;
3004
3005 /* -- Calls and vararg handling ----------------------------------------- */
3006
3007 case BC_CALLM:
3008 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
3009 | add NARGS8:RC, NARGS8:RC, MULTRES
3010 | // Fall through. Assumes BC_CALL follows.
3011 break;
3012 case BC_CALL:
3013 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
3014 | evlddx LFUNC:RB, BASE, RA
3015 | mr TMP2, BASE
3016 | add BASE, BASE, RA
3017 | subi NARGS8:RC, NARGS8:RC, 8
3018 | checkfunc LFUNC:RB
3019 | addi BASE, BASE, 8
3020 | checkfail ->vmeta_call
3021 | ins_call
3022 break;
3023
3024 case BC_CALLMT:
3025 | // RA = base*8, (RB = 0,) RC = extra_nargs*8
3026 | add NARGS8:RC, NARGS8:RC, MULTRES
3027 | // Fall through. Assumes BC_CALLT follows.
3028 break;
3029 case BC_CALLT:
3030 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
3031 | evlddx LFUNC:RB, BASE, RA
3032 | add RA, BASE, RA
3033 | lwz TMP1, FRAME_PC(BASE)
3034 | subi NARGS8:RC, NARGS8:RC, 8
3035 | checkfunc LFUNC:RB
3036 | addi RA, RA, 8
3037 | checkfail ->vmeta_callt
3038 |->BC_CALLT_Z:
3039 | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand.
3040 | lbz TMP3, LFUNC:RB->ffid
3041 | xori TMP2, TMP1, FRAME_VARG
3042 | cmplwi cr1, NARGS8:RC, 0
3043 | bne >7
3044 |1:
3045 | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
3046 | li TMP2, 0
3047 | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function?
3048 | beq cr1, >3
3049 |2:
3050 | addi TMP3, TMP2, 8
3051 | evlddx TMP0, RA, TMP2
3052 | cmplw cr1, TMP3, NARGS8:RC
3053 | evstddx TMP0, BASE, TMP2
3054 | mr TMP2, TMP3
3055 | bne cr1, <2
3056 |3:
3057 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt
3058 | beq >5
3059 |4:
3060 | ins_callt
3061 |
3062 |5: // Tailcall to a fast function with a Lua frame below.
3063 | lwz INS, -4(TMP1)
3064 | decode_RA8 RA, INS
3065 | sub TMP1, BASE, RA
3066 | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)
3067 | lwz TMP1, LFUNC:TMP1->pc
3068 | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
3069 | b <4
3070 |
3071 |7: // Tailcall from a vararg function.
3072 | andi. TMP0, TMP2, FRAME_TYPEP
3073 | bne <1 // Vararg frame below?
3074 | sub BASE, BASE, TMP2 // Relocate BASE down.
3075 | lwz TMP1, FRAME_PC(BASE)
3076 | andi. TMP0, TMP1, FRAME_TYPE
3077 | b <1
3078 break;
3079
3080 case BC_ITERC:
3081 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
3082 | subi RA, RA, 24 // evldd doesn't support neg. offsets.
3083 | mr TMP2, BASE
3084 | evlddx LFUNC:RB, BASE, RA
3085 | add BASE, BASE, RA
3086 | evldd TMP0, 8(BASE)
3087 | evldd TMP1, 16(BASE)
3088 | evstdd LFUNC:RB, 24(BASE) // Copy callable.
3089 | checkfunc LFUNC:RB
3090 | evstdd TMP0, 32(BASE) // Copy state.
3091 | li NARGS8:RC, 16 // Iterators get 2 arguments.
3092 | evstdd TMP1, 40(BASE) // Copy control var.
3093 | addi BASE, BASE, 32
3094 | checkfail ->vmeta_call
3095 | ins_call
3096 break;
3097
3098 case BC_ITERN:
3099 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
3100 |.if JIT
3101 | // NYI: add hotloop, record BC_ITERN.
3102 |.endif
3103 | add RA, BASE, RA
3104 | lwz TAB:RB, -12(RA)
3105 | lwz RC, -4(RA) // Get index from control var.
3106 | lwz TMP0, TAB:RB->asize
3107 | lwz TMP1, TAB:RB->array
3108 | addi PC, PC, 4
3109 |1: // Traverse array part.
3110 | cmplw RC, TMP0
3111 | slwi TMP3, RC, 3
3112 | bge >5 // Index points after array part?
3113 | evlddx TMP2, TMP1, TMP3
3114 | checknil TMP2
3115 | lwz INS, -4(PC)
3116 | checkok >4
3117 | efdcfsi TMP0, RC
3118 | addi RC, RC, 1
3119 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
3120 | evstdd TMP2, 8(RA)
3121 | decode_RD4 TMP1, INS
3122 | stw RC, -4(RA) // Update control var.
3123 | add PC, TMP1, TMP3
3124 | evstdd TMP0, 0(RA)
3125 |3:
3126 | ins_next
3127 |
3128 |4: // Skip holes in array part.
3129 | addi RC, RC, 1
3130 | b <1
3131 |
3132 |5: // Traverse hash part.
3133 | lwz TMP1, TAB:RB->hmask
3134 | sub RC, RC, TMP0
3135 | lwz TMP2, TAB:RB->node
3136 |6:
3137 | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1.
3138 | slwi TMP3, RC, 5
3139 | bgt <3
3140 | slwi RB, RC, 3
3141 | sub TMP3, TMP3, RB
3142 | evlddx RB, TMP2, TMP3
3143 | add NODE:TMP3, TMP2, TMP3
3144 | checknil RB
3145 | lwz INS, -4(PC)
3146 | checkok >7
3147 | evldd TMP3, NODE:TMP3->key
3148 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
3149 | evstdd RB, 8(RA)
3150 | add RC, RC, TMP0
3151 | decode_RD4 TMP1, INS
3152 | evstdd TMP3, 0(RA)
3153 | addi RC, RC, 1
3154 | add PC, TMP1, TMP2
3155 | stw RC, -4(RA) // Update control var.
3156 | b <3
3157 |
3158 |7: // Skip holes in hash part.
3159 | addi RC, RC, 1
3160 | b <6
3161 break;
3162
3163 case BC_ISNEXT:
3164 | // RA = base*8, RD = target (points to ITERN)
3165 | add RA, BASE, RA
3166 | li TMP2, -24
3167 | evlddx CFUNC:TMP1, RA, TMP2
3168 | lwz TMP2, -16(RA)
3169 | lwz TMP3, -8(RA)
3170 | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1
3171 | cmpwi cr0, TMP2, LJ_TTAB
3172 | cmpwi cr1, TMP0, LJ_TFUNC
3173 | cmpwi cr6, TMP3, LJ_TNIL
3174 | bne cr1, >5
3175 | lbz TMP1, CFUNC:TMP1->ffid
3176 | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq
3177 | cmpwi cr7, TMP1, FF_next_N
3178 | srwi TMP0, RD, 1
3179 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
3180 | add TMP3, PC, TMP0
3181 | bne cr0, >5
3182 | lus TMP1, 0xfffe
3183 | ori TMP1, TMP1, 0x7fff
3184 | stw ZERO, -4(RA) // Initialize control var.
3185 | stw TMP1, -8(RA)
3186 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
3187 |1:
3188 | ins_next
3189 |5: // Despecialize bytecode if any of the checks fail.
3190 | li TMP0, BC_JMP
3191 | li TMP1, BC_ITERC
3192 | stb TMP0, -1(PC)
3193 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
3194 | stb TMP1, 3(PC)
3195 | b <1
3196 break;
3197
3198 case BC_VARG:
3199 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
3200 | lwz TMP0, FRAME_PC(BASE)
3201 | add RC, BASE, RC
3202 | add RA, BASE, RA
3203 | addi RC, RC, FRAME_VARG
3204 | add TMP2, RA, RB
3205 | subi TMP3, BASE, 8 // TMP3 = vtop
3206 | sub RC, RC, TMP0 // RC = vbase
3207 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
3208 | cmplwi cr1, RB, 0
3209 | sub. TMP1, TMP3, RC
3210 | beq cr1, >5 // Copy all varargs?
3211 | subi TMP2, TMP2, 16
3212 | ble >2 // No vararg slots?
3213 |1: // Copy vararg slots to destination slots.
3214 | evldd TMP0, 0(RC)
3215 | addi RC, RC, 8
3216 | evstdd TMP0, 0(RA)
3217 | cmplw RA, TMP2
3218 | cmplw cr1, RC, TMP3
3219 | bge >3 // All destination slots filled?
3220 | addi RA, RA, 8
3221 | blt cr1, <1 // More vararg slots?
3222 |2: // Fill up remainder with nil.
3223 | evstdd TISNIL, 0(RA)
3224 | cmplw RA, TMP2
3225 | addi RA, RA, 8
3226 | blt <2
3227 |3:
3228 | ins_next
3229 |
3230 |5: // Copy all varargs.
3231 | lwz TMP0, L->maxstack
3232 | li MULTRES, 8 // MULTRES = (0+1)*8
3233 | ble <3 // No vararg slots?
3234 | add TMP2, RA, TMP1
3235 | cmplw TMP2, TMP0
3236 | addi MULTRES, TMP1, 8
3237 | bgt >7
3238 |6:
3239 | evldd TMP0, 0(RC)
3240 | addi RC, RC, 8
3241 | evstdd TMP0, 0(RA)
3242 | cmplw RC, TMP3
3243 | addi RA, RA, 8
3244 | blt <6 // More vararg slots?
3245 | b <3
3246 |
3247 |7: // Grow stack for varargs.
3248 | mr CARG1, L
3249 | stw RA, L->top
3250 | sub SAVE0, RC, BASE // Need delta, because BASE may change.
3251 | stw BASE, L->base
3252 | sub RA, RA, BASE
3253 | stw PC, SAVE_PC
3254 | srwi CARG2, TMP1, 3
3255 | bl extern lj_state_growstack // (lua_State *L, int n)
3256 | lwz BASE, L->base
3257 | add RA, BASE, RA
3258 | add RC, BASE, SAVE0
3259 | subi TMP3, BASE, 8
3260 | b <6
3261 break;
3262
3263 /* -- Returns ----------------------------------------------------------- */
3264
3265 case BC_RETM:
3266 | // RA = results*8, RD = extra_nresults*8
3267 | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
3268 | // Fall through. Assumes BC_RET follows.
3269 break;
3270
3271 case BC_RET:
3272 | // RA = results*8, RD = (nresults+1)*8
3273 | lwz PC, FRAME_PC(BASE)
3274 | add RA, BASE, RA
3275 | mr MULTRES, RD
3276 |1:
3277 | andi. TMP0, PC, FRAME_TYPE
3278 | xori TMP1, PC, FRAME_VARG
3279 | bne ->BC_RETV_Z
3280 |
3281 |->BC_RET_Z:
3282 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
3283 | lwz INS, -4(PC)
3284 | cmpwi RD, 8
3285 | subi TMP2, BASE, 8
3286 | subi RC, RD, 8
3287 | decode_RB8 RB, INS
3288 | beq >3
3289 | li TMP1, 0
3290 |2:
3291 | addi TMP3, TMP1, 8
3292 | evlddx TMP0, RA, TMP1
3293 | cmpw TMP3, RC
3294 | evstddx TMP0, TMP2, TMP1
3295 | beq >3
3296 | addi TMP1, TMP3, 8
3297 | evlddx TMP0, RA, TMP3
3298 | cmpw TMP1, RC
3299 | evstddx TMP0, TMP2, TMP3
3300 | bne <2
3301 |3:
3302 |5:
3303 | cmplw RB, RD
3304 | decode_RA8 RA, INS
3305 | bgt >6
3306 | sub BASE, TMP2, RA
3307 | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
3308 | ins_next1
3309 | lwz TMP1, LFUNC:TMP1->pc
3310 | lwz KBASE, PC2PROTO(k)(TMP1)
3311 | ins_next2
3312 |
3313 |6: // Fill up results with nil.
3314 | subi TMP1, RD, 8
3315 | addi RD, RD, 8
3316 | evstddx TISNIL, TMP2, TMP1
3317 | b <5
3318 |
3319 |->BC_RETV_Z: // Non-standard return case.
3320 | andi. TMP2, TMP1, FRAME_TYPEP
3321 | bne ->vm_return
3322 | // Return from vararg function: relocate BASE down.
3323 | sub BASE, BASE, TMP1
3324 | lwz PC, FRAME_PC(BASE)
3325 | b <1
3326 break;
3327
3328 case BC_RET0: case BC_RET1:
3329 | // RA = results*8, RD = (nresults+1)*8
3330 | lwz PC, FRAME_PC(BASE)
3331 | add RA, BASE, RA
3332 | mr MULTRES, RD
3333 | andi. TMP0, PC, FRAME_TYPE
3334 | xori TMP1, PC, FRAME_VARG
3335 | bne ->BC_RETV_Z
3336 |
3337 | lwz INS, -4(PC)
3338 | subi TMP2, BASE, 8
3339 | decode_RB8 RB, INS
3340 if (op == BC_RET1) {
3341 | evldd TMP0, 0(RA)
3342 | evstdd TMP0, 0(TMP2)
3343 }
3344 |5:
3345 | cmplw RB, RD
3346 | decode_RA8 RA, INS
3347 | bgt >6
3348 | sub BASE, TMP2, RA
3349 | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
3350 | ins_next1
3351 | lwz TMP1, LFUNC:TMP1->pc
3352 | lwz KBASE, PC2PROTO(k)(TMP1)
3353 | ins_next2
3354 |
3355 |6: // Fill up results with nil.
3356 | subi TMP1, RD, 8
3357 | addi RD, RD, 8
3358 | evstddx TISNIL, TMP2, TMP1
3359 | b <5
3360 break;
3361
3362 /* -- Loops and branches ------------------------------------------------ */
3363
3364 case BC_FORL:
3365 |.if JIT
3366 | hotloop
3367 |.endif
3368 | // Fall through. Assumes BC_IFORL follows.
3369 break;
3370
3371 case BC_JFORI:
3372 case BC_JFORL:
3373#if !LJ_HASJIT
3374 break;
3375#endif
3376 case BC_FORI:
3377 case BC_IFORL:
3378 | // RA = base*8, RD = target (after end of loop or start of loop)
3379 vk = (op == BC_IFORL || op == BC_JFORL);
3380 | add RA, BASE, RA
3381 | evldd TMP1, FORL_IDX*8(RA)
3382 | evldd TMP3, FORL_STEP*8(RA)
3383 | evldd TMP2, FORL_STOP*8(RA)
3384 if (!vk) {
3385 | evcmpgtu cr0, TMP1, TISNUM
3386 | evcmpgtu cr7, TMP3, TISNUM
3387 | evcmpgtu cr1, TMP2, TISNUM
3388 | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
3389 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3390 | blt ->vmeta_for
3391 }
3392 if (vk) {
3393 | efdadd TMP1, TMP1, TMP3
3394 | evstdd TMP1, FORL_IDX*8(RA)
3395 }
3396 | evcmpgts TMP3, TISNIL
3397 | evstdd TMP1, FORL_EXT*8(RA)
3398 | bge >2
3399 | efdcmpgt TMP1, TMP2
3400 |1:
3401 if (op != BC_JFORL) {
3402 | srwi RD, RD, 1
3403 | add RD, PC, RD
3404 if (op == BC_JFORI) {
3405 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3406 } else {
3407 | addis RD, RD, -(BCBIAS_J*4 >> 16)
3408 }
3409 }
3410 if (op == BC_FORI) {
3411 | iselgt PC, RD, PC
3412 } else if (op == BC_IFORL) {
3413 | iselgt PC, PC, RD
3414 } else {
3415 | ble =>BC_JLOOP
3416 }
3417 | ins_next
3418 |2:
3419 | efdcmpgt TMP2, TMP1
3420 | b <1
3421 break;
3422
3423 case BC_ITERL:
3424 |.if JIT
3425 | hotloop
3426 |.endif
3427 | // Fall through. Assumes BC_IITERL follows.
3428 break;
3429
3430 case BC_JITERL:
3431#if !LJ_HASJIT
3432 break;
3433#endif
3434 case BC_IITERL:
3435 | // RA = base*8, RD = target
3436 | evlddx TMP1, BASE, RA
3437 | subi RA, RA, 8
3438 | checknil TMP1
3439 | checkok >1 // Stop if iterator returned nil.
3440 if (op == BC_JITERL) {
3441 | NYI
3442 } else {
3443 | branch_RD // Otherwise save control var + branch.
3444 | evstddx TMP1, BASE, RA
3445 }
3446 |1:
3447 | ins_next
3448 break;
3449
3450 case BC_LOOP:
3451 | // RA = base*8, RD = target (loop extent)
3452 | // Note: RA/RD are only used by the trace recorder to determine scope/extent.
3453 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
3454 |.if JIT
3455 | hotloop
3456 |.endif
3457 | // Fall through. Assumes BC_ILOOP follows.
3458 break;
3459
3460 case BC_ILOOP:
3461 | // RA = base*8, RD = target (loop extent)
3462 | ins_next
3463 break;
3464
3465 case BC_JLOOP:
3466 |.if JIT
3467 | NYI
3468 |.endif
3469 break;
3470
3471 case BC_JMP:
3472 | // RA = base*8 (only used by trace recorder), RD = target
3473 | branch_RD
3474 | ins_next
3475 break;
3476
3477 /* -- Function headers -------------------------------------------------- */
3478
3479 case BC_FUNCF:
3480 |.if JIT
3481 | hotcall
3482 |.endif
3483 case BC_FUNCV: /* NYI: compiled vararg functions. */
3484 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3485 break;
3486
3487 case BC_JFUNCF:
3488#if !LJ_HASJIT
3489 break;
3490#endif
3491 case BC_IFUNCF:
3492 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
3493 | lwz TMP2, L->maxstack
3494 | lbz TMP1, -4+PC2PROTO(numparams)(PC)
3495 | lwz KBASE, -4+PC2PROTO(k)(PC)
3496 | cmplw RA, TMP2
3497 | slwi TMP1, TMP1, 3
3498 | bgt ->vm_growstack_l
3499 | ins_next1
3500 |2:
3501 | cmplw NARGS8:RC, TMP1 // Check for missing parameters.
3502 | ble >3
3503 if (op == BC_JFUNCF) {
3504 | NYI
3505 } else {
3506 | ins_next2
3507 }
3508 |
3509 |3: // Clear missing parameters.
3510 | evstddx TISNIL, BASE, NARGS8:RC
3511 | addi NARGS8:RC, NARGS8:RC, 8
3512 | b <2
3513 break;
3514
3515 case BC_JFUNCV:
3516#if !LJ_HASJIT
3517 break;
3518#endif
3519 | NYI // NYI: compiled vararg functions
3520 break; /* NYI: compiled vararg functions. */
3521
3522 case BC_IFUNCV:
3523 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
3524 | lwz TMP2, L->maxstack
3525 | add TMP1, BASE, RC
3526 | add TMP0, RA, RC
3527 | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
3528 | addi TMP3, RC, 8+FRAME_VARG
3529 | lwz KBASE, -4+PC2PROTO(k)(PC)
3530 | cmplw TMP0, TMP2
3531 | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
3532 | bge ->vm_growstack_l
3533 | lbz TMP2, -4+PC2PROTO(numparams)(PC)
3534 | mr RA, BASE
3535 | mr RC, TMP1
3536 | ins_next1
3537 | cmpwi TMP2, 0
3538 | addi BASE, TMP1, 8
3539 | beq >3
3540 |1:
3541 | cmplw RA, RC // Less args than parameters?
3542 | evldd TMP0, 0(RA)
3543 | bge >4
3544 | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
3545 | addi RA, RA, 8
3546 |2:
3547 | addic. TMP2, TMP2, -1
3548 | evstdd TMP0, 8(TMP1)
3549 | addi TMP1, TMP1, 8
3550 | bne <1
3551 |3:
3552 | ins_next2
3553 |
3554 |4: // Clear missing parameters.
3555 | evmr TMP0, TISNIL
3556 | b <2
3557 break;
3558
3559 case BC_FUNCC:
3560 case BC_FUNCCW:
3561 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
3562 if (op == BC_FUNCC) {
3563 | lwz TMP3, CFUNC:RB->f
3564 } else {
3565 | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH)
3566 }
3567 | add TMP1, RA, NARGS8:RC
3568 | lwz TMP2, L->maxstack
3569 | add RC, BASE, NARGS8:RC
3570 | stw BASE, L->base
3571 | cmplw TMP1, TMP2
3572 | stw RC, L->top
3573 | li_vmstate C
3574 | mtctr TMP3
3575 if (op == BC_FUNCCW) {
3576 | lwz CARG2, CFUNC:RB->f
3577 }
3578 | mr CARG1, L
3579 | bgt ->vm_growstack_c // Need to grow stack.
3580 | st_vmstate
3581 | bctrl // (lua_State *L [, lua_CFunction f])
3582 | // Returns nresults.
3583 | lwz TMP1, L->top
3584 | slwi RD, CRET1, 3
3585 | lwz BASE, L->base
3586 | li_vmstate INTERP
3587 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
3588 | sub RA, TMP1, RD // RA = L->top - nresults*8
3589 | st_vmstate
3590 | b ->vm_returnc
3591 break;
3592
3593 /* ---------------------------------------------------------------------- */
3594
3595 default:
3596 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3597 exit(2);
3598 break;
3599 }
3600}
3601
3602static int build_backend(BuildCtx *ctx)
3603{
3604 int op;
3605
3606 dasm_growpc(Dst, BC__MAX);
3607
3608 build_subroutines(ctx);
3609
3610 |.code_op
3611 for (op = 0; op < BC__MAX; op++)
3612 build_ins(ctx, (BCOp)op, op);
3613
3614 return BC__MAX;
3615}
3616
3617/* Emit pseudo frame-info for all assembler functions. */
3618static void emit_asm_debug(BuildCtx *ctx)
3619{
3620 int i;
3621 switch (ctx->mode) {
3622 case BUILD_elfasm:
3623 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
3624 fprintf(ctx->fp,
3625 ".Lframe0:\n"
3626 "\t.long .LECIE0-.LSCIE0\n"
3627 ".LSCIE0:\n"
3628 "\t.long 0xffffffff\n"
3629 "\t.byte 0x1\n"
3630 "\t.string \"\"\n"
3631 "\t.uleb128 0x1\n"
3632 "\t.sleb128 -4\n"
3633 "\t.byte 65\n"
3634 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
3635 "\t.align 2\n"
3636 ".LECIE0:\n\n");
3637 fprintf(ctx->fp,
3638 ".LSFDE0:\n"
3639 "\t.long .LEFDE0-.LASFDE0\n"
3640 ".LASFDE0:\n"
3641 "\t.long .Lframe0\n"
3642 "\t.long .Lbegin\n"
3643 "\t.long %d\n"
3644 "\t.byte 0xe\n\t.uleb128 %d\n"
3645 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
3646 "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n",
3647 (int)ctx->codesz, CFRAME_SIZE);
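  /* 0x80+i emits DW_CFA_offset for GPR i; the DW_CFA_offset_extended entries
     (.byte 5) use register numbers 1200+i, i.e. the SPE upper register halves
     in GCC's DWARF numbering. */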
3648 for (i = 14; i <= 31; i++)
3649 fprintf(ctx->fp,
3650 "\t.byte %d\n\t.uleb128 %d\n"
3651 "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
3652 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
3653 fprintf(ctx->fp,
3654 "\t.align 2\n"
3655 ".LEFDE0:\n\n");
3656 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
3657 fprintf(ctx->fp,
3658 ".Lframe1:\n"
3659 "\t.long .LECIE1-.LSCIE1\n"
3660 ".LSCIE1:\n"
3661 "\t.long 0\n"
3662 "\t.byte 0x1\n"
3663 "\t.string \"zPR\"\n"
3664 "\t.uleb128 0x1\n"
3665 "\t.sleb128 -4\n"
3666 "\t.byte 65\n"
3667 "\t.uleb128 6\n" /* augmentation length */
3668 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3669 "\t.long lj_err_unwind_dwarf-.\n"
3670 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3671 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
3672 "\t.align 2\n"
3673 ".LECIE1:\n\n");
3674 fprintf(ctx->fp,
3675 ".LSFDE1:\n"
3676 "\t.long .LEFDE1-.LASFDE1\n"
3677 ".LASFDE1:\n"
3678 "\t.long .LASFDE1-.Lframe1\n"
3679 "\t.long .Lbegin-.\n"
3680 "\t.long %d\n"
3681 "\t.uleb128 0\n" /* augmentation length */
3682 "\t.byte 0xe\n\t.uleb128 %d\n"
3683 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
3684 "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n",
3685 (int)ctx->codesz, CFRAME_SIZE);
3686 for (i = 14; i <= 31; i++)
3687 fprintf(ctx->fp,
3688 "\t.byte %d\n\t.uleb128 %d\n"
3689 "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
3690 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
3691 fprintf(ctx->fp,
3692 "\t.align 2\n"
3693 ".LEFDE1:\n\n");
3694 break;
3695 default:
3696 break;
3697 }
3698}
3699
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..b8ecb868
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4951 @@
1|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|.if WIN
16|.define X64WIN, 1 // Windows/x64 calling conventions.
17|.endif
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, rdx // Not C callee-save, refetched anyway.
22|.if X64WIN
23|.define KBASE, rdi // Must be C callee-save.
24|.define PC, rsi // Must be C callee-save.
25|.define DISPATCH, rbx // Must be C callee-save.
26|.define KBASEd, edi
27|.define PCd, esi
28|.define DISPATCHd, ebx
29|.else
30|.define KBASE, r15 // Must be C callee-save.
31|.define PC, rbx // Must be C callee-save.
32|.define DISPATCH, r14 // Must be C callee-save.
33|.define KBASEd, r15d
34|.define PCd, ebx
35|.define DISPATCHd, r14d
36|.endif
37|
38|.define RA, rcx
39|.define RAd, ecx
40|.define RAH, ch
41|.define RAL, cl
42|.define RB, rbp // Must be rbp (C callee-save).
43|.define RBd, ebp
44|.define RC, rax // Must be rax.
45|.define RCd, eax
46|.define RCW, ax
47|.define RCH, ah
48|.define RCL, al
49|.define OP, RBd
50|.define RD, RC
51|.define RDd, RCd
52|.define RDW, RCW
53|.define RDL, RCL
54|.define TMPR, r10
55|.define TMPRd, r10d
56|.define ITYPE, r11
57|.define ITYPEd, r11d
58|
59|.if X64WIN
60|.define CARG1, rcx // x64/WIN64 C call arguments.
61|.define CARG2, rdx
62|.define CARG3, r8
63|.define CARG4, r9
64|.define CARG1d, ecx
65|.define CARG2d, edx
66|.define CARG3d, r8d
67|.define CARG4d, r9d
68|.else
69|.define CARG1, rdi // x64/POSIX C call arguments.
70|.define CARG2, rsi
71|.define CARG3, rdx
72|.define CARG4, rcx
73|.define CARG5, r8
74|.define CARG6, r9
75|.define CARG1d, edi
76|.define CARG2d, esi
77|.define CARG3d, edx
78|.define CARG4d, ecx
79|.define CARG5d, r8d
80|.define CARG6d, r9d
81|.endif
82|
83|// Type definitions. Some of these are only used for documentation.
84|.type L, lua_State
85|.type GL, global_State
86|.type TVALUE, TValue
87|.type GCOBJ, GCobj
88|.type STR, GCstr
89|.type TAB, GCtab
90|.type LFUNC, GCfuncL
91|.type CFUNC, GCfuncC
92|.type PROTO, GCproto
93|.type UPVAL, GCupval
94|.type NODE, Node
95|.type NARGS, int
96|.type TRACE, GCtrace
97|.type SBUF, SBuf
98|
99|// Stack layout while in interpreter. Must match with lj_frame.h.
100|//-----------------------------------------------------------------------
101|.if X64WIN // x64/Windows stack layout
102|
103|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104|.macro saveregs_
105| push rdi; push rsi; push rbx
106| sub rsp, CFRAME_SPACE
107|.endmacro
108|.macro saveregs
109| push rbp; saveregs_
110|.endmacro
111|.macro restoreregs
112| add rsp, CFRAME_SPACE
113| pop rbx; pop rsi; pop rdi; pop rbp
114|.endmacro
115|
116|.define SAVE_CFRAME, aword [rsp+aword*13]
117|.define SAVE_PC, aword [rsp+aword*12]
118|.define SAVE_L, aword [rsp+aword*11]
119|.define SAVE_ERRF, dword [rsp+dword*21]
120|.define SAVE_NRES, dword [rsp+dword*20]
121|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123|.define SAVE_R4, aword [rsp+aword*8]
124|.define SAVE_R3, aword [rsp+aword*7]
125|.define SAVE_R2, aword [rsp+aword*6]
126|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127|.define ARG5, aword [rsp+aword*4]
128|.define CSAVE_4, aword [rsp+aword*3]
129|.define CSAVE_3, aword [rsp+aword*2]
130|.define CSAVE_2, aword [rsp+aword*1]
131|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133|
134|.define ARG5d, dword [rsp+dword*8]
135|.define TMP1, ARG5 // TMP1 overlaps ARG5
136|.define TMP1d, ARG5d
137|.define TMP1hi, dword [rsp+dword*9]
138|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139|
140|//-----------------------------------------------------------------------
141|.else // x64/POSIX stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs_
145| push rbx; push r15; push r14
146|.if NO_UNWIND
147| push r13; push r12
148|.endif
149| sub rsp, CFRAME_SPACE
150|.endmacro
151|.macro saveregs
152| push rbp; saveregs_
153|.endmacro
154|.macro restoreregs
155| add rsp, CFRAME_SPACE
156|.if NO_UNWIND
157| pop r12; pop r13
158|.endif
159| pop r14; pop r15; pop rbx; pop rbp
160|.endmacro
161|
162|//----- 16 byte aligned,
163|.if NO_UNWIND
164|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165|.define SAVE_R4, aword [rsp+aword*10]
166|.define SAVE_R3, aword [rsp+aword*9]
167|.define SAVE_R2, aword [rsp+aword*8]
168|.define SAVE_R1, aword [rsp+aword*7]
169|.define SAVE_RU2, aword [rsp+aword*6]
170|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
171|.else
172|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173|.define SAVE_R4, aword [rsp+aword*8]
174|.define SAVE_R3, aword [rsp+aword*7]
175|.define SAVE_R2, aword [rsp+aword*6]
176|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
177|.endif
178|.define SAVE_CFRAME, aword [rsp+aword*4]
179|.define SAVE_PC, aword [rsp+aword*3]
180|.define SAVE_L, aword [rsp+aword*2]
181|.define SAVE_ERRF, dword [rsp+dword*3]
182|.define SAVE_NRES, dword [rsp+dword*2]
183|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184|//----- 16 byte aligned
185|
186|.define TMP1d, dword [rsp]
187|.define TMP1hi, dword [rsp+dword*1]
188|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
189|
190|.endif
191|
192|//-----------------------------------------------------------------------
193|
194|// Instruction headers.
195|.macro ins_A; .endmacro
196|.macro ins_AD; .endmacro
197|.macro ins_AJ; .endmacro
198|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
199|.macro ins_AB_; movzx RBd, RCH; .endmacro
200|.macro ins_A_C; movzx RCd, RCL; .endmacro
201|.macro ins_AND; not RD; .endmacro
202|
203|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
204|.macro ins_NEXT
205| mov RCd, [PC]
206| movzx RAd, RCH
207| movzx OP, RCL
208| add PC, 4
209| shr RCd, 16
210| jmp aword [DISPATCH+OP*8]
211|.endmacro
212|
213|// Instruction footer.
214|.if 1
215| // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
216| .define ins_next, ins_NEXT
217| .define ins_next_, ins_NEXT
218|.else
219| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220| // Affects only certain kinds of benchmarks (and only with -j off).
221| // Around 10%-30% slower on Core2, and a lot slower on P4.
222| .macro ins_next
223| jmp ->ins_next
224| .endmacro
225| .macro ins_next_
226| ->ins_next:
227| ins_NEXT
228| .endmacro
229|.endif
230|
231|// Call decode and dispatch.
232|.macro ins_callt
233| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234| mov PC, LFUNC:RB->pc
235| mov RAd, [PC]
236| movzx OP, RAL
237| movzx RAd, RAH
238| add PC, 4
239| jmp aword [DISPATCH+OP*8]
240|.endmacro
241|
242|.macro ins_call
243| // BASE = new base, RB = LFUNC, RD = nargs+1
244| mov [BASE-8], PC
245| ins_callt
246|.endmacro
247|
248|//-----------------------------------------------------------------------
249|
250|// Macros to clear or set tags.
251|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro
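|// cleartp: shl/shr by 17 clears the top 17 tag bits, leaving the 47-bit payload.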
252|.macro settp, reg, tp
253| mov64 ITYPE, ((uint64_t)tp<<47)
254| or reg, ITYPE
255|.endmacro
256|.macro settp, dst, reg, tp
257| mov64 dst, ((uint64_t)tp<<47)
258| or dst, reg
259|.endmacro
260|.macro setint, reg
261| settp reg, LJ_TISNUM
262|.endmacro
263|.macro setint, dst, reg
264| settp dst, reg, LJ_TISNUM
265|.endmacro
266|
267|// Macros to test operand types.
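|// The arithmetic shift by 47 leaves the sign-extended type tag in ITYPE for comparison.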
268|.macro checktp_nc, reg, tp, target
269| mov ITYPE, reg
270| sar ITYPE, 47
271| cmp ITYPEd, tp
272| jne target
273|.endmacro
274|.macro checktp, reg, tp, target
275| mov ITYPE, reg
276| cleartp reg
277| sar ITYPE, 47
278| cmp ITYPEd, tp
279| jne target
280|.endmacro
281|.macro checktptp, src, tp, target
282| mov ITYPE, src
283| sar ITYPE, 47
284| cmp ITYPEd, tp
285| jne target
286|.endmacro
287|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
288|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
289|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
290|
291|.macro checknumx, reg, target, jump
292| mov ITYPE, reg
293| sar ITYPE, 47
294| cmp ITYPEd, LJ_TISNUM
295| jump target
296|.endmacro
297|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
298|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
299|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro
300|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
301|.macro checknumber, src, target; checknumx src, target, ja; .endmacro
302|
303|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro
304|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro
305|
306|// These operands must be used with movzx.
307|.define PC_OP, byte [PC-4]
308|.define PC_RA, byte [PC-3]
309|.define PC_RB, byte [PC-1]
310|.define PC_RC, byte [PC-2]
311|.define PC_RD, word [PC-2]
312|
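|// branchPC: biased jump, PC += (operand - BCBIAS_J)*4.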
313|.macro branchPC, reg
314| lea PC, [PC+reg*4-BCBIAS_J*4]
315|.endmacro
316|
317|// Assumes DISPATCH is relative to GL.
318#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
319#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
320|
321#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
322|
323|// Decrement hashed hotcount and trigger trace recorder if zero.
324|.macro hotloop, reg
325| mov reg, PCd
326| shr reg, 1
327| and reg, HOTCOUNT_PCMASK
328| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
329| jb ->vm_hotloop
330|.endmacro
331|
332|.macro hotcall, reg
333| mov reg, PCd
334| shr reg, 1
335| and reg, HOTCOUNT_PCMASK
336| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
337| jb ->vm_hotcall
338|.endmacro
339|
340|// Set current VM state.
341|.macro set_vmstate, st
342| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
343|.endmacro
344|
345|.macro fpop1; fstp st1; .endmacro
346|
347|// Synthesize SSE FP constants.
348|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
349| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
350|.endmacro
351|
352|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
353| mov64 tmp, U64x(val,00000000); movd reg, tmp
354|.endmacro
355|
356|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357| sseconst_hi reg, tmp, 80000000
358|.endmacro
359|.macro sseconst_1, reg, tmp // Synthesize 1.0.
360| sseconst_hi reg, tmp, 3ff00000
361|.endmacro
362|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
363| sseconst_hi reg, tmp, 43300000
364|.endmacro
365|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
366| sseconst_hi reg, tmp, 43380000
367|.endmacro
368|
369|// Move table write barrier back. Overwrites reg.
370|.macro barrierback, tab, reg
371| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
372| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
373| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
374| mov tab->gclist, reg
375|.endmacro
376|
377|//-----------------------------------------------------------------------
378
379/* Generate subroutines used by opcodes and other parts of the VM. */
380/* The .code_sub section should be last to help static branch prediction. */
381static void build_subroutines(BuildCtx *ctx)
382{
383 |.code_sub
384 |
385 |//-----------------------------------------------------------------------
386 |//-- Return handling ----------------------------------------------------
387 |//-----------------------------------------------------------------------
388 |
389 |->vm_returnp:
390 | test PCd, FRAME_P
391 | jz ->cont_dispatch
392 |
393 | // Return from pcall or xpcall fast func.
394 | and PC, -8
395 | sub BASE, PC // Restore caller base.
396 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
397 | mov PC, [BASE-8] // Fetch PC of previous frame.
398 | // Prepending may overwrite the pcall frame, so do it at the end.
399 | mov_true ITYPE
400 | mov aword [BASE+RA], ITYPE // Prepend true to results.
401 |
402 |->vm_returnc:
403 | add RDd, 1 // RD = nresults+1
404 | jz ->vm_unwind_yield
405 | mov MULTRES, RDd
406 | test PC, FRAME_TYPE
407 | jz ->BC_RET_Z // Handle regular return to Lua.
408 |
409 |->vm_return:
410 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
411 | xor PC, FRAME_C
412 | test PCd, FRAME_TYPE
413 | jnz ->vm_returnp
414 |
415 | // Return to C.
416 | set_vmstate C
417 | and PC, -8
418 | sub PC, BASE
419 | neg PC // Previous base = BASE - delta.
420 |
421 | sub RDd, 1
422 | jz >2
423 |1: // Move results down.
424 | mov RB, [BASE+RA]
425 | mov [BASE-16], RB
426 | add BASE, 8
427 | sub RDd, 1
428 | jnz <1
429 |2:
430 | mov L:RB, SAVE_L
431 | mov L:RB->base, PC
432 |3:
433 | mov RDd, MULTRES
434 | mov RAd, SAVE_NRES // RA = wanted nresults+1
435 |4:
436 | cmp RAd, RDd
437 | jne >6 // More/less results wanted?
438 |5:
439 | sub BASE, 16
440 | mov L:RB->top, BASE
441 |
442 |->vm_leave_cp:
443 | mov RA, SAVE_CFRAME // Restore previous C frame.
444 | mov L:RB->cframe, RA
445 | xor eax, eax // Ok return status for vm_pcall.
446 |
447 |->vm_leave_unw:
448 | restoreregs
449 | ret
450 |
451 |6:
452 | jb >7 // Less results wanted?
453 | // More results wanted. Check stack size and fill up results with nil.
454 | cmp BASE, L:RB->maxstack
455 | ja >8
456 | mov aword [BASE-16], LJ_TNIL
457 | add BASE, 8
458 | add RDd, 1
459 | jmp <4
460 |
461 |7: // Less results wanted.
462 | test RAd, RAd
463 | jz <5 // But check for LUA_MULTRET+1.
464 | sub RA, RD // Negative result!
465 | lea BASE, [BASE+RA*8] // Correct top.
466 | jmp <5
467 |
468 |8: // Corner case: need to grow stack for filling up results.
469 | // This can happen if:
470 | // - A C function grows the stack (a lot).
471 | // - The GC shrinks the stack in between.
472| // - A return from a lua_call() with a (high) nresults adjustment.
473 | mov L:RB->top, BASE // Save current top held in BASE (yes).
474 | mov MULTRES, RDd // Need to fill only remainder with nil.
475 | mov CARG2d, RAd
476 | mov CARG1, L:RB
477 | call extern lj_state_growstack // (lua_State *L, int n)
478 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
479 | jmp <3
480 |
481 |->vm_unwind_yield:
482 | mov al, LUA_YIELD
483 | jmp ->vm_unwind_c_eh
484 |
485 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
486 | // (void *cframe, int errcode)
487 | mov eax, CARG2d // Error return status for vm_pcall.
488 | mov rsp, CARG1
489 |->vm_unwind_c_eh: // Landing pad for external unwinder.
490 | mov L:RB, SAVE_L
491 | mov GL:RB, L:RB->glref
492 | mov dword GL:RB->vmstate, ~LJ_VMST_C
493 | jmp ->vm_leave_unw
494 |
495 |->vm_unwind_rethrow:
496 |.if not X64WIN
497 | mov CARG1, SAVE_L
498 | mov CARG2d, eax
499 | restoreregs
500 | jmp extern lj_err_throw // (lua_State *L, int errcode)
501 |.endif
502 |
503 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
504 | // (void *cframe)
505 | and CARG1, CFRAME_RAWMASK
506 | mov rsp, CARG1
507 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
508 | mov L:RB, SAVE_L
509 | mov RDd, 1+1 // Really 1+2 results, incr. later.
510 | mov BASE, L:RB->base
511 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
512 | add DISPATCH, GG_G2DISP
513 | mov PC, [BASE-8] // Fetch PC of previous frame.
514 | mov_false RA
515 | mov RB, [BASE]
516 | mov [BASE-16], RA // Prepend false to error message.
517 | mov [BASE-8], RB
518 | mov RA, -16 // Results start at BASE+RA = BASE-16.
519 | set_vmstate INTERP
520 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
521 |
522 |//-----------------------------------------------------------------------
523 |//-- Grow stack for calls -----------------------------------------------
524 |//-----------------------------------------------------------------------
525 |
526 |->vm_growstack_c: // Grow stack for C function.
527 | mov CARG2d, LUA_MINSTACK
528 | jmp >2
529 |
530 |->vm_growstack_v: // Grow stack for vararg Lua function.
531 | sub RD, 16 // LJ_FR2
532 | jmp >1
533 |
534 |->vm_growstack_f: // Grow stack for fixarg Lua function.
535 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
536 | lea RD, [BASE+NARGS:RD*8-8]
537 |1:
538 | movzx RAd, byte [PC-4+PC2PROTO(framesize)]
539 | add PC, 4 // Must point after first instruction.
540 | mov L:RB->base, BASE
541 | mov L:RB->top, RD
542 | mov SAVE_PC, PC
543 | mov CARG2, RA
544 |2:
545 | // RB = L, L->base = new base, L->top = top
546 | mov CARG1, L:RB
547 | call extern lj_state_growstack // (lua_State *L, int n)
548 | mov BASE, L:RB->base
549 | mov RD, L:RB->top
550 | mov LFUNC:RB, [BASE-16]
551 | cleartp LFUNC:RB
552 | sub RD, BASE
553 | shr RDd, 3
554 | add NARGS:RDd, 1
555 | // BASE = new base, RB = LFUNC, RD = nargs+1
556 | ins_callt // Just retry the call.
557 |
558 |//-----------------------------------------------------------------------
559 |//-- Entry points into the assembler VM ---------------------------------
560 |//-----------------------------------------------------------------------
561 |
562 |->vm_resume: // Setup C frame and resume thread.
563 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
564 | saveregs
565 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
566 | mov SAVE_L, CARG1
567 | mov RA, CARG2
568 | mov PCd, FRAME_CP
569 | xor RDd, RDd
570 | lea KBASE, [esp+CFRAME_RESUME]
571 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
572 | add DISPATCH, GG_G2DISP
573 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
574 | mov SAVE_CFRAME, RD
575 | mov SAVE_NRES, RDd
576 | mov SAVE_ERRF, RDd
577 | mov L:RB->cframe, KBASE
578 | cmp byte L:RB->status, RDL
579 | je >2 // Initial resume (like a call).
580 |
581 | // Resume after yield (like a return).
582 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
583 | set_vmstate INTERP
584 | mov byte L:RB->status, RDL
585 | mov BASE, L:RB->base
586 | mov RD, L:RB->top
587 | sub RD, RA
588 | shr RDd, 3
589 | add RDd, 1 // RD = nresults+1
590 | sub RA, BASE // RA = resultofs
591 | mov PC, [BASE-8]
592 | mov MULTRES, RDd
593 | test PCd, FRAME_TYPE
594 | jz ->BC_RET_Z
595 | jmp ->vm_return
596 |
597 |->vm_pcall: // Setup protected C frame and enter VM.
598 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
599 | saveregs
600 | mov PCd, FRAME_CP
601 | mov SAVE_ERRF, CARG4d
602 | jmp >1
603 |
604 |->vm_call: // Setup C frame and enter VM.
605 | // (lua_State *L, TValue *base, int nres1)
606 | saveregs
607 | mov PCd, FRAME_C
608 |
609 |1: // Entry point for vm_pcall above (PC = ftype).
610 | mov SAVE_NRES, CARG3d
611 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
612 | mov SAVE_L, CARG1
613 | mov RA, CARG2
614 |
615 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
616 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
617 | mov SAVE_CFRAME, KBASE
618 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
619 | add DISPATCH, GG_G2DISP
620 | mov L:RB->cframe, rsp
621 |
622 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
623 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
624 | set_vmstate INTERP
625 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
626 | add PC, RA
627 | sub PC, BASE // PC = frame delta + frame type
628 |
629 | mov RD, L:RB->top
630 | sub RD, RA
631 | shr NARGS:RDd, 3
632 | add NARGS:RDd, 1 // RD = nargs+1
633 |
634 |->vm_call_dispatch:
635 | mov LFUNC:RB, [RA-16]
636 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
637 |
638 |->vm_call_dispatch_f:
639 | mov BASE, RA
640 | ins_call
641 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
642 |
643 |->vm_cpcall: // Setup protected C frame, call C.
644 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
645 | saveregs
646 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
647 | mov SAVE_L, CARG1
648 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
649 |
650 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
651 | sub KBASE, L:RB->top
652 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
653 | mov SAVE_ERRF, 0 // No error function.
654 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
655 | add DISPATCH, GG_G2DISP
656 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
657 |
658 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
659 | mov SAVE_CFRAME, KBASE
660 | mov L:RB->cframe, rsp
661 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
662 |
663 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
664 | // TValue * (new base) or NULL returned in eax (RC).
665 | test RC, RC
666 | jz ->vm_leave_cp // No base? Just remove C frame.
667 | mov RA, RC
668 | mov PCd, FRAME_CP
669 | jmp <2 // Else continue with the call.
670 |
671 |//-----------------------------------------------------------------------
672 |//-- Metamethod handling ------------------------------------------------
673 |//-----------------------------------------------------------------------
674 |
675 |//-- Continuation dispatch ----------------------------------------------
676 |
677 |->cont_dispatch:
678 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
679 | add RA, BASE
680 | and PC, -8
681 | mov RB, BASE
682 | sub BASE, PC // Restore caller BASE.
683 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
684 | mov RC, RA // ... in [RC]
685 | mov PC, [RB-24] // Restore PC from [cont|PC].
686 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
687 |.if FFI
688 | cmp RA, 1
689 | jbe >1
690 |.endif
691 | mov LFUNC:KBASE, [BASE-16]
692 | cleartp LFUNC:KBASE
693 | mov KBASE, LFUNC:KBASE->pc
694 | mov KBASE, [KBASE+PC2PROTO(k)]
695 | // BASE = base, RC = result, RB = meta base
696 | jmp RA // Jump to continuation.
697 |
698 |.if FFI
699 |1:
700 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
701 | // cont = 0: Tail call from C function.
702 | sub RB, BASE
703 | shr RBd, 3
704 | lea RDd, [RBd-3]
705 | jmp ->vm_call_tail
706 |.endif
707 |
708 |->cont_cat: // BASE = base, RC = result, RB = mbase
709 | movzx RAd, PC_RB
710 | sub RB, 32
711 | lea RA, [BASE+RA*8]
712 | sub RA, RB
713 | je ->cont_ra
714 | neg RA
715 | shr RAd, 3
716 |.if X64WIN
717 | mov CARG3d, RAd
718 | mov L:CARG1, SAVE_L
719 | mov L:CARG1->base, BASE
720 | mov RC, [RC]
721 | mov [RB], RC
722 | mov CARG2, RB
723 |.else
724 | mov L:CARG1, SAVE_L
725 | mov L:CARG1->base, BASE
726 | mov CARG3d, RAd
727 | mov RA, [RC]
728 | mov [RB], RA
729 | mov CARG2, RB
730 |.endif
731 | jmp ->BC_CAT_Z
732 |
733 |//-- Table indexing metamethods -----------------------------------------
734 |
735 |->vmeta_tgets:
736 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
737 | mov TMP1, STR:RC
738 | lea RC, TMP1
739 | cmp PC_OP, BC_GGET
740 | jne >1
741 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
742 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
743 | mov [RB], TAB:RA
744 | jmp >2
745 |
746 |->vmeta_tgetb:
747 | movzx RCd, PC_RC
748 |.if DUALNUM
749 | setint RC
750 | mov TMP1, RC
751 |.else
752 | cvtsi2sd xmm0, RCd
753 | movsd TMP1, xmm0
754 |.endif
755 | lea RC, TMP1
756 | jmp >1
757 |
758 |->vmeta_tgetv:
759 | movzx RCd, PC_RC // Reload TValue *k from RC.
760 | lea RC, [BASE+RC*8]
761 |1:
762 | movzx RBd, PC_RB // Reload TValue *t from RB.
763 | lea RB, [BASE+RB*8]
764 |2:
765 | mov L:CARG1, SAVE_L
766 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
767 | mov CARG2, RB
768 | mov CARG3, RC
769 | mov L:RB, L:CARG1
770 | mov SAVE_PC, PC
771 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
772 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
773 | mov BASE, L:RB->base
774 | test RC, RC
775 | jz >3
776 |->cont_ra: // BASE = base, RC = result
777 | movzx RAd, PC_RA
778 | mov RB, [RC]
779 | mov [BASE+RA*8], RB
780 | ins_next
781 |
782 |3: // Call __index metamethod.
783 | // BASE = base, L->top = new base, stack = cont/func/t/k
784 | mov RA, L:RB->top
785 | mov [RA-24], PC // [cont|PC]
786 | lea PC, [RA+FRAME_CONT]
787 | sub PC, BASE
788 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
789 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
790 | cleartp LFUNC:RB
791 | jmp ->vm_call_dispatch_f
792 |
793 |->vmeta_tgetr:
794 | mov CARG1, TAB:RB
795 | mov RB, BASE // Save BASE.
796 | mov CARG2d, RCd // Caveat: CARG2 == BASE
797 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
798 | // cTValue * or NULL returned in eax (RC).
799 | movzx RAd, PC_RA
800 | mov BASE, RB // Restore BASE.
801 | test RC, RC
802 | jnz ->BC_TGETR_Z
803 | mov ITYPE, LJ_TNIL
804 | jmp ->BC_TGETR2_Z
805 |
806 |//-----------------------------------------------------------------------
807 |
808 |->vmeta_tsets:
809 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
810 | mov TMP1, STR:RC
811 | lea RC, TMP1
812 | cmp PC_OP, BC_GSET
813 | jne >1
814 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
815 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
816 | mov [RB], TAB:RA
817 | jmp >2
818 |
819 |->vmeta_tsetb:
820 | movzx RCd, PC_RC
821 |.if DUALNUM
822 | setint RC
823 | mov TMP1, RC
824 |.else
825 | cvtsi2sd xmm0, RCd
826 | movsd TMP1, xmm0
827 |.endif
828 | lea RC, TMP1
829 | jmp >1
830 |
831 |->vmeta_tsetv:
832 | movzx RCd, PC_RC // Reload TValue *k from RC.
833 | lea RC, [BASE+RC*8]
834 |1:
835 | movzx RBd, PC_RB // Reload TValue *t from RB.
836 | lea RB, [BASE+RB*8]
837 |2:
838 | mov L:CARG1, SAVE_L
839 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
840 | mov CARG2, RB
841 | mov CARG3, RC
842 | mov L:RB, L:CARG1
843 | mov SAVE_PC, PC
844 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
845 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
846 | mov BASE, L:RB->base
847 | test RC, RC
848 | jz >3
849 | // NOBARRIER: lj_meta_tset ensures the table is not black.
850 | movzx RAd, PC_RA
851 | mov RB, [BASE+RA*8]
852 | mov [RC], RB
853 |->cont_nop: // BASE = base, (RC = result)
854 | ins_next
855 |
856 |3: // Call __newindex metamethod.
857 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
858 | mov RA, L:RB->top
859 | mov [RA-24], PC // [cont|PC]
860 | movzx RCd, PC_RA
861 | // Copy value to third argument.
862 | mov RB, [BASE+RC*8]
863 | mov [RA+16], RB
864 | lea PC, [RA+FRAME_CONT]
865 | sub PC, BASE
866 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
867 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
868 | cleartp LFUNC:RB
869 | jmp ->vm_call_dispatch_f
870 |
871 |->vmeta_tsetr:
872 |.if X64WIN
873 | mov L:CARG1, SAVE_L
874 | mov CARG3d, RCd
875 | mov L:CARG1->base, BASE
876 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE.
877 |.else
878 | mov L:CARG1, SAVE_L
879 | mov CARG2, TAB:RB
880 | mov L:CARG1->base, BASE
881 | mov RB, BASE // Save BASE.
882 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
883 |.endif
884 | mov SAVE_PC, PC
885 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
886 | // TValue * returned in eax (RC).
887 | movzx RAd, PC_RA
888 | mov BASE, RB // Restore BASE.
889 | jmp ->BC_TSETR_Z
890 |
891 |//-- Comparison metamethods ---------------------------------------------
892 |
893 |->vmeta_comp:
894 | movzx RDd, PC_RD
895 | movzx RAd, PC_RA
896 | mov L:RB, SAVE_L
897 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
898 |.if X64WIN
899 | lea CARG3, [BASE+RD*8]
900 | lea CARG2, [BASE+RA*8]
901 |.else
902 | lea CARG2, [BASE+RA*8]
903 | lea CARG3, [BASE+RD*8]
904 |.endif
905 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
906 | movzx CARG4d, PC_OP
907 | mov SAVE_PC, PC
908 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
909 | // 0/1 or TValue * (metamethod) returned in eax (RC).
910 |3:
911 | mov BASE, L:RB->base
912 | cmp RC, 1
913 | ja ->vmeta_binop
914 |4:
915 | lea PC, [PC+4]
916 | jb >6
917 |5:
918 | movzx RDd, PC_RD
919 | branchPC RD
920 |6:
921 | ins_next
922 |
923 |->cont_condt: // BASE = base, RC = result
924 | add PC, 4
925 | mov ITYPE, [RC]
926 | sar ITYPE, 47
927 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
928 | jb <5
929 | jmp <6
930 |
931 |->cont_condf: // BASE = base, RC = result
932 | mov ITYPE, [RC]
933 | sar ITYPE, 47
934 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
935 | jmp <4
936 |
937 |->vmeta_equal:
938 | cleartp TAB:RD
939 | sub PC, 4
940 |.if X64WIN
941 | mov CARG3, RD
942 | mov CARG4d, RBd
943 | mov L:RB, SAVE_L
944 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
945 | mov CARG2, RA
946 | mov CARG1, L:RB // Caveat: CARG1 == RA.
947 |.else
948 | mov CARG2, RA
949 | mov CARG4d, RBd // Caveat: CARG4 == RA.
950 | mov L:RB, SAVE_L
951 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
952 | mov CARG3, RD
953 | mov CARG1, L:RB
954 |.endif
955 | mov SAVE_PC, PC
956 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
957 | // 0/1 or TValue * (metamethod) returned in eax (RC).
958 | jmp <3
959 |
960 |->vmeta_equal_cd:
961 |.if FFI
962 | sub PC, 4
963 | mov L:RB, SAVE_L
964 | mov L:RB->base, BASE
965 | mov CARG1, L:RB
966 | mov CARG2d, dword [PC-4]
967 | mov SAVE_PC, PC
968 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
969 | // 0/1 or TValue * (metamethod) returned in eax (RC).
970 | jmp <3
971 |.endif
972 |
973 |->vmeta_istype:
974 | mov L:RB, SAVE_L
975 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
976 | mov CARG2d, RAd
977 | mov CARG3d, RDd
978 | mov L:CARG1, L:RB
979 | mov SAVE_PC, PC
980 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
981 | mov BASE, L:RB->base
982 | jmp <6
983 |
984 |//-- Arithmetic metamethods ---------------------------------------------
985 |
986 |->vmeta_arith_vno:
987 |.if DUALNUM
988 | movzx RBd, PC_RB
989 | movzx RCd, PC_RC
990 |.endif
991 |->vmeta_arith_vn:
992 | lea RC, [KBASE+RC*8]
993 | jmp >1
994 |
995 |->vmeta_arith_nvo:
996 |.if DUALNUM
997 | movzx RBd, PC_RB
998 | movzx RCd, PC_RC
999 |.endif
1000 |->vmeta_arith_nv:
1001 | lea TMPR, [KBASE+RC*8]
1002 | lea RC, [BASE+RB*8]
1003 | mov RB, TMPR
1004 | jmp >2
1005 |
1006 |->vmeta_unm:
1007 | lea RC, [BASE+RD*8]
1008 | mov RB, RC
1009 | jmp >2
1010 |
1011 |->vmeta_arith_vvo:
1012 |.if DUALNUM
1013 | movzx RBd, PC_RB
1014 | movzx RCd, PC_RC
1015 |.endif
1016 |->vmeta_arith_vv:
1017 | lea RC, [BASE+RC*8]
1018 |1:
1019 | lea RB, [BASE+RB*8]
1020 |2:
1021 | lea RA, [BASE+RA*8]
1022 |.if X64WIN
1023 | mov CARG3, RB
1024 | mov CARG4, RC
1025 | movzx RCd, PC_OP
1026 | mov ARG5d, RCd
1027 | mov L:RB, SAVE_L
1028 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1029 | mov CARG2, RA
1030 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1031 |.else
1032 | movzx CARG5d, PC_OP
1033 | mov CARG2, RA
1034 | mov CARG4, RC // Caveat: CARG4 == RA.
1035 | mov L:CARG1, SAVE_L
1036 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1037 | mov CARG3, RB
1038 | mov L:RB, L:CARG1
1039 |.endif
1040 | mov SAVE_PC, PC
1041 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1042 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1043 | mov BASE, L:RB->base
1044 | test RC, RC
1045 | jz ->cont_nop
1046 |
1047 | // Call metamethod for binary op.
1048 |->vmeta_binop:
1049 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1050 | mov RA, RC
1051 | sub RC, BASE
1052 | mov [RA-24], PC // [cont|PC]
1053 | lea PC, [RC+FRAME_CONT]
1054 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1055 | jmp ->vm_call_dispatch
1056 |
1057 |->vmeta_len:
1058 | movzx RDd, PC_RD
1059 | mov L:RB, SAVE_L
1060 | mov L:RB->base, BASE
1061 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1062 | mov L:CARG1, L:RB
1063 | mov SAVE_PC, PC
1064 | call extern lj_meta_len // (lua_State *L, TValue *o)
1065 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1066 | mov BASE, L:RB->base
1067#if LJ_52
1068 | test RC, RC
1069 | jne ->vmeta_binop // Binop call for compatibility.
1070 | movzx RDd, PC_RD
1071 | mov TAB:CARG1, [BASE+RD*8]
1072 | cleartp TAB:CARG1
1073 | jmp ->BC_LEN_Z
1074#else
1075 | jmp ->vmeta_binop // Binop call for compatibility.
1076#endif
1077 |
1078 |//-- Call metamethod ----------------------------------------------------
1079 |
1080 |->vmeta_call_ra:
1081 | lea RA, [BASE+RA*8+16]
1082 |->vmeta_call: // Resolve and call __call metamethod.
1083 | // BASE = old base, RA = new base, RD = nargs+1, PC = return
1084 | mov TMP1d, NARGS:RDd // Save RA, RD for us.
1085 | mov RB, RA
1086 |.if X64WIN
1087 | mov L:TMPR, SAVE_L
1088 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1089 | lea CARG2, [RA-16]
1090 | lea CARG3, [RA+NARGS:RD*8-8]
1091 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1092 |.else
1093 | mov L:CARG1, SAVE_L
1094 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1095 | lea CARG2, [RA-16]
1096 | lea CARG3, [RA+NARGS:RD*8-8]
1097 |.endif
1098 | mov SAVE_PC, PC
1099 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1100 | mov RA, RB
1101 | mov L:RB, SAVE_L
1102 | mov BASE, L:RB->base
1103 | mov NARGS:RDd, TMP1d
1104 | mov LFUNC:RB, [RA-16]
1105 | add NARGS:RDd, 1
1106 | // This is fragile. L->base must not move, KBASE must always be defined.
1107 | cmp KBASE, BASE // Continue with CALLT if flag set.
1108 | je ->BC_CALLT_Z
1109 | cleartp LFUNC:RB
1110 | mov BASE, RA
1111 | ins_call // Otherwise call resolved metamethod.
1112 |
1113 |//-- Argument coercion for 'for' statement ------------------------------
1114 |
1115 |->vmeta_for:
1116 | mov L:RB, SAVE_L
1117 | mov L:RB->base, BASE
1118 | mov CARG2, RA // Caveat: CARG2 == BASE
1119 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1120 | mov SAVE_PC, PC
1121 | call extern lj_meta_for // (lua_State *L, TValue *base)
1122 | mov BASE, L:RB->base
1123 | mov RCd, [PC-4]
1124 | movzx RAd, RCH
1125 | movzx OP, RCL
1126 | shr RCd, 16
1127 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1128 |
1129 |//-----------------------------------------------------------------------
1130 |//-- Fast functions -----------------------------------------------------
1131 |//-----------------------------------------------------------------------
1132 |
1133 |.macro .ffunc, name
1134 |->ff_ .. name:
1135 |.endmacro
1136 |
1137 |.macro .ffunc_1, name
1138 |->ff_ .. name:
1139 | cmp NARGS:RDd, 1+1; jb ->fff_fallback
1140 |.endmacro
1141 |
1142 |.macro .ffunc_2, name
1143 |->ff_ .. name:
1144 | cmp NARGS:RDd, 2+1; jb ->fff_fallback
1145 |.endmacro
1146 |
1147 |.macro .ffunc_n, name, op
1148 | .ffunc_1 name
1149 | checknumtp [BASE], ->fff_fallback
1150 | op xmm0, qword [BASE]
1151 |.endmacro
1152 |
1153 |.macro .ffunc_n, name
1154 | .ffunc_n name, movsd
1155 |.endmacro
1156 |
1157 |.macro .ffunc_nn, name
1158 | .ffunc_2 name
1159 | checknumtp [BASE], ->fff_fallback
1160 | checknumtp [BASE+8], ->fff_fallback
1161 | movsd xmm0, qword [BASE]
1162 | movsd xmm1, qword [BASE+8]
1163 |.endmacro
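|// Note: NARGS:RD holds nargs+1 on entry to a fast function (see the
|// vm_call_dispatch convention above), hence the 1+1 / 2+1 comparisons.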
1164 |
1165 |// Inlined GC threshold check. Caveat: uses label 1.
1166 |.macro ffgccheck
1167 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1168 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1169 | jb >1
1170 | call ->fff_gcstep
1171 |1:
1172 |.endmacro
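|// In C terms the check is roughly: if (g->gc.total >= g->gc.threshold) run a
|// GC step. fff_gcstep below wraps the call so BASE is restored and nargs+1
|// in RD is recomputed afterwards.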
1173 |
1174 |//-- Base library: checks -----------------------------------------------
1175 |
1176 |.ffunc_1 assert
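| // On success assert() returns all of its arguments, so the loop below
| // shifts the whole argument window down by two slots (16 bytes).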
1177 | mov ITYPE, [BASE]
1178 | mov RB, ITYPE
1179 | sar ITYPE, 47
1180 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
1181 | mov PC, [BASE-8]
1182 | mov MULTRES, RDd
1183 | mov RB, [BASE]
1184 | mov [BASE-16], RB
1185 | sub RDd, 2
1186 | jz >2
1187 | mov RA, BASE
1188 |1:
1189 | add RA, 8
1190 | mov RB, [RA]
1191 | mov [RA-16], RB
1192 | sub RDd, 1
1193 | jnz <1
1194 |2:
1195 | mov RDd, MULTRES
1196 | jmp ->fff_res_
1197 |
1198 |.ffunc_1 type
1199 | mov RC, [BASE]
1200 | sar RC, 47
1201 | mov RBd, LJ_TISNUM
1202 | cmp RCd, RBd
1203 | cmovb RCd, RBd
1204 | not RCd
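| // ~itype yields a small non-negative index; the load below uses it to pick
| // the matching type name string from this fast function's upvalues.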
1205 |2:
1206 | mov CFUNC:RB, [BASE-16]
1207 | cleartp CFUNC:RB
1208 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1209 | mov PC, [BASE-8]
1210 | settp STR:RC, LJ_TSTR
1211 | mov [BASE-16], STR:RC
1212 | jmp ->fff_res1
1213 |
1214 |//-- Base library: getters and setters ---------------------------------
1215 |
1216 |.ffunc_1 getmetatable
1217 | mov TAB:RB, [BASE]
1218 | mov PC, [BASE-8]
1219 | checktab TAB:RB, >6
1220 |1: // Field metatable must be at same offset for GCtab and GCudata!
1221 | mov TAB:RB, TAB:RB->metatable
1222 |2:
1223 | test TAB:RB, TAB:RB
1224 | mov aword [BASE-16], LJ_TNIL
1225 | jz ->fff_res1
1226 | settp TAB:RC, TAB:RB, LJ_TTAB
1227 | mov [BASE-16], TAB:RC // Store metatable as default result.
1228 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1229 | mov RAd, TAB:RB->hmask
1230 | and RAd, STR:RC->sid
1231 | settp STR:RC, LJ_TSTR
1232 | imul RAd, #NODE
1233 | add NODE:RA, TAB:RB->node
1234 |3: // Rearranged logic, because we expect _not_ to find the key.
1235 | cmp NODE:RA->key, STR:RC
1236 | je >5
1237 |4:
1238 | mov NODE:RA, NODE:RA->next
1239 | test NODE:RA, NODE:RA
1240 | jnz <3
1241 | jmp ->fff_res1 // Not found, keep default result.
1242 |5:
1243 | mov RB, NODE:RA->val
1244 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1245 | mov [BASE-16], RB // Return value of mt.__metatable.
1246 | jmp ->fff_res1
1247 |
1248 |6:
1249 | cmp ITYPEd, LJ_TUDATA; je <1
1250 | cmp ITYPEd, LJ_TISNUM; ja >7
1251 | mov ITYPEd, LJ_TISNUM
1252 |7:
1253 | not ITYPEd
1254 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1255 | jmp <2
1256 |
1257 |.ffunc_2 setmetatable
1258 | mov TAB:RB, [BASE]
1259 | mov TAB:TMPR, TAB:RB
1260 | checktab TAB:RB, ->fff_fallback
1261 | // Fast path: no mt for table yet and not clearing the mt.
1262 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1263 | mov TAB:RA, [BASE+8]
1264 | checktab TAB:RA, ->fff_fallback
1265 | mov TAB:RB->metatable, TAB:RA
1266 | mov PC, [BASE-8]
1267 | mov [BASE-16], TAB:TMPR // Return original table.
1268 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1269 | jz >1
1270 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1271 | barrierback TAB:RB, RC
1272 |1:
1273 | jmp ->fff_res1
1274 |
1275 |.ffunc_2 rawget
1276 |.if X64WIN
1277 | mov TAB:RA, [BASE]
1278 | checktab TAB:RA, ->fff_fallback
1279 | mov RB, BASE // Save BASE.
1280 | lea CARG3, [BASE+8]
1281 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1282 | mov CARG1, SAVE_L
1283 |.else
1284 | mov TAB:CARG2, [BASE]
1285 | checktab TAB:CARG2, ->fff_fallback
1286 | mov RB, BASE // Save BASE.
1287 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1288 | mov CARG1, SAVE_L
1289 |.endif
1290 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1291 | // cTValue * returned in eax (RD).
1292 | mov BASE, RB // Restore BASE.
1293 | // Copy table slot.
1294 | mov RB, [RD]
1295 | mov PC, [BASE-8]
1296 | mov [BASE-16], RB
1297 | jmp ->fff_res1
1298 |
1299 |//-- Base library: conversions ------------------------------------------
1300 |
1301 |.ffunc tonumber
1302 | // Only handles the number case inline (without a base argument).
1303 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1304 | mov RB, [BASE]
1305 | checknumber RB, ->fff_fallback
1306 | mov PC, [BASE-8]
1307 | mov [BASE-16], RB
1308 | jmp ->fff_res1
1309 |
1310 |.ffunc_1 tostring
1311 | // Only handles the string or number case inline.
1312 | mov PC, [BASE-8]
1313 | mov STR:RB, [BASE]
1314 | checktp_nc STR:RB, LJ_TSTR, >3
1315 | // A __tostring method in the string base metatable is ignored.
1316 |2:
1317 | mov [BASE-16], STR:RB
1318 | jmp ->fff_res1
1319 |3: // Handle numbers inline, unless a number base metatable is present.
1320 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1321 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1322 | jne ->fff_fallback
1323 | ffgccheck // Caveat: uses label 1.
1324 | mov L:RB, SAVE_L
1325 | mov L:RB->base, BASE // Add frame since C call can throw.
1326 | mov SAVE_PC, PC // Redundant (but a defined value).
1327 |.if not X64WIN
1328 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1329 |.endif
1330 | mov L:CARG1, L:RB
1331 |.if DUALNUM
1332 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1333 |.else
1334 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1335 |.endif
1336 | // GCstr returned in eax (RD).
1337 | mov BASE, L:RB->base
1338 | settp STR:RB, RD, LJ_TSTR
1339 | jmp <2
1340 |
1341 |//-- Base library: iterators -------------------------------------------
1342 |
1343 |.ffunc_1 next
1344 | je >2 // Missing 2nd arg?
1345 |1:
1346 | mov CARG1, [BASE]
1347 | mov PC, [BASE-8]
1348 | checktab CARG1, ->fff_fallback
1349 | mov RB, BASE // Save BASE.
1350 |.if X64WIN
1351 | lea CARG3, [BASE-16]
1352 | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE.
1353 |.else
1354 | lea CARG2, [BASE+8]
1355 | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE.
1356 |.endif
1357 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1358 | // 1=found, 0=end, -1=error returned in eax (RD).
1359 | mov BASE, RB // Restore BASE.
1360 | test RDd, RDd; jg ->fff_res2 // Found key/value.
1361 | js ->fff_fallback_2 // Invalid key.
1362 | // End of traversal: return nil.
1363 | mov aword [BASE-16], LJ_TNIL
1364 | jmp ->fff_res1
1365 |2: // Set missing 2nd arg to nil.
1366 | mov aword [BASE+8], LJ_TNIL
1367 | jmp <1
1368 |
1369 |.ffunc_1 pairs
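| // Fast path returns the iterator kept in upvalue[0] (next), the table and nil.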
1370 | mov TAB:RB, [BASE]
1371 | mov TMPR, TAB:RB
1372 | checktab TAB:RB, ->fff_fallback
1373#if LJ_52
1374 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1375#endif
1376 | mov CFUNC:RD, [BASE-16]
1377 | cleartp CFUNC:RD
1378 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1379 | settp CFUNC:RD, LJ_TFUNC
1380 | mov PC, [BASE-8]
1381 | mov [BASE-16], CFUNC:RD
1382 | mov [BASE-8], TMPR
1383 | mov aword [BASE], LJ_TNIL
1384 | mov RDd, 1+3
1385 | jmp ->fff_res
1386 |
1387 |.ffunc_2 ipairs_aux
1388 | mov TAB:RB, [BASE]
1389 | checktab TAB:RB, ->fff_fallback
1390 |.if DUALNUM
1391 | mov RA, [BASE+8]
1392 | checkint RA, ->fff_fallback
1393 |.else
1394 | checknumtp [BASE+8], ->fff_fallback
1395 | movsd xmm0, qword [BASE+8]
1396 |.endif
1397 | mov PC, [BASE-8]
1398 |.if DUALNUM
1399 | add RAd, 1
1400 | setint ITYPE, RA
1401 | mov [BASE-16], ITYPE
1402 |.else
1403 | sseconst_1 xmm1, TMPR
1404 | addsd xmm0, xmm1
1405 | cvttsd2si RAd, xmm0
1406 | movsd qword [BASE-16], xmm0
1407 |.endif
1408 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1409 | mov RD, TAB:RB->array
1410 | lea RD, [RD+RA*8]
1411 |1:
1412 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1413 | // Copy array slot.
1414 | mov RB, [RD]
1415 | mov [BASE-8], RB
1416 |->fff_res2:
1417 | mov RDd, 1+2
1418 | jmp ->fff_res
1419 |2: // Check for empty hash part first. Otherwise call C function.
1420 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1421 |.if X64WIN
1422 | mov TMPR, BASE
1423 | mov CARG2d, RAd
1424 | mov CARG1, TAB:RB
1425 | mov RB, TMPR
1426 |.else
1427 | mov CARG1, TAB:RB
1428 | mov RB, BASE // Save BASE.
1429 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1430 |.endif
1431 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1432 | // cTValue * or NULL returned in eax (RD).
1433 | mov BASE, RB
1434 | test RD, RD
1435 | jnz <1
1436 |->fff_res0:
1437 | mov RDd, 1+0
1438 | jmp ->fff_res
1439 |
1440 |.ffunc_1 ipairs
1441 | mov TAB:RB, [BASE]
1442 | mov TMPR, TAB:RB
1443 | checktab TAB:RB, ->fff_fallback
1444#if LJ_52
1445 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1446#endif
1447 | mov CFUNC:RD, [BASE-16]
1448 | cleartp CFUNC:RD
1449 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1450 | settp CFUNC:RD, LJ_TFUNC
1451 | mov PC, [BASE-8]
1452 | mov [BASE-16], CFUNC:RD
1453 | mov [BASE-8], TMPR
1454 |.if DUALNUM
1455 | mov64 RD, ((uint64_t)LJ_TISNUM<<47)
1456 | mov [BASE], RD
1457 |.else
1458 | mov qword [BASE], 0
1459 |.endif
1460 | mov RDd, 1+3
1461 | jmp ->fff_res
1462 |
1463 |//-- Base library: catch errors ----------------------------------------
1464 |
1465 |.ffunc_1 pcall
1466 | mov L:RB, SAVE_L
1467 | lea RA, [BASE+NARGS:RD*8]
1468 | cmp RA, L:RB->maxstack; ja ->fff_fallback
1469 | lea RA, [BASE+16]
1470 | sub NARGS:RDd, 1
1471 | mov PCd, 16+FRAME_PCALL
1472 |1:
1473 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1474 | shr RB, HOOK_ACTIVE_SHIFT
1475 | and RB, 1
1476 | add PC, RB // Remember active hook before pcall.
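| // (The extra bit turns FRAME_PCALL into FRAME_PCALLH, i.e. a pcall frame
| // with an active hook.)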
1477 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1478 | mov KBASE, RD
1479 |2:
1480 | mov RB, [RA+KBASE*8-24]
1481 | mov [RA+KBASE*8-16], RB
1482 | sub KBASE, 1
1483 | ja <2
1484 | jmp ->vm_call_dispatch
1485 |
1486 |.ffunc_2 xpcall
1487 | mov L:RB, SAVE_L
1488 | lea RA, [BASE+NARGS:RD*8]
1489 | cmp RA, L:RB->maxstack; ja ->fff_fallback
1490 | mov LFUNC:RA, [BASE+8]
1491 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
1492 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1493 | mov [BASE], LFUNC:RA
1494 | mov [BASE+8], LFUNC:RB
1495 | lea RA, [BASE+24]
1496 | sub NARGS:RDd, 2
1497 | mov PCd, 24+FRAME_PCALL
1498 | jmp <1
1499 |
1500 |//-- Coroutine library --------------------------------------------------
1501 |
1502 |.macro coroutine_resume_wrap, resume
1503 |.if resume
1504 |.ffunc_1 coroutine_resume
1505 | mov L:RB, [BASE]
1506 | cleartp L:RB
1507 |.else
1508 |.ffunc coroutine_wrap_aux
1509 | mov CFUNC:RB, [BASE-16]
1510 | cleartp CFUNC:RB
1511 | mov L:RB, CFUNC:RB->upvalue[0].gcr
1512 | cleartp L:RB
1513 |.endif
1514 | mov PC, [BASE-8]
1515 | mov SAVE_PC, PC
1516 | mov TMP1, L:RB
1517 |.if resume
1518 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1519 |.endif
1520 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1521 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1522 | mov RA, L:RB->top
1523 | je >1 // Status != LUA_YIELD (i.e. 0)?
1524 | cmp RA, L:RB->base // Check for presence of initial func.
1525 | je ->fff_fallback
1526 | mov PC, [RA-8] // Move initial function up.
1527 | mov [RA], PC
1528 | add RA, 8
1529 |1:
1530 |.if resume
1531 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1532 |.else
1533 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1534 |.endif
1535 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1536 | mov L:RB->top, PC
1537 |
1538 | mov L:RB, SAVE_L
1539 | mov L:RB->base, BASE
1540 |.if resume
1541 | add BASE, 8 // Keep resumed thread in stack for GC.
1542 |.endif
1543 | mov L:RB->top, BASE
1544 |.if resume
1545 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1546 |.else
1547 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1548 |.endif
1549 | sub RB, PC // Relative to PC.
1550 |
1551 | cmp PC, RA
1552 | je >3
1553 |2: // Move args to coroutine.
1554 | mov RC, [PC+RB]
1555 | mov [PC-8], RC
1556 | sub PC, 8
1557 | cmp PC, RA
1558 | jne <2
1559 |3:
1560 | mov CARG2, RA
1561 | mov CARG1, TMP1
1562 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1563 |
1564 | mov L:RB, SAVE_L
1565 | mov L:PC, TMP1
1566 | mov BASE, L:RB->base
1567 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1568 | set_vmstate INTERP
1569 |
1570 | cmp eax, LUA_YIELD
1571 | ja >8
1572 |4:
1573 | mov RA, L:PC->base
1574 | mov KBASE, L:PC->top
1575 | mov L:PC->top, RA // Clear coroutine stack.
1576 | mov PC, KBASE
1577 | sub PC, RA
1578 | je >6 // No results?
1579 | lea RD, [BASE+PC]
1580 | shr PCd, 3
1581 | cmp RD, L:RB->maxstack
1582 | ja >9 // Need to grow stack?
1583 |
1584 | mov RB, BASE
1585 | sub RB, RA
1586 |5: // Move results from coroutine.
1587 | mov RD, [RA]
1588 | mov [RA+RB], RD
1589 | add RA, 8
1590 | cmp RA, KBASE
1591 | jne <5
1592 |6:
1593 |.if resume
1594 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1595 | mov_true ITYPE // Prepend true to results.
1596 | mov [BASE-8], ITYPE
1597 |.else
1598 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1599 |.endif
1600 |7:
1601 | mov PC, SAVE_PC
1602 | mov MULTRES, RDd
1603 |.if resume
1604 | mov RA, -8
1605 |.else
1606 | xor RAd, RAd
1607 |.endif
1608 | test PCd, FRAME_TYPE
1609 | jz ->BC_RET_Z
1610 | jmp ->vm_return
1611 |
1612 |8: // Coroutine returned with error (at co->top-1).
1613 |.if resume
1614 | mov_false ITYPE // Prepend false to results.
1615 | mov [BASE-8], ITYPE
1616 | mov RA, L:PC->top
1617 | sub RA, 8
1618 | mov L:PC->top, RA // Clear error from coroutine stack.
1619 | // Copy error message.
1620 | mov RD, [RA]
1621 | mov [BASE], RD
1622 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1623 | jmp <7
1624 |.else
1625 | mov CARG2, L:PC
1626 | mov CARG1, L:RB
1627 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1628 | // Error function does not return.
1629 |.endif
1630 |
1631 |9: // Handle stack expansion on return from yield.
1632 | mov L:RA, TMP1
1633 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1634 | mov CARG2, PC
1635 | mov CARG1, L:RB
1636 | call extern lj_state_growstack // (lua_State *L, int n)
1637 | mov L:PC, TMP1
1638 | mov BASE, L:RB->base
1639 | jmp <4 // Retry the stack move.
1640 |.endmacro
1641 |
1642 | coroutine_resume_wrap 1 // coroutine.resume
1643 | coroutine_resume_wrap 0 // coroutine.wrap
1644 |
1645 |.ffunc coroutine_yield
1646 | mov L:RB, SAVE_L
1647 | test aword L:RB->cframe, CFRAME_RESUME
1648 | jz ->fff_fallback
1649 | mov L:RB->base, BASE
1650 | lea RD, [BASE+NARGS:RD*8-8]
1651 | mov L:RB->top, RD
1652 | xor RDd, RDd
1653 | mov aword L:RB->cframe, RD
1654 | mov al, LUA_YIELD
1655 | mov byte L:RB->status, al
1656 | jmp ->vm_leave_unw
1657 |
1658 |//-- Math library -------------------------------------------------------
1659 |
1660 | .ffunc_1 math_abs
1661 | mov RB, [BASE]
1662 |.if DUALNUM
1663 | checkint RB, >3
1664 | cmp RBd, 0; jns ->fff_resi
1665 | neg RBd; js >2
1666 |->fff_resbit:
1667 |->fff_resi:
1668 | setint RB
1669 |->fff_resRB:
1670 | mov PC, [BASE-8]
1671 | mov [BASE-16], RB
1672 | jmp ->fff_res1
1673 |2:
1674 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1675 | jmp ->fff_resRB
1676 |3:
1677 | ja ->fff_fallback
1678 |.else
1679 | checknum RB, ->fff_fallback
1680 |.endif
1681 | shl RB, 1
1682 | shr RB, 1
1683 | mov PC, [BASE-8]
1684 | mov [BASE-16], RB
1685 | jmp ->fff_res1
1686 |
1687 |.ffunc_n math_sqrt, sqrtsd
1688 |->fff_resxmm0:
1689 | mov PC, [BASE-8]
1690 | movsd qword [BASE-16], xmm0
1691 | // fallthrough
1692 |
1693 |->fff_res1:
1694 | mov RDd, 1+1
1695 |->fff_res:
1696 | mov MULTRES, RDd
1697 |->fff_res_:
1698 | test PCd, FRAME_TYPE
1699 | jnz >7
1700 |5:
1701 | cmp PC_RB, RDL // More results expected?
1702 | ja >6
1703 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1704 | movzx RAd, PC_RA
1705 | neg RA
1706 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1707 | ins_next
1708 |
1709 |6: // Fill up results with nil.
1710 | mov aword [BASE+RD*8-24], LJ_TNIL
1711 | add RD, 1
1712 | jmp <5
1713 |
1714 |7: // Non-standard return case.
1715 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1716 | jmp ->vm_return
1717 |
1718 |.macro math_round, func
1719 | .ffunc math_ .. func
1720 |.if DUALNUM
1721 | mov RB, [BASE]
1722 | checknumx RB, ->fff_resRB, je
1723 | ja ->fff_fallback
1724 |.else
1725 | checknumtp [BASE], ->fff_fallback
1726 |.endif
1727 | movsd xmm0, qword [BASE]
1728 | call ->vm_ .. func .. _sse
1729 |.if DUALNUM
1730 | cvttsd2si RBd, xmm0
1731 | cmp RBd, 0x80000000
1732 | jne ->fff_resi
1733 | cvtsi2sd xmm1, RBd
1734 | ucomisd xmm0, xmm1
1735 | jp ->fff_resxmm0
1736 | je ->fff_resi
1737 |.endif
1738 | jmp ->fff_resxmm0
1739 |.endmacro
1740 |
1741 | math_round floor
1742 | math_round ceil
1743 |
1744 |.ffunc math_log
1745 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1746 | checknumtp [BASE], ->fff_fallback
1747 | movsd xmm0, qword [BASE]
1748 | mov RB, BASE
1749 | call extern log
1750 | mov BASE, RB
1751 | jmp ->fff_resxmm0
1752 |
1753 |.macro math_extern, func
1754 | .ffunc_n math_ .. func
1755 | mov RB, BASE
1756 | call extern func
1757 | mov BASE, RB
1758 | jmp ->fff_resxmm0
1759 |.endmacro
1760 |
1761 |.macro math_extern2, func
1762 | .ffunc_nn math_ .. func
1763 | mov RB, BASE
1764 | call extern func
1765 | mov BASE, RB
1766 | jmp ->fff_resxmm0
1767 |.endmacro
1768 |
1769 | math_extern log10
1770 | math_extern exp
1771 | math_extern sin
1772 | math_extern cos
1773 | math_extern tan
1774 | math_extern asin
1775 | math_extern acos
1776 | math_extern atan
1777 | math_extern sinh
1778 | math_extern cosh
1779 | math_extern tanh
1780 | math_extern2 pow
1781 | math_extern2 atan2
1782 | math_extern2 fmod
1783 |
1784 |.ffunc_2 math_ldexp
1785 | checknumtp [BASE], ->fff_fallback
1786 | checknumtp [BASE+8], ->fff_fallback
1787 | fld qword [BASE+8]
1788 | fld qword [BASE]
1789 | fscale
1790 | fpop1
1791 | mov PC, [BASE-8]
1792 | fstp qword [BASE-16]
1793 | jmp ->fff_res1
1794 |
1795 |.ffunc_n math_frexp
1796 | mov RB, BASE
1797 |.if X64WIN
1798 | lea CARG2, TMP1 // Caveat: CARG2 == BASE
1799 |.else
1800 | lea CARG1, TMP1
1801 |.endif
1802 | call extern frexp
1803 | mov BASE, RB
1804 | mov RBd, TMP1d
1805 | mov PC, [BASE-8]
1806 | movsd qword [BASE-16], xmm0
1807 |.if DUALNUM
1808 | setint RB
1809 | mov [BASE-8], RB
1810 |.else
1811 | cvtsi2sd xmm1, RBd
1812 | movsd qword [BASE-8], xmm1
1813 |.endif
1814 | mov RDd, 1+2
1815 | jmp ->fff_res
1816 |
1817 |.ffunc_n math_modf
1818 | mov RB, BASE
1819 |.if X64WIN
1820 | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
1821 |.else
1822 | lea CARG1, [BASE-16]
1823 |.endif
1824 | call extern modf
1825 | mov BASE, RB
1826 | mov PC, [BASE-8]
1827 | movsd qword [BASE-8], xmm0
1828 | mov RDd, 1+2
1829 | jmp ->fff_res
1830 |
1831 |.macro math_minmax, name, cmovop, sseop
1832 | .ffunc_1 name
1833 | mov RAd, 2
1834 |.if DUALNUM
1835 | mov RB, [BASE]
1836 | checkint RB, >4
1837 |1: // Handle integers.
1838 | cmp RAd, RDd; jae ->fff_resRB
1839 | mov TMPR, [BASE+RA*8-8]
1840 | checkint TMPR, >3
1841 | cmp RBd, TMPRd
1842 | cmovop RB, TMPR
1843 | add RAd, 1
1844 | jmp <1
1845 |3:
1846 | ja ->fff_fallback
1847 | // Convert intermediate result to number and continue below.
1848 | cvtsi2sd xmm0, RBd
1849 | jmp >6
1850 |4:
1851 | ja ->fff_fallback
1852 |.else
1853 | checknumtp [BASE], ->fff_fallback
1854 |.endif
1855 |
1856 | movsd xmm0, qword [BASE]
1857 |5: // Handle numbers or integers.
1858 | cmp RAd, RDd; jae ->fff_resxmm0
1859 |.if DUALNUM
1860 | mov RB, [BASE+RA*8-8]
1861 | checknumx RB, >6, jb
1862 | ja ->fff_fallback
1863 | cvtsi2sd xmm1, RBd
1864 | jmp >7
1865 |.else
1866 | checknumtp [BASE+RA*8-8], ->fff_fallback
1867 |.endif
1868 |6:
1869 | movsd xmm1, qword [BASE+RA*8-8]
1870 |7:
1871 | sseop xmm0, xmm1
1872 | add RAd, 1
1873 | jmp <5
1874 |.endmacro
1875 |
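|// math_min keeps the smaller value: cmovg replaces the current integer when it
|// is greater than the next one, and minsd handles the FP path; math_max
|// mirrors this with cmovl/maxsd.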
1876 | math_minmax math_min, cmovg, minsd
1877 | math_minmax math_max, cmovl, maxsd
1878 |
1879 |//-- String library -----------------------------------------------------
1880 |
1881 |.ffunc string_byte // Only handle the 1-arg case here.
1882 | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1883 | mov STR:RB, [BASE]
1884 | checkstr STR:RB, ->fff_fallback
1885 | mov PC, [BASE-8]
1886 | cmp dword STR:RB->len, 1
1887 | jb ->fff_res0 // Return no results for empty string.
1888 | movzx RBd, byte STR:RB[1]
1889 |.if DUALNUM
1890 | jmp ->fff_resi
1891 |.else
1892 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1893 |.endif
1894 |
1895 |.ffunc string_char // Only handle the 1-arg case here.
1896 | ffgccheck
1897 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1898 |.if DUALNUM
1899 | mov RB, [BASE]
1900 | checkint RB, ->fff_fallback
1901 |.else
1902 | checknumtp [BASE], ->fff_fallback
1903 | cvttsd2si RBd, qword [BASE]
1904 |.endif
1905 | cmp RBd, 255; ja ->fff_fallback
1906 | mov TMP1d, RBd
1907 | mov TMPRd, 1
1908 | lea RD, TMP1 // Points to stack. Little-endian.
1909 |->fff_newstr:
1910 | mov L:RB, SAVE_L
1911 | mov L:RB->base, BASE
1912 | mov CARG3d, TMPRd // Zero-extended to size_t.
1913 | mov CARG2, RD
1914 | mov CARG1, L:RB
1915 | mov SAVE_PC, PC
1916 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1917 |->fff_resstr:
1918 | // GCstr * returned in eax (RD).
1919 | mov BASE, L:RB->base
1920 | mov PC, [BASE-8]
1921 | settp STR:RD, LJ_TSTR
1922 | mov [BASE-16], STR:RD
1923 | jmp ->fff_res1
1924 |
1925 |.ffunc string_sub
1926 | ffgccheck
1927 | mov TMPRd, -1
1928 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1929 | jna >1
1930 |.if DUALNUM
1931 | mov TMPR, [BASE+16]
1932 | checkint TMPR, ->fff_fallback
1933 |.else
1934 | checknumtp [BASE+16], ->fff_fallback
1935 | cvttsd2si TMPRd, qword [BASE+16]
1936 |.endif
1937 |1:
1938 | mov STR:RB, [BASE]
1939 | checkstr STR:RB, ->fff_fallback
1940 |.if DUALNUM
1941 | mov ITYPE, [BASE+8]
1942 | mov RAd, ITYPEd // Must clear hiword for lea below.
1943 | sar ITYPE, 47
1944 | cmp ITYPEd, LJ_TISNUM
1945 | jne ->fff_fallback
1946 |.else
1947 | checknumtp [BASE+8], ->fff_fallback
1948 | cvttsd2si RAd, qword [BASE+8]
1949 |.endif
1950 | mov RCd, STR:RB->len
1951 | cmp RCd, TMPRd // len < end? (unsigned compare)
1952 | jb >5
1953 |2:
1954 | test RAd, RAd // start <= 0?
1955 | jle >7
1956 |3:
1957 | sub TMPRd, RAd // start > end?
1958 | jl ->fff_emptystr
1959 | lea RD, [STR:RB+RAd+#STR-1]
1960 | add TMPRd, 1
1961 |4:
1962 | jmp ->fff_newstr
1963 |
1964 |5: // Negative end or overflow.
1965 | jl >6
1966 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1967 | jmp <2
1968 |6: // Overflow.
1969 | mov TMPRd, RCd // end = len
1970 | jmp <2
1971 |
1972 |7: // Negative start or underflow.
1973 | je >8
1974 | add RAd, RCd // start = start+(len+1)
1975 | add RAd, 1
1976 | jg <3 // start > 0?
1977 |8: // Underflow.
1978 | mov RAd, 1 // start = 1
1979 | jmp <3
1980 |
1981 |->fff_emptystr: // Range underflow.
1982 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1983 | jmp <4
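|// The index normalization above follows string.sub semantics, roughly:
|//   if (end < 0) end += len+1; else if (end > len) end = len;
|//   if (start == 0) start = 1;
|//   else if (start < 0) { start += len+1; if (start < 1) start = 1; }
|//   return (start > end) ? "" : the bytes str[start..end]  (1-based, inclusive)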
1984 |
1985 |.macro ffstring_op, name
1986 | .ffunc_1 string_ .. name
1987 | ffgccheck
1988 |.if X64WIN
1989 | mov STR:TMPR, [BASE]
1990 | checkstr STR:TMPR, ->fff_fallback
1991 |.else
1992 | mov STR:CARG2, [BASE]
1993 | checkstr STR:CARG2, ->fff_fallback
1994 |.endif
1995 | mov L:RB, SAVE_L
1996 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
1997 | mov L:RB->base, BASE
1998 |.if X64WIN
1999 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
2000 |.endif
2001 | mov RC, SBUF:CARG1->b
2002 | mov SBUF:CARG1->L, L:RB
2003 | mov SBUF:CARG1->w, RC
2004 | mov SAVE_PC, PC
2005 | call extern lj_buf_putstr_ .. name
2006 | mov CARG1, rax
2007 | call extern lj_buf_tostr
2008 | jmp ->fff_resstr
2009 |.endmacro
2010 |
2011 |ffstring_op reverse
2012 |ffstring_op lower
2013 |ffstring_op upper
2014 |
2015 |//-- Bit library --------------------------------------------------------
2016 |
2017 |.macro .ffunc_bit, name, kind, fdef
2018 | fdef name
2019 |.if kind == 2
2020 | sseconst_tobit xmm1, RB
2021 |.endif
2022 |.if DUALNUM
2023 | mov RB, [BASE]
2024 | checkint RB, >1
2025 |.if kind > 0
2026 | jmp >2
2027 |.else
2028 | jmp ->fff_resbit
2029 |.endif
2030 |1:
2031 | ja ->fff_fallback
2032 | movd xmm0, RB
2033 |.else
2034 | checknumtp [BASE], ->fff_fallback
2035 | movsd xmm0, qword [BASE]
2036 |.endif
2037 |.if kind < 2
2038 | sseconst_tobit xmm1, RB
2039 |.endif
2040 | addsd xmm0, xmm1
2041 | movd RBd, xmm0
2042 |2:
2043 |.endmacro
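|// sseconst_tobit loads 2^52+2^51: adding it to a double leaves the (wrapped)
|// 32 bit integer value in the low mantissa bits, which movd then extracts.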
2044 |
2045 |.macro .ffunc_bit, name, kind
2046 | .ffunc_bit name, kind, .ffunc_1
2047 |.endmacro
2048 |
2049 |.ffunc_bit bit_tobit, 0
2050 | jmp ->fff_resbit
2051 |
2052 |.macro .ffunc_bit_op, name, ins
2053 | .ffunc_bit name, 2
2054 | mov TMPRd, NARGS:RDd // Save for fallback.
2055 | lea RD, [BASE+NARGS:RD*8-16]
2056 |1:
2057 | cmp RD, BASE
2058 | jbe ->fff_resbit
2059 |.if DUALNUM
2060 | mov RA, [RD]
2061 | checkint RA, >2
2062 | ins RBd, RAd
2063 | sub RD, 8
2064 | jmp <1
2065 |2:
2066 | ja ->fff_fallback_bit_op
2067 | movd xmm0, RA
2068 |.else
2069 | checknumtp [RD], ->fff_fallback_bit_op
2070 | movsd xmm0, qword [RD]
2071 |.endif
2072 | addsd xmm0, xmm1
2073 | movd RAd, xmm0
2074 | ins RBd, RAd
2075 | sub RD, 8
2076 | jmp <1
2077 |.endmacro
2078 |
2079 |.ffunc_bit_op bit_band, and
2080 |.ffunc_bit_op bit_bor, or
2081 |.ffunc_bit_op bit_bxor, xor
2082 |
2083 |.ffunc_bit bit_bswap, 1
2084 | bswap RBd
2085 | jmp ->fff_resbit
2086 |
2087 |.ffunc_bit bit_bnot, 1
2088 | not RBd
2089 |.if DUALNUM
2090 | jmp ->fff_resbit
2091 |.else
2092 |->fff_resbit:
2093 | cvtsi2sd xmm0, RBd
2094 | jmp ->fff_resxmm0
2095 |.endif
2096 |
2097 |->fff_fallback_bit_op:
2098 | mov NARGS:RDd, TMPRd // Restore for fallback
2099 | jmp ->fff_fallback
2100 |
2101 |.macro .ffunc_bit_sh, name, ins
2102 |.if DUALNUM
2103 | .ffunc_bit name, 1, .ffunc_2
2104 | // Note: no inline conversion from number for 2nd argument!
2105 | mov RA, [BASE+8]
2106 | checkint RA, ->fff_fallback
2107 |.else
2108 | .ffunc_nn name
2109 | sseconst_tobit xmm2, RB
2110 | addsd xmm0, xmm2
2111 | addsd xmm1, xmm2
2112 | movd RBd, xmm0
2113 | movd RAd, xmm1
2114 |.endif
2115 | ins RBd, cl // Assumes RA is ecx.
2116 | jmp ->fff_resbit
2117 |.endmacro
2118 |
2119 |.ffunc_bit_sh bit_lshift, shl
2120 |.ffunc_bit_sh bit_rshift, shr
2121 |.ffunc_bit_sh bit_arshift, sar
2122 |.ffunc_bit_sh bit_rol, rol
2123 |.ffunc_bit_sh bit_ror, ror
2124 |
2125 |//-----------------------------------------------------------------------
2126 |
2127 |->fff_fallback_2:
2128 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2129 | jmp ->fff_fallback
2130 |->fff_fallback_1:
2131 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2132 |->fff_fallback: // Call fast function fallback handler.
2133 | // BASE = new base, RD = nargs+1
2134 | mov L:RB, SAVE_L
2135 | mov PC, [BASE-8] // Fallback may overwrite PC.
2136 | mov SAVE_PC, PC // Redundant (but a defined value).
2137 | mov L:RB->base, BASE
2138 | lea RD, [BASE+NARGS:RD*8-8]
2139 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2140 | mov L:RB->top, RD
2141 | mov CFUNC:RD, [BASE-16]
2142 | cleartp CFUNC:RD
2143 | cmp RA, L:RB->maxstack
2144 | ja >5 // Need to grow stack.
2145 | mov CARG1, L:RB
2146 | call aword CFUNC:RD->f // (lua_State *L)
2147 | mov BASE, L:RB->base
2148 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2149 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2150 |1:
2151 | mov RA, L:RB->top
2152 | sub RA, BASE
2153 | shr RAd, 3
2154 | test RDd, RDd
2155 | lea NARGS:RDd, [RAd+1]
2156 | mov LFUNC:RB, [BASE-16]
2157 | jne ->vm_call_tail // Returned -1?
2158 | cleartp LFUNC:RB
2159 | ins_callt // Returned 0: retry fast path.
2160 |
2161 |// Reconstruct previous base for vmeta_call during tailcall.
2162 |->vm_call_tail:
2163 | mov RA, BASE
2164 | test PCd, FRAME_TYPE
2165 | jnz >3
2166 | movzx RBd, PC_RA
2167 | neg RB
2168 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2169 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2170 |3:
2171 | mov RB, PC
2172 | and RB, -8
2173 | sub BASE, RB
2174 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2175 |
2176 |5: // Grow stack for fallback handler.
2177 | mov CARG2d, LUA_MINSTACK
2178 | mov CARG1, L:RB
2179 | call extern lj_state_growstack // (lua_State *L, int n)
2180 | mov BASE, L:RB->base
2181 | xor RDd, RDd // Simulate a return 0.
2182 | jmp <1 // Dumb retry (goes through ff first).
2183 |
2184 |->fff_gcstep: // Call GC step function.
2185 | // BASE = new base, RD = nargs+1
2186 | pop RB // Must keep stack at same level.
2187 | mov TMP1, RB // Save return address
2188 | mov L:RB, SAVE_L
2189 | mov SAVE_PC, PC // Redundant (but a defined value).
2190 | mov L:RB->base, BASE
2191 | lea RD, [BASE+NARGS:RD*8-8]
2192 | mov CARG1, L:RB
2193 | mov L:RB->top, RD
2194 | call extern lj_gc_step // (lua_State *L)
2195 | mov BASE, L:RB->base
2196 | mov RD, L:RB->top
2197 | sub RD, BASE
2198 | shr RDd, 3
2199 | add NARGS:RDd, 1
2200 | mov RB, TMP1
2201 | push RB // Restore return address.
2202 | ret
2203 |
2204 |//-----------------------------------------------------------------------
2205 |//-- Special dispatch targets -------------------------------------------
2206 |//-----------------------------------------------------------------------
2207 |
2208 |->vm_record: // Dispatch target for recording phase.
2209 |.if JIT
2210 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2211 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2212 | jnz >5
2213 | // Decrement the hookcount for consistency, but always do the call.
2214 | test RDL, HOOK_ACTIVE
2215 | jnz >1
2216 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2217 | jz >1
2218 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2219 | jmp >1
2220 |.endif
2221 |
2222 |->vm_rethook: // Dispatch target for return hooks.
2223 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2224 | test RDL, HOOK_ACTIVE // Hook already active?
2225 | jnz >5
2226 | jmp >1
2227 |
2228 |->vm_inshook: // Dispatch target for instr/line hooks.
2229 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2230 | test RDL, HOOK_ACTIVE // Hook already active?
2231 | jnz >5
2232 |
2233 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2234 | jz >5
2235 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2236 | jz >1
2237 | test RDL, LUA_MASKLINE
2238 | jz >5
2239 |1:
2240 | mov L:RB, SAVE_L
2241 | mov L:RB->base, BASE
2242 | mov CARG2, PC // Caveat: CARG2 == BASE
2243 | mov CARG1, L:RB
2244 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2245 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2246 |3:
2247 | mov BASE, L:RB->base
2248 |4:
2249 | movzx RAd, PC_RA
2250 |5:
2251 | movzx OP, PC_OP
2252 | movzx RDd, PC_RD
2253 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2254 |
2255 |->cont_hook: // Continue from hook yield.
2256 | add PC, 4
2257 | mov RA, [RB-40]
2258 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2259 | jmp <4
2260 |
2261 |->vm_hotloop: // Hot loop counter underflow.
2262 |.if JIT
2263 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2264 | cleartp LFUNC:RB
2265 | mov RB, LFUNC:RB->pc
2266 | movzx RDd, byte [RB+PC2PROTO(framesize)]
2267 | lea RD, [BASE+RD*8]
2268 | mov L:RB, SAVE_L
2269 | mov L:RB->base, BASE
2270 | mov L:RB->top, RD
2271 | mov CARG2, PC
2272 | lea CARG1, [DISPATCH+GG_DISP2J]
2273 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2274 | mov SAVE_PC, PC
2275 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2276 | jmp <3
2277 |.endif
2278 |
2279 |->vm_callhook: // Dispatch target for call hooks.
2280 | mov SAVE_PC, PC
2281 |.if JIT
2282 | jmp >1
2283 |.endif
2284 |
2285 |->vm_hotcall: // Hot call counter underflow.
2286 |.if JIT
2287 | mov SAVE_PC, PC
2288 | or PC, 1 // Marker for hot call.
2289 |1:
2290 |.endif
2291 | lea RD, [BASE+NARGS:RD*8-8]
2292 | mov L:RB, SAVE_L
2293 | mov L:RB->base, BASE
2294 | mov L:RB->top, RD
2295 | mov CARG2, PC
2296 | mov CARG1, L:RB
2297 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2298 | // ASMFunction returned in eax/rax (RD).
2299 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2300 |.if JIT
2301 | and PC, -2
2302 |.endif
2303 | mov BASE, L:RB->base
2304 | mov RA, RD
2305 | mov RD, L:RB->top
2306 | sub RD, BASE
2307 | mov RB, RA
2308 | movzx RAd, PC_RA
2309 | shr RDd, 3
2310 | add NARGS:RDd, 1
2311 | jmp RB
2312 |
2313 |->cont_stitch: // Trace stitching.
2314 |.if JIT
2315 | // BASE = base, RC = result, RB = mbase
2316 | mov TRACE:ITYPE, [RB-40] // Save previous trace.
2317 | cleartp TRACE:ITYPE
2318 | mov TMPRd, MULTRES
2319 | movzx RAd, PC_RA
2320 | lea RA, [BASE+RA*8] // Call base.
2321 | sub TMPRd, 1
2322 | jz >2
2323 |1: // Move results down.
2324 | mov RB, [RC]
2325 | mov [RA], RB
2326 | add RC, 8
2327 | add RA, 8
2328 | sub TMPRd, 1
2329 | jnz <1
2330 |2:
2331 | movzx RCd, PC_RA
2332 | movzx RBd, PC_RB
2333 | add RC, RB
2334 | lea RC, [BASE+RC*8-8]
2335 |3:
2336 | cmp RC, RA
2337 | ja >9 // More results wanted?
2338 |
2339 | test TRACE:ITYPE, TRACE:ITYPE
2340 | jz ->cont_nop
2341 | movzx RBd, word TRACE:ITYPE->traceno
2342 | movzx RDd, word TRACE:ITYPE->link
2343 | cmp RDd, RBd
2344 | je ->cont_nop // Blacklisted.
2345 | test RDd, RDd
2346 | jne =>BC_JLOOP // Jump to stitched trace.
2347 |
2348 | // Stitch a new trace to the previous trace.
2349 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2350 | mov L:RB, SAVE_L
2351 | mov L:RB->base, BASE
2352 | mov CARG2, PC
2353 | lea CARG1, [DISPATCH+GG_DISP2J]
2354 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2355 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2356 | mov BASE, L:RB->base
2357 | jmp ->cont_nop
2358 |
2359 |9: // Fill up results with nil.
2360 | mov aword [RA], LJ_TNIL
2361 | add RA, 8
2362 | jmp <3
2363 |.endif
2364 |
2365 |->vm_profhook: // Dispatch target for profiler hook.
2366#if LJ_HASPROFILE
2367 | mov L:RB, SAVE_L
2368 | mov L:RB->base, BASE
2369 | mov CARG2, PC // Caveat: CARG2 == BASE
2370 | mov CARG1, L:RB
2371 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2372 | mov BASE, L:RB->base
2373 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2374 | sub PC, 4
2375 | jmp ->cont_nop
2376#endif
2377 |
2378 |//-----------------------------------------------------------------------
2379 |//-- Trace exit handler -------------------------------------------------
2380 |//-----------------------------------------------------------------------
2381 |
2382 |// Called from an exit stub with the exit number on the stack.
2383 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
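|// The handler reassembles the number from those two bytes and then reuses the
|// same two stack slots to save r15/r14.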
2384 |->vm_exit_handler:
2385 |.if JIT
2386 | push r13; push r12
2387 | push r11; push r10; push r9; push r8
2388 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
2389 | push rbx; push rdx; push rcx; push rax
2390 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2391 | mov RCH, byte [rbp-16]
2392 | mov [rbp-8], r15; mov [rbp-16], r14
2393 | // DISPATCH is preserved on-trace in LJ_GC64 mode.
2394 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2395 | set_vmstate EXIT
2396 | mov [DISPATCH+DISPATCH_J(exitno)], RCd
2397 | mov [DISPATCH+DISPATCH_J(parent)], RAd
2398 |.if X64WIN
2399 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2400 |.else
2401 | sub rsp, 16*8 // Room for SSE regs.
2402 |.endif
2403 | add rbp, -128
2404 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2405 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2406 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2407 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2408 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2409 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2410 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2411 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2412 | // Caveat: RB is rbp.
2413 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2414 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2415 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2416 | mov L:RB->base, BASE
2417 |.if X64WIN
2418 | lea CARG2, [rsp+4*8]
2419 |.else
2420 | mov CARG2, rsp
2421 |.endif
2422 | lea CARG1, [DISPATCH+GG_DISP2J]
2423 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2424 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2425 | // MULTRES or negated error code returned in eax (RD).
2426 | mov RA, L:RB->cframe
2427 | and RA, CFRAME_RAWMASK
2428 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2429 | mov BASE, L:RB->base
2430 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2431 | jmp >1
2432 |.endif
2433 |->vm_exit_interp:
2434 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2435 |.if JIT
2436 | // Restore additional callee-save registers only used in compiled code.
2437 |.if X64WIN
2438 | lea RA, [rsp+10*16+4*8]
2439 |1:
2440 | movdqa xmm15, [RA-10*16]
2441 | movdqa xmm14, [RA-9*16]
2442 | movdqa xmm13, [RA-8*16]
2443 | movdqa xmm12, [RA-7*16]
2444 | movdqa xmm11, [RA-6*16]
2445 | movdqa xmm10, [RA-5*16]
2446 | movdqa xmm9, [RA-4*16]
2447 | movdqa xmm8, [RA-3*16]
2448 | movdqa xmm7, [RA-2*16]
2449 | mov rsp, RA // Reposition stack to C frame.
2450 | movdqa xmm6, [RA-1*16]
2451 | mov r15, CSAVE_1
2452 | mov r14, CSAVE_2
2453 | mov r13, CSAVE_3
2454 | mov r12, CSAVE_4
2455 |.else
2456 | lea RA, [rsp+16]
2457 |1:
2458 | mov r13, [RA-8]
2459 | mov r12, [RA]
2460 | mov rsp, RA // Reposition stack to C frame.
2461 |.endif
2462 | cmp RDd, -LUA_ERRERR; jae >9 // Check for error from exit.
2463 | mov L:RB, SAVE_L
2464 | mov MULTRES, RDd
2465 | mov LFUNC:KBASE, [BASE-16]
2466 | cleartp LFUNC:KBASE
2467 | mov KBASE, LFUNC:KBASE->pc
2468 | mov KBASE, [KBASE+PC2PROTO(k)]
2469 | mov L:RB->base, BASE
2470 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2471 | set_vmstate INTERP
2472 | // Modified copy of ins_next which handles function header dispatch, too.
2473 | mov RCd, [PC]
2474 | movzx RAd, RCH
2475 | movzx OP, RCL
2476 | add PC, 4
2477 | shr RCd, 16
2478 | cmp MULTRES, -17 // Static dispatch?
2479 | je >5
2480 | cmp OP, BC_FUNCF // Function header?
2481 | jb >3
2482 | cmp OP, BC_FUNCC+2 // Fast function?
2483 | jae >4
2484 |2:
2485 | mov RCd, MULTRES // RC/RD holds nres+1.
2486 |3:
2487 | jmp aword [DISPATCH+OP*8]
2488 |
2489 |4: // Check frame below fast function.
2490 | mov RC, [BASE-8]
2491 | test RCd, FRAME_TYPE
2492 | jnz <2 // Trace stitching continuation?
2493 | // Otherwise set KBASE for Lua function below fast function.
2494 | movzx RCd, byte [RC-3]
2495 | neg RC
2496 | mov LFUNC:KBASE, [BASE+RC*8-32]
2497 | cleartp LFUNC:KBASE
2498 | mov KBASE, LFUNC:KBASE->pc
2499 | mov KBASE, [KBASE+PC2PROTO(k)]
2500 | jmp <2
2501 |
2502 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2503 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2504 | mov TRACE:RA, [RA+RD*8]
2505 | mov RCd, TRACE:RA->startins
2506 | movzx RAd, RCH
2507 | movzx OP, RCL
2508 | shr RCd, 16
2509 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
2510 |
2511 |9: // Rethrow error from the right C frame.
2512 | mov CARG2d, RDd
2513 | mov CARG1, L:RB
2514 | neg CARG2d
2515 | call extern lj_err_trace // (lua_State *L, int errcode)
2516 |.endif
2517 |
2518 |//-----------------------------------------------------------------------
2519 |//-- Math helper functions ----------------------------------------------
2520 |//-----------------------------------------------------------------------
2521 |
2522 |// FP value rounding. Called by math.floor/math.ceil fast functions
2523 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
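|// The rounding uses the usual double-precision bias trick: for |x| < 2^52
|// the sum (|x| + 2^52) - 2^52 yields |x| rounded to the nearest integer,
|// the saved sign bit is merged back in, and a conditional +-1 correction
|// turns that into floor (mode 0), ceil (mode 1) or trunc (mode 2).
|// Values with |x| >= 2^52 are already integral and are returned unchanged.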
2524 |.macro vm_round, name, mode, cond
2525 |->name:
2526 |->name .. _sse:
2527 | sseconst_abs xmm2, RD
2528 | sseconst_2p52 xmm3, RD
2529 | movaps xmm1, xmm0
2530 | andpd xmm1, xmm2 // |x|
2531 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2532 | jbe >1
2533 | andnpd xmm2, xmm0 // Isolate sign bit.
2534 |.if mode == 2 // trunc(x)?
2535 | movaps xmm0, xmm1
2536 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2537 | subsd xmm1, xmm3
2538 | sseconst_1 xmm3, RD
2539 | cmpsd xmm0, xmm1, 1 // |x| < result?
2540 | andpd xmm0, xmm3
2541 | subsd xmm1, xmm0 // If yes, subtract -1.
2542 | orpd xmm1, xmm2 // Merge sign bit back in.
2543 |.else
2544 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2545 | subsd xmm1, xmm3
2546 | orpd xmm1, xmm2 // Merge sign bit back in.
2547 | sseconst_1 xmm3, RD
2548 | .if mode == 1 // ceil(x)?
2549 | cmpsd xmm0, xmm1, 6 // x > result?
2550 | andpd xmm0, xmm3
2551 | addsd xmm1, xmm0 // If yes, add 1.
2552 | orpd xmm1, xmm2 // Merge sign bit back in (again).
2553 | .else // floor(x)?
2554 | cmpsd xmm0, xmm1, 1 // x < result?
2555 | andpd xmm0, xmm3
2556 | subsd xmm1, xmm0 // If yes, subtract 1.
2557 | .endif
2558 |.endif
2559 | movaps xmm0, xmm1
2560 |1:
2561 | ret
2562 |.endmacro
2563 |
2564 | vm_round vm_floor, 0, 1
2565 | vm_round vm_ceil, 1, JIT
2566 | vm_round vm_trunc, 2, JIT
2567 |
2568 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2569 |->vm_mod:
2570 |// Args in xmm0/xmm1, return value in xmm0.
2571 |// Caveat: xmm0-xmm5 and RC (eax) modified!
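|// Computes x - floor(x/y)*y (Lua modulo semantics). floor(x/y) is inlined
|// with the same 2^52 bias trick as vm_floor above; if |x/y| >= 2^52 the
|// quotient is already integral and the plain multiply-subtract at 1: is used.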
2572 | movaps xmm5, xmm0
2573 | divsd xmm0, xmm1
2574 | sseconst_abs xmm2, RD
2575 | sseconst_2p52 xmm3, RD
2576 | movaps xmm4, xmm0
2577 | andpd xmm4, xmm2 // |x/y|
2578 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2579 | jbe >1
2580 | andnpd xmm2, xmm0 // Isolate sign bit.
2581 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2582 | subsd xmm4, xmm3
2583 | orpd xmm4, xmm2 // Merge sign bit back in.
2584 | sseconst_1 xmm2, RD
2585 | cmpsd xmm0, xmm4, 1 // x/y < result?
2586 | andpd xmm0, xmm2
2587 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2588 | movaps xmm0, xmm5
2589 | mulsd xmm1, xmm4
2590 | subsd xmm0, xmm1
2591 | ret
2592 |1:
2593 | mulsd xmm1, xmm0
2594 | movaps xmm0, xmm5
2595 | subsd xmm0, xmm1
2596 | ret
2597 |
2598 |//-----------------------------------------------------------------------
2599 |//-- Miscellaneous functions --------------------------------------------
2600 |//-----------------------------------------------------------------------
2601 |
2602 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
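|// Note: ecx is zeroed to select sub-leaf 0 of the requested CPUID leaf.
|// On Win64 the result pointer arrives in rdx (CARG2), so the callee-saved
|// rsi is pushed and reused, which lets the four stores below be shared
|// with the SysV path (where CARG2 already is rsi).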
2603 |->vm_cpuid:
2604 | mov eax, CARG1d
2605 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
2606 | push rbx
2607 | xor ecx, ecx
2608 | cpuid
2609 | mov [rsi], eax
2610 | mov [rsi+4], ebx
2611 | mov [rsi+8], ecx
2612 | mov [rsi+12], edx
2613 | pop rbx
2614 | .if X64WIN; pop rsi; .endif
2615 | ret
2616 |
2617 |.define NEXT_TAB, TAB:CARG1
2618 |.define NEXT_IDX, CARG2d
2619 |.define NEXT_IDXa, CARG2
2620 |.define NEXT_PTR, RC
2621 |.define NEXT_PTRd, RCd
2622 |.define NEXT_TMP, CARG3
2623 |.define NEXT_ASIZE, CARG4d
2624 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
2625 |.if X64WIN
2626 |.define NEXT_RES_PTR, [rsp+aword*5]
2627 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
2628 |.else
2629 |.define NEXT_RES_PTR, [rsp+aword*1]
2630 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
2631 |.endif
2632 |
2633 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
2634 |// Next idx returned in edx.
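|// The result is written as a {value, key} TValue pair (value in slot 0,
|// key in slot 1): for the array part into a scratch area on the caller's
|// stack, for the hash part the returned pointer goes straight into the
|// Node, which has the same val/key layout. The index counts array slots
|// first, then hash slots offset by asize; end of iteration is signalled
|// by writing nil to the key slot (see 9: below).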
2635 |->vm_next:
2636 |.if JIT
2637 | mov NEXT_ASIZE, NEXT_TAB->asize
2638 |1: // Traverse array part.
2639 | cmp NEXT_IDX, NEXT_ASIZE; jae >5
2640 | mov NEXT_TMP, NEXT_TAB->array
2641 | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8]
2642 | cmp NEXT_TMP, LJ_TNIL; je >2
2643 | lea NEXT_PTR, NEXT_RES_PTR
2644 | mov qword [NEXT_PTR], NEXT_TMP
2645 |.if DUALNUM
2646 | setint NEXT_TMP, NEXT_IDXa
2647 | mov qword [NEXT_PTR+qword*1], NEXT_TMP
2648 |.else
2649 | cvtsi2sd xmm0, NEXT_IDX
2650 | movsd qword [NEXT_PTR+qword*1], xmm0
2651 |.endif
2652 | NEXT_RES_IDX 1
2653 | ret
2654 |2: // Skip holes in array part.
2655 | add NEXT_IDX, 1
2656 | jmp <1
2657 |
2658 |5: // Traverse hash part.
2659 | sub NEXT_IDX, NEXT_ASIZE
2660 |6:
2661 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
2662 | imul NEXT_PTRd, NEXT_IDX, #NODE
2663 | add NODE:NEXT_PTR, NEXT_TAB->node
2664 | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
2665 | NEXT_RES_IDXL NEXT_ASIZE+1
2666 | ret
2667 |7: // Skip holes in hash part.
2668 | add NEXT_IDX, 1
2669 | jmp <6
2670 |
2671 |9: // End of iteration. Set the key to nil (not the value).
2672 | NEXT_RES_IDX NEXT_ASIZE
2673 | lea NEXT_PTR, NEXT_RES_PTR
2674 | mov qword [NEXT_PTR+qword*1], LJ_TNIL
2675 | ret
2676 |.endif
2677 |
2678 |//-----------------------------------------------------------------------
2679 |//-- Assertions ---------------------------------------------------------
2680 |//-----------------------------------------------------------------------
2681 |
2682 |->assert_bad_for_arg_type:
2683#ifdef LUA_USE_ASSERT
2684 | int3
2685#endif
2686 | int3
2687 |
2688 |//-----------------------------------------------------------------------
2689 |//-- FFI helper functions -----------------------------------------------
2690 |//-----------------------------------------------------------------------
2691 |
2692 |// Handler for callback functions. Callback slot number in ah/al.
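|// In outline: the generated callback mcode jumps here with the slot number
|// in ax. The incoming GPR/FPR arguments and a pointer to the stack args are
|// stashed in cts->cb, lj_ccallback_enter() converts them to Lua values and
|// returns the lua_State, and the Lua callback function is then entered via
|// ins_callt. ->cont_ffi_callback below converts the result back with
|// lj_ccallback_leave() and reloads the C return registers (rax/xmm0).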
2693 |->vm_ffi_callback:
2694 |.if FFI
2695 |.type CTSTATE, CTState, PC
2696 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2697 | lea DISPATCH, [ebp+GG_G2DISP]
2698 | mov CTSTATE, GL:ebp->ctype_state
2699 | movzx eax, ax
2700 | mov CTSTATE->cb.slot, eax
2701 | mov CTSTATE->cb.gpr[0], CARG1
2702 | mov CTSTATE->cb.gpr[1], CARG2
2703 | mov CTSTATE->cb.gpr[2], CARG3
2704 | mov CTSTATE->cb.gpr[3], CARG4
2705 | movsd qword CTSTATE->cb.fpr[0], xmm0
2706 | movsd qword CTSTATE->cb.fpr[1], xmm1
2707 | movsd qword CTSTATE->cb.fpr[2], xmm2
2708 | movsd qword CTSTATE->cb.fpr[3], xmm3
2709 |.if X64WIN
2710 | lea rax, [rsp+CFRAME_SIZE+4*8]
2711 |.else
2712 | lea rax, [rsp+CFRAME_SIZE]
2713 | mov CTSTATE->cb.gpr[4], CARG5
2714 | mov CTSTATE->cb.gpr[5], CARG6
2715 | movsd qword CTSTATE->cb.fpr[4], xmm4
2716 | movsd qword CTSTATE->cb.fpr[5], xmm5
2717 | movsd qword CTSTATE->cb.fpr[6], xmm6
2718 | movsd qword CTSTATE->cb.fpr[7], xmm7
2719 |.endif
2720 | mov CTSTATE->cb.stack, rax
2721 | mov CARG2, rsp
2722 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2723 | mov CARG1, CTSTATE
2724 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2725 | // lua_State * returned in eax (RD).
2726 | set_vmstate INTERP
2727 | mov BASE, L:RD->base
2728 | mov RD, L:RD->top
2729 | sub RD, BASE
2730 | mov LFUNC:RB, [BASE-16]
2731 | cleartp LFUNC:RB
2732 | shr RD, 3
2733 | add RD, 1
2734 | ins_callt
2735 |.endif
2736 |
2737 |->cont_ffi_callback: // Return from FFI callback.
2738 |.if FFI
2739 | mov L:RA, SAVE_L
2740 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2741 | mov aword CTSTATE->L, L:RA
2742 | mov L:RA->base, BASE
2743 | mov L:RA->top, RB
2744 | mov CARG1, CTSTATE
2745 | mov CARG2, RC
2746 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2747 | mov rax, CTSTATE->cb.gpr[0]
2748 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2749 | jmp ->vm_leave_unw
2750 |.endif
2751 |
2752 |->vm_ffi_call: // Call C function via FFI.
2753 | // Caveat: needs special frame unwinding, see below.
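| // In outline: CCallState (CARG1) fully describes the call. The stack is
| // grown by ->spadj, outgoing stack arguments are copied from ->stack,
| // up to 4 (Win64) or 6 (SysV) GPR arguments are loaded, the FP argument
| // registers are loaded when ->nfpr is non-zero (xmm4-xmm7 only on SysV),
| // ->func is called, and the return registers are stored back into
| // ->gpr[0]/->fpr[0] (plus rdx/xmm1 on SysV).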
2754 |.if FFI
2755 | .type CCSTATE, CCallState, rbx
2756 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
2757 |
2758 | // Readjust stack.
2759 | mov eax, CCSTATE->spadj
2760 | sub rsp, rax
2761 |
2762 | // Copy stack slots.
2763 | movzx ecx, byte CCSTATE->nsp
2764 | sub ecx, 8
2765 | js >2
2766 |1:
2767 | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
2768 | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
2769 | sub ecx, 8
2770 | jns <1
2771 |2:
2772 |
2773 | movzx eax, byte CCSTATE->nfpr
2774 | mov CARG1, CCSTATE->gpr[0]
2775 | mov CARG2, CCSTATE->gpr[1]
2776 | mov CARG3, CCSTATE->gpr[2]
2777 | mov CARG4, CCSTATE->gpr[3]
2778 |.if not X64WIN
2779 | mov CARG5, CCSTATE->gpr[4]
2780 | mov CARG6, CCSTATE->gpr[5]
2781 |.endif
2782 | test eax, eax; jz >5
2783 | movaps xmm0, CCSTATE->fpr[0]
2784 | movaps xmm1, CCSTATE->fpr[1]
2785 | movaps xmm2, CCSTATE->fpr[2]
2786 | movaps xmm3, CCSTATE->fpr[3]
2787 |.if not X64WIN
2788 | cmp eax, 4; jbe >5
2789 | movaps xmm4, CCSTATE->fpr[4]
2790 | movaps xmm5, CCSTATE->fpr[5]
2791 | movaps xmm6, CCSTATE->fpr[6]
2792 | movaps xmm7, CCSTATE->fpr[7]
2793 |.endif
2794 |5:
2795 |
2796 | call aword CCSTATE->func
2797 |
2798 | mov CCSTATE->gpr[0], rax
2799 | movaps CCSTATE->fpr[0], xmm0
2800 |.if not X64WIN
2801 | mov CCSTATE->gpr[1], rdx
2802 | movaps CCSTATE->fpr[1], xmm1
2803 |.endif
2804 |
2805 | mov rbx, [rbp-8]; leave; ret
2806 |.endif
2807 |// Note: vm_ffi_call must be the last function in this object file!
2808 |
2809 |//-----------------------------------------------------------------------
2810}
2811
2812/* Generate the code for a single instruction. */
2813static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2814{
2815 int vk = 0;
2816 |// Note: aligning all instructions does not pay off.
2817 |=>defop:
2818
2819 switch (op) {
2820
2821 /* -- Comparison ops ---------------------------------------------------- */
2822
2823 /* Remember: all ops branch for a true comparison, fall through otherwise. */
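  /*
  ** DynASM notation used below: '|' lines are assembler code emitted at this
  ** point, '||' lines inside a .macro are C code interleaved when the macro
  ** is expanded, '->name' defines or references a global label, '>n'/'<n'
  ** reference the next/previous local label 'n:', and '=>expr' is a dynamic
  ** label (here: the per-opcode dispatch target).
  */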
2824
2825 |.macro jmp_comp, lt, ge, le, gt, target
2826 ||switch (op) {
2827 ||case BC_ISLT:
2828 | lt target
2829 ||break;
2830 ||case BC_ISGE:
2831 | ge target
2832 ||break;
2833 ||case BC_ISLE:
2834 | le target
2835 ||break;
2836 ||case BC_ISGT:
2837 | gt target
2838 ||break;
2839 ||default: break; /* Shut up GCC. */
2840 ||}
2841 |.endmacro
2842
2843 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2844 | // RA = src1, RD = src2, JMP with RD = target
2845 | ins_AD
2846 | mov ITYPE, [BASE+RA*8]
2847 | mov RB, [BASE+RD*8]
2848 | mov RA, ITYPE
2849 | mov RD, RB
2850 | sar ITYPE, 47
2851 | sar RB, 47
2852 |.if DUALNUM
2853 | cmp ITYPEd, LJ_TISNUM; jne >7
2854 | cmp RBd, LJ_TISNUM; jne >8
2855 | add PC, 4
2856 | cmp RAd, RDd
2857 | jmp_comp jge, jl, jg, jle, >9
2858 |6:
2859 | movzx RDd, PC_RD
2860 | branchPC RD
2861 |9:
2862 | ins_next
2863 |
2864 |7: // RA is not an integer.
2865 | ja ->vmeta_comp
2866 | // RA is a number.
2867 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2868 | // RA is a number, RD is an integer.
2869 | cvtsi2sd xmm0, RDd
2870 | jmp >2
2871 |
2872 |8: // RA is an integer, RD is not an integer.
2873 | ja ->vmeta_comp
2874 | // RA is an integer, RD is a number.
2875 | cvtsi2sd xmm1, RAd
2876 | movd xmm0, RD
2877 | jmp >3
2878 |.else
2879 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2880 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2881 |.endif
2882 |1:
2883 | movd xmm0, RD
2884 |2:
2885 | movd xmm1, RA
2886 |3:
2887 | add PC, 4
2888 | ucomisd xmm0, xmm1
2889 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2890 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2891 |.if DUALNUM
2892 | jmp_comp jbe, ja, jb, jae, <9
2893 | jmp <6
2894 |.else
2895 | jmp_comp jbe, ja, jb, jae, >1
2896 | movzx RDd, PC_RD
2897 | branchPC RD
2898 |1:
2899 | ins_next
2900 |.endif
2901 break;
2902
2903 case BC_ISEQV: case BC_ISNEV:
2904 vk = op == BC_ISEQV;
2905 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2906 | mov RB, [BASE+RD*8]
2907 | mov ITYPE, [BASE+RA*8]
2908 | add PC, 4
2909 | mov RD, RB
2910 | mov RA, ITYPE
2911 | sar RB, 47
2912 | sar ITYPE, 47
2913 |.if DUALNUM
2914 | cmp RBd, LJ_TISNUM; jne >7
2915 | cmp ITYPEd, LJ_TISNUM; jne >8
2916 | cmp RDd, RAd
2917 if (vk) {
2918 | jne >9
2919 } else {
2920 | je >9
2921 }
2922 | movzx RDd, PC_RD
2923 | branchPC RD
2924 |9:
2925 | ins_next
2926 |
2927 |7: // RD is not an integer.
2928 | ja >5
2929 | // RD is a number.
2930 | movd xmm1, RD
2931 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2932 | // RD is a number, RA is an integer.
2933 | cvtsi2sd xmm0, RAd
2934 | jmp >2
2935 |
2936 |8: // RD is an integer, RA is not an integer.
2937 | ja >5
2938 | // RD is an integer, RA is a number.
2939 | cvtsi2sd xmm1, RDd
2940 | jmp >1
2941 |
2942 |.else
2943 | cmp RBd, LJ_TISNUM; jae >5
2944 | cmp ITYPEd, LJ_TISNUM; jae >5
2945 | movd xmm1, RD
2946 |.endif
2947 |1:
2948 | movd xmm0, RA
2949 |2:
2950 | ucomisd xmm0, xmm1
2951 |4:
2952 iseqne_fp:
2953 if (vk) {
2954 | jp >2 // Unordered means not equal.
2955 | jne >2
2956 } else {
2957 | jp >2 // Unordered means not equal.
2958 | je >1
2959 }
2960 iseqne_end:
2961 if (vk) {
2962 |1: // EQ: Branch to the target.
2963 | movzx RDd, PC_RD
2964 | branchPC RD
2965 |2: // NE: Fallthrough to next instruction.
2966 |.if not FFI
2967 |3:
2968 |.endif
2969 } else {
2970 |.if not FFI
2971 |3:
2972 |.endif
2973 |2: // NE: Branch to the target.
2974 | movzx RDd, PC_RD
2975 | branchPC RD
2976 |1: // EQ: Fallthrough to next instruction.
2977 }
2978 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2979 op == BC_ISEQN || op == BC_ISNEN)) {
2980 | jmp <9
2981 } else {
2982 | ins_next
2983 }
2984 |
2985 if (op == BC_ISEQV || op == BC_ISNEV) {
2986 |5: // Either or both types are not numbers.
2987 |.if FFI
2988 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2989 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2990 |.endif
2991 | cmp RA, RD
2992 | je <1 // Same GCobjs or pvalues?
2993 | cmp RBd, ITYPEd
2994 | jne <2 // Not the same type?
2995 | cmp RBd, LJ_TISTABUD
2996 | ja <2 // Different objects and not table/ud?
2997 |
2998 | // Different tables or userdatas. Need to check __eq metamethod.
2999 | // Field metatable must be at same offset for GCtab and GCudata!
3000 | cleartp TAB:RA
3001 | mov TAB:RB, TAB:RA->metatable
3002 | test TAB:RB, TAB:RB
3003 | jz <2 // No metatable?
3004 | test byte TAB:RB->nomm, 1<<MM_eq
3005 | jnz <2 // Or 'no __eq' flag set?
3006 if (vk) {
3007 | xor RBd, RBd // ne = 0
3008 } else {
3009 | mov RBd, 1 // ne = 1
3010 }
3011 | jmp ->vmeta_equal // Handle __eq metamethod.
3012 } else {
3013 |.if FFI
3014 |3:
3015 | cmp ITYPEd, LJ_TCDATA
3016 if (LJ_DUALNUM && vk) {
3017 | jne <9
3018 } else {
3019 | jne <2
3020 }
3021 | jmp ->vmeta_equal_cd
3022 |.endif
3023 }
3024 break;
3025 case BC_ISEQS: case BC_ISNES:
3026 vk = op == BC_ISEQS;
3027 | ins_AND // RA = src, RD = str const, JMP with RD = target
3028 | mov RB, [BASE+RA*8]
3029 | add PC, 4
3030 | checkstr RB, >3
3031 | cmp RB, [KBASE+RD*8]
3032 iseqne_test:
3033 if (vk) {
3034 | jne >2
3035 } else {
3036 | je >1
3037 }
3038 goto iseqne_end;
3039 case BC_ISEQN: case BC_ISNEN:
3040 vk = op == BC_ISEQN;
3041 | ins_AD // RA = src, RD = num const, JMP with RD = target
3042 | mov RB, [BASE+RA*8]
3043 | add PC, 4
3044 |.if DUALNUM
3045 | checkint RB, >7
3046 | mov RD, [KBASE+RD*8]
3047 | checkint RD, >8
3048 | cmp RBd, RDd
3049 if (vk) {
3050 | jne >9
3051 } else {
3052 | je >9
3053 }
3054 | movzx RDd, PC_RD
3055 | branchPC RD
3056 |9:
3057 | ins_next
3058 |
3059 |7: // RA is not an integer.
3060 | ja >3
3061 | // RA is a number.
3062 | mov RD, [KBASE+RD*8]
3063 | checkint RD, >1
3064 | // RA is a number, RD is an integer.
3065 | cvtsi2sd xmm0, RDd
3066 | jmp >2
3067 |
3068 |8: // RA is an integer, RD is a number.
3069 | cvtsi2sd xmm0, RBd
3070 | movd xmm1, RD
3071 | ucomisd xmm0, xmm1
3072 | jmp >4
3073 |1:
3074 | movd xmm0, RD
3075 |.else
3076 | checknum RB, >3
3077 |1:
3078 | movsd xmm0, qword [KBASE+RD*8]
3079 |.endif
3080 |2:
3081 | ucomisd xmm0, qword [BASE+RA*8]
3082 |4:
3083 goto iseqne_fp;
3084 case BC_ISEQP: case BC_ISNEP:
3085 vk = op == BC_ISEQP;
3086 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3087 | mov RB, [BASE+RA*8]
3088 | sar RB, 47
3089 | add PC, 4
3090 | cmp RBd, RDd
3091 if (!LJ_HASFFI) goto iseqne_test;
3092 if (vk) {
3093 | jne >3
3094 | movzx RDd, PC_RD
3095 | branchPC RD
3096 |2:
3097 | ins_next
3098 |3:
3099 | cmp RBd, LJ_TCDATA; jne <2
3100 | jmp ->vmeta_equal_cd
3101 } else {
3102 | je >2
3103 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3104 | movzx RDd, PC_RD
3105 | branchPC RD
3106 |2:
3107 | ins_next
3108 }
3109 break;
3110
3111 /* -- Unary test and copy ops ------------------------------------------- */
3112
3113 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3114 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3115 | mov ITYPE, [BASE+RD*8]
3116 | add PC, 4
3117 if (op == BC_ISTC || op == BC_ISFC) {
3118 | mov RB, ITYPE
3119 }
3120 | sar ITYPE, 47
3121 | cmp ITYPEd, LJ_TISTRUECOND
3122 if (op == BC_IST || op == BC_ISTC) {
3123 | jae >1
3124 } else {
3125 | jb >1
3126 }
3127 if (op == BC_ISTC || op == BC_ISFC) {
3128 | mov [BASE+RA*8], RB
3129 }
3130 | movzx RDd, PC_RD
3131 | branchPC RD
3132 |1: // Fallthrough to the next instruction.
3133 | ins_next
3134 break;
3135
3136 case BC_ISTYPE:
3137 | ins_AD // RA = src, RD = -type
3138 | mov RB, [BASE+RA*8]
3139 | sar RB, 47
3140 | add RBd, RDd
3141 | jne ->vmeta_istype
3142 | ins_next
3143 break;
3144 case BC_ISNUM:
3145 | ins_AD // RA = src, RD = -(TISNUM-1)
3146 | checknumtp [BASE+RA*8], ->vmeta_istype
3147 | ins_next
3148 break;
3149
3150 /* -- Unary ops --------------------------------------------------------- */
3151
3152 case BC_MOV:
3153 | ins_AD // RA = dst, RD = src
3154 | mov RB, [BASE+RD*8]
3155 | mov [BASE+RA*8], RB
3156 | ins_next_
3157 break;
3158 case BC_NOT:
3159 | ins_AD // RA = dst, RD = src
3160 | mov RB, [BASE+RD*8]
3161 | sar RB, 47
3162 | mov RCd, 2
3163 | cmp RB, LJ_TISTRUECOND
3164 | sbb RCd, 0
3165 | shl RC, 47
3166 | not RC
3167 | mov [BASE+RA*8], RC
3168 | ins_next
3169 break;
3170 case BC_UNM:
3171 | ins_AD // RA = dst, RD = src
3172 | mov RB, [BASE+RD*8]
3173 |.if DUALNUM
3174 | checkint RB, >5
3175 | neg RBd
3176 | jo >4
3177 | setint RB
3178 |9:
3179 | mov [BASE+RA*8], RB
3180 | ins_next
3181 |4:
3182 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3183 | jmp <9
3184 |5:
3185 | ja ->vmeta_unm
3186 |.else
3187 | checknum RB, ->vmeta_unm
3188 |.endif
3189 | mov64 RD, U64x(80000000,00000000)
3190 | xor RB, RD
3191 |.if DUALNUM
3192 | jmp <9
3193 |.else
3194 | mov [BASE+RA*8], RB
3195 | ins_next
3196 |.endif
3197 break;
3198 case BC_LEN:
3199 | ins_AD // RA = dst, RD = src
3200 | mov RD, [BASE+RD*8]
3201 | checkstr RD, >2
3202 |.if DUALNUM
3203 | mov RDd, dword STR:RD->len
3204 |1:
3205 | setint RD
3206 | mov [BASE+RA*8], RD
3207 |.else
3208 | xorps xmm0, xmm0
3209 | cvtsi2sd xmm0, dword STR:RD->len
3210 |1:
3211 | movsd qword [BASE+RA*8], xmm0
3212 |.endif
3213 | ins_next
3214 |2:
3215 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3216 | mov TAB:CARG1, TAB:RD
3217#if LJ_52
3218 | mov TAB:RB, TAB:RD->metatable
3219 | cmp TAB:RB, 0
3220 | jnz >9
3221 |3:
3222#endif
3223 |->BC_LEN_Z:
3224 | mov RB, BASE // Save BASE.
3225 | call extern lj_tab_len // (GCtab *t)
3226 | // Length of table returned in eax (RD).
3227 |.if DUALNUM
3228 | // Nothing to do.
3229 |.else
3230 | cvtsi2sd xmm0, RDd
3231 |.endif
3232 | mov BASE, RB // Restore BASE.
3233 | movzx RAd, PC_RA
3234 | jmp <1
3235#if LJ_52
3236 |9: // Check for __len.
3237 | test byte TAB:RB->nomm, 1<<MM_len
3238 | jnz <3
3239 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3240#endif
3241 break;
3242
3243 /* -- Binary ops -------------------------------------------------------- */
3244
3245 |.macro ins_arithpre, sseins, ssereg
3246 | ins_ABC
3247 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3248 ||switch (vk) {
3249 ||case 0:
3250 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3251 | .if DUALNUM
3252 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3253 | .endif
3254 | movsd xmm0, qword [BASE+RB*8]
3255 | sseins ssereg, qword [KBASE+RC*8]
3256 || break;
3257 ||case 1:
3258 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3259 | .if DUALNUM
3260 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3261 | .endif
3262 | movsd xmm0, qword [KBASE+RC*8]
3263 | sseins ssereg, qword [BASE+RB*8]
3264 || break;
3265 ||default:
3266 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3267 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3268 | movsd xmm0, qword [BASE+RB*8]
3269 | sseins ssereg, qword [BASE+RC*8]
3270 || break;
3271 ||}
3272 |.endmacro
3273 |
3274 |.macro ins_arithdn, intins
3275 | ins_ABC
3276 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3277 ||switch (vk) {
3278 ||case 0:
3279 | mov RB, [BASE+RB*8]
3280 | mov RC, [KBASE+RC*8]
3281 | checkint RB, ->vmeta_arith_vno
3282 | checkint RC, ->vmeta_arith_vno
3283 | intins RBd, RCd; jo ->vmeta_arith_vno
3284 || break;
3285 ||case 1:
3286 | mov RB, [BASE+RB*8]
3287 | mov RC, [KBASE+RC*8]
3288 | checkint RB, ->vmeta_arith_nvo
3289 | checkint RC, ->vmeta_arith_nvo
3290 | intins RCd, RBd; jo ->vmeta_arith_nvo
3291 || break;
3292 ||default:
3293 | mov RB, [BASE+RB*8]
3294 | mov RC, [BASE+RC*8]
3295 | checkint RB, ->vmeta_arith_vvo
3296 | checkint RC, ->vmeta_arith_vvo
3297 | intins RBd, RCd; jo ->vmeta_arith_vvo
3298 || break;
3299 ||}
3300 ||if (vk == 1) {
3301 | setint RC
3302 | mov [BASE+RA*8], RC
3303 ||} else {
3304 | setint RB
3305 | mov [BASE+RA*8], RB
3306 ||}
3307 | ins_next
3308 |.endmacro
3309 |
3310 |.macro ins_arithpost
3311 | movsd qword [BASE+RA*8], xmm0
3312 |.endmacro
3313 |
3314 |.macro ins_arith, sseins
3315 | ins_arithpre sseins, xmm0
3316 | ins_arithpost
3317 | ins_next
3318 |.endmacro
3319 |
3320 |.macro ins_arith, intins, sseins
3321 |.if DUALNUM
3322 | ins_arithdn intins
3323 |.else
3324 | ins_arith, sseins
3325 |.endif
3326 |.endmacro
3327
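  | // vk = (op - BC_ADDVN) / (BC_ADDNV - BC_ADDVN) classifies the operand
  | // pattern: 0 = VN (slot op number constant), 1 = NV (number constant op
  | // slot), otherwise VV (slot op slot). ins_arithdn is the DUALNUM integer
  | // path; overflow or non-integer operands fall back to the corresponding
  | // vmeta_arith_* handler. ins_arithpre/ins_arithpost handle the FP path
  | // via xmm0.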
3328 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3329 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3330 | ins_arith add, addsd
3331 break;
3332 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3333 | ins_arith sub, subsd
3334 break;
3335 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3336 | ins_arith imul, mulsd
3337 break;
3338 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3339 | ins_arith divsd
3340 break;
3341 case BC_MODVN:
3342 | ins_arithpre movsd, xmm1
3343 |->BC_MODVN_Z:
3344 | call ->vm_mod
3345 | ins_arithpost
3346 | ins_next
3347 break;
3348 case BC_MODNV: case BC_MODVV:
3349 | ins_arithpre movsd, xmm1
3350 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3351 break;
3352 case BC_POW:
3353 | ins_arithpre movsd, xmm1
3354 | mov RB, BASE
3355 | call extern pow
3356 | movzx RAd, PC_RA
3357 | mov BASE, RB
3358 | ins_arithpost
3359 | ins_next
3360 break;
3361
3362 case BC_CAT:
3363 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3364 | mov L:CARG1, SAVE_L
3365 | mov L:CARG1->base, BASE
3366 | lea CARG2, [BASE+RC*8]
3367 | mov CARG3d, RCd
3368 | sub CARG3d, RBd
3369 |->BC_CAT_Z:
3370 | mov L:RB, L:CARG1
3371 | mov SAVE_PC, PC
3372 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3373 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3374 | mov BASE, L:RB->base
3375 | test RC, RC
3376 | jnz ->vmeta_binop
3377 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3378 | movzx RAd, PC_RA
3379 | mov RC, [BASE+RB*8]
3380 | mov [BASE+RA*8], RC
3381 | ins_next
3382 break;
3383
3384 /* -- Constant ops ------------------------------------------------------ */
3385
3386 case BC_KSTR:
3387 | ins_AND // RA = dst, RD = str const (~)
3388 | mov RD, [KBASE+RD*8]
3389 | settp RD, LJ_TSTR
3390 | mov [BASE+RA*8], RD
3391 | ins_next
3392 break;
3393 case BC_KCDATA:
3394 |.if FFI
3395 | ins_AND // RA = dst, RD = cdata const (~)
3396 | mov RD, [KBASE+RD*8]
3397 | settp RD, LJ_TCDATA
3398 | mov [BASE+RA*8], RD
3399 | ins_next
3400 |.endif
3401 break;
3402 case BC_KSHORT:
3403 | ins_AD // RA = dst, RD = signed int16 literal
3404 |.if DUALNUM
3405 | movsx RDd, RDW
3406 | setint RD
3407 | mov [BASE+RA*8], RD
3408 |.else
3409 | movsx RDd, RDW // Sign-extend literal.
3410 | cvtsi2sd xmm0, RDd
3411 | movsd qword [BASE+RA*8], xmm0
3412 |.endif
3413 | ins_next
3414 break;
3415 case BC_KNUM:
3416 | ins_AD // RA = dst, RD = num const
3417 | movsd xmm0, qword [KBASE+RD*8]
3418 | movsd qword [BASE+RA*8], xmm0
3419 | ins_next
3420 break;
3421 case BC_KPRI:
3422 | ins_AD // RA = dst, RD = primitive type (~)
3423 | shl RD, 47
3424 | not RD
3425 | mov [BASE+RA*8], RD
3426 | ins_next
3427 break;
3428 case BC_KNIL:
3429 | ins_AD // RA = dst_start, RD = dst_end
3430 | lea RA, [BASE+RA*8+8]
3431 | lea RD, [BASE+RD*8]
3432 | mov RB, LJ_TNIL
3433 | mov [RA-8], RB // Sets minimum 2 slots.
3434 |1:
3435 | mov [RA], RB
3436 | add RA, 8
3437 | cmp RA, RD
3438 | jbe <1
3439 | ins_next
3440 break;
3441
3442 /* -- Upvalue and function ops ------------------------------------------ */
3443
3444 case BC_UGET:
3445 | ins_AD // RA = dst, RD = upvalue #
3446 | mov LFUNC:RB, [BASE-16]
3447 | cleartp LFUNC:RB
3448 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
3449 | mov RB, UPVAL:RB->v
3450 | mov RD, [RB]
3451 | mov [BASE+RA*8], RD
3452 | ins_next
3453 break;
3454 case BC_USETV:
3455#define TV2MARKOFS \
3456 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3457 | ins_AD // RA = upvalue #, RD = src
3458 | mov LFUNC:RB, [BASE-16]
3459 | cleartp LFUNC:RB
3460 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3461 | cmp byte UPVAL:RB->closed, 0
3462 | mov RB, UPVAL:RB->v
3463 | mov RA, [BASE+RD*8]
3464 | mov [RB], RA
3465 | jz >1
3466 | // Check barrier for closed upvalue.
3467 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3468 | jnz >2
3469 |1:
3470 | ins_next
3471 |
3472 |2: // Upvalue is black. Check if new value is collectable and white.
3473 | mov RD, RA
3474 | sar RD, 47
3475 | sub RDd, LJ_TISGCV
3476 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3477 | jbe <1
3478 | cleartp GCOBJ:RA
3479 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3480 | jz <1
3481 | // Crossed a write barrier. Move the barrier forward.
3482 |.if not X64WIN
3483 | mov CARG2, RB
3484 | mov RB, BASE // Save BASE.
3485 |.else
3486 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3487 |.endif
3488 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3489 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3490 | mov BASE, RB // Restore BASE.
3491 | jmp <1
3492 break;
3493#undef TV2MARKOFS
3494 case BC_USETS:
3495 | ins_AND // RA = upvalue #, RD = str const (~)
3496 | mov LFUNC:RB, [BASE-16]
3497 | cleartp LFUNC:RB
3498 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3499 | mov STR:RA, [KBASE+RD*8]
3500 | mov RD, UPVAL:RB->v
3501 | settp STR:ITYPE, STR:RA, LJ_TSTR
3502 | mov [RD], STR:ITYPE
3503 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3504 | jnz >2
3505 |1:
3506 | ins_next
3507 |
3508 |2: // Check if string is white and ensure upvalue is closed.
3509 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3510 | jz <1
3511 | cmp byte UPVAL:RB->closed, 0
3512 | jz <1
3513 | // Crossed a write barrier. Move the barrier forward.
3514 | mov RB, BASE // Save BASE (CARG2 == BASE).
3515 | mov CARG2, RD
3516 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3517 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3518 | mov BASE, RB // Restore BASE.
3519 | jmp <1
3520 break;
3521 case BC_USETN:
3522 | ins_AD // RA = upvalue #, RD = num const
3523 | mov LFUNC:RB, [BASE-16]
3524 | cleartp LFUNC:RB
3525 | movsd xmm0, qword [KBASE+RD*8]
3526 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3527 | mov RA, UPVAL:RB->v
3528 | movsd qword [RA], xmm0
3529 | ins_next
3530 break;
3531 case BC_USETP:
3532 | ins_AD // RA = upvalue #, RD = primitive type (~)
3533 | mov LFUNC:RB, [BASE-16]
3534 | cleartp LFUNC:RB
3535 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3536 | shl RD, 47
3537 | not RD
3538 | mov RA, UPVAL:RB->v
3539 | mov [RA], RD
3540 | ins_next
3541 break;
3542 case BC_UCLO:
3543 | ins_AD // RA = level, RD = target
3544 | branchPC RD // Do this first to free RD.
3545 | mov L:RB, SAVE_L
3546 | cmp aword L:RB->openupval, 0
3547 | je >1
3548 | mov L:RB->base, BASE
3549 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3550 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3551 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3552 | mov BASE, L:RB->base
3553 |1:
3554 | ins_next
3555 break;
3556
3557 case BC_FNEW:
3558 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3559 | mov L:RB, SAVE_L
3560 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3561 | mov CARG3, [BASE-16]
3562 | cleartp CARG3
3563 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3564 | mov CARG1, L:RB
3565 | mov SAVE_PC, PC
3566 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3567 | call extern lj_func_newL_gc
3568 | // GCfuncL * returned in eax (RC).
3569 | mov BASE, L:RB->base
3570 | movzx RAd, PC_RA
3571 | settp LFUNC:RC, LJ_TFUNC
3572 | mov [BASE+RA*8], LFUNC:RC
3573 | ins_next
3574 break;
3575
3576 /* -- Table ops --------------------------------------------------------- */
3577
3578 case BC_TNEW:
3579 | ins_AD // RA = dst, RD = hbits|asize
3580 | mov L:RB, SAVE_L
3581 | mov L:RB->base, BASE
3582 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3583 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3584 | mov SAVE_PC, PC
3585 | jae >5
3586 |1:
3587 | mov CARG3d, RDd
3588 | and RDd, 0x7ff
3589 | shr CARG3d, 11
3590 | cmp RDd, 0x7ff
3591 | je >3
3592 |2:
3593 | mov L:CARG1, L:RB
3594 | mov CARG2d, RDd
3595 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3596 | // Table * returned in eax (RC).
3597 | mov BASE, L:RB->base
3598 | movzx RAd, PC_RA
3599 | settp TAB:RC, LJ_TTAB
3600 | mov [BASE+RA*8], TAB:RC
3601 | ins_next
3602 |3: // Turn 0x7ff into 0x801.
3603 | mov RDd, 0x801
3604 | jmp <2
3605 |5:
3606 | mov L:CARG1, L:RB
3607 | call extern lj_gc_step_fixtop // (lua_State *L)
3608 | movzx RDd, PC_RD
3609 | jmp <1
3610 break;
3611 case BC_TDUP:
3612 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3613 | mov L:RB, SAVE_L
3614 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3615 | mov SAVE_PC, PC
3616 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3617 | mov L:RB->base, BASE
3618 | jae >3
3619 |2:
3620 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3621 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3622 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3623 | // Table * returned in eax (RC).
3624 | mov BASE, L:RB->base
3625 | movzx RAd, PC_RA
3626 | settp TAB:RC, LJ_TTAB
3627 | mov [BASE+RA*8], TAB:RC
3628 | ins_next
3629 |3:
3630 | mov L:CARG1, L:RB
3631 | call extern lj_gc_step_fixtop // (lua_State *L)
3632 | movzx RDd, PC_RD // Need to reload RD.
3633 | not RD
3634 | jmp <2
3635 break;
3636
3637 case BC_GGET:
3638 | ins_AND // RA = dst, RD = str const (~)
3639 | mov LFUNC:RB, [BASE-16]
3640 | cleartp LFUNC:RB
3641 | mov TAB:RB, LFUNC:RB->env
3642 | mov STR:RC, [KBASE+RD*8]
3643 | jmp ->BC_TGETS_Z
3644 break;
3645 case BC_GSET:
3646 | ins_AND // RA = src, RD = str const (~)
3647 | mov LFUNC:RB, [BASE-16]
3648 | cleartp LFUNC:RB
3649 | mov TAB:RB, LFUNC:RB->env
3650 | mov STR:RC, [KBASE+RD*8]
3651 | jmp ->BC_TSETS_Z
3652 break;
3653
3654 case BC_TGETV:
3655 | ins_ABC // RA = dst, RB = table, RC = key
3656 | mov TAB:RB, [BASE+RB*8]
3657 | mov RC, [BASE+RC*8]
3658 | checktab TAB:RB, ->vmeta_tgetv
3659 |
3660 | // Integer key?
3661 |.if DUALNUM
3662 | checkint RC, >5
3663 |.else
3664 | // Convert number to int and back and compare.
3665 | checknum RC, >5
3666 | movd xmm0, RC
3667 | cvttsd2si RCd, xmm0
3668 | cvtsi2sd xmm1, RCd
3669 | ucomisd xmm0, xmm1
3670 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3671 |.endif
3672 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3673 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3674 | shl RCd, 3
3675 | add RC, TAB:RB->array
3676 | // Get array slot.
3677 | mov ITYPE, [RC]
3678 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3679 | je >2
3680 |1:
3681 | mov [BASE+RA*8], ITYPE
3682 | ins_next
3683 |
3684 |2: // Check for __index if table value is nil.
3685 | mov TAB:TMPR, TAB:RB->metatable
3686 | test TAB:TMPR, TAB:TMPR
3687 | jz <1
3688 | test byte TAB:TMPR->nomm, 1<<MM_index
3689 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3690 | jmp <1
3691 |
3692 |5: // String key?
3693 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3694 | cleartp STR:RC
3695 | jmp ->BC_TGETS_Z
3696 break;
3697 case BC_TGETS:
3698 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3699 | mov TAB:RB, [BASE+RB*8]
3700 | not RC
3701 | mov STR:RC, [KBASE+RC*8]
3702 | checktab TAB:RB, ->vmeta_tgets
3703 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
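  | // The hash slot is node[str->sid & hmask]; the chain is walked via ->next,
  | // comparing the full tagged key. A hit with a nil value, or a miss, still
  | // has to check the metatable for __index (see 5: below).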
3704 | mov TMPRd, TAB:RB->hmask
3705 | and TMPRd, STR:RC->sid
3706 | imul TMPRd, #NODE
3707 | add NODE:TMPR, TAB:RB->node
3708 | settp ITYPE, STR:RC, LJ_TSTR
3709 |1:
3710 | cmp NODE:TMPR->key, ITYPE
3711 | jne >4
3712 | // Get node value.
3713 | mov ITYPE, NODE:TMPR->val
3714 | cmp ITYPE, LJ_TNIL
3715 | je >5 // Key found, but nil value?
3716 |2:
3717 | mov [BASE+RA*8], ITYPE
3718 | ins_next
3719 |
3720 |4: // Follow hash chain.
3721 | mov NODE:TMPR, NODE:TMPR->next
3722 | test NODE:TMPR, NODE:TMPR
3723 | jnz <1
3724 | // End of hash chain: key not found, nil result.
3725 | mov ITYPE, LJ_TNIL
3726 |
3727 |5: // Check for __index if table value is nil.
3728 | mov TAB:TMPR, TAB:RB->metatable
3729 | test TAB:TMPR, TAB:TMPR
3730 | jz <2 // No metatable: done.
3731 | test byte TAB:TMPR->nomm, 1<<MM_index
3732 | jnz <2 // 'no __index' flag set: done.
3733 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3734 break;
3735 case BC_TGETB:
3736 | ins_ABC // RA = dst, RB = table, RC = byte literal
3737 | mov TAB:RB, [BASE+RB*8]
3738 | checktab TAB:RB, ->vmeta_tgetb
3739 | cmp RCd, TAB:RB->asize
3740 | jae ->vmeta_tgetb
3741 | shl RCd, 3
3742 | add RC, TAB:RB->array
3743 | // Get array slot.
3744 | mov ITYPE, [RC]
3745 | cmp ITYPE, LJ_TNIL
3746 | je >2
3747 |1:
3748 | mov [BASE+RA*8], ITYPE
3749 | ins_next
3750 |
3751 |2: // Check for __index if table value is nil.
3752 | mov TAB:TMPR, TAB:RB->metatable
3753 | test TAB:TMPR, TAB:TMPR
3754 | jz <1
3755 | test byte TAB:TMPR->nomm, 1<<MM_index
3756 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3757 | jmp <1
3758 break;
3759 case BC_TGETR:
3760 | ins_ABC // RA = dst, RB = table, RC = key
3761 | mov TAB:RB, [BASE+RB*8]
3762 | cleartp TAB:RB
3763 |.if DUALNUM
3764 | mov RCd, dword [BASE+RC*8]
3765 |.else
3766 | cvttsd2si RCd, qword [BASE+RC*8]
3767 |.endif
3768 | cmp RCd, TAB:RB->asize
3769 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3770 | shl RCd, 3
3771 | add RC, TAB:RB->array
3772 | // Get array slot.
3773 |->BC_TGETR_Z:
3774 | mov ITYPE, [RC]
3775 |->BC_TGETR2_Z:
3776 | mov [BASE+RA*8], ITYPE
3777 | ins_next
3778 break;
3779
3780 case BC_TSETV:
3781 | ins_ABC // RA = src, RB = table, RC = key
3782 | mov TAB:RB, [BASE+RB*8]
3783 | mov RC, [BASE+RC*8]
3784 | checktab TAB:RB, ->vmeta_tsetv
3785 |
3786 | // Integer key?
3787 |.if DUALNUM
3788 | checkint RC, >5
3789 |.else
3790 | // Convert number to int and back and compare.
3791 | checknum RC, >5
3792 | movd xmm0, RC
3793 | cvttsd2si RCd, xmm0
3794 | cvtsi2sd xmm1, RCd
3795 | ucomisd xmm0, xmm1
3796 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3797 |.endif
3798 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3799 | jae ->vmeta_tsetv
3800 | shl RCd, 3
3801 | add RC, TAB:RB->array
3802 | cmp aword [RC], LJ_TNIL
3803 | je >3 // Previous value is nil?
3804 |1:
3805 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3806 | jnz >7
3807 |2: // Set array slot.
3808 | mov RB, [BASE+RA*8]
3809 | mov [RC], RB
3810 | ins_next
3811 |
3812 |3: // Check for __newindex if previous value is nil.
3813 | mov TAB:TMPR, TAB:RB->metatable
3814 | test TAB:TMPR, TAB:TMPR
3815 | jz <1
3816 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3817 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3818 | jmp <1
3819 |
3820 |5: // String key?
3821 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3822 | cleartp STR:RC
3823 | jmp ->BC_TSETS_Z
3824 |
3825 |7: // Possible table write barrier for the value. Skip valiswhite check.
3826 | barrierback TAB:RB, TMPR
3827 | jmp <2
3828 break;
3829 case BC_TSETS:
3830 | ins_ABC // RA = src, RB = table, RC = str const (~)
3831 | mov TAB:RB, [BASE+RB*8]
3832 | not RC
3833 | mov STR:RC, [KBASE+RC*8]
3834 | checktab TAB:RB, ->vmeta_tsets
3835 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3836 | mov TMPRd, TAB:RB->hmask
3837 | and TMPRd, STR:RC->sid
3838 | imul TMPRd, #NODE
3839 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3840 | add NODE:TMPR, TAB:RB->node
3841 | settp ITYPE, STR:RC, LJ_TSTR
3842 |1:
3843 | cmp NODE:TMPR->key, ITYPE
3844 | jne >5
3845 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3846 | cmp aword [TMPR], LJ_TNIL
3847 | je >4 // Previous value is nil?
3848 |2:
3849 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3850 | jnz >7
3851 |3: // Set node value.
3852 | mov ITYPE, [BASE+RA*8]
3853 | mov [TMPR], ITYPE
3854 | ins_next
3855 |
3856 |4: // Check for __newindex if previous value is nil.
3857 | mov TAB:ITYPE, TAB:RB->metatable
3858 | test TAB:ITYPE, TAB:ITYPE
3859 | jz <2
3860 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3861 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3862 | jmp <2
3863 |
3864 |5: // Follow hash chain.
3865 | mov NODE:TMPR, NODE:TMPR->next
3866 | test NODE:TMPR, NODE:TMPR
3867 | jnz <1
3868 | // End of hash chain: key not found, add a new one.
3869 |
3870 | // But check for __newindex first.
3871 | mov TAB:TMPR, TAB:RB->metatable
3872 | test TAB:TMPR, TAB:TMPR
3873 | jz >6 // No metatable: continue.
3874 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3875 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3876 |6:
3877 | mov TMP1, ITYPE
3878 | mov L:CARG1, SAVE_L
3879 | mov L:CARG1->base, BASE
3880 | lea CARG3, TMP1
3881 | mov CARG2, TAB:RB
3882 | mov SAVE_PC, PC
3883 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3884 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3885 | mov L:CARG1, SAVE_L
3886 | mov BASE, L:CARG1->base
3887 | mov TMPR, rax
3888 | movzx RAd, PC_RA
3889 | jmp <2 // Must check write barrier for value.
3890 |
3891 |7: // Possible table write barrier for the value. Skip valiswhite check.
3892 | barrierback TAB:RB, ITYPE
3893 | jmp <3
3894 break;
3895 case BC_TSETB:
3896 | ins_ABC // RA = src, RB = table, RC = byte literal
3897 | mov TAB:RB, [BASE+RB*8]
3898 | checktab TAB:RB, ->vmeta_tsetb
3899 | cmp RCd, TAB:RB->asize
3900 | jae ->vmeta_tsetb
3901 | shl RCd, 3
3902 | add RC, TAB:RB->array
3903 | cmp aword [RC], LJ_TNIL
3904 | je >3 // Previous value is nil?
3905 |1:
3906 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3907 | jnz >7
3908 |2: // Set array slot.
3909 | mov ITYPE, [BASE+RA*8]
3910 | mov [RC], ITYPE
3911 | ins_next
3912 |
3913 |3: // Check for __newindex if previous value is nil.
3914 | mov TAB:TMPR, TAB:RB->metatable
3915 | test TAB:TMPR, TAB:TMPR
3916 | jz <1
3917 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3918 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3919 | jmp <1
3920 |
3921 |7: // Possible table write barrier for the value. Skip valiswhite check.
3922 | barrierback TAB:RB, TMPR
3923 | jmp <2
3924 break;
3925 case BC_TSETR:
3926 | ins_ABC // RA = src, RB = table, RC = key
3927 | mov TAB:RB, [BASE+RB*8]
3928 | cleartp TAB:RB
3929 |.if DUALNUM
3930 | mov RC, [BASE+RC*8]
3931 |.else
3932 | cvttsd2si RCd, qword [BASE+RC*8]
3933 |.endif
3934 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3935 | jnz >7
3936 |2:
3937 | cmp RCd, TAB:RB->asize
3938 | jae ->vmeta_tsetr
3939 | shl RCd, 3
3940 | add RC, TAB:RB->array
3941 | // Set array slot.
3942 |->BC_TSETR_Z:
3943 | mov ITYPE, [BASE+RA*8]
3944 | mov [RC], ITYPE
3945 | ins_next
3946 |
3947 |7: // Possible table write barrier for the value. Skip valiswhite check.
3948 | barrierback TAB:RB, TMPR
3949 | jmp <2
3950 break;
3951
3952 case BC_TSETM:
3953 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3954 |1:
3955 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3956 | lea RA, [BASE+RA*8]
3957 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3958 | cleartp TAB:RB
3959 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3960 | jnz >7
3961 |2:
3962 | mov RDd, MULTRES
3963 | sub RDd, 1
3964 | jz >4 // Nothing to copy?
3965 | add RDd, TMPRd // Compute needed size.
3966 | cmp RDd, TAB:RB->asize
3967 | ja >5 // Doesn't fit into array part?
3968 | sub RDd, TMPRd
3969 | shl TMPRd, 3
3970 | add TMPR, TAB:RB->array
3971 |3: // Copy result slots to table.
3972 | mov RB, [RA]
3973 | add RA, 8
3974 | mov [TMPR], RB
3975 | add TMPR, 8
3976 | sub RDd, 1
3977 | jnz <3
3978 |4:
3979 | ins_next
3980 |
3981 |5: // Need to resize array part.
3982 | mov L:CARG1, SAVE_L
3983 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3984 | mov CARG2, TAB:RB
3985 | mov CARG3d, RDd
3986 | mov L:RB, L:CARG1
3987 | mov SAVE_PC, PC
3988 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3989 | mov BASE, L:RB->base
3990 | movzx RAd, PC_RA // Restore RA.
3991 | movzx RDd, PC_RD // Restore RD.
3992 | jmp <1 // Retry.
3993 |
3994 |7: // Possible table write barrier for any value. Skip valiswhite check.
3995 | barrierback TAB:RB, RD
3996 | jmp <2
3997 break;
3998
3999 /* -- Calls and vararg handling ----------------------------------------- */
4000
4001 case BC_CALL: case BC_CALLM:
4002 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
4003 if (op == BC_CALLM) {
4004 | add NARGS:RDd, MULTRES
4005 }
4006 | mov LFUNC:RB, [BASE+RA*8]
4007 | checkfunc LFUNC:RB, ->vmeta_call_ra
4008 | lea BASE, [BASE+RA*8+16]
4009 | ins_call
4010 break;
4011
4012 case BC_CALLMT:
4013 | ins_AD // RA = base, RD = extra_nargs
4014 | add NARGS:RDd, MULTRES
4015 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
4016 break;
4017 case BC_CALLT:
4018 | ins_AD // RA = base, RD = nargs+1
4019 | lea RA, [BASE+RA*8+16]
4020 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
4021 | mov LFUNC:RB, [RA-16]
4022 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
4023 |->BC_CALLT_Z:
4024 | mov PC, [BASE-8]
4025 | test PCd, FRAME_TYPE
4026 | jnz >7
4027 |1:
4028 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
4029 | mov MULTRES, NARGS:RDd
4030 | sub NARGS:RDd, 1
4031 | jz >3
4032 |2: // Move args down.
4033 | mov RB, [RA]
4034 | add RA, 8
4035 | mov [KBASE], RB
4036 | add KBASE, 8
4037 | sub NARGS:RDd, 1
4038 | jnz <2
4039 |
4040 | mov LFUNC:RB, [BASE-16]
4041 |3:
4042 | cleartp LFUNC:RB
4043 | mov NARGS:RDd, MULTRES
4044 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4045 | ja >5
4046 |4:
4047 | ins_callt
4048 |
4049 |5: // Tailcall to a fast function.
4050 | test PCd, FRAME_TYPE // Lua frame below?
4051 | jnz <4
4052 | movzx RAd, PC_RA
4053 | neg RA
4054 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4055 | cleartp LFUNC:KBASE
4056 | mov KBASE, LFUNC:KBASE->pc
4057 | mov KBASE, [KBASE+PC2PROTO(k)]
4058 | jmp <4
4059 |
4060 |7: // Tailcall from a vararg function.
4061 | sub PC, FRAME_VARG
4062 | test PCd, FRAME_TYPEP
4063 | jnz >8 // Vararg frame below?
4064 | sub BASE, PC // Need to relocate BASE/KBASE down.
4065 | mov KBASE, BASE
4066 | mov PC, [BASE-8]
4067 | jmp <1
4068 |8:
4069 | add PCd, FRAME_VARG
4070 | jmp <1
4071 break;
4072
4073 case BC_ITERC:
4074 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4075 | lea RA, [BASE+RA*8+16] // fb = base+2
4076 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4077 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4078 | mov [RA], RB
4079 | mov [RA+8], RC
4080 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5]
4081 | mov [RA-16], LFUNC:RB
4082 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4083 | checkfunc LFUNC:RB, ->vmeta_call
4084 | mov BASE, RA
4085 | ins_call
4086 break;
4087
4088 case BC_ITERN:
4089 |.if JIT
4090 | hotloop RBd
4091 |.endif
4092 |->vm_IITERN:
4093 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4094 | mov TAB:RB, [BASE+RA*8-16]
4095 | cleartp TAB:RB
4096 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4097 | mov TMPRd, TAB:RB->asize
4098 | add PC, 4
4099 | mov ITYPE, TAB:RB->array
4100 |1: // Traverse array part.
4101 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4102 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4103 |.if not DUALNUM
4104 | cvtsi2sd xmm0, RCd
4105 |.endif
4106 | // Copy array slot to returned value.
4107 | mov RB, [ITYPE+RC*8]
4108 | mov [BASE+RA*8+8], RB
4109 | // Return array index as a numeric key.
4110 |.if DUALNUM
4111 | setint ITYPE, RC
4112 | mov [BASE+RA*8], ITYPE
4113 |.else
4114 | movsd qword [BASE+RA*8], xmm0
4115 |.endif
4116 | add RCd, 1
4117 | mov [BASE+RA*8-8], RCd // Update control var.
4118 |2:
4119 | movzx RDd, PC_RD // Get target from ITERL.
4120 | branchPC RD
4121 |3:
4122 | ins_next
4123 |
4124 |4: // Skip holes in array part.
4125 | add RCd, 1
4126 | jmp <1
4127 |
4128 |5: // Traverse hash part.
4129 | sub RCd, TMPRd
4130 |6:
4131 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4132 | imul ITYPEd, RCd, #NODE
4133 | add NODE:ITYPE, TAB:RB->node
4134 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4135 | lea TMPRd, [RCd+TMPRd+1]
4136 | // Copy key and value from hash slot.
4137 | mov RB, NODE:ITYPE->key
4138 | mov RC, NODE:ITYPE->val
4139 | mov [BASE+RA*8], RB
4140 | mov [BASE+RA*8+8], RC
4141 | mov [BASE+RA*8-8], TMPRd
4142 | jmp <2
4143 |
4144 |7: // Skip holes in hash part.
4145 | add RCd, 1
4146 | jmp <6
4147 break;
4148
4149 case BC_ISNEXT:
4150 | ins_AD // RA = base, RD = target (points to ITERN)
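  | // ISNEXT verifies the stock next/pairs iteration pattern: the callee slot
  | // holds the built-in next function (ffid == FF_next_N), its argument is a
  | // table and the control slot is nil. If so, only the control slot is
  | // initialized (LJ_KEYINDEX marker) and the following specialized ITERN
  | // does the actual iteration. Otherwise (5:) the bytecode is despecialized:
  | // ISNEXT becomes a plain JMP and the ITERN (or a JLOOP patched over it)
  | // is rewritten back to a generic ITERC.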
4151 | mov CFUNC:RB, [BASE+RA*8-24]
4152 | checkfunc CFUNC:RB, >5
4153 | checktptp [BASE+RA*8-16], LJ_TTAB, >5
4154 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
4155 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4156 | branchPC RD
4157 | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32)
4158 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4159 |1:
4160 | ins_next
4161 |5: // Despecialize bytecode if any of the checks fail.
4162 | mov PC_OP, BC_JMP
4163 | branchPC RD
4164 |.if JIT
4165 | cmp byte [PC], BC_ITERN
4166 | jne >6
4167 |.endif
4168 | mov byte [PC], BC_ITERC
4169 | jmp <1
4170 |.if JIT
4171 |6: // Unpatch JLOOP.
4172 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4173 | movzx RCd, word [PC+2]
4174 | mov TRACE:RA, [RA+RC*8]
4175 | mov eax, TRACE:RA->startins
4176 | mov al, BC_ITERC
4177 | mov dword [PC], eax
4178 | jmp <1
4179 |.endif
4180 break;
4181
4182 case BC_VARG:
4183 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4184 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4185 | lea RA, [BASE+RA*8]
4186 | sub TMPR, [BASE-8]
4187 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4188 | test RB, RB
4189 | jz >5 // Copy all varargs?
4190 | lea RB, [RA+RB*8-8]
4191 | cmp TMPR, BASE // No vararg slots?
4192 | jnb >2
4193 |1: // Copy vararg slots to destination slots.
4194 | mov RC, [TMPR-16]
4195 | add TMPR, 8
4196 | mov [RA], RC
4197 | add RA, 8
4198 | cmp RA, RB // All destination slots filled?
4199 | jnb >3
4200 | cmp TMPR, BASE // No more vararg slots?
4201 | jb <1
4202 |2: // Fill up remainder with nil.
4203 | mov aword [RA], LJ_TNIL
4204 | add RA, 8
4205 | cmp RA, RB
4206 | jb <2
4207 |3:
4208 | ins_next
4209 |
4210 |5: // Copy all varargs.
4211 | mov MULTRES, 1 // MULTRES = 0+1
4212 | mov RC, BASE
4213 | sub RC, TMPR
4214 | jbe <3 // No vararg slots?
4215 | mov RBd, RCd
4216 | shr RBd, 3
4217 | add RBd, 1
4218 | mov MULTRES, RBd // MULTRES = #varargs+1
4219 | mov L:RB, SAVE_L
4220 | add RC, RA
4221 | cmp RC, L:RB->maxstack
4222 | ja >7 // Need to grow stack?
4223 |6: // Copy all vararg slots.
4224 | mov RC, [TMPR-16]
4225 | add TMPR, 8
4226 | mov [RA], RC
4227 | add RA, 8
4228 | cmp TMPR, BASE // No more vararg slots?
4229 | jb <6
4230 | jmp <3
4231 |
4232 |7: // Grow stack for varargs.
4233 | mov L:RB->base, BASE
4234 | mov L:RB->top, RA
4235 | mov SAVE_PC, PC
4236 | sub TMPR, BASE // Need delta, because BASE may change.
4237 | mov TMP1hi, TMPRd
4238 | mov CARG2d, MULTRES
4239 | sub CARG2d, 1
4240 | mov CARG1, L:RB
4241 | call extern lj_state_growstack // (lua_State *L, int n)
4242 | mov BASE, L:RB->base
4243 | movsxd TMPR, TMP1hi
4244 | mov RA, L:RB->top
4245 | add TMPR, BASE
4246 | jmp <6
4247 break;
4248
4249 /* -- Returns ----------------------------------------------------------- */
4250
4251 case BC_RETM:
4252 | ins_AD // RA = results, RD = extra_nresults
4253 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4254 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4255 break;
4256
4257 case BC_RET: case BC_RET0: case BC_RET1:
4258 | ins_AD // RA = results, RD = nresults+1
4259 if (op != BC_RET0) {
4260 | shl RAd, 3
4261 }
4262 |1:
4263 | mov PC, [BASE-8]
4264 | mov MULTRES, RDd // Save nresults+1.
4265 | test PCd, FRAME_TYPE // Check frame type marker.
4266 | jnz >7 // Not returning to a fixarg Lua func?
4267 switch (op) {
4268 case BC_RET:
4269 |->BC_RET_Z:
4270 | mov KBASE, BASE // Use KBASE for result move.
4271 | sub RDd, 1
4272 | jz >3
4273 |2: // Move results down.
4274 | mov RB, [KBASE+RA]
4275 | mov [KBASE-16], RB
4276 | add KBASE, 8
4277 | sub RDd, 1
4278 | jnz <2
4279 |3:
4280 | mov RDd, MULTRES // Note: MULTRES may be >255.
4281 | movzx RBd, PC_RB // So cannot compare with RDL!
4282 |5:
4283 | cmp RBd, RDd // More results expected?
4284 | ja >6
4285 break;
4286 case BC_RET1:
4287 | mov RB, [BASE+RA]
4288 | mov [BASE-16], RB
4289 /* fallthrough */
4290 case BC_RET0:
4291 |5:
4292 | cmp PC_RB, RDL // More results expected?
4293 | ja >6
4294 default:
4295 break;
4296 }
4297 | movzx RAd, PC_RA
4298 | neg RA
4299 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4300 | mov LFUNC:KBASE, [BASE-16]
4301 | cleartp LFUNC:KBASE
4302 | mov KBASE, LFUNC:KBASE->pc
4303 | mov KBASE, [KBASE+PC2PROTO(k)]
4304 | ins_next
4305 |
4306 |6: // Fill up results with nil.
4307 if (op == BC_RET) {
4308 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4309 | add KBASE, 8
4310 } else {
4311 | mov aword [BASE+RD*8-24], LJ_TNIL
4312 }
4313 | add RD, 1
4314 | jmp <5
4315 |
4316 |7: // Non-standard return case.
4317 | lea RB, [PC-FRAME_VARG]
4318 | test RBd, FRAME_TYPEP
4319 | jnz ->vm_return
4320 | // Return from vararg function: relocate BASE down and RA up.
4321 | sub BASE, RB
4322 if (op != BC_RET0) {
4323 | add RA, RB
4324 }
4325 | jmp <1
4326 break;
4327
4328 /* -- Loops and branches ------------------------------------------------ */
4329
4330 |.define FOR_IDX, [RA]
4331 |.define FOR_STOP, [RA+8]
4332 |.define FOR_STEP, [RA+16]
4333 |.define FOR_EXT, [RA+24]
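  | // A numeric for loop occupies four consecutive slots at RA: the internal
  | // index, the stop value, the step and the copy of the index (FOR_EXT)
  | // that is visible to the loop body. FORI/JFORI type-check the slots and
  | // do the initial comparison; IFORL/JFORL add the step and loop back.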
4334
4335 case BC_FORL:
4336 |.if JIT
4337 | hotloop RBd
4338 |.endif
4339 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4340 break;
4341
4342 case BC_JFORI:
4343 case BC_JFORL:
4344#if !LJ_HASJIT
4345 break;
4346#endif
4347 case BC_FORI:
4348 case BC_IFORL:
4349 vk = (op == BC_IFORL || op == BC_JFORL);
4350 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4351 | lea RA, [BASE+RA*8]
4352 if (LJ_DUALNUM) {
4353 | mov RB, FOR_IDX
4354 | checkint RB, >9
4355 | mov TMPR, FOR_STOP
4356 if (!vk) {
4357 | checkint TMPR, ->vmeta_for
4358 | mov ITYPE, FOR_STEP
4359 | test ITYPEd, ITYPEd; js >5
4360 | sar ITYPE, 47;
4361 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4362 } else {
4363#ifdef LUA_USE_ASSERT
4364 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4365 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4366#endif
4367 | mov ITYPE, FOR_STEP
4368 | test ITYPEd, ITYPEd; js >5
4369 | add RBd, ITYPEd; jo >1
4370 | setint RB
4371 | mov FOR_IDX, RB
4372 }
4373 | cmp RBd, TMPRd
4374 | mov FOR_EXT, RB
4375 if (op == BC_FORI) {
4376 | jle >7
4377 |1:
4378 |6:
4379 | branchPC RD
4380 } else if (op == BC_JFORI) {
4381 | branchPC RD
4382 | movzx RDd, PC_RD
4383 | jle =>BC_JLOOP
4384 |1:
4385 |6:
4386 } else if (op == BC_IFORL) {
4387 | jg >7
4388 |6:
4389 | branchPC RD
4390 |1:
4391 } else {
4392 | jle =>BC_JLOOP
4393 |1:
4394 |6:
4395 }
4396 |7:
4397 | ins_next
4398 |
4399 |5: // Invert check for negative step.
4400 if (!vk) {
4401 | sar ITYPE, 47;
4402 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4403 } else {
4404 | add RBd, ITYPEd; jo <1
4405 | setint RB
4406 | mov FOR_IDX, RB
4407 }
4408 | cmp RBd, TMPRd
4409 | mov FOR_EXT, RB
4410 if (op == BC_FORI) {
4411 | jge <7
4412 } else if (op == BC_JFORI) {
4413 | branchPC RD
4414 | movzx RDd, PC_RD
4415 | jge =>BC_JLOOP
4416 } else if (op == BC_IFORL) {
4417 | jl <7
4418 } else {
4419 | jge =>BC_JLOOP
4420 }
4421 | jmp <6
4422 |9: // Fallback to FP variant.
4423 if (!vk) {
4424 | jae ->vmeta_for
4425 }
4426 } else if (!vk) {
4427 | checknumtp FOR_IDX, ->vmeta_for
4428 }
4429 if (!vk) {
4430 | checknumtp FOR_STOP, ->vmeta_for
4431 } else {
4432#ifdef LUA_USE_ASSERT
4433 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4434 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4435#endif
4436 }
4437 | mov RB, FOR_STEP
4438 if (!vk) {
4439 | checknum RB, ->vmeta_for
4440 }
4441 | movsd xmm0, qword FOR_IDX
4442 | movsd xmm1, qword FOR_STOP
4443 if (vk) {
4444 | addsd xmm0, qword FOR_STEP
4445 | movsd qword FOR_IDX, xmm0
4446 | test RB, RB; js >3
4447 } else {
4448 | jl >3
4449 }
4450 | ucomisd xmm1, xmm0
4451 |1:
4452 | movsd qword FOR_EXT, xmm0
4453 if (op == BC_FORI) {
4454 |.if DUALNUM
4455 | jnb <7
4456 |.else
4457 | jnb >2
4458 | branchPC RD
4459 |.endif
4460 } else if (op == BC_JFORI) {
4461 | branchPC RD
4462 | movzx RDd, PC_RD
4463 | jnb =>BC_JLOOP
4464 } else if (op == BC_IFORL) {
4465 |.if DUALNUM
4466 | jb <7
4467 |.else
4468 | jb >2
4469 | branchPC RD
4470 |.endif
4471 } else {
4472 | jnb =>BC_JLOOP
4473 }
4474 |.if DUALNUM
4475 | jmp <6
4476 |.else
4477 |2:
4478 | ins_next
4479 |.endif
4480 |
4481 |3: // Invert comparison if step is negative.
4482 | ucomisd xmm0, xmm1
4483 | jmp <1
4484 break;
4485
4486 case BC_ITERL:
4487 |.if JIT
4488 | hotloop RBd
4489 |.endif
4490 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4491 break;
4492
4493 case BC_JITERL:
4494#if !LJ_HASJIT
4495 break;
4496#endif
4497 case BC_IITERL:
4498 | ins_AJ // RA = base, RD = target
4499 | lea RA, [BASE+RA*8]
4500 | mov RB, [RA]
4501 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4502 if (op == BC_JITERL) {
4503 | mov [RA-8], RB
4504 | jmp =>BC_JLOOP
4505 } else {
4506 | branchPC RD // Otherwise save control var + branch.
4507 | mov [RA-8], RB
4508 }
4509 |1:
4510 | ins_next
4511 break;
4512
4513 case BC_LOOP:
4514 | ins_A // RA = base, RD = target (loop extent)
4515 | // Note: RA/RD is only used by the trace recorder to determine scope/extent.
4516 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
4517 |.if JIT
4518 | hotloop RBd
4519 |.endif
4520 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4521 break;
4522
4523 case BC_ILOOP:
4524 | ins_A // RA = base, RD = target (loop extent)
4525 | ins_next
4526 break;
4527
4528 case BC_JLOOP:
4529 |.if JIT
4530 | ins_AD // RA = base (ignored), RD = traceno
4531 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4532 | mov TRACE:RD, [RA+RD*8]
4533 | mov RD, TRACE:RD->mcode
4534 | mov L:RB, SAVE_L
4535 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4536 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4537 | // Save additional callee-save registers only used in compiled code.
4538 |.if X64WIN
4539 | mov CSAVE_4, r12
4540 | mov CSAVE_3, r13
4541 | mov CSAVE_2, r14
4542 | mov CSAVE_1, r15
4543 | mov RA, rsp
4544 | sub rsp, 10*16+4*8
4545 | movdqa [RA-1*16], xmm6
4546 | movdqa [RA-2*16], xmm7
4547 | movdqa [RA-3*16], xmm8
4548 | movdqa [RA-4*16], xmm9
4549 | movdqa [RA-5*16], xmm10
4550 | movdqa [RA-6*16], xmm11
4551 | movdqa [RA-7*16], xmm12
4552 | movdqa [RA-8*16], xmm13
4553 | movdqa [RA-9*16], xmm14
4554 | movdqa [RA-10*16], xmm15
4555 |.else
4556 | sub rsp, 16
4557 | mov [rsp+16], r12
4558 | mov [rsp+8], r13
4559 |.endif
4560 | jmp RD
4561 |.endif
4562 break;
4563
4564 case BC_JMP:
4565 | ins_AJ // RA = unused, RD = target
4566 | branchPC RD
4567 | ins_next
4568 break;
4569
4570 /* -- Function headers -------------------------------------------------- */
4571
4572 /*
4573 ** Reminder: A function may be called with func/args above L->maxstack,
4574 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4575 ** too. This means all FUNC* ops (including fast functions) must check
4576 ** for stack overflow _before_ adding more slots!
4577 */
4578
4579 case BC_FUNCF:
4580 |.if JIT
4581 | hotcall RBd
4582 |.endif
4583 case BC_FUNCV: /* NYI: compiled vararg functions. */
4584 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4585 break;
4586
4587 case BC_JFUNCF:
4588#if !LJ_HASJIT
4589 break;
4590#endif
4591 case BC_IFUNCF:
4592 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4593 | mov KBASE, [PC-4+PC2PROTO(k)]
4594 | mov L:RB, SAVE_L
4595 | lea RA, [BASE+RA*8] // Top of frame.
4596 | cmp RA, L:RB->maxstack
4597 | ja ->vm_growstack_f
4598 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4599 | cmp NARGS:RDd, RAd // Check for missing parameters.
4600 | jbe >3
4601 |2:
4602 if (op == BC_JFUNCF) {
4603 | movzx RDd, PC_RD
4604 | jmp =>BC_JLOOP
4605 } else {
4606 | ins_next
4607 }
4608 |
4609 |3: // Clear missing parameters.
4610 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4611 | add NARGS:RDd, 1
4612 | cmp NARGS:RDd, RAd
4613 | jbe <3
4614 | jmp <2
4615 break;
4616
4617 case BC_JFUNCV:
4618#if !LJ_HASJIT
4619 break;
4620#endif
4621 | int3 // NYI: compiled vararg functions
4622 break; /* NYI: compiled vararg functions. */
4623
4624 case BC_IFUNCV:
4625 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4626 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4627 | lea RD, [BASE+NARGS:RD*8+8]
4628 | mov LFUNC:KBASE, [BASE-16]
4629 | mov [RD-8], RB // Store delta + FRAME_VARG.
4630 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4631 | mov L:RB, SAVE_L
4632 | lea RA, [RD+RA*8]
4633 | cmp RA, L:RB->maxstack
4634 | ja ->vm_growstack_v // Need to grow stack.
4635 | mov RA, BASE
4636 | mov BASE, RD
4637 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4638 | test RBd, RBd
4639 | jz >2
4640 | add RA, 8
4641 |1: // Copy fixarg slots up to new frame.
4642 | add RA, 8
4643 | cmp RA, BASE
4644 | jnb >3 // Fewer args than parameters?
4645 | mov KBASE, [RA-16]
4646 | mov [RD], KBASE
4647 | add RD, 8
4648 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4649 | sub RBd, 1
4650 | jnz <1
4651 |2:
4652 if (op == BC_JFUNCV) {
4653 | movzx RDd, PC_RD
4654 | jmp =>BC_JLOOP
4655 } else {
4656 | mov KBASE, [PC-4+PC2PROTO(k)]
4657 | ins_next
4658 }
4659 |
4660 |3: // Clear missing parameters.
4661 | mov aword [RD], LJ_TNIL
4662 | add RD, 8
4663 | sub RBd, 1
4664 | jnz <3
4665 | jmp <2
4666 break;
4667
4668 case BC_FUNCC:
4669 case BC_FUNCCW:
4670 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4671 | mov CFUNC:RB, [BASE-16]
4672 | cleartp CFUNC:RB
4673 | mov KBASE, CFUNC:RB->f
4674 | mov L:RB, SAVE_L
4675 | lea RD, [BASE+NARGS:RD*8-8]
4676 | mov L:RB->base, BASE
4677 | lea RA, [RD+8*LUA_MINSTACK]
4678 | cmp RA, L:RB->maxstack
4679 | mov L:RB->top, RD
4680 if (op == BC_FUNCC) {
4681 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4682 } else {
4683 | mov CARG2, KBASE
4684 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4685 }
4686 | ja ->vm_growstack_c // Need to grow stack.
4687 | set_vmstate C
4688 if (op == BC_FUNCC) {
4689 | call KBASE // (lua_State *L)
4690 } else {
4691 | // (lua_State *L, lua_CFunction f)
4692 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4693 }
4694 | // nresults returned in eax (RD).
4695 | mov BASE, L:RB->base
4696 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4697 | set_vmstate INTERP
4698 | lea RA, [BASE+RD*8]
4699 | neg RA
4700 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4701 | mov PC, [BASE-8] // Fetch PC of caller.
4702 | jmp ->vm_returnc
4703 break;
4704
4705 /* ---------------------------------------------------------------------- */
4706
4707 default:
4708 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4709 exit(2);
4710 break;
4711 }
4712}
4713
4714static int build_backend(BuildCtx *ctx)
4715{
4716 int op;
4717 dasm_growpc(Dst, BC__MAX);
4718 build_subroutines(ctx);
4719 |.code_op
4720 for (op = 0; op < BC__MAX; op++)
4721 build_ins(ctx, (BCOp)op, op);
4722 return BC__MAX;
4723}
4724
4725/* Emit pseudo frame-info for all assembler functions. */
4726static void emit_asm_debug(BuildCtx *ctx)
4727{
4728 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4729 switch (ctx->mode) {
4730 case BUILD_elfasm:
4731 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4732 fprintf(ctx->fp,
4733 ".Lframe0:\n"
4734 "\t.long .LECIE0-.LSCIE0\n"
4735 ".LSCIE0:\n"
4736 "\t.long 0xffffffff\n"
4737 "\t.byte 0x1\n"
4738 "\t.string \"\"\n"
4739 "\t.uleb128 0x1\n"
4740 "\t.sleb128 -8\n"
4741 "\t.byte 0x10\n"
4742 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4743 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4744 "\t.align 8\n"
4745 ".LECIE0:\n\n");
4746 fprintf(ctx->fp,
4747 ".LSFDE0:\n"
4748 "\t.long .LEFDE0-.LASFDE0\n"
4749 ".LASFDE0:\n"
4750 "\t.long .Lframe0\n"
4751 "\t.quad .Lbegin\n"
4752 "\t.quad %d\n"
4753 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4754 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4755 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4756 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4757 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4758#if LJ_NO_UNWIND
4759 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4760 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4761#endif
4762 "\t.align 8\n"
4763 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4764#if LJ_HASFFI
4765 fprintf(ctx->fp,
4766 ".LSFDE1:\n"
4767 "\t.long .LEFDE1-.LASFDE1\n"
4768 ".LASFDE1:\n"
4769 "\t.long .Lframe0\n"
4770 "\t.quad lj_vm_ffi_call\n"
4771 "\t.quad %d\n"
4772 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4773 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4774 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4775 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4776 "\t.align 8\n"
4777 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4778#endif
4779#if !LJ_NO_UNWIND
4780#if LJ_TARGET_SOLARIS
4781 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4782#else
4783 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4784#endif
4785 fprintf(ctx->fp,
4786 ".Lframe1:\n"
4787 "\t.long .LECIE1-.LSCIE1\n"
4788 ".LSCIE1:\n"
4789 "\t.long 0\n"
4790 "\t.byte 0x1\n"
4791 "\t.string \"zPR\"\n"
4792 "\t.uleb128 0x1\n"
4793 "\t.sleb128 -8\n"
4794 "\t.byte 0x10\n"
4795 "\t.uleb128 6\n" /* augmentation length */
4796 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4797 "\t.long lj_err_unwind_dwarf-.\n"
4798 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4799 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4800 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4801 "\t.align 8\n"
4802 ".LECIE1:\n\n");
4803 fprintf(ctx->fp,
4804 ".LSFDE2:\n"
4805 "\t.long .LEFDE2-.LASFDE2\n"
4806 ".LASFDE2:\n"
4807 "\t.long .LASFDE2-.Lframe1\n"
4808 "\t.long .Lbegin-.\n"
4809 "\t.long %d\n"
4810 "\t.uleb128 0\n" /* augmentation length */
4811 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4812 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4813 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4814 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4815 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4816 "\t.align 8\n"
4817 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4818#if LJ_HASFFI
4819 fprintf(ctx->fp,
4820 ".Lframe2:\n"
4821 "\t.long .LECIE2-.LSCIE2\n"
4822 ".LSCIE2:\n"
4823 "\t.long 0\n"
4824 "\t.byte 0x1\n"
4825 "\t.string \"zR\"\n"
4826 "\t.uleb128 0x1\n"
4827 "\t.sleb128 -8\n"
4828 "\t.byte 0x10\n"
4829 "\t.uleb128 1\n" /* augmentation length */
4830 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4831 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4832 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4833 "\t.align 8\n"
4834 ".LECIE2:\n\n");
4835 fprintf(ctx->fp,
4836 ".LSFDE3:\n"
4837 "\t.long .LEFDE3-.LASFDE3\n"
4838 ".LASFDE3:\n"
4839 "\t.long .LASFDE3-.Lframe2\n"
4840 "\t.long lj_vm_ffi_call-.\n"
4841 "\t.long %d\n"
4842 "\t.uleb128 0\n" /* augmentation length */
4843 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4844 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4845 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4846 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4847 "\t.align 8\n"
4848 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4849#endif
4850#endif
4851 break;
4852#if !LJ_NO_UNWIND
4853 /* Mental note: never let Apple design an assembler.
4854 ** Or a linker. Or a plastic case. But I digress.
4855 */
4856 case BUILD_machasm: {
4857#if LJ_HASFFI
4858 int fcsize = 0;
4859#endif
4860 int i;
4861 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4862 fprintf(ctx->fp,
4863 "EH_frame1:\n"
4864 "\t.set L$set$x,LECIEX-LSCIEX\n"
4865 "\t.long L$set$x\n"
4866 "LSCIEX:\n"
4867 "\t.long 0\n"
4868 "\t.byte 0x1\n"
4869 "\t.ascii \"zPR\\0\"\n"
4870 "\t.byte 0x1\n"
4871 "\t.byte 128-8\n"
4872 "\t.byte 0x10\n"
4873 "\t.byte 6\n" /* augmentation length */
4874 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4875 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4876 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4877 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4878 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4879 "\t.align 3\n"
4880 "LECIEX:\n\n");
4881 for (i = 0; i < ctx->nsym; i++) {
4882 const char *name = ctx->sym[i].name;
4883 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4884 if (size == 0) continue;
4885#if LJ_HASFFI
4886 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4887#endif
4888 fprintf(ctx->fp,
4889 "%s.eh:\n"
4890 "LSFDE%d:\n"
4891 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4892 "\t.long L$set$%d\n"
4893 "LASFDE%d:\n"
4894 "\t.long LASFDE%d-EH_frame1\n"
4895 "\t.long %s-.\n"
4896 "\t.long %d\n"
4897 "\t.byte 0\n" /* augmentation length */
4898 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4899 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4900 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4901 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4902 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4903 "\t.align 3\n"
4904 "LEFDE%d:\n\n",
4905 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4906 }
4907#if LJ_HASFFI
4908 if (fcsize) {
4909 fprintf(ctx->fp,
4910 "EH_frame2:\n"
4911 "\t.set L$set$y,LECIEY-LSCIEY\n"
4912 "\t.long L$set$y\n"
4913 "LSCIEY:\n"
4914 "\t.long 0\n"
4915 "\t.byte 0x1\n"
4916 "\t.ascii \"zR\\0\"\n"
4917 "\t.byte 0x1\n"
4918 "\t.byte 128-8\n"
4919 "\t.byte 0x10\n"
4920 "\t.byte 1\n" /* augmentation length */
4921 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4922 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4923 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4924 "\t.align 3\n"
4925 "LECIEY:\n\n");
4926 fprintf(ctx->fp,
4927 "_lj_vm_ffi_call.eh:\n"
4928 "LSFDEY:\n"
4929 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4930 "\t.long L$set$yy\n"
4931 "LASFDEY:\n"
4932 "\t.long LASFDEY-EH_frame2\n"
4933 "\t.long _lj_vm_ffi_call-.\n"
4934 "\t.long %d\n"
4935 "\t.byte 0\n" /* augmentation length */
4936 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4937 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4938 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4939 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4940 "\t.align 3\n"
4941 "LEFDEY:\n\n", fcsize);
4942 }
4943#endif
4944 }
4945 break;
4946#endif
4947 default: /* Difficult for other modes. */
4948 break;
4949 }
4950}
4951
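
Note on the hand-emitted unwind info above: emit_asm_debug() writes DWARF call-frame information as raw assembler directives, so each ".byte"/".uleb128" pair is a CFI opcode followed by its operand. A minimal reference sketch of the constants involved (standard DWARF opcodes and x86-64 register numbering; the names below are illustrative and not part of this commit):

/* Sketch: DWARF CFI opcodes hand-emitted by emit_asm_debug() above. */
enum {
  DW_CFA_def_cfa          = 0x0c,  /* .byte 0xc: CFA = reg 0x7 (rsp) + 8 in the CIE */
  DW_CFA_def_cfa_register = 0x0d,  /* .byte 0xd: CFA register -> rbp in the FFI FDE */
  DW_CFA_def_cfa_offset   = 0x0e,  /* .byte 0xe: CFA offset -> CFRAME_SIZE in each FDE */
  DW_CFA_offset           = 0x80   /* 0x80+reg: reg saved at CFA - 8*n (data align -8) */
};
/* x86-64 DWARF register numbers used above: 3=rbx, 6=rbp, 12..15=r12..r15,
** 16=return address (rip). Hence 0x86 reads as "offset rbp", 0x83 as
** "offset rbx", and 0x80+0x10 records where the return address is saved. */
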
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 811d5e75..1994c0a0 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,24 +115,74 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
122|.if not X64 // x86 stack layout. 122|.if not X64 // x86 stack layout.
123| 123|
124|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 124|.if WIN
125|
126|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
125|.macro saveregs_ 127|.macro saveregs_
126| push edi; push esi; push ebx 128| push edi; push esi; push ebx
129| push extern lj_err_unwind_win
130| fs; push dword [0]
131| fs; mov [0], esp
127| sub esp, CFRAME_SPACE 132| sub esp, CFRAME_SPACE
128|.endmacro 133|.endmacro
129|.macro saveregs 134|.macro restoreregs
130| push ebp; saveregs_ 135| add esp, CFRAME_SPACE
136| fs; pop dword [0]
137| pop edi // Short for esp += 4.
138| pop ebx; pop esi; pop edi; pop ebp
139|.endmacro
140|
141|.else
142|
143|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
144|.macro saveregs_
145| push edi; push esi; push ebx
146| sub esp, CFRAME_SPACE
131|.endmacro 147|.endmacro
132|.macro restoreregs 148|.macro restoreregs
133| add esp, CFRAME_SPACE 149| add esp, CFRAME_SPACE
134| pop ebx; pop esi; pop edi; pop ebp 150| pop ebx; pop esi; pop edi; pop ebp
135|.endmacro 151|.endmacro
136| 152|
153|.endif
154|
155|.macro saveregs
156| push ebp; saveregs_
157|.endmacro
158|
159|.if WIN
160|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
161|.define SAVE_NRES, aword [esp+aword*18]
162|.define SAVE_CFRAME, aword [esp+aword*17]
163|.define SAVE_L, aword [esp+aword*16]
164|//----- 16 byte aligned, ^^^ arguments from C caller
165|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
166|.define SAVE_R4, aword [esp+aword*14]
167|.define SAVE_R3, aword [esp+aword*13]
168|.define SAVE_R2, aword [esp+aword*12]
169|//----- 16 byte aligned
170|.define SAVE_R1, aword [esp+aword*11]
171|.define SEH_FUNC, aword [esp+aword*10]
172|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
173|.define UNUSED2, aword [esp+aword*8]
174|//----- 16 byte aligned
175|.define UNUSED1, aword [esp+aword*7]
176|.define SAVE_PC, aword [esp+aword*6]
177|.define TMP2, aword [esp+aword*5]
178|.define TMP1, aword [esp+aword*4]
179|//----- 16 byte aligned
180|.define ARG4, aword [esp+aword*3]
181|.define ARG3, aword [esp+aword*2]
182|.define ARG2, aword [esp+aword*1]
183|.define ARG1, aword [esp] //<-- esp while in interpreter.
184|//----- 16 byte aligned, ^^^ arguments for C callee
185|.else
137|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 186|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
138|.define SAVE_NRES, aword [esp+aword*14] 187|.define SAVE_NRES, aword [esp+aword*14]
139|.define SAVE_CFRAME, aword [esp+aword*13] 188|.define SAVE_CFRAME, aword [esp+aword*13]
@@ -154,6 +203,7 @@
154|.define ARG2, aword [esp+aword*1] 203|.define ARG2, aword [esp+aword*1]
155|.define ARG1, aword [esp] //<-- esp while in interpreter. 204|.define ARG1, aword [esp] //<-- esp while in interpreter.
156|//----- 16 byte aligned, ^^^ arguments for C callee 205|//----- 16 byte aligned, ^^^ arguments for C callee
206|.endif
157| 207|
158|// FPARGx overlaps ARGx and ARG(x+1) on x86. 208|// FPARGx overlaps ARGx and ARG(x+1) on x86.
159|.define FPARG3, qword [esp+qword*1] 209|.define FPARG3, qword [esp+qword*1]
@@ -389,7 +439,6 @@
389| fpop 439| fpop
390|.endmacro 440|.endmacro
391| 441|
392|.macro fdup; fld st0; .endmacro
393|.macro fpop1; fstp st1; .endmacro 442|.macro fpop1; fstp st1; .endmacro
394| 443|
395|// Synthesize SSE FP constants. 444|// Synthesize SSE FP constants.
@@ -552,6 +601,10 @@ static void build_subroutines(BuildCtx *ctx)
552 |.else 601 |.else
553 | mov eax, FCARG2 // Error return status for vm_pcall. 602 | mov eax, FCARG2 // Error return status for vm_pcall.
554 | mov esp, FCARG1 603 | mov esp, FCARG1
604 |.if WIN
605 | lea FCARG1, SEH_NEXT
606 | fs; mov [0], FCARG1
607 |.endif
555 |.endif 608 |.endif
556 |->vm_unwind_c_eh: // Landing pad for external unwinder. 609 |->vm_unwind_c_eh: // Landing pad for external unwinder.
557 | mov L:RB, SAVE_L 610 | mov L:RB, SAVE_L
@@ -575,6 +628,10 @@ static void build_subroutines(BuildCtx *ctx)
575 |.else 628 |.else
576 | and FCARG1, CFRAME_RAWMASK 629 | and FCARG1, CFRAME_RAWMASK
577 | mov esp, FCARG1 630 | mov esp, FCARG1
631 |.if WIN
632 | lea FCARG1, SEH_NEXT
633 | fs; mov [0], FCARG1
634 |.endif
578 |.endif 635 |.endif
579 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 636 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
580 | mov L:RB, SAVE_L 637 | mov L:RB, SAVE_L
@@ -588,6 +645,19 @@ static void build_subroutines(BuildCtx *ctx)
588 | set_vmstate INTERP 645 | set_vmstate INTERP
589 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 646 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
590 | 647 |
648 |.if WIN and not X64
649 |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
650 | // (void *cframe, void *excptrec, void *unwinder, int errcode)
651 | mov [esp], FCARG1 // Return value for RtlUnwind.
652 | push FCARG2 // Exception record for RtlUnwind.
653 | push 0 // Ignored by RtlUnwind.
654 | push dword [FCARG1+CFRAME_OFS_SEH]
655 | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
656 | mov FCARG1, eax
657 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
658 | ret // Jump to unwinder.
659 |.endif
660 |
591 |//----------------------------------------------------------------------- 661 |//-----------------------------------------------------------------------
592 |//-- Grow stack for calls ----------------------------------------------- 662 |//-- Grow stack for calls -----------------------------------------------
593 |//----------------------------------------------------------------------- 663 |//-----------------------------------------------------------------------
@@ -643,17 +713,18 @@ static void build_subroutines(BuildCtx *ctx)
643 | lea KBASEa, [esp+CFRAME_RESUME] 713 | lea KBASEa, [esp+CFRAME_RESUME]
644 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 714 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
645 | add DISPATCH, GG_G2DISP 715 | add DISPATCH, GG_G2DISP
646 | mov L:RB->cframe, KBASEa
647 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 716 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
648 | mov SAVE_CFRAME, RDa 717 | mov SAVE_CFRAME, RDa
649 |.if X64 718 |.if X64
650 | mov SAVE_NRES, RD 719 | mov SAVE_NRES, RD
651 | mov SAVE_ERRF, RD 720 | mov SAVE_ERRF, RD
652 |.endif 721 |.endif
722 | mov L:RB->cframe, KBASEa
653 | cmp byte L:RB->status, RDL 723 | cmp byte L:RB->status, RDL
654 | je >3 // Initial resume (like a call). 724 | je >2 // Initial resume (like a call).
655 | 725 |
656 | // Resume after yield (like a return). 726 | // Resume after yield (like a return).
727 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
657 | set_vmstate INTERP 728 | set_vmstate INTERP
658 | mov byte L:RB->status, RDL 729 | mov byte L:RB->status, RDL
659 | mov BASE, L:RB->base 730 | mov BASE, L:RB->base
@@ -693,20 +764,19 @@ static void build_subroutines(BuildCtx *ctx)
693 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 764 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
694 |.endif 765 |.endif
695 | 766 |
767 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
696 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 768 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
697 | mov SAVE_CFRAME, KBASEa 769 | mov SAVE_CFRAME, KBASEa
698 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 770 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
771 | add DISPATCH, GG_G2DISP
699 |.if X64 772 |.if X64
700 | mov L:RB->cframe, rsp 773 | mov L:RB->cframe, rsp
701 |.else 774 |.else
702 | mov L:RB->cframe, esp 775 | mov L:RB->cframe, esp
703 |.endif 776 |.endif
704 | 777 |
705 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 778 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
706 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 779 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
707 | add DISPATCH, GG_G2DISP
708 |
709 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
710 | set_vmstate INTERP 780 | set_vmstate INTERP
711 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 781 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
712 | add PC, RA 782 | add PC, RA
@@ -744,14 +814,17 @@ static void build_subroutines(BuildCtx *ctx)
744 | 814 |
745 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 815 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
746 | sub KBASE, L:RB->top 816 | sub KBASE, L:RB->top
817 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
747 | mov SAVE_ERRF, 0 // No error function. 818 | mov SAVE_ERRF, 0 // No error function.
748 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 819 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
820 | add DISPATCH, GG_G2DISP
749 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 821 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
750 | 822 |
751 |.if X64 823 |.if X64
752 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 824 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
753 | mov SAVE_CFRAME, KBASEa 825 | mov SAVE_CFRAME, KBASEa
754 | mov L:RB->cframe, rsp 826 | mov L:RB->cframe, rsp
827 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
755 | 828 |
756 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 829 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
757 |.else 830 |.else
@@ -762,6 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
762 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 835 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
763 | mov SAVE_CFRAME, KBASE 836 | mov SAVE_CFRAME, KBASE
764 | mov L:RB->cframe, esp 837 | mov L:RB->cframe, esp
838 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
765 | 839 |
766 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 840 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
767 |.endif 841 |.endif
@@ -869,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx)
869 |.if DUALNUM 943 |.if DUALNUM
870 | mov TMP2, LJ_TISNUM 944 | mov TMP2, LJ_TISNUM
871 | mov TMP1, RC 945 | mov TMP1, RC
872 |.elif SSE 946 |.else
873 | cvtsi2sd xmm0, RC 947 | cvtsi2sd xmm0, RC
874 | movsd TMPQ, xmm0 948 | movsd TMPQ, xmm0
875 |.else
876 | mov ARG4, RC
877 | fild ARG4
878 | fstp TMPQ
879 |.endif 949 |.endif
880 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 950 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
881 | jmp >1 951 | jmp >1
@@ -929,6 +999,19 @@ static void build_subroutines(BuildCtx *ctx)
929 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 999 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
930 | jmp ->vm_call_dispatch_f 1000 | jmp ->vm_call_dispatch_f
931 | 1001 |
1002 |->vmeta_tgetr:
1003 | mov FCARG1, TAB:RB
1004 | mov RB, BASE // Save BASE.
1005 | mov FCARG2, RC // Caveat: FCARG2 == BASE
1006 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1007 | // cTValue * or NULL returned in eax (RC).
1008 | movzx RA, PC_RA
1009 | mov BASE, RB // Restore BASE.
1010 | test RC, RC
1011 | jnz ->BC_TGETR_Z
1012 | mov dword [BASE+RA*8+4], LJ_TNIL
1013 | jmp ->BC_TGETR2_Z
1014 |
932 |//----------------------------------------------------------------------- 1015 |//-----------------------------------------------------------------------
933 | 1016 |
934 |->vmeta_tsets: 1017 |->vmeta_tsets:
@@ -948,13 +1031,9 @@ static void build_subroutines(BuildCtx *ctx)
948 |.if DUALNUM 1031 |.if DUALNUM
949 | mov TMP2, LJ_TISNUM 1032 | mov TMP2, LJ_TISNUM
950 | mov TMP1, RC 1033 | mov TMP1, RC
951 |.elif SSE 1034 |.else
952 | cvtsi2sd xmm0, RC 1035 | cvtsi2sd xmm0, RC
953 | movsd TMPQ, xmm0 1036 | movsd TMPQ, xmm0
954 |.else
955 | mov ARG4, RC
956 | fild ARG4
957 | fstp TMPQ
958 |.endif 1037 |.endif
959 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 1038 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
960 | jmp >1 1039 | jmp >1
@@ -1020,6 +1099,33 @@ static void build_subroutines(BuildCtx *ctx)
1020 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1099 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1021 | jmp ->vm_call_dispatch_f 1100 | jmp ->vm_call_dispatch_f
1022 | 1101 |
1102 |->vmeta_tsetr:
1103 |.if X64WIN
1104 | mov L:CARG1d, SAVE_L
1105 | mov CARG3d, RC
1106 | mov L:CARG1d->base, BASE
1107 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1108 |.elif X64
1109 | mov L:CARG1d, SAVE_L
1110 | mov CARG2d, TAB:RB
1111 | mov L:CARG1d->base, BASE
1112 | mov RB, BASE // Save BASE.
1113 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1114 |.else
1115 | mov L:RA, SAVE_L
1116 | mov ARG2, TAB:RB
1117 | mov RB, BASE // Save BASE.
1118 | mov ARG3, RC
1119 | mov ARG1, L:RA
1120 | mov L:RA->base, BASE
1121 |.endif
1122 | mov SAVE_PC, PC
1123 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1124 | // TValue * returned in eax (RC).
1125 | movzx RA, PC_RA
1126 | mov BASE, RB // Restore BASE.
1127 | jmp ->BC_TSETR_Z
1128 |
1023 |//-- Comparison metamethods --------------------------------------------- 1129 |//-- Comparison metamethods ---------------------------------------------
1024 | 1130 |
1025 |->vmeta_comp: 1131 |->vmeta_comp:
@@ -1114,6 +1220,26 @@ static void build_subroutines(BuildCtx *ctx)
1114 | jmp <3 1220 | jmp <3
1115 |.endif 1221 |.endif
1116 | 1222 |
1223 |->vmeta_istype:
1224 |.if X64
1225 | mov L:RB, SAVE_L
1226 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1227 | mov CARG2d, RA
1228 | movzx CARG3d, PC_RD
1229 | mov L:CARG1d, L:RB
1230 |.else
1231 | movzx RD, PC_RD
1232 | mov ARG2, RA
1233 | mov L:RB, SAVE_L
1234 | mov ARG3, RD
1235 | mov ARG1, L:RB
1236 | mov L:RB->base, BASE
1237 |.endif
1238 | mov SAVE_PC, PC
1239 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1240 | mov BASE, L:RB->base
1241 | jmp <6
1242 |
1117 |//-- Arithmetic metamethods --------------------------------------------- 1243 |//-- Arithmetic metamethods ---------------------------------------------
1118 | 1244 |
1119 |->vmeta_arith_vno: 1245 |->vmeta_arith_vno:
@@ -1290,19 +1416,6 @@ static void build_subroutines(BuildCtx *ctx)
1290 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1416 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1291 |.endmacro 1417 |.endmacro
1292 | 1418 |
1293 |.macro .ffunc_n, name
1294 | .ffunc_1 name
1295 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1296 | fld qword [BASE]
1297 |.endmacro
1298 |
1299 |.macro .ffunc_n, name, op
1300 | .ffunc_1 name
1301 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1302 | op
1303 | fld qword [BASE]
1304 |.endmacro
1305 |
1306 |.macro .ffunc_nsse, name, op 1419 |.macro .ffunc_nsse, name, op
1307 | .ffunc_1 name 1420 | .ffunc_1 name
1308 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1421 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1313,14 +1426,6 @@ static void build_subroutines(BuildCtx *ctx)
1313 | .ffunc_nsse name, movsd 1426 | .ffunc_nsse name, movsd
1314 |.endmacro 1427 |.endmacro
1315 | 1428 |
1316 |.macro .ffunc_nn, name
1317 | .ffunc_2 name
1318 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1319 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1320 | fld qword [BASE]
1321 | fld qword [BASE+8]
1322 |.endmacro
1323 |
1324 |.macro .ffunc_nnsse, name 1429 |.macro .ffunc_nnsse, name
1325 | .ffunc_2 name 1430 | .ffunc_2 name
1326 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1431 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1418,7 +1523,7 @@ static void build_subroutines(BuildCtx *ctx)
1418 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. 1523 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1419 | mov [BASE-8], TAB:RB 1524 | mov [BASE-8], TAB:RB
1420 | mov RA, TAB:RB->hmask 1525 | mov RA, TAB:RB->hmask
1421 | and RA, STR:RC->hash 1526 | and RA, STR:RC->sid
1422 | imul RA, #NODE 1527 | imul RA, #NODE
1423 | add NODE:RA, TAB:RB->node 1528 | add NODE:RA, TAB:RB->node
1424 |3: // Rearranged logic, because we expect _not_ to find the key. 1529 |3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1526,11 +1631,7 @@ static void build_subroutines(BuildCtx *ctx)
1526 |.else 1631 |.else
1527 | jae ->fff_fallback 1632 | jae ->fff_fallback
1528 |.endif 1633 |.endif
1529 |.if SSE
1530 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1634 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1531 |.else
1532 | fld qword [BASE]; jmp ->fff_resn
1533 |.endif
1534 | 1635 |
1535 |.ffunc_1 tostring 1636 |.ffunc_1 tostring
1536 | // Only handles the string or number case inline. 1637 | // Only handles the string or number case inline.
@@ -1555,9 +1656,9 @@ static void build_subroutines(BuildCtx *ctx)
1555 |.endif 1656 |.endif
1556 | mov L:FCARG1, L:RB 1657 | mov L:FCARG1, L:RB
1557 |.if DUALNUM 1658 |.if DUALNUM
1558 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1659 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1559 |.else 1660 |.else
1560 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1661 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1561 |.endif 1662 |.endif
1562 | // GCstr returned in eax (RD). 1663 | // GCstr returned in eax (RD).
1563 | mov BASE, L:RB->base 1664 | mov BASE, L:RB->base
@@ -1569,55 +1670,35 @@ static void build_subroutines(BuildCtx *ctx)
1569 | je >2 // Missing 2nd arg? 1670 | je >2 // Missing 2nd arg?
1570 |1: 1671 |1:
1571 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback 1672 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1572 | mov L:RB, SAVE_L
1573 | mov L:RB->base, BASE // Add frame since C call can throw.
1574 | mov L:RB->top, BASE // Dummy frame length is ok.
1575 | mov PC, [BASE-4] 1673 | mov PC, [BASE-4]
1674 | mov RB, BASE // Save BASE.
1576 |.if X64WIN 1675 |.if X64WIN
1577 | lea CARG3d, [BASE+8] 1676 | mov CARG1d, [BASE]
1578 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. 1677 | lea CARG3d, [BASE-8]
1579 | mov CARG1d, L:RB 1678 | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE.
1580 |.elif X64 1679 |.elif X64
1581 | mov CARG2d, [BASE] 1680 | mov CARG1d, [BASE]
1582 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. 1681 | lea CARG2d, [BASE+8]
1583 | mov CARG1d, L:RB 1682 | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE.
1584 |.else 1683 |.else
1585 | mov TAB:RD, [BASE] 1684 | mov TAB:RD, [BASE]
1586 | mov ARG2, TAB:RD 1685 | mov ARG1, TAB:RD
1587 | mov ARG1, L:RB
1588 | add BASE, 8 1686 | add BASE, 8
1687 | mov ARG2, BASE
1688 | sub BASE, 8+8
1589 | mov ARG3, BASE 1689 | mov ARG3, BASE
1590 |.endif 1690 |.endif
1591 | mov SAVE_PC, PC // Needed for ITERN fallback. 1691 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1592 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1692 | // 1=found, 0=end, -1=error returned in eax (RD).
1593 | // Flag returned in eax (RD). 1693 | mov BASE, RB // Restore BASE.
1594 | mov BASE, L:RB->base 1694 | test RD, RD; jg ->fff_res2 // Found key/value.
1595 | test RD, RD; jz >3 // End of traversal? 1695 | js ->fff_fallback_2 // Invalid key.
1596 | // Copy key and value to results. 1696 | // End of traversal: return nil.
1597 |.if X64 1697 | mov dword [BASE-4], LJ_TNIL
1598 | mov RBa, [BASE+8] 1698 | jmp ->fff_res1
1599 | mov RDa, [BASE+16]
1600 | mov [BASE-8], RBa
1601 | mov [BASE], RDa
1602 |.else
1603 | mov RB, [BASE+8]
1604 | mov RD, [BASE+12]
1605 | mov [BASE-8], RB
1606 | mov [BASE-4], RD
1607 | mov RB, [BASE+16]
1608 | mov RD, [BASE+20]
1609 | mov [BASE], RB
1610 | mov [BASE+4], RD
1611 |.endif
1612 |->fff_res2:
1613 | mov RD, 1+2
1614 | jmp ->fff_res
1615 |2: // Set missing 2nd arg to nil. 1699 |2: // Set missing 2nd arg to nil.
1616 | mov dword [BASE+12], LJ_TNIL 1700 | mov dword [BASE+12], LJ_TNIL
1617 | jmp <1 1701 | jmp <1
1618 |3: // End of traversal: return nil.
1619 | mov dword [BASE-4], LJ_TNIL
1620 | jmp ->fff_res1
1621 | 1702 |
1622 |.ffunc_1 pairs 1703 |.ffunc_1 pairs
1623 | mov TAB:RB, [BASE] 1704 | mov TAB:RB, [BASE]
@@ -1648,19 +1729,12 @@ static void build_subroutines(BuildCtx *ctx)
1648 | add RD, 1 1729 | add RD, 1
1649 | mov dword [BASE-4], LJ_TISNUM 1730 | mov dword [BASE-4], LJ_TISNUM
1650 | mov dword [BASE-8], RD 1731 | mov dword [BASE-8], RD
1651 |.elif SSE 1732 |.else
1652 | movsd xmm0, qword [BASE+8] 1733 | movsd xmm0, qword [BASE+8]
1653 | sseconst_1 xmm1, RBa 1734 | sseconst_1 xmm1, RBa
1654 | addsd xmm0, xmm1 1735 | addsd xmm0, xmm1
1655 | cvtsd2si RD, xmm0 1736 | cvttsd2si RD, xmm0
1656 | movsd qword [BASE-8], xmm0 1737 | movsd qword [BASE-8], xmm0
1657 |.else
1658 | fld qword [BASE+8]
1659 | fld1
1660 | faddp st1
1661 | fist ARG1
1662 | fstp qword [BASE-8]
1663 | mov RD, ARG1
1664 |.endif 1738 |.endif
1665 | mov TAB:RB, [BASE] 1739 | mov TAB:RB, [BASE]
1666 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1740 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1678,7 +1752,9 @@ static void build_subroutines(BuildCtx *ctx)
1678 | mov [BASE], RB 1752 | mov [BASE], RB
1679 | mov [BASE+4], RD 1753 | mov [BASE+4], RD
1680 |.endif 1754 |.endif
1681 | jmp ->fff_res2 1755 |->fff_res2:
1756 | mov RD, 1+2
1757 | jmp ->fff_res
1682 |2: // Check for empty hash part first. Otherwise call C function. 1758 |2: // Check for empty hash part first. Otherwise call C function.
1683 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 1759 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1684 | mov FCARG1, TAB:RB 1760 | mov FCARG1, TAB:RB
@@ -1707,12 +1783,9 @@ static void build_subroutines(BuildCtx *ctx)
1707 |.if DUALNUM 1783 |.if DUALNUM
1708 | mov dword [BASE+12], LJ_TISNUM 1784 | mov dword [BASE+12], LJ_TISNUM
1709 | mov dword [BASE+8], 0 1785 | mov dword [BASE+8], 0
1710 |.elif SSE 1786 |.else
1711 | xorps xmm0, xmm0 1787 | xorps xmm0, xmm0
1712 | movsd qword [BASE+8], xmm0 1788 | movsd qword [BASE+8], xmm0
1713 |.else
1714 | fldz
1715 | fstp qword [BASE+8]
1716 |.endif 1789 |.endif
1717 | mov RD, 1+3 1790 | mov RD, 1+3
1718 | jmp ->fff_res 1791 | jmp ->fff_res
@@ -1825,7 +1898,6 @@ static void build_subroutines(BuildCtx *ctx)
1825 | mov ARG3, RA 1898 | mov ARG3, RA
1826 |.endif 1899 |.endif
1827 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1900 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1828 | set_vmstate INTERP
1829 | 1901 |
1830 | mov L:RB, SAVE_L 1902 | mov L:RB, SAVE_L
1831 |.if X64 1903 |.if X64
@@ -1834,6 +1906,9 @@ static void build_subroutines(BuildCtx *ctx)
1834 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1906 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1835 |.endif 1907 |.endif
1836 | mov BASE, L:RB->base 1908 | mov BASE, L:RB->base
1909 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1910 | set_vmstate INTERP
1911 |
1837 | cmp eax, LUA_YIELD 1912 | cmp eax, LUA_YIELD
1838 | ja >8 1913 | ja >8
1839 |4: 1914 |4:
@@ -1948,12 +2023,10 @@ static void build_subroutines(BuildCtx *ctx)
1948 |->fff_resi: // Dummy. 2023 |->fff_resi: // Dummy.
1949 |.endif 2024 |.endif
1950 | 2025 |
1951 |.if SSE
1952 |->fff_resn: 2026 |->fff_resn:
1953 | mov PC, [BASE-4] 2027 | mov PC, [BASE-4]
1954 | fstp qword [BASE-8] 2028 | fstp qword [BASE-8]
1955 | jmp ->fff_res1 2029 | jmp ->fff_res1
1956 |.endif
1957 | 2030 |
1958 | .ffunc_1 math_abs 2031 | .ffunc_1 math_abs
1959 |.if DUALNUM 2032 |.if DUALNUM
@@ -1977,8 +2050,6 @@ static void build_subroutines(BuildCtx *ctx)
1977 |.else 2050 |.else
1978 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2051 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1979 |.endif 2052 |.endif
1980 |
1981 |.if SSE
1982 | movsd xmm0, qword [BASE] 2053 | movsd xmm0, qword [BASE]
1983 | sseconst_abs xmm1, RDa 2054 | sseconst_abs xmm1, RDa
1984 | andps xmm0, xmm1 2055 | andps xmm0, xmm1
@@ -1986,15 +2057,6 @@ static void build_subroutines(BuildCtx *ctx)
1986 | mov PC, [BASE-4] 2057 | mov PC, [BASE-4]
1987 | movsd qword [BASE-8], xmm0 2058 | movsd qword [BASE-8], xmm0
1988 | // fallthrough 2059 | // fallthrough
1989 |.else
1990 | fld qword [BASE]
1991 | fabs
1992 | // fallthrough
1993 |->fff_resxmm0: // Dummy.
1994 |->fff_resn:
1995 | mov PC, [BASE-4]
1996 | fstp qword [BASE-8]
1997 |.endif
1998 | 2060 |
1999 |->fff_res1: 2061 |->fff_res1:
2000 | mov RD, 1+1 2062 | mov RD, 1+1
@@ -2021,6 +2083,12 @@ static void build_subroutines(BuildCtx *ctx)
2021 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2083 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
2022 | jmp ->vm_return 2084 | jmp ->vm_return
2023 | 2085 |
2086 |.if X64
2087 |.define fff_resfp, fff_resxmm0
2088 |.else
2089 |.define fff_resfp, fff_resn
2090 |.endif
2091 |
2024 |.macro math_round, func 2092 |.macro math_round, func
2025 | .ffunc math_ .. func 2093 | .ffunc math_ .. func
2026 |.if DUALNUM 2094 |.if DUALNUM
@@ -2031,107 +2099,75 @@ static void build_subroutines(BuildCtx *ctx)
2031 |.else 2099 |.else
2032 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2100 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2033 |.endif 2101 |.endif
2034 |.if SSE
2035 | movsd xmm0, qword [BASE] 2102 | movsd xmm0, qword [BASE]
2036 | call ->vm_ .. func 2103 | call ->vm_ .. func .. _sse
2037 | .if DUALNUM 2104 |.if DUALNUM
2038 | cvtsd2si RB, xmm0 2105 | cvttsd2si RB, xmm0
2039 | cmp RB, 0x80000000 2106 | cmp RB, 0x80000000
2040 | jne ->fff_resi 2107 | jne ->fff_resi
2041 | cvtsi2sd xmm1, RB 2108 | cvtsi2sd xmm1, RB
2042 | ucomisd xmm0, xmm1 2109 | ucomisd xmm0, xmm1
2043 | jp ->fff_resxmm0 2110 | jp ->fff_resxmm0
2044 | je ->fff_resi 2111 | je ->fff_resi
2045 | .endif
2046 | jmp ->fff_resxmm0
2047 |.else
2048 | fld qword [BASE]
2049 | call ->vm_ .. func
2050 | .if DUALNUM
2051 | fist ARG1
2052 | mov RB, ARG1
2053 | cmp RB, 0x80000000; jne >2
2054 | fdup
2055 | fild ARG1
2056 | fcomparepp
2057 | jp ->fff_resn
2058 | jne ->fff_resn
2059 |2:
2060 | fpop
2061 | jmp ->fff_resi
2062 | .else
2063 | jmp ->fff_resn
2064 | .endif
2065 |.endif 2112 |.endif
2113 | jmp ->fff_resxmm0
2066 |.endmacro 2114 |.endmacro
2067 | 2115 |
2068 | math_round floor 2116 | math_round floor
2069 | math_round ceil 2117 | math_round ceil
2070 | 2118 |
2071 |.if SSE
2072 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2119 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2073 |.else
2074 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2075 |.endif
2076 | 2120 |
2077 |.ffunc math_log 2121 |.ffunc math_log
2078 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2122 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2079 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2123 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2080 | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn 2124 | movsd xmm0, qword [BASE]
2081 | 2125 |.if not X64
2082 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2126 | movsd FPARG1, xmm0
2083 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2127 |.endif
2084 | 2128 | mov RB, BASE
2085 |.ffunc_n math_sin; fsin; jmp ->fff_resn 2129 | call extern log
2086 |.ffunc_n math_cos; fcos; jmp ->fff_resn 2130 | mov BASE, RB
2087 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn 2131 | jmp ->fff_resfp
2088 |
2089 |.ffunc_n math_asin
2090 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
2091 | jmp ->fff_resn
2092 |.ffunc_n math_acos
2093 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
2094 | jmp ->fff_resn
2095 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2096 | 2132 |
2097 |.macro math_extern, func 2133 |.macro math_extern, func
2098 |.if SSE
2099 | .ffunc_nsse math_ .. func 2134 | .ffunc_nsse math_ .. func
2100 | .if not X64 2135 |.if not X64
2101 | movsd FPARG1, xmm0 2136 | movsd FPARG1, xmm0
2102 | .endif
2103 |.else
2104 | .ffunc_n math_ .. func
2105 | fstp FPARG1
2106 |.endif 2137 |.endif
2107 | mov RB, BASE 2138 | mov RB, BASE
2108 | call extern lj_vm_ .. func 2139 | call extern func
2109 | mov BASE, RB 2140 | mov BASE, RB
2110 | .if X64 2141 | jmp ->fff_resfp
2111 | jmp ->fff_resxmm0
2112 | .else
2113 | jmp ->fff_resn
2114 | .endif
2115 |.endmacro 2142 |.endmacro
2116 | 2143 |
2144 |.macro math_extern2, func
2145 | .ffunc_nnsse math_ .. func
2146 |.if not X64
2147 | movsd FPARG1, xmm0
2148 | movsd FPARG3, xmm1
2149 |.endif
2150 | mov RB, BASE
2151 | call extern func
2152 | mov BASE, RB
2153 | jmp ->fff_resfp
2154 |.endmacro
2155 |
2156 | math_extern log10
2157 | math_extern exp
2158 | math_extern sin
2159 | math_extern cos
2160 | math_extern tan
2161 | math_extern asin
2162 | math_extern acos
2163 | math_extern atan
2117 | math_extern sinh 2164 | math_extern sinh
2118 | math_extern cosh 2165 | math_extern cosh
2119 | math_extern tanh 2166 | math_extern tanh
2167 | math_extern2 pow
2168 | math_extern2 atan2
2169 | math_extern2 fmod
2120 | 2170 |
2121 |->ff_math_deg:
2122 |.if SSE
2123 |.ffunc_nsse math_rad
2124 | mov CFUNC:RB, [BASE-8]
2125 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2126 | jmp ->fff_resxmm0
2127 |.else
2128 |.ffunc_n math_rad
2129 | mov CFUNC:RB, [BASE-8]
2130 | fmul qword CFUNC:RB->upvalue[0]
2131 | jmp ->fff_resn
2132 |.endif
2133 |
2134 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2135 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2171 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2136 | 2172 |
2137 |.ffunc_1 math_frexp 2173 |.ffunc_1 math_frexp
@@ -2146,65 +2182,34 @@ static void build_subroutines(BuildCtx *ctx)
2146 | cmp RB, 0x00200000; jb >4 2182 | cmp RB, 0x00200000; jb >4
2147 |1: 2183 |1:
2148 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2184 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2149 |.if SSE
2150 | cvtsi2sd xmm0, RB 2185 | cvtsi2sd xmm0, RB
2151 |.else
2152 | mov TMP1, RB; fild TMP1
2153 |.endif
2154 | mov RB, [BASE-4] 2186 | mov RB, [BASE-4]
2155 | and RB, 0x800fffff // Mask off exponent. 2187 | and RB, 0x800fffff // Mask off exponent.
2156 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2188 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2157 | mov [BASE-4], RB 2189 | mov [BASE-4], RB
2158 |2: 2190 |2:
2159 |.if SSE
2160 | movsd qword [BASE], xmm0 2191 | movsd qword [BASE], xmm0
2161 |.else
2162 | fstp qword [BASE]
2163 |.endif
2164 | mov RD, 1+2 2192 | mov RD, 1+2
2165 | jmp ->fff_res 2193 | jmp ->fff_res
2166 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2194 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2167 |.if SSE
2168 | xorps xmm0, xmm0; jmp <2 2195 | xorps xmm0, xmm0; jmp <2
2169 |.else
2170 | fldz; jmp <2
2171 |.endif
2172 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2196 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2173 |.if SSE
2174 | movsd xmm0, qword [BASE] 2197 | movsd xmm0, qword [BASE]
2175 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2198 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2176 | mulsd xmm0, xmm1 2199 | mulsd xmm0, xmm1
2177 | movsd qword [BASE-8], xmm0 2200 | movsd qword [BASE-8], xmm0
2178 |.else
2179 | fld qword [BASE]
2180 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2181 | fstp qword [BASE-8]
2182 |.endif
2183 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2201 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2184 | 2202 |
2185 |.if SSE
2186 |.ffunc_nsse math_modf 2203 |.ffunc_nsse math_modf
2187 |.else
2188 |.ffunc_n math_modf
2189 |.endif
2190 | mov RB, [BASE+4] 2204 | mov RB, [BASE+4]
2191 | mov PC, [BASE-4] 2205 | mov PC, [BASE-4]
2192 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2206 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2193 |.if SSE
2194 | movaps xmm4, xmm0 2207 | movaps xmm4, xmm0
2195 | call ->vm_trunc 2208 | call ->vm_trunc_sse
2196 | subsd xmm4, xmm0 2209 | subsd xmm4, xmm0
2197 |1: 2210 |1:
2198 | movsd qword [BASE-8], xmm0 2211 | movsd qword [BASE-8], xmm0
2199 | movsd qword [BASE], xmm4 2212 | movsd qword [BASE], xmm4
2200 |.else
2201 | fdup
2202 | call ->vm_trunc
2203 | fsub st1, st0
2204 |1:
2205 | fstp qword [BASE-8]
2206 | fstp qword [BASE]
2207 |.endif
2208 | mov RC, [BASE-4]; mov RB, [BASE+4] 2213 | mov RC, [BASE-4]; mov RB, [BASE+4]
2209 | xor RC, RB; js >3 // Need to adjust sign? 2214 | xor RC, RB; js >3 // Need to adjust sign?
2210 |2: 2215 |2:
@@ -2214,25 +2219,10 @@ static void build_subroutines(BuildCtx *ctx)
2214 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2219 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2215 | jmp <2 2220 | jmp <2
2216 |4: 2221 |4:
2217 |.if SSE
2218 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2222 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2219 |.else
2220 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2221 |.endif
2222 |
2223 |.ffunc_nnr math_fmod
2224 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2225 | fpop1
2226 | jmp ->fff_resn
2227 |
2228 |.if SSE
2229 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2230 |.else
2231 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2232 |.endif
2233 | 2223 |
2234 |.macro math_minmax, name, cmovop, fcmovop, sseop 2224 |.macro math_minmax, name, cmovop, sseop
2235 | .ffunc name 2225 | .ffunc_1 name
2236 | mov RA, 2 2226 | mov RA, 2
2237 | cmp dword [BASE+4], LJ_TISNUM 2227 | cmp dword [BASE+4], LJ_TISNUM
2238 |.if DUALNUM 2228 |.if DUALNUM
@@ -2248,12 +2238,7 @@ static void build_subroutines(BuildCtx *ctx)
2248 |3: 2238 |3:
2249 | ja ->fff_fallback 2239 | ja ->fff_fallback
2250 | // Convert intermediate result to number and continue below. 2240 | // Convert intermediate result to number and continue below.
2251 |.if SSE
2252 | cvtsi2sd xmm0, RB 2241 | cvtsi2sd xmm0, RB
2253 |.else
2254 | mov TMP1, RB
2255 | fild TMP1
2256 |.endif
2257 | jmp >6 2242 | jmp >6
2258 |4: 2243 |4:
2259 | ja ->fff_fallback 2244 | ja ->fff_fallback
@@ -2261,7 +2246,6 @@ static void build_subroutines(BuildCtx *ctx)
2261 | jae ->fff_fallback 2246 | jae ->fff_fallback
2262 |.endif 2247 |.endif
2263 | 2248 |
2264 |.if SSE
2265 | movsd xmm0, qword [BASE] 2249 | movsd xmm0, qword [BASE]
2266 |5: // Handle numbers or integers. 2250 |5: // Handle numbers or integers.
2267 | cmp RA, RD; jae ->fff_resxmm0 2251 | cmp RA, RD; jae ->fff_resxmm0
@@ -2280,48 +2264,13 @@ static void build_subroutines(BuildCtx *ctx)
2280 | sseop xmm0, xmm1 2264 | sseop xmm0, xmm1
2281 | add RA, 1 2265 | add RA, 1
2282 | jmp <5 2266 | jmp <5
2283 |.else
2284 | fld qword [BASE]
2285 |5: // Handle numbers or integers.
2286 | cmp RA, RD; jae ->fff_resn
2287 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2288 |.if DUALNUM
2289 | jb >6
2290 | ja >9
2291 | fild dword [BASE+RA*8-8]
2292 | jmp >7
2293 |.else
2294 | jae >9
2295 |.endif
2296 |6:
2297 | fld qword [BASE+RA*8-8]
2298 |7:
2299 | fucomi st1; fcmovop st1; fpop1
2300 | add RA, 1
2301 | jmp <5
2302 |.endif
2303 |.endmacro 2267 |.endmacro
2304 | 2268 |
2305 | math_minmax math_min, cmovg, fcmovnbe, minsd 2269 | math_minmax math_min, cmovg, minsd
2306 | math_minmax math_max, cmovl, fcmovbe, maxsd 2270 | math_minmax math_max, cmovl, maxsd
2307 |.if not SSE
2308 |9:
2309 | fpop; jmp ->fff_fallback
2310 |.endif
2311 | 2271 |
2312 |//-- String library ----------------------------------------------------- 2272 |//-- String library -----------------------------------------------------
2313 | 2273 |
2314 |.ffunc_1 string_len
2315 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2316 | mov STR:RB, [BASE]
2317 |.if DUALNUM
2318 | mov RB, dword STR:RB->len; jmp ->fff_resi
2319 |.elif SSE
2320 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2321 |.else
2322 | fild dword STR:RB->len; jmp ->fff_resn
2323 |.endif
2324 |
2325 |.ffunc string_byte // Only handle the 1-arg case here. 2274 |.ffunc string_byte // Only handle the 1-arg case here.
2326 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2275 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2327 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2276 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2332,10 +2281,8 @@ static void build_subroutines(BuildCtx *ctx)
2332 | movzx RB, byte STR:RB[1] 2281 | movzx RB, byte STR:RB[1]
2333 |.if DUALNUM 2282 |.if DUALNUM
2334 | jmp ->fff_resi 2283 | jmp ->fff_resi
2335 |.elif SSE
2336 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2337 |.else 2284 |.else
2338 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2285 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2339 |.endif 2286 |.endif
2340 | 2287 |
2341 |.ffunc string_char // Only handle the 1-arg case here. 2288 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2347,16 +2294,11 @@ static void build_subroutines(BuildCtx *ctx)
2347 | mov RB, dword [BASE] 2294 | mov RB, dword [BASE]
2348 | cmp RB, 255; ja ->fff_fallback 2295 | cmp RB, 255; ja ->fff_fallback
2349 | mov TMP2, RB 2296 | mov TMP2, RB
2350 |.elif SSE 2297 |.else
2351 | jae ->fff_fallback 2298 | jae ->fff_fallback
2352 | cvttsd2si RB, qword [BASE] 2299 | cvttsd2si RB, qword [BASE]
2353 | cmp RB, 255; ja ->fff_fallback 2300 | cmp RB, 255; ja ->fff_fallback
2354 | mov TMP2, RB 2301 | mov TMP2, RB
2355 |.else
2356 | jae ->fff_fallback
2357 | fld qword [BASE]
2358 | fistp TMP2
2359 | cmp TMP2, 255; ja ->fff_fallback
2360 |.endif 2302 |.endif
2361 |.if X64 2303 |.if X64
2362 | mov TMP3, 1 2304 | mov TMP3, 1
@@ -2377,6 +2319,7 @@ static void build_subroutines(BuildCtx *ctx)
2377 |.endif 2319 |.endif
2378 | mov SAVE_PC, PC 2320 | mov SAVE_PC, PC
2379 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2321 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2322 |->fff_resstr:
2380 | // GCstr * returned in eax (RD). 2323 | // GCstr * returned in eax (RD).
2381 | mov BASE, L:RB->base 2324 | mov BASE, L:RB->base
2382 | mov PC, [BASE-4] 2325 | mov PC, [BASE-4]
@@ -2394,14 +2337,10 @@ static void build_subroutines(BuildCtx *ctx)
2394 | jne ->fff_fallback 2337 | jne ->fff_fallback
2395 | mov RB, dword [BASE+16] 2338 | mov RB, dword [BASE+16]
2396 | mov TMP2, RB 2339 | mov TMP2, RB
2397 |.elif SSE 2340 |.else
2398 | jae ->fff_fallback 2341 | jae ->fff_fallback
2399 | cvttsd2si RB, qword [BASE+16] 2342 | cvttsd2si RB, qword [BASE+16]
2400 | mov TMP2, RB 2343 | mov TMP2, RB
2401 |.else
2402 | jae ->fff_fallback
2403 | fld qword [BASE+16]
2404 | fistp TMP2
2405 |.endif 2344 |.endif
2406 |1: 2345 |1:
2407 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2346 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2416,12 +2355,8 @@ static void build_subroutines(BuildCtx *ctx)
2416 | mov RB, STR:RB->len 2355 | mov RB, STR:RB->len
2417 |.if DUALNUM 2356 |.if DUALNUM
2418 | mov RA, dword [BASE+8] 2357 | mov RA, dword [BASE+8]
2419 |.elif SSE
2420 | cvttsd2si RA, qword [BASE+8]
2421 |.else 2358 |.else
2422 | fld qword [BASE+8] 2359 | cvttsd2si RA, qword [BASE+8]
2423 | fistp ARG3
2424 | mov RA, ARG3
2425 |.endif 2360 |.endif
2426 | mov RC, TMP2 2361 | mov RC, TMP2
2427 | cmp RB, RC // len < end? (unsigned compare) 2362 | cmp RB, RC // len < end? (unsigned compare)
@@ -2465,136 +2400,34 @@ static void build_subroutines(BuildCtx *ctx)
2465 | xor RC, RC // Zero length. Any ptr in RB is ok. 2400 | xor RC, RC // Zero length. Any ptr in RB is ok.
2466 | jmp <4 2401 | jmp <4
2467 | 2402 |
2468 |.ffunc string_rep // Only handle the 1-char case inline. 2403 |.macro ffstring_op, name
2469 | ffgccheck 2404 | .ffunc_1 string_ .. name
2470 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2471 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2472 | cmp dword [BASE+12], LJ_TISNUM
2473 | mov STR:RB, [BASE]
2474 |.if DUALNUM
2475 | jne ->fff_fallback
2476 | mov RC, dword [BASE+8]
2477 |.elif SSE
2478 | jae ->fff_fallback
2479 | cvttsd2si RC, qword [BASE+8]
2480 |.else
2481 | jae ->fff_fallback
2482 | fld qword [BASE+8]
2483 | fistp TMP2
2484 | mov RC, TMP2
2485 |.endif
2486 | test RC, RC
2487 | jle ->fff_emptystr // Count <= 0? (or non-int)
2488 | cmp dword STR:RB->len, 1
2489 | jb ->fff_emptystr // Zero length string?
2490 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2491 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2492 | movzx RA, byte STR:RB[1]
2493 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2494 |.if X64
2495 | mov TMP3, RC
2496 |.else
2497 | mov ARG3, RC
2498 |.endif
2499 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2500 | mov [RB], RAL
2501 | add RB, 1
2502 | sub RC, 1
2503 | jnz <1
2504 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2505 | jmp ->fff_newstr
2506 |
2507 |.ffunc_1 string_reverse
2508 | ffgccheck
2509 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2510 | mov STR:RB, [BASE]
2511 | mov RC, STR:RB->len
2512 | test RC, RC
2513 | jz ->fff_emptystr // Zero length string?
2514 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2515 | add RB, #STR
2516 | mov TMP2, PC // Need another temp register.
2517 |.if X64
2518 | mov TMP3, RC
2519 |.else
2520 | mov ARG3, RC
2521 |.endif
2522 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2523 |1:
2524 | movzx RA, byte [RB]
2525 | add RB, 1
2526 | sub RC, 1
2527 | mov [PC+RC], RAL
2528 | jnz <1
2529 | mov RD, PC
2530 | mov PC, TMP2
2531 | jmp ->fff_newstr
2532 |
2533 |.macro ffstring_case, name, lo, hi
2534 | .ffunc_1 name
2535 | ffgccheck 2405 | ffgccheck
2536 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2406 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2537 | mov STR:RB, [BASE] 2407 | mov L:RB, SAVE_L
2538 | mov RC, STR:RB->len 2408 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2539 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2409 | mov L:RB->base, BASE
2540 | add RB, #STR 2410 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2541 | mov TMP2, PC // Need another temp register. 2411 | mov RCa, SBUF:FCARG1->b
2542 |.if X64 2412 | mov SBUF:FCARG1->L, L:RB
2543 | mov TMP3, RC 2413 | mov SBUF:FCARG1->w, RCa
2544 |.else 2414 | mov SAVE_PC, PC
2545 | mov ARG3, RC 2415 | call extern lj_buf_putstr_ .. name .. @8
2546 |.endif 2416 | mov FCARG1, eax
2547 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] 2417 | call extern lj_buf_tostr@4
2548 | jmp >3 2418 | jmp ->fff_resstr
2549 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2550 | movzx RA, byte [RB+RC]
2551 | cmp RA, lo
2552 | jb >2
2553 | cmp RA, hi
2554 | ja >2
2555 | xor RA, 0x20
2556 |2:
2557 | mov [PC+RC], RAL
2558 |3:
2559 | sub RC, 1
2560 | jns <1
2561 | mov RD, PC
2562 | mov PC, TMP2
2563 | jmp ->fff_newstr
2564 |.endmacro 2419 |.endmacro
2565 | 2420 |
2566 |ffstring_case string_lower, 0x41, 0x5a 2421 |ffstring_op reverse
2567 |ffstring_case string_upper, 0x61, 0x7a 2422 |ffstring_op lower
2568 | 2423 |ffstring_op upper
2569 |//-- Table library ------------------------------------------------------
2570 |
2571 |.ffunc_1 table_getn
2572 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2573 | mov RB, BASE // Save BASE.
2574 | mov TAB:FCARG1, [BASE]
2575 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2576 | // Length of table returned in eax (RD).
2577 | mov BASE, RB // Restore BASE.
2578 |.if DUALNUM
2579 | mov RB, RD; jmp ->fff_resi
2580 |.elif SSE
2581 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2582 |.else
2583 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2584 |.endif
2585 | 2424 |
2586 |//-- Bit library -------------------------------------------------------- 2425 |//-- Bit library --------------------------------------------------------
2587 | 2426 |
2588 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2589 |
2590 |.macro .ffunc_bit, name, kind, fdef 2427 |.macro .ffunc_bit, name, kind, fdef
2591 | fdef name 2428 | fdef name
2592 |.if kind == 2 2429 |.if kind == 2
2593 |.if SSE
2594 | sseconst_tobit xmm1, RBa 2430 | sseconst_tobit xmm1, RBa
2595 |.else
2596 | mov TMP1, TOBIT_BIAS
2597 |.endif
2598 |.endif 2431 |.endif
2599 | cmp dword [BASE+4], LJ_TISNUM 2432 | cmp dword [BASE+4], LJ_TISNUM
2600 |.if DUALNUM 2433 |.if DUALNUM
@@ -2610,24 +2443,12 @@ static void build_subroutines(BuildCtx *ctx)
2610 |.else 2443 |.else
2611 | jae ->fff_fallback 2444 | jae ->fff_fallback
2612 |.endif 2445 |.endif
2613 |.if SSE
2614 | movsd xmm0, qword [BASE] 2446 | movsd xmm0, qword [BASE]
2615 |.if kind < 2 2447 |.if kind < 2
2616 | sseconst_tobit xmm1, RBa 2448 | sseconst_tobit xmm1, RBa
2617 |.endif 2449 |.endif
2618 | addsd xmm0, xmm1 2450 | addsd xmm0, xmm1
2619 | movd RB, xmm0 2451 | movd RB, xmm0
2620 |.else
2621 | fld qword [BASE]
2622 |.if kind < 2
2623 | mov TMP1, TOBIT_BIAS
2624 |.endif
2625 | fadd TMP1
2626 | fstp FPARG1
2627 |.if kind > 0
2628 | mov RB, ARG1
2629 |.endif
2630 |.endif
2631 |2: 2452 |2:
2632 |.endmacro 2453 |.endmacro
2633 | 2454 |
@@ -2636,15 +2457,7 @@ static void build_subroutines(BuildCtx *ctx)
2636 |.endmacro 2457 |.endmacro
2637 | 2458 |
2638 |.ffunc_bit bit_tobit, 0 2459 |.ffunc_bit bit_tobit, 0
2639 |.if DUALNUM or SSE
2640 |.if not SSE
2641 | mov RB, ARG1
2642 |.endif
2643 | jmp ->fff_resbit 2460 | jmp ->fff_resbit
2644 |.else
2645 | fild ARG1
2646 | jmp ->fff_resn
2647 |.endif
2648 | 2461 |
2649 |.macro .ffunc_bit_op, name, ins 2462 |.macro .ffunc_bit_op, name, ins
2650 | .ffunc_bit name, 2 2463 | .ffunc_bit name, 2
@@ -2664,17 +2477,10 @@ static void build_subroutines(BuildCtx *ctx)
2664 |.else 2477 |.else
2665 | jae ->fff_fallback_bit_op 2478 | jae ->fff_fallback_bit_op
2666 |.endif 2479 |.endif
2667 |.if SSE
2668 | movsd xmm0, qword [RD] 2480 | movsd xmm0, qword [RD]
2669 | addsd xmm0, xmm1 2481 | addsd xmm0, xmm1
2670 | movd RA, xmm0 2482 | movd RA, xmm0
2671 | ins RB, RA 2483 | ins RB, RA
2672 |.else
2673 | fld qword [RD]
2674 | fadd TMP1
2675 | fstp FPARG1
2676 | ins RB, ARG1
2677 |.endif
2678 | sub RD, 8 2484 | sub RD, 8
2679 | jmp <1 2485 | jmp <1
2680 |.endmacro 2486 |.endmacro
@@ -2691,15 +2497,10 @@ static void build_subroutines(BuildCtx *ctx)
2691 | not RB 2497 | not RB
2692 |.if DUALNUM 2498 |.if DUALNUM
2693 | jmp ->fff_resbit 2499 | jmp ->fff_resbit
2694 |.elif SSE 2500 |.else
2695 |->fff_resbit: 2501 |->fff_resbit:
2696 | cvtsi2sd xmm0, RB 2502 | cvtsi2sd xmm0, RB
2697 | jmp ->fff_resxmm0 2503 | jmp ->fff_resxmm0
2698 |.else
2699 |->fff_resbit:
2700 | mov ARG1, RB
2701 | fild ARG1
2702 | jmp ->fff_resn
2703 |.endif 2504 |.endif
2704 | 2505 |
2705 |->fff_fallback_bit_op: 2506 |->fff_fallback_bit_op:
@@ -2712,22 +2513,13 @@ static void build_subroutines(BuildCtx *ctx)
2712 | // Note: no inline conversion from number for 2nd argument! 2513 | // Note: no inline conversion from number for 2nd argument!
2713 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2514 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2714 | mov RA, dword [BASE+8] 2515 | mov RA, dword [BASE+8]
2715 |.elif SSE 2516 |.else
2716 | .ffunc_nnsse name 2517 | .ffunc_nnsse name
2717 | sseconst_tobit xmm2, RBa 2518 | sseconst_tobit xmm2, RBa
2718 | addsd xmm0, xmm2 2519 | addsd xmm0, xmm2
2719 | addsd xmm1, xmm2 2520 | addsd xmm1, xmm2
2720 | movd RB, xmm0 2521 | movd RB, xmm0
2721 | movd RA, xmm1 2522 | movd RA, xmm1
2722 |.else
2723 | .ffunc_nn name
2724 | mov TMP1, TOBIT_BIAS
2725 | fadd TMP1
2726 | fstp FPARG3
2727 | fadd TMP1
2728 | fstp FPARG1
2729 | mov RA, ARG3
2730 | mov RB, ARG1
2731 |.endif 2523 |.endif
2732 | ins RB, cl // Assumes RA is ecx. 2524 | ins RB, cl // Assumes RA is ecx.
2733 | jmp ->fff_resbit 2525 | jmp ->fff_resbit
@@ -2861,7 +2653,7 @@ static void build_subroutines(BuildCtx *ctx)
2861 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2653 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2862 | mov FCARG1, L:RB 2654 | mov FCARG1, L:RB
2863 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2655 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2864 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2656 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2865 |3: 2657 |3:
2866 | mov BASE, L:RB->base 2658 | mov BASE, L:RB->base
2867 |4: 2659 |4:
@@ -2932,6 +2724,79 @@ static void build_subroutines(BuildCtx *ctx)
2932 | add NARGS:RD, 1 2724 | add NARGS:RD, 1
2933 | jmp RBa 2725 | jmp RBa
2934 | 2726 |
2727 |->cont_stitch: // Trace stitching.
2728 |.if JIT
2729 | // BASE = base, RC = result, RB = mbase
2730 | mov TRACE:RA, [RB-24] // Save previous trace.
2731 | mov TMP1, TRACE:RA
2732 | mov TMP3, DISPATCH // Need one more register.
2733 | mov DISPATCH, MULTRES
2734 | movzx RA, PC_RA
2735 | lea RA, [BASE+RA*8] // Call base.
2736 | sub DISPATCH, 1
2737 | jz >2
2738 |1: // Move results down.
2739 |.if X64
2740 | mov RBa, [RC]
2741 | mov [RA], RBa
2742 |.else
2743 | mov RB, [RC]
2744 | mov [RA], RB
2745 | mov RB, [RC+4]
2746 | mov [RA+4], RB
2747 |.endif
2748 | add RC, 8
2749 | add RA, 8
2750 | sub DISPATCH, 1
2751 | jnz <1
2752 |2:
2753 | movzx RC, PC_RA
2754 | movzx RB, PC_RB
2755 | add RC, RB
2756 | lea RC, [BASE+RC*8-8]
2757 |3:
2758 | cmp RC, RA
2759 | ja >9 // More results wanted?
2760 |
2761 | mov DISPATCH, TMP3
2762 | mov TRACE:RD, TMP1 // Get previous trace.
2763 | movzx RB, word TRACE:RD->traceno
2764 | movzx RD, word TRACE:RD->link
2765 | cmp RD, RB
2766 | je ->cont_nop // Blacklisted.
2767 | test RD, RD
2768 | jne =>BC_JLOOP // Jump to stitched trace.
2769 |
2770 | // Stitch a new trace to the previous trace.
2771 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2772 | mov L:RB, SAVE_L
2773 | mov L:RB->base, BASE
2774 | mov FCARG2, PC
2775 | lea FCARG1, [DISPATCH+GG_DISP2J]
2776 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2777 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2778 | mov BASE, L:RB->base
2779 | jmp ->cont_nop
2780 |
2781 |9: // Fill up results with nil.
2782 | mov dword [RA+4], LJ_TNIL
2783 | add RA, 8
2784 | jmp <3
2785 |.endif
2786 |
2787 |->vm_profhook: // Dispatch target for profiler hook.
2788#if LJ_HASPROFILE
2789 | mov L:RB, SAVE_L
2790 | mov L:RB->base, BASE
2791 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2792 | mov FCARG1, L:RB
2793 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2794 | mov BASE, L:RB->base
2795 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2796 | sub PC, 4
2797 | jmp ->cont_nop
2798#endif
2799 |
2935 |//----------------------------------------------------------------------- 2800 |//-----------------------------------------------------------------------
2936 |//-- Trace exit handler ------------------------------------------------- 2801 |//-- Trace exit handler -------------------------------------------------
2937 |//----------------------------------------------------------------------- 2802 |//-----------------------------------------------------------------------
@@ -2984,10 +2849,9 @@ static void build_subroutines(BuildCtx *ctx)
2984 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2849 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2985 |.endif 2850 |.endif
2986 | // Caveat: RB is ebp. 2851 | // Caveat: RB is ebp.
2987 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2852 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2988 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2853 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2989 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2854 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2990 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2991 | mov L:RB->base, BASE 2855 | mov L:RB->base, BASE
2992 |.if X64WIN 2856 |.if X64WIN
2993 | lea CARG2, [rsp+4*8] 2857 | lea CARG2, [rsp+4*8]
@@ -2997,6 +2861,7 @@ static void build_subroutines(BuildCtx *ctx)
2997 | lea FCARG2, [esp+16] 2861 | lea FCARG2, [esp+16]
2998 |.endif 2862 |.endif
2999 | lea FCARG1, [DISPATCH+GG_DISP2J] 2863 | lea FCARG1, [DISPATCH+GG_DISP2J]
2864 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3000 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2865 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
3001 | // MULTRES or negated error code returned in eax (RD). 2866 | // MULTRES or negated error code returned in eax (RD).
3002 | mov RAa, L:RB->cframe 2867 | mov RAa, L:RB->cframe
@@ -3043,12 +2908,14 @@ static void build_subroutines(BuildCtx *ctx)
3043 | mov r13, TMPa 2908 | mov r13, TMPa
3044 | mov r12, TMPQ 2909 | mov r12, TMPQ
3045 |.endif 2910 |.endif
3046 | test RD, RD; js >3 // Check for error from exit. 2911 | cmp RD, -LUA_ERRERR; jae >9 // Check for error from exit.
2912 | mov L:RB, SAVE_L
3047 | mov MULTRES, RD 2913 | mov MULTRES, RD
3048 | mov LFUNC:KBASE, [BASE-8] 2914 | mov LFUNC:KBASE, [BASE-8]
3049 | mov KBASE, LFUNC:KBASE->pc 2915 | mov KBASE, LFUNC:KBASE->pc
3050 | mov KBASE, [KBASE+PC2PROTO(k)] 2916 | mov KBASE, [KBASE+PC2PROTO(k)]
3051 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2917 | mov L:RB->base, BASE
2918 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3052 | set_vmstate INTERP 2919 | set_vmstate INTERP
3053 | // Modified copy of ins_next which handles function header dispatch, too. 2920 | // Modified copy of ins_next which handles function header dispatch, too.
3054 | mov RC, [PC] 2921 | mov RC, [PC]
@@ -3056,19 +2923,51 @@ static void build_subroutines(BuildCtx *ctx)
3056 | movzx OP, RCL 2923 | movzx OP, RCL
3057 | add PC, 4 2924 | add PC, 4
3058 | shr RC, 16 2925 | shr RC, 16
2926 | cmp MULTRES, -17 // Static dispatch?
2927 | je >5
3059 | cmp OP, BC_FUNCF // Function header? 2928 | cmp OP, BC_FUNCF // Function header?
3060 | jb >2 2929 | jb >3
3061 | mov RC, MULTRES // RC/RD holds nres+1. 2930 | cmp OP, BC_FUNCC+2 // Fast function?
2931 | jae >4
3062 |2: 2932 |2:
2933 | mov RC, MULTRES // RC/RD holds nres+1.
2934 |3:
3063 |.if X64 2935 |.if X64
3064 | jmp aword [DISPATCH+OP*8] 2936 | jmp aword [DISPATCH+OP*8]
3065 |.else 2937 |.else
3066 | jmp aword [DISPATCH+OP*4] 2938 | jmp aword [DISPATCH+OP*4]
3067 |.endif 2939 |.endif
3068 | 2940 |
3069 |3: // Rethrow error from the right C frame. 2941 |4: // Check frame below fast function.
2942 | mov RC, [BASE-4]
2943 | test RC, FRAME_TYPE
2944 | jnz <2 // Trace stitching continuation?
2945 | // Otherwise set KBASE for Lua function below fast function.
2946 | movzx RC, byte [RC-3]
2947 | not RCa
2948 | mov LFUNC:KBASE, [BASE+RC*8-8]
2949 | mov KBASE, LFUNC:KBASE->pc
2950 | mov KBASE, [KBASE+PC2PROTO(k)]
2951 | jmp <2
2952 |
2953 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2954 | mov RA, [DISPATCH+DISPATCH_J(trace)]
2955 | mov TRACE:RA, [RA+RD*4]
2956 | mov RC, TRACE:RA->startins
2957 | movzx RA, RCH
2958 | movzx OP, RCL
2959 | shr RC, 16
2960 |.if X64
2961 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
2962 |.else
2963 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]
2964 |.endif
2965 |
2966 |9: // Rethrow error from the right C frame.
2967 | mov FCARG2, RD
3070 | mov FCARG1, L:RB 2968 | mov FCARG1, L:RB
3071 | call extern lj_err_run@4 // (lua_State *L) 2969 | neg FCARG2
2970 | call extern lj_err_trace@8 // (lua_State *L, int errcode)
3072 |.endif 2971 |.endif
3073 | 2972 |
3074 |//----------------------------------------------------------------------- 2973 |//-----------------------------------------------------------------------
@@ -3076,27 +2975,18 @@ static void build_subroutines(BuildCtx *ctx)
3076 |//----------------------------------------------------------------------- 2975 |//-----------------------------------------------------------------------
3077 | 2976 |
3078 |// FP value rounding. Called by math.floor/math.ceil fast functions 2977 |// FP value rounding. Called by math.floor/math.ceil fast functions
3079 |// and from JIT code. 2978 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3080 | 2979 |.macro vm_round, name, mode, cond
3081 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2980 |->name:
3082 |.macro vm_round_x87, mode1, mode2 2981 |.if not X64 and cond
3083 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 2982 | movsd xmm0, qword [esp+4]
3084 | mov [esp+8], eax 2983 | call ->name .. _sse
3085 | mov ax, mode1 2984 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
3086 | or ax, [esp+4] 2985 | fld qword [esp+4]
3087 |.if mode2 ~= 0xffff
3088 | and ax, mode2
3089 |.endif
3090 | mov [esp+6], ax
3091 | fldcw word [esp+6]
3092 | frndint
3093 | fldcw word [esp+4]
3094 | mov eax, [esp+8]
3095 | ret 2986 | ret
3096 |.endmacro 2987 |.endif
3097 | 2988 |
3098 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2989 |->name .. _sse:
3099 |.macro vm_round_sse, mode
3100 | sseconst_abs xmm2, RDa 2990 | sseconst_abs xmm2, RDa
3101 | sseconst_2p52 xmm3, RDa 2991 | sseconst_2p52 xmm3, RDa
3102 | movaps xmm1, xmm0 2992 | movaps xmm1, xmm0
@@ -3134,22 +3024,12 @@ static void build_subroutines(BuildCtx *ctx)
3134 | ret 3024 | ret
3135 |.endmacro 3025 |.endmacro
3136 | 3026 |
3137 |.macro vm_round, name, ssemode, mode1, mode2 3027 | vm_round vm_floor, 0, 1
3138 |->name: 3028 | vm_round vm_ceil, 1, JIT
3139 |.if not SSE 3029 | vm_round vm_trunc, 2, JIT
3140 | vm_round_x87 mode1, mode2
3141 |.endif
3142 |->name .. _sse:
3143 | vm_round_sse ssemode
3144 |.endmacro
3145 |
3146 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3147 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3148 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3149 | 3030 |
3150 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3031 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3151 |->vm_mod: 3032 |->vm_mod:
3152 |.if SSE
3153 |// Args in xmm0/xmm1, return value in xmm0. 3033 |// Args in xmm0/xmm1, return value in xmm0.
3154 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3034 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3155 | movaps xmm5, xmm0 3035 | movaps xmm5, xmm0
@@ -3177,488 +3057,6 @@ static void build_subroutines(BuildCtx *ctx)
3177 | movaps xmm0, xmm5 3057 | movaps xmm0, xmm5
3178 | subsd xmm0, xmm1 3058 | subsd xmm0, xmm1
3179 | ret 3059 | ret
3180 |.else
3181 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3182 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3183 | fld st1
3184 | fdiv st1
3185 | fnstcw word [esp+4]
3186 | mov ax, 0x0400
3187 | or ax, [esp+4]
3188 | and ax, 0xf7ff
3189 | mov [esp+6], ax
3190 | fldcw word [esp+6]
3191 | frndint
3192 | fldcw word [esp+4]
3193 | fmulp st1
3194 | fsubp st1
3195 | ret
3196 |.endif
3197 |
3198 |// FP log2(x). Called by math.log(x, base).
3199 |->vm_log2:
3200 |.if X64WIN
3201 | movsd qword [rsp+8], xmm0 // Use scratch area.
3202 | fld1
3203 | fld qword [rsp+8]
3204 | fyl2x
3205 | fstp qword [rsp+8]
3206 | movsd xmm0, qword [rsp+8]
3207 |.elif X64
3208 | movsd qword [rsp-8], xmm0 // Use red zone.
3209 | fld1
3210 | fld qword [rsp-8]
3211 | fyl2x
3212 | fstp qword [rsp-8]
3213 | movsd xmm0, qword [rsp-8]
3214 |.else
3215 | fld1
3216 | fld qword [esp+4]
3217 | fyl2x
3218 |.endif
3219 | ret
3220 |
3221 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3222 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3223 |// Caveat: needs 3 slots on x87 stack!
3224 |->vm_exp_x87:
3225 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3226 |->vm_exp2_x87:
3227 | .if X64WIN
3228 | .define expscratch, dword [rsp+8] // Use scratch area.
3229 | .elif X64
3230 | .define expscratch, dword [rsp-8] // Use red zone.
3231 | .else
3232 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3233 | .endif
3234 | fst expscratch // Caveat: overwrites ARG1.
3235 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3236 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3237 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3238 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3239 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3240 |1:
3241 | ret
3242 |2:
3243 | fpop; fldz; ret
3244 |
3245 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3246 |// and vm_arith.
3247 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3248 |// Caveat: needs 3 slots on x87 stack!
3249 |->vm_pow:
3250 |.if not SSE
3251 | fist dword [esp+4] // Store/reload int before comparison.
3252 | fild dword [esp+4] // Integral exponent used in vm_powi.
3253 | fucomip st1
3254 | jnz >8 // Branch for FP exponents.
3255 | jp >9 // Branch for NaN exponent.
3256 | fpop // Pop y and fallthrough to vm_powi.
3257 |
3258 |// FP/int power function x^i. Arg1/ret on x87 stack.
3259 |// Arg2 (int) on C stack. RC (eax) modified.
3260 |// Caveat: needs 2 slots on x87 stack!
3261 | mov eax, [esp+4]
3262 | cmp eax, 1; jle >6 // i<=1?
3263 | // Now 1 < (unsigned)i <= 0x80000000.
3264 |1: // Handle leading zeros.
3265 | test eax, 1; jnz >2
3266 | fmul st0
3267 | shr eax, 1
3268 | jmp <1
3269 |2:
3270 | shr eax, 1; jz >5
3271 | fdup
3272 |3: // Handle trailing bits.
3273 | fmul st0
3274 | shr eax, 1; jz >4
3275 | jnc <3
3276 | fmul st1, st0
3277 | jmp <3
3278 |4:
3279 | fmulp st1
3280 |5:
3281 | ret
3282 |6:
3283 | je <5 // x^1 ==> x
3284 | jb >7
3285 | fld1; fdivrp st1
3286 | neg eax
3287 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3288 | jmp <1 // x^-i ==> (1/x)^i
3289 |7:
3290 | fpop; fld1 // x^0 ==> 1
3291 | ret
3292 |
3293 |8: // FP/FP power function x^y.
3294 | fst dword [esp+4]
3295 | fxch
3296 | fst dword [esp+8]
3297 | mov eax, [esp+4]; shl eax, 1
3298 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3299 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3300 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3301 | fyl2x
3302 | jmp ->vm_exp2raw
3303 |
3304 |9: // Handle x^NaN.
3305 | fld1
3306 | fucomip st2
3307 | je >1 // 1^NaN ==> 1
3308 | fxch // x^NaN ==> NaN
3309 |1:
3310 | fpop
3311 | ret
3312 |
3313 |2: // Handle x^+-Inf.
3314 | fabs
3315 | fld1
3316 | fucomip st1
3317 | je >3 // +-1^+-Inf ==> 1
3318 | fpop; fabs; fldz; mov eax, 0; setc al
3319 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3320 | fxch
3321 |3:
3322 | fpop1; fabs
3323 | ret
3324 |
3325 |4: // Handle +-0^y or +-Inf^y.
3326 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3327 | fpop; fpop
3328 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3329 | fldz // y < 0, +-Inf^y ==> 0
3330 | ret
3331 |5:
3332 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3333 | fld dword [esp+4]
3334 | ret
3335 |.endif
3336 |
3337 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3338 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3339 |->vm_pow_sse:
3340 | cvtsd2si eax, xmm1
3341 | cvtsi2sd xmm2, eax
3342 | ucomisd xmm1, xmm2
3343 | jnz >8 // Branch for FP exponents.
3344 | jp >9 // Branch for NaN exponent.
3345 | // Fallthrough to vm_powi_sse.
3346 |
3347 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3348 |->vm_powi_sse:
3349 | cmp eax, 1; jle >6 // i<=1?
3350 | // Now 1 < (unsigned)i <= 0x80000000.
3351 |1: // Handle leading zeros.
3352 | test eax, 1; jnz >2
3353 | mulsd xmm0, xmm0
3354 | shr eax, 1
3355 | jmp <1
3356 |2:
3357 | shr eax, 1; jz >5
3358 | movaps xmm1, xmm0
3359 |3: // Handle trailing bits.
3360 | mulsd xmm0, xmm0
3361 | shr eax, 1; jz >4
3362 | jnc <3
3363 | mulsd xmm1, xmm0
3364 | jmp <3
3365 |4:
3366 | mulsd xmm0, xmm1
3367 |5:
3368 | ret
3369 |6:
3370 | je <5 // x^1 ==> x
3371 | jb >7 // x^0 ==> 1
3372 | neg eax
3373 | call <1
3374 | sseconst_1 xmm1, RDa
3375 | divsd xmm1, xmm0
3376 | movaps xmm0, xmm1
3377 | ret
3378 |7:
3379 | sseconst_1 xmm0, RDa
3380 | ret
3381 |
3382 |8: // FP/FP power function x^y.
3383 |.if X64
3384 | movd rax, xmm1; shl rax, 1
3385 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3386 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3387 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3388 | .if X64WIN
3389 | movsd qword [rsp+16], xmm1 // Use scratch area.
3390 | movsd qword [rsp+8], xmm0
3391 | fld qword [rsp+16]
3392 | fld qword [rsp+8]
3393 | .else
3394 | movsd qword [rsp-16], xmm1 // Use red zone.
3395 | movsd qword [rsp-8], xmm0
3396 | fld qword [rsp-16]
3397 | fld qword [rsp-8]
3398 | .endif
3399 |.else
3400 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3401 | movsd qword [esp+4], xmm0
3402 | cmp dword [esp+12], 0; jne >1
3403 | mov eax, [esp+16]; shl eax, 1
3404 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3405 |1:
3406 | cmp dword [esp+4], 0; jne >1
3407 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3408 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3409 |1:
3410 | fld qword [esp+12]
3411 | fld qword [esp+4]
3412 |.endif
3413 | fyl2x // y*log2(x)
3414 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3415 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3416 |.if X64WIN
3417 | fstp qword [rsp+8] // Use scratch area.
3418 | movsd xmm0, qword [rsp+8]
3419 |.elif X64
3420 | fstp qword [rsp-8] // Use red zone.
3421 | movsd xmm0, qword [rsp-8]
3422 |.else
3423 | fstp qword [esp+4] // Needs 8 byte scratch area.
3424 | movsd xmm0, qword [esp+4]
3425 |.endif
3426 | ret
3427 |
3428 |9: // Handle x^NaN.
3429 | sseconst_1 xmm2, RDa
3430 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3431 | movaps xmm0, xmm1 // x^NaN ==> NaN
3432 |1:
3433 | ret
3434 |
3435 |2: // Handle x^+-Inf.
3436 | sseconst_abs xmm2, RDa
3437 | andpd xmm0, xmm2 // |x|
3438 | sseconst_1 xmm2, RDa
3439 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3440 | movmskpd eax, xmm1
3441 | xorps xmm0, xmm0
3442 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3443 |3:
3444 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3445 | ret
3446 |
3447 |4: // Handle +-0^y.
3448 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3449 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3450 | ret
3451 |
3452 |5: // Handle +-Inf^y.
3453 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3454 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3455 | ret
3456 |
3457 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3458 |// Computes fpm(x) for extended math functions. ORDER FPM.
3459 |->vm_foldfpm:
3460 |.if JIT
3461 |.if X64
3462 | .if X64WIN
3463 | .define fpmop, CARG2d
3464 | .else
3465 | .define fpmop, CARG1d
3466 | .endif
3467 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3468 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3469 | sqrtsd xmm0, xmm0; ret
3470 |2:
3471 | .if X64WIN
3472 | movsd qword [rsp+8], xmm0 // Use scratch area.
3473 | fld qword [rsp+8]
3474 | .else
3475 | movsd qword [rsp-8], xmm0 // Use red zone.
3476 | fld qword [rsp-8]
3477 | .endif
3478 | cmp fpmop, 5; ja >2
3479 | .if X64WIN; pop rax; .endif
3480 | je >1
3481 | call ->vm_exp_x87
3482 | .if X64WIN; push rax; .endif
3483 | jmp >7
3484 |1:
3485 | call ->vm_exp2_x87
3486 | .if X64WIN; push rax; .endif
3487 | jmp >7
3488 |2: ; cmp fpmop, 7; je >1; ja >2
3489 | fldln2; fxch; fyl2x; jmp >7
3490 |1: ; fld1; fxch; fyl2x; jmp >7
3491 |2: ; cmp fpmop, 9; je >1; ja >2
3492 | fldlg2; fxch; fyl2x; jmp >7
3493 |1: ; fsin; jmp >7
3494 |2: ; cmp fpmop, 11; je >1; ja >9
3495 | fcos; jmp >7
3496 |1: ; fptan; fpop
3497 |7:
3498 | .if X64WIN
3499 | fstp qword [rsp+8] // Use scratch area.
3500 | movsd xmm0, qword [rsp+8]
3501 | .else
3502 | fstp qword [rsp-8] // Use red zone.
3503 | movsd xmm0, qword [rsp-8]
3504 | .endif
3505 | ret
3506 |.else // x86 calling convention.
3507 | .define fpmop, eax
3508 |.if SSE
3509 | mov fpmop, [esp+12]
3510 | movsd xmm0, qword [esp+4]
3511 | cmp fpmop, 1; je >1; ja >2
3512 | call ->vm_floor; jmp >7
3513 |1: ; call ->vm_ceil; jmp >7
3514 |2: ; cmp fpmop, 3; je >1; ja >2
3515 | call ->vm_trunc; jmp >7
3516 |1:
3517 | sqrtsd xmm0, xmm0
3518 |7:
3519 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3520 | fld qword [esp+4]
3521 | ret
3522 |2: ; fld qword [esp+4]
3523 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3524 |2: ; cmp fpmop, 7; je >1; ja >2
3525 | fldln2; fxch; fyl2x; ret
3526 |1: ; fld1; fxch; fyl2x; ret
3527 |2: ; cmp fpmop, 9; je >1; ja >2
3528 | fldlg2; fxch; fyl2x; ret
3529 |1: ; fsin; ret
3530 |2: ; cmp fpmop, 11; je >1; ja >9
3531 | fcos; ret
3532 |1: ; fptan; fpop; ret
3533 |.else
3534 | mov fpmop, [esp+12]
3535 | fld qword [esp+4]
3536 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3537 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3538 | fsqrt; ret
3539 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3540 | cmp fpmop, 7; je >1; ja >2
3541 | fldln2; fxch; fyl2x; ret
3542 |1: ; fld1; fxch; fyl2x; ret
3543 |2: ; cmp fpmop, 9; je >1; ja >2
3544 | fldlg2; fxch; fyl2x; ret
3545 |1: ; fsin; ret
3546 |2: ; cmp fpmop, 11; je >1; ja >9
3547 | fcos; ret
3548 |1: ; fptan; fpop; ret
3549 |.endif
3550 |.endif
3551 |9: ; int3 // Bad fpm.
3552 |.endif
3553 |
3554 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3555 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3556 |// and basic math functions. ORDER ARITH
3557 |->vm_foldarith:
3558 |.if X64
3559 |
3560 | .if X64WIN
3561 | .define foldop, CARG3d
3562 | .else
3563 | .define foldop, CARG1d
3564 | .endif
3565 | cmp foldop, 1; je >1; ja >2
3566 | addsd xmm0, xmm1; ret
3567 |1: ; subsd xmm0, xmm1; ret
3568 |2: ; cmp foldop, 3; je >1; ja >2
3569 | mulsd xmm0, xmm1; ret
3570 |1: ; divsd xmm0, xmm1; ret
3571 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3572 | cmp foldop, 7; je >1; ja >2
3573 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3574 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3575 |2: ; cmp foldop, 9; ja >2
3576 |.if X64WIN
3577 | movsd qword [rsp+8], xmm0 // Use scratch area.
3578 | movsd qword [rsp+16], xmm1
3579 | fld qword [rsp+8]
3580 | fld qword [rsp+16]
3581 |.else
3582 | movsd qword [rsp-8], xmm0 // Use red zone.
3583 | movsd qword [rsp-16], xmm1
3584 | fld qword [rsp-8]
3585 | fld qword [rsp-16]
3586 |.endif
3587 | je >1
3588 | fpatan
3589 |7:
3590 |.if X64WIN
3591 | fstp qword [rsp+8] // Use scratch area.
3592 | movsd xmm0, qword [rsp+8]
3593 |.else
3594 | fstp qword [rsp-8] // Use red zone.
3595 | movsd xmm0, qword [rsp-8]
3596 |.endif
3597 | ret
3598 |1: ; fxch; fscale; fpop1; jmp <7
3599 |2: ; cmp foldop, 11; je >1; ja >9
3600 | minsd xmm0, xmm1; ret
3601 |1: ; maxsd xmm0, xmm1; ret
3602 |9: ; int3 // Bad op.
3603 |
3604 |.elif SSE // x86 calling convention with SSE ops.
3605 |
3606 | .define foldop, eax
3607 | mov foldop, [esp+20]
3608 | movsd xmm0, qword [esp+4]
3609 | movsd xmm1, qword [esp+12]
3610 | cmp foldop, 1; je >1; ja >2
3611 | addsd xmm0, xmm1
3612 |7:
3613 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3614 | fld qword [esp+4]
3615 | ret
3616 |1: ; subsd xmm0, xmm1; jmp <7
3617 |2: ; cmp foldop, 3; je >1; ja >2
3618 | mulsd xmm0, xmm1; jmp <7
3619 |1: ; divsd xmm0, xmm1; jmp <7
3620 |2: ; cmp foldop, 5
3621 | je >1; ja >2
3622 | call ->vm_mod; jmp <7
3623 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3624 |2: ; cmp foldop, 7; je >1; ja >2
3625 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3626 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3627 |2: ; cmp foldop, 9; ja >2
3628 | fld qword [esp+4] // Reload from stack
3629 | fld qword [esp+12]
3630 | je >1
3631 | fpatan; ret
3632 |1: ; fxch; fscale; fpop1; ret
3633 |2: ; cmp foldop, 11; je >1; ja >9
3634 | minsd xmm0, xmm1; jmp <7
3635 |1: ; maxsd xmm0, xmm1; jmp <7
3636 |9: ; int3 // Bad op.
3637 |
3638 |.else // x86 calling convention with x87 ops.
3639 |
3640 | mov eax, [esp+20]
3641 | fld qword [esp+4]
3642 | fld qword [esp+12]
3643 | cmp eax, 1; je >1; ja >2
3644 | faddp st1; ret
3645 |1: ; fsubp st1; ret
3646 |2: ; cmp eax, 3; je >1; ja >2
3647 | fmulp st1; ret
3648 |1: ; fdivp st1; ret
3649 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3650 | cmp eax, 7; je >1; ja >2
3651 | fpop; fchs; ret
3652 |1: ; fpop; fabs; ret
3653 |2: ; cmp eax, 9; je >1; ja >2
3654 | fpatan; ret
3655 |1: ; fxch; fscale; fpop1; ret
3656 |2: ; cmp eax, 11; je >1; ja >9
3657 | fucomi st1; fcmovnbe st1; fpop1; ret
3658 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3659 |9: ; int3 // Bad op.
3660 |
3661 |.endif
3662 | 3060 |
3663 |//----------------------------------------------------------------------- 3061 |//-----------------------------------------------------------------------
3664 |//-- Miscellaneous functions -------------------------------------------- 3062 |//-- Miscellaneous functions --------------------------------------------
@@ -3670,6 +3068,7 @@ static void build_subroutines(BuildCtx *ctx)
3670 | mov eax, CARG1d 3068 | mov eax, CARG1d
3671 | .if X64WIN; push rsi; mov rsi, CARG2; .endif 3069 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3672 | push rbx 3070 | push rbx
3071 | xor ecx, ecx
3673 | cpuid 3072 | cpuid
3674 | mov [rsi], eax 3073 | mov [rsi], eax
3675 | mov [rsi+4], ebx 3074 | mov [rsi+4], ebx
@@ -3693,6 +3092,7 @@ static void build_subroutines(BuildCtx *ctx)
3693 | mov eax, [esp+4] // Argument 1 is function number. 3092 | mov eax, [esp+4] // Argument 1 is function number.
3694 | push edi 3093 | push edi
3695 | push ebx 3094 | push ebx
3095 | xor ecx, ecx
3696 | cpuid 3096 | cpuid
3697 | mov edi, [esp+16] // Argument 2 is result area. 3097 | mov edi, [esp+16] // Argument 2 is result area.
3698 | mov [edi], eax 3098 | mov [edi], eax
@@ -3705,6 +3105,86 @@ static void build_subroutines(BuildCtx *ctx)
3705 | ret 3105 | ret
3706 |.endif 3106 |.endif
3707 | 3107 |
3108 |.define NEXT_TAB, TAB:FCARG1
3109 |.define NEXT_IDX, FCARG2
3110 |.define NEXT_PTR, RCa
3111 |.define NEXT_PTRd, RC
3112 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
3113 |.if X64
3114 |.define NEXT_TMP, CARG3d
3115 |.define NEXT_TMPq, CARG3
3116 |.define NEXT_ASIZE, CARG4d
3117 |.macro NEXT_ENTER; .endmacro
3118 |.macro NEXT_LEAVE; ret; .endmacro
3119 |.if X64WIN
3120 |.define NEXT_RES_PTR, [rsp+aword*5]
3121 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
3122 |.else
3123 |.define NEXT_RES_PTR, [rsp+aword*1]
3124 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
3125 |.endif
3126 |.else
3127 |.define NEXT_ASIZE, esi
3128 |.define NEXT_TMP, edi
3129 |.macro NEXT_ENTER; push esi; push edi; .endmacro
3130 |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro
3131 |.define NEXT_RES_PTR, [esp+dword*3]
3132 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
3133 |.endif
3134 |
3135 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
3136 |// Next idx returned in edx.
3137 |->vm_next:
3138 |.if JIT
3139 | NEXT_ENTER
3140 | mov NEXT_ASIZE, NEXT_TAB->asize
3141 |1: // Traverse array part.
3142 | cmp NEXT_IDX, NEXT_ASIZE; jae >5
3143 | mov NEXT_TMP, NEXT_TAB->array
3144 | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2
3145 | lea NEXT_PTR, NEXT_RES_PTR
3146 |.if X64
3147 | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8]
3148 | mov qword [NEXT_PTR], NEXT_TMPq
3149 |.else
3150 | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4]
3151 | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
3152 | mov dword [NEXT_PTR+4], NEXT_ASIZE
3153 | mov dword [NEXT_PTR], NEXT_TMP
3154 |.endif
3155 |.if DUALNUM
3156 | mov dword [NEXT_PTR+dword*3], LJ_TISNUM
3157 | mov dword [NEXT_PTR+dword*2], NEXT_IDX
3158 |.else
3159 | cvtsi2sd xmm0, NEXT_IDX
3160 | movsd qword [NEXT_PTR+dword*2], xmm0
3161 |.endif
3162 | NEXT_RES_IDX 1
3163 | NEXT_LEAVE
3164 |2: // Skip holes in array part.
3165 | add NEXT_IDX, 1
3166 | jmp <1
3167 |
3168 |5: // Traverse hash part.
3169 | sub NEXT_IDX, NEXT_ASIZE
3170 |6:
3171 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
3172 | imul NEXT_PTRd, NEXT_IDX, #NODE
3173 | add NODE:NEXT_PTRd, dword NEXT_TAB->node
3174 | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7
3175 | NEXT_RES_IDXL NEXT_ASIZE+1
3176 | NEXT_LEAVE
3177 |7: // Skip holes in hash part.
3178 | add NEXT_IDX, 1
3179 | jmp <6
3180 |
3181 |9: // End of iteration. Set the key to nil (not the value).
3182 | NEXT_RES_IDX NEXT_ASIZE
3183 | lea NEXT_PTR, NEXT_RES_PTR
3184 | mov dword [NEXT_PTR+dword*3], LJ_TNIL
3185 | NEXT_LEAVE
3186 |.endif
3187 |
3708 |//----------------------------------------------------------------------- 3188 |//-----------------------------------------------------------------------
3709 |//-- Assertions --------------------------------------------------------- 3189 |//-- Assertions ---------------------------------------------------------
3710 |//----------------------------------------------------------------------- 3190 |//-----------------------------------------------------------------------
@@ -3840,19 +3320,25 @@ static void build_subroutines(BuildCtx *ctx)
3840 | 3320 |
3841 | // Copy stack slots. 3321 | // Copy stack slots.
3842 | movzx ecx, byte CCSTATE->nsp 3322 | movzx ecx, byte CCSTATE->nsp
3843 | sub ecx, 1 3323 |.if X64
3324 | sub ecx, 8
3844 | js >2 3325 | js >2
3845 |1: 3326 |1:
3846 |.if X64 3327 | mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
3847 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] 3328 | mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
3848 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax 3329 | sub ecx, 8
3330 | jns <1
3331 |2:
3849 |.else 3332 |.else
3850 | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] 3333 | sub ecx, 4
3851 | mov [esp+ecx*4], eax 3334 | js >2
3852 |.endif 3335 |1:
3853 | sub ecx, 1 3336 | mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
3337 | mov [esp+ecx], eax
3338 | sub ecx, 4
3854 | jns <1 3339 | jns <1
3855 |2: 3340 |2:
3341 |.endif
3856 | 3342 |
3857 |.if X64 3343 |.if X64
3858 | movzx eax, byte CCSTATE->nfpr 3344 | movzx eax, byte CCSTATE->nfpr
@@ -3970,19 +3456,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3970 | // RA is a number. 3456 | // RA is a number.
3971 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3457 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3972 | // RA is a number, RD is an integer. 3458 | // RA is a number, RD is an integer.
3973 |.if SSE
3974 | cvtsi2sd xmm0, dword [BASE+RD*8] 3459 | cvtsi2sd xmm0, dword [BASE+RD*8]
3975 | jmp >2 3460 | jmp >2
3976 |.else
3977 | fld qword [BASE+RA*8]
3978 | fild dword [BASE+RD*8]
3979 | jmp >3
3980 |.endif
3981 | 3461 |
3982 |8: // RA is an integer, RD is not an integer. 3462 |8: // RA is an integer, RD is not an integer.
3983 | ja ->vmeta_comp 3463 | ja ->vmeta_comp
3984 | // RA is an integer, RD is a number. 3464 | // RA is an integer, RD is a number.
3985 |.if SSE
3986 | cvtsi2sd xmm1, dword [BASE+RA*8] 3465 | cvtsi2sd xmm1, dword [BASE+RA*8]
3987 | movsd xmm0, qword [BASE+RD*8] 3466 | movsd xmm0, qword [BASE+RD*8]
3988 | add PC, 4 3467 | add PC, 4
@@ -3990,29 +3469,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3990 | jmp_comp jbe, ja, jb, jae, <9 3469 | jmp_comp jbe, ja, jb, jae, <9
3991 | jmp <6 3470 | jmp <6
3992 |.else 3471 |.else
3993 | fild dword [BASE+RA*8]
3994 | jmp >2
3995 |.endif
3996 |.else
3997 | checknum RA, ->vmeta_comp 3472 | checknum RA, ->vmeta_comp
3998 | checknum RD, ->vmeta_comp 3473 | checknum RD, ->vmeta_comp
3999 |.endif 3474 |.endif
4000 |.if SSE
4001 |1: 3475 |1:
4002 | movsd xmm0, qword [BASE+RD*8] 3476 | movsd xmm0, qword [BASE+RD*8]
4003 |2: 3477 |2:
4004 | add PC, 4 3478 | add PC, 4
4005 | ucomisd xmm0, qword [BASE+RA*8] 3479 | ucomisd xmm0, qword [BASE+RA*8]
4006 |3: 3480 |3:
4007 |.else
4008 |1:
4009 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4010 |2:
4011 | fld qword [BASE+RD*8]
4012 |3:
4013 | add PC, 4
4014 | fcomparepp
4015 |.endif
4016 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3481 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4017 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3482 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4018 |.if DUALNUM 3483 |.if DUALNUM
@@ -4052,43 +3517,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4052 | // RD is a number. 3517 | // RD is a number.
4053 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3518 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4054 | // RD is a number, RA is an integer. 3519 | // RD is a number, RA is an integer.
4055 |.if SSE
4056 | cvtsi2sd xmm0, dword [BASE+RA*8] 3520 | cvtsi2sd xmm0, dword [BASE+RA*8]
4057 |.else
4058 | fild dword [BASE+RA*8]
4059 |.endif
4060 | jmp >2 3521 | jmp >2
4061 | 3522 |
4062 |8: // RD is an integer, RA is not an integer. 3523 |8: // RD is an integer, RA is not an integer.
4063 | ja >5 3524 | ja >5
4064 | // RD is an integer, RA is a number. 3525 | // RD is an integer, RA is a number.
4065 |.if SSE
4066 | cvtsi2sd xmm0, dword [BASE+RD*8] 3526 | cvtsi2sd xmm0, dword [BASE+RD*8]
4067 | ucomisd xmm0, qword [BASE+RA*8] 3527 | ucomisd xmm0, qword [BASE+RA*8]
4068 |.else
4069 | fild dword [BASE+RD*8]
4070 | fld qword [BASE+RA*8]
4071 |.endif
4072 | jmp >4 3528 | jmp >4
4073 | 3529 |
4074 |.else 3530 |.else
4075 | cmp RB, LJ_TISNUM; jae >5 3531 | cmp RB, LJ_TISNUM; jae >5
4076 | checknum RA, >5 3532 | checknum RA, >5
4077 |.endif 3533 |.endif
4078 |.if SSE
4079 |1: 3534 |1:
4080 | movsd xmm0, qword [BASE+RA*8] 3535 | movsd xmm0, qword [BASE+RA*8]
4081 |2: 3536 |2:
4082 | ucomisd xmm0, qword [BASE+RD*8] 3537 | ucomisd xmm0, qword [BASE+RD*8]
4083 |4: 3538 |4:
4084 |.else
4085 |1:
4086 | fld qword [BASE+RA*8]
4087 |2:
4088 | fld qword [BASE+RD*8]
4089 |4:
4090 | fcomparepp
4091 |.endif
4092 iseqne_fp: 3539 iseqne_fp:
4093 if (vk) { 3540 if (vk) {
4094 | jp >2 // Unordered means not equal. 3541 | jp >2 // Unordered means not equal.
@@ -4211,39 +3658,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4211 | // RA is a number. 3658 | // RA is a number.
4212 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3659 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4213 | // RA is a number, RD is an integer. 3660 | // RA is a number, RD is an integer.
4214 |.if SSE
4215 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3661 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4216 |.else
4217 | fild dword [KBASE+RD*8]
4218 |.endif
4219 | jmp >2 3662 | jmp >2
4220 | 3663 |
4221 |8: // RA is an integer, RD is a number. 3664 |8: // RA is an integer, RD is a number.
4222 |.if SSE
4223 | cvtsi2sd xmm0, dword [BASE+RA*8] 3665 | cvtsi2sd xmm0, dword [BASE+RA*8]
4224 | ucomisd xmm0, qword [KBASE+RD*8] 3666 | ucomisd xmm0, qword [KBASE+RD*8]
4225 |.else
4226 | fild dword [BASE+RA*8]
4227 | fld qword [KBASE+RD*8]
4228 |.endif
4229 | jmp >4 3667 | jmp >4
4230 |.else 3668 |.else
4231 | cmp RB, LJ_TISNUM; jae >3 3669 | cmp RB, LJ_TISNUM; jae >3
4232 |.endif 3670 |.endif
4233 |.if SSE
4234 |1: 3671 |1:
4235 | movsd xmm0, qword [KBASE+RD*8] 3672 | movsd xmm0, qword [KBASE+RD*8]
4236 |2: 3673 |2:
4237 | ucomisd xmm0, qword [BASE+RA*8] 3674 | ucomisd xmm0, qword [BASE+RA*8]
4238 |4: 3675 |4:
4239 |.else
4240 |1:
4241 | fld qword [KBASE+RD*8]
4242 |2:
4243 | fld qword [BASE+RA*8]
4244 |4:
4245 | fcomparepp
4246 |.endif
4247 goto iseqne_fp; 3676 goto iseqne_fp;
4248 case BC_ISEQP: case BC_ISNEP: 3677 case BC_ISEQP: case BC_ISNEP:
4249 vk = op == BC_ISEQP; 3678 vk = op == BC_ISEQP;
@@ -4294,6 +3723,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4294 | ins_next 3723 | ins_next
4295 break; 3724 break;
4296 3725
3726 case BC_ISTYPE:
3727 | ins_AD // RA = src, RD = -type
3728 | add RD, [BASE+RA*8+4]
3729 | jne ->vmeta_istype
3730 | ins_next
3731 break;
3732 case BC_ISNUM:
3733 | ins_AD // RA = src, RD = -(TISNUM-1)
3734 | checknum RA, ->vmeta_istype
3735 | ins_next
3736 break;
3737
4297 /* -- Unary ops --------------------------------------------------------- */ 3738 /* -- Unary ops --------------------------------------------------------- */
4298 3739
4299 case BC_MOV: 3740 case BC_MOV:
@@ -4337,16 +3778,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4337 |.else 3778 |.else
4338 | checknum RD, ->vmeta_unm 3779 | checknum RD, ->vmeta_unm
4339 |.endif 3780 |.endif
4340 |.if SSE
4341 | movsd xmm0, qword [BASE+RD*8] 3781 | movsd xmm0, qword [BASE+RD*8]
4342 | sseconst_sign xmm1, RDa 3782 | sseconst_sign xmm1, RDa
4343 | xorps xmm0, xmm1 3783 | xorps xmm0, xmm1
4344 | movsd qword [BASE+RA*8], xmm0 3784 | movsd qword [BASE+RA*8], xmm0
4345 |.else
4346 | fld qword [BASE+RD*8]
4347 | fchs
4348 | fstp qword [BASE+RA*8]
4349 |.endif
4350 |.if DUALNUM 3785 |.if DUALNUM
4351 | jmp <9 3786 | jmp <9
4352 |.else 3787 |.else
@@ -4362,15 +3797,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4362 |1: 3797 |1:
4363 | mov dword [BASE+RA*8+4], LJ_TISNUM 3798 | mov dword [BASE+RA*8+4], LJ_TISNUM
4364 | mov dword [BASE+RA*8], RD 3799 | mov dword [BASE+RA*8], RD
4365 |.elif SSE 3800 |.else
4366 | xorps xmm0, xmm0 3801 | xorps xmm0, xmm0
4367 | cvtsi2sd xmm0, dword STR:RD->len 3802 | cvtsi2sd xmm0, dword STR:RD->len
4368 |1: 3803 |1:
4369 | movsd qword [BASE+RA*8], xmm0 3804 | movsd qword [BASE+RA*8], xmm0
4370 |.else
4371 | fild dword STR:RD->len
4372 |1:
4373 | fstp qword [BASE+RA*8]
4374 |.endif 3805 |.endif
4375 | ins_next 3806 | ins_next
4376 |2: 3807 |2:
@@ -4388,11 +3819,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4388 | // Length of table returned in eax (RD). 3819 | // Length of table returned in eax (RD).
4389 |.if DUALNUM 3820 |.if DUALNUM
4390 | // Nothing to do. 3821 | // Nothing to do.
4391 |.elif SSE
4392 | cvtsi2sd xmm0, RD
4393 |.else 3822 |.else
4394 | mov ARG1, RD 3823 | cvtsi2sd xmm0, RD
4395 | fild ARG1
4396 |.endif 3824 |.endif
4397 | mov BASE, RB // Restore BASE. 3825 | mov BASE, RB // Restore BASE.
4398 | movzx RA, PC_RA 3826 | movzx RA, PC_RA
@@ -4407,7 +3835,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4407 3835
4408 /* -- Binary ops -------------------------------------------------------- */ 3836 /* -- Binary ops -------------------------------------------------------- */
4409 3837
4410 |.macro ins_arithpre, x87ins, sseins, ssereg 3838 |.macro ins_arithpre, sseins, ssereg
4411 | ins_ABC 3839 | ins_ABC
4412 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3840 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4413 ||switch (vk) { 3841 ||switch (vk) {
@@ -4416,37 +3844,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4416 | .if DUALNUM 3844 | .if DUALNUM
4417 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3845 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4418 | .endif 3846 | .endif
4419 | .if SSE 3847 | movsd xmm0, qword [BASE+RB*8]
4420 | movsd xmm0, qword [BASE+RB*8] 3848 | sseins ssereg, qword [KBASE+RC*8]
4421 | sseins ssereg, qword [KBASE+RC*8]
4422 | .else
4423 | fld qword [BASE+RB*8]
4424 | x87ins qword [KBASE+RC*8]
4425 | .endif
4426 || break; 3849 || break;
4427 ||case 1: 3850 ||case 1:
4428 | checknum RB, ->vmeta_arith_nv 3851 | checknum RB, ->vmeta_arith_nv
4429 | .if DUALNUM 3852 | .if DUALNUM
4430 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3853 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4431 | .endif 3854 | .endif
4432 | .if SSE 3855 | movsd xmm0, qword [KBASE+RC*8]
4433 | movsd xmm0, qword [KBASE+RC*8] 3856 | sseins ssereg, qword [BASE+RB*8]
4434 | sseins ssereg, qword [BASE+RB*8]
4435 | .else
4436 | fld qword [KBASE+RC*8]
4437 | x87ins qword [BASE+RB*8]
4438 | .endif
4439 || break; 3857 || break;
4440 ||default: 3858 ||default:
4441 | checknum RB, ->vmeta_arith_vv 3859 | checknum RB, ->vmeta_arith_vv
4442 | checknum RC, ->vmeta_arith_vv 3860 | checknum RC, ->vmeta_arith_vv
4443 | .if SSE 3861 | movsd xmm0, qword [BASE+RB*8]
4444 | movsd xmm0, qword [BASE+RB*8] 3862 | sseins ssereg, qword [BASE+RC*8]
4445 | sseins ssereg, qword [BASE+RC*8]
4446 | .else
4447 | fld qword [BASE+RB*8]
4448 | x87ins qword [BASE+RC*8]
4449 | .endif
4450 || break; 3863 || break;
4451 ||} 3864 ||}
4452 |.endmacro 3865 |.endmacro
@@ -4484,55 +3897,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4484 |.endmacro 3897 |.endmacro
4485 | 3898 |
4486 |.macro ins_arithpost 3899 |.macro ins_arithpost
4487 |.if SSE
4488 | movsd qword [BASE+RA*8], xmm0 3900 | movsd qword [BASE+RA*8], xmm0
4489 |.else
4490 | fstp qword [BASE+RA*8]
4491 |.endif
4492 |.endmacro 3901 |.endmacro
4493 | 3902 |
4494 |.macro ins_arith, x87ins, sseins 3903 |.macro ins_arith, sseins
4495 | ins_arithpre x87ins, sseins, xmm0 3904 | ins_arithpre sseins, xmm0
4496 | ins_arithpost 3905 | ins_arithpost
4497 | ins_next 3906 | ins_next
4498 |.endmacro 3907 |.endmacro
4499 | 3908 |
4500 |.macro ins_arith, intins, x87ins, sseins 3909 |.macro ins_arith, intins, sseins
4501 |.if DUALNUM 3910 |.if DUALNUM
4502 | ins_arithdn intins 3911 | ins_arithdn intins
4503 |.else 3912 |.else
4504 | ins_arith, x87ins, sseins 3913 | ins_arith, sseins
4505 |.endif 3914 |.endif
4506 |.endmacro 3915 |.endmacro
4507 3916
4508 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3917 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4509 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3918 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4510 | ins_arith add, fadd, addsd 3919 | ins_arith add, addsd
4511 break; 3920 break;
4512 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3921 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4513 | ins_arith sub, fsub, subsd 3922 | ins_arith sub, subsd
4514 break; 3923 break;
4515 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3924 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4516 | ins_arith imul, fmul, mulsd 3925 | ins_arith imul, mulsd
4517 break; 3926 break;
4518 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3927 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4519 | ins_arith fdiv, divsd 3928 | ins_arith divsd
4520 break; 3929 break;
4521 case BC_MODVN: 3930 case BC_MODVN:
4522 | ins_arithpre fld, movsd, xmm1 3931 | ins_arithpre movsd, xmm1
4523 |->BC_MODVN_Z: 3932 |->BC_MODVN_Z:
4524 | call ->vm_mod 3933 | call ->vm_mod
4525 | ins_arithpost 3934 | ins_arithpost
4526 | ins_next 3935 | ins_next
4527 break; 3936 break;
4528 case BC_MODNV: case BC_MODVV: 3937 case BC_MODNV: case BC_MODVV:
4529 | ins_arithpre fld, movsd, xmm1 3938 | ins_arithpre movsd, xmm1
4530 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3939 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4531 break; 3940 break;
4532 case BC_POW: 3941 case BC_POW:
4533 | ins_arithpre fld, movsd, xmm1 3942 | ins_arithpre movsd, xmm1
4534 | call ->vm_pow 3943 | mov RB, BASE
3944 |.if not X64
3945 | movsd FPARG1, xmm0
3946 | movsd FPARG3, xmm1
3947 |.endif
3948 | call extern pow
3949 | movzx RA, PC_RA
3950 | mov BASE, RB
3951 |.if X64
4535 | ins_arithpost 3952 | ins_arithpost
3953 |.else
3954 | fstp qword [BASE+RA*8]
3955 |.endif
4536 | ins_next 3956 | ins_next
4537 break; 3957 break;
4538 3958
@@ -4600,25 +4020,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4600 | movsx RD, RDW 4020 | movsx RD, RDW
4601 | mov dword [BASE+RA*8+4], LJ_TISNUM 4021 | mov dword [BASE+RA*8+4], LJ_TISNUM
4602 | mov dword [BASE+RA*8], RD 4022 | mov dword [BASE+RA*8], RD
4603 |.elif SSE 4023 |.else
4604 | movsx RD, RDW // Sign-extend literal. 4024 | movsx RD, RDW // Sign-extend literal.
4605 | cvtsi2sd xmm0, RD 4025 | cvtsi2sd xmm0, RD
4606 | movsd qword [BASE+RA*8], xmm0 4026 | movsd qword [BASE+RA*8], xmm0
4607 |.else
4608 | fild PC_RD // Refetch signed RD from instruction.
4609 | fstp qword [BASE+RA*8]
4610 |.endif 4027 |.endif
4611 | ins_next 4028 | ins_next
4612 break; 4029 break;
4613 case BC_KNUM: 4030 case BC_KNUM:
4614 | ins_AD // RA = dst, RD = num const 4031 | ins_AD // RA = dst, RD = num const
4615 |.if SSE
4616 | movsd xmm0, qword [KBASE+RD*8] 4032 | movsd xmm0, qword [KBASE+RD*8]
4617 | movsd qword [BASE+RA*8], xmm0 4033 | movsd qword [BASE+RA*8], xmm0
4618 |.else
4619 | fld qword [KBASE+RD*8]
4620 | fstp qword [BASE+RA*8]
4621 |.endif
4622 | ins_next 4034 | ins_next
4623 break; 4035 break;
4624 case BC_KPRI: 4036 case BC_KPRI:
@@ -4725,18 +4137,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4725 case BC_USETN: 4137 case BC_USETN:
4726 | ins_AD // RA = upvalue #, RD = num const 4138 | ins_AD // RA = upvalue #, RD = num const
4727 | mov LFUNC:RB, [BASE-8] 4139 | mov LFUNC:RB, [BASE-8]
4728 |.if SSE
4729 | movsd xmm0, qword [KBASE+RD*8] 4140 | movsd xmm0, qword [KBASE+RD*8]
4730 |.else
4731 | fld qword [KBASE+RD*8]
4732 |.endif
4733 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4141 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4734 | mov RA, UPVAL:RB->v 4142 | mov RA, UPVAL:RB->v
4735 |.if SSE
4736 | movsd qword [RA], xmm0 4143 | movsd qword [RA], xmm0
4737 |.else
4738 | fstp qword [RA]
4739 |.endif
4740 | ins_next 4144 | ins_next
4741 break; 4145 break;
4742 case BC_USETP: 4146 case BC_USETP:
@@ -4890,18 +4294,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4890 |.else 4294 |.else
4891 | // Convert number to int and back and compare. 4295 | // Convert number to int and back and compare.
4892 | checknum RC, >5 4296 | checknum RC, >5
4893 |.if SSE
4894 | movsd xmm0, qword [BASE+RC*8] 4297 | movsd xmm0, qword [BASE+RC*8]
4895 | cvtsd2si RC, xmm0 4298 | cvttsd2si RC, xmm0
4896 | cvtsi2sd xmm1, RC 4299 | cvtsi2sd xmm1, RC
4897 | ucomisd xmm0, xmm1 4300 | ucomisd xmm0, xmm1
4898 |.else
4899 | fld qword [BASE+RC*8]
4900 | fist ARG1
4901 | fild ARG1
4902 | fcomparepp
4903 | mov RC, ARG1
4904 |.endif
4905 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4301 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4906 |.endif 4302 |.endif
4907 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4303 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4947,7 +4343,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4947 | mov TAB:RB, [BASE+RB*8] 4343 | mov TAB:RB, [BASE+RB*8]
4948 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4344 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4949 | mov RA, TAB:RB->hmask 4345 | mov RA, TAB:RB->hmask
4950 | and RA, STR:RC->hash 4346 | and RA, STR:RC->sid
4951 | imul RA, #NODE 4347 | imul RA, #NODE
4952 | add NODE:RA, TAB:RB->node 4348 | add NODE:RA, TAB:RB->node
4953 |1: 4349 |1:
@@ -5025,6 +4421,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5025 | mov dword [BASE+RA*8+4], LJ_TNIL 4421 | mov dword [BASE+RA*8+4], LJ_TNIL
5026 | jmp <1 4422 | jmp <1
5027 break; 4423 break;
4424 case BC_TGETR:
4425 | ins_ABC // RA = dst, RB = table, RC = key
4426 | mov TAB:RB, [BASE+RB*8]
4427 |.if DUALNUM
4428 | mov RC, dword [BASE+RC*8]
4429 |.else
4430 | cvttsd2si RC, qword [BASE+RC*8]
4431 |.endif
4432 | cmp RC, TAB:RB->asize
4433 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4434 | shl RC, 3
4435 | add RC, TAB:RB->array
4436 | // Get array slot.
4437 |->BC_TGETR_Z:
4438 |.if X64
4439 | mov RBa, [RC]
4440 | mov [BASE+RA*8], RBa
4441 |.else
4442 | mov RB, [RC]
4443 | mov RC, [RC+4]
4444 | mov [BASE+RA*8], RB
4445 | mov [BASE+RA*8+4], RC
4446 |.endif
4447 |->BC_TGETR2_Z:
4448 | ins_next
4449 break;
5028 4450
5029 case BC_TSETV: 4451 case BC_TSETV:
5030 | ins_ABC // RA = src, RB = table, RC = key 4452 | ins_ABC // RA = src, RB = table, RC = key
@@ -5038,18 +4460,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5038 |.else 4460 |.else
5039 | // Convert number to int and back and compare. 4461 | // Convert number to int and back and compare.
5040 | checknum RC, >5 4462 | checknum RC, >5
5041 |.if SSE
5042 | movsd xmm0, qword [BASE+RC*8] 4463 | movsd xmm0, qword [BASE+RC*8]
5043 | cvtsd2si RC, xmm0 4464 | cvttsd2si RC, xmm0
5044 | cvtsi2sd xmm1, RC 4465 | cvtsi2sd xmm1, RC
5045 | ucomisd xmm0, xmm1 4466 | ucomisd xmm0, xmm1
5046 |.else
5047 | fld qword [BASE+RC*8]
5048 | fist ARG1
5049 | fild ARG1
5050 | fcomparepp
5051 | mov RC, ARG1
5052 |.endif
5053 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4467 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5054 |.endif 4468 |.endif
5055 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4469 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5100,7 +4514,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5100 | mov TAB:RB, [BASE+RB*8] 4514 | mov TAB:RB, [BASE+RB*8]
5101 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4515 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
5102 | mov RA, TAB:RB->hmask 4516 | mov RA, TAB:RB->hmask
5103 | and RA, STR:RC->hash 4517 | and RA, STR:RC->sid
5104 | imul RA, #NODE 4518 | imul RA, #NODE
5105 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. 4519 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
5106 | add NODE:RA, TAB:RB->node 4520 | add NODE:RA, TAB:RB->node
@@ -5219,6 +4633,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5219 | movzx RA, PC_RA // Restore RA. 4633 | movzx RA, PC_RA // Restore RA.
5220 | jmp <2 4634 | jmp <2
5221 break; 4635 break;
4636 case BC_TSETR:
4637 | ins_ABC // RA = src, RB = table, RC = key
4638 | mov TAB:RB, [BASE+RB*8]
4639 |.if DUALNUM
4640 | mov RC, dword [BASE+RC*8]
4641 |.else
4642 | cvttsd2si RC, qword [BASE+RC*8]
4643 |.endif
4644 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4645 | jnz >7
4646 |2:
4647 | cmp RC, TAB:RB->asize
4648 | jae ->vmeta_tsetr
4649 | shl RC, 3
4650 | add RC, TAB:RB->array
4651 | // Set array slot.
4652 |->BC_TSETR_Z:
4653 |.if X64
4654 | mov RBa, [BASE+RA*8]
4655 | mov [RC], RBa
4656 |.else
4657 | mov RB, [BASE+RA*8+4]
4658 | mov RA, [BASE+RA*8]
4659 | mov [RC+4], RB
4660 | mov [RC], RA
4661 |.endif
4662 | ins_next
4663 |
4664 |7: // Possible table write barrier for the value. Skip valiswhite check.
4665 | barrierback TAB:RB, RA
4666 | movzx RA, PC_RA // Restore RA.
4667 | jmp <2
4668 break;
5222 4669
5223 case BC_TSETM: 4670 case BC_TSETM:
5224 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4671 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5395,10 +4842,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5395 break; 4842 break;
5396 4843
5397 case BC_ITERN: 4844 case BC_ITERN:
5398 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5399 |.if JIT 4845 |.if JIT
5400 | // NYI: add hotloop, record BC_ITERN. 4846 | hotloop RB
5401 |.endif 4847 |.endif
4848 |->vm_IITERN:
4849 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5402 | mov TMP1, KBASE // Need two more free registers. 4850 | mov TMP1, KBASE // Need two more free registers.
5403 | mov TMP2, DISPATCH 4851 | mov TMP2, DISPATCH
5404 | mov TAB:RB, [BASE+RA*8-16] 4852 | mov TAB:RB, [BASE+RA*8-16]
@@ -5412,10 +4860,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5412 |.if DUALNUM 4860 |.if DUALNUM
5413 | mov dword [BASE+RA*8+4], LJ_TISNUM 4861 | mov dword [BASE+RA*8+4], LJ_TISNUM
5414 | mov dword [BASE+RA*8], RC 4862 | mov dword [BASE+RA*8], RC
5415 |.elif SSE
5416 | cvtsi2sd xmm0, RC
5417 |.else 4863 |.else
5418 | fild dword [BASE+RA*8-8] 4864 | cvtsi2sd xmm0, RC
5419 |.endif 4865 |.endif
5420 | // Copy array slot to returned value. 4866 | // Copy array slot to returned value.
5421 |.if X64 4867 |.if X64
@@ -5431,10 +4877,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5431 | // Return array index as a numeric key. 4877 | // Return array index as a numeric key.
5432 |.if DUALNUM 4878 |.if DUALNUM
5433 | // See above. 4879 | // See above.
5434 |.elif SSE
5435 | movsd qword [BASE+RA*8], xmm0
5436 |.else 4880 |.else
5437 | fstp qword [BASE+RA*8] 4881 | movsd qword [BASE+RA*8], xmm0
5438 |.endif 4882 |.endif
5439 | mov [BASE+RA*8-8], RC // Update control var. 4883 | mov [BASE+RA*8-8], RC // Update control var.
5440 |2: 4884 |2:
@@ -5447,9 +4891,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5447 | 4891 |
5448 |4: // Skip holes in array part. 4892 |4: // Skip holes in array part.
5449 | add RC, 1 4893 | add RC, 1
5450 |.if not (DUALNUM or SSE)
5451 | mov [BASE+RA*8-8], RC
5452 |.endif
5453 | jmp <1 4894 | jmp <1
5454 | 4895 |
5455 |5: // Traverse hash part. 4896 |5: // Traverse hash part.
@@ -5493,14 +4934,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5493 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 4934 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
5494 | branchPC RD 4935 | branchPC RD
5495 | mov dword [BASE+RA*8-8], 0 // Initialize control var. 4936 | mov dword [BASE+RA*8-8], 0 // Initialize control var.
5496 | mov dword [BASE+RA*8-4], 0xfffe7fff 4937 | mov dword [BASE+RA*8-4], LJ_KEYINDEX
5497 |1: 4938 |1:
5498 | ins_next 4939 | ins_next
5499 |5: // Despecialize bytecode if any of the checks fail. 4940 |5: // Despecialize bytecode if any of the checks fail.
5500 | mov PC_OP, BC_JMP 4941 | mov PC_OP, BC_JMP
5501 | branchPC RD 4942 | branchPC RD
4943 |.if JIT
4944 | cmp byte [PC], BC_ITERN
4945 | jne >6
4946 |.endif
5502 | mov byte [PC], BC_ITERC 4947 | mov byte [PC], BC_ITERC
5503 | jmp <1 4948 | jmp <1
4949 |.if JIT
4950 |6: // Unpatch JLOOP.
4951 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4952 | movzx RC, word [PC+2]
4953 | mov TRACE:RA, [RA+RC*4]
4954 | mov eax, TRACE:RA->startins
4955 | mov al, BC_ITERC
4956 | mov dword [PC], eax
4957 | jmp <1
4958 |.endif
5504 break; 4959 break;
5505 4960
5506 case BC_VARG: 4961 case BC_VARG:
@@ -5783,7 +5238,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5783 if (!vk) { 5238 if (!vk) {
5784 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5239 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5785 } 5240 }
5786 |.if SSE
5787 | movsd xmm0, qword FOR_IDX 5241 | movsd xmm0, qword FOR_IDX
5788 | movsd xmm1, qword FOR_STOP 5242 | movsd xmm1, qword FOR_STOP
5789 if (vk) { 5243 if (vk) {
@@ -5796,22 +5250,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5796 | ucomisd xmm1, xmm0 5250 | ucomisd xmm1, xmm0
5797 |1: 5251 |1:
5798 | movsd qword FOR_EXT, xmm0 5252 | movsd qword FOR_EXT, xmm0
5799 |.else
5800 | fld qword FOR_STOP
5801 | fld qword FOR_IDX
5802 if (vk) {
5803 | fadd qword FOR_STEP // nidx = idx + step
5804 | fst qword FOR_IDX
5805 | fst qword FOR_EXT
5806 | test RB, RB; js >1
5807 } else {
5808 | fst qword FOR_EXT
5809 | jl >1
5810 }
5811 | fxch // Swap lim/(n)idx if step non-negative.
5812 |1:
5813 | fcomparepp
5814 |.endif
5815 if (op == BC_FORI) { 5253 if (op == BC_FORI) {
5816 |.if DUALNUM 5254 |.if DUALNUM
5817 | jnb <7 5255 | jnb <7
@@ -5839,11 +5277,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5839 |2: 5277 |2:
5840 | ins_next 5278 | ins_next
5841 |.endif 5279 |.endif
5842 |.if SSE 5280 |
5843 |3: // Invert comparison if step is negative. 5281 |3: // Invert comparison if step is negative.
5844 | ucomisd xmm0, xmm1 5282 | ucomisd xmm0, xmm1
5845 | jmp <1 5283 | jmp <1
5846 |.endif
5847 break; 5284 break;
5848 5285
5849 case BC_ITERL: 5286 case BC_ITERL:
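With the x87 fallback removed, the SSE sequence above is the only numeric path for FORI/FORL. The ucomisd at 1: and the swapped-operand compare at 3: implement the usual numeric for-loop termination test; a small C restatement follows (illustrative only, not taken from the patch).

/* Illustrative only -- not code from the patch. */
#include <stdbool.h>

/* Lua's numeric for loop keeps running while idx <= stop for a non-negative
** step, and while idx >= stop for a negative step; the negative case is the
** swapped ucomisd at label 3: above. */
static bool for_loop_continues(double idx, double stop, double step)
{
  return step >= 0 ? idx <= stop : stop <= idx;
}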
@@ -5881,7 +5318,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5881 | ins_A // RA = base, RD = target (loop extent) 5318 | ins_A // RA = base, RD = target (loop extent)
5882 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5319 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5883 | // This opcode does NOT jump, its only purpose is to detect a hot loop. 5320 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5884 |.if JIT 5321 |.if JIT
5885 | hotloop RB 5322 | hotloop RB
5886 |.endif 5323 |.endif
5887 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5324 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5900,7 +5337,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5900 | mov RDa, TRACE:RD->mcode 5337 | mov RDa, TRACE:RD->mcode
5901 | mov L:RB, SAVE_L 5338 | mov L:RB, SAVE_L
5902 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5339 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5903 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5340 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5904 | // Save additional callee-save registers only used in compiled code. 5341 | // Save additional callee-save registers only used in compiled code.
5905 |.if X64WIN 5342 |.if X64WIN
5906 | mov TMPQ, r12 5343 | mov TMPQ, r12
@@ -6067,9 +5504,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6067 | // (lua_State *L, lua_CFunction f) 5504 | // (lua_State *L, lua_CFunction f)
6068 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5505 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6069 } 5506 }
6070 | set_vmstate INTERP
6071 | // nresults returned in eax (RD). 5507 | // nresults returned in eax (RD).
6072 | mov BASE, L:RB->base 5508 | mov BASE, L:RB->base
5509 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5510 | set_vmstate INTERP
6073 | lea RA, [BASE+RD*8] 5511 | lea RA, [BASE+RD*8]
6074 | neg RA 5512 | neg RA
6075 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5513 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6182,7 +5620,7 @@ static void emit_asm_debug(BuildCtx *ctx)
6182 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5620 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
6183#endif 5621#endif
6184#if !LJ_NO_UNWIND 5622#if !LJ_NO_UNWIND
6185#if (defined(__sun__) && defined(__svr4__)) 5623#if LJ_TARGET_SOLARIS
6186#if LJ_64 5624#if LJ_64
6187 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); 5625 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
6188#else 5626#else
@@ -6389,14 +5827,19 @@ static void emit_asm_debug(BuildCtx *ctx)
6389 "LEFDEY:\n\n", fcsize); 5827 "LEFDEY:\n\n", fcsize);
6390 } 5828 }
6391#endif 5829#endif
6392#if LJ_64 5830#if !LJ_64
6393 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6394#else
6395 fprintf(ctx->fp, 5831 fprintf(ctx->fp,
6396 "\t.non_lazy_symbol_pointer\n" 5832 "\t.non_lazy_symbol_pointer\n"
6397 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5833 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6398 ".indirect_symbol _lj_err_unwind_dwarf\n" 5834 ".indirect_symbol _lj_err_unwind_dwarf\n"
6399 ".long 0\n"); 5835 ".long 0\n\n");
5836 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5837 {
5838 const char *const *xn;
5839 for (xn = ctx->extnames; *xn; xn++)
5840 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5841 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5842 }
6400#endif 5843#endif
6401 } 5844 }
6402 break; 5845 break;
diff --git a/src/xb1build.bat b/src/xb1build.bat
new file mode 100644
index 00000000..019d6ebe
--- /dev/null
+++ b/src/xb1build.bat
@@ -0,0 +1,104 @@
1@rem Script to build LuaJIT with the Xbox One SDK.
2@rem Donated to the public domain.
3@rem
4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5@rem Then cd to this directory and run this script.
6
7@if not defined INCLUDE goto :FAIL
8@if not defined DurangoXDK goto :FAIL
9
10@setlocal
11@echo ---- Host compiler ----
12@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
13@set LJLINK=link /nologo
14@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
18
19%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD
21%LJLINK% /out:minilua.exe minilua.obj
22@if errorlevel 1 goto :BAD
23if exist minilua.exe.manifest^
24 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
25
26@rem Check for 64 bit host compiler.
27@minilua
28@if not errorlevel 8 goto :FAIL
29
30@set DASMFLAGS=-D WIN -D FFI -D P64
31minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc
32@if errorlevel 1 goto :BAD
33
34if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
35minilua host\genversion.lua
36
37%LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c
38@if errorlevel 1 goto :BAD
39%LJLINK% /out:buildvm.exe buildvm*.obj
40@if errorlevel 1 goto :BAD
41if exist buildvm.exe.manifest^
42 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
43
44buildvm -m peobj -o lj_vm.obj
45@if errorlevel 1 goto :BAD
46buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
47@if errorlevel 1 goto :BAD
48buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
49@if errorlevel 1 goto :BAD
50buildvm -m libdef -o lj_libdef.h %ALL_LIB%
51@if errorlevel 1 goto :BAD
52buildvm -m recdef -o lj_recdef.h %ALL_LIB%
53@if errorlevel 1 goto :BAD
54buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
55@if errorlevel 1 goto :BAD
56buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
57@if errorlevel 1 goto :BAD
58
59@echo ---- Cross compiler ----
60
61@set CWD=%cd%
62@call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK
63@cd /D "%CWD%"
64@shift
65
66@set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO
67@set LJLIB="lib" /nologo
68
69@if "%1"=="debug" (
70 @shift
71 @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od
72 @set LJLINK=%LJLINK% /debug
73) else (
74 @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG
75)
76
77@if "%1"=="amalg" goto :AMALG
78%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
79@if errorlevel 1 goto :BAD
80%LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj
81@if errorlevel 1 goto :BAD
82@goto :NOAMALG
83:AMALG
84%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
85@if errorlevel 1 goto :BAD
86%LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj
87@if errorlevel 1 goto :BAD
88:NOAMALG
89
90@del *.obj *.manifest minilua.exe buildvm.exe
91@echo.
92@echo === Successfully built LuaJIT for Xbox One ===
93
94@goto :END
95:BAD
96@echo.
97@echo *******************************************************
98@echo *** Build FAILED -- Please check the error messages ***
99@echo *******************************************************
100@goto :END
101:FAIL
102@echo To run this script you must open a "Visual Studio .NET Command Prompt"
103@echo (64 bit host compiler). The Xbox One SDK must be installed, too.
104:END
diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat
index b07f3bc2..5444024e 100644
--- a/src/xedkbuild.bat
+++ b/src/xedkbuild.bat
@@ -14,7 +14,7 @@
14@set LJMT=mt /nologo 14@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm 15@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua 16@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
18 18
19%LJCOMPILE% host\minilua.c 19%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD 20@if errorlevel 1 goto :BAD