aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Makefile25
-rw-r--r--README4
-rw-r--r--doc/contact.html8
-rw-r--r--doc/ext_buffer.html682
-rw-r--r--doc/ext_c_api.html8
-rw-r--r--doc/ext_ffi.html8
-rw-r--r--doc/ext_ffi_api.html16
-rw-r--r--doc/ext_ffi_semantics.html35
-rw-r--r--doc/ext_ffi_tutorial.html8
-rw-r--r--doc/ext_jit.html10
-rw-r--r--doc/ext_profiler.html361
-rw-r--r--doc/extensions.html112
-rw-r--r--doc/faq.html8
-rw-r--r--doc/install.html166
-rw-r--r--doc/luajit.html14
-rw-r--r--doc/running.html9
-rw-r--r--doc/status.html16
-rw-r--r--dynasm/dasm_arm.h7
-rw-r--r--dynasm/dasm_arm.lua6
-rw-r--r--dynasm/dasm_arm64.h561
-rw-r--r--dynasm/dasm_arm64.lua1219
-rw-r--r--dynasm/dasm_mips.h29
-rw-r--r--dynasm/dasm_mips.lua684
-rw-r--r--dynasm/dasm_mips64.lua12
-rw-r--r--dynasm/dasm_ppc.h20
-rw-r--r--dynasm/dasm_ppc.lua702
-rw-r--r--dynasm/dasm_proto.h4
-rw-r--r--dynasm/dasm_x86.h62
-rw-r--r--dynasm/dasm_x86.lua648
-rw-r--r--dynasm/dynasm.lua7
-rw-r--r--etc/luajit.pc6
-rw-r--r--src/.gitignore2
-rw-r--r--src/Makefile109
-rw-r--r--src/Makefile.dep255
-rw-r--r--src/host/buildvm.c26
-rw-r--r--src/host/buildvm.h1
-rw-r--r--src/host/buildvm_asm.c56
-rw-r--r--src/host/buildvm_lib.c63
-rw-r--r--src/host/buildvm_libbc.h56
-rw-r--r--src/host/buildvm_peobj.c43
-rw-r--r--src/host/genlibbc.lua197
-rw-r--r--src/jit/bc.lua19
-rw-r--r--src/jit/bcsave.lua106
-rw-r--r--src/jit/dis_arm.lua18
-rw-r--r--src/jit/dis_arm64.lua1216
-rw-r--r--src/jit/dis_arm64be.lua12
-rw-r--r--src/jit/dis_mips.lua372
-rw-r--r--src/jit/dis_mips64.lua17
-rw-r--r--src/jit/dis_mips64el.lua17
-rw-r--r--src/jit/dis_mips64r6.lua17
-rw-r--r--src/jit/dis_mips64r6el.lua17
-rw-r--r--src/jit/dis_mipsel.lua15
-rw-r--r--src/jit/dis_ppc.lua18
-rw-r--r--src/jit/dis_x64.lua15
-rw-r--r--src/jit/dis_x86.lua297
-rw-r--r--src/jit/dump.lua59
-rw-r--r--src/jit/p.lua312
-rw-r--r--src/jit/v.lua17
-rw-r--r--src/jit/zone.lua45
-rw-r--r--src/lauxlib.h34
-rw-r--r--src/lib_aux.c82
-rw-r--r--src/lib_base.c153
-rw-r--r--src/lib_bit.c134
-rw-r--r--src/lib_buffer.c349
-rw-r--r--src/lib_debug.c14
-rw-r--r--src/lib_ffi.c82
-rw-r--r--src/lib_io.c55
-rw-r--r--src/lib_jit.c233
-rw-r--r--src/lib_math.c92
-rw-r--r--src/lib_os.c37
-rw-r--r--src/lib_package.c73
-rw-r--r--src/lib_string.c427
-rw-r--r--src/lib_table.c187
-rw-r--r--src/lj.supp41
-rw-r--r--src/lj_alloc.c275
-rw-r--r--src/lj_alloc.h3
-rw-r--r--src/lj_api.c385
-rw-r--r--src/lj_arch.h408
-rw-r--r--src/lj_asm.c976
-rw-r--r--src/lj_asm_arm.h626
-rw-r--r--src/lj_asm_arm64.h2039
-rw-r--r--src/lj_asm_mips.h1770
-rw-r--r--src/lj_asm_ppc.h891
-rw-r--r--src/lj_asm_x86.h1314
-rw-r--r--src/lj_assert.c28
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h6
-rw-r--r--src/lj_bcread.c158
-rw-r--r--src/lj_bcwrite.c245
-rw-r--r--src/lj_buf.c305
-rw-r--r--src/lj_buf.h197
-rw-r--r--src/lj_carith.c83
-rw-r--r--src/lj_carith.h10
-rw-r--r--src/lj_ccall.c397
-rw-r--r--src/lj_ccall.h49
-rw-r--r--src/lj_ccallback.c280
-rw-r--r--src/lj_cconv.c66
-rw-r--r--src/lj_cconv.h5
-rw-r--r--src/lj_cdata.c67
-rw-r--r--src/lj_cdata.h14
-rw-r--r--src/lj_clib.c47
-rw-r--r--src/lj_cparse.c178
-rw-r--r--src/lj_cparse.h2
-rw-r--r--src/lj_crecord.c391
-rw-r--r--src/lj_crecord.h12
-rw-r--r--src/lj_ctype.c30
-rw-r--r--src/lj_ctype.h27
-rw-r--r--src/lj_debug.c204
-rw-r--r--src/lj_debug.h8
-rw-r--r--src/lj_def.h56
-rw-r--r--src/lj_dispatch.c105
-rw-r--r--src/lj_dispatch.h45
-rw-r--r--src/lj_emit_arm.h71
-rw-r--r--src/lj_emit_arm64.h424
-rw-r--r--src/lj_emit_mips.h161
-rw-r--r--src/lj_emit_ppc.h34
-rw-r--r--src/lj_emit_x86.h200
-rw-r--r--src/lj_err.c689
-rw-r--r--src/lj_err.h19
-rw-r--r--src/lj_errmsg.h23
-rw-r--r--src/lj_ffrecord.c965
-rw-r--r--src/lj_frame.h160
-rw-r--r--src/lj_func.c18
-rw-r--r--src/lj_gc.c178
-rw-r--r--src/lj_gc.h16
-rw-r--r--src/lj_gdbjit.c55
-rw-r--r--src/lj_ir.c175
-rw-r--r--src/lj_ir.h115
-rw-r--r--src/lj_ircall.h237
-rw-r--r--src/lj_iropt.h16
-rw-r--r--src/lj_jit.h219
-rw-r--r--src/lj_lex.c391
-rw-r--r--src/lj_lex.h23
-rw-r--r--src/lj_lib.c131
-rw-r--r--src/lj_lib.h40
-rw-r--r--src/lj_load.c6
-rw-r--r--src/lj_mcode.c78
-rw-r--r--src/lj_meta.c134
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.c18
-rw-r--r--src/lj_obj.h299
-rw-r--r--src/lj_opt_fold.c580
-rw-r--r--src/lj_opt_loop.c45
-rw-r--r--src/lj_opt_mem.c159
-rw-r--r--src/lj_opt_narrow.c48
-rw-r--r--src/lj_opt_sink.c14
-rw-r--r--src/lj_opt_split.c196
-rw-r--r--src/lj_parse.c301
-rw-r--r--src/lj_prng.c250
-rw-r--r--src/lj_prng.h24
-rw-r--r--src/lj_profile.c367
-rw-r--r--src/lj_profile.h21
-rw-r--r--src/lj_record.c812
-rw-r--r--src/lj_record.h2
-rw-r--r--src/lj_serialize.c475
-rw-r--r--src/lj_serialize.h27
-rw-r--r--src/lj_snap.c261
-rw-r--r--src/lj_snap.h3
-rw-r--r--src/lj_state.c112
-rw-r--r--src/lj_state.h4
-rw-r--r--src/lj_str.c519
-rw-r--r--src/lj_str.h39
-rw-r--r--src/lj_strfmt.c606
-rw-r--r--src/lj_strfmt.h131
-rw-r--r--src/lj_strfmt_num.c592
-rw-r--r--src/lj_strscan.c80
-rw-r--r--src/lj_strscan.h3
-rw-r--r--src/lj_tab.c166
-rw-r--r--src/lj_tab.h31
-rw-r--r--src/lj_target.h9
-rw-r--r--src/lj_target_arm.h5
-rw-r--r--src/lj_target_arm64.h334
-rw-r--r--src/lj_target_mips.h195
-rw-r--r--src/lj_target_ppc.h2
-rw-r--r--src/lj_target_x86.h38
-rw-r--r--src/lj_trace.c235
-rw-r--r--src/lj_trace.h5
-rw-r--r--src/lj_traceerr.h4
-rw-r--r--src/lj_udata.c28
-rw-r--r--src/lj_udata.h3
-rw-r--r--src/lj_vm.h34
-rw-r--r--src/lj_vmevent.c1
-rw-r--r--src/lj_vmmath.c75
-rw-r--r--src/ljamalg.c18
-rw-r--r--src/lua.h11
-rw-r--r--src/luaconf.h10
-rw-r--r--src/luajit.c134
-rw-r--r--src/luajit.h15
-rw-r--r--src/lualib.h1
-rw-r--r--src/msvcbuild.bat14
-rw-r--r--src/ps4build.bat34
-rw-r--r--src/psvitabuild.bat2
-rw-r--r--src/vm_arm.dasc359
-rw-r--r--src/vm_arm64.dasc3989
-rw-r--r--src/vm_mips.dasc2556
-rw-r--r--src/vm_mips64.dasc5458
-rw-r--r--src/vm_ppc.dasc1648
-rw-r--r--src/vm_ppcspe.dasc3691
-rw-r--r--src/vm_x64.dasc4909
-rw-r--r--src/vm_x86.dasc1577
-rw-r--r--src/xb1build.bat101
-rw-r--r--src/xedkbuild.bat2
202 files changed, 45479 insertions, 13335 deletions
diff --git a/Makefile b/Makefile
index 7b66aa01..aa1b84bd 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,10 @@
14############################################################################## 14##############################################################################
15 15
16MAJVER= 2 16MAJVER= 2
17MINVER= 0 17MINVER= 1
18RELVER= 5 18RELVER= 0
19VERSION= $(MAJVER).$(MINVER).$(RELVER) 19PREREL= -beta3
20VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
20ABIVER= 5.1 21ABIVER= 5.1
21 22
22############################################################################## 23##############################################################################
@@ -84,8 +85,10 @@ FILE_SO= libluajit.so
84FILE_MAN= luajit.1 85FILE_MAN= luajit.1
85FILE_PC= luajit.pc 86FILE_PC= luajit.pc
86FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h 87FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
87FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \ 88FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
88 dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua 89 dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
90 dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
91 dis_mips64.lua dis_mips64el.lua vmdef.lua
89 92
90ifeq (,$(findstring Windows,$(OS))) 93ifeq (,$(findstring Windows,$(OS)))
91 HOST_SYS:= $(shell uname -s) 94 HOST_SYS:= $(shell uname -s)
@@ -115,7 +118,7 @@ install: $(INSTALL_DEP)
115 $(MKDIR) $(INSTALL_DIRS) 118 $(MKDIR) $(INSTALL_DIRS)
116 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) 119 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
117 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : 120 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
118 $(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) 121 $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
119 cd src && test -f $(FILE_SO) && \ 122 cd src && test -f $(FILE_SO) && \
120 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ 123 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
121 ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ 124 ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \
@@ -127,12 +130,18 @@ install: $(INSTALL_DEP)
127 $(RM) $(FILE_PC).tmp 130 $(RM) $(FILE_PC).tmp
128 cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) 131 cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
129 cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) 132 cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
130 $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
131 @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" 133 @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
134 @echo ""
135 @echo "Note: the development releases deliberately do NOT install a symlink for luajit"
136 @echo "You can do this now by running this command (with sudo):"
137 @echo ""
138 @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
139 @echo ""
140
132 141
133uninstall: 142uninstall:
134 @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" 143 @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
135 $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) 144 $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
136 for file in $(FILES_JITLIB); do \ 145 for file in $(FILES_JITLIB); do \
137 $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ 146 $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
138 done 147 done
diff --git a/README b/README
index 5aa4f0ed..c9f7d9ad 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
1README for LuaJIT 2.0.5 1README for LuaJIT 2.1.0-beta3
2----------------------- 2-----------------------------
3 3
4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. 4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
5 5
diff --git a/doc/contact.html b/doc/contact.html
index ba45a03a..c253a08b 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Contact</title> 4<title>Contact</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a href="ext_jit.html">jit.* Library</a> 42<a href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="status.html">Status</a> 49<a href="status.html">Status</a>
diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html
new file mode 100644
index 00000000..2443fc90
--- /dev/null
+++ b/doc/ext_buffer.html
@@ -0,0 +1,682 @@
1<!DOCTYPE html>
2<html>
3<head>
4<title>String Buffer Library</title>
5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
9<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
10<style type="text/css">
11.lib {
12 vertical-align: middle;
13 margin-left: 5px;
14 padding: 0 5px;
15 font-size: 60%;
16 border-radius: 5px;
17 background: #c5d5ff;
18 color: #000;
19}
20</style>
21</head>
22<body>
23<div id="site">
24<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
25</div>
26<div id="head">
27<h1>String Buffer Library</h1>
28</div>
29<div id="nav">
30<ul><li>
31<a href="luajit.html">LuaJIT</a>
32<ul><li>
33<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
34</li><li>
35<a href="install.html">Installation</a>
36</li><li>
37<a href="running.html">Running</a>
38</li></ul>
39</li><li>
40<a href="extensions.html">Extensions</a>
41<ul><li>
42<a href="ext_ffi.html">FFI Library</a>
43<ul><li>
44<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
45</li><li>
46<a href="ext_ffi_api.html">ffi.* API</a>
47</li><li>
48<a href="ext_ffi_semantics.html">FFI Semantics</a>
49</li></ul>
50</li><li>
51<a class="current" href="ext_buffer.html">String Buffers</a>
52</li><li>
53<a href="ext_jit.html">jit.* Library</a>
54</li><li>
55<a href="ext_c_api.html">Lua/C API</a>
56</li><li>
57<a href="ext_profiler.html">Profiler</a>
58</li></ul>
59</li><li>
60<a href="status.html">Status</a>
61</li><li>
62<a href="faq.html">FAQ</a>
63</li><li>
64<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
65</li><li>
66<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
67</li></ul>
68</div>
69<div id="main">
70<p>
71The string buffer library allows <b>high-performance manipulation of
72string-like data</b>.
73</p>
74<p>
75Unlike Lua strings, which are constants, string buffers are
76<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
77can be stored, formatted and encoded into a string buffer and later
78converted, extracted or decoded.
79</p>
80<p>
81The convenient string buffer API simplifies common string manipulation
82tasks that would otherwise require creating many intermediate strings.
83String buffers improve performance by eliminating redundant memory
84copies, object creation, string interning and garbage collection
85overhead. In conjunction with the FFI library, they allow zero-copy
86operations.
87</p>
88<p>
89The string buffer library also includes a high-performance
90<a href="#serialize">serializer</a> for Lua objects.
91</p>
92
93<h2 id="wip" style="color:#ff0000">Work in Progress</h2>
94<p>
95<b style="color:#ff0000">This library is a work in progress. More
96functionality will be added soon.</b>
97</p>
98
99<h2 id="use">Using the String Buffer Library</h2>
100<p>
101The string buffer library is built into LuaJIT by default, but it's not
102loaded by default. Add this to the start of every Lua file that needs
103one of its functions:
104</p>
105<pre class="code">
106local buffer = require("string.buffer")
107</pre>
108<p>
109The convention for the syntax shown on this page is that <tt>buffer</tt>
110refers to the buffer library and <tt>buf</tt> refers to an individual
111buffer object.
112</p>
113<p>
114Please note the difference between a Lua function call, e.g.
115<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g.
116<tt>buf:reset()</tt> (with a colon).
117</p>
118
119<h3 id="buffer_object">Buffer Objects</h3>
120<p>
121A buffer object is a garbage-collected Lua object. After creation with
122<tt>buffer.new()</tt>, it can (and should) be reused for many operations.
123When the last reference to a buffer object is gone, it will eventually
124be freed by the garbage collector, along with the allocated buffer
125space.
126</p>
127<p>
128Buffers operate like a FIFO (first-in first-out) data structure. Data
129can be appended (written) to the end of the buffer and consumed (read)
130from the front of the buffer. These operations can be freely mixed.
131</p>
132<p>
133The buffer space that holds the characters is managed automatically
134&mdash; it grows as needed and already consumed space is recycled. Use
135<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more
136control.
137</p>
138<p>
139The maximum size of a single buffer is the same as the maximum size of a
140Lua string, which is slightly below two gigabytes. For huge data sizes,
141neither strings nor buffers are the right data structure &mdash; use the
142FFI library to directly map memory or files up to the virtual memory
143limit of your OS.
144</p>
145
146<h3 id="buffer_overview">Buffer Method Overview</h3>
147<ul>
148<li>
149The <tt>buf:put*()</tt>-like methods append (write) characters to the
150end of the buffer.
151</li>
152<li>
153The <tt>buf:get*()</tt>-like methods consume (read) characters from the
154front of the buffer.
155</li>
156<li>
157Other methods, like <tt>buf:tostring()</tt> only read the buffer
158contents, but don't change the buffer.
159</li>
160<li>
161The <tt>buf:set()</tt> method allows zero-copy consumption of a string
162or an FFI cdata object as a buffer.
163</li>
164<li>
165The FFI-specific methods allow zero-copy read/write-style operations or
166modifying the buffer contents in-place. Please check the
167<a href="#ffi_caveats">FFI caveats</a> below, too.
168</li>
169<li>
170Methods that don't need to return anything specific, return the buffer
171object itself as a convenience. This allows method chaining, e.g.:
172<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt>
173</li>
174</ul>
175
176<h2 id="create">Buffer Creation and Management</h2>
177
178<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br>
179local buf = buffer.new([options])</tt></h3>
180<p>
181Creates a new buffer object.
182</p>
183<p>
184The optional <tt>size</tt> argument ensures a minimum initial buffer
185size. This is strictly an optimization when the required buffer size is
186known beforehand. The buffer space will grow as needed, in any case.
187</p>
188<p>
189The optional table <tt>options</tt> sets various
190<a href="#serialize_options">serialization options</a>.
191</p>
192
193<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3>
194<p>
195Reset (empty) the buffer. The allocated buffer space is not freed and
196may be reused.
197</p>
198
199<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
200<p>
201The buffer space of the buffer object is freed. The object itself
202remains intact, empty and it may be reused.
203</p>
204<p>
205Note: you normally don't need to use this method. The garbage collector
206automatically frees the buffer space, when the buffer object is
207collected. Use this method, if you need to free the associated memory
208immediately.
209</p>
210
211<h2 id="write">Buffer Writers</h2>
212
213<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3>
214<p>
215Appends a string <tt>str</tt>, a number <tt>num</tt> or any object
216<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer.
217Multiple arguments are appended in the given order.
218</p>
219<p>
220Appending a buffer to a buffer is possible and short-circuited
221internally. But it still involves a copy. Better combine the buffer
222writes to use a single buffer.
223</p>
224
225<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3>
226<p>
227Appends the formatted arguments to the buffer. The <tt>format</tt>
228string supports the same options as <tt>string.format()</tt>.
229</p>
230
231<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3>
232<p>
233Appends the given <tt>len</tt> number of bytes from the memory pointed
234to by the FFI <tt>cdata</tt> object to the buffer. The object needs to
235be convertible to a (constant) pointer.
236</p>
237
238<h3 id="buffer_set"><tt>buf = buf:set(str)<br>
239buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3>
240<p>
241This method allows zero-copy consumption of a string or an FFI cdata
242object as a buffer. It stores a reference to the passed string
243<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer
244space originally allocated is freed. This is <i>not</i> an append
245operation, unlike the <tt>buf:put*()</tt> methods.
246</p>
247<p>
248After calling this method, the buffer behaves as if
249<tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata,&nbsp;len)</tt>
250had been called. However, the data is only referenced and not copied, as
251long as the buffer is only consumed.
252</p>
253<p>
254In case the buffer is written to later on, the referenced data is copied
255and the object reference is removed (copy-on-write semantics).
256</p>
257<p>
258The stored reference is an anchor for the garbage collector and keeps the
259originally passed string or FFI cdata object alive.
260</p>
261
262<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br>
263<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3>
264<p>
265The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of
266write space in the buffer. It returns an <tt>uint8_t&nbsp;*</tt> FFI
267cdata pointer <tt>ptr</tt> that points to this space.
268</p>
269<p>
270The available length in bytes is returned in <tt>len</tt>. This is at
271least <tt>size</tt> bytes, but may be more to facilitate efficient
272buffer growth. You can either make use of the additional space or ignore
273<tt>len</tt> and only use <tt>size</tt> bytes.
274</p>
275<p>
276The <tt>commit</tt> method appends the <tt>used</tt> bytes of the
277previously returned write space to the buffer data.
278</p>
279<p>
280This pair of methods allows zero-copy use of C read-style APIs:
281</p>
282<pre class="code">
283local MIN_SIZE = 65536
284repeat
285 local ptr, len = buf:reserve(MIN_SIZE)
286 local n = C.read(fd, ptr, len)
287 if n == 0 then break end -- EOF.
288 if n &lt; 0 then error("read error") end
289 buf:commit(n)
290until false
291</pre>
292<p>
293The reserved write space is <i>not</i> initialized. At least the
294<tt>used</tt> bytes <b>must</b> be written to before calling the
295<tt>commit</tt> method. There's no need to call the <tt>commit</tt>
296method, if nothing is added to the buffer (e.g. on error).
297</p>
298
299<h2 id="read">Buffer Readers</h2>
300
301<h3 id="buffer_length"><tt>len = #buf</tt></h3>
302<p>
303Returns the current length of the buffer data in bytes.
304</p>
305
306<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3>
307<p>
308The Lua concatenation operator <tt>..</tt> also accepts buffers, just
309like strings or numbers. It always returns a string and not a buffer.
310</p>
311<p>
312Note that although this is supported for convenience, this thwarts one
313of the main reasons to use buffers, which is to avoid string
314allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>.
315</p>
316<p>
317Mixing this with unrelated objects that have a <tt>__concat</tt>
318metamethod may not work, since these probably only expect strings.
319</p>
320
321<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3>
322<p>
323Skips (consumes) <tt>len</tt> bytes from the buffer up to the current
324length of the buffer data.
325</p>
326
327<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3>
328<p>
329Consumes the buffer data and returns one or more strings. If called
330without arguments, the whole buffer data is consumed. If called with a
331number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument
332consumes the remaining buffer space (this only makes sense as the last
333argument). Multiple arguments consume the buffer data in the given
334order.
335</p>
336<p>
337Note: a zero length or no remaining buffer data returns an empty string
338and not <tt>nil</tt>.
339</p>
340
341<h3 id="buffer_tostring"><tt>str = buf:tostring()<br>
342str = tostring(buf)</tt></h3>
343<p>
344Creates a string from the buffer data, but doesn't consume it. The
345buffer remains unchanged.
346</p>
347<p>
348Buffer objects also define a <tt>__tostring</tt> metamethod. This means
349buffers can be passed to the global <tt>tostring()</tt> function and
350many other functions that accept this in place of strings. The important
351internal uses in functions like <tt>io.write()</tt> are short-circuited
352to avoid the creation of an intermediate string object.
353</p>
354
355<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3>
356<p>
357Returns an <tt>uint8_t&nbsp;*</tt> FFI cdata pointer <tt>ptr</tt> that
358points to the buffer data. The length of the buffer data in bytes is
359returned in <tt>len</tt>.
360</p>
361<p>
362The returned pointer can be directly passed to C functions that expect a
363buffer and a length. You can also do bytewise reads
364(<tt>local&nbsp;x&nbsp;=&nbsp;ptr[i]</tt>) or writes
365(<tt>ptr[i]&nbsp;=&nbsp;0x40</tt>) of the buffer data.
366</p>
367<p>
368In conjunction with the <tt>skip</tt> method, this allows zero-copy use
369of C write-style APIs:
370</p>
371<pre class="code">
372repeat
373 local ptr, len = buf:ref()
374 if len == 0 then break end
375 local n = C.write(fd, ptr, len)
376 if n &lt; 0 then error("write error") end
377 buf:skip(n)
378until n >= len
379</pre>
380<p>
381Unlike Lua strings, buffer data is <i>not</i> implicitly
382zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that
383expect zero-terminated strings. If you're not using <tt>len</tt>, then
384you're doing something wrong.
385</p>
386
387<h2 id="serialize">Serialization of Lua Objects</h2>
388<p>
389The following functions and methods allow <b>high-speed serialization</b>
390(encoding) of a Lua object into a string and decoding it back to a Lua
391object. This allows convenient storage and transport of <b>structured
392data</b>.
393</p>
394<p>
395The encoded data is in an <a href="#serialize_format">internal binary
396format</a>. The data can be stored in files, binary-transparent
397databases or transmitted to other LuaJIT instances across threads,
398processes or networks.
399</p>
400<p>
401Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
402server-class system, even when serializing many small objects. Decoding
403speed is mostly constrained by object creation cost.
404</p>
405<p>
406The serializer handles most Lua types, common FFI number types and
407nested structures. Functions, thread objects, other FFI cdata, full
408userdata and associated metatables cannot be serialized (yet).
409</p>
410<p>
411The encoder serializes nested structures as trees. Multiple references
412to a single object will be stored separately and create distinct objects
413after decoding. Circular references cause an error.
414</p>
415
416<h3 id="serialize_methods">Serialization Functions and Methods</h3>
417
418<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br>
419buf = buf:encode(obj)</tt></h3>
420<p>
421Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone
422function returns a string <tt>str</tt>. The buffer method appends the
423encoding to the buffer.
424</p>
425<p>
426<tt>obj</tt> can be any of the supported Lua types &mdash; it doesn't
427need to be a Lua table.
428</p>
429<p>
430This function may throw an error when attempting to serialize
431unsupported object types, circular references or deeply nested tables.
432</p>
433
434<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br>
435obj = buf:decode()</tt></h3>
436<p>
437The stand-alone function de-serializes (decodes) the string
438<tt>str</tt>, the buffer method de-serializes one object from the
439buffer. Both return a Lua object <tt>obj</tt>.
440</p>
441<p>
442The returned object may be any of the supported Lua types &mdash;
443even <tt>nil</tt>.
444</p>
445<p>
446This function may throw an error when fed with malformed or incomplete
447encoded data. The stand-alone function throws when there's left-over
448data after decoding a single top-level object. The buffer method leaves
449any left-over data in the buffer.
450</p>
451
452<h3 id="serialize_options">Serialization Options</h3>
453<p>
454The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain
455the following members (all optional):
456</p>
457<ul>
458<li>
459<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that
460commonly occur as table keys of objects you are serializing. These keys
461are compactly encoded as indexes during serialization. A well chosen
462dictionary saves space and improves serialization performance.
463</li>
464</ul>
465<p>
466<tt>dict</tt> needs to be an array of strings, starting at index 1 and
467without holes (no <tt>nil</tt> in between). The table is anchored in the
468buffer object and internally modified into a two-way index (don't do
469this yourself, just pass a plain array). The table must not be modified
470after it has been passed to <tt>buffer.new()</tt>.
471</p>
472<p>
473The <tt>dict</tt> tables used by the encoder and decoder must be the
474same. Put the most common entries at the front. Extend at the end to
475ensure backwards-compatibility &mdash; older encodings can then still be
476read. You may also set some indexes to <tt>false</tt> to explicitly drop
477backwards-compatibility. Old encodings that use these indexes will throw
478an error when decoded.
479</p>
480<p>
481Note: parsing and preparation of the options table is somewhat
482expensive. Create a buffer object only once and recycle it for multiple
483uses. Avoid mixing encoder and decoder buffers, since the
484<tt>buf:set()</tt> method frees the already allocated buffer space:
485</p>
486<pre class="code">
487local options = {
488 dict = { "commonly", "used", "string", "keys" },
489}
490local buf_enc = buffer.new(options)
491local buf_dec = buffer.new(options)
492
493local function encode(obj)
494 return buf_enc:reset():encode(obj):get()
495end
496
497local function decode(str)
498 return buf_dec:set(str):decode()
499end
500</pre>
501
502<h3 id="serialize_stream">Streaming Serialization</h3>
503<p>
504In some contexts, it's desirable to do piecewise serialization of large
505datasets, also known as <i>streaming</i>.
506</p>
507<p>
508This serialization format can be safely concatenated and supports streaming.
509Multiple encodings can simply be appended to a buffer and later decoded
510individually:
511</p>
512<pre class="code">
513local buf = buffer.new()
514buf:encode(obj1)
515buf:encode(obj2)
516local copy1 = buf:decode()
517local copy2 = buf:decode()
518</pre>
519<p>
520Here's how to iterate over a stream:
521</p>
522<pre class="code">
523while #buf ~= 0 do
524 local obj = buf:decode()
525 -- Do something with obj.
526end
527</pre>
528<p>
529Since the serialization format doesn't prepend a length to its encoding,
530network applications may need to transmit the length, too.
531</p>
532
533<h3 id="serialize_format">Serialization Format Specification</h3>
534<p>
535This serialization format is designed for <b>internal use</b> by LuaJIT
536applications. Serialized data is upwards-compatible and portable across
537all supported LuaJIT platforms.
538</p>
539<p>
540It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
541embedded zeroes and stores embedded Lua string objects unmodified, which
542are 8-bit-clean, too. Encoded data can be safely concatenated for
543streaming and later decoded one top-level object at a time.
544</p>
545<p>
546The encoding is reasonably compact, but tuned for maximum performance,
547not for minimum space usage. It compresses well with any of the common
548byte-oriented data compression algorithms.
549</p>
550<p>
551Although documented here for reference, this format is explicitly
552<b>not</b> intended to be a 'public standard' for structured data
553interchange across computer languages (like JSON or MessagePack). Please
554do not use it as such.
555</p>
556<p>
557The specification is given below as a context-free grammar with a
558top-level <tt>object</tt> as the starting point. Alternatives are
559separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats.
560Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
561either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
562suffix.
563</p>
564<pre>
565object → nil | false | true
566 | null | lightud32 | lightud64
567 | int | num | tab
568 | int64 | uint64 | complex
569 | string
570
571nil → 0x00
572false → 0x01
573true → 0x02
574
575null → 0x03 // NULL lightuserdata
576lightud32 → 0x04 data.I // 32 bit lightuserdata
577lightud64 → 0x05 data.L // 64 bit lightuserdata
578
579int → 0x06 int.I // int32_t
580num → 0x07 double.L
581
582tab → 0x08 // Empty table
583 | 0x09 h.U h*{object object} // Key/value hash
584 | 0x0a a.U a*object // 0-based array
585 | 0x0b a.U a*object h.U h*{object object} // Mixed
586 | 0x0c a.U (a-1)*object // 1-based array
587 | 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
588
589int64 → 0x10 int.L // FFI int64_t
590uint64 → 0x11 uint.L // FFI uint64_t
591complex → 0x12 re.L im.L // FFI complex
592
593string → (0x20+len).U len*char.B
594 | 0x0f (index-1).U // Dict entry
595
596.B = 8 bit
597.I = 32 bit little-endian
598.L = 64 bit little-endian
599.U = prefix-encoded 32 bit unsigned number n:
600 0x00..0xdf → n.B
601 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
602 0x1fe0.. → 0xff n.I
603</pre>
604
605<h2 id="error">Error handling</h2>
606<p>
607Many of the buffer methods can throw an error. Out-of-memory or usage
608errors are best caught with an outer wrapper for larger parts of code.
609There's not much one can do after that, anyway.
610</p>
611<p>
612On the other hand, you may want to catch some errors individually. Buffer methods need
613to receive the buffer object as the first argument. The Lua colon-syntax
614<tt>obj:method()</tt> does that implicitly. But to wrap a method with
615<tt>pcall()</tt>, the arguments need to be passed like this:
616</p>
617<pre class="code">
618local ok, err = pcall(buf.encode, buf, obj)
619if not ok then
620 -- Handle error in err.
621end
622</pre>
623
624<h2 id="ffi_caveats">FFI caveats</h2>
625<p>
626The string buffer library has been designed to work well together with
627the FFI library. But due to the low-level nature of the FFI library,
628some care needs to be taken:
629</p>
630<p>
631First, please remember that FFI pointers are zero-indexed. The space
632returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the
633returned pointer and ends before <tt>len</tt> bytes after that.
634</p>
635<p>
636I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index
637is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid
638index at all. The returned pointer may even be <tt>NULL</tt>.
639</p>
640<p>
641The space pointed to by the returned pointer is only valid as long as
642the buffer is not modified in any way (neither append, nor consume, nor
643reset, etc.). The pointer is also not a GC anchor for the buffer object
644itself.
645</p>
646<p>
647Buffer data is only guaranteed to be byte-aligned. Casting the returned
648pointer to a data type with higher alignment may cause unaligned
649accesses. It depends on the CPU architecture whether this is allowed or
650not (it's always OK on x86/x64 and mostly OK on other modern
651architectures).
652</p>
653<p>
654FFI pointers or references do not count as GC anchors for an underlying
655object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is
656anchored by <tt>buf:set(array,&nbsp;len)</tt>, but not by
657<tt>buf:set(array+offset,&nbsp;len)</tt>. The addition of the offset
658creates a new pointer, even when the offset is zero. In this case, you
659need to make sure there's still a reference to the original array as
660long as its contents are in use by the buffer.
661</p>
662<p>
663Even though each LuaJIT VM instance is single-threaded (but you can
664create multiple VMs), FFI data structures can be accessed concurrently.
665Be careful when reading/writing FFI cdata from/to buffers to avoid
666concurrent accesses or modifications. In particular, the memory
667referenced by <tt>buf:set(cdata,&nbsp;len)</tt> must not be modified
668while buffer readers are working on it. Shared, but read-only memory
669mappings of files are OK, but only if the file does not change.
670</p>
671<br class="flush">
672</div>
673<div id="foot">
674<hr class="hide">
675Copyright &copy; 2005-2021
676<span class="noprint">
677&middot;
678<a href="contact.html">Contact</a>
679</span>
680</div>
681</body>
682</html>
diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
index ea33c701..9f1ad212 100644
--- a/doc/ext_c_api.html
+++ b/doc/ext_c_api.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Lua/C API Extensions</title> 4<title>Lua/C API Extensions</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a href="ext_jit.html">jit.* Library</a> 42<a href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a class="current" href="ext_c_api.html">Lua/C API</a> 44<a class="current" href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="status.html">Status</a> 49<a href="status.html">Status</a>
diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
index 654d4980..b934dc78 100644
--- a/doc/ext_ffi.html
+++ b/doc/ext_ffi.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>FFI Library</title> 4<title>FFI Library</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a href="ext_jit.html">jit.* Library</a> 42<a href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="status.html">Status</a> 49<a href="status.html">Status</a>
diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
index a2af25df..061cc42a 100644
--- a/doc/ext_ffi_api.html
+++ b/doc/ext_ffi_api.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>ffi.* API Functions</title> 4<title>ffi.* API Functions</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,9 +42,13 @@ td.abiparam { font-weight: bold; width: 6em; }
42<a href="ext_ffi_semantics.html">FFI Semantics</a> 42<a href="ext_ffi_semantics.html">FFI Semantics</a>
43</li></ul> 43</li></ul>
44</li><li> 44</li><li>
45<a href="ext_buffer.html">String Buffers</a>
46</li><li>
45<a href="ext_jit.html">jit.* Library</a> 47<a href="ext_jit.html">jit.* Library</a>
46</li><li> 48</li><li>
47<a href="ext_c_api.html">Lua/C API</a> 49<a href="ext_c_api.html">Lua/C API</a>
50</li><li>
51<a href="ext_profiler.html">Profiler</a>
48</li></ul> 52</li></ul>
49</li><li> 53</li><li>
50<a href="status.html">Status</a> 54<a href="status.html">Status</a>
@@ -460,6 +464,10 @@ otherwise. The following parameters are currently defined:
460<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr> 464<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
461<tr class="odd"> 465<tr class="odd">
462<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> 466<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
467<tr class="even">
468<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
469<tr class="odd">
470<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
463</table> 471</table>
464 472
465<h3 id="ffi_os"><tt>ffi.os</tt></h3> 473<h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -536,8 +544,8 @@ corresponding ctype.
536The parser for Lua source code treats numeric literals with the 544The parser for Lua source code treats numeric literals with the
537suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit 545suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
538integers. Case doesn't matter, but uppercase is recommended for 546integers. Case doesn't matter, but uppercase is recommended for
539readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal 547readability. It handles decimal (<tt>42LL</tt>), hexadecimal
540(<tt>0x2aLL</tt>) literals. 548(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
541</p> 549</p>
542<p> 550<p>
543The imaginary part of complex numbers can be specified by suffixing 551The imaginary part of complex numbers can be specified by suffixing
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index e42d1a97..fef39c32 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>FFI Semantics</title> 4<title>FFI Semantics</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,9 +42,13 @@ td.convop { font-style: italic; width: 40%; }
42<a class="current" href="ext_ffi_semantics.html">FFI Semantics</a> 42<a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
43</li></ul> 43</li></ul>
44</li><li> 44</li><li>
45<a href="ext_buffer.html">String Buffers</a>
46</li><li>
45<a href="ext_jit.html">jit.* Library</a> 47<a href="ext_jit.html">jit.* Library</a>
46</li><li> 48</li><li>
47<a href="ext_c_api.html">Lua/C API</a> 49<a href="ext_c_api.html">Lua/C API</a>
50</li><li>
51<a href="ext_profiler.html">Profiler</a>
48</li></ul> 52</li></ul>
49</li><li> 53</li><li>
50<a href="status.html">Status</a> 54<a href="status.html">Status</a>
@@ -177,6 +181,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored):
177<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>, 181<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
178<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li> 182<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
179 183
184<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li>
185
180</ul> 186</ul>
181<p> 187<p>
182You're encouraged to use these types in preference to 188You're encouraged to use these types in preference to
@@ -724,6 +730,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
724number (e.g. for regular floating-point calculations) with 730number (e.g. for regular floating-point calculations) with
725<tt>tonumber()</tt>. But note this may incur a precision loss.</li> 731<tt>tonumber()</tt>. But note this may incur a precision loss.</li>
726 732
733<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
734arithmetic operators apply analogously.<br>
735
736Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
737converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
738returns a Lua number.<br>
739
740For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
741conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
742<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
743
744For all other operations, only the first argument is used to determine
745the output type. This implies that a cdata number as a shift count for
746shifts and rotates is accepted, but that alone does <em>not</em> cause
747a cdata number output.</li>
748
727</ul> 749</ul>
728 750
729<h3 id="cdata_comp">Comparisons of cdata objects</h3> 751<h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1195,14 +1217,12 @@ The following operations are currently not compiled and may exhibit
1195suboptimal performance, especially when used in inner loops: 1217suboptimal performance, especially when used in inner loops:
1196</p> 1218</p>
1197<ul> 1219<ul>
1198<li>Bitfield accesses and initializations.</li>
1199<li>Vector operations.</li> 1220<li>Vector operations.</li>
1200<li>Table initializers.</li> 1221<li>Table initializers.</li>
1201<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> 1222<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
1202<li>Allocations of variable-length arrays or structs.</li> 1223<li>Non-default initialization of VLA/VLS or large C&nbsp;types
1203<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an 1224(&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
1204alignment &gt; 8&nbsp;bytes.</li> 1225<li>Bitfield initializations.</li>
1205<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
1206<li>Pointer differences for element sizes that are not a power of 1226<li>Pointer differences for element sizes that are not a power of
1207two.</li> 1227two.</li>
1208<li>Calls to C&nbsp;functions with aggregates passed or returned by 1228<li>Calls to C&nbsp;functions with aggregates passed or returned by
@@ -1218,7 +1238,6 @@ value.</li>
1218Other missing features: 1238Other missing features:
1219</p> 1239</p>
1220<ul> 1240<ul>
1221<li>Bit operations for 64&nbsp;bit types.</li>
1222<li>Arithmetic for <tt>complex</tt> numbers.</li> 1241<li>Arithmetic for <tt>complex</tt> numbers.</li>
1223<li>Passing structs by value to vararg C&nbsp;functions.</li> 1242<li>Passing structs by value to vararg C&nbsp;functions.</li>
1224<li><a href="extensions.html#exceptions">C++ exception interoperability</a> 1243<li><a href="extensions.html#exceptions">C++ exception interoperability</a>
diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
index a5235186..ca71be4d 100644
--- a/doc/ext_ffi_tutorial.html
+++ b/doc/ext_ffi_tutorial.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>FFI Tutorial</title> 4<title>FFI Tutorial</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -44,9 +44,13 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
44<a href="ext_ffi_semantics.html">FFI Semantics</a> 44<a href="ext_ffi_semantics.html">FFI Semantics</a>
45</li></ul> 45</li></ul>
46</li><li> 46</li><li>
47<a href="ext_buffer.html">String Buffers</a>
48</li><li>
47<a href="ext_jit.html">jit.* Library</a> 49<a href="ext_jit.html">jit.* Library</a>
48</li><li> 50</li><li>
49<a href="ext_c_api.html">Lua/C API</a> 51<a href="ext_c_api.html">Lua/C API</a>
52</li><li>
53<a href="ext_profiler.html">Profiler</a>
50</li></ul> 54</li></ul>
51</li><li> 55</li><li>
52<a href="status.html">Status</a> 56<a href="status.html">Status</a>
diff --git a/doc/ext_jit.html b/doc/ext_jit.html
index 93240fda..6dd54c70 100644
--- a/doc/ext_jit.html
+++ b/doc/ext_jit.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>jit.* Library</title> 4<title>jit.* Library</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
37<a href="ext_ffi_semantics.html">FFI Semantics</a> 37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul> 38</li></ul>
39</li><li> 39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
40<a class="current" href="ext_jit.html">jit.* Library</a> 42<a class="current" href="ext_jit.html">jit.* Library</a>
41</li><li> 43</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a href="ext_profiler.html">Profiler</a>
43</li></ul> 47</li></ul>
44</li><li> 48</li><li>
45<a href="status.html">Status</a> 49<a href="status.html">Status</a>
@@ -145,7 +149,7 @@ Contains the target OS name:
145<h3 id="jit_arch"><tt>jit.arch</tt></h3> 149<h3 id="jit_arch"><tt>jit.arch</tt></h3>
146<p> 150<p>
147Contains the target architecture name: 151Contains the target architecture name:
148"x86", "x64", "arm", "ppc", "ppcspe", or "mips". 152"x86", "x64", "arm", "arm64", "arm64be", "ppc", "mips", "mipsel", "mips64", "mips64el", "mips64r6", "mips64r6el".
149</p> 153</p>
150 154
151<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2> 155<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html
new file mode 100644
index 00000000..2783abdb
--- /dev/null
+++ b/doc/ext_profiler.html
@@ -0,0 +1,361 @@
1<!DOCTYPE html>
2<html>
3<head>
4<title>Profiler</title>
5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
9<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
10</head>
11<body>
12<div id="site">
13<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
14</div>
15<div id="head">
16<h1>Profiler</h1>
17</div>
18<div id="nav">
19<ul><li>
20<a href="luajit.html">LuaJIT</a>
21<ul><li>
22<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
23</li><li>
24<a href="install.html">Installation</a>
25</li><li>
26<a href="running.html">Running</a>
27</li></ul>
28</li><li>
29<a href="extensions.html">Extensions</a>
30<ul><li>
31<a href="ext_ffi.html">FFI Library</a>
32<ul><li>
33<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
34</li><li>
35<a href="ext_ffi_api.html">ffi.* API</a>
36</li><li>
37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul>
39</li><li>
40<a href="ext_buffer.html">String Buffers</a>
41</li><li>
42<a href="ext_jit.html">jit.* Library</a>
43</li><li>
44<a href="ext_c_api.html">Lua/C API</a>
45</li><li>
46<a class="current" href="ext_profiler.html">Profiler</a>
47</li></ul>
48</li><li>
49<a href="status.html">Status</a>
50</li><li>
51<a href="faq.html">FAQ</a>
52</li><li>
53<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
54</li><li>
55<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
56</li></ul>
57</div>
58<div id="main">
59<p>
60LuaJIT has an integrated statistical profiler with very low overhead. It
61allows sampling the currently executing stack and other parameters at
62regular intervals.
63</p>
64<p>
65The integrated profiler can be accessed from three levels:
66</p>
67<ul>
68<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
69<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
70<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
71<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
72</ul>
73
74<h2 id="hl_profiler">High-Level Profiler</h2>
75<p>
76The bundled high-level profiler offers basic profiling functionality. It
77generates simple textual summaries or source code annotations. It can be
78accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
79or from Lua code by loading the underlying <tt>jit.p</tt> module.
80</p>
81<p>
82To cut to the chase &mdash; run this to get a CPU usage profile by
83function name:
84</p>
85<pre class="code">
86luajit -jp myapp.lua
87</pre>
88<p>
89It's <em>not</em> a stated goal of the bundled profiler to add every
90possible option or to cater for special profiling needs. The low-level
91profiler APIs are documented below. They may be used by third-party
92authors to implement advanced functionality, e.g. IDE integration or
93graphical profilers.
94</p>
95<p>
96Note: Sampling works for both interpreted and JIT-compiled code. The
97results for JIT-compiled code may sometimes be surprising. LuaJIT
98heavily optimizes and inlines Lua code &mdash; there's no simple
99one-to-one correspondence between source code lines and the sampled
100machine code.
101</p>
102
103<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
104<p>
105The <tt>-jp</tt> command line option starts the high-level profiler.
106When the application run by the command line terminates, the profiler
107stops and writes the results to <tt>stdout</tt> or to the specified
108<tt>output</tt> file.
109</p>
110<p>
111The <tt>options</tt> argument specifies how the profiling is to be
112performed:
113</p>
114<ul>
115<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
116This is the default mode.</li>
117<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
118<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
119<li><tt>&lt;number&gt;</tt> &mdash; Stack dump depth (callee &larr;
120caller). Default: 1.</li>
121<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
122&rarr; callee).</li>
123<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
124depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
125<li><tt>p</tt> &mdash; Show full path for module names.</li>
126<li><tt>v</tt> &mdash; Show VM states.</li>
127<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
128<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
129<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
130<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
131<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
132<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
133Default: 3%.</li>
134<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
135Default: 10ms.<br>
136Note: The actual sampling precision is OS-dependent.</li>
137</ul>
138<p>
139The default output for <tt>-jp</tt> is a list of the most CPU consuming
140spots in the application. Increasing the stack dump depth with (say)
141<tt>-jp=2</tt> may help to point out the main callers or callees of
142hotspots. But sample aggregation is still flat per unique stack dump.
143</p>
144<p>
145To get a two-level view (split view) of callers/callees, use
146<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
147level are relative to the first level.
148</p>
149<p>
150To see how much time is spent in each line relative to a function, use
151<tt>-jp=fl</tt>.
152</p>
153<p>
154To see how much time is spent in different VM states or
155<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
156</p>
157<p>
158Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
159views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
160spent in a VM state or zone vs. hotspots. This can be used to answer
161questions like "Which time consuming functions are only interpreted?" or
162"What's the garbage collector overhead for a specific function?".
163</p>
164<p>
165Multiple options can be combined &mdash; but not all combinations make
166sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
167deep in 4ms intervals and shows a split view of the CPU consuming
168functions and their callers with a 1% threshold.
169</p>
170<p>
171Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
172always flat and at the line level. Obviously, the source code files need
173to be readable by the profiler script.
174</p>
175<p>
176The high-level profiler can also be started and stopped from Lua code with:
177</p>
178<pre class="code">
179require("jit.p").start(options, output)
180...
181require("jit.p").stop()
182</pre>
183
184<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
185<p>
186Zones can be used to provide information about different parts of an
187application to the high-level profiler. E.g. a game could make use of an
188<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
189organized as a stack.
190</p>
191<p>
192The <tt>jit.zone</tt> module needs to be loaded explicitly:
193</p>
194<pre class="code">
195local zone = require("jit.zone")
196</pre>
197<ul>
198<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
199<li><tt>zone()</tt> pops the current zone from the zone stack and
200returns its name.</li>
201<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
202<li><tt>zone:flush()</tt> flushes the zone stack.</li>
203</ul>
204<p>
205To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
206spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
207</p>
208
209<h2 id="ll_lua_api">Low-level Lua API</h2>
210<p>
211The <tt>jit.profile</tt> module gives access to the low-level API of the
212profiler from Lua code. This module needs to be loaded explicitly:</p>
213<pre class="code">
214local profile = require("jit.profile")
215</pre>
216<p>
217This module can be used to implement your own higher-level profiler.
218A typical profiling run starts the profiler, captures stack dumps in
219the profiler callback, adds them to a hash table to aggregate the number
220of samples, stops the profiler and then analyzes all of the captured
221stack dumps. Other parameters can be sampled in the profiler callback,
222too. But it's important not to spend too much time in the callback,
223since this may skew the statistics.
224</p>
225
226<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
227&mdash; Start profiler</h3>
228<p>
229This function starts the profiler. The <tt>mode</tt> argument is a
230string holding options:
231</p>
232<ul>
233<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
234<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
235<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
23610ms).<br>
237Note: The actual sampling precision is OS-dependent.
238</li>
239</ul>
240<p>
241The <tt>cb</tt> argument is a callback function which is called with
242three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
243called on a separate coroutine, the <tt>thread</tt> argument is the
244state that holds the stack to sample for profiling. Note: do
245<em>not</em> modify the stack of that state or call functions on it.
246</p>
247<p>
248<tt>samples</tt> gives the number of accumulated samples since the last
249callback (usually 1).
250</p>
251<p>
252<tt>vmstate</tt> holds the VM state at the time the profiling timer
253triggered. This may or may not correspond to the state of the VM when
254the profiling callback is called. The state is either <tt>'N'</tt>
255native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
256C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
257compiler.
258</p>
259
260<h3 id="profile_stop"><tt>profile.stop()</tt>
261&mdash; Stop profiler</h3>
262<p>
263This function stops the profiler.
264</p>
265
266<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
267&mdash; Dump stack </h3>
268<p>
269This function allows taking stack dumps in an efficient manner. It
270returns a string with a stack dump for the <tt>thread</tt> (coroutine),
271formatted according to the <tt>fmt</tt> argument:
272</p>
273<ul>
274<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise
275only the file name is used.</li>
276<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise
277use module:line.</li>
278<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
279<li><tt>l</tt> &mdash; Dump module:line.</li>
280<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
281frame.</li>
282<li>All other characters are added verbatim to the output string.</li>
283</ul>
284<p>
285The <tt>depth</tt> argument gives the number of frames to dump, starting
286at the topmost frame of the thread. A negative number dumps the frames in
287inverse order.
288</p>
289<p>
290The first example prints a list of the current module names and line
291numbers of up to 10 frames in separate lines. The second example prints
292semicolon-separated function names for all frames (up to 100) in inverse
293order:
294</p>
295<pre class="code">
296print(profile.dumpstack(thread, "l\n", 10))
297print(profile.dumpstack(thread, "fZ;", -100))
298</pre>
299
300<h2 id="ll_c_api">Low-level C API</h2>
301<p>
302The profiler can be controlled directly from C&nbsp;code, e.g. for
303use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
304<a href="ext_c_api.html">Lua/C API</a> extensions).
305</p>
306
307<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
308&mdash; Start profiler</h3>
309<p>
310This function starts the profiler. <a href="#profile_start">See
311above</a> for a description of the <tt>mode</tt> argument.
312</p>
313<p>
314The <tt>cb</tt> argument is a callback function with the following
315declaration:
316</p>
317<pre class="code">
318typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
319 int samples, int vmstate);
320</pre>
321<p>
322<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
323state that holds the stack to sample for profiling. Note: do
324<em>not</em> modify this stack or call functions on this stack &mdash;
325use a separate coroutine for this purpose. <a href="#profile_start">See
326above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
327</p>
328
329<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
330&mdash; Stop profiler</h3>
331<p>
332This function stops the profiler.
333</p>
334
335<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
336&mdash; Dump stack </h3>
337<p>
338This function allows taking stack dumps in an efficient manner.
339<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
340and <tt>depth</tt>.
341</p>
342<p>
343This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
344private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
345argument returns the length of the output string. The buffer is
346overwritten on the next call and deallocated when the profiler stops.
347You either need to consume the content immediately or copy it for later
348use.
349</p>
350<br class="flush">
351</div>
352<div id="foot">
353<hr class="hide">
354Copyright &copy; 2005-2021
355<span class="noprint">
356&middot;
357<a href="contact.html">Contact</a>
358</span>
359</div>
360</body>
361</html>
diff --git a/doc/extensions.html b/doc/extensions.html
index 306943b7..799679a3 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Extensions</title> 4<title>Extensions</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -54,9 +54,13 @@ td.excinterop {
54<a href="ext_ffi_semantics.html">FFI Semantics</a> 54<a href="ext_ffi_semantics.html">FFI Semantics</a>
55</li></ul> 55</li></ul>
56</li><li> 56</li><li>
57<a href="ext_buffer.html">String Buffers</a>
58</li><li>
57<a href="ext_jit.html">jit.* Library</a> 59<a href="ext_jit.html">jit.* Library</a>
58</li><li> 60</li><li>
59<a href="ext_c_api.html">Lua/C API</a> 61<a href="ext_c_api.html">Lua/C API</a>
62</li><li>
63<a href="ext_profiler.html">Profiler</a>
60</li></ul> 64</li></ul>
61</li><li> 65</li><li>
62<a href="status.html">Status</a> 66<a href="status.html">Status</a>
@@ -107,6 +111,9 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
107This module is a LuaJIT built-in &mdash; you don't need to download or 111This module is a LuaJIT built-in &mdash; you don't need to download or
108install Lua BitOp. The Lua BitOp site has full documentation for all 112install Lua BitOp. The Lua BitOp site has full documentation for all
109<a href="https://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>. 113<a href="https://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
114The FFI adds support for
115<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
116using the same API functions.
110</p> 117</p>
111<p> 118<p>
112Please make sure to <tt>require</tt> the module before using any of 119Please make sure to <tt>require</tt> the module before using any of
@@ -140,6 +147,11 @@ LuaJIT adds some
140<a href="ext_c_api.html">extra functions to the Lua/C API</a>. 147<a href="ext_c_api.html">extra functions to the Lua/C API</a>.
141</p> 148</p>
142 149
150<h3 id="profiler">Profiler</h3>
151<p>
152LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
153</p>
154
143<h2 id="library">Enhanced Standard Library Functions</h2> 155<h2 id="library">Enhanced Standard Library Functions</h2>
144 156
145<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3> 157<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -167,7 +179,7 @@ in <tt>"-inf"</tt>.
167<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3> 179<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
168<p> 180<p>
169All string-to-number conversions consistently convert integer and 181All string-to-number conversions consistently convert integer and
170floating-point inputs in decimal and hexadecimal on all platforms. 182floating-point inputs in decimal, hexadecimal and binary on all platforms.
171<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous 183<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
172problems with poor C library implementations. The builtin conversion 184problems with poor C library implementations. The builtin conversion
173function provides full precision according to the IEEE-754 standard, it 185function provides full precision according to the IEEE-754 standard, it
@@ -191,6 +203,36 @@ for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
191minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign 203minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
192bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. 204bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
193</p> 205</p>
206<p>
207Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
208a different, incompatible bytecode format for all 64 bit ports. This may be
209rectified in the future.
210</p>
211
212<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
213<p>
214An extra library function <tt>table.new()</tt> can be made available via
215<tt>require("table.new")</tt>. This creates a pre-sized table, just like
216the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
217tables if the final table size is known and automatic table resizing is
218too expensive.
219</p>
220
221<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
222<p>
223An extra library function <tt>table.clear()</tt> can be made available
224via <tt>require("table.clear")</tt>. This clears all keys and values
225from a table, but preserves the allocated array/hash sizes. This is
226useful when a table, which is linked from multiple places, needs to be
227cleared and/or when recycling a table for use by the same context. This
228avoids managing backlinks, saves an allocation and the overhead of
229incremental array/hash part growth.
230</p>
231<p>
232Please note this function is meant for very specific situations. In most
233cases it's better to replace the (usually single) link with a new table
234and let the GC do its work.
235</p>
194 236
195<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3> 237<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
196<p> 238<p>
@@ -269,6 +311,26 @@ indexes for varargs.</li>
269<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle 311<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
270C&nbsp;functions.</li> 312C&nbsp;functions.</li>
271<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li> 313<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
314<li>Lua/C API extensions:
315<tt>lua_version()</tt>
316<tt>lua_upvalueid()</tt>
317<tt>lua_upvaluejoin()</tt>
318<tt>lua_loadx()</tt>
319<tt>lua_copy()</tt>
320<tt>lua_tonumberx()</tt>
321<tt>lua_tointegerx()</tt>
322<tt>luaL_fileresult()</tt>
323<tt>luaL_execresult()</tt>
324<tt>luaL_loadfilex()</tt>
325<tt>luaL_loadbufferx()</tt>
326<tt>luaL_traceback()</tt>
327<tt>luaL_setfuncs()</tt>
328<tt>luaL_pushmodule()</tt>
329<tt>luaL_newlibtable()</tt>
330<tt>luaL_newlib()</tt>
331<tt>luaL_testudata()</tt>
332<tt>luaL_setmetatable()</tt>
333</li>
272<li>Command line option <tt>-E</tt>.</li> 334<li>Command line option <tt>-E</tt>.</li>
273<li>Command line checks <tt>__tostring</tt> for errors.</li> 335<li>Command line checks <tt>__tostring</tt> for errors.</li>
274</ul> 336</ul>
@@ -294,6 +356,8 @@ exit status.</li>
294<li><tt>debug.setmetatable()</tt> returns object.</li> 356<li><tt>debug.setmetatable()</tt> returns object.</li>
295<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> 357<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
296<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> 358<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li>
359<li><tt>package.searchers</tt>.</li>
360<li><tt>module()</tt> returns the module table.</li>
297</ul> 361</ul>
298<p> 362<p>
299Note: this provides only partial compatibility with Lua 5.2 at the 363Note: this provides only partial compatibility with Lua 5.2 at the
@@ -302,6 +366,21 @@ Lua&nbsp;5.1, which prevents implementing features that would otherwise
302break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). 366break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
303</p> 367</p>
304 368
369<h2 id="lua53">Extensions from Lua 5.3</h2>
370<p>
371LuaJIT supports some extensions from Lua&nbsp;5.3:
372<ul>
373<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
374<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
375<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li>
376<li><tt>assert()</tt> accepts any type of error object.</li>
377<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
378<li><tt>coroutine.isyieldable()</tt>.</li>
379<li>Lua/C API extensions:
380<tt>lua_isyieldable()</tt>
381</li>
382</ul>
383
305<h2 id="exceptions">C++ Exception Interoperability</h2> 384<h2 id="exceptions">C++ Exception Interoperability</h2>
306<p> 385<p>
307LuaJIT has built-in support for interoperating with C++&nbsp;exceptions. 386LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
@@ -315,26 +394,21 @@ the toolchain used to compile LuaJIT:
315<td class="excinterop">Interoperability</td> 394<td class="excinterop">Interoperability</td>
316</tr> 395</tr>
317<tr class="odd separate"> 396<tr class="odd separate">
318<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> 397<td class="excplatform">External frame unwinding</td>
319<td class="exccompiler">GCC 4.3+</td> 398<td class="exccompiler">GCC, Clang, MSVC</td>
320<td class="excinterop"><b style="color: #00a000;">Full</b></td> 399<td class="excinterop"><b style="color: #00a000;">Full</b></td>
321</tr> 400</tr>
322<tr class="even"> 401<tr class="even">
323<td class="excplatform">Other platforms, DWARF2 unwinding</td> 402<td class="excplatform">Internal frame unwinding + DWARF2</td>
324<td class="exccompiler">GCC</td> 403<td class="exccompiler">GCC, Clang</td>
325<td class="excinterop"><b style="color: #c06000;">Limited</b></td> 404<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
326</tr> 405</tr>
327<tr class="odd"> 406<tr class="odd">
328<td class="excplatform">Windows/x64</td> 407<td class="excplatform">Windows 64 bit</td>
329<td class="exccompiler">MSVC</td> 408<td class="exccompiler">non-MSVC</td>
330<td class="excinterop"><b style="color: #00a000;">Full</b></td> 409<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
331</tr> 410</tr>
332<tr class="even"> 411<tr class="even">
333<td class="excplatform">Windows/x86</td>
334<td class="exccompiler">Any</td>
335<td class="excinterop"><b style="color: #a00000;">No</b></td>
336</tr>
337<tr class="odd">
338<td class="excplatform">Other platforms</td> 412<td class="excplatform">Other platforms</td>
339<td class="exccompiler">Other compilers</td> 413<td class="exccompiler">Other compilers</td>
340<td class="excinterop"><b style="color: #a00000;">No</b></td> 414<td class="excinterop"><b style="color: #a00000;">No</b></td>
@@ -383,14 +457,6 @@ C++ destructors.</li>
383<li>Lua errors <b>cannot</b> be caught on the C++ side.</li> 457<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
384<li>Throwing Lua errors across C++ frames will <b>not</b> call 458<li>Throwing Lua errors across C++ frames will <b>not</b> call
385C++ destructors.</li> 459C++ destructors.</li>
386<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
387it's <b>not</b> safe to throw a Lua error across any frames containing
388a C++ function with any try/catch construct or using variables with
389(implicit) destructors. This also applies to any functions which may be
390inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
391is called inside or outside of a try/catch or whether any object actually
392needs to be destroyed: the SEH chain is corrupted and this will eventually
393lead to the termination of the process.</li>
394</ul> 460</ul>
395<br class="flush"> 461<br class="flush">
396</div> 462</div>
diff --git a/doc/faq.html b/doc/faq.html
index cb777acc..1b7cb371 100644
--- a/doc/faq.html
+++ b/doc/faq.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Frequently Asked Questions (FAQ)</title> 4<title>Frequently Asked Questions (FAQ)</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -40,9 +40,13 @@ dd { margin-left: 1.5em; }
40<a href="ext_ffi_semantics.html">FFI Semantics</a> 40<a href="ext_ffi_semantics.html">FFI Semantics</a>
41</li></ul> 41</li></ul>
42</li><li> 42</li><li>
43<a href="ext_buffer.html">String Buffers</a>
44</li><li>
43<a href="ext_jit.html">jit.* Library</a> 45<a href="ext_jit.html">jit.* Library</a>
44</li><li> 46</li><li>
45<a href="ext_c_api.html">Lua/C API</a> 47<a href="ext_c_api.html">Lua/C API</a>
48</li><li>
49<a href="ext_profiler.html">Profiler</a>
46</li></ul> 50</li></ul>
47</li><li> 51</li><li>
48<a href="status.html">Status</a> 52<a href="status.html">Status</a>
diff --git a/doc/install.html b/doc/install.html
index 56fc184e..e4af9dde 100644
--- a/doc/install.html
+++ b/doc/install.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Installation</title> 4<title>Installation</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -65,9 +65,13 @@ td.compatno {
65<a href="ext_ffi_semantics.html">FFI Semantics</a> 65<a href="ext_ffi_semantics.html">FFI Semantics</a>
66</li></ul> 66</li></ul>
67</li><li> 67</li><li>
68<a href="ext_buffer.html">String Buffers</a>
69</li><li>
68<a href="ext_jit.html">jit.* Library</a> 70<a href="ext_jit.html">jit.* Library</a>
69</li><li> 71</li><li>
70<a href="ext_c_api.html">Lua/C API</a> 72<a href="ext_c_api.html">Lua/C API</a>
73</li><li>
74<a href="ext_profiler.html">Profiler</a>
71</li></ul> 75</li></ul>
72</li><li> 76</li><li>
73<a href="status.html">Status</a> 77<a href="status.html">Status</a>
@@ -102,21 +106,21 @@ operating systems, CPUs and compilers:
102<td class="compatos"><a href="#posix">Linux</a> or<br><a href="#android">Android</a></td> 106<td class="compatos"><a href="#posix">Linux</a> or<br><a href="#android">Android</a></td>
103<td class="compatos"><a href="#posix">*BSD, Other</a></td> 107<td class="compatos"><a href="#posix">*BSD, Other</a></td>
104<td class="compatos"><a href="#posix">macOS 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td> 108<td class="compatos"><a href="#posix">macOS 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td>
105<td class="compatos"><a href="#windows">Windows XP<br>or later</a></td> 109<td class="compatos"><a href="#windows">Windows 7<br>or later</a></td>
106</tr> 110</tr>
107<tr class="odd separate"> 111<tr class="odd separate">
108<td class="compatcpu">x86 (32 bit)</td> 112<td class="compatcpu">x86 (32 bit)</td>
109<td class="compatos">GCC 4.x+<br>GCC 3.4</td> 113<td class="compatos">GCC 4.2+</td>
110<td class="compatos">GCC 4.x+<br>GCC 3.4</td> 114<td class="compatos">GCC 4.2+</td>
111<td class="compatos">XCode 5.0+<br>Clang</td> 115<td class="compatos">XCode 5.0+<br>Clang</td>
112<td class="compatos">MSVC<br>MinGW, Cygwin</td> 116<td class="compatos">MSVC<br>MinGW, Cygwin</td>
113</tr> 117</tr>
114<tr class="even"> 118<tr class="even">
115<td class="compatcpu">x64 (64 bit)</td> 119<td class="compatcpu">x64 (64 bit)</td>
116<td class="compatos">GCC 4.x+</td> 120<td class="compatos">GCC 4.2+</td>
117<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td> 121<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
118<td class="compatos">XCode 5.0+<br>Clang</td> 122<td class="compatos">XCode 5.0+<br>Clang</td>
119<td class="compatos">MSVC</td> 123<td class="compatos">MSVC<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
120</tr> 124</tr>
121<tr class="odd"> 125<tr class="odd">
122<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> 126<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
@@ -126,21 +130,21 @@ operating systems, CPUs and compilers:
126<td class="compatos compatno">&nbsp;</td> 130<td class="compatos compatno">&nbsp;</td>
127</tr> 131</tr>
128<tr class="even"> 132<tr class="even">
129<td class="compatcpu"><a href="#cross2">PPC</a></td> 133<td class="compatcpu"><a href="#cross2">ARM64<br>ARM64be</a></td>
130<td class="compatos">GCC 4.3+</td> 134<td class="compatos">GCC 4.8+</td>
131<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td> 135<td class="compatos compatno">&nbsp;</td>
136<td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
132<td class="compatos compatno">&nbsp;</td> 137<td class="compatos compatno">&nbsp;</td>
133<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
134</tr> 138</tr>
135<tr class="odd"> 139<tr class="odd">
136<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td> 140<td class="compatcpu"><a href="#cross2">PPC</a></td>
137<td class="compatos">GCC 4.3+</td>
138<td class="compatos">GCC 4.3+</td> 141<td class="compatos">GCC 4.3+</td>
142<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
139<td class="compatos compatno">&nbsp;</td> 143<td class="compatos compatno">&nbsp;</td>
140<td class="compatos compatno">&nbsp;</td> 144<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
141</tr> 145</tr>
142<tr class="even"> 146<tr class="even">
143<td class="compatcpu"><a href="#cross2">MIPS</a></td> 147<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64<br>MIPS64r6</a></td>
144<td class="compatos">GCC 4.3+</td> 148<td class="compatos">GCC 4.3+</td>
145<td class="compatos">GCC 4.3+</td> 149<td class="compatos">GCC 4.3+</td>
146<td class="compatos compatno">&nbsp;</td> 150<td class="compatos compatno">&nbsp;</td>
@@ -167,6 +171,13 @@ MSVC (Visual Studio).</li>
167Please read the instructions given in these files, before changing 171Please read the instructions given in these files, before changing
168any settings. 172any settings.
169</p> 173</p>
174<p>
175All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>).
176For x64, you can select the old 32-on-64 bit mode by adding
177<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command.
178Please check the note about the
179<a href="extensions.html#string_dump">bytecode format</a> differences, too.
180</p>
170 181
171<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2> 182<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2>
172<h3>Prerequisites</h3> 183<h3>Prerequisites</h3>
@@ -199,7 +210,7 @@ which is probably the default on your system, anyway. Simply run:
199make 210make
200</pre> 211</pre>
201<p> 212<p>
202This always builds a native x86, x64 or PPC binary, depending on the host OS 213This always builds a native binary, depending on the host OS
203you're running this command on. Check the section on 214you're running this command on. Check the section on
204<a href="#cross">cross-compilation</a> for more options. 215<a href="#cross">cross-compilation</a> for more options.
205</p> 216</p>
@@ -301,25 +312,36 @@ directory where <tt>luajit.exe</tt> is installed
301 312
302<h2 id="cross">Cross-compiling LuaJIT</h2> 313<h2 id="cross">Cross-compiling LuaJIT</h2>
303<p> 314<p>
315First, let's clear up some terminology:
316</p>
317<ul>
318<li>Host: This is your development system, usually based on an x64 or x86 CPU.</li>
319<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
320<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
321<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
322<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
323</ul>
324<p>
304The GNU Makefile-based build system allows cross-compiling on any host 325The GNU Makefile-based build system allows cross-compiling on any host
305for any supported target, as long as both architectures have the same 326for any supported target:
306pointer size. If you want to cross-compile to any 32 bit target on an
307x64 OS, you need to install the multilib development package (e.g.
308<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
309(<tt>HOST_CC="gcc -m32"</tt>).
310</p> 327</p>
328<ul>
329<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
330<li>Both host and target architectures must have the same pointer size.</li>
331<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
332<li>64 bit targets always require compilation on a 64 bit host.</li>
333</ul>
311<p> 334<p>
312You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the 335You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
313target OS differ, or you'll get assembler or linker errors. E.g. if 336target OS differ, or you'll get assembler or linker errors:
314you're compiling on a Windows or macOS host for embedded Linux or Android,
315you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
316minimal target OS, you may need to disable the built-in allocator in
317<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
318specify the same <tt>TARGET_SYS</tt> for the install step, too.
319</p> 337</p>
338<ul>
339<li>E.g. if you're compiling on a Windows or macOS host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
340<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
341<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
342</ul>
320<p> 343<p>
321The examples below only show some popular targets &mdash; please check 344Here are some examples where host and target have the same CPU:
322the comments in <tt>src/Makefile</tt> for more details.
323</p> 345</p>
324<pre class="code"> 346<pre class="code">
325# Cross-compile to a 32 bit binary on a multilib x64 OS 347# Cross-compile to a 32 bit binary on a multilib x64 OS
@@ -337,34 +359,44 @@ use the canonical toolchain triplets for Linux.
337</p> 359</p>
338<p> 360<p>
339Since there's often no easy way to detect CPU features at runtime, it's 361Since there's often no easy way to detect CPU features at runtime, it's
340important to compile with the proper CPU or architecture settings. You 362important to compile with the proper CPU or architecture settings:
341can specify these when building the toolchain yourself. Or add 363</p>
342<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For 364<ul>
343ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, 365<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
344too. Otherwise LuaJIT may not run at the full performance of your target 366<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
345CPU. 367<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
368<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
369</ul>
370<p>
371Here are some examples for targets with a different CPU than the host:
346</p> 372</p>
347<pre class="code"> 373<pre class="code">
348# ARM soft-float 374# ARM soft-float
349make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ 375make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
350 TARGET_CFLAGS="-mfloat-abi=soft" 376 TARGET_CFLAGS="-mfloat-abi=soft"
351 377
352# ARM soft-float ABI with VFP (example for Cortex-A8) 378# ARM soft-float ABI with VFP (example for Cortex-A9)
353make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ 379make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
354 TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp" 380 TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
355 381
356# ARM hard-float ABI with VFP (armhf, requires recent toolchain) 382# ARM hard-float ABI with VFP (armhf, most modern toolchains)
357make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- 383make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
358 384
385# ARM64
386make CROSS=aarch64-linux-
387
359# PPC 388# PPC
360make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- 389make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
361# PPC/e500v2 (fast interpreter only)
362make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
363 390
364# MIPS big-endian 391# MIPS32 big-endian
365make HOST_CC="gcc -m32" CROSS=mips-linux- 392make HOST_CC="gcc -m32" CROSS=mips-linux-
366# MIPS little-endian 393# MIPS32 little-endian
367make HOST_CC="gcc -m32" CROSS=mipsel-linux- 394make HOST_CC="gcc -m32" CROSS=mipsel-linux-
395
396# MIPS64 big-endian
397make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
398# MIPS64 little-endian
399make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
368</pre> 400</pre>
369<p> 401<p>
370You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>. 402You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
@@ -372,8 +404,16 @@ Please adapt the environment variables to match the install locations and the
372desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16. 404desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16.
373</p> 405</p>
374<pre class="code"> 406<pre class="code">
375# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) 407# Android/ARM64, aarch64, Android 5.0+ (L)
408NDKDIR=/opt/android/ndk
409NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
410NDKCROSS=$NDKBIN/aarch64-linux-android-
411NDKCC=$NDKBIN/aarch64-linux-android21-clang
412make CROSS=$NDKCROSS \
413 STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
414 TARGET_LD=$NDKCC
376 415
416# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
377NDKDIR=/opt/android/ndk 417NDKDIR=/opt/android/ndk
378NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin 418NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
379NDKCROSS=$NDKBIN/arm-linux-androideabi- 419NDKCROSS=$NDKBIN/arm-linux-androideabi-
@@ -383,9 +423,23 @@ make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
383 TARGET_LD=$NDKCC 423 TARGET_LD=$NDKCC
384</pre> 424</pre>
385<p> 425<p>
386Please use the LuaJIT 2.1 branch to compile for 426You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
387<b id="ios">iOS</b> (iPhone/iPad). 427</p>
428<p style="font-size: 8pt;">
429Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
430are not allowed to generate code at runtime. You'll only get the performance
431of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but
432much slower than the JIT compiler. Please complain to Apple, not me.
433Or use Android. :-p
388</p> 434</p>
435<pre class="code">
436# iOS/ARM64
437ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
438ICC=$(xcrun --sdk iphoneos --find clang)
439ISDKF="-arch arm64 -isysroot $ISDKP"
440make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
441 TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
442</pre>
389 443
390<h3 id="consoles">Cross-compiling for consoles</h3> 444<h3 id="consoles">Cross-compiling for consoles</h3>
391<p> 445<p>
@@ -441,6 +495,16 @@ the following commands:
441cd src 495cd src
442xedkbuild 496xedkbuild
443</pre> 497</pre>
498<p>
499To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host,
500open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
501<tt>cd</tt> to the directory where you've unpacked the sources and run
502the following commands:
503</p>
504<pre class="code">
505cd src
506xb1build
507</pre>
444 508
445<h2 id="embed">Embedding LuaJIT</h2> 509<h2 id="embed">Embedding LuaJIT</h2>
446<p> 510<p>
@@ -469,16 +533,6 @@ the DLL). You may link LuaJIT statically on Windows only if you don't
469intend to load Lua/C modules at runtime. 533intend to load Lua/C modules at runtime.
470</li></ul> 534</li></ul>
471</li> 535</li>
472<li>
473<i>Important: this relates to LuaJIT 2.0 only &mdash; use LuaJIT 2.1 to
474avoid these complications.</i><br>
475If you're building a 64 bit application on macOS which links directly or
476indirectly against LuaJIT, you need to link your main executable
477with these flags:
478<pre class="code">
479-pagezero_size 10000 -image_base 100000000
480</pre>
481</li>
482</ul> 536</ul>
483<p>Additional hints for initializing LuaJIT using the C API functions:</p> 537<p>Additional hints for initializing LuaJIT using the C API functions:</p>
484<ul> 538<ul>
diff --git a/doc/luajit.html b/doc/luajit.html
index 42f32750..a25267a6 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>LuaJIT</title> 4<title>LuaJIT</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -122,9 +122,13 @@ table.feature small {
122<a href="ext_ffi_semantics.html">FFI Semantics</a> 122<a href="ext_ffi_semantics.html">FFI Semantics</a>
123</li></ul> 123</li></ul>
124</li><li> 124</li><li>
125<a href="ext_buffer.html">String Buffers</a>
126</li><li>
125<a href="ext_jit.html">jit.* Library</a> 127<a href="ext_jit.html">jit.* Library</a>
126</li><li> 128</li><li>
127<a href="ext_c_api.html">Lua/C API</a> 129<a href="ext_c_api.html">Lua/C API</a>
130</li><li>
131<a href="ext_profiler.html">Profiler</a>
128</li></ul> 132</li></ul>
129</li><li> 133</li><li>
130<a href="status.html">Status</a> 134<a href="status.html">Status</a>
@@ -158,13 +162,13 @@ LuaJIT is Copyright &copy; 2005-2021 Mike Pall, released under the
158<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> 162<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
159</table> 163</table>
160<table class="feature os os3"> 164<table class="feature os os3">
161<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr> 165<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
162</table> 166</table>
163<table class="feature compiler"> 167<table class="feature compiler">
164<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr> 168<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
165</table> 169</table>
166<table class="feature cpu"> 170<table class="feature cpu">
167<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr> 171<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
168</table> 172</table>
169<table class="feature fcompat"> 173<table class="feature fcompat">
170<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr> 174<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
diff --git a/doc/running.html b/doc/running.html
index ea46a97e..b55b8439 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Running LuaJIT</title> 4<title>Running LuaJIT</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -59,9 +59,13 @@ td.param_default {
59<a href="ext_ffi_semantics.html">FFI Semantics</a> 59<a href="ext_ffi_semantics.html">FFI Semantics</a>
60</li></ul> 60</li></ul>
61</li><li> 61</li><li>
62<a href="ext_buffer.html">String Buffers</a>
63</li><li>
62<a href="ext_jit.html">jit.* Library</a> 64<a href="ext_jit.html">jit.* Library</a>
63</li><li> 65</li><li>
64<a href="ext_c_api.html">Lua/C API</a> 66<a href="ext_c_api.html">Lua/C API</a>
67</li><li>
68<a href="ext_profiler.html">Profiler</a>
65</li></ul> 69</li></ul>
66</li><li> 70</li><li>
67<a href="status.html">Status</a> 71<a href="status.html">Status</a>
@@ -172,6 +176,7 @@ Here are the available LuaJIT control commands:
172<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li> 176<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
173<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li> 177<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
174<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li> 178<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
179<li id="j_p"><tt>-jp</tt> &mdash; Start the <a href="ext_profiler.html">integrated profiler</a>.</li>
175</ul> 180</ul>
176<p> 181<p>
177The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules 182The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
diff --git a/doc/status.html b/doc/status.html
index 4ab20dd3..1d3ba984 100644
--- a/doc/status.html
+++ b/doc/status.html
@@ -1,8 +1,8 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> 1<!DOCTYPE html>
2<html> 2<html>
3<head> 3<head>
4<title>Status</title> 4<title>Status</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 5<meta charset="utf-8">
6<meta name="Copyright" content="Copyright (C) 2005-2021"> 6<meta name="Copyright" content="Copyright (C) 2005-2021">
7<meta name="Language" content="en"> 7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -40,9 +40,13 @@ ul li { padding-bottom: 0.3em; }
40<a href="ext_ffi_semantics.html">FFI Semantics</a> 40<a href="ext_ffi_semantics.html">FFI Semantics</a>
41</li></ul> 41</li></ul>
42</li><li> 42</li><li>
43<a href="ext_buffer.html">String Buffers</a>
44</li><li>
43<a href="ext_jit.html">jit.* Library</a> 45<a href="ext_jit.html">jit.* Library</a>
44</li><li> 46</li><li>
45<a href="ext_c_api.html">Lua/C API</a> 47<a href="ext_c_api.html">Lua/C API</a>
48</li><li>
49<a href="ext_profiler.html">Profiler</a>
46</li></ul> 50</li></ul>
47</li><li> 51</li><li>
48<a class="current" href="status.html">Status</a> 52<a class="current" href="status.html">Status</a>
@@ -56,7 +60,7 @@ ul li { padding-bottom: 0.3em; }
56</div> 60</div>
57<div id="main"> 61<div id="main">
58<p> 62<p>
59This documentation is for LuaJIT 2.0.5. Please check the <tt>doc</tt> 63This documentation is for LuaJIT 2.1.0-beta3. Please check the <tt>doc</tt>
60directory in each git branch for the version-specific documentation. 64directory in each git branch for the version-specific documentation.
61</p> 65</p>
62<p> 66<p>
@@ -88,12 +92,6 @@ The Lua <b>debug API</b> is missing a couple of features (return
88hooks for non-Lua functions) and shows slightly different behavior 92hooks for non-Lua functions) and shows slightly different behavior
89in LuaJIT (no per-coroutine hooks, no tail call counting). 93in LuaJIT (no per-coroutine hooks, no tail call counting).
90</li> 94</li>
91<li>
92Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
93handled correctly. The error may fall through an on-trace
94<tt>pcall</tt> or it may be passed on to the function set with
95<tt>lua_atpanic</tt> on x64.
96</li>
97</ul> 95</ul>
98<br class="flush"> 96<br class="flush">
99</div> 97</div>
diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
index a7295663..ebcf4ac0 100644
--- a/dynasm/dasm_arm.h
+++ b/dynasm/dasm_arm.h
@@ -294,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp)
294 294
295 { /* Handle globals not defined in this translation unit. */ 295 { /* Handle globals not defined in this translation unit. */
296 int idx; 296 int idx;
297 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 297 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
298 int n = D->lglabels[idx]; 298 int n = D->lglabels[idx];
299 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 299 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
300 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 300 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -371,7 +371,10 @@ int dasm_encode(Dst_DECL, void *buffer)
371 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; 371 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
372 break; 372 break;
373 case DASM_REL_LG: 373 case DASM_REL_LG:
374 CK(n >= 0, UNDEF_LG); 374 if (n < 0) {
375 n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
376 goto patchrel;
377 }
375 /* fallthrough */ 378 /* fallthrough */
376 case DASM_REL_PC: 379 case DASM_REL_PC:
377 CK(n >= 0, UNDEF_PC); 380 CK(n >= 0, UNDEF_PC);
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
index ffa8ae82..0c775ae2 100644
--- a/dynasm/dasm_arm.lua
+++ b/dynasm/dasm_arm.lua
@@ -9,9 +9,9 @@
9local _info = { 9local _info = {
10 arch = "arm", 10 arch = "arm",
11 description = "DynASM ARM module", 11 description = "DynASM ARM module",
12 version = "1.3.0", 12 version = "1.5.0",
13 vernum = 10300, 13 vernum = 10500,
14 release = "2011-05-05", 14 release = "2021-05-02",
15 author = "Mike Pall", 15 author = "Mike Pall",
16 license = "MIT", 16 license = "MIT",
17} 17}
diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
new file mode 100644
index 00000000..d6da4528
--- /dev/null
+++ b/dynasm/dasm_arm64.h
@@ -0,0 +1,561 @@
1/*
2** DynASM ARM64 encoding engine.
3** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/
6
7#include <stddef.h>
8#include <stdarg.h>
9#include <string.h>
10#include <stdlib.h>
11
12#define DASM_ARCH "arm64"
13
14#ifndef DASM_EXTERN
15#define DASM_EXTERN(a,b,c,d) 0
16#endif
17
18/* Action definitions. */
19enum {
20 DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
25 DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
26 DASM_IMMV, DASM_VREG,
27 DASM__MAX
28};
29
30/* Maximum number of section buffer positions for a single dasm_put() call. */
31#define DASM_MAXSECPOS 25
32
33/* DynASM encoder status codes. Action list offset or number are or'ed in. */
34#define DASM_S_OK 0x00000000
35#define DASM_S_NOMEM 0x01000000
36#define DASM_S_PHASE 0x02000000
37#define DASM_S_MATCH_SEC 0x03000000
38#define DASM_S_RANGE_I 0x11000000
39#define DASM_S_RANGE_SEC 0x12000000
40#define DASM_S_RANGE_LG 0x13000000
41#define DASM_S_RANGE_PC 0x14000000
42#define DASM_S_RANGE_REL 0x15000000
43#define DASM_S_RANGE_VREG 0x16000000
44#define DASM_S_UNDEF_LG 0x21000000
45#define DASM_S_UNDEF_PC 0x22000000
46
47/* Macros to convert positions (8 bit section + 24 bit index). */
48#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
49#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
50#define DASM_SEC2POS(sec) ((sec)<<24)
51#define DASM_POS2SEC(pos) ((pos)>>24)
52#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
53
54/* Action list type. */
55typedef const unsigned int *dasm_ActList;
56
57/* Per-section structure. */
58typedef struct dasm_Section {
59 int *rbuf; /* Biased buffer pointer (negative section bias). */
60 int *buf; /* True buffer pointer. */
61 size_t bsize; /* Buffer size in bytes. */
62 int pos; /* Biased buffer position. */
63 int epos; /* End of biased buffer position - max single put. */
64 int ofs; /* Byte offset into section. */
65} dasm_Section;
66
67/* Core structure holding the DynASM encoding state. */
68struct dasm_State {
69 size_t psize; /* Allocated size of this structure. */
70 dasm_ActList actionlist; /* Current actionlist pointer. */
71 int *lglabels; /* Local/global chain/pos ptrs. */
72 size_t lgsize;
73 int *pclabels; /* PC label chains/pos ptrs. */
74 size_t pcsize;
75 void **globals; /* Array of globals (bias -10). */
76 dasm_Section *section; /* Pointer to active section. */
77 size_t codesize; /* Total size of all code sections. */
78 int maxsection; /* 0 <= sectionidx < maxsection. */
79 int status; /* Status code. */
80 dasm_Section sections[1]; /* All sections. Alloc-extended. */
81};
82
83/* The size of the core structure depends on the max. number of sections. */
84#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
85
86
87/* Initialize DynASM state. */
88void dasm_init(Dst_DECL, int maxsection)
89{
90 dasm_State *D;
91 size_t psz = 0;
92 int i;
93 Dst_REF = NULL;
94 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
95 D = Dst_REF;
96 D->psize = psz;
97 D->lglabels = NULL;
98 D->lgsize = 0;
99 D->pclabels = NULL;
100 D->pcsize = 0;
101 D->globals = NULL;
102 D->maxsection = maxsection;
103 for (i = 0; i < maxsection; i++) {
104 D->sections[i].buf = NULL; /* Need this for pass3. */
105 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
106 D->sections[i].bsize = 0;
107 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
108 }
109}
110
111/* Free DynASM state. */
112void dasm_free(Dst_DECL)
113{
114 dasm_State *D = Dst_REF;
115 int i;
116 for (i = 0; i < D->maxsection; i++)
117 if (D->sections[i].buf)
118 DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
119 if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
120 if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
121 DASM_M_FREE(Dst, D, D->psize);
122}
123
124/* Setup global label array. Must be called before dasm_setup(). */
125void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
126{
127 dasm_State *D = Dst_REF;
128 D->globals = gl - 10; /* Negative bias to compensate for locals. */
129 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
130}
131
132/* Grow PC label array. Can be called after dasm_setup(), too. */
133void dasm_growpc(Dst_DECL, unsigned int maxpc)
134{
135 dasm_State *D = Dst_REF;
136 size_t osz = D->pcsize;
137 DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
138 memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
139}
140
141/* Setup encoder. */
142void dasm_setup(Dst_DECL, const void *actionlist)
143{
144 dasm_State *D = Dst_REF;
145 int i;
146 D->actionlist = (dasm_ActList)actionlist;
147 D->status = DASM_S_OK;
148 D->section = &D->sections[0];
149 memset((void *)D->lglabels, 0, D->lgsize);
150 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
151 for (i = 0; i < D->maxsection; i++) {
152 D->sections[i].pos = DASM_SEC2POS(i);
153 D->sections[i].ofs = 0;
154 }
155}
156
157
158#ifdef DASM_CHECKS
159#define CK(x, st) \
160 do { if (!(x)) { \
161 D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
162#define CKPL(kind, st) \
163 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
164 D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
165#else
166#define CK(x, st) ((void)0)
167#define CKPL(kind, st) ((void)0)
168#endif
169
/* Encode a 12 bit immediate, optionally shifted left by 12.
** Returns n itself if it fits in 12 bits, (n >> 12) with bit 12 set if n
** is a 12 bit value shifted left by 12, or -1 if neither form fits.
*/
static int dasm_imm12(unsigned int n)
{
  if (n < 0x1000u)
    return (int)n;
  if ((n & 0xff000fffu) != 0)
    return -1;
  return (int)((n >> 12) | 0x1000);
}
179
/* Return the index of the highest set bit of x, or -1 if x is zero.
** For the single-bit values passed by dasm_imm13() this is also the index
** of the lowest set bit.
*/
static int dasm_ffs(unsigned long long x)
{
  int bit;
  for (bit = -1; x != 0; x >>= 1)
    bit++;
  return bit;
}
186
/* Encode the 64 bit value hi:lo as a 13 bit pattern immediate.
** Returns the encoded field (the callers shift it left by 10 and OR it
** into the instruction word), or -1 if the value is not a repeated,
** rotated run of ones. Presumably this is the N:immr:imms field of the
** A64 logical (immediate) instructions -- confirm against the ISA manual.
*/
static int dasm_imm13(int lo, int hi)
{
  int inv = 0, w = 64, s = 0xfff, xa, xb;
  unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
  unsigned long long m = 1ULL, a, b, c;
  /* Normalize so that bit 0 is clear; remember the inversion. */
  if (n & 1) { n = ~n; inv = 1; }
  /* a: lowest set bit, b: lowest clear bit above it, c: next set bit. */
  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);
  xa = dasm_ffs(a); xb = dasm_ffs(b);
  if (c) {
    /* The pattern repeats with element width w; m replicates one element. */
    w = dasm_ffs(c) - xa;
    if (w == 32) m = 0x0000000100000001ULL;
    else if (w == 16) m = 0x0001000100010001ULL;
    else if (w == 8) m = 0x0101010101010101ULL;
    else if (w == 4) m = 0x1111111111111111ULL;
    else if (w == 2) m = 0x5555555555555555ULL;
    else return -1;
    s = (-2*w & 0x3f) - 1;
  } else if (!a) {
    return -1;  /* All zeros (or all ones before inversion). */
  } else if (xb == -1) {
    xb = 64;  /* The run of ones extends to the top bit. */
  }
  /* Verify that n really is the run [xa, xb) replicated by m. */
  if ((b-a) * m != n) return -1;
  if (inv)
    return ((w - xb) << 6) | (s+w+xa-xb);
  return ((w - xa) << 6) | (s+xb-xa);
}
217
218/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
219void dasm_put(Dst_DECL, int start, ...)
220{
221 va_list ap;
222 dasm_State *D = Dst_REF;
223 dasm_ActList p = D->actionlist + start;
224 dasm_Section *sec = D->section;
225 int pos = sec->pos, ofs = sec->ofs;
226 int *b;
227
228 if (pos >= sec->epos) {
229 DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
230 sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
231 sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
232 sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
233 }
234
235 b = sec->rbuf;
236 b[pos++] = start;
237
238 va_start(ap, start);
239 while (1) {
240 unsigned int ins = *p++;
241 unsigned int action = (ins >> 16);
242 if (action >= DASM__MAX) {
243 ofs += 4;
244 } else {
245 int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
246 switch (action) {
247 case DASM_STOP: goto stop;
248 case DASM_SECTION:
249 n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
250 D->section = &D->sections[n]; goto stop;
251 case DASM_ESC: p++; ofs += 4; break;
252 case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
253 case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
254 case DASM_REL_LG:
255 n = (ins & 2047) - 10; pl = D->lglabels + n;
256 /* Bkwd rel or global. */
257 if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
258 pl += 10; n = *pl;
259 if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
260 goto linkrel;
261 case DASM_REL_PC:
262 pl = D->pclabels + n; CKPL(pc, PC);
263 putrel:
264 n = *pl;
265 if (n < 0) { /* Label exists. Get label pos and store it. */
266 b[pos] = -n;
267 } else {
268 linkrel:
269 b[pos] = n; /* Else link to rel chain, anchored at label. */
270 *pl = pos;
271 }
272 pos++;
273 if ((ins & 0x8000)) ofs += 8;
274 break;
275 case DASM_REL_A:
276 b[pos++] = n;
277 b[pos++] = va_arg(ap, int);
278 break;
279 case DASM_LABEL_LG:
280 pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
281 case DASM_LABEL_PC:
282 pl = D->pclabels + n; CKPL(pc, PC);
283 putlabel:
284 n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
285 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
286 }
287 *pl = -pos; /* Label exists now. */
288 b[pos++] = ofs; /* Store pass1 offset estimate. */
289 break;
290 case DASM_IMM:
291 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
292 n >>= ((ins>>10)&31);
293#ifdef DASM_CHECKS
294 if ((ins & 0x8000))
295 CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
296 else
297 CK((n>>((ins>>5)&31)) == 0, RANGE_I);
298#endif
299 b[pos++] = n;
300 break;
301 case DASM_IMM6:
302 CK((n >> 6) == 0, RANGE_I);
303 b[pos++] = n;
304 break;
305 case DASM_IMM12:
306 CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
307 b[pos++] = n;
308 break;
309 case DASM_IMM13W:
310 CK(dasm_imm13(n, n) != -1, RANGE_I);
311 b[pos++] = n;
312 break;
313 case DASM_IMM13X: {
314 int m = va_arg(ap, int);
315 CK(dasm_imm13(n, m) != -1, RANGE_I);
316 b[pos++] = n;
317 b[pos++] = m;
318 break;
319 }
320 case DASM_IMML: {
321#ifdef DASM_CHECKS
322 int scale = (ins & 3);
323 CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
324 (unsigned int)(n+256) < 512, RANGE_I);
325#endif
326 b[pos++] = n;
327 break;
328 }
329 case DASM_IMMV:
330 ofs += 4;
331 b[pos++] = n;
332 break;
333 case DASM_VREG:
334 CK(n < 32, RANGE_VREG);
335 b[pos++] = n;
336 break;
337 }
338 }
339 }
340stop:
341 va_end(ap);
342 sec->pos = pos;
343 sec->ofs = ofs;
344}
345#undef CK
346
347/* Pass 2: Link sections, shrink aligns, fix label offsets. */
348int dasm_link(Dst_DECL, size_t *szp)
349{
350 dasm_State *D = Dst_REF;
351 int secnum;
352 int ofs = 0;
353
354#ifdef DASM_CHECKS
355 *szp = 0;
356 if (D->status != DASM_S_OK) return D->status;
357 {
358 int pc;
359 for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
360 if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
361 }
362#endif
363
364 { /* Handle globals not defined in this translation unit. */
365 int idx;
366 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
367 int n = D->lglabels[idx];
368 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
369 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
370 }
371 }
372
373 /* Combine all code sections. No support for data sections (yet). */
374 for (secnum = 0; secnum < D->maxsection; secnum++) {
375 dasm_Section *sec = D->sections + secnum;
376 int *b = sec->rbuf;
377 int pos = DASM_SEC2POS(secnum);
378 int lastpos = sec->pos;
379
380 while (pos != lastpos) {
381 dasm_ActList p = D->actionlist + b[pos++];
382 while (1) {
383 unsigned int ins = *p++;
384 unsigned int action = (ins >> 16);
385 switch (action) {
386 case DASM_STOP: case DASM_SECTION: goto stop;
387 case DASM_ESC: p++; break;
388 case DASM_REL_EXT: break;
389 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
390 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
391 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
392 case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
393 case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
394 case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
395 }
396 }
397 stop: (void)0;
398 }
399 ofs += sec->ofs; /* Next section starts right after current section. */
400 }
401
402 D->codesize = ofs; /* Total size of all code sections */
403 *szp = ofs;
404 return DASM_S_OK;
405}
406
407#ifdef DASM_CHECKS
408#define CK(x, st) \
409 do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
410#else
411#define CK(x, st) ((void)0)
412#endif
413
414/* Pass 3: Encode sections. */
415int dasm_encode(Dst_DECL, void *buffer)
416{
417 dasm_State *D = Dst_REF;
418 char *base = (char *)buffer;
419 unsigned int *cp = (unsigned int *)buffer;
420 int secnum;
421
422 /* Encode all code sections. No support for data sections (yet). */
423 for (secnum = 0; secnum < D->maxsection; secnum++) {
424 dasm_Section *sec = D->sections + secnum;
425 int *b = sec->buf;
426 int *endb = sec->rbuf + sec->pos;
427
428 while (b != endb) {
429 dasm_ActList p = D->actionlist + *b++;
430 while (1) {
431 unsigned int ins = *p++;
432 unsigned int action = (ins >> 16);
433 int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
434 switch (action) {
435 case DASM_STOP: case DASM_SECTION: goto stop;
436 case DASM_ESC: *cp++ = *p++; break;
437 case DASM_REL_EXT:
438 n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
439 goto patchrel;
440 case DASM_ALIGN:
441 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
442 break;
443 case DASM_REL_LG:
444 if (n < 0) {
445 ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
446 n = (int)na;
447 CK((ptrdiff_t)n == na, RANGE_REL);
448 goto patchrel;
449 }
450 /* fallthrough */
451 case DASM_REL_PC:
452 CK(n >= 0, UNDEF_PC);
453 n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
454 patchrel:
455 if (!(ins & 0xf800)) { /* B, BL */
456 CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
457 cp[-1] |= ((n >> 2) & 0x03ffffff);
458 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */
459 CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
460 cp[-1] |= ((n << 3) & 0x00ffffe0);
461 } else if ((ins & 0x3000) == 0x2000) { /* ADR */
462 CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
463 cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
464 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */
465 cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
466 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
467 CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
468 cp[-1] |= ((n << 3) & 0x0007ffe0);
469 } else if ((ins & 0x8000)) { /* absolute */
470 cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
471 cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
472 cp += 2;
473 }
474 break;
475 case DASM_REL_A: {
476 ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
477 if ((ins & 0x3000) == 0x3000) { /* ADRP */
478 ins &= ~0x1000;
479 na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
480 } else {
481 na = na - (ptrdiff_t)cp + 4;
482 }
483 n = (int)na;
484 CK((ptrdiff_t)n == na, RANGE_REL);
485 goto patchrel;
486 }
487 case DASM_LABEL_LG:
488 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
489 break;
490 case DASM_LABEL_PC: break;
491 case DASM_IMM:
492 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
493 break;
494 case DASM_IMM6:
495 cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
496 break;
497 case DASM_IMM12:
498 cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
499 break;
500 case DASM_IMM13W:
501 cp[-1] |= (dasm_imm13(n, n) << 10);
502 break;
503 case DASM_IMM13X:
504 cp[-1] |= (dasm_imm13(n, *b++) << 10);
505 break;
506 case DASM_IMML: {
507 int scale = (ins & 3);
508 cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
509 ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
510 break;
511 }
512 case DASM_IMMV:
513 *cp++ = n;
514 break;
515 case DASM_VREG:
516 cp[-1] |= (n & 0x1f) << (ins & 0x1f);
517 break;
518 default: *cp++ = ins; break;
519 }
520 }
521 stop: (void)0;
522 }
523 }
524
525 if (base + D->codesize != (char *)cp) /* Check for phase errors. */
526 return DASM_S_PHASE;
527 return DASM_S_OK;
528}
529#undef CK
530
531/* Get PC label offset. */
532int dasm_getpclabel(Dst_DECL, unsigned int pc)
533{
534 dasm_State *D = Dst_REF;
535 if (pc*sizeof(int) < D->pcsize) {
536 int pos = D->pclabels[pc];
537 if (pos < 0) return *DASM_POS2PTR(D, -pos);
538 if (pos > 0) return -1; /* Undefined. */
539 }
540 return -2; /* Unused or out of range. */
541}
542
#ifdef DASM_CHECKS
/* Optional sanity checker to call between isolated encoding steps.
** Flags local labels 1..9 that are still referenced but undefined, resets
** them otherwise, and optionally checks that the active section is
** secmatch (pass a negative value to skip that check).
*/
int dasm_checkstep(Dst_DECL, int secmatch)
{
  dasm_State *D = Dst_REF;
  int lbl;
  if (D->status != DASM_S_OK)
    return D->status;
  for (lbl = 1; lbl <= 9; lbl++) {
    if (D->lglabels[lbl] > 0) {
      D->status = DASM_S_UNDEF_LG|lbl;
      return D->status;
    }
    D->lglabels[lbl] = 0;
  }
  if (secmatch >= 0 && D->section != &D->sections[secmatch])
    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
  return D->status;
}
#endif
561
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
new file mode 100644
index 00000000..cb82dc4a
--- /dev/null
+++ b/dynasm/dasm_arm64.lua
@@ -0,0 +1,1219 @@
1------------------------------------------------------------------------------
2-- DynASM ARM64 module.
3--
4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7
-- Module information:
-- The arch name matches DASM_ARCH ("arm64") in the C encoding engine.
local _info = {
  arch = "arm64",
  description = "DynASM ARM64 module",
  version = "1.5.0",
  vernum = 10500,
  release = "2021-05-02",
  author = "Mike Pall",
  license = "MIT",
}
18
19-- Exported glue functions for the arch-specific module.
20local _M = { _info = _info }
21
22-- Cache library functions.
23local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
24local assert, setmetatable, rawget = assert, setmetatable, rawget
25local _s = string
26local format, byte, char = _s.format, _s.byte, _s.char
27local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
28local concat, sort, insert = table.concat, table.sort, table.insert
29local bit = bit or require("bit")
30local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
31local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
32
33-- Inherited tables and callbacks.
34local g_opt, g_arch
35local wline, werror, wfatal, wwarn
36
37-- Action name list.
38-- CHECK: Keep this in sync with the C code!
39local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "REL_A",
43 "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
44 "VREG",
45}
46
47-- Maximum number of section buffer positions for dasm_put().
48-- CHECK: Keep this in sync with the C code!
49local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
50
51-- Action name -> action number.
52local map_action = {}
53for n,name in ipairs(action_names) do
54 map_action[name] = n-1
55end
56
57-- Action list buffer.
58local actlist = {}
59
60-- Argument list for next dasm_put(). Start with offset 0 into action list.
61local actargs = { 0 }
62
63-- Current number of section buffer positions for dasm_put().
64local secpos = 1
65
66------------------------------------------------------------------------------
67
-- Dump action names and numbers.
-- Writes one line per action (name, hex code, decimal code) to `out`.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for idx = 1, #action_names do
    local name = action_names[idx]
    local code = map_action[name]
    local line = format(" %-10s %02X %d\n", name, code, code)
    out:write(line)
  end
  out:write("\n")
end
77
-- Write action list buffer as a huge static C array.
-- `out` is the output file, `name` the identifier of the generated array.
local function writeactions(out, name)
  local nn = #actlist
  -- An empty list is emitted as a single STOP. It must be stored at index 1
  -- (not 0), since the final write below reads actlist[nn].
  if nn == 0 then nn = 1; actlist[1] = map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  -- The last element is written without a trailing comma.
  assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
end
88
89------------------------------------------------------------------------------
90
-- Add word to action list.
-- `n` must be an integer in the unsigned 32 bit range.
local function wputxw(n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  local slot = #actlist + 1
  actlist[slot] = n
end
96
-- Add action to list with optional arg. Advance buffer pos, too.
-- `val` is OR'ed into the low 16 bits of the action word, `a` is the C
-- expression passed to dasm_put(), `num` the number of buffer positions
-- consumed (defaults to 1 when `a` or `num` is given).
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  local word = code * 0x10000 + (val or 0)
  wputxw(word)
  if a then
    actargs[#actargs+1] = a
  end
  if a or num then
    secpos = secpos + (num or 1)
  end
end
104
-- Flush action list (intervening C code or buffer pos overflow).
-- Emits the pending dasm_put() call; pass a true `term` if the action list
-- has already been terminated.
local function wflush(term)
  local pending = #actlist ~= actargs[1]
  if pending then
    if not term then waction("STOP") end -- Terminate action list.
    local call = format("dasm_put(Dst, %s);", concat(actargs, ", "))
    wline(call, true)
    actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
    secpos = 1 -- The actionlist offset occupies a buffer position, too.
  end
end
113
-- Put escaped word.
-- The C engine decodes the upper 16 bits of each word as an action code, so
-- any word below (number of actions) * 0x10000 must be prefixed with ESC.
-- The former limit 0x000fffff only covered 16 actions, but this module
-- defines more (see action_names).
local function wputw(n)
  if n < #action_names * 0x10000 then waction("ESC") end
  wputxw(n)
end
119
-- Reserve position for word.
-- Appends an empty-string placeholder to the action list and returns its
-- index.
local function wpos()
  local reserved = #actlist + 1
  actlist[reserved] = ""
  return reserved
end
126
-- Store word to reserved position.
-- Words whose upper 16 bits would be decoded as an action code by the C
-- engine are stored as ESC followed by the word itself. The former limit
-- 0x000fffff only covered 16 actions, but this module defines more (see
-- action_names).
local function wputpos(pos, n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  if n < #action_names * 0x10000 then
    insert(actlist, pos+1, n)
    n = map_action.ESC * 0x10000
  end
  actlist[pos] = n
end
136
137------------------------------------------------------------------------------
138
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbering starts at 20; at most 2047 is allowed as a label number.
local next_global = 20
local map_global = setmetatable({}, { __index = function(tab, label)
  if not match(label, "^[%a_][%w_]*$") then
    werror("bad global label")
  end
  local id = next_global
  if id > 2047 then
    werror("too many global labels")
  end
  next_global = id + 1
  tab[label] = id
  return id
end})
149
-- Dump global labels.
-- Lists all assigned global label names in numbering order.
local function dumpglobals(out, lvl)
  local names = {}
  for label, id in pairs(map_global) do
    names[id] = label
  end
  out:write("Global labels:\n")
  local id = 20
  while id < next_global do
    out:write(format(" %s\n", names[id]))
    id = id + 1
  end
  out:write("\n")
end
160
-- Write global label enum.
-- Emits a C enum with one `prefix`-ed member per global label, followed by
-- a terminating `prefix`_MAX member.
local function writeglobals(out, prefix)
  local names = {}
  for label, id in pairs(map_global) do
    names[id] = label
  end
  out:write("enum {\n")
  local id = 20
  while id < next_global do
    out:write(" ", prefix, names[id], ",\n")
    id = id + 1
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
171
-- Write global label names.
-- Emits a NULL-terminated C array of strings called `name`.
local function writeglobalnames(out, name)
  local labels = {}
  for label, id in pairs(map_global) do
    labels[id] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  local id = 20
  while id < next_global do
    out:write(" \"", labels[id], "\",\n")
    id = id + 1
  end
  out:write(" (const char *)0\n};\n")
end
182
183------------------------------------------------------------------------------
184
-- Extern label name -> extern label number. With auto assignment on 1st use.
-- map_extern_ holds the reverse mapping (number -> name).
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, { __index = function(tab, label)
  -- No restrictions on the name for now.
  local id = next_extern
  if id > 2047 then
    werror("too many extern labels")
  end
  next_extern = id + 1
  tab[label] = id
  map_extern_[id] = label
  return id
end})
197
-- Dump extern labels.
-- Lists all assigned extern label names in numbering order.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local id = 0
  while id < next_extern do
    out:write(format(" %s\n", map_extern_[id]))
    id = id + 1
  end
  out:write("\n")
end
206
-- Write extern label names.
-- Emits a NULL-terminated C array of strings called `name`.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local id = 0
  while id < next_extern do
    out:write(" \"", map_extern_[id], "\",\n")
    id = id + 1
  end
  out:write(" (const char *)0\n};\n")
end
215
216------------------------------------------------------------------------------
217
218-- Arch-specific maps.
219
220-- Ext. register name -> int. name.
221local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
222
223-- Int. register name -> ext. name.
224local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
225
226local map_type = {} -- Type name -> { ctype, reg }
227local ctypenum = 0 -- Type number (for Dt... macros).
228
-- Reverse defines for registers.
-- Maps an internal register name back to its external name; any other
-- string is returned unchanged.
function _M.revdef(s)
  local ext = map_reg_rev[s]
  if ext then return ext end
  return s
end
233
234local map_shift = { lsl = 0, lsr = 1, asr = 2, }
235
236local map_extend = {
237 uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
238 sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
239}
240
241local map_cond = {
242 eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
243 hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
244 hs = 2, lo = 3,
245}
246
247------------------------------------------------------------------------------
248
249local parse_reg_type
250
-- Parse a register operand.
-- Accepts a plain register (x0, w3, d7, ...), a type name (optionally with
-- a `type:reg` override) or a variable register `Rx(expr)`. Returns the
-- register number shifted left by `shift` plus the type entry, if any. A
-- variable register emits a VREG action and returns 0. All registers of one
-- instruction must share the same size class, tracked in parse_reg_type.
local function parse_reg(expr, shift)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
  if not tname then
    tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
  end
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
  if r then
    r = tonumber(r)
    -- Register 31 is valid for q/d/s/h/b, but for x/w only with the internal
    -- "@" prefix. The inner parentheses matter: without them every q/d/s/h/b
    -- register number matched by the pattern (up to 39) was accepted.
    if r <= 30 or (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
      if not parse_reg_type then
        parse_reg_type = rt
      elseif parse_reg_type ~= rt then
        werror("register size mismatch")
      end
      return shl(r, shift), tp
    end
  end
  local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
  if vreg then
    if not parse_reg_type then
      parse_reg_type = vrt
    elseif parse_reg_type ~= vrt then
      werror("register size mismatch")
    end
    if shift then waction("VREG", shift, vreg) end
    return 0
  end
  werror("bad register name `"..expr.."'")
end
289
-- Parse the base register of an address operand into the field at bit 5.
-- "sp" is special-cased to 0x3e0. Any other operand must parse as an
-- x-class register. parse_reg_type is reset afterwards.
-- Returns the shifted register number and the type entry from parse_reg.
local function parse_reg_base(expr)
  if expr == "sp" then
    return 0x3e0
  end
  local field, tp = parse_reg(expr, 5)
  if parse_reg_type ~= "x" then
    werror("bad register type")
  end
  parse_reg_type = false
  return field, tp
end
297
-- Environment table used for chunks compiled by loadenv.
local parse_ctx = {}

-- Compile source string `s` with parse_ctx as its environment.
-- Uses loadstring+setfenv when setfenv exists, otherwise the four-argument
-- form of load. The first return value is nil if compilation fails.
local loadenv
if setfenv then
  loadenv = function(s)
    local chunk = loadstring(s, "")
    if chunk then setfenv(chunk, parse_ctx) end
    return chunk
  end
else
  loadenv = function(s)
    return load(s, "", nil, parse_ctx)
  end
end
307
-- Convert `n` to a number. Plain numerals go through tonumber. Anything else
-- is compiled as `return <n>` via loadenv and evaluated under pcall, so that
-- simple arithmetic (as produced by the opcode aliases) is accepted too.
-- Returns nil when no number results.
local function parse_number(n)
  local direct = tonumber(n)
  if direct then return direct end
  local chunk = loadenv("return "..n)
  if not chunk then return nil end
  local ok, result = pcall(chunk)
  if ok and type(result) == "number" then return result end
  return nil
end
319
-- Parse a `#`-prefixed immediate into an instruction bit field.
--   bits:   field width.
--   shift:  bit position of the field.
--   scale:  the value must be a multiple of 2^scale and is stored divided by it.
--   signed: accept negative values (stored modulo 2^bits).
-- Constant operands are range-checked and returned already shifted. Operands
-- that parse_number cannot evaluate emit an IMM action (argument packs
-- signed/scale/bits/shift) and contribute 0 to the opcode.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local value = parse_number(imm)
  if not value then
    local flags = signed and 32768 or 0
    waction("IMM", flags+scale*1024+bits*32+shift, imm)
    return 0
  end
  local scaled = sar(value, scale)
  if shl(scaled, scale) == value then
    if signed then
      local top = sar(scaled, bits-1)
      if top == 0 then
        return shl(scaled, shift)
      elseif top == -1 then
        return shl(scaled + shl(1, bits), shift)
      end
    elseif sar(scaled, bits) == 0 then
      return shl(scaled, shift)
    end
  end
  werror("out of range immediate `"..imm.."'")
end
341
-- Parse a `#`-prefixed immediate for the 12-bit immediate field at bit 10.
-- A constant is accepted if it fits in 12 bits, or if only bits 12..23 are
-- set (then 0x00400000 is added to the shifted-down value). Non-constant
-- operands emit an IMM12 action and contribute 0.
local function parse_imm12(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local value = parse_number(imm)
  if not value then
    waction("IMM12", 0, imm)
    return 0
  end
  if shr(value, 12) == 0 then
    return shl(value, 10)
  end
  if band(value, 0xff000fff) == 0 then
    return shr(value, 2) + 0x00400000
  end
  werror("out of range immediate `"..imm.."'")
end
358
-- Parse a `#`-prefixed immediate for the 13-bit bitmask-immediate field.
-- Integer constants in 0..0xffffffff are encoded here: the value is turned
-- into a string of bits (LSB first, inverted beforehand if bit 0 is set),
-- extended to 64 characters, and matched against a zeros/ones/zeros/ones
-- run pattern to derive the element width and run lengths.
-- Everything else is deferred to an IMM13X action (x registers, low and high
-- 32-bit halves passed as two arguments) or an IMM13W action (otherwise).
local function parse_imm13(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local value = parse_number(imm)
  local is64 = parse_reg_type == "x"
  if not (value and value % 1 == 0 and value >= 0 and value <= 0xffffffff) then
    if is64 then
      waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
      actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
    else
      waction("IMM13W", 0, imm)
    end
    return 0
  end
  local inverted = false
  if band(value, 1) == 1 then
    value = bit.bnot(value)
    inverted = true
  end
  local digits = {}
  for i = 1, 32 do
    digits[i] = band(value, 1)
    value = shr(value, 1)
  end
  local pat = table.concat(digits)
  -- Upper half: sign-fill for x registers, a copy of the lower half otherwise.
  pat = pat..(is64 and (inverted and "1" or "0"):rep(32) or pat)
  local z0, o1, z2, o3 = pat:match("^(0+)(1+)(0*)(1*)")
  if z0 then
    local width = o3 == "" and (is64 and 64 or 32) or #o1+#z2
    -- Width must be a power of two and the pattern must repeat with it.
    if band(width, width-1) == 0 and pat == pat:sub(1, width):rep(64/width) then
      local imms = band(-2*width, 0x3f) - 1
      if width == 64 then imms = imms + 0x1000 end
      if inverted then
        return shl(width-#o1-#z0, 16) + shl(imms+width-#o1, 10)
      end
      return shl(width-#z0, 16) + shl(imms+#o1, 10)
    end
  end
  werror("out of range immediate `"..imm.."'")
end
394
-- Parse a `#`-prefixed immediate in 0..63 that is split across the opcode:
-- the low five bits go to bit 19 and values >= 32 additionally set bit 31.
-- Non-constant operands emit an IMM6 action and contribute 0.
local function parse_imm6(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local value = parse_number(imm)
  if not value then
    waction("IMM6", 0, imm)
    return 0
  end
  if value >= 0 and value <= 63 then
    local high = value >= 32 and 0x80000000 or 0
    return shl(band(value, 0x1f), 19) + high
  end
  werror("out of range immediate `"..imm.."'")
end
409
-- Encode a load/store offset (text without the leading `#`).
-- Preference order for constants:
--   1. non-negative multiple of 2^scale whose quotient is below 0x1000:
--      stored at bit 10 with 0x01000000 added;
--   2. any value in -256..255: stored modulo 512 at bit 12.
-- Non-constant offsets emit an IMML action carrying `scale`.
local function parse_imm_load(imm, scale)
  local value = parse_number(imm)
  if not value then
    waction("IMML", scale, imm)
    return 0
  end
  local scaled = sar(value, scale)
  if shl(scaled, scale) == value and scaled >= 0 and scaled < 0x1000 then
    return shl(scaled, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
  end
  if value >= -256 and value < 256 then
    return shl(band(value, 511), 12) -- Unscaled, signed 9 bit offset.
  end
  werror("out of range immediate `"..imm.."'")
end
425
-- Parse a `#`-prefixed constant for the 8-bit floating-point immediate.
-- The value is split with math.frexp. It is encodable when the scaled
-- mantissa is one of 16 steps and the exponent survives a round trip through
-- a 3-bit signed field. Non-constant operands are not supported.
local function parse_fpimm(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local value = parse_number(imm)
  if not value then
    werror("NYI fpimm action")
    return
  end
  local mant, exp = math.frexp(value)
  local sign, exp3 = 0, band(exp-2, 7)
  if mant < 0 then
    mant = -mant
    sign = 0x00100000
  end
  mant = mant*32-16
  local exact = mant % 1 == 0 and mant >= 0 and mant <= 15
  if exact and sar(shl(exp3, 29), 29)+2 == exp then
    return sign + shl(exp3, 17) + shl(mant, 13)
  end
  werror("out of range immediate `"..imm.."'")
end
443
-- Parse a shifted-register suffix such as `lsl #3`.
-- The shift kind (looked up in map_shift) goes to bit 22, the 6-bit amount
-- to bit 10.
local function parse_shift(expr)
  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
  local code = map_shift[kind]
  if not code then werror("expected shift operand") end
  local field = parse_imm(amount, 6, 10, 0, false)
  return field + shl(code, 22)
end
450
-- Parse the `lsl #n` suffix of the move-wide instructions.
-- Only decimal literals are accepted. n must be a multiple of 16, at most 48
-- for x registers and at most 16 otherwise. The result is n shifted to bit 17.
local function parse_lslx16(expr)
  local amount = tonumber(match(expr, "^lsl%s*#(%d+)$"))
  if not amount then werror("expected shift operand") end
  local forbidden = parse_reg_type == "x" and 0xffffffcf or 0xffffffef
  if band(amount, forbidden) ~= 0 then
    werror("bad shift amount")
  end
  return shl(amount, 17)
end
460
-- Parse an extended-register suffix such as `uxtw #2`.
-- `lsl` is treated as code 3 for x registers and 2 otherwise. All other
-- names come from map_extend. The code goes to bit 13. An optional 3-bit
-- amount goes to bit 10.
local function parse_extend(expr)
  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
  local code
  if kind == "lsl" then
    code = parse_reg_type == "x" and 3 or 2
  else
    code = map_extend[kind]
  end
  if not code then werror("expected extend operand") end
  local field = 0
  if amount ~= "" then
    field = parse_imm(amount, 3, 10, 0, false)
  end
  return field + shl(code, 13)
end
471
-- Translate a condition name via map_cond into the field at bit 12.
-- The code is XORed with `inv` first (the callers in this file pass 0 or 1).
local function parse_cond(expr, inv)
  local code = map_cond[expr]
  if not code then
    werror("expected condition operand")
  end
  local adjusted = bit.bxor(code, inv)
  return shl(adjusted, 12)
end
477
-- Parse the address operand(s) of a single-register load/store and merge the
-- result into `op`. The operand starts at params[n]. Recognized shapes:
--   TYPE:reg field          typed access, offset emitted via an IMML action
--   [base], #imm            bracketed base followed by a separate immediate
--   [base, #imm]!           bracketed base+immediate with trailing `!`
--   [base]                  plain base
--   [base, #imm]            immediate offset, encoded by parse_imm_load
--   [base, idx {, ext {#s}}] register index with optional extend/shift
-- The access size scale is taken from the top two bits of `op`.
local function parse_load(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local scale = shr(op, 30)
  local addr, post = params[n], params[n+1]
  local inner, wb = match(addr, "^%[%s*(.-)%s*%](!?)$")
  if not inner then
    -- No brackets: only the typed-register form is acceptable.
    if not post then
      local reg, field = match(addr, "^([%w_:]+)%s*(.*)$")
      if reg and field ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMML", scale, format(tp.ctypefmt, field))
          return op + base
        end
      end
    end
    werror("expected address operand")
  end

  if post then
    if wb == "!" then werror("bad use of '!'") end
    return op + parse_reg_base(inner) + parse_imm(post, 9, 12, 0, true) + 0x400
  end

  if wb == "!" then
    local breg, ofs = match(inner, "^([^,%s]*)%s*,%s*(.*)$")
    if not breg then werror("bad use of '!'") end
    return op + parse_reg_base(breg) + parse_imm(ofs, 9, 12, 0, true) + 0xc00
  end

  local breg, rest = match(inner, "^([^,%s]*)%s*(.*)$")
  op = op + parse_reg_base(breg)
  if rest == "" then
    return op + 0x01000000
  end

  local imm = match(rest, "^,%s*#(.*)$")
  if imm then
    return op + parse_imm_load(imm, scale)
  end

  -- Register index, optionally followed by an extend/shift and an amount.
  local ireg, ext, amount = match(rest, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
  op = op + parse_reg(ireg, 16) + 0x00200800
  local xindex = parse_reg_type == "x"
  if not xindex and parse_reg_type ~= "w" then
    werror("bad index register type")
  end
  if ext == "" then
    if not xindex then werror("bad index register type") end
    return op + 0x6000
  end
  -- The amount may be absent, `#0`, or exactly `#<scale>`.
  if amount ~= "" and amount ~= "#0" then
    if amount == "#"..scale then
      op = op + 0x1000
    else
      werror("bad scale")
    end
  end
  if xindex then
    if ext == "lsl" and amount ~= "" then
      op = op + 0x6000
    elseif ext == "sxtx" then
      op = op + 0xe000
    else
      werror("bad extend/shift specifier")
    end
  elseif ext == "uxtw" then
    op = op + 0x4000
  elseif ext == "sxtw" then
    op = op + 0xc000
  else
    werror("bad extend/shift specifier")
  end
  return op
end
548
-- Parse the address operand(s) of a register-pair load/store (ldp/stp/ldpsw)
-- and merge the result into `op`. The operand starts at params[n]. Shapes:
--   TYPE:reg field     typed access, offset emitted via an IMM action
--   [base], #imm       bracketed base followed by a separate immediate
--   [base, #imm]!      bracketed base+immediate with trailing `!`
--   [base] / [base, #imm]
-- The 7-bit signed offset is stored at bit 15, divided by 2^scale.
local function parse_load_pair(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local pn, p2 = params[n], params[n+1]
  -- log2 of the element size. For opcodes with bit 26 clear (the w/x/ldpsw
  -- templates) it is 2 + bit 31. For opcodes with bit 26 set (the s/d
  -- templates) it is 2 + the top two bits. Testing only `shr(op, 30) == 0`
  -- gives 3 for ldpsw (0x68400000), whose offset is scaled by 4, not 8.
  local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not p1 then
    -- No brackets: only the typed-register form is acceptable.
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          -- Same packing as parse_imm: signed, 7 bits, shift 15, plus scale.
          waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
          return op + base + 0x01000000
        end
      end
    end
    werror("expected address operand")
  end
  if p2 then
    if wb == "!" then werror("bad use of '!'") end
    op = op + 0x00800000
  else
    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
    -- A bare `[base]` is treated as offset zero.
    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
    op = op + (wb == "!" and 0x01800000 or 0x01000000)
  end
  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
end
577
-- Classify a label operand. `def` is true for a label definition and false
-- for a reference. Returns mode, number[, expression], or nothing if the
-- text is not a valid label in that context:
--   =>expr       "PC", 0, expr
--   ->name       "LG", global number from map_global
--   1..9         "LG", 11..19            (definitions only)
--   >1..>9       "LG", 1..9              (references only)
--   <1..<9       "LG", 11..19            (references only)
--   extern name  "EXT", number from map_extern   (references only)
--   &expr        "A", 0, "(ptrdiff_t)(expr)"     (references only)
local function parse_label(label, def)
  local lead = label:sub(1, 2)
  if lead == "=>" then
    return "PC", 0, label:sub(3)
  elseif lead == "->" then
    return "LG", map_global[label:sub(3)]
  end
  if def then
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
    return
  end
  local dir, lnum = match(label, "^([<>])([1-9])$")
  if dir then -- Fwd: 1-9, Bkwd: 11-19.
    local bias = dir == ">" and 0 or 10
    return "LG", lnum + bias
  end
  local extname = match(label, "^extern%s+(%S+)$")
  if extname then
    return "EXT", map_extern[extname]
  end
  if label:sub(1, 1) == "&" then
    return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
  end
end
610
-- Map an opcode that takes a label operand to the flag value added to the
-- REL_* action argument. Opcode groups are recognized by masking the top
-- bits of `op`. An opcode matching none of them trips an assertion.
local function branch_type(op)
  if band(op, 0x7c000000) == 0x14000000 then
    return 0 -- B, BL
  end
  if shr(op, 24) == 0x54
     or band(op, 0x7e000000) == 0x34000000
     or band(op, 0x3b000000) == 0x18000000 then
    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
  end
  if band(op, 0x7e000000) == 0x36000000 then
    return 0x1000 -- TBZ, TBNZ
  end
  local adr = band(op, 0x9f000000)
  if adr == 0x10000000 then
    return 0x2000 -- ADR
  end
  -- band() with one argument normalizes the constant the same way as the
  -- two-argument result, so the comparison is like-for-like.
  if adr == band(0x90000000) then
    return 0x3000 -- ADRP
  end
  assert(false, "unknown branch type")
end
623
624------------------------------------------------------------------------------
625
626local map_op, op_template
627
-- Build an opcode handler that rewrites its operands with `f` and then
-- assembles them using the template registered under `opname`.
-- Called without params (help mode) the handler returns "-> " followed by
-- `opname` minus its last two characters.
local function op_alias(opname, f)
  local function handler(params, nparams)
    if not params then
      return "-> "..opname:sub(1, -3)
    end
    f(params, nparams)
    op_template(params, map_op[opname], nparams)
  end
  return handler
end
635
-- Operand rewrite for the bitfield-extract aliases: replaces operand 4 with
-- the expression (operand 3) + (operand 4) - 1. The first character of each
-- operand (expected to be `#`) is dropped before they are combined.
local function alias_bfx(p)
  local lsb = p[3]:sub(2)
  local width = p[4]:sub(2)
  p[4] = "#("..lsb..")+("..width..")-1"
end
639
-- Operand rewrite for the bitfield-insert aliases. Operand 3 becomes
-- (size - operand3) % size, where size is 32 if the first operand parses as
-- a w register and 64 otherwise. Operand 4 becomes operand4 - 1.
local function alias_bfiz(p)
  parse_reg(p[1], 0)
  local size = parse_reg_type == "w" and "32" or "64"
  p[3] = "#("..size.."-("..p[3]:sub(2).."))%"..size
  p[4] = "#("..p[4]:sub(2)..")-1"
end
650
-- Immediate left shift, expressed through the ubfm_4 template:
-- operand 3 becomes (size - sh) % size and operand 4 becomes size-1 - sh,
-- with size 32 for w registers and 64 otherwise.
local alias_lslimm = op_alias("ubfm_4", function(p)
  parse_reg(p[1], 0)
  local sh = p[3]:sub(2)
  local size, top = "64", "63"
  if parse_reg_type == "w" then
    size, top = "32", "31"
  end
  p[3] = "#("..size.."-("..sh.."))%"..size
  p[4] = "#"..top.."-("..sh..")"
end)
662
663-- Template strings for ARM64 instructions.
664map_op = {
665 -- Basic data processing instructions.
666 add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
667 add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
668 adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
669 adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
670 cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
671 cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
672
673 sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
674 sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
675 subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
676 subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
677 cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
678 cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
679
680 neg_2 = "4b0003e0DMg",
681 neg_3 = "4b0003e0DMSg",
682 negs_2 = "6b0003e0DMg",
683 negs_3 = "6b0003e0DMSg",
684
685 adc_3 = "1a000000DNMg",
686 adcs_3 = "3a000000DNMg",
687 sbc_3 = "5a000000DNMg",
688 sbcs_3 = "7a000000DNMg",
689 ngc_2 = "5a0003e0DMg",
690 ngcs_2 = "7a0003e0DMg",
691
692 and_3 = "0a000000DNMg|12000000pDNig",
693 and_4 = "0a000000DNMSg",
694 orr_3 = "2a000000DNMg|32000000pDNig",
695 orr_4 = "2a000000DNMSg",
696 eor_3 = "4a000000DNMg|52000000pDNig",
697 eor_4 = "4a000000DNMSg",
698 ands_3 = "6a000000DNMg|72000000DNig",
699 ands_4 = "6a000000DNMSg",
700 tst_2 = "6a00001fNMg|7200001fNig",
701 tst_3 = "6a00001fNMSg",
702
703 bic_3 = "0a200000DNMg",
704 bic_4 = "0a200000DNMSg",
705 orn_3 = "2a200000DNMg",
706 orn_4 = "2a200000DNMSg",
707 eon_3 = "4a200000DNMg",
708 eon_4 = "4a200000DNMSg",
709 bics_3 = "6a200000DNMg",
710 bics_4 = "6a200000DNMSg",
711
712 movn_2 = "12800000DWg",
713 movn_3 = "12800000DWRg",
714 movz_2 = "52800000DWg",
715 movz_3 = "52800000DWRg",
716 movk_2 = "72800000DWg",
717 movk_3 = "72800000DWRg",
718
719 -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
720 mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
721 mov_3 = "2a0003e0DMSg",
722 mvn_2 = "2a2003e0DMg",
723 mvn_3 = "2a2003e0DMSg",
724
725 adr_2 = "10000000DBx",
726 adrp_2 = "90000000DBx",
727
728 csel_4 = "1a800000DNMCg",
729 csinc_4 = "1a800400DNMCg",
730 csinv_4 = "5a800000DNMCg",
731 csneg_4 = "5a800400DNMCg",
732 cset_2 = "1a9f07e0Dcg",
733 csetm_2 = "5a9f03e0Dcg",
734 cinc_3 = "1a800400DNmcg",
735 cinv_3 = "5a800000DNmcg",
736 cneg_3 = "5a800400DNmcg",
737
738 ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
739 ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
740
741 madd_4 = "1b000000DNMAg",
742 msub_4 = "1b008000DNMAg",
743 mul_3 = "1b007c00DNMg",
744 mneg_3 = "1b00fc00DNMg",
745
746 smaddl_4 = "9b200000DxNMwAx",
747 smsubl_4 = "9b208000DxNMwAx",
748 smull_3 = "9b207c00DxNMw",
749 smnegl_3 = "9b20fc00DxNMw",
750 smulh_3 = "9b407c00DNMx",
751 umaddl_4 = "9ba00000DxNMwAx",
752 umsubl_4 = "9ba08000DxNMwAx",
753 umull_3 = "9ba07c00DxNMw",
754 umnegl_3 = "9ba0fc00DxNMw",
755 umulh_3 = "9bc07c00DNMx",
756
757 udiv_3 = "1ac00800DNMg",
758 sdiv_3 = "1ac00c00DNMg",
759
760 -- Bit operations.
761 sbfm_4 = "13000000DN12w|93400000DN12x",
762 bfm_4 = "33000000DN12w|b3400000DN12x",
763 ubfm_4 = "53000000DN12w|d3400000DN12x",
764 extr_4 = "13800000DNM2w|93c00000DNM2x",
765
766 sxtb_2 = "13001c00DNw|93401c00DNx",
767 sxth_2 = "13003c00DNw|93403c00DNx",
768 sxtw_2 = "93407c00DxNw",
769 uxtb_2 = "53001c00DNw",
770 uxth_2 = "53003c00DNw",
771
772 sbfx_4 = op_alias("sbfm_4", alias_bfx),
773 bfxil_4 = op_alias("bfm_4", alias_bfx),
774 ubfx_4 = op_alias("ubfm_4", alias_bfx),
775 sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
776 bfi_4 = op_alias("bfm_4", alias_bfiz),
777 ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
778
779 lsl_3 = function(params, nparams)
780 if params and params[3]:byte() == 35 then
781 return alias_lslimm(params, nparams)
782 else
783 return op_template(params, "1ac02000DNMg", nparams)
784 end
785 end,
786 lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
787 asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
788 ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
789
790 clz_2 = "5ac01000DNg",
791 cls_2 = "5ac01400DNg",
792 rbit_2 = "5ac00000DNg",
793 rev_2 = "5ac00800DNw|dac00c00DNx",
794 rev16_2 = "5ac00400DNg",
795 rev32_2 = "dac00800DNx",
796
797 -- Loads and stores.
798 ["strb_*"] = "38000000DwL",
799 ["ldrb_*"] = "38400000DwL",
800 ["ldrsb_*"] = "38c00000DwL|38800000DxL",
801 ["strh_*"] = "78000000DwL",
802 ["ldrh_*"] = "78400000DwL",
803 ["ldrsh_*"] = "78c00000DwL|78800000DxL",
804 ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
805 ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
806 ["ldrsw_*"] = "98000000DxB|b8800000DxL",
807 -- NOTE: ldur etc. are handled by ldr et al.
808
809 ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
810 ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
811 ["ldpsw_*"] = "68400000DAxP",
812
813 -- Branches.
814 b_1 = "14000000B",
815 bl_1 = "94000000B",
816 blr_1 = "d63f0000Nx",
817 br_1 = "d61f0000Nx",
818 ret_0 = "d65f03c0",
819 ret_1 = "d65f0000Nx",
820 -- b.cond is added below.
821 cbz_2 = "34000000DBg",
822 cbnz_2 = "35000000DBg",
823 tbz_3 = "36000000DTBw|36000000DTBx",
824 tbnz_3 = "37000000DTBw|37000000DTBx",
825
826 -- Miscellaneous instructions.
827 -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
828 -- TODO: sys, sysl, ic, dc, at, tlbi
829 -- TODO: hint, yield, wfe, wfi, sev, sevl
830 -- TODO: clrex, dsb, dmb, isb
831 nop_0 = "d503201f",
832 brk_0 = "d4200000",
833 brk_1 = "d4200000W",
834
835 -- Floating point instructions.
836 fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
837 fabs_2 = "1e20c000DNf",
838 fneg_2 = "1e214000DNf",
839 fsqrt_2 = "1e21c000DNf",
840
841 fcvt_2 = "1e22c000DdNs|1e624000DsNd",
842
843 -- TODO: half-precision and fixed-point conversions.
844 fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
845 fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
846 fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
847 fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
848 fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
849 fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
850 fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
851 fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
852 fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
853 fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
854
855 scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
856 ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
857
858 frintn_2 = "1e244000DNf",
859 frintp_2 = "1e24c000DNf",
860 frintm_2 = "1e254000DNf",
861 frintz_2 = "1e25c000DNf",
862 frinta_2 = "1e264000DNf",
863 frintx_2 = "1e274000DNf",
864 frinti_2 = "1e27c000DNf",
865
866 fadd_3 = "1e202800DNMf",
867 fsub_3 = "1e203800DNMf",
868 fmul_3 = "1e200800DNMf",
869 fnmul_3 = "1e208800DNMf",
870 fdiv_3 = "1e201800DNMf",
871
872 fmadd_4 = "1f000000DNMAf",
873 fmsub_4 = "1f008000DNMAf",
874 fnmadd_4 = "1f200000DNMAf",
875 fnmsub_4 = "1f208000DNMAf",
876
877 fmax_3 = "1e204800DNMf",
878 fmaxnm_3 = "1e206800DNMf",
879 fmin_3 = "1e205800DNMf",
880 fminnm_3 = "1e207800DNMf",
881
882 fcmp_2 = "1e202000NMf|1e202008NZf",
883 fcmpe_2 = "1e202010NMf|1e202018NZf",
884
885 fccmp_4 = "1e200400NMVCf",
886 fccmpe_4 = "1e200410NMVCf",
887
888 fcsel_4 = "1e200c00DNMCf",
889
890 -- TODO: crc32*, aes*, sha*, pmull
891 -- TODO: SIMD instructions.
892}
893
-- Register one conditional branch opcode (`b<cond>`) per map_cond entry.
-- The condition code is added into the low bits of the base opcode.
for name, code in pairs(map_cond) do
  local opname = "b"..name.."_1"
  local encoding = tohex(0x54000000+code)
  map_op[opname] = encoding.."B"
end
897
898------------------------------------------------------------------------------
899
-- Handle opcodes defined with template strings.
-- A template is 8 hex digits (the base opcode) followed by one letter per
-- parsing step. Letters handled here:
--   D N M A   register into the field at bit 0 / 5 / 16 / 10
--   m         previous operand again, as register at bit 16
--   p         rewrite a literal "sp" operand to "@x31" (consumes nothing)
--   g f       finish a register group: sets 0x80000000 for x (g) or
--             0x00400000 for d (f); anything but w (g) / s (f) is an error
--   x w d s   finish a register group that must have exactly this class
--   L P       load/store address, single (L) or pair (P)
--   B         label operand, emitted as a REL_* action
--   I i T F Z immediates: 12-bit, bitmask, 6-bit split, fp, literal zero
--   W 1 2 5 V plain unsigned immediates (see imm_field below)
--   S X R     shift, extend, and `lsl #n` suffixes
--   C c       condition code, as is (C) or XORed with 1 (c)
-- The finished word is stored at action list position `pos`.
local parse_template
do
  -- Register letters -> bit position of the register field.
  local reg_field = { D = 0, N = 5, M = 16, A = 10 }
  -- Plain immediate letters -> { field width, bit position }.
  local imm_field = {
    W = { 16, 5 },
    ["1"] = { 6, 16 },
    ["2"] = { 6, 10 },
    ["5"] = { 5, 16 },
    V = { 4, 0 },
  }

  function parse_template(params, template, nparams, pos)
    local op = tonumber(template:sub(1, 8), 16)
    local idx = 1

    parse_reg_type = false

    -- Process each character.
    for ch in gmatch(template:sub(9), ".") do
      local arg = params[idx]
      local rpos, ifield = reg_field[ch], imm_field[ch]
      if rpos then
        op = op + parse_reg(arg, rpos); idx = idx + 1
      elseif ifield then
        op = op + parse_imm(arg, ifield[1], ifield[2], 0, false); idx = idx + 1
      elseif ch == "m" then
        op = op + parse_reg(params[idx-1], 16)

      elseif ch == "p" then
        if arg == "sp" then params[idx] = "@x31" end
      elseif ch == "g" then
        if parse_reg_type == "x" then
          op = op + 0x80000000
        elseif parse_reg_type ~= "w" then
          werror("bad register type")
        end
        parse_reg_type = false
      elseif ch == "f" then
        if parse_reg_type == "d" then
          op = op + 0x00400000
        elseif parse_reg_type ~= "s" then
          werror("bad register type")
        end
        parse_reg_type = false
      elseif ch == "x" or ch == "w" or ch == "d" or ch == "s" then
        if parse_reg_type ~= ch then
          werror("register size mismatch")
        end
        parse_reg_type = false

      elseif ch == "L" then
        op = parse_load(params, nparams, idx, op)
      elseif ch == "P" then
        op = parse_load_pair(params, nparams, idx, op)

      elseif ch == "B" then
        local mode, v, s = parse_label(arg, false); idx = idx + 1
        if not mode then werror("bad label `"..arg.."'") end
        local btype = branch_type(op)
        if mode == "A" then
          -- Absolute address: passed as two 32-bit halves.
          waction("REL_"..mode, v+btype, format("(unsigned int)(%s)", s))
          actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
        else
          waction("REL_"..mode, v+btype, s, 1)
        end

      elseif ch == "I" then
        op = op + parse_imm12(arg); idx = idx + 1
      elseif ch == "i" then
        op = op + parse_imm13(arg); idx = idx + 1
      elseif ch == "T" then
        op = op + parse_imm6(arg); idx = idx + 1
      elseif ch == "F" then
        op = op + parse_fpimm(arg); idx = idx + 1
      elseif ch == "Z" then
        if arg ~= "#0" and arg ~= "#0.0" then werror("expected zero immediate") end
        idx = idx + 1

      elseif ch == "S" then
        op = op + parse_shift(arg); idx = idx + 1
      elseif ch == "X" then
        op = op + parse_extend(arg); idx = idx + 1
      elseif ch == "R" then
        op = op + parse_lslx16(arg); idx = idx + 1
      elseif ch == "C" then
        op = op + parse_cond(arg, 0); idx = idx + 1
      elseif ch == "c" then
        op = op + parse_cond(arg, 1); idx = idx + 1

      else
        assert(false)
      end
    end
    wputpos(pos, op)
  end
end
999
-- Assemble one instruction from a `|`-separated list of template
-- alternatives. Each alternative is tried in order under pcall. The first
-- one that parses wins. After a failed attempt the section position and up
-- to four action list / argument entries are rolled back. If every
-- alternative fails, the last error is re-raised.
-- Without params (help mode) the template text is returned with all
-- 8-digit hex opcodes stripped.
function op_template(params, template, nparams)
  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 4 positions.
  if secpos+4 > maxsecpos then wflush() end
  local pos = wpos()
  local list_mark, args_mark, sec_mark = #actlist, #actargs, secpos

  local ok, err
  for alt in gmatch(template, "[^|]+") do
    ok, err = pcall(parse_template, params, alt, nparams, pos)
    if ok then return end
    -- Undo whatever the failed attempt emitted.
    secpos = sec_mark
    for i = 1, 4 do actlist[list_mark+i] = nil end
    for i = 1, 4 do actargs[args_mark+i] = nil end
  end
  error(err, 0)
end
1025
1026map_op[".template__"] = op_template
1027
1028------------------------------------------------------------------------------
1029
-- Pseudo-opcode marking where the action list is emitted.
-- The operand is the C variable name. It is used as given, unchecked.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1]
    wline(function(out) writeactions(out, cvar) end)
    return
  end
  return "cvar"
end
1036
-- Pseudo-opcode marking where the global enum is emitted.
-- The operand is the enum name prefix. It is used as given, unchecked.
map_op[".globals_1"] = function(params)
  if params then
    local enum_prefix = params[1]
    wline(function(out) writeglobals(out, enum_prefix) end)
    return
  end
  return "prefix"
end
1043
-- Pseudo-opcode marking where the global names are emitted.
-- The operand is the C variable name. It is used as given, unchecked.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1]
    wline(function(out) writeglobalnames(out, cvar) end)
    return
  end
  return "cvar"
end
1050
-- Pseudo-opcode marking where the extern names are emitted.
-- The operand is the C variable name. It is used as given, unchecked.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1]
    wline(function(out) writeexternnames(out, cvar) end)
    return
  end
  return "cvar"
end
1057
1058------------------------------------------------------------------------------
1059
-- Label definition pseudo-opcode (the core converts `name:` into this).
-- Accepts whatever parse_label recognizes as a definition and rejects
-- extern labels.
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  if secpos+1 > maxsecpos then wflush() end
  local mode, num, expr = parse_label(params[1], true)
  local valid = mode and mode ~= "EXT"
  if not valid then werror("bad label definition") end
  waction("LABEL_"..mode, num, expr, 1)
end
1068
1069------------------------------------------------------------------------------
1070
-- Data storage pseudo-opcodes. `.long` stores 4 bytes per operand. Every
-- other name registered below stores 8 bytes (low word first).
-- Per operand:
--   * a constant that fits 32 bits (signed or unsigned) is written directly,
--     followed for 8-byte items by a high word of all ones (negative) or 0;
--   * a wider constant is an error for `.long` and otherwise handled like a
--     non-constant;
--   * non-constants become IMMV actions (two halves for 8-byte items), or a
--     REL_* action if the operand is a label other than `&expr`.
local function op_data(params)
  if not params then return "imm..." end
  local size = params.op == ".long" and 4 or 8
  for _, operand in ipairs(params) do
    local value = parse_number(operand)
    if value then
      local low = tobit(value)
      if low == value or (low < 0 and low + 2^32 == value) then
        wputw(low < 0 and low + 2^32 or low)
        if size == 8 then
          wputw(value < 0 and 0xffffffff or 0)
        end
      elseif size == 4 then
        werror("bad immediate `"..operand.."'")
      else
        -- Too wide for a direct word: fall through to the dynamic path.
        value = nil
      end
    end
    if not value then
      local mode, v, s = parse_label(operand, false)
      if size == 4 then
        if mode then werror("label does not fit into .long") end
        waction("IMMV", 0, operand)
      elseif mode and mode ~= "A" then
        waction("REL_"..mode, v+0x8000, s, 1)
      else
        if mode == "A" then operand = s end
        waction("IMMV", 0, format("(unsigned int)(%s)", operand))
        waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", operand))
      end
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end
map_op[".long_*"] = op_data
map_op[".quad_*"] = op_data
map_op[".addr_*"] = op_data
1109
-- Alignment pseudo-opcode. The operand must be a numeric literal that is a
-- power of two between 2 and 256. Anything else is "bad alignment".
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    -- Compare against 2^1 .. 2^8.
    for exp = 1, 8 do
      if align == 2^exp then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end
1128
1129------------------------------------------------------------------------------
1130
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Operands: name, ctype[, reg]. Registers the type in map_type, defines
-- `#name` as sizeof(ctype), and emits a DtN(_V) helper macro, where N is the
-- type's running number in hex.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local entry = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]
1157
-- Write a table of all defined types (name, C type, default register),
-- sorted by name. `lvl` is accepted but not used here.
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do
    names[#names+1] = name
  end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local name = names[i]
    local tp = map_type[name]
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
  end
  out:write("\n")
end
1171
1172------------------------------------------------------------------------------
1173
-- Switch to section `num`: emits a SECTION action and then flushes with the
-- terminal flag set.
function _M.section(num)
  waction("SECTION", num)
  -- SECTION is a terminal action.
  wflush(true)
end
1179
1180------------------------------------------------------------------------------
1181
-- Write the architecture banner (arch, version, release date from _info)
-- followed by the action dump.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end
1188
-- Write all user-defined elements: types, then globals, then externs.
function _M.dumpdef(out, lvl)
  local dumpers = { dumptypes, dumpglobals, dumpexterns }
  for i = 1, 3 do
    dumpers[i](out, lvl)
  end
end
1195
1196------------------------------------------------------------------------------
1197
-- Receive the core's line-writer and diagnostic callbacks (stored in wline,
-- werror, wfatal, wwarn) and hand back this module's flush function.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
1203
-- Record the architecture name and option table passed in by the caller.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
1208
-- Chain the maps: lookups that miss in map_op fall back to the core opcode
-- map, and lookups that miss in the core define map fall back to
-- map_archdef. Returns the two front tables (map_op, map_def).
function _M.mergemaps(map_coreop, map_def)
  local op_fallback = { __index = map_coreop }
  setmetatable(map_op, op_fallback)
  local def_fallback = { __index = map_archdef }
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end
1215
1216return _M
1217
1218------------------------------------------------------------------------------
1219
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
index e942b69a..b99b56b0 100644
--- a/dynasm/dasm_mips.h
+++ b/dynasm/dasm_mips.h
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
231 *pl = -pos; /* Label exists now. */ 231 *pl = -pos; /* Label exists now. */
232 b[pos++] = ofs; /* Store pass1 offset estimate. */ 232 b[pos++] = ofs; /* Store pass1 offset estimate. */
233 break; 233 break;
234 case DASM_IMM: 234 case DASM_IMM: case DASM_IMMS:
235#ifdef DASM_CHECKS 235#ifdef DASM_CHECKS
236 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); 236 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
237#endif 237#endif
@@ -273,7 +273,7 @@ int dasm_link(Dst_DECL, size_t *szp)
273 273
274 { /* Handle globals not defined in this translation unit. */ 274 { /* Handle globals not defined in this translation unit. */
275 int idx; 275 int idx;
276 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 276 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
277 int n = D->lglabels[idx]; 277 int n = D->lglabels[idx];
278 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 278 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
279 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 279 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 300 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 302 case DASM_IMM: case DASM_IMMS: pos++; break;
303 } 303 }
304 } 304 }
305 stop: (void)0; 305 stop: (void)0;
@@ -349,25 +349,32 @@ int dasm_encode(Dst_DECL, void *buffer)
349 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; 349 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
350 break; 350 break;
351 case DASM_REL_LG: 351 case DASM_REL_LG:
352 CK(n >= 0, UNDEF_LG); 352 if (n < 0) {
353 n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
354 goto patchrel;
355 }
353 /* fallthrough */ 356 /* fallthrough */
354 case DASM_REL_PC: 357 case DASM_REL_PC:
355 CK(n >= 0, UNDEF_PC); 358 CK(n >= 0, UNDEF_PC);
356 n = *DASM_POS2PTR(D, n); 359 n = *DASM_POS2PTR(D, n);
357 if (ins & 2048) 360 if (ins & 2048)
358 n = n - (int)((char *)cp - base); 361 n = (n + (int)(size_t)base) & 0x0fffffff;
359 else 362 else
360 n = (n + (int)base) & 0x0fffffff; 363 n = n - (int)((char *)cp - base);
361 patchrel: 364 patchrel: {
365 unsigned int e = 16 + ((ins >> 12) & 15);
362 CK((n & 3) == 0 && 366 CK((n & 3) == 0 &&
363 ((n + ((ins & 2048) ? 0x00020000 : 0)) >> 367 ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
364 ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); 368 cp[-1] |= ((n>>2) & ((1<<e)-1));
365 cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); 369 }
366 break; 370 break;
367 case DASM_LABEL_LG: 371 case DASM_LABEL_LG:
368 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 372 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
369 break; 373 break;
370 case DASM_LABEL_PC: break; 374 case DASM_LABEL_PC: break;
375 case DASM_IMMS:
376 cp[-1] |= ((n>>3) & 4); n &= 0x1f;
377 /* fallthrough */
371 case DASM_IMM: 378 case DASM_IMM:
372 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 379 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
373 break; 380 break;
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
index b2b43bd1..59147015 100644
--- a/dynasm/dasm_mips.lua
+++ b/dynasm/dasm_mips.lua
@@ -1,17 +1,20 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM MIPS module. 2-- DynASM MIPS32/MIPS64 module.
3-- 3--
4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------ 6------------------------------------------------------------------------------
7 7
8local mips64 = mips64
9local mipsr6 = _map_def.MIPSR6
10
8-- Module information: 11-- Module information:
9local _info = { 12local _info = {
10 arch = "mips", 13 arch = mips64 and "mips64" or "mips",
11 description = "DynASM MIPS module", 14 description = "DynASM MIPS32/MIPS64 module",
12 version = "1.3.0", 15 version = "1.5.0",
13 vernum = 10300, 16 vernum = 10500,
14 release = "2012-01-23", 17 release = "2021-05-02",
15 author = "Mike Pall", 18 author = "Mike Pall",
16 license = "MIT", 19 license = "MIT",
17} 20}
@@ -27,7 +30,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
27local match, gmatch = _s.match, _s.gmatch 30local match, gmatch = _s.match, _s.gmatch
28local concat, sort = table.concat, table.sort 31local concat, sort = table.concat, table.sort
29local bit = bit or require("bit") 32local bit = bit or require("bit")
30local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex 33local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
34local tohex = bit.tohex
31 35
32-- Inherited tables and callbacks. 36-- Inherited tables and callbacks.
33local g_opt, g_arch 37local g_opt, g_arch
@@ -38,7 +42,7 @@ local wline, werror, wfatal, wwarn
38local action_names = { 42local action_names = {
39 "STOP", "SECTION", "ESC", "REL_EXT", 43 "STOP", "SECTION", "ESC", "REL_EXT",
40 "ALIGN", "REL_LG", "LABEL_LG", 44 "ALIGN", "REL_LG", "LABEL_LG",
41 "REL_PC", "LABEL_PC", "IMM", 45 "REL_PC", "LABEL_PC", "IMM", "IMMS",
42} 46}
43 47
44-- Maximum number of section buffer positions for dasm_put(). 48-- Maximum number of section buffer positions for dasm_put().
@@ -235,7 +239,6 @@ local map_op = {
235 bne_3 = "14000000STB", 239 bne_3 = "14000000STB",
236 blez_2 = "18000000SB", 240 blez_2 = "18000000SB",
237 bgtz_2 = "1c000000SB", 241 bgtz_2 = "1c000000SB",
238 addi_3 = "20000000TSI",
239 li_2 = "24000000TI", 242 li_2 = "24000000TI",
240 addiu_3 = "24000000TSI", 243 addiu_3 = "24000000TSI",
241 slti_3 = "28000000TSI", 244 slti_3 = "28000000TSI",
@@ -245,70 +248,52 @@ local map_op = {
245 ori_3 = "34000000TSU", 248 ori_3 = "34000000TSU",
246 xori_3 = "38000000TSU", 249 xori_3 = "38000000TSU",
247 lui_2 = "3c000000TU", 250 lui_2 = "3c000000TU",
248 beqzl_2 = "50000000SB", 251 daddiu_3 = mips64 and "64000000TSI",
249 beql_3 = "50000000STB", 252 ldl_2 = mips64 and "68000000TO",
250 bnezl_2 = "54000000SB", 253 ldr_2 = mips64 and "6c000000TO",
251 bnel_3 = "54000000STB",
252 blezl_2 = "58000000SB",
253 bgtzl_2 = "5c000000SB",
254 lb_2 = "80000000TO", 254 lb_2 = "80000000TO",
255 lh_2 = "84000000TO", 255 lh_2 = "84000000TO",
256 lwl_2 = "88000000TO",
257 lw_2 = "8c000000TO", 256 lw_2 = "8c000000TO",
258 lbu_2 = "90000000TO", 257 lbu_2 = "90000000TO",
259 lhu_2 = "94000000TO", 258 lhu_2 = "94000000TO",
260 lwr_2 = "98000000TO", 259 lwu_2 = mips64 and "9c000000TO",
261 sb_2 = "a0000000TO", 260 sb_2 = "a0000000TO",
262 sh_2 = "a4000000TO", 261 sh_2 = "a4000000TO",
263 swl_2 = "a8000000TO",
264 sw_2 = "ac000000TO", 262 sw_2 = "ac000000TO",
265 swr_2 = "b8000000TO",
266 cache_2 = "bc000000NO",
267 ll_2 = "c0000000TO",
268 lwc1_2 = "c4000000HO", 263 lwc1_2 = "c4000000HO",
269 pref_2 = "cc000000NO",
270 ldc1_2 = "d4000000HO", 264 ldc1_2 = "d4000000HO",
271 sc_2 = "e0000000TO", 265 ld_2 = mips64 and "dc000000TO",
272 swc1_2 = "e4000000HO", 266 swc1_2 = "e4000000HO",
273 sdc1_2 = "f4000000HO", 267 sdc1_2 = "f4000000HO",
268 sd_2 = mips64 and "fc000000TO",
274 269
275 -- Opcode SPECIAL. 270 -- Opcode SPECIAL.
276 nop_0 = "00000000", 271 nop_0 = "00000000",
277 sll_3 = "00000000DTA", 272 sll_3 = "00000000DTA",
278 movf_2 = "00000001DS", 273 sextw_2 = "00000000DT",
279 movf_3 = "00000001DSC",
280 movt_2 = "00010001DS",
281 movt_3 = "00010001DSC",
282 srl_3 = "00000002DTA", 274 srl_3 = "00000002DTA",
283 rotr_3 = "00200002DTA", 275 rotr_3 = "00200002DTA",
284 sra_3 = "00000003DTA", 276 sra_3 = "00000003DTA",
285 sllv_3 = "00000004DTS", 277 sllv_3 = "00000004DTS",
286 srlv_3 = "00000006DTS", 278 srlv_3 = "00000006DTS",
287 rotrv_3 = "00000046DTS", 279 rotrv_3 = "00000046DTS",
280 drotrv_3 = mips64 and "00000056DTS",
288 srav_3 = "00000007DTS", 281 srav_3 = "00000007DTS",
289 jr_1 = "00000008S",
290 jalr_1 = "0000f809S", 282 jalr_1 = "0000f809S",
291 jalr_2 = "00000009DS", 283 jalr_2 = "00000009DS",
292 movz_3 = "0000000aDST",
293 movn_3 = "0000000bDST",
294 syscall_0 = "0000000c", 284 syscall_0 = "0000000c",
295 syscall_1 = "0000000cY", 285 syscall_1 = "0000000cY",
296 break_0 = "0000000d", 286 break_0 = "0000000d",
297 break_1 = "0000000dY", 287 break_1 = "0000000dY",
298 sync_0 = "0000000f", 288 sync_0 = "0000000f",
299 mfhi_1 = "00000010D", 289 dsllv_3 = mips64 and "00000014DTS",
300 mthi_1 = "00000011S", 290 dsrlv_3 = mips64 and "00000016DTS",
301 mflo_1 = "00000012D", 291 dsrav_3 = mips64 and "00000017DTS",
302 mtlo_1 = "00000013S",
303 mult_2 = "00000018ST",
304 multu_2 = "00000019ST",
305 div_2 = "0000001aST",
306 divu_2 = "0000001bST",
307 add_3 = "00000020DST", 292 add_3 = "00000020DST",
308 move_2 = "00000021DS", 293 move_2 = mips64 and "00000025DS" or "00000021DS",
309 addu_3 = "00000021DST", 294 addu_3 = "00000021DST",
310 sub_3 = "00000022DST", 295 sub_3 = "00000022DST",
311 negu_2 = "00000023DT", 296 negu_2 = mips64 and "0000002fDT" or "00000023DT",
312 subu_3 = "00000023DST", 297 subu_3 = "00000023DST",
313 and_3 = "00000024DST", 298 and_3 = "00000024DST",
314 or_3 = "00000025DST", 299 or_3 = "00000025DST",
@@ -317,6 +302,10 @@ local map_op = {
317 nor_3 = "00000027DST", 302 nor_3 = "00000027DST",
318 slt_3 = "0000002aDST", 303 slt_3 = "0000002aDST",
319 sltu_3 = "0000002bDST", 304 sltu_3 = "0000002bDST",
305 dadd_3 = mips64 and "0000002cDST",
306 daddu_3 = mips64 and "0000002dDST",
307 dsub_3 = mips64 and "0000002eDST",
308 dsubu_3 = mips64 and "0000002fDST",
320 tge_2 = "00000030ST", 309 tge_2 = "00000030ST",
321 tge_3 = "00000030STZ", 310 tge_3 = "00000030STZ",
322 tgeu_2 = "00000031ST", 311 tgeu_2 = "00000031ST",
@@ -329,40 +318,36 @@ local map_op = {
329 teq_3 = "00000034STZ", 318 teq_3 = "00000034STZ",
330 tne_2 = "00000036ST", 319 tne_2 = "00000036ST",
331 tne_3 = "00000036STZ", 320 tne_3 = "00000036STZ",
321 dsll_3 = mips64 and "00000038DTa",
322 dsrl_3 = mips64 and "0000003aDTa",
323 drotr_3 = mips64 and "0020003aDTa",
324 dsra_3 = mips64 and "0000003bDTa",
325 dsll32_3 = mips64 and "0000003cDTA",
326 dsrl32_3 = mips64 and "0000003eDTA",
327 drotr32_3 = mips64 and "0020003eDTA",
328 dsra32_3 = mips64 and "0000003fDTA",
332 329
333 -- Opcode REGIMM. 330 -- Opcode REGIMM.
334 bltz_2 = "04000000SB", 331 bltz_2 = "04000000SB",
335 bgez_2 = "04010000SB", 332 bgez_2 = "04010000SB",
336 bltzl_2 = "04020000SB", 333 bltzl_2 = "04020000SB",
337 bgezl_2 = "04030000SB", 334 bgezl_2 = "04030000SB",
338 tgei_2 = "04080000SI",
339 tgeiu_2 = "04090000SI",
340 tlti_2 = "040a0000SI",
341 tltiu_2 = "040b0000SI",
342 teqi_2 = "040c0000SI",
343 tnei_2 = "040e0000SI",
344 bltzal_2 = "04100000SB",
345 bal_1 = "04110000B", 335 bal_1 = "04110000B",
346 bgezal_2 = "04110000SB",
347 bltzall_2 = "04120000SB",
348 bgezall_2 = "04130000SB",
349 synci_1 = "041f0000O", 336 synci_1 = "041f0000O",
350 337
351 -- Opcode SPECIAL2.
352 madd_2 = "70000000ST",
353 maddu_2 = "70000001ST",
354 mul_3 = "70000002DST",
355 msub_2 = "70000004ST",
356 msubu_2 = "70000005ST",
357 clz_2 = "70000020DS=",
358 clo_2 = "70000021DS=",
359 sdbbp_0 = "7000003f",
360 sdbbp_1 = "7000003fY",
361
362 -- Opcode SPECIAL3. 338 -- Opcode SPECIAL3.
363 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 339 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
340 dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
341 dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
342 dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
343 zextw_2 = mips64 and "7c00f803TS",
364 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 344 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
345 dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
346 dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
347 dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
365 wsbh_2 = "7c0000a0DT", 348 wsbh_2 = "7c0000a0DT",
349 dsbh_2 = mips64 and "7c0000a4DT",
350 dshd_2 = mips64 and "7c000164DT",
366 seb_2 = "7c000420DT", 351 seb_2 = "7c000420DT",
367 seh_2 = "7c000620DT", 352 seh_2 = "7c000620DT",
368 rdhwr_2 = "7c00003bTD", 353 rdhwr_2 = "7c00003bTD",
@@ -370,8 +355,12 @@ local map_op = {
370 -- Opcode COP0. 355 -- Opcode COP0.
371 mfc0_2 = "40000000TD", 356 mfc0_2 = "40000000TD",
372 mfc0_3 = "40000000TDW", 357 mfc0_3 = "40000000TDW",
358 dmfc0_2 = mips64 and "40200000TD",
359 dmfc0_3 = mips64 and "40200000TDW",
373 mtc0_2 = "40800000TD", 360 mtc0_2 = "40800000TD",
374 mtc0_3 = "40800000TDW", 361 mtc0_3 = "40800000TDW",
362 dmtc0_2 = mips64 and "40a00000TD",
363 dmtc0_3 = mips64 and "40a00000TDW",
375 rdpgpr_2 = "41400000DT", 364 rdpgpr_2 = "41400000DT",
376 di_0 = "41606000", 365 di_0 = "41606000",
377 di_1 = "41606000T", 366 di_1 = "41606000T",
@@ -388,21 +377,14 @@ local map_op = {
388 377
389 -- Opcode COP1. 378 -- Opcode COP1.
390 mfc1_2 = "44000000TG", 379 mfc1_2 = "44000000TG",
380 dmfc1_2 = mips64 and "44200000TG",
391 cfc1_2 = "44400000TG", 381 cfc1_2 = "44400000TG",
392 mfhc1_2 = "44600000TG", 382 mfhc1_2 = "44600000TG",
393 mtc1_2 = "44800000TG", 383 mtc1_2 = "44800000TG",
384 dmtc1_2 = mips64 and "44a00000TG",
394 ctc1_2 = "44c00000TG", 385 ctc1_2 = "44c00000TG",
395 mthc1_2 = "44e00000TG", 386 mthc1_2 = "44e00000TG",
396 387
397 bc1f_1 = "45000000B",
398 bc1f_2 = "45000000CB",
399 bc1t_1 = "45010000B",
400 bc1t_2 = "45010000CB",
401 bc1fl_1 = "45020000B",
402 bc1fl_2 = "45020000CB",
403 bc1tl_1 = "45030000B",
404 bc1tl_2 = "45030000CB",
405
406 ["add.s_3"] = "46000000FGH", 388 ["add.s_3"] = "46000000FGH",
407 ["sub.s_3"] = "46000001FGH", 389 ["sub.s_3"] = "46000001FGH",
408 ["mul.s_3"] = "46000002FGH", 390 ["mul.s_3"] = "46000002FGH",
@@ -419,51 +401,11 @@ local map_op = {
419 ["trunc.w.s_2"] = "4600000dFG", 401 ["trunc.w.s_2"] = "4600000dFG",
420 ["ceil.w.s_2"] = "4600000eFG", 402 ["ceil.w.s_2"] = "4600000eFG",
421 ["floor.w.s_2"] = "4600000fFG", 403 ["floor.w.s_2"] = "4600000fFG",
422 ["movf.s_2"] = "46000011FG",
423 ["movf.s_3"] = "46000011FGC",
424 ["movt.s_2"] = "46010011FG",
425 ["movt.s_3"] = "46010011FGC",
426 ["movz.s_3"] = "46000012FGT",
427 ["movn.s_3"] = "46000013FGT",
428 ["recip.s_2"] = "46000015FG", 404 ["recip.s_2"] = "46000015FG",
429 ["rsqrt.s_2"] = "46000016FG", 405 ["rsqrt.s_2"] = "46000016FG",
430 ["cvt.d.s_2"] = "46000021FG", 406 ["cvt.d.s_2"] = "46000021FG",
431 ["cvt.w.s_2"] = "46000024FG", 407 ["cvt.w.s_2"] = "46000024FG",
432 ["cvt.l.s_2"] = "46000025FG", 408 ["cvt.l.s_2"] = "46000025FG",
433 ["cvt.ps.s_3"] = "46000026FGH",
434 ["c.f.s_2"] = "46000030GH",
435 ["c.f.s_3"] = "46000030VGH",
436 ["c.un.s_2"] = "46000031GH",
437 ["c.un.s_3"] = "46000031VGH",
438 ["c.eq.s_2"] = "46000032GH",
439 ["c.eq.s_3"] = "46000032VGH",
440 ["c.ueq.s_2"] = "46000033GH",
441 ["c.ueq.s_3"] = "46000033VGH",
442 ["c.olt.s_2"] = "46000034GH",
443 ["c.olt.s_3"] = "46000034VGH",
444 ["c.ult.s_2"] = "46000035GH",
445 ["c.ult.s_3"] = "46000035VGH",
446 ["c.ole.s_2"] = "46000036GH",
447 ["c.ole.s_3"] = "46000036VGH",
448 ["c.ule.s_2"] = "46000037GH",
449 ["c.ule.s_3"] = "46000037VGH",
450 ["c.sf.s_2"] = "46000038GH",
451 ["c.sf.s_3"] = "46000038VGH",
452 ["c.ngle.s_2"] = "46000039GH",
453 ["c.ngle.s_3"] = "46000039VGH",
454 ["c.seq.s_2"] = "4600003aGH",
455 ["c.seq.s_3"] = "4600003aVGH",
456 ["c.ngl.s_2"] = "4600003bGH",
457 ["c.ngl.s_3"] = "4600003bVGH",
458 ["c.lt.s_2"] = "4600003cGH",
459 ["c.lt.s_3"] = "4600003cVGH",
460 ["c.nge.s_2"] = "4600003dGH",
461 ["c.nge.s_3"] = "4600003dVGH",
462 ["c.le.s_2"] = "4600003eGH",
463 ["c.le.s_3"] = "4600003eVGH",
464 ["c.ngt.s_2"] = "4600003fGH",
465 ["c.ngt.s_3"] = "4600003fVGH",
466
467 ["add.d_3"] = "46200000FGH", 409 ["add.d_3"] = "46200000FGH",
468 ["sub.d_3"] = "46200001FGH", 410 ["sub.d_3"] = "46200001FGH",
469 ["mul.d_3"] = "46200002FGH", 411 ["mul.d_3"] = "46200002FGH",
@@ -480,130 +422,410 @@ local map_op = {
480 ["trunc.w.d_2"] = "4620000dFG", 422 ["trunc.w.d_2"] = "4620000dFG",
481 ["ceil.w.d_2"] = "4620000eFG", 423 ["ceil.w.d_2"] = "4620000eFG",
482 ["floor.w.d_2"] = "4620000fFG", 424 ["floor.w.d_2"] = "4620000fFG",
483 ["movf.d_2"] = "46200011FG",
484 ["movf.d_3"] = "46200011FGC",
485 ["movt.d_2"] = "46210011FG",
486 ["movt.d_3"] = "46210011FGC",
487 ["movz.d_3"] = "46200012FGT",
488 ["movn.d_3"] = "46200013FGT",
489 ["recip.d_2"] = "46200015FG", 425 ["recip.d_2"] = "46200015FG",
490 ["rsqrt.d_2"] = "46200016FG", 426 ["rsqrt.d_2"] = "46200016FG",
491 ["cvt.s.d_2"] = "46200020FG", 427 ["cvt.s.d_2"] = "46200020FG",
492 ["cvt.w.d_2"] = "46200024FG", 428 ["cvt.w.d_2"] = "46200024FG",
493 ["cvt.l.d_2"] = "46200025FG", 429 ["cvt.l.d_2"] = "46200025FG",
494 ["c.f.d_2"] = "46200030GH",
495 ["c.f.d_3"] = "46200030VGH",
496 ["c.un.d_2"] = "46200031GH",
497 ["c.un.d_3"] = "46200031VGH",
498 ["c.eq.d_2"] = "46200032GH",
499 ["c.eq.d_3"] = "46200032VGH",
500 ["c.ueq.d_2"] = "46200033GH",
501 ["c.ueq.d_3"] = "46200033VGH",
502 ["c.olt.d_2"] = "46200034GH",
503 ["c.olt.d_3"] = "46200034VGH",
504 ["c.ult.d_2"] = "46200035GH",
505 ["c.ult.d_3"] = "46200035VGH",
506 ["c.ole.d_2"] = "46200036GH",
507 ["c.ole.d_3"] = "46200036VGH",
508 ["c.ule.d_2"] = "46200037GH",
509 ["c.ule.d_3"] = "46200037VGH",
510 ["c.sf.d_2"] = "46200038GH",
511 ["c.sf.d_3"] = "46200038VGH",
512 ["c.ngle.d_2"] = "46200039GH",
513 ["c.ngle.d_3"] = "46200039VGH",
514 ["c.seq.d_2"] = "4620003aGH",
515 ["c.seq.d_3"] = "4620003aVGH",
516 ["c.ngl.d_2"] = "4620003bGH",
517 ["c.ngl.d_3"] = "4620003bVGH",
518 ["c.lt.d_2"] = "4620003cGH",
519 ["c.lt.d_3"] = "4620003cVGH",
520 ["c.nge.d_2"] = "4620003dGH",
521 ["c.nge.d_3"] = "4620003dVGH",
522 ["c.le.d_2"] = "4620003eGH",
523 ["c.le.d_3"] = "4620003eVGH",
524 ["c.ngt.d_2"] = "4620003fGH",
525 ["c.ngt.d_3"] = "4620003fVGH",
526
527 ["add.ps_3"] = "46c00000FGH",
528 ["sub.ps_3"] = "46c00001FGH",
529 ["mul.ps_3"] = "46c00002FGH",
530 ["abs.ps_2"] = "46c00005FG",
531 ["mov.ps_2"] = "46c00006FG",
532 ["neg.ps_2"] = "46c00007FG",
533 ["movf.ps_2"] = "46c00011FG",
534 ["movf.ps_3"] = "46c00011FGC",
535 ["movt.ps_2"] = "46c10011FG",
536 ["movt.ps_3"] = "46c10011FGC",
537 ["movz.ps_3"] = "46c00012FGT",
538 ["movn.ps_3"] = "46c00013FGT",
539 ["cvt.s.pu_2"] = "46c00020FG",
540 ["cvt.s.pl_2"] = "46c00028FG",
541 ["pll.ps_3"] = "46c0002cFGH",
542 ["plu.ps_3"] = "46c0002dFGH",
543 ["pul.ps_3"] = "46c0002eFGH",
544 ["puu.ps_3"] = "46c0002fFGH",
545 ["c.f.ps_2"] = "46c00030GH",
546 ["c.f.ps_3"] = "46c00030VGH",
547 ["c.un.ps_2"] = "46c00031GH",
548 ["c.un.ps_3"] = "46c00031VGH",
549 ["c.eq.ps_2"] = "46c00032GH",
550 ["c.eq.ps_3"] = "46c00032VGH",
551 ["c.ueq.ps_2"] = "46c00033GH",
552 ["c.ueq.ps_3"] = "46c00033VGH",
553 ["c.olt.ps_2"] = "46c00034GH",
554 ["c.olt.ps_3"] = "46c00034VGH",
555 ["c.ult.ps_2"] = "46c00035GH",
556 ["c.ult.ps_3"] = "46c00035VGH",
557 ["c.ole.ps_2"] = "46c00036GH",
558 ["c.ole.ps_3"] = "46c00036VGH",
559 ["c.ule.ps_2"] = "46c00037GH",
560 ["c.ule.ps_3"] = "46c00037VGH",
561 ["c.sf.ps_2"] = "46c00038GH",
562 ["c.sf.ps_3"] = "46c00038VGH",
563 ["c.ngle.ps_2"] = "46c00039GH",
564 ["c.ngle.ps_3"] = "46c00039VGH",
565 ["c.seq.ps_2"] = "46c0003aGH",
566 ["c.seq.ps_3"] = "46c0003aVGH",
567 ["c.ngl.ps_2"] = "46c0003bGH",
568 ["c.ngl.ps_3"] = "46c0003bVGH",
569 ["c.lt.ps_2"] = "46c0003cGH",
570 ["c.lt.ps_3"] = "46c0003cVGH",
571 ["c.nge.ps_2"] = "46c0003dGH",
572 ["c.nge.ps_3"] = "46c0003dVGH",
573 ["c.le.ps_2"] = "46c0003eGH",
574 ["c.le.ps_3"] = "46c0003eVGH",
575 ["c.ngt.ps_2"] = "46c0003fGH",
576 ["c.ngt.ps_3"] = "46c0003fVGH",
577
578 ["cvt.s.w_2"] = "46800020FG", 430 ["cvt.s.w_2"] = "46800020FG",
579 ["cvt.d.w_2"] = "46800021FG", 431 ["cvt.d.w_2"] = "46800021FG",
580
581 ["cvt.s.l_2"] = "46a00020FG", 432 ["cvt.s.l_2"] = "46a00020FG",
582 ["cvt.d.l_2"] = "46a00021FG", 433 ["cvt.d.l_2"] = "46a00021FG",
583
584 -- Opcode COP1X.
585 lwxc1_2 = "4c000000FX",
586 ldxc1_2 = "4c000001FX",
587 luxc1_2 = "4c000005FX",
588 swxc1_2 = "4c000008FX",
589 sdxc1_2 = "4c000009FX",
590 suxc1_2 = "4c00000dFX",
591 prefx_2 = "4c00000fMX",
592 ["alnv.ps_4"] = "4c00001eFGHS",
593 ["madd.s_4"] = "4c000020FRGH",
594 ["madd.d_4"] = "4c000021FRGH",
595 ["madd.ps_4"] = "4c000026FRGH",
596 ["msub.s_4"] = "4c000028FRGH",
597 ["msub.d_4"] = "4c000029FRGH",
598 ["msub.ps_4"] = "4c00002eFRGH",
599 ["nmadd.s_4"] = "4c000030FRGH",
600 ["nmadd.d_4"] = "4c000031FRGH",
601 ["nmadd.ps_4"] = "4c000036FRGH",
602 ["nmsub.s_4"] = "4c000038FRGH",
603 ["nmsub.d_4"] = "4c000039FRGH",
604 ["nmsub.ps_4"] = "4c00003eFRGH",
605} 434}
606 435
436if mipsr6 then -- Instructions added with MIPSR6.
437
438 for k,v in pairs({
439
440 -- Add immediate to upper bits.
441 aui_3 = "3c000000TSI",
442 daui_3 = mips64 and "74000000TSI",
443 dahi_2 = mips64 and "04060000SI",
444 dati_2 = mips64 and "041e0000SI",
445
446 -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
447
448 -- Compact branches.
449 blezalc_2 = "18000000TB", -- rt != 0.
450 bgezalc_2 = "18000000T=SB", -- rt != 0.
451 bgtzalc_2 = "1c000000TB", -- rt != 0.
452 bltzalc_2 = "1c000000T=SB", -- rt != 0.
453
454 blezc_2 = "58000000TB", -- rt != 0.
455 bgezc_2 = "58000000T=SB", -- rt != 0.
456 bgec_3 = "58000000STB", -- rs != rt.
457 blec_3 = "58000000TSB", -- rt != rs.
458
459 bgtzc_2 = "5c000000TB", -- rt != 0.
460 bltzc_2 = "5c000000T=SB", -- rt != 0.
461 bltc_3 = "5c000000STB", -- rs != rt.
462 bgtc_3 = "5c000000TSB", -- rt != rs.
463
464 bgeuc_3 = "18000000STB", -- rs != rt.
465 bleuc_3 = "18000000TSB", -- rt != rs.
466 bltuc_3 = "1c000000STB", -- rs != rt.
467 bgtuc_3 = "1c000000TSB", -- rt != rs.
468
469 beqzalc_2 = "20000000TB", -- rt != 0.
470 bnezalc_2 = "60000000TB", -- rt != 0.
471 beqc_3 = "20000000STB", -- rs < rt.
472 bnec_3 = "60000000STB", -- rs < rt.
473 bovc_3 = "20000000STB", -- rs >= rt.
474 bnvc_3 = "60000000STB", -- rs >= rt.
475
476 beqzc_2 = "d8000000SK", -- rs != 0.
477 bnezc_2 = "f8000000SK", -- rs != 0.
478 jic_2 = "d8000000TI",
479 jialc_2 = "f8000000TI",
480 bc_1 = "c8000000L",
481 balc_1 = "e8000000L",
482
483 -- Opcode SPECIAL.
484 jr_1 = "00000009S",
485 sdbbp_0 = "0000000e",
486 sdbbp_1 = "0000000eY",
487 lsa_4 = "00000005DSTA",
488 dlsa_4 = mips64 and "00000015DSTA",
489 seleqz_3 = "00000035DST",
490 selnez_3 = "00000037DST",
491 clz_2 = "00000050DS",
492 clo_2 = "00000051DS",
493 dclz_2 = mips64 and "00000052DS",
494 dclo_2 = mips64 and "00000053DS",
495 mul_3 = "00000098DST",
496 muh_3 = "000000d8DST",
497 mulu_3 = "00000099DST",
498 muhu_3 = "000000d9DST",
499 div_3 = "0000009aDST",
500 mod_3 = "000000daDST",
501 divu_3 = "0000009bDST",
502 modu_3 = "000000dbDST",
503 dmul_3 = mips64 and "0000009cDST",
504 dmuh_3 = mips64 and "000000dcDST",
505 dmulu_3 = mips64 and "0000009dDST",
506 dmuhu_3 = mips64 and "000000ddDST",
507 ddiv_3 = mips64 and "0000009eDST",
508 dmod_3 = mips64 and "000000deDST",
509 ddivu_3 = mips64 and "0000009fDST",
510 dmodu_3 = mips64 and "000000dfDST",
511
512 -- Opcode SPECIAL3.
513 align_4 = "7c000220DSTA",
514 dalign_4 = mips64 and "7c000224DSTA",
515 bitswap_2 = "7c000020DT",
516 dbitswap_2 = mips64 and "7c000024DT",
517
518 -- Opcode COP1.
519 bc1eqz_2 = "45200000HB",
520 bc1nez_2 = "45a00000HB",
521
522 ["sel.s_3"] = "46000010FGH",
523 ["seleqz.s_3"] = "46000014FGH",
524 ["selnez.s_3"] = "46000017FGH",
525 ["maddf.s_3"] = "46000018FGH",
526 ["msubf.s_3"] = "46000019FGH",
527 ["rint.s_2"] = "4600001aFG",
528 ["class.s_2"] = "4600001bFG",
529 ["min.s_3"] = "4600001cFGH",
530 ["mina.s_3"] = "4600001dFGH",
531 ["max.s_3"] = "4600001eFGH",
532 ["maxa.s_3"] = "4600001fFGH",
533 ["cmp.af.s_3"] = "46800000FGH",
534 ["cmp.un.s_3"] = "46800001FGH",
535 ["cmp.or.s_3"] = "46800011FGH",
536 ["cmp.eq.s_3"] = "46800002FGH",
537 ["cmp.une.s_3"] = "46800012FGH",
538 ["cmp.ueq.s_3"] = "46800003FGH",
539 ["cmp.ne.s_3"] = "46800013FGH",
540 ["cmp.lt.s_3"] = "46800004FGH",
541 ["cmp.ult.s_3"] = "46800005FGH",
542 ["cmp.le.s_3"] = "46800006FGH",
543 ["cmp.ule.s_3"] = "46800007FGH",
544 ["cmp.saf.s_3"] = "46800008FGH",
545 ["cmp.sun.s_3"] = "46800009FGH",
546 ["cmp.sor.s_3"] = "46800019FGH",
547 ["cmp.seq.s_3"] = "4680000aFGH",
548 ["cmp.sune.s_3"] = "4680001aFGH",
549 ["cmp.sueq.s_3"] = "4680000bFGH",
550 ["cmp.sne.s_3"] = "4680001bFGH",
551 ["cmp.slt.s_3"] = "4680000cFGH",
552 ["cmp.sult.s_3"] = "4680000dFGH",
553 ["cmp.sle.s_3"] = "4680000eFGH",
554 ["cmp.sule.s_3"] = "4680000fFGH",
555
556 ["sel.d_3"] = "46200010FGH",
557 ["seleqz.d_3"] = "46200014FGH",
558 ["selnez.d_3"] = "46200017FGH",
559 ["maddf.d_3"] = "46200018FGH",
560 ["msubf.d_3"] = "46200019FGH",
561 ["rint.d_2"] = "4620001aFG",
562 ["class.d_2"] = "4620001bFG",
563 ["min.d_3"] = "4620001cFGH",
564 ["mina.d_3"] = "4620001dFGH",
565 ["max.d_3"] = "4620001eFGH",
566 ["maxa.d_3"] = "4620001fFGH",
567 ["cmp.af.d_3"] = "46a00000FGH",
568 ["cmp.un.d_3"] = "46a00001FGH",
569 ["cmp.or.d_3"] = "46a00011FGH",
570 ["cmp.eq.d_3"] = "46a00002FGH",
571 ["cmp.une.d_3"] = "46a00012FGH",
572 ["cmp.ueq.d_3"] = "46a00003FGH",
573 ["cmp.ne.d_3"] = "46a00013FGH",
574 ["cmp.lt.d_3"] = "46a00004FGH",
575 ["cmp.ult.d_3"] = "46a00005FGH",
576 ["cmp.le.d_3"] = "46a00006FGH",
577 ["cmp.ule.d_3"] = "46a00007FGH",
578 ["cmp.saf.d_3"] = "46a00008FGH",
579 ["cmp.sun.d_3"] = "46a00009FGH",
580 ["cmp.sor.d_3"] = "46a00019FGH",
581 ["cmp.seq.d_3"] = "46a0000aFGH",
582 ["cmp.sune.d_3"] = "46a0001aFGH",
583 ["cmp.sueq.d_3"] = "46a0000bFGH",
584 ["cmp.sne.d_3"] = "46a0001bFGH",
585 ["cmp.slt.d_3"] = "46a0000cFGH",
586 ["cmp.sult.d_3"] = "46a0000dFGH",
587 ["cmp.sle.d_3"] = "46a0000eFGH",
588 ["cmp.sule.d_3"] = "46a0000fFGH",
589
590 }) do map_op[k] = v end
591
592else -- Instructions removed by MIPSR6.
593
594 for k,v in pairs({
595 -- Traps, don't use.
596 addi_3 = "20000000TSI",
597 daddi_3 = mips64 and "60000000TSI",
598
599 -- Branch on likely, don't use.
600 beqzl_2 = "50000000SB",
601 beql_3 = "50000000STB",
602 bnezl_2 = "54000000SB",
603 bnel_3 = "54000000STB",
604 blezl_2 = "58000000SB",
605 bgtzl_2 = "5c000000SB",
606
607 lwl_2 = "88000000TO",
608 lwr_2 = "98000000TO",
609 swl_2 = "a8000000TO",
610 sdl_2 = mips64 and "b0000000TO",
611 sdr_2 = mips64 and "b1000000TO",
612 swr_2 = "b8000000TO",
613 cache_2 = "bc000000NO",
614 ll_2 = "c0000000TO",
615 pref_2 = "cc000000NO",
616 sc_2 = "e0000000TO",
617 scd_2 = mips64 and "f0000000TO",
618
619 -- Opcode SPECIAL.
620 movf_2 = "00000001DS",
621 movf_3 = "00000001DSC",
622 movt_2 = "00010001DS",
623 movt_3 = "00010001DSC",
624 jr_1 = "00000008S",
625 movz_3 = "0000000aDST",
626 movn_3 = "0000000bDST",
627 mfhi_1 = "00000010D",
628 mthi_1 = "00000011S",
629 mflo_1 = "00000012D",
630 mtlo_1 = "00000013S",
631 mult_2 = "00000018ST",
632 multu_2 = "00000019ST",
633 div_3 = "0000001aST",
634 divu_3 = "0000001bST",
635 ddiv_3 = mips64 and "0000001eST",
636 ddivu_3 = mips64 and "0000001fST",
637 dmult_2 = mips64 and "0000001cST",
638 dmultu_2 = mips64 and "0000001dST",
639
640 -- Opcode REGIMM.
641 tgei_2 = "04080000SI",
642 tgeiu_2 = "04090000SI",
643 tlti_2 = "040a0000SI",
644 tltiu_2 = "040b0000SI",
645 teqi_2 = "040c0000SI",
646 tnei_2 = "040e0000SI",
647 bltzal_2 = "04100000SB",
648 bgezal_2 = "04110000SB",
649 bltzall_2 = "04120000SB",
650 bgezall_2 = "04130000SB",
651
652 -- Opcode SPECIAL2.
653 madd_2 = "70000000ST",
654 maddu_2 = "70000001ST",
655 mul_3 = "70000002DST",
656 msub_2 = "70000004ST",
657 msubu_2 = "70000005ST",
658 clz_2 = "70000020D=TS",
659 clo_2 = "70000021D=TS",
660 dclz_2 = mips64 and "70000024D=TS",
661 dclo_2 = mips64 and "70000025D=TS",
662 sdbbp_0 = "7000003f",
663 sdbbp_1 = "7000003fY",
664
665 -- Opcode COP1.
666 bc1f_1 = "45000000B",
667 bc1f_2 = "45000000CB",
668 bc1t_1 = "45010000B",
669 bc1t_2 = "45010000CB",
670 bc1fl_1 = "45020000B",
671 bc1fl_2 = "45020000CB",
672 bc1tl_1 = "45030000B",
673 bc1tl_2 = "45030000CB",
674
675 ["movf.s_2"] = "46000011FG",
676 ["movf.s_3"] = "46000011FGC",
677 ["movt.s_2"] = "46010011FG",
678 ["movt.s_3"] = "46010011FGC",
679 ["movz.s_3"] = "46000012FGT",
680 ["movn.s_3"] = "46000013FGT",
681 ["cvt.ps.s_3"] = "46000026FGH",
682 ["c.f.s_2"] = "46000030GH",
683 ["c.f.s_3"] = "46000030VGH",
684 ["c.un.s_2"] = "46000031GH",
685 ["c.un.s_3"] = "46000031VGH",
686 ["c.eq.s_2"] = "46000032GH",
687 ["c.eq.s_3"] = "46000032VGH",
688 ["c.ueq.s_2"] = "46000033GH",
689 ["c.ueq.s_3"] = "46000033VGH",
690 ["c.olt.s_2"] = "46000034GH",
691 ["c.olt.s_3"] = "46000034VGH",
692 ["c.ult.s_2"] = "46000035GH",
693 ["c.ult.s_3"] = "46000035VGH",
694 ["c.ole.s_2"] = "46000036GH",
695 ["c.ole.s_3"] = "46000036VGH",
696 ["c.ule.s_2"] = "46000037GH",
697 ["c.ule.s_3"] = "46000037VGH",
698 ["c.sf.s_2"] = "46000038GH",
699 ["c.sf.s_3"] = "46000038VGH",
700 ["c.ngle.s_2"] = "46000039GH",
701 ["c.ngle.s_3"] = "46000039VGH",
702 ["c.seq.s_2"] = "4600003aGH",
703 ["c.seq.s_3"] = "4600003aVGH",
704 ["c.ngl.s_2"] = "4600003bGH",
705 ["c.ngl.s_3"] = "4600003bVGH",
706 ["c.lt.s_2"] = "4600003cGH",
707 ["c.lt.s_3"] = "4600003cVGH",
708 ["c.nge.s_2"] = "4600003dGH",
709 ["c.nge.s_3"] = "4600003dVGH",
710 ["c.le.s_2"] = "4600003eGH",
711 ["c.le.s_3"] = "4600003eVGH",
712 ["c.ngt.s_2"] = "4600003fGH",
713 ["c.ngt.s_3"] = "4600003fVGH",
714 ["movf.d_2"] = "46200011FG",
715 ["movf.d_3"] = "46200011FGC",
716 ["movt.d_2"] = "46210011FG",
717 ["movt.d_3"] = "46210011FGC",
718 ["movz.d_3"] = "46200012FGT",
719 ["movn.d_3"] = "46200013FGT",
720 ["c.f.d_2"] = "46200030GH",
721 ["c.f.d_3"] = "46200030VGH",
722 ["c.un.d_2"] = "46200031GH",
723 ["c.un.d_3"] = "46200031VGH",
724 ["c.eq.d_2"] = "46200032GH",
725 ["c.eq.d_3"] = "46200032VGH",
726 ["c.ueq.d_2"] = "46200033GH",
727 ["c.ueq.d_3"] = "46200033VGH",
728 ["c.olt.d_2"] = "46200034GH",
729 ["c.olt.d_3"] = "46200034VGH",
730 ["c.ult.d_2"] = "46200035GH",
731 ["c.ult.d_3"] = "46200035VGH",
732 ["c.ole.d_2"] = "46200036GH",
733 ["c.ole.d_3"] = "46200036VGH",
734 ["c.ule.d_2"] = "46200037GH",
735 ["c.ule.d_3"] = "46200037VGH",
736 ["c.sf.d_2"] = "46200038GH",
737 ["c.sf.d_3"] = "46200038VGH",
738 ["c.ngle.d_2"] = "46200039GH",
739 ["c.ngle.d_3"] = "46200039VGH",
740 ["c.seq.d_2"] = "4620003aGH",
741 ["c.seq.d_3"] = "4620003aVGH",
742 ["c.ngl.d_2"] = "4620003bGH",
743 ["c.ngl.d_3"] = "4620003bVGH",
744 ["c.lt.d_2"] = "4620003cGH",
745 ["c.lt.d_3"] = "4620003cVGH",
746 ["c.nge.d_2"] = "4620003dGH",
747 ["c.nge.d_3"] = "4620003dVGH",
748 ["c.le.d_2"] = "4620003eGH",
749 ["c.le.d_3"] = "4620003eVGH",
750 ["c.ngt.d_2"] = "4620003fGH",
751 ["c.ngt.d_3"] = "4620003fVGH",
752 ["add.ps_3"] = "46c00000FGH",
753 ["sub.ps_3"] = "46c00001FGH",
754 ["mul.ps_3"] = "46c00002FGH",
755 ["abs.ps_2"] = "46c00005FG",
756 ["mov.ps_2"] = "46c00006FG",
757 ["neg.ps_2"] = "46c00007FG",
758 ["movf.ps_2"] = "46c00011FG",
759 ["movf.ps_3"] = "46c00011FGC",
760 ["movt.ps_2"] = "46c10011FG",
761 ["movt.ps_3"] = "46c10011FGC",
762 ["movz.ps_3"] = "46c00012FGT",
763 ["movn.ps_3"] = "46c00013FGT",
764 ["cvt.s.pu_2"] = "46c00020FG",
765 ["cvt.s.pl_2"] = "46c00028FG",
766 ["pll.ps_3"] = "46c0002cFGH",
767 ["plu.ps_3"] = "46c0002dFGH",
768 ["pul.ps_3"] = "46c0002eFGH",
769 ["puu.ps_3"] = "46c0002fFGH",
770 ["c.f.ps_2"] = "46c00030GH",
771 ["c.f.ps_3"] = "46c00030VGH",
772 ["c.un.ps_2"] = "46c00031GH",
773 ["c.un.ps_3"] = "46c00031VGH",
774 ["c.eq.ps_2"] = "46c00032GH",
775 ["c.eq.ps_3"] = "46c00032VGH",
776 ["c.ueq.ps_2"] = "46c00033GH",
777 ["c.ueq.ps_3"] = "46c00033VGH",
778 ["c.olt.ps_2"] = "46c00034GH",
779 ["c.olt.ps_3"] = "46c00034VGH",
780 ["c.ult.ps_2"] = "46c00035GH",
781 ["c.ult.ps_3"] = "46c00035VGH",
782 ["c.ole.ps_2"] = "46c00036GH",
783 ["c.ole.ps_3"] = "46c00036VGH",
784 ["c.ule.ps_2"] = "46c00037GH",
785 ["c.ule.ps_3"] = "46c00037VGH",
786 ["c.sf.ps_2"] = "46c00038GH",
787 ["c.sf.ps_3"] = "46c00038VGH",
788 ["c.ngle.ps_2"] = "46c00039GH",
789 ["c.ngle.ps_3"] = "46c00039VGH",
790 ["c.seq.ps_2"] = "46c0003aGH",
791 ["c.seq.ps_3"] = "46c0003aVGH",
792 ["c.ngl.ps_2"] = "46c0003bGH",
793 ["c.ngl.ps_3"] = "46c0003bVGH",
794 ["c.lt.ps_2"] = "46c0003cGH",
795 ["c.lt.ps_3"] = "46c0003cVGH",
796 ["c.nge.ps_2"] = "46c0003dGH",
797 ["c.nge.ps_3"] = "46c0003dVGH",
798 ["c.le.ps_2"] = "46c0003eGH",
799 ["c.le.ps_3"] = "46c0003eVGH",
800 ["c.ngt.ps_2"] = "46c0003fGH",
801 ["c.ngt.ps_3"] = "46c0003fVGH",
802
803 -- Opcode COP1X.
804 lwxc1_2 = "4c000000FX",
805 ldxc1_2 = "4c000001FX",
806 luxc1_2 = "4c000005FX",
807 swxc1_2 = "4c000008FX",
808 sdxc1_2 = "4c000009FX",
809 suxc1_2 = "4c00000dFX",
810 prefx_2 = "4c00000fMX",
811 ["alnv.ps_4"] = "4c00001eFGHS",
812 ["madd.s_4"] = "4c000020FRGH",
813 ["madd.d_4"] = "4c000021FRGH",
814 ["madd.ps_4"] = "4c000026FRGH",
815 ["msub.s_4"] = "4c000028FRGH",
816 ["msub.d_4"] = "4c000029FRGH",
817 ["msub.ps_4"] = "4c00002eFRGH",
818 ["nmadd.s_4"] = "4c000030FRGH",
819 ["nmadd.d_4"] = "4c000031FRGH",
820 ["nmadd.ps_4"] = "4c000036FRGH",
821 ["nmsub.s_4"] = "4c000038FRGH",
822 ["nmsub.d_4"] = "4c000039FRGH",
823 ["nmsub.ps_4"] = "4c00003eFRGH",
824
825 }) do map_op[k] = v end
826
827end
828
607------------------------------------------------------------------------------ 829------------------------------------------------------------------------------
608 830
609local function parse_gpr(expr) 831local function parse_gpr(expr)
@@ -633,7 +855,7 @@ local function parse_fpr(expr)
633 werror("bad register name `"..expr.."'") 855 werror("bad register name `"..expr.."'")
634end 856end
635 857
636local function parse_imm(imm, bits, shift, scale, signed) 858local function parse_imm(imm, bits, shift, scale, signed, action)
637 local n = tonumber(imm) 859 local n = tonumber(imm)
638 if n then 860 if n then
639 local m = sar(n, scale) 861 local m = sar(n, scale)
@@ -651,7 +873,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
651 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then 873 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
652 werror("expected immediate operand, got register") 874 werror("expected immediate operand, got register")
653 else 875 else
654 waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) 876 waction(action or "IMM",
877 (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
655 return 0 878 return 0
656 end 879 end
657end 880end
@@ -756,13 +979,18 @@ map_op[".template__"] = function(params, template, nparams)
756 op = op + parse_disp(params[n]); n = n + 1 979 op = op + parse_disp(params[n]); n = n + 1
757 elseif p == "X" then 980 elseif p == "X" then
758 op = op + parse_index(params[n]); n = n + 1 981 op = op + parse_index(params[n]); n = n + 1
759 elseif p == "B" or p == "J" then 982 elseif p == "B" or p == "J" or p == "K" or p == "L" then
760 local mode, m, s = parse_label(params[n], false) 983 local mode, m, s = parse_label(params[n], false)
761 if p == "B" then m = m + 2048 end 984 if p == "J" then m = m + 0xa800
985 elseif p == "K" then m = m + 0x5000
986 elseif p == "L" then m = m + 0xa000 end
762 waction("REL_"..mode, m, s, 1) 987 waction("REL_"..mode, m, s, 1)
763 n = n + 1 988 n = n + 1
764 elseif p == "A" then 989 elseif p == "A" then
765 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 990 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
991 elseif p == "a" then
992 local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
993 op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
766 elseif p == "M" then 994 elseif p == "M" then
767 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 995 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
768 elseif p == "N" then 996 elseif p == "N" then
@@ -778,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
778 elseif p == "Z" then 1006 elseif p == "Z" then
779 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 1007 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
780 elseif p == "=" then 1008 elseif p == "=" then
781 op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. 1009 n = n - 1 -- Re-use previous parameter for next template char.
782 else 1010 else
783 assert(false) 1011 assert(false)
784 end 1012 end
diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua
new file mode 100644
index 00000000..8ab5d33a
--- /dev/null
+++ b/dynasm/dasm_mips64.lua
@@ -0,0 +1,12 @@
1------------------------------------------------------------------------------
2-- DynASM MIPS64 module.
3--
4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
8-- All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11mips64 = true -- Using a global is an ugly, but effective solution.
12return require("dasm_mips")
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
index d276bea3..35264f2e 100644
--- a/dynasm/dasm_ppc.h
+++ b/dynasm/dasm_ppc.h
@@ -1,5 +1,5 @@
1/* 1/*
2** DynASM PPC encoding engine. 2** DynASM PPC/PPC64 encoding engine.
3** Copyright (C) 2005-2021 Mike Pall. All rights reserved. 3** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice. 4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/ 5*/
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
244#endif 244#endif
245 b[pos++] = n; 245 b[pos++] = n;
246 break; 246 break;
247 case DASM_IMMSH:
248 CK((n >> 6) == 0, RANGE_I);
249 b[pos++] = n;
250 break;
247 } 251 }
248 } 252 }
249 } 253 }
@@ -273,7 +277,7 @@ int dasm_link(Dst_DECL, size_t *szp)
273 277
274 { /* Handle globals not defined in this translation unit. */ 278 { /* Handle globals not defined in this translation unit. */
275 int idx; 279 int idx;
276 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { 280 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
277 int n = D->lglabels[idx]; 281 int n = D->lglabels[idx];
278 /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 282 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
279 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 283 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 303 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 304 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 305 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 306 case DASM_IMM: case DASM_IMMSH: pos++; break;
303 } 307 }
304 } 308 }
305 stop: (void)0; 309 stop: (void)0;
@@ -349,7 +353,10 @@ int dasm_encode(Dst_DECL, void *buffer)
349 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; 353 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
350 break; 354 break;
351 case DASM_REL_LG: 355 case DASM_REL_LG:
352 CK(n >= 0, UNDEF_LG); 356 if (n < 0) {
357 n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
358 goto patchrel;
359 }
353 /* fallthrough */ 360 /* fallthrough */
354 case DASM_REL_PC: 361 case DASM_REL_PC:
355 CK(n >= 0, UNDEF_PC); 362 CK(n >= 0, UNDEF_PC);
@@ -367,6 +374,9 @@ int dasm_encode(Dst_DECL, void *buffer)
367 case DASM_IMM: 374 case DASM_IMM:
368 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 375 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
369 break; 376 break;
377 case DASM_IMMSH:
378 cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
379 break;
370 default: *cp++ = ins; break; 380 default: *cp++ = ins; break;
371 } 381 }
372 } 382 }
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index ad5f38a5..ee2afb2e 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -1,17 +1,19 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM PPC module. 2-- DynASM PPC/PPC64 module.
3-- 3--
4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6--
7-- Support for various extensions contributed by Caio Souza Oliveira.
6------------------------------------------------------------------------------ 8------------------------------------------------------------------------------
7 9
8-- Module information: 10-- Module information:
9local _info = { 11local _info = {
10 arch = "ppc", 12 arch = "ppc",
11 description = "DynASM PPC module", 13 description = "DynASM PPC module",
12 version = "1.3.0", 14 version = "1.5.0",
13 vernum = 10300, 15 vernum = 10500,
14 release = "2011-05-05", 16 release = "2021-05-02",
15 author = "Mike Pall", 17 author = "Mike Pall",
16 license = "MIT", 18 license = "MIT",
17} 19}
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
39local action_names = { 41local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT", 42 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG", 43 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "IMM", 44 "REL_PC", "LABEL_PC", "IMM", "IMMSH"
43} 45}
44 46
45-- Maximum number of section buffer positions for dasm_put(). 47-- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
228 230
229------------------------------------------------------------------------------ 231------------------------------------------------------------------------------
230 232
233local map_op, op_template
234
235local function op_alias(opname, f)
236 return function(params, nparams)
237 if not params then return "-> "..opname:sub(1, -3) end
238 f(params, nparams)
239 op_template(params, map_op[opname], nparams)
240 end
241end
242
231-- Template strings for PPC instructions. 243-- Template strings for PPC instructions.
232local map_op = { 244map_op = {
233 tdi_3 = "08000000ARI", 245 tdi_3 = "08000000ARI",
234 twi_3 = "0c000000ARI", 246 twi_3 = "0c000000ARI",
235 mulli_3 = "1c000000RRI", 247 mulli_3 = "1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
297 std_2 = "f8000000RD", 309 std_2 = "f8000000RD",
298 stdu_2 = "f8000001RD", 310 stdu_2 = "f8000001RD",
299 311
312 subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
313 subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
314 subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
315 ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
316
317 rotlwi_3 = op_alias("rlwinm_5", function(p)
318 p[4] = "0"; p[5] = "31"
319 end),
320 rotrwi_3 = op_alias("rlwinm_5", function(p)
321 p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
322 end),
323 rotlw_3 = op_alias("rlwnm_5", function(p)
324 p[4] = "0"; p[5] = "31"
325 end),
326 slwi_3 = op_alias("rlwinm_5", function(p)
327 p[5] = "31-("..p[3]..")"; p[4] = "0"
328 end),
329 srwi_3 = op_alias("rlwinm_5", function(p)
330 p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
331 end),
332 clrlwi_3 = op_alias("rlwinm_5", function(p)
333 p[4] = p[3]; p[3] = "0"; p[5] = "31"
334 end),
335 clrrwi_3 = op_alias("rlwinm_5", function(p)
336 p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
337 end),
338
339 -- Primary opcode 4:
340 mulhhwu_3 = "10000010RRR.",
341 machhwu_3 = "10000018RRR.",
342 mulhhw_3 = "10000050RRR.",
343 nmachhw_3 = "1000005cRRR.",
344 machhwsu_3 = "10000098RRR.",
345 machhws_3 = "100000d8RRR.",
346 nmachhws_3 = "100000dcRRR.",
347 mulchwu_3 = "10000110RRR.",
348 macchwu_3 = "10000118RRR.",
349 mulchw_3 = "10000150RRR.",
350 macchw_3 = "10000158RRR.",
351 nmacchw_3 = "1000015cRRR.",
352 macchwsu_3 = "10000198RRR.",
353 macchws_3 = "100001d8RRR.",
354 nmacchws_3 = "100001dcRRR.",
355 mullhw_3 = "10000350RRR.",
356 maclhw_3 = "10000358RRR.",
357 nmaclhw_3 = "1000035cRRR.",
358 maclhwsu_3 = "10000398RRR.",
359 maclhws_3 = "100003d8RRR.",
360 nmaclhws_3 = "100003dcRRR.",
361 machhwuo_3 = "10000418RRR.",
362 nmachhwo_3 = "1000045cRRR.",
363 machhwsuo_3 = "10000498RRR.",
364 machhwso_3 = "100004d8RRR.",
365 nmachhwso_3 = "100004dcRRR.",
366 macchwuo_3 = "10000518RRR.",
367 macchwo_3 = "10000558RRR.",
368 nmacchwo_3 = "1000055cRRR.",
369 macchwsuo_3 = "10000598RRR.",
370 macchwso_3 = "100005d8RRR.",
371 nmacchwso_3 = "100005dcRRR.",
372 maclhwo_3 = "10000758RRR.",
373 nmaclhwo_3 = "1000075cRRR.",
374 maclhwsuo_3 = "10000798RRR.",
375 maclhwso_3 = "100007d8RRR.",
376 nmaclhwso_3 = "100007dcRRR.",
377
378 vaddubm_3 = "10000000VVV",
379 vmaxub_3 = "10000002VVV",
380 vrlb_3 = "10000004VVV",
381 vcmpequb_3 = "10000006VVV",
382 vmuloub_3 = "10000008VVV",
383 vaddfp_3 = "1000000aVVV",
384 vmrghb_3 = "1000000cVVV",
385 vpkuhum_3 = "1000000eVVV",
386 vmhaddshs_4 = "10000020VVVV",
387 vmhraddshs_4 = "10000021VVVV",
388 vmladduhm_4 = "10000022VVVV",
389 vmsumubm_4 = "10000024VVVV",
390 vmsummbm_4 = "10000025VVVV",
391 vmsumuhm_4 = "10000026VVVV",
392 vmsumuhs_4 = "10000027VVVV",
393 vmsumshm_4 = "10000028VVVV",
394 vmsumshs_4 = "10000029VVVV",
395 vsel_4 = "1000002aVVVV",
396 vperm_4 = "1000002bVVVV",
397 vsldoi_4 = "1000002cVVVP",
398 vpermxor_4 = "1000002dVVVV",
399 vmaddfp_4 = "1000002eVVVV~",
400 vnmsubfp_4 = "1000002fVVVV~",
401 vaddeuqm_4 = "1000003cVVVV",
402 vaddecuq_4 = "1000003dVVVV",
403 vsubeuqm_4 = "1000003eVVVV",
404 vsubecuq_4 = "1000003fVVVV",
405 vadduhm_3 = "10000040VVV",
406 vmaxuh_3 = "10000042VVV",
407 vrlh_3 = "10000044VVV",
408 vcmpequh_3 = "10000046VVV",
409 vmulouh_3 = "10000048VVV",
410 vsubfp_3 = "1000004aVVV",
411 vmrghh_3 = "1000004cVVV",
412 vpkuwum_3 = "1000004eVVV",
413 vadduwm_3 = "10000080VVV",
414 vmaxuw_3 = "10000082VVV",
415 vrlw_3 = "10000084VVV",
416 vcmpequw_3 = "10000086VVV",
417 vmulouw_3 = "10000088VVV",
418 vmuluwm_3 = "10000089VVV",
419 vmrghw_3 = "1000008cVVV",
420 vpkuhus_3 = "1000008eVVV",
421 vaddudm_3 = "100000c0VVV",
422 vmaxud_3 = "100000c2VVV",
423 vrld_3 = "100000c4VVV",
424 vcmpeqfp_3 = "100000c6VVV",
425 vcmpequd_3 = "100000c7VVV",
426 vpkuwus_3 = "100000ceVVV",
427 vadduqm_3 = "10000100VVV",
428 vmaxsb_3 = "10000102VVV",
429 vslb_3 = "10000104VVV",
430 vmulosb_3 = "10000108VVV",
431 vrefp_2 = "1000010aV-V",
432 vmrglb_3 = "1000010cVVV",
433 vpkshus_3 = "1000010eVVV",
434 vaddcuq_3 = "10000140VVV",
435 vmaxsh_3 = "10000142VVV",
436 vslh_3 = "10000144VVV",
437 vmulosh_3 = "10000148VVV",
438 vrsqrtefp_2 = "1000014aV-V",
439 vmrglh_3 = "1000014cVVV",
440 vpkswus_3 = "1000014eVVV",
441 vaddcuw_3 = "10000180VVV",
442 vmaxsw_3 = "10000182VVV",
443 vslw_3 = "10000184VVV",
444 vmulosw_3 = "10000188VVV",
445 vexptefp_2 = "1000018aV-V",
446 vmrglw_3 = "1000018cVVV",
447 vpkshss_3 = "1000018eVVV",
448 vmaxsd_3 = "100001c2VVV",
449 vsl_3 = "100001c4VVV",
450 vcmpgefp_3 = "100001c6VVV",
451 vlogefp_2 = "100001caV-V",
452 vpkswss_3 = "100001ceVVV",
453 vadduhs_3 = "10000240VVV",
454 vminuh_3 = "10000242VVV",
455 vsrh_3 = "10000244VVV",
456 vcmpgtuh_3 = "10000246VVV",
457 vmuleuh_3 = "10000248VVV",
458 vrfiz_2 = "1000024aV-V",
459 vsplth_3 = "1000024cVV3",
460 vupkhsh_2 = "1000024eV-V",
461 vminuw_3 = "10000282VVV",
462 vminud_3 = "100002c2VVV",
463 vcmpgtud_3 = "100002c7VVV",
464 vrfim_2 = "100002caV-V",
465 vcmpgtsb_3 = "10000306VVV",
466 vcfux_3 = "1000030aVVA~",
467 vaddshs_3 = "10000340VVV",
468 vminsh_3 = "10000342VVV",
469 vsrah_3 = "10000344VVV",
470 vcmpgtsh_3 = "10000346VVV",
471 vmulesh_3 = "10000348VVV",
472 vcfsx_3 = "1000034aVVA~",
473 vspltish_2 = "1000034cVS",
474 vupkhpx_2 = "1000034eV-V",
475 vaddsws_3 = "10000380VVV",
476 vminsw_3 = "10000382VVV",
477 vsraw_3 = "10000384VVV",
478 vcmpgtsw_3 = "10000386VVV",
479 vmulesw_3 = "10000388VVV",
480 vctuxs_3 = "1000038aVVA~",
481 vspltisw_2 = "1000038cVS",
482 vminsd_3 = "100003c2VVV",
483 vsrad_3 = "100003c4VVV",
484 vcmpbfp_3 = "100003c6VVV",
485 vcmpgtsd_3 = "100003c7VVV",
486 vctsxs_3 = "100003caVVA~",
487 vupklpx_2 = "100003ceV-V",
488 vsububm_3 = "10000400VVV",
489 ["bcdadd._4"] = "10000401VVVy.",
490 vavgub_3 = "10000402VVV",
491 vand_3 = "10000404VVV",
492 ["vcmpequb._3"] = "10000406VVV",
493 vmaxfp_3 = "1000040aVVV",
494 vsubuhm_3 = "10000440VVV",
495 ["bcdsub._4"] = "10000441VVVy.",
496 vavguh_3 = "10000442VVV",
497 vandc_3 = "10000444VVV",
498 ["vcmpequh._3"] = "10000446VVV",
499 vminfp_3 = "1000044aVVV",
500 vpkudum_3 = "1000044eVVV",
501 vsubuwm_3 = "10000480VVV",
502 vavguw_3 = "10000482VVV",
503 vor_3 = "10000484VVV",
504 ["vcmpequw._3"] = "10000486VVV",
505 vpmsumw_3 = "10000488VVV",
506 ["vcmpeqfp._3"] = "100004c6VVV",
507 ["vcmpequd._3"] = "100004c7VVV",
508 vpkudus_3 = "100004ceVVV",
509 vavgsb_3 = "10000502VVV",
510 vavgsh_3 = "10000542VVV",
511 vorc_3 = "10000544VVV",
512 vbpermq_3 = "1000054cVVV",
513 vpksdus_3 = "1000054eVVV",
514 vavgsw_3 = "10000582VVV",
515 vsld_3 = "100005c4VVV",
516 ["vcmpgefp._3"] = "100005c6VVV",
517 vpksdss_3 = "100005ceVVV",
518 vsububs_3 = "10000600VVV",
519 mfvscr_1 = "10000604V--",
520 vsum4ubs_3 = "10000608VVV",
521 vsubuhs_3 = "10000640VVV",
522 mtvscr_1 = "10000644--V",
523 ["vcmpgtuh._3"] = "10000646VVV",
524 vsum4shs_3 = "10000648VVV",
525 vupkhsw_2 = "1000064eV-V",
526 vsubuws_3 = "10000680VVV",
527 vshasigmaw_4 = "10000682VVYp",
528 veqv_3 = "10000684VVV",
529 vsum2sws_3 = "10000688VVV",
530 vmrgow_3 = "1000068cVVV",
531 vshasigmad_4 = "100006c2VVYp",
532 vsrd_3 = "100006c4VVV",
533 ["vcmpgtud._3"] = "100006c7VVV",
534 vupklsw_2 = "100006ceV-V",
535 vupkslw_2 = "100006ceV-V",
536 vsubsbs_3 = "10000700VVV",
537 vclzb_2 = "10000702V-V",
538 vpopcntb_2 = "10000703V-V",
539 ["vcmpgtsb._3"] = "10000706VVV",
540 vsum4sbs_3 = "10000708VVV",
541 vsubshs_3 = "10000740VVV",
542 vclzh_2 = "10000742V-V",
543 vpopcnth_2 = "10000743V-V",
544 ["vcmpgtsh._3"] = "10000746VVV",
545 vsubsws_3 = "10000780VVV",
546 vclzw_2 = "10000782V-V",
547 vpopcntw_2 = "10000783V-V",
548 ["vcmpgtsw._3"] = "10000786VVV",
549 vsumsws_3 = "10000788VVV",
550 vmrgew_3 = "1000078cVVV",
551 vclzd_2 = "100007c2V-V",
552 vpopcntd_2 = "100007c3V-V",
553 ["vcmpbfp._3"] = "100007c6VVV",
554 ["vcmpgtsd._3"] = "100007c7VVV",
555
300 -- Primary opcode 19: 556 -- Primary opcode 19:
301 mcrf_2 = "4c000000XX", 557 mcrf_2 = "4c000000XX",
302 isync_0 = "4c00012c", 558 isync_0 = "4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
316 bclrl_2 = "4c000021AA", 572 bclrl_2 = "4c000021AA",
317 bcctr_2 = "4c000420AA", 573 bcctr_2 = "4c000420AA",
318 bcctrl_2 = "4c000421AA", 574 bcctrl_2 = "4c000421AA",
575 bctar_2 = "4c000460AA",
576 bctarl_2 = "4c000461AA",
319 blr_0 = "4e800020", 577 blr_0 = "4e800020",
320 blrl_0 = "4e800021", 578 blrl_0 = "4e800021",
321 bctr_0 = "4e800420", 579 bctr_0 = "4e800420",
@@ -327,6 +585,7 @@ local map_op = {
327 cmpd_3 = "7c200000XRR", 585 cmpd_3 = "7c200000XRR",
328 cmpd_2 = "7c200000-RR", 586 cmpd_2 = "7c200000-RR",
329 tw_3 = "7c000008ARR", 587 tw_3 = "7c000008ARR",
588 lvsl_3 = "7c00000cVRR",
330 subfc_3 = "7c000010RRR.", 589 subfc_3 = "7c000010RRR.",
331 subc_3 = "7c000010RRR~.", 590 subc_3 = "7c000010RRR~.",
332 mulhdu_3 = "7c000012RRR.", 591 mulhdu_3 = "7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
351 cmplw_2 = "7c000040-RR", 610 cmplw_2 = "7c000040-RR",
352 cmpld_3 = "7c200040XRR", 611 cmpld_3 = "7c200040XRR",
353 cmpld_2 = "7c200040-RR", 612 cmpld_2 = "7c200040-RR",
613 lvsr_3 = "7c00004cVRR",
354 subf_3 = "7c000050RRR.", 614 subf_3 = "7c000050RRR.",
355 sub_3 = "7c000050RRR~.", 615 sub_3 = "7c000050RRR~.",
616 lbarx_3 = "7c000068RR0R",
356 ldux_3 = "7c00006aRR0R", 617 ldux_3 = "7c00006aRR0R",
357 dcbst_2 = "7c00006c-RR", 618 dcbst_2 = "7c00006c-RR",
358 lwzux_3 = "7c00006eRR0R", 619 lwzux_3 = "7c00006eRR0R",
359 cntlzd_2 = "7c000074RR~", 620 cntlzd_2 = "7c000074RR~",
360 andc_3 = "7c000078RR~R.", 621 andc_3 = "7c000078RR~R.",
361 td_3 = "7c000088ARR", 622 td_3 = "7c000088ARR",
623 lvewx_3 = "7c00008eVRR",
362 mulhd_3 = "7c000092RRR.", 624 mulhd_3 = "7c000092RRR.",
625 addg6s_3 = "7c000094RRR",
363 mulhw_3 = "7c000096RRR.", 626 mulhw_3 = "7c000096RRR.",
627 dlmzb_3 = "7c00009cRR~R.",
364 ldarx_3 = "7c0000a8RR0R", 628 ldarx_3 = "7c0000a8RR0R",
365 dcbf_2 = "7c0000ac-RR", 629 dcbf_2 = "7c0000ac-RR",
366 lbzx_3 = "7c0000aeRR0R", 630 lbzx_3 = "7c0000aeRR0R",
631 lvx_3 = "7c0000ceVRR",
367 neg_2 = "7c0000d0RR.", 632 neg_2 = "7c0000d0RR.",
633 lharx_3 = "7c0000e8RR0R",
368 lbzux_3 = "7c0000eeRR0R", 634 lbzux_3 = "7c0000eeRR0R",
369 popcntb_2 = "7c0000f4RR~", 635 popcntb_2 = "7c0000f4RR~",
370 not_2 = "7c0000f8RR~%.", 636 not_2 = "7c0000f8RR~%.",
371 nor_3 = "7c0000f8RR~R.", 637 nor_3 = "7c0000f8RR~R.",
638 stvebx_3 = "7c00010eVRR",
372 subfe_3 = "7c000110RRR.", 639 subfe_3 = "7c000110RRR.",
373 sube_3 = "7c000110RRR~.", 640 sube_3 = "7c000110RRR~.",
374 adde_3 = "7c000114RRR.", 641 adde_3 = "7c000114RRR.",
375 stdx_3 = "7c00012aRR0R", 642 stdx_3 = "7c00012aRR0R",
376 stwcx_3 = "7c00012cRR0R.", 643 ["stwcx._3"] = "7c00012dRR0R.",
377 stwx_3 = "7c00012eRR0R", 644 stwx_3 = "7c00012eRR0R",
378 prtyw_2 = "7c000134RR~", 645 prtyw_2 = "7c000134RR~",
646 stvehx_3 = "7c00014eVRR",
379 stdux_3 = "7c00016aRR0R", 647 stdux_3 = "7c00016aRR0R",
648 ["stqcx._3"] = "7c00016dR:R0R.",
380 stwux_3 = "7c00016eRR0R", 649 stwux_3 = "7c00016eRR0R",
381 prtyd_2 = "7c000174RR~", 650 prtyd_2 = "7c000174RR~",
651 stvewx_3 = "7c00018eVRR",
382 subfze_2 = "7c000190RR.", 652 subfze_2 = "7c000190RR.",
383 addze_2 = "7c000194RR.", 653 addze_2 = "7c000194RR.",
384 stdcx_3 = "7c0001acRR0R.", 654 ["stdcx._3"] = "7c0001adRR0R.",
385 stbx_3 = "7c0001aeRR0R", 655 stbx_3 = "7c0001aeRR0R",
656 stvx_3 = "7c0001ceVRR",
386 subfme_2 = "7c0001d0RR.", 657 subfme_2 = "7c0001d0RR.",
387 mulld_3 = "7c0001d2RRR.", 658 mulld_3 = "7c0001d2RRR.",
388 addme_2 = "7c0001d4RR.", 659 addme_2 = "7c0001d4RR.",
389 mullw_3 = "7c0001d6RRR.", 660 mullw_3 = "7c0001d6RRR.",
390 dcbtst_2 = "7c0001ec-RR", 661 dcbtst_2 = "7c0001ec-RR",
391 stbux_3 = "7c0001eeRR0R", 662 stbux_3 = "7c0001eeRR0R",
663 bpermd_3 = "7c0001f8RR~R",
664 lvepxl_3 = "7c00020eVRR",
392 add_3 = "7c000214RRR.", 665 add_3 = "7c000214RRR.",
666 lqarx_3 = "7c000228R:R0R",
393 dcbt_2 = "7c00022c-RR", 667 dcbt_2 = "7c00022c-RR",
394 lhzx_3 = "7c00022eRR0R", 668 lhzx_3 = "7c00022eRR0R",
669 cdtbcd_2 = "7c000234RR~",
395 eqv_3 = "7c000238RR~R.", 670 eqv_3 = "7c000238RR~R.",
671 lvepx_3 = "7c00024eVRR",
396 eciwx_3 = "7c00026cRR0R", 672 eciwx_3 = "7c00026cRR0R",
397 lhzux_3 = "7c00026eRR0R", 673 lhzux_3 = "7c00026eRR0R",
674 cbcdtd_2 = "7c000274RR~",
398 xor_3 = "7c000278RR~R.", 675 xor_3 = "7c000278RR~R.",
399 mfspefscr_1 = "7c0082a6R", 676 mfspefscr_1 = "7c0082a6R",
400 mfxer_1 = "7c0102a6R", 677 mfxer_1 = "7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
404 lhax_3 = "7c0002aeRR0R", 681 lhax_3 = "7c0002aeRR0R",
405 mftb_1 = "7c0c42e6R", 682 mftb_1 = "7c0c42e6R",
406 mftbu_1 = "7c0d42e6R", 683 mftbu_1 = "7c0d42e6R",
684 lvxl_3 = "7c0002ceVRR",
407 lwaux_3 = "7c0002eaRR0R", 685 lwaux_3 = "7c0002eaRR0R",
408 lhaux_3 = "7c0002eeRR0R", 686 lhaux_3 = "7c0002eeRR0R",
687 popcntw_2 = "7c0002f4RR~",
688 divdeu_3 = "7c000312RRR.",
689 divweu_3 = "7c000316RRR.",
409 sthx_3 = "7c00032eRR0R", 690 sthx_3 = "7c00032eRR0R",
410 orc_3 = "7c000338RR~R.", 691 orc_3 = "7c000338RR~R.",
411 ecowx_3 = "7c00036cRR0R", 692 ecowx_3 = "7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
420 mtctr_1 = "7c0903a6R", 701 mtctr_1 = "7c0903a6R",
421 dcbi_2 = "7c0003ac-RR", 702 dcbi_2 = "7c0003ac-RR",
422 nand_3 = "7c0003b8RR~R.", 703 nand_3 = "7c0003b8RR~R.",
704 dsn_2 = "7c0003c6-RR",
705 stvxl_3 = "7c0003ceVRR",
423 divd_3 = "7c0003d2RRR.", 706 divd_3 = "7c0003d2RRR.",
424 divw_3 = "7c0003d6RRR.", 707 divw_3 = "7c0003d6RRR.",
708 popcntd_2 = "7c0003f4RR~",
425 cmpb_3 = "7c0003f8RR~R.", 709 cmpb_3 = "7c0003f8RR~R.",
426 mcrxr_1 = "7c000400X", 710 mcrxr_1 = "7c000400X",
711 lbdx_3 = "7c000406RRR",
427 subfco_3 = "7c000410RRR.", 712 subfco_3 = "7c000410RRR.",
428 subco_3 = "7c000410RRR~.", 713 subco_3 = "7c000410RRR~.",
429 addco_3 = "7c000414RRR.", 714 addco_3 = "7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
433 lfsx_3 = "7c00042eFR0R", 718 lfsx_3 = "7c00042eFR0R",
434 srw_3 = "7c000430RR~R.", 719 srw_3 = "7c000430RR~R.",
435 srd_3 = "7c000436RR~R.", 720 srd_3 = "7c000436RR~R.",
721 lhdx_3 = "7c000446RRR",
436 subfo_3 = "7c000450RRR.", 722 subfo_3 = "7c000450RRR.",
437 subo_3 = "7c000450RRR~.", 723 subo_3 = "7c000450RRR~.",
438 lfsux_3 = "7c00046eFR0R", 724 lfsux_3 = "7c00046eFR0R",
725 lwdx_3 = "7c000486RRR",
439 lswi_3 = "7c0004aaRR0A", 726 lswi_3 = "7c0004aaRR0A",
440 sync_0 = "7c0004ac", 727 sync_0 = "7c0004ac",
441 lwsync_0 = "7c2004ac", 728 lwsync_0 = "7c2004ac",
442 ptesync_0 = "7c4004ac", 729 ptesync_0 = "7c4004ac",
443 lfdx_3 = "7c0004aeFR0R", 730 lfdx_3 = "7c0004aeFR0R",
731 lddx_3 = "7c0004c6RRR",
444 nego_2 = "7c0004d0RR.", 732 nego_2 = "7c0004d0RR.",
445 lfdux_3 = "7c0004eeFR0R", 733 lfdux_3 = "7c0004eeFR0R",
734 stbdx_3 = "7c000506RRR",
446 subfeo_3 = "7c000510RRR.", 735 subfeo_3 = "7c000510RRR.",
447 subeo_3 = "7c000510RRR~.", 736 subeo_3 = "7c000510RRR~.",
448 addeo_3 = "7c000514RRR.", 737 addeo_3 = "7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
450 stswx_3 = "7c00052aRR0R", 739 stswx_3 = "7c00052aRR0R",
451 stwbrx_3 = "7c00052cRR0R", 740 stwbrx_3 = "7c00052cRR0R",
452 stfsx_3 = "7c00052eFR0R", 741 stfsx_3 = "7c00052eFR0R",
742 sthdx_3 = "7c000546RRR",
743 ["stbcx._3"] = "7c00056dRRR",
453 stfsux_3 = "7c00056eFR0R", 744 stfsux_3 = "7c00056eFR0R",
745 stwdx_3 = "7c000586RRR",
454 subfzeo_2 = "7c000590RR.", 746 subfzeo_2 = "7c000590RR.",
455 addzeo_2 = "7c000594RR.", 747 addzeo_2 = "7c000594RR.",
456 stswi_3 = "7c0005aaRR0A", 748 stswi_3 = "7c0005aaRR0A",
749 ["sthcx._3"] = "7c0005adRRR",
457 stfdx_3 = "7c0005aeFR0R", 750 stfdx_3 = "7c0005aeFR0R",
751 stddx_3 = "7c0005c6RRR",
458 subfmeo_2 = "7c0005d0RR.", 752 subfmeo_2 = "7c0005d0RR.",
459 mulldo_3 = "7c0005d2RRR.", 753 mulldo_3 = "7c0005d2RRR.",
460 addmeo_2 = "7c0005d4RR.", 754 addmeo_2 = "7c0005d4RR.",
461 mullwo_3 = "7c0005d6RRR.", 755 mullwo_3 = "7c0005d6RRR.",
462 dcba_2 = "7c0005ec-RR", 756 dcba_2 = "7c0005ec-RR",
463 stfdux_3 = "7c0005eeFR0R", 757 stfdux_3 = "7c0005eeFR0R",
758 stvepxl_3 = "7c00060eVRR",
464 addo_3 = "7c000614RRR.", 759 addo_3 = "7c000614RRR.",
465 lhbrx_3 = "7c00062cRR0R", 760 lhbrx_3 = "7c00062cRR0R",
761 lfdpx_3 = "7c00062eF:RR",
466 sraw_3 = "7c000630RR~R.", 762 sraw_3 = "7c000630RR~R.",
467 srad_3 = "7c000634RR~R.", 763 srad_3 = "7c000634RR~R.",
764 lfddx_3 = "7c000646FRR",
765 stvepx_3 = "7c00064eVRR",
468 srawi_3 = "7c000670RR~A.", 766 srawi_3 = "7c000670RR~A.",
469 sradi_3 = "7c000674RR~H.", 767 sradi_3 = "7c000674RR~H.",
470 eieio_0 = "7c0006ac", 768 eieio_0 = "7c0006ac",
471 lfiwax_3 = "7c0006aeFR0R", 769 lfiwax_3 = "7c0006aeFR0R",
770 divdeuo_3 = "7c000712RRR.",
771 divweuo_3 = "7c000716RRR.",
472 sthbrx_3 = "7c00072cRR0R", 772 sthbrx_3 = "7c00072cRR0R",
773 stfdpx_3 = "7c00072eF:RR",
473 extsh_2 = "7c000734RR~.", 774 extsh_2 = "7c000734RR~.",
775 stfddx_3 = "7c000746FRR",
776 divdeo_3 = "7c000752RRR.",
777 divweo_3 = "7c000756RRR.",
474 extsb_2 = "7c000774RR~.", 778 extsb_2 = "7c000774RR~.",
475 divduo_3 = "7c000792RRR.", 779 divduo_3 = "7c000792RRR.",
476 divwou_3 = "7c000796RRR.", 780 divwou_3 = "7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
481 divwo_3 = "7c0007d6RRR.", 785 divwo_3 = "7c0007d6RRR.",
482 dcbz_2 = "7c0007ec-RR", 786 dcbz_2 = "7c0007ec-RR",
483 787
788 ["tbegin._1"] = "7c00051d1",
789 ["tbegin._0"] = "7c00051d",
790 ["tend._1"] = "7c00055dY",
791 ["tend._0"] = "7c00055d",
792 ["tendall._0"] = "7e00055d",
793 tcheck_1 = "7c00059cX",
794 ["tsr._1"] = "7c0005dd1",
795 ["tsuspend._0"] = "7c0005dd",
796 ["tresume._0"] = "7c2005dd",
797 ["tabortwc._3"] = "7c00061dARR",
798 ["tabortdc._3"] = "7c00065dARR",
799 ["tabortwci._3"] = "7c00069dARS",
800 ["tabortdci._3"] = "7c0006ddARS",
801 ["tabort._1"] = "7c00071d-R-",
802 ["treclaim._1"] = "7c00075d-R",
803 ["trechkpt._0"] = "7c0007dd",
804
805 lxsiwzx_3 = "7c000018QRR",
806 lxsiwax_3 = "7c000098QRR",
807 mfvsrd_2 = "7c000066-Rq",
808 mfvsrwz_2 = "7c0000e6-Rq",
809 stxsiwx_3 = "7c000118QRR",
810 mtvsrd_2 = "7c000166QR",
811 mtvsrwa_2 = "7c0001a6QR",
812 lxvdsx_3 = "7c000298QRR",
813 lxsspx_3 = "7c000418QRR",
814 lxsdx_3 = "7c000498QRR",
815 stxsspx_3 = "7c000518QRR",
816 stxsdx_3 = "7c000598QRR",
817 lxvw4x_3 = "7c000618QRR",
818 lxvd2x_3 = "7c000698QRR",
819 stxvw4x_3 = "7c000718QRR",
820 stxvd2x_3 = "7c000798QRR",
821
484 -- Primary opcode 30: 822 -- Primary opcode 30:
485 rldicl_4 = "78000000RR~HM.", 823 rldicl_4 = "78000000RR~HM.",
486 rldicr_4 = "78000004RR~HM.", 824 rldicr_4 = "78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
489 rldcl_4 = "78000010RR~RM.", 827 rldcl_4 = "78000010RR~RM.",
490 rldcr_4 = "78000012RR~RM.", 828 rldcr_4 = "78000012RR~RM.",
491 829
830 rotldi_3 = op_alias("rldicl_4", function(p)
831 p[4] = "0"
832 end),
833 rotrdi_3 = op_alias("rldicl_4", function(p)
834 p[3] = "64-("..p[3]..")"; p[4] = "0"
835 end),
836 rotld_3 = op_alias("rldcl_4", function(p)
837 p[4] = "0"
838 end),
839 sldi_3 = op_alias("rldicr_4", function(p)
840 p[4] = "63-("..p[3]..")"
841 end),
842 srdi_3 = op_alias("rldicl_4", function(p)
843 p[4] = p[3]; p[3] = "64-("..p[3]..")"
844 end),
845 clrldi_3 = op_alias("rldicl_4", function(p)
846 p[4] = p[3]; p[3] = "0"
847 end),
848 clrrdi_3 = op_alias("rldicr_4", function(p)
849 p[4] = "63-("..p[3]..")"; p[3] = "0"
850 end),
851
852 -- Primary opcode 56:
853 lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8.
854
855 -- Primary opcode 57:
856 lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4.
857
492 -- Primary opcode 59: 858 -- Primary opcode 59:
493 fdivs_3 = "ec000024FFF.", 859 fdivs_3 = "ec000024FFF.",
494 fsubs_3 = "ec000028FFF.", 860 fsubs_3 = "ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
501 fmadds_4 = "ec00003aFFFF~.", 867 fmadds_4 = "ec00003aFFFF~.",
502 fnmsubs_4 = "ec00003cFFFF~.", 868 fnmsubs_4 = "ec00003cFFFF~.",
503 fnmadds_4 = "ec00003eFFFF~.", 869 fnmadds_4 = "ec00003eFFFF~.",
870 fcfids_2 = "ec00069cF-F.",
871 fcfidus_2 = "ec00079cF-F.",
872
873 dadd_3 = "ec000004FFF.",
874 dqua_4 = "ec000006FFFZ.",
875 dmul_3 = "ec000044FFF.",
876 drrnd_4 = "ec000046FFFZ.",
877 dscli_3 = "ec000084FF6.",
878 dquai_4 = "ec000086SF~FZ.",
879 dscri_3 = "ec0000c4FF6.",
880 drintx_4 = "ec0000c61F~FZ.",
881 dcmpo_3 = "ec000104XFF",
882 dtstex_3 = "ec000144XFF",
883 dtstdc_3 = "ec000184XF6",
884 dtstdg_3 = "ec0001c4XF6",
885 drintn_4 = "ec0001c61F~FZ.",
886 dctdp_2 = "ec000204F-F.",
887 dctfix_2 = "ec000244F-F.",
888 ddedpd_3 = "ec000284ZF~F.",
889 dxex_2 = "ec0002c4F-F.",
890 dsub_3 = "ec000404FFF.",
891 ddiv_3 = "ec000444FFF.",
892 dcmpu_3 = "ec000504XFF",
893 dtstsf_3 = "ec000544XFF",
894 drsp_2 = "ec000604F-F.",
895 dcffix_2 = "ec000644F-F.",
896 denbcd_3 = "ec000684YF~F.",
897 diex_3 = "ec0006c4FFF.",
898
899 -- Primary opcode 60:
900 xsaddsp_3 = "f0000000QQQ",
901 xsmaddasp_3 = "f0000008QQQ",
902 xxsldwi_4 = "f0000010QQQz",
903 xsrsqrtesp_2 = "f0000028Q-Q",
904 xssqrtsp_2 = "f000002cQ-Q",
905 xxsel_4 = "f0000030QQQQ",
906 xssubsp_3 = "f0000040QQQ",
907 xsmaddmsp_3 = "f0000048QQQ",
908 xxpermdi_4 = "f0000050QQQz",
909 xsresp_2 = "f0000068Q-Q",
910 xsmulsp_3 = "f0000080QQQ",
911 xsmsubasp_3 = "f0000088QQQ",
912 xxmrghw_3 = "f0000090QQQ",
913 xsdivsp_3 = "f00000c0QQQ",
914 xsmsubmsp_3 = "f00000c8QQQ",
915 xsadddp_3 = "f0000100QQQ",
916 xsmaddadp_3 = "f0000108QQQ",
917 xscmpudp_3 = "f0000118XQQ",
918 xscvdpuxws_2 = "f0000120Q-Q",
919 xsrdpi_2 = "f0000124Q-Q",
920 xsrsqrtedp_2 = "f0000128Q-Q",
921 xssqrtdp_2 = "f000012cQ-Q",
922 xssubdp_3 = "f0000140QQQ",
923 xsmaddmdp_3 = "f0000148QQQ",
924 xscmpodp_3 = "f0000158XQQ",
925 xscvdpsxws_2 = "f0000160Q-Q",
926 xsrdpiz_2 = "f0000164Q-Q",
927 xsredp_2 = "f0000168Q-Q",
928 xsmuldp_3 = "f0000180QQQ",
929 xsmsubadp_3 = "f0000188QQQ",
930 xxmrglw_3 = "f0000190QQQ",
931 xsrdpip_2 = "f00001a4Q-Q",
932 xstsqrtdp_2 = "f00001a8X-Q",
933 xsrdpic_2 = "f00001acQ-Q",
934 xsdivdp_3 = "f00001c0QQQ",
935 xsmsubmdp_3 = "f00001c8QQQ",
936 xsrdpim_2 = "f00001e4Q-Q",
937 xstdivdp_3 = "f00001e8XQQ",
938 xvaddsp_3 = "f0000200QQQ",
939 xvmaddasp_3 = "f0000208QQQ",
940 xvcmpeqsp_3 = "f0000218QQQ",
941 xvcvspuxws_2 = "f0000220Q-Q",
942 xvrspi_2 = "f0000224Q-Q",
943 xvrsqrtesp_2 = "f0000228Q-Q",
944 xvsqrtsp_2 = "f000022cQ-Q",
945 xvsubsp_3 = "f0000240QQQ",
946 xvmaddmsp_3 = "f0000248QQQ",
947 xvcmpgtsp_3 = "f0000258QQQ",
948 xvcvspsxws_2 = "f0000260Q-Q",
949 xvrspiz_2 = "f0000264Q-Q",
950 xvresp_2 = "f0000268Q-Q",
951 xvmulsp_3 = "f0000280QQQ",
952 xvmsubasp_3 = "f0000288QQQ",
953 xxspltw_3 = "f0000290QQg~",
954 xvcmpgesp_3 = "f0000298QQQ",
955 xvcvuxwsp_2 = "f00002a0Q-Q",
956 xvrspip_2 = "f00002a4Q-Q",
957 xvtsqrtsp_2 = "f00002a8X-Q",
958 xvrspic_2 = "f00002acQ-Q",
959 xvdivsp_3 = "f00002c0QQQ",
960 xvmsubmsp_3 = "f00002c8QQQ",
961 xvcvsxwsp_2 = "f00002e0Q-Q",
962 xvrspim_2 = "f00002e4Q-Q",
963 xvtdivsp_3 = "f00002e8XQQ",
964 xvadddp_3 = "f0000300QQQ",
965 xvmaddadp_3 = "f0000308QQQ",
966 xvcmpeqdp_3 = "f0000318QQQ",
967 xvcvdpuxws_2 = "f0000320Q-Q",
968 xvrdpi_2 = "f0000324Q-Q",
969 xvrsqrtedp_2 = "f0000328Q-Q",
970 xvsqrtdp_2 = "f000032cQ-Q",
971 xvsubdp_3 = "f0000340QQQ",
972 xvmaddmdp_3 = "f0000348QQQ",
973 xvcmpgtdp_3 = "f0000358QQQ",
974 xvcvdpsxws_2 = "f0000360Q-Q",
975 xvrdpiz_2 = "f0000364Q-Q",
976 xvredp_2 = "f0000368Q-Q",
977 xvmuldp_3 = "f0000380QQQ",
978 xvmsubadp_3 = "f0000388QQQ",
979 xvcmpgedp_3 = "f0000398QQQ",
980 xvcvuxwdp_2 = "f00003a0Q-Q",
981 xvrdpip_2 = "f00003a4Q-Q",
982 xvtsqrtdp_2 = "f00003a8X-Q",
983 xvrdpic_2 = "f00003acQ-Q",
984 xvdivdp_3 = "f00003c0QQQ",
985 xvmsubmdp_3 = "f00003c8QQQ",
986 xvcvsxwdp_2 = "f00003e0Q-Q",
987 xvrdpim_2 = "f00003e4Q-Q",
988 xvtdivdp_3 = "f00003e8XQQ",
989 xsnmaddasp_3 = "f0000408QQQ",
990 xxland_3 = "f0000410QQQ",
991 xscvdpsp_2 = "f0000424Q-Q",
992 xscvdpspn_2 = "f000042cQ-Q",
993 xsnmaddmsp_3 = "f0000448QQQ",
994 xxlandc_3 = "f0000450QQQ",
995 xsrsp_2 = "f0000464Q-Q",
996 xsnmsubasp_3 = "f0000488QQQ",
997 xxlor_3 = "f0000490QQQ",
998 xscvuxdsp_2 = "f00004a0Q-Q",
999 xsnmsubmsp_3 = "f00004c8QQQ",
1000 xxlxor_3 = "f00004d0QQQ",
1001 xscvsxdsp_2 = "f00004e0Q-Q",
1002 xsmaxdp_3 = "f0000500QQQ",
1003 xsnmaddadp_3 = "f0000508QQQ",
1004 xxlnor_3 = "f0000510QQQ",
1005 xscvdpuxds_2 = "f0000520Q-Q",
1006 xscvspdp_2 = "f0000524Q-Q",
1007 xscvspdpn_2 = "f000052cQ-Q",
1008 xsmindp_3 = "f0000540QQQ",
1009 xsnmaddmdp_3 = "f0000548QQQ",
1010 xxlorc_3 = "f0000550QQQ",
1011 xscvdpsxds_2 = "f0000560Q-Q",
1012 xsabsdp_2 = "f0000564Q-Q",
1013 xscpsgndp_3 = "f0000580QQQ",
1014 xsnmsubadp_3 = "f0000588QQQ",
1015 xxlnand_3 = "f0000590QQQ",
1016 xscvuxddp_2 = "f00005a0Q-Q",
1017 xsnabsdp_2 = "f00005a4Q-Q",
1018 xsnmsubmdp_3 = "f00005c8QQQ",
1019 xxleqv_3 = "f00005d0QQQ",
1020 xscvsxddp_2 = "f00005e0Q-Q",
1021 xsnegdp_2 = "f00005e4Q-Q",
1022 xvmaxsp_3 = "f0000600QQQ",
1023 xvnmaddasp_3 = "f0000608QQQ",
1024 ["xvcmpeqsp._3"] = "f0000618QQQ",
1025 xvcvspuxds_2 = "f0000620Q-Q",
1026 xvcvdpsp_2 = "f0000624Q-Q",
1027 xvminsp_3 = "f0000640QQQ",
1028 xvnmaddmsp_3 = "f0000648QQQ",
1029 ["xvcmpgtsp._3"] = "f0000658QQQ",
1030 xvcvspsxds_2 = "f0000660Q-Q",
1031 xvabssp_2 = "f0000664Q-Q",
1032 xvcpsgnsp_3 = "f0000680QQQ",
1033 xvnmsubasp_3 = "f0000688QQQ",
1034 ["xvcmpgesp._3"] = "f0000698QQQ",
1035 xvcvuxdsp_2 = "f00006a0Q-Q",
1036 xvnabssp_2 = "f00006a4Q-Q",
1037 xvnmsubmsp_3 = "f00006c8QQQ",
1038 xvcvsxdsp_2 = "f00006e0Q-Q",
1039 xvnegsp_2 = "f00006e4Q-Q",
1040 xvmaxdp_3 = "f0000700QQQ",
1041 xvnmaddadp_3 = "f0000708QQQ",
1042 ["xvcmpeqdp._3"] = "f0000718QQQ",
1043 xvcvdpuxds_2 = "f0000720Q-Q",
1044 xvcvspdp_2 = "f0000724Q-Q",
1045 xvmindp_3 = "f0000740QQQ",
1046 xvnmaddmdp_3 = "f0000748QQQ",
1047 ["xvcmpgtdp._3"] = "f0000758QQQ",
1048 xvcvdpsxds_2 = "f0000760Q-Q",
1049 xvabsdp_2 = "f0000764Q-Q",
1050 xvcpsgndp_3 = "f0000780QQQ",
1051 xvnmsubadp_3 = "f0000788QQQ",
1052 ["xvcmpgedp._3"] = "f0000798QQQ",
1053 xvcvuxddp_2 = "f00007a0Q-Q",
1054 xvnabsdp_2 = "f00007a4Q-Q",
1055 xvnmsubmdp_3 = "f00007c8QQQ",
1056 xvcvsxddp_2 = "f00007e0Q-Q",
1057 xvnegdp_2 = "f00007e4Q-Q",
1058
1059 -- Primary opcode 61:
1060 stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4.
1061
1062 -- Primary opcode 62:
1063 stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8.
504 1064
505 -- Primary opcode 63: 1065 -- Primary opcode 63:
506 fdiv_3 = "fc000024FFF.", 1066 fdiv_3 = "fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
526 frsp_2 = "fc000018F-F.", 1086 frsp_2 = "fc000018F-F.",
527 fctiw_2 = "fc00001cF-F.", 1087 fctiw_2 = "fc00001cF-F.",
528 fctiwz_2 = "fc00001eF-F.", 1088 fctiwz_2 = "fc00001eF-F.",
1089 ftdiv_2 = "fc000100X-F.",
1090 fctiwu_2 = "fc00011cF-F.",
1091 fctiwuz_2 = "fc00011eF-F.",
529 mtfsfi_2 = "fc00010cAA", -- NYI: upshift. 1092 mtfsfi_2 = "fc00010cAA", -- NYI: upshift.
530 fnabs_2 = "fc000110F-F.", 1093 fnabs_2 = "fc000110F-F.",
1094 ftsqrt_2 = "fc000140X-F.",
531 fabs_2 = "fc000210F-F.", 1095 fabs_2 = "fc000210F-F.",
532 frin_2 = "fc000310F-F.", 1096 frin_2 = "fc000310F-F.",
533 friz_2 = "fc000350F-F.", 1097 friz_2 = "fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
537 -- NYI: mtfsf, mtfsb0, mtfsb1. 1101 -- NYI: mtfsf, mtfsb0, mtfsb1.
538 fctid_2 = "fc00065cF-F.", 1102 fctid_2 = "fc00065cF-F.",
539 fctidz_2 = "fc00065eF-F.", 1103 fctidz_2 = "fc00065eF-F.",
1104 fmrgow_3 = "fc00068cFFF",
540 fcfid_2 = "fc00069cF-F.", 1105 fcfid_2 = "fc00069cF-F.",
1106 fctidu_2 = "fc00075cF-F.",
1107 fctiduz_2 = "fc00075eF-F.",
1108 fmrgew_3 = "fc00078cFFF",
1109 fcfidu_2 = "fc00079cF-F.",
1110
1111 daddq_3 = "fc000004F:F:F:.",
1112 dquaq_4 = "fc000006F:F:F:Z.",
1113 dmulq_3 = "fc000044F:F:F:.",
1114 drrndq_4 = "fc000046F:F:F:Z.",
1115 dscliq_3 = "fc000084F:F:6.",
1116 dquaiq_4 = "fc000086SF:~F:Z.",
1117 dscriq_3 = "fc0000c4F:F:6.",
1118 drintxq_4 = "fc0000c61F:~F:Z.",
1119 dcmpoq_3 = "fc000104XF:F:",
1120 dtstexq_3 = "fc000144XF:F:",
1121 dtstdcq_3 = "fc000184XF:6",
1122 dtstdgq_3 = "fc0001c4XF:6",
1123 drintnq_4 = "fc0001c61F:~F:Z.",
1124 dctqpq_2 = "fc000204F:-F:.",
1125 dctfixq_2 = "fc000244F:-F:.",
1126 ddedpdq_3 = "fc000284ZF:~F:.",
1127 dxexq_2 = "fc0002c4F:-F:.",
1128 dsubq_3 = "fc000404F:F:F:.",
1129 ddivq_3 = "fc000444F:F:F:.",
1130 dcmpuq_3 = "fc000504XF:F:",
1131 dtstsfq_3 = "fc000544XF:F:",
1132 drdpq_2 = "fc000604F:-F:.",
1133 dcffixq_2 = "fc000644F:-F:.",
1134 denbcdq_3 = "fc000684YF:~F:.",
1135 diexq_3 = "fc0006c4F:FF:.",
541 1136
542 -- Primary opcode 4, SPE APU extension: 1137 -- Primary opcode 4, SPE APU extension:
543 evaddw_3 = "10000200RRR", 1138 evaddw_3 = "10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
822do 1417do
823 local t = {} 1418 local t = {}
824 for k,v in pairs(map_op) do 1419 for k,v in pairs(map_op) do
825 if sub(v, -1) == "." then 1420 if type(v) == "string" and sub(v, -1) == "." then
826 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) 1421 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
827 t[sub(k, 1, -3).."."..sub(k, -2)] = v2 1422 t[sub(k, 1, -3).."."..sub(k, -2)] = v2
828 end 1423 end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
884 werror("bad register name `"..expr.."'") 1479 werror("bad register name `"..expr.."'")
885end 1480end
886 1481
-- Parse a register operand of the form "v<N>" and return N as a number.
-- Only N in 0..31 is accepted; anything else is reported via werror().
-- NOTE(review): presumably these are the vector registers used by the "V"
-- template parameter -- confirm against the opcode table.
1482local function parse_vr(expr)
1483 local r = match(expr, "^v([1-3]?[0-9])$")
1484 if r then
1485 r = tonumber(r)
-- The pattern also matches v32..v39, so the range is checked explicitly.
1486 if r <= 31 then return r end
1487 end
1488 werror("bad register name `"..expr.."'")
1489end
1490
-- Parse a register operand of the form "vs<N>" and return N as a number.
-- Only N in 0..63 is accepted; anything else is reported via werror().
-- NOTE(review): presumably these are the registers used by the "Q"/"q"
-- template parameters -- confirm against the opcode table.
1491local function parse_vs(expr)
1492 local r = match(expr, "^vs([1-6]?[0-9])$")
1493 if r then
1494 r = tonumber(r)
-- The pattern also matches vs64..vs69, so the range is checked explicitly.
1495 if r <= 63 then return r end
1496 end
1497 werror("bad register name `"..expr.."'")
1498end
1499
887local function parse_cr(expr) 1500local function parse_cr(expr)
888 local r = match(expr, "^cr([0-7])$") 1501 local r = match(expr, "^cr([0-7])$")
889 if r then return tonumber(r) end 1502 if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
900 werror("bad condition bit name `"..expr.."'") 1513 werror("bad condition bit name `"..expr.."'")
901end 1514end
902 1515
-- Environment table installed on chunks compiled by loadenv(). It is created
-- empty here, so global names looked up by such a chunk resolve to nil unless
-- something else populates the table.
1516local parse_ctx = {}
1517
-- loadenv(s): compile the Lua source string `s` with parse_ctx as its
-- environment and return the resulting function (or a false value if
-- compilation fails). Two implementations are selected once at load time:
-- when setfenv exists, loadstring() + setfenv() are used; otherwise the
-- four-argument form of load() receives the environment directly.
1518local loadenv = setfenv and function(s)
1519 local code = loadstring(s, "")
1520 if code then setfenv(code, parse_ctx) end
1521 return code
1522end or function(s)
1523 return load(s, "", nil, parse_ctx)
1524end
1525
-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-- Returns the numeric value of `n`, or nil if `n` is neither a numeric
-- literal nor a Lua expression that evaluates to a number when compiled
-- through loadenv().
local function parse_number(n)
  local x = tonumber(n)
  if x then return x end
  local code = loadenv("return "..n)
  if code then
    local ok, y = pcall(code)
    -- Only accept numeric results. An expression over unknown names can
    -- still evaluate without error to a non-number (e.g. `a == b` yields
    -- true when both names are nil in the chunk's environment). Returning
    -- that value would make callers treat the operand as a constant and feed
    -- a non-number to the bit operations instead of handling it as a
    -- run-time expression.
    if ok and type(y) == "number" then return y end
  end
  return nil
end
1537
903local function parse_imm(imm, bits, shift, scale, signed) 1538local function parse_imm(imm, bits, shift, scale, signed)
904 local n = tonumber(imm) 1539 local n = parse_number(imm)
905 if n then 1540 if n then
906 local m = sar(n, scale) 1541 local m = sar(n, scale)
907 if shl(m, scale) == n then 1542 if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
914 end 1549 end
915 end 1550 end
916 werror("out of range immediate `"..imm.."'") 1551 werror("out of range immediate `"..imm.."'")
917 elseif match(imm, "^r([1-3]?[0-9])$") or 1552 elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
1553 match(imm, "^vs([1-6]?[0-9])$") or
918 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1554 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
919 werror("expected immediate operand, got register") 1555 werror("expected immediate operand, got register")
920 else 1556 else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
924end 1560end
925 1561
926local function parse_shiftmask(imm, isshift) 1562local function parse_shiftmask(imm, isshift)
927 local n = tonumber(imm) 1563 local n = parse_number(imm)
928 if n then 1564 if n then
929 if shr(n, 6) == 0 then 1565 if shr(n, 6) == 0 then
930 local lsb = band(imm, 31) 1566 local lsb = band(n, 31)
931 local msb = imm - lsb 1567 local msb = n - lsb
932 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) 1568 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
933 end 1569 end
934 werror("out of range immediate `"..imm.."'") 1570 werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
936 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1572 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
937 werror("expected immediate operand, got register") 1573 werror("expected immediate operand, got register")
938 else 1574 else
939 werror("NYI: parameterized 64 bit shift/mask") 1575 waction("IMMSH", isshift and 1 or 0, imm)
1576 return 0;
940 end 1577 end
941end 1578end
942 1579
@@ -1011,7 +1648,7 @@ end
1011------------------------------------------------------------------------------ 1648------------------------------------------------------------------------------
1012 1649
1013-- Handle opcodes defined with template strings. 1650-- Handle opcodes defined with template strings.
1014map_op[".template__"] = function(params, template, nparams) 1651op_template = function(params, template, nparams)
1015 if not params then return sub(template, 9) end 1652 if not params then return sub(template, 9) end
1016 local op = tonumber(sub(template, 1, 8), 16) 1653 local op = tonumber(sub(template, 1, 8), 16)
1017 local n, rs = 1, 26 1654 local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
1027 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 1664 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
1028 elseif p == "F" then 1665 elseif p == "F" then
1029 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 1666 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
1667 elseif p == "V" then
1668 rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
1669 elseif p == "Q" then
1670 local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
1671 local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
1672 op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
1673 elseif p == "q" then
1674 local vs = parse_vs(params[n]); n = n + 1
1675 op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
1030 elseif p == "A" then 1676 elseif p == "A" then
1031 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 1677 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
1032 elseif p == "S" then 1678 elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
1047 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 1693 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
1048 elseif p == "X" then 1694 elseif p == "X" then
1049 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 1695 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
1696 elseif p == "1" then
1697 rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
1698 elseif p == "g" then
1699 rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
1700 elseif p == "3" then
1701 rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
1702 elseif p == "P" then
1703 rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1704 elseif p == "p" then
1705 op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1706 elseif p == "6" then
1707 rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
1708 elseif p == "Y" then
1709 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
1710 elseif p == "y" then
1711 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
1712 elseif p == "Z" then
1713 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
1714 elseif p == "z" then
1715 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
1050 elseif p == "W" then 1716 elseif p == "W" then
1051 op = op + parse_cr(params[n]); n = n + 1 1717 op = op + parse_cr(params[n]); n = n + 1
1052 elseif p == "G" then 1718 elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
1071 local lo = band(op, mm) 1737 local lo = band(op, mm)
1072 local hi = band(op, shl(mm, 5)) 1738 local hi = band(op, shl(mm, 5))
1073 op = op - lo - hi + shl(lo, 5) + shr(hi, 5) 1739 op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
1740 elseif p == ":" then
1741 if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
1074 elseif p == "-" then 1742 elseif p == "-" then
1075 rs = rs - 5 1743 rs = rs - 5
1076 elseif p == "." then 1744 elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
1082 wputpos(pos, op) 1750 wputpos(pos, op)
1083end 1751end
1084 1752
1753map_op[".template__"] = op_template
1754
1085------------------------------------------------------------------------------ 1755------------------------------------------------------------------------------
1086 1756
1087-- Pseudo-opcode to mark the position where the action list is to be emitted. 1757-- Pseudo-opcode to mark the position where the action list is to be emitted.
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
index 8ed23ce8..8914596a 100644
--- a/dynasm/dasm_proto.h
+++ b/dynasm/dasm_proto.h
@@ -10,8 +10,8 @@
10#include <stddef.h> 10#include <stddef.h>
11#include <stdarg.h> 11#include <stdarg.h>
12 12
13#define DASM_IDENT "DynASM 1.3.0" 13#define DASM_IDENT "DynASM 1.5.0"
14#define DASM_VERSION 10300 /* 1.3.0 */ 14#define DASM_VERSION 10500 /* 1.5.0 */
15 15
16#ifndef Dst_DECL 16#ifndef Dst_DECL
17#define Dst_DECL dasm_State **Dst 17#define Dst_DECL dasm_State **Dst
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
index c519907d..d8d4928c 100644
--- a/dynasm/dasm_x86.h
+++ b/dynasm/dasm_x86.h
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
170 dasm_State *D = Dst_REF; 170 dasm_State *D = Dst_REF;
171 dasm_ActList p = D->actionlist + start; 171 dasm_ActList p = D->actionlist + start;
172 dasm_Section *sec = D->section; 172 dasm_Section *sec = D->section;
173 int pos = sec->pos, ofs = sec->ofs, mrm = 4; 173 int pos = sec->pos, ofs = sec->ofs, mrm = -1;
174 int *b; 174 int *b;
175 175
176 if (pos >= sec->epos) { 176 if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
193 b[pos++] = n; 193 b[pos++] = n;
194 switch (action) { 194 switch (action) {
195 case DASM_DISP: 195 case DASM_DISP:
196 if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } 196 if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
197 /* fallthrough */ 197 /* fallthrough */
198 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ 198 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
199 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ 199 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
@@ -204,11 +204,17 @@ void dasm_put(Dst_DECL, int start, ...)
204 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; 204 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
205 case DASM_SPACE: p++; ofs += n; break; 205 case DASM_SPACE: p++; ofs += n; break;
206 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ 206 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
207 case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); 207 case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
208 if (*p++ == 1 && *p == DASM_DISP) mrm = n; 208 if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
209 if (*p < 0x20 && (n&7) == 4) ofs++;
210 switch ((*p++ >> 3) & 3) {
211 case 3: n |= b[pos-3]; /* fallthrough */
212 case 2: n |= b[pos-2]; /* fallthrough */
213 case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
214 }
209 continue; 215 continue;
210 } 216 }
211 mrm = 4; 217 mrm = -1;
212 } else { 218 } else {
213 int *pl, n; 219 int *pl, n;
214 switch (action) { 220 switch (action) {
@@ -233,8 +239,11 @@ void dasm_put(Dst_DECL, int start, ...)
233 } 239 }
234 pos++; 240 pos++;
235 ofs += 4; /* Maximum offset needed. */ 241 ofs += 4; /* Maximum offset needed. */
236 if (action == DASM_REL_LG || action == DASM_REL_PC) 242 if (action == DASM_REL_LG || action == DASM_REL_PC) {
237 b[pos++] = ofs; /* Store pass1 offset estimate. */ 243 b[pos++] = ofs; /* Store pass1 offset estimate. */
244 } else if (sizeof(ptrdiff_t) == 8) {
245 ofs += 4;
246 }
238 break; 247 break;
239 case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; 248 case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
240 case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); 249 case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
@@ -359,10 +368,22 @@ int dasm_link(Dst_DECL, size_t *szp)
359 do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) 368 do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
360#define dasmd(x) \ 369#define dasmd(x) \
361 do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) 370 do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
371#define dasmq(x) \
372 do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
362#else 373#else
363#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) 374#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
364#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) 375#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
376#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0)
365#endif 377#endif
/* Write the address-sized value x at cp and return the advanced pointer.
** The width is chosen from sizeof(ptrdiff_t): 8 bytes via dasmq(), otherwise
** 4 bytes via dasmd(). Both macros advance the local cp, which is why the
** updated pointer has to be handed back to the caller.
*/
378static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
379{
380 if (sizeof(ptrdiff_t) == 8)
381 dasmq((unsigned long long)x);
382 else
383 dasmd((unsigned int)x);
384 return cp;
385}
/* Statement-style wrapper: emits x and updates the caller's cp in place. */
386#define dasma(x) (cp = dasma_(cp, (x)))
367/* Pass 3: Encode sections. */ 388/* Pass 3: Encode sections. */
368int dasm_encode(Dst_DECL, void *buffer) 389int dasm_encode(Dst_DECL, void *buffer)
@@ -402,7 +423,27 @@ int dasm_encode(Dst_DECL, void *buffer)
402 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; 423 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
403 /* fallthrough */ 424 /* fallthrough */
404 case DASM_IMM_W: dasmw(n); break; 425 case DASM_IMM_W: dasmw(n); break;
405 case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } 426 case DASM_VREG: {
427 int t = *p++;
428 unsigned char *ex = cp - (t&7);
429 if ((n & 8) && t < 0xa0) {
430 if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
431 n &= 7;
432 } else if (n & 0x10) {
433 if (*ex & 0x80) {
434 *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
435 }
436 while (++ex < cp) ex[-1] = *ex;
437 if (mark) mark--;
438 cp--;
439 n &= 7;
440 }
441 if (t >= 0xc0) n <<= 4;
442 else if (t >= 0x40) n <<= 3;
443 else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
444 cp[-1] ^= n;
445 break;
446 }
406 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; 447 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
407 b++; n = (int)(ptrdiff_t)D->globals[-n]; 448 b++; n = (int)(ptrdiff_t)D->globals[-n];
408 /* fallthrough */ 449 /* fallthrough */
@@ -417,12 +458,13 @@ int dasm_encode(Dst_DECL, void *buffer)
417 goto wb; 458 goto wb;
418 } 459 }
419 case DASM_IMM_LG: 460 case DASM_IMM_LG:
420 p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } 461 p++;
462 if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
421 /* fallthrough */ 463 /* fallthrough */
422 case DASM_IMM_PC: { 464 case DASM_IMM_PC: {
423 int *pb = DASM_POS2PTR(D, n); 465 int *pb = DASM_POS2PTR(D, n);
424 n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); 466 dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
425 goto wd; 467 break;
426 } 468 }
427 case DASM_LABEL_LG: { 469 case DASM_LABEL_LG: {
428 int idx = *p++; 470 int idx = *p++;
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index c37d6a40..fe2cf579 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -11,9 +11,9 @@ local x64 = x64
11local _info = { 11local _info = {
12 arch = x64 and "x64" or "x86", 12 arch = x64 and "x64" or "x86",
13 description = "DynASM x86/x64 module", 13 description = "DynASM x86/x64 module",
14 version = "1.3.0", 14 version = "1.5.0",
15 vernum = 10300, 15 vernum = 10500,
16 release = "2011-05-05", 16 release = "2021-05-02",
17 author = "Mike Pall", 17 author = "Mike Pall",
18 license = "MIT", 18 license = "MIT",
19} 19}
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
27local _s = string 27local _s = string
28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char 28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub 29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30local concat, sort = table.concat, table.sort 30local concat, sort, remove = table.concat, table.sort, table.remove
31local bit = bit or require("bit") 31local bit = bit or require("bit")
32local band, shl, shr = bit.band, bit.lshift, bit.rshift 32local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33 33
34-- Inherited tables and callbacks. 34-- Inherited tables and callbacks.
35local g_opt, g_arch 35local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
41 -- int arg, 1 buffer pos: 41 -- int arg, 1 buffer pos:
42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", 42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num): 43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44 "VREG", "SPACE", -- !x64: VREG support NYI. 44 "VREG", "SPACE",
45 -- ptrdiff_t arg, 1 buffer pos (address): !x64 45 -- ptrdiff_t arg, 1 buffer pos (address): !x64
46 "SETLABEL", "REL_A", 46 "SETLABEL", "REL_A",
47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): 47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
83-- Current number of section buffer positions for dasm_put(). 83-- Current number of section buffer positions for dasm_put().
84local secpos = 1 84local secpos = 1
85 85
86-- VREG kind encodings, pre-shifted by 5 bits.
-- Maps the textual kind passed to wvreg() to the base value of the byte that
-- follows a VREG action. Several kinds share the value 0x20. wvreg() adds
-- further low-order information (shrink count * 8 and psz) to this base.
87local map_vreg = {
88 ["modrm.rm.m"] = 0x00,
89 ["modrm.rm.r"] = 0x20,
90 ["opcode"] = 0x20,
91 ["sib.base"] = 0x20,
92 ["sib.index"] = 0x40,
93 ["modrm.reg"] = 0x80,
94 ["vex.v"] = 0xa0,
95 ["imm.hi"] = 0xc0,
96}
97
98-- Current number of VREG actions contributing to REX/VEX shrinkage.
-- Incremented by wvreg() and reset to 0 whenever a non-deferred VREG action
-- folds the count into its kind byte.
99local vreg_shrink_count = 0
100
86------------------------------------------------------------------------------ 101------------------------------------------------------------------------------
87 102
88-- Compute action numbers for action names. 103-- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
134 if a or num then secpos = secpos + (num or 1) end 149 if a or num then secpos = secpos + (num or 1) end
135end 150end
136 151
-- Optionally add a VREG action.
--   kind:  key into map_vreg selecting the base value of the kind byte.
--   vreg:  variable register expression; nil/false means "nothing to emit".
--   psz:   value added to the emitted kind byte (defaults to 0).
--          NOTE(review): presumably the prefix size -- confirm with callers.
--   sk:    threshold; a kind whose map_vreg value is below it increments
--          vreg_shrink_count (defaults to 0, i.e. never).
--   defer: when true, the accumulated shrink count is kept for a later
--          VREG action instead of being folded into this one.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  -- Validate the kind before emitting anything, and report the offending
  -- kind (not the register expression) if it is unknown.
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  waction("VREG", vreg)
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    -- Fold the pending shrink count into the kind byte and start over.
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end
166
137-- Add call to embedded DynASM C code. 167-- Add call to embedded DynASM C code.
138local function wcall(func, args) 168local function wcall(func, args)
139 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) 169 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
299 local iname = format("@%s%x%s", sz, i, needrex and "R" or "") 329 local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
300 if needrex then map_reg_needrex[iname] = true end 330 if needrex then map_reg_needrex[iname] = true end
301 local name 331 local name
302 if sz == "o" then name = format("xmm%d", i) 332 if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
303 elseif sz == "f" then name = format("st%d", i) 333 elseif sz == "f" then name = format("st%d", i)
304 else name = format("r%d%s", i, sz == addrsize and "" or sz) end 334 else name = format("r%d%s", i, sz == addrsize and "" or sz) end
305 map_archdef[name] = iname 335 map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) 356mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327map_reg_valid_index[map_archdef.esp] = false 357map_reg_valid_index[map_archdef.esp] = false
328if x64 then map_reg_valid_index[map_archdef.rsp] = false end 358if x64 then map_reg_valid_index[map_archdef.rsp] = false end
359if x64 then map_reg_needrex[map_archdef.Rb] = true end
329map_archdef["Ra"] = "@"..addrsize 360map_archdef["Ra"] = "@"..addrsize
330 361
331-- FP registers (internally tword sized, but use "f" as operand size). 362-- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
334-- SSE registers (oword sized, but qword and dword accessible). 365-- SSE registers (oword sized, but qword and dword accessible).
335mkrmap("o", "xmm") 366mkrmap("o", "xmm")
336 367
368-- AVX registers (yword sized, but oword, qword and dword accessible).
369mkrmap("y", "ymm")
370
337-- Operand size prefixes to codes. 371-- Operand size prefixes to codes.
338local map_opsize = { 372local map_opsize = {
339 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", 373 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
340 aword = addrsize, 374 tword = "t", aword = addrsize,
341} 375}
342 376
343-- Operand size code to number. 377-- Operand size code to number.
344local map_opsizenum = { 378local map_opsizenum = {
345 b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, 379 b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
346} 380}
347 381
348-- Operand size code to name. 382-- Operand size code to name.
349local map_opsizename = { 383local map_opsizename = {
350 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", 384 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
351 f = "fpword", 385 t = "tword", f = "fpword",
352} 386}
353 387
354-- Valid index register scale factors. 388-- Valid index register scale factors.
@@ -450,6 +484,22 @@ local function wputdarg(n)
450 end 484 end
451end 485end
452 486
487-- Put signed or unsigned qword or arg.
-- A Lua number is emitted immediately as 8 bytes, lowest byte first: the low
-- 4 bytes come from n, the upper 4 bytes are 0xff for negative n and 0x00
-- otherwise. Any other value is treated as a C expression and emitted as two
-- IMM_D actions: first its low 32 bits, then its high 32 bits.
488local function wputqarg(n)
489 local tn = type(n)
490 if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
491 wputb(band(n, 255))
492 wputb(band(shr(n, 8), 255))
493 wputb(band(shr(n, 16), 255))
494 wputb(shr(n, 24))
-- Sign-extend into the upper dword.
495 local sign = n < 0 and 255 or 0
496 wputb(sign); wputb(sign); wputb(sign); wputb(sign)
497 else
498 waction("IMM_D", format("(unsigned int)(%s)", n))
499 waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
500 end
501end
502
453-- Put operand-size dependent number or arg (defaults to dword). 503-- Put operand-size dependent number or arg (defaults to dword).
454local function wputszarg(sz, n) 504local function wputszarg(sz, n)
455 if not sz or sz == "d" or sz == "q" then wputdarg(n) 505 if not sz or sz == "d" or sz == "q" then wputdarg(n)
@@ -460,9 +510,45 @@ local function wputszarg(sz, n)
460end 510end
461 511
462-- Put multi-byte opcode with operand-size dependent modifications. 512-- Put multi-byte opcode with operand-size dependent modifications.
463local function wputop(sz, op, rex) 513local function wputop(sz, op, rex, vex, vregr, vregxb)
514 local psz, sk = 0, nil
515 if vex then
516 local tail
517 if vex.m == 1 and band(rex, 11) == 0 then
518 if x64 and vregxb then
519 sk = map_vreg["modrm.reg"]
520 else
521 wputb(0xc5)
522 tail = shl(bxor(band(rex, 4), 4), 5)
523 psz = 3
524 end
525 end
526 if not tail then
527 wputb(0xc4)
528 wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
529 tail = shl(band(rex, 8), 4)
530 psz = 4
531 end
532 local reg, vreg = 0, nil
533 if vex.v then
534 reg = vex.v.reg
535 if not reg then werror("bad vex operand") end
536 if reg < 0 then reg = 0; vreg = vex.v.vreg end
537 end
538 if sz == "y" or vex.l then tail = tail + 4 end
539 wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
540 wvreg("vex.v", vreg)
541 rex = 0
542 if op >= 256 then werror("bad vex opcode") end
543 else
544 if rex ~= 0 then
545 if not x64 then werror("bad operand size") end
546 elseif (vregr or vregxb) and x64 then
547 rex = 0x10
548 sk = map_vreg["vex.v"]
549 end
550 end
464 local r 551 local r
465 if rex ~= 0 and not x64 then werror("bad operand size") end
466 if sz == "w" then wputb(102) end 552 if sz == "w" then wputb(102) end
467 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] 553 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
468 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end 554 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +557,20 @@ local function wputop(sz, op, rex)
471 if rex ~= 0 then 557 if rex ~= 0 then
472 local opc3 = band(op, 0xffff00) 558 local opc3 = band(op, 0xffff00)
473 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then 559 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
474 wputb(64 + band(rex, 15)); rex = 0 560 wputb(64 + band(rex, 15)); rex = 0; psz = 2
475 end 561 end
476 end 562 end
477 wputb(shr(op, 16)); op = band(op, 0xffff) 563 wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
478 end 564 end
479 if op >= 256 then 565 if op >= 256 then
480 local b = shr(op, 8) 566 local b = shr(op, 8)
481 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end 567 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
482 wputb(b) 568 wputb(b); op = band(op, 255); psz = psz + 1
483 op = band(op, 255)
484 end 569 end
485 if rex ~= 0 then wputb(64 + band(rex, 15)) end 570 if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
486 if sz == "b" then op = op - 1 end 571 if sz == "b" then op = op - 1 end
487 wputb(op) 572 wputb(op)
573 return psz, sk
488end 574end
489 575
490-- Put ModRM or SIB formatted byte. 576-- Put ModRM or SIB formatted byte.
@@ -494,7 +580,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
494end 580end
495 581
496-- Put ModRM/SIB plus optional displacement. 582-- Put ModRM/SIB plus optional displacement.
497local function wputmrmsib(t, imark, s, vsreg) 583local function wputmrmsib(t, imark, s, vsreg, psz, sk)
498 local vreg, vxreg 584 local vreg, vxreg
499 local reg, xreg = t.reg, t.xreg 585 local reg, xreg = t.reg, t.xreg
500 if reg and reg < 0 then reg = 0; vreg = t.vreg end 586 if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +590,8 @@ local function wputmrmsib(t, imark, s, vsreg)
504 -- Register mode. 590 -- Register mode.
505 if sub(t.mode, 1, 1) == "r" then 591 if sub(t.mode, 1, 1) == "r" then
506 wputmodrm(3, s, reg) 592 wputmodrm(3, s, reg)
507 if vsreg then waction("VREG", vsreg); wputxb(2) end 593 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
508 if vreg then waction("VREG", vreg); wputxb(0) end 594 wvreg("modrm.rm.r", vreg, psz+1, sk)
509 return 595 return
510 end 596 end
511 597
@@ -519,21 +605,22 @@ local function wputmrmsib(t, imark, s, vsreg)
519 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) 605 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
520 wputmodrm(0, s, 4) 606 wputmodrm(0, s, 4)
521 if imark == "I" then waction("MARK") end 607 if imark == "I" then waction("MARK") end
522 if vsreg then waction("VREG", vsreg); wputxb(2) end 608 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
523 wputmodrm(t.xsc, xreg, 5) 609 wputmodrm(t.xsc, xreg, 5)
524 if vxreg then waction("VREG", vxreg); wputxb(3) end 610 wvreg("sib.index", vxreg, psz+2, sk)
525 else 611 else
526 -- Pure 32 bit displacement. 612 -- Pure 32 bit displacement.
527 if x64 and tdisp ~= "table" then 613 if x64 and tdisp ~= "table" then
528 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) 614 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
615 wvreg("modrm.reg", vsreg, psz+1, sk)
529 if imark == "I" then waction("MARK") end 616 if imark == "I" then waction("MARK") end
530 wputmodrm(0, 4, 5) 617 wputmodrm(0, 4, 5)
531 else 618 else
532 riprel = x64 619 riprel = x64
533 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) 620 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
621 wvreg("modrm.reg", vsreg, psz+1, sk)
534 if imark == "I" then waction("MARK") end 622 if imark == "I" then waction("MARK") end
535 end 623 end
536 if vsreg then waction("VREG", vsreg); wputxb(2) end
537 end 624 end
538 if riprel then -- Emit rip-relative displacement. 625 if riprel then -- Emit rip-relative displacement.
539 if match("UWSiI", imark) then 626 if match("UWSiI", imark) then
@@ -561,16 +648,16 @@ local function wputmrmsib(t, imark, s, vsreg)
561 if xreg or band(reg, 7) == 4 then 648 if xreg or band(reg, 7) == 4 then
562 wputmodrm(m or 2, s, 4) -- ModRM. 649 wputmodrm(m or 2, s, 4) -- ModRM.
563 if m == nil or imark == "I" then waction("MARK") end 650 if m == nil or imark == "I" then waction("MARK") end
564 if vsreg then waction("VREG", vsreg); wputxb(2) end 651 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
565 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. 652 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
566 if vxreg then waction("VREG", vxreg); wputxb(3) end 653 wvreg("sib.index", vxreg, psz+2, sk, vreg)
567 if vreg then waction("VREG", vreg); wputxb(1) end 654 wvreg("sib.base", vreg, psz+2, sk)
568 else 655 else
569 wputmodrm(m or 2, s, reg) -- ModRM. 656 wputmodrm(m or 2, s, reg) -- ModRM.
570 if (imark == "I" and (m == 1 or m == 2)) or 657 if (imark == "I" and (m == 1 or m == 2)) or
571 (m == nil and (vsreg or vreg)) then waction("MARK") end 658 (m == nil and (vsreg or vreg)) then waction("MARK") end
572 if vsreg then waction("VREG", vsreg); wputxb(2) end 659 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
573 if vreg then waction("VREG", vreg); wputxb(1) end 660 wvreg("modrm.rm.m", vreg, psz+1, sk)
574 end 661 end
575 662
576 -- Put displacement. 663 -- Put displacement.
@@ -592,10 +679,16 @@ local function opmodestr(op, args)
592end 679end
593 680
594-- Convert number to valid integer or nil. 681-- Convert number to valid integer or nil.
595local function toint(expr) 682local function toint(expr, isqword)
596 local n = tonumber(expr) 683 local n = tonumber(expr)
597 if n then 684 if n then
598 if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then 685 if n % 1 ~= 0 then
686 werror("not an integer number `"..expr.."'")
687 elseif isqword then
688 if n < -2147483648 or n > 2147483647 then
689 n = nil -- Handle it as an expression to avoid precision loss.
690 end
691 elseif n < -2147483648 or n > 4294967295 then
599 werror("bad integer number `"..expr.."'") 692 werror("bad integer number `"..expr.."'")
600 end 693 end
601 return n 694 return n
@@ -678,7 +771,7 @@ local function rtexpr(expr)
678end 771end
679 772
680-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. 773-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
681local function parseoperand(param) 774local function parseoperand(param, isqword)
682 local t = {} 775 local t = {}
683 776
684 local expr = param 777 local expr = param
@@ -766,7 +859,7 @@ local function parseoperand(param)
766 t.disp = dispexpr(tailx) 859 t.disp = dispexpr(tailx)
767 else 860 else
768 -- imm or opsize*imm 861 -- imm or opsize*imm
769 local imm = toint(expr) 862 local imm = toint(expr, isqword)
770 if not imm and sub(expr, 1, 1) == "*" and t.opsize then 863 if not imm and sub(expr, 1, 1) == "*" and t.opsize then
771 imm = toint(sub(expr, 2)) 864 imm = toint(sub(expr, 2))
772 if imm then 865 if imm then
@@ -881,9 +974,16 @@ end
881-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. 974-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
882-- The spare 3 bits are either filled with the last hex digit or 975-- The spare 3 bits are either filled with the last hex digit or
883-- the result from a previous "r"/"R". The opcode is restored. 976-- the result from a previous "r"/"R". The opcode is restored.
977-- "u" Use VEX encoding, vvvv unused.
978-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
979-- removed from the list used by future characters).
980-- "w" Use VEX encoding, vvvv from 3rd operand.
981-- "L" Force VEX.L
884-- 982--
885-- All of the following characters force a flush of the opcode: 983-- All of the following characters force a flush of the opcode:
886-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. 984-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
985-- "s" stores a 4 bit immediate from the last register operand,
986-- followed by 4 zero bits.
887-- "S" stores a signed 8 bit immediate from the last operand. 987-- "S" stores a signed 8 bit immediate from the last operand.
888-- "U" stores an unsigned 8 bit immediate from the last operand. 988-- "U" stores an unsigned 8 bit immediate from the last operand.
889-- "W" stores an unsigned 16 bit immediate from the last operand. 989-- "W" stores an unsigned 16 bit immediate from the last operand.
@@ -1226,46 +1326,14 @@ local map_op = {
1226 movups_2 = "rmo:0F10rM|mro:0F11Rm", 1326 movups_2 = "rmo:0F10rM|mro:0F11Rm",
1227 orpd_2 = "rmo:660F56rM", 1327 orpd_2 = "rmo:660F56rM",
1228 orps_2 = "rmo:0F56rM", 1328 orps_2 = "rmo:0F56rM",
1229 packssdw_2 = "rmo:660F6BrM",
1230 packsswb_2 = "rmo:660F63rM",
1231 packuswb_2 = "rmo:660F67rM",
1232 paddb_2 = "rmo:660FFCrM",
1233 paddd_2 = "rmo:660FFErM",
1234 paddq_2 = "rmo:660FD4rM",
1235 paddsb_2 = "rmo:660FECrM",
1236 paddsw_2 = "rmo:660FEDrM",
1237 paddusb_2 = "rmo:660FDCrM",
1238 paddusw_2 = "rmo:660FDDrM",
1239 paddw_2 = "rmo:660FFDrM",
1240 pand_2 = "rmo:660FDBrM",
1241 pandn_2 = "rmo:660FDFrM",
1242 pause_0 = "F390", 1329 pause_0 = "F390",
1243 pavgb_2 = "rmo:660FE0rM",
1244 pavgw_2 = "rmo:660FE3rM",
1245 pcmpeqb_2 = "rmo:660F74rM",
1246 pcmpeqd_2 = "rmo:660F76rM",
1247 pcmpeqw_2 = "rmo:660F75rM",
1248 pcmpgtb_2 = "rmo:660F64rM",
1249 pcmpgtd_2 = "rmo:660F66rM",
1250 pcmpgtw_2 = "rmo:660F65rM",
1251 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. 1330 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1252 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", 1331 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1253 pmaddwd_2 = "rmo:660FF5rM",
1254 pmaxsw_2 = "rmo:660FEErM",
1255 pmaxub_2 = "rmo:660FDErM",
1256 pminsw_2 = "rmo:660FEArM",
1257 pminub_2 = "rmo:660FDArM",
1258 pmovmskb_2 = "rr/do:660FD7rM", 1332 pmovmskb_2 = "rr/do:660FD7rM",
1259 pmulhuw_2 = "rmo:660FE4rM",
1260 pmulhw_2 = "rmo:660FE5rM",
1261 pmullw_2 = "rmo:660FD5rM",
1262 pmuludq_2 = "rmo:660FF4rM",
1263 por_2 = "rmo:660FEBrM",
1264 prefetchnta_1 = "xb:n0F180m", 1333 prefetchnta_1 = "xb:n0F180m",
1265 prefetcht0_1 = "xb:n0F181m", 1334 prefetcht0_1 = "xb:n0F181m",
1266 prefetcht1_1 = "xb:n0F182m", 1335 prefetcht1_1 = "xb:n0F182m",
1267 prefetcht2_1 = "xb:n0F183m", 1336 prefetcht2_1 = "xb:n0F183m",
1268 psadbw_2 = "rmo:660FF6rM",
1269 pshufd_3 = "rmio:660F70rMU", 1337 pshufd_3 = "rmio:660F70rMU",
1270 pshufhw_3 = "rmio:F30F70rMU", 1338 pshufhw_3 = "rmio:F30F70rMU",
1271 pshuflw_3 = "rmio:F20F70rMU", 1339 pshuflw_3 = "rmio:F20F70rMU",
@@ -1279,23 +1347,6 @@ local map_op = {
1279 psrldq_2 = "rio:660F733mU", 1347 psrldq_2 = "rio:660F733mU",
1280 psrlq_2 = "rmo:660FD3rM|rio:660F732mU", 1348 psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1281 psrlw_2 = "rmo:660FD1rM|rio:660F712mU", 1349 psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1282 psubb_2 = "rmo:660FF8rM",
1283 psubd_2 = "rmo:660FFArM",
1284 psubq_2 = "rmo:660FFBrM",
1285 psubsb_2 = "rmo:660FE8rM",
1286 psubsw_2 = "rmo:660FE9rM",
1287 psubusb_2 = "rmo:660FD8rM",
1288 psubusw_2 = "rmo:660FD9rM",
1289 psubw_2 = "rmo:660FF9rM",
1290 punpckhbw_2 = "rmo:660F68rM",
1291 punpckhdq_2 = "rmo:660F6ArM",
1292 punpckhqdq_2 = "rmo:660F6DrM",
1293 punpckhwd_2 = "rmo:660F69rM",
1294 punpcklbw_2 = "rmo:660F60rM",
1295 punpckldq_2 = "rmo:660F62rM",
1296 punpcklqdq_2 = "rmo:660F6CrM",
1297 punpcklwd_2 = "rmo:660F61rM",
1298 pxor_2 = "rmo:660FEFrM",
1299 rcpps_2 = "rmo:0F53rM", 1350 rcpps_2 = "rmo:0F53rM",
1300 rcpss_2 = "rro:F30F53rM|rx/od:", 1351 rcpss_2 = "rro:F30F53rM|rx/od:",
1301 rsqrtps_2 = "rmo:0F52rM", 1352 rsqrtps_2 = "rmo:0F52rM",
@@ -1413,6 +1464,327 @@ local map_op = {
1413 movntsd_2 = "xr/qo:nF20F2BRm", 1464 movntsd_2 = "xr/qo:nF20F2BRm",
1414 movntss_2 = "xr/do:F30F2BRm", 1465 movntss_2 = "xr/do:F30F2BRm",
1415 -- popcnt is also in SSE4.2 1466 -- popcnt is also in SSE4.2
1467
1468 -- AES-NI
1469 aesdec_2 = "rmo:660F38DErM",
1470 aesdeclast_2 = "rmo:660F38DFrM",
1471 aesenc_2 = "rmo:660F38DCrM",
1472 aesenclast_2 = "rmo:660F38DDrM",
1473 aesimc_2 = "rmo:660F38DBrM",
1474 aeskeygenassist_3 = "rmio:660F3ADFrMU",
1475 pclmulqdq_3 = "rmio:660F3A44rMU",
1476
1477 -- AVX FP ops
1478 vaddsubpd_3 = "rrmoy:660FVD0rM",
1479 vaddsubps_3 = "rrmoy:F20FVD0rM",
1480 vandpd_3 = "rrmoy:660FV54rM",
1481 vandps_3 = "rrmoy:0FV54rM",
1482 vandnpd_3 = "rrmoy:660FV55rM",
1483 vandnps_3 = "rrmoy:0FV55rM",
1484 vblendpd_4 = "rrmioy:660F3AV0DrMU",
1485 vblendps_4 = "rrmioy:660F3AV0CrMU",
1486 vblendvpd_4 = "rrmroy:660F3AV4BrMs",
1487 vblendvps_4 = "rrmroy:660F3AV4ArMs",
1488 vbroadcastf128_2 = "rx/yo:660F38u1ArM",
1489 vcmppd_4 = "rrmioy:660FVC2rMU",
1490 vcmpps_4 = "rrmioy:0FVC2rMU",
1491 vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
1492 vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
1493 vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
1494 vcomiss_2 = "rro:0Fu2FrM|rx/od:",
1495 vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
1496 vcvtdq2ps_2 = "rmoy:0Fu5BrM",
1497 vcvtpd2dq_2 = "rmoy:F20FuE6rM",
1498 vcvtpd2ps_2 = "rmoy:660Fu5ArM",
1499 vcvtps2dq_2 = "rmoy:660Fu5BrM",
1500 vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
1501 vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1502 vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
1503 vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1504 vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1505 vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
1506 vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1507 vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1508 vcvttps2dq_2 = "rmoy:F30Fu5BrM",
1509 vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1510 vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1511 vdppd_4 = "rrmio:660F3AV41rMU",
1512 vdpps_4 = "rrmioy:660F3AV40rMU",
1513 vextractf128_3 = "mri/oy:660F3AuL19RmU",
1514 vextractps_3 = "mri/do:660F3Au17RmU",
1515 vhaddpd_3 = "rrmoy:660FV7CrM",
1516 vhaddps_3 = "rrmoy:F20FV7CrM",
1517 vhsubpd_3 = "rrmoy:660FV7DrM",
1518 vhsubps_3 = "rrmoy:F20FV7DrM",
1519 vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
1520 vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
1521 vldmxcsr_1 = "xd:0FuAE2m",
1522 vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1523 vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1524 vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
1525 vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
1526 vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1527 vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1528 vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
1529 vmovhlps_3 = "rrro:0FV12rM",
1530 vmovhpd_2 = "xr/qo:660Fu17Rm",
1531 vmovhpd_3 = "rrx/ooq:660FV16rM",
1532 vmovhps_2 = "xr/qo:0Fu17Rm",
1533 vmovhps_3 = "rrx/ooq:0FV16rM",
1534 vmovlhps_3 = "rrro:0FV16rM",
1535 vmovlpd_2 = "xr/qo:660Fu13Rm",
1536 vmovlpd_3 = "rrx/ooq:660FV12rM",
1537 vmovlps_2 = "xr/qo:0Fu13Rm",
1538 vmovlps_3 = "rrx/ooq:0FV12rM",
1539 vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1540 vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1541 vmovntpd_2 = "xroy:660Fu2BRm",
1542 vmovntps_2 = "xroy:0Fu2BRm",
1543 vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1544 vmovsd_3 = "rrro:F20FV10rM",
1545 vmovshdup_2 = "rmoy:F30Fu16rM",
1546 vmovsldup_2 = "rmoy:F30Fu12rM",
1547 vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1548 vmovss_3 = "rrro:F30FV10rM",
1549 vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
1550 vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
1551 vorpd_3 = "rrmoy:660FV56rM",
1552 vorps_3 = "rrmoy:0FV56rM",
1553 vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1554 vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1555 vperm2f128_4 = "rrmiy:660F3AV06rMU",
1556 vptestpd_2 = "rmoy:660F38u0FrM",
1557 vptestps_2 = "rmoy:660F38u0ErM",
1558 vrcpps_2 = "rmoy:0Fu53rM",
1559 vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
1560 vrsqrtps_2 = "rmoy:0Fu52rM",
1561 vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
1562 vroundpd_3 = "rmioy:660F3Au09rMU",
1563 vroundps_3 = "rmioy:660F3Au08rMU",
1564 vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
1565 vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
1566 vshufpd_4 = "rrmioy:660FVC6rMU",
1567 vshufps_4 = "rrmioy:0FVC6rMU",
1568 vsqrtps_2 = "rmoy:0Fu51rM",
1569 vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
1570 vsqrtpd_2 = "rmoy:660Fu51rM",
1571 vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
1572 vstmxcsr_1 = "xd:0FuAE3m",
1573 vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
1574 vucomiss_2 = "rro:0Fu2ErM|rx/od:",
1575 vunpckhpd_3 = "rrmoy:660FV15rM",
1576 vunpckhps_3 = "rrmoy:0FV15rM",
1577 vunpcklpd_3 = "rrmoy:660FV14rM",
1578 vunpcklps_3 = "rrmoy:0FV14rM",
1579 vxorpd_3 = "rrmoy:660FV57rM",
1580 vxorps_3 = "rrmoy:0FV57rM",
1581 vzeroall_0 = "0FuL77",
1582 vzeroupper_0 = "0Fu77",
1583
1584 -- AVX2 FP ops
1585 vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1586 vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
1587 -- *vgather* (!vsib)
1588 vpermpd_3 = "rmiy:660F3AuX01rMU",
1589 vpermps_3 = "rrmy:660F38V16rM",
1590
1591 -- AVX, AVX2 integer ops
1592 -- In general, xmm requires AVX, ymm requires AVX2.
1593 vaesdec_3 = "rrmo:660F38VDErM",
1594 vaesdeclast_3 = "rrmo:660F38VDFrM",
1595 vaesenc_3 = "rrmo:660F38VDCrM",
1596 vaesenclast_3 = "rrmo:660F38VDDrM",
1597 vaesimc_2 = "rmo:660F38uDBrM",
1598 vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
1599 vlddqu_2 = "rxoy:F20FuF0rM",
1600 vmaskmovdqu_2 = "rro:660FuF7rM",
1601 vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1602 vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1603 vmovntdq_2 = "xroy:660FuE7Rm",
1604 vmovntdqa_2 = "rxoy:660F38u2ArM",
1605 vmpsadbw_4 = "rrmioy:660F3AV42rMU",
1606 vpabsb_2 = "rmoy:660F38u1CrM",
1607 vpabsd_2 = "rmoy:660F38u1ErM",
1608 vpabsw_2 = "rmoy:660F38u1DrM",
1609 vpackusdw_3 = "rrmoy:660F38V2BrM",
1610 vpalignr_4 = "rrmioy:660F3AV0FrMU",
1611 vpblendvb_4 = "rrmroy:660F3AV4CrMs",
1612 vpblendw_4 = "rrmioy:660F3AV0ErMU",
1613 vpclmulqdq_4 = "rrmio:660F3AV44rMU",
1614 vpcmpeqq_3 = "rrmoy:660F38V29rM",
1615 vpcmpestri_3 = "rmio:660F3Au61rMU",
1616 vpcmpestrm_3 = "rmio:660F3Au60rMU",
1617 vpcmpgtq_3 = "rrmoy:660F38V37rM",
1618 vpcmpistri_3 = "rmio:660F3Au63rMU",
1619 vpcmpistrm_3 = "rmio:660F3Au62rMU",
1620 vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1621 vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1622 vpextrd_3 = "mri/do:660F3Au16RmU",
1623 vpextrq_3 = "mri/qo:660F3Au16RmU",
1624 vphaddw_3 = "rrmoy:660F38V01rM",
1625 vphaddd_3 = "rrmoy:660F38V02rM",
1626 vphaddsw_3 = "rrmoy:660F38V03rM",
1627 vphminposuw_2 = "rmo:660F38u41rM",
1628 vphsubw_3 = "rrmoy:660F38V05rM",
1629 vphsubd_3 = "rrmoy:660F38V06rM",
1630 vphsubsw_3 = "rrmoy:660F38V07rM",
1631 vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
1632 vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
1633 vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
1634 vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
1635 vpmaddubsw_3 = "rrmoy:660F38V04rM",
1636 vpmaxsb_3 = "rrmoy:660F38V3CrM",
1637 vpmaxsd_3 = "rrmoy:660F38V3DrM",
1638 vpmaxuw_3 = "rrmoy:660F38V3ErM",
1639 vpmaxud_3 = "rrmoy:660F38V3FrM",
1640 vpminsb_3 = "rrmoy:660F38V38rM",
1641 vpminsd_3 = "rrmoy:660F38V39rM",
1642 vpminuw_3 = "rrmoy:660F38V3ArM",
1643 vpminud_3 = "rrmoy:660F38V3BrM",
1644 vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1645 vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
1646 vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
1647 vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
1648 vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
1649 vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
1650 vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
1651 vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
1652 vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
1653 vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
1654 vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
1655 vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
1656 vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
1657 vpmuldq_3 = "rrmoy:660F38V28rM",
1658 vpmulhrsw_3 = "rrmoy:660F38V0BrM",
1659 vpmulld_3 = "rrmoy:660F38V40rM",
1660 vpshufb_3 = "rrmoy:660F38V00rM",
1661 vpshufd_3 = "rmioy:660Fu70rMU",
1662 vpshufhw_3 = "rmioy:F30Fu70rMU",
1663 vpshuflw_3 = "rmioy:F20Fu70rMU",
1664 vpsignb_3 = "rrmoy:660F38V08rM",
1665 vpsignw_3 = "rrmoy:660F38V09rM",
1666 vpsignd_3 = "rrmoy:660F38V0ArM",
1667 vpslldq_3 = "rrioy:660Fv737mU",
1668 vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1669 vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1670 vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1671 vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1672 vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1673 vpsrldq_3 = "rrioy:660Fv733mU",
1674 vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1675 vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1676 vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1677 vptest_2 = "rmoy:660F38u17rM",
1678
1679 -- AVX2 integer ops
1680 vbroadcasti128_2 = "rx/yo:660F38u5ArM",
1681 vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
1682 vextracti128_3 = "mri/oy:660F3AuL39RmU",
1683 vpblendd_4 = "rrmioy:660F3AV02rMU",
1684 vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1685 vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1686 vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1687 vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1688 vpermd_3 = "rrmy:660F38V36rM",
1689 vpermq_3 = "rmiy:660F3AuX00rMU",
1690 -- *vpgather* (!vsib)
1691 vperm2i128_4 = "rrmiy:660F3AV46rMU",
1692 vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1693 vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1694 vpsllvd_3 = "rrmoy:660F38V47rM",
1695 vpsllvq_3 = "rrmoy:660F38VX47rM",
1696 vpsravd_3 = "rrmoy:660F38V46rM",
1697 vpsrlvd_3 = "rrmoy:660F38V45rM",
1698 vpsrlvq_3 = "rrmoy:660F38VX45rM",
1699
1700 -- Intel ADX
1701 adcx_2 = "rmqd:660F38F6rM",
1702 adox_2 = "rmqd:F30F38F6rM",
1703
1704 -- BMI1
1705 andn_3 = "rrmqd:0F38VF2rM",
1706 bextr_3 = "rmrqd:0F38wF7rM",
1707 blsi_2 = "rmqd:0F38vF33m",
1708 blsmsk_2 = "rmqd:0F38vF32m",
1709 blsr_2 = "rmqd:0F38vF31m",
1710 tzcnt_2 = "rmqdw:F30FBCrM",
1711
1712 -- BMI2
1713 bzhi_3 = "rmrqd:0F38wF5rM",
1714 mulx_3 = "rrmqd:F20F38VF6rM",
1715 pdep_3 = "rrmqd:F20F38VF5rM",
1716 pext_3 = "rrmqd:F30F38VF5rM",
1717 rorx_3 = "rmSqd:F20F3AuF0rMS",
1718 sarx_3 = "rmrqd:F30F38wF7rM",
1719 shrx_3 = "rmrqd:F20F38wF7rM",
1720 shlx_3 = "rmrqd:660F38wF7rM",
1721
1722 -- FMA3
1723 vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
1724 vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
1725 vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
1726 vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
1727 vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
1728 vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
1729
1730 vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
1731 vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
1732 vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
1733 vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
1734 vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
1735 vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
1736
1737 vfmadd132pd_3 = "rrmoy:660F38VX98rM",
1738 vfmadd132ps_3 = "rrmoy:660F38V98rM",
1739 vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
1740 vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
1741 vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
1742 vfmadd213ps_3 = "rrmoy:660F38VA8rM",
1743 vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
1744 vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
1745 vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
1746 vfmadd231ps_3 = "rrmoy:660F38VB8rM",
1747 vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
1748 vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
1749
1750 vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
1751 vfmsub132ps_3 = "rrmoy:660F38V9ArM",
1752 vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
1753 vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
1754 vfmsub213pd_3 = "rrmoy:660F38VXAArM",
1755 vfmsub213ps_3 = "rrmoy:660F38VAArM",
1756 vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
1757 vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
1758 vfmsub231pd_3 = "rrmoy:660F38VXBArM",
1759 vfmsub231ps_3 = "rrmoy:660F38VBArM",
1760 vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
1761 vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
1762
1763 vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
1764 vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
1765 vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
1766 vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
1767 vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
1768 vfnmadd213ps_3 = "rrmoy:660F38VACrM",
1769 vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
1770 vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
1771 vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
1772 vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
1773 vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
1774 vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
1775
1776 vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
1777 vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
1778 vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
1779 vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
1780 vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
1781 vfnmsub213ps_3 = "rrmoy:660F38VAErM",
1782 vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
1783 vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
1784 vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
1785 vfnmsub231ps_3 = "rrmoy:660F38VBErM",
1786 vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
1787 vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
1416} 1788}
1417 1789
1418------------------------------------------------------------------------------ 1790------------------------------------------------------------------------------
@@ -1463,28 +1835,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
1463 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ 1835 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
1464end 1836end
1465 1837
1466-- SSE FP arithmetic ops. 1838-- SSE / AVX FP arithmetic ops.
1467for name,n in pairs{ sqrt = 1, add = 8, mul = 9, 1839for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
1468 sub = 12, min = 13, div = 14, max = 15 } do 1840 sub = 12, min = 13, div = 14, max = 15 } do
1469 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) 1841 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
1470 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) 1842 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
1471 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) 1843 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
1472 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) 1844 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
1845 if n ~= 1 then
1846 map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
1847 map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
1848 map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
1849 map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
1850 end
1851end
1852
1853-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
1854for name,n in pairs{
1855 paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
1856 paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
1857 packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
1858 paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
1859 pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
1860 pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
1861 pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
1862 pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
1863 pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
1864 pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
1865 psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
1866 psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
1867 punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
1868 punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
1869 punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
1870} do
1871 map_op[name.."_2"] = format("rmo:660F%02XrM", n)
1872 map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
1473end 1873end
1474 1874
1475------------------------------------------------------------------------------ 1875------------------------------------------------------------------------------
1476 1876
1877local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
1878
1477-- Process pattern string. 1879-- Process pattern string.
1478local function dopattern(pat, args, sz, op, needrex) 1880local function dopattern(pat, args, sz, op, needrex)
1479 local digit, addin 1881 local digit, addin, vex
1480 local opcode = 0 1882 local opcode = 0
1481 local szov = sz 1883 local szov = sz
1482 local narg = 1 1884 local narg = 1
1483 local rex = 0 1885 local rex = 0
1484 1886
1485 -- Limit number of section buffer positions used by a single dasm_put(). 1887 -- Limit number of section buffer positions used by a single dasm_put().
1486 -- A single opcode needs a maximum of 5 positions. 1888 -- A single opcode needs a maximum of 6 positions.
1487 if secpos+5 > maxsecpos then wflush() end 1889 if secpos+6 > maxsecpos then wflush() end
1488 1890
1489 -- Process each character. 1891 -- Process each character.
1490 for c in gmatch(pat.."|", ".") do 1892 for c in gmatch(pat.."|", ".") do
@@ -1498,6 +1900,8 @@ local function dopattern(pat, args, sz, op, needrex)
1498 szov = nil 1900 szov = nil
1499 elseif c == "X" then -- Force REX.W. 1901 elseif c == "X" then -- Force REX.W.
1500 rex = 8 1902 rex = 8
1903 elseif c == "L" then -- Force VEX.L.
1904 vex.l = true
1501 elseif c == "r" then -- Merge 1st operand regno. into opcode. 1905 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1502 addin = args[1]; opcode = opcode + (addin.reg % 8) 1906 addin = args[1]; opcode = opcode + (addin.reg % 8)
1503 if narg < 2 then narg = 2 end 1907 if narg < 2 then narg = 2 end
@@ -1521,21 +1925,42 @@ local function dopattern(pat, args, sz, op, needrex)
1521 if t.xreg and t.xreg > 7 then rex = rex + 2 end 1925 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1522 if s > 7 then rex = rex + 4 end 1926 if s > 7 then rex = rex + 4 end
1523 if needrex then rex = rex + 16 end 1927 if needrex then rex = rex + 16 end
1524 wputop(szov, opcode, rex); opcode = nil 1928 local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1929 opcode = nil
1525 local imark = sub(pat, -1) -- Force a mark (ugly). 1930 local imark = sub(pat, -1) -- Force a mark (ugly).
1526 -- Put ModRM/SIB with regno/last digit as spare. 1931 -- Put ModRM/SIB with regno/last digit as spare.
1527 wputmrmsib(t, imark, s, addin and addin.vreg) 1932 wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1528 addin = nil 1933 addin = nil
1934 elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1935 local b = band(opcode, 255); opcode = shr(opcode, 8)
1936 local m = 1
1937 if b == 0x38 then m = 2
1938 elseif b == 0x3a then m = 3 end
1939 if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1940 if b ~= 0x0f then
1941 werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1942 "' in pattern `"..pat.."' for `"..op.."'")
1943 end
1944 local v = map_vexarg[c]
1945 if v then v = remove(args, v) end
1946 b = band(opcode, 255)
1947 local p = 0
1948 if b == 0x66 then p = 1
1949 elseif b == 0xf3 then p = 2
1950 elseif b == 0xf2 then p = 3 end
1951 if p ~= 0 then opcode = shr(opcode, 8) end
1952 if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1953 vex = { m = m, p = p, v = v }
1529 else 1954 else
1530 if opcode then -- Flush opcode. 1955 if opcode then -- Flush opcode.
1531 if szov == "q" and rex == 0 then rex = rex + 8 end 1956 if szov == "q" and rex == 0 then rex = rex + 8 end
1532 if needrex then rex = rex + 16 end 1957 if needrex then rex = rex + 16 end
1533 if addin and addin.reg == -1 then 1958 if addin and addin.reg == -1 then
1534 wputop(szov, opcode - 7, rex) 1959 local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1535 waction("VREG", addin.vreg); wputxb(0) 1960 wvreg("opcode", addin.vreg, psz, sk)
1536 else 1961 else
1537 if addin and addin.reg > 7 then rex = rex + 1 end 1962 if addin and addin.reg > 7 then rex = rex + 1 end
1538 wputop(szov, opcode, rex) 1963 wputop(szov, opcode, rex, vex)
1539 end 1964 end
1540 opcode = nil 1965 opcode = nil
1541 end 1966 end
@@ -1549,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex)
1549 local a = args[narg] 1974 local a = args[narg]
1550 narg = narg + 1 1975 narg = narg + 1
1551 local mode, imm = a.mode, a.imm 1976 local mode, imm = a.mode, a.imm
1552 if mode == "iJ" and not match("iIJ", c) then 1977 if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
1553 werror("bad operand size for label") 1978 werror("bad operand size for label")
1554 end 1979 end
1555 if c == "S" then 1980 if c == "S" then
@@ -1572,6 +1997,14 @@ local function dopattern(pat, args, sz, op, needrex)
1572 else 1997 else
1573 wputlabel("REL_", imm, 2) 1998 wputlabel("REL_", imm, 2)
1574 end 1999 end
2000 elseif c == "s" then
2001 local reg = a.reg
2002 if reg < 0 then
2003 wputb(0)
2004 wvreg("imm.hi", a.vreg)
2005 else
2006 wputb(shl(reg, 4))
2007 end
1575 else 2008 else
1576 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") 2009 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1577 end 2010 end
@@ -1648,11 +2081,14 @@ map_op[".template__"] = function(params, template, nparams)
1648 if pat == "" then pat = lastpat else lastpat = pat end 2081 if pat == "" then pat = lastpat else lastpat = pat end
1649 if matchtm(tm, args) then 2082 if matchtm(tm, args) then
1650 local prefix = sub(szm, 1, 1) 2083 local prefix = sub(szm, 1, 1)
1651 if prefix == "/" then -- Match both operand sizes. 2084 if prefix == "/" then -- Exactly match leading operand sizes.
1652 if args[1].opsize == sub(szm, 2, 2) and 2085 for i = #szm,1,-1 do
1653 args[2].opsize == sub(szm, 3, 3) then 2086 if i == 1 then
1654 dopattern(pat, args, sz, params.op, needrex) -- Process pattern. 2087 dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1655 return 2088 return
2089 elseif args[i-1].opsize ~= sub(szm, i, i) then
2090 break
2091 end
1656 end 2092 end
1657 else -- Match common operand size. 2093 else -- Match common operand size.
1658 local szp = sz 2094 local szp = sz
@@ -1717,8 +2153,8 @@ if x64 then
1717 rex = a.reg > 7 and 9 or 8 2153 rex = a.reg > 7 and 9 or 8
1718 end 2154 end
1719 end 2155 end
1720 wputop(sz, opcode, rex) 2156 local psz, sk = wputop(sz, opcode, rex, nil, vreg)
1721 if vreg then waction("VREG", vreg); wputxb(0) end 2157 wvreg("opcode", vreg, psz, sk)
1722 waction("IMM_D", format("(unsigned int)(%s)", op64)) 2158 waction("IMM_D", format("(unsigned int)(%s)", op64))
1723 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) 2159 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
1724 end 2160 end
@@ -1730,14 +2166,16 @@ end
1730local function op_data(params) 2166local function op_data(params)
1731 if not params then return "imm..." end 2167 if not params then return "imm..." end
1732 local sz = sub(params.op, 2, 2) 2168 local sz = sub(params.op, 2, 2)
1733 if sz == "a" then sz = addrsize end 2169 if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
1734 for _,p in ipairs(params) do 2170 for _,p in ipairs(params) do
1735 local a = parseoperand(p) 2171 local a = parseoperand(p, sz == "q")
1736 if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then 2172 if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
1737 werror("bad mode or size in `"..p.."'") 2173 werror("bad mode or size in `"..p.."'")
1738 end 2174 end
1739 if a.mode == "iJ" then 2175 if a.mode == "iJ" then
1740 wputlabel("IMM_", a.imm, 1) 2176 wputlabel("IMM_", a.imm, 1)
2177 elseif sz == "q" then
2178 wputqarg(a.imm)
1741 else 2179 else
1742 wputszarg(sz, a.imm) 2180 wputszarg(sz, a.imm)
1743 end 2181 end
@@ -1749,7 +2187,11 @@ map_op[".byte_*"] = op_data
1749map_op[".sbyte_*"] = op_data 2187map_op[".sbyte_*"] = op_data
1750map_op[".word_*"] = op_data 2188map_op[".word_*"] = op_data
1751map_op[".dword_*"] = op_data 2189map_op[".dword_*"] = op_data
2190map_op[".qword_*"] = op_data
1752map_op[".aword_*"] = op_data 2191map_op[".aword_*"] = op_data
2192map_op[".long_*"] = op_data
2193map_op[".quad_*"] = op_data
2194map_op[".addr_*"] = op_data
1753 2195
1754------------------------------------------------------------------------------ 2196------------------------------------------------------------------------------
1755 2197
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
index 6f85bb06..f4e71eca 100644
--- a/dynasm/dynasm.lua
+++ b/dynasm/dynasm.lua
@@ -10,9 +10,9 @@
10local _info = { 10local _info = {
11 name = "DynASM", 11 name = "DynASM",
12 description = "A dynamic assembler for code generation engines", 12 description = "A dynamic assembler for code generation engines",
13 version = "1.3.0", 13 version = "1.5.0",
14 vernum = 10300, 14 vernum = 10500,
15 release = "2011-05-05", 15 release = "2021-05-02",
16 author = "Mike Pall", 16 author = "Mike Pall",
17 url = "https://luajit.org/dynasm.html", 17 url = "https://luajit.org/dynasm.html",
18 license = "MIT", 18 license = "MIT",
@@ -630,6 +630,7 @@ end
630-- Load architecture-specific module. 630-- Load architecture-specific module.
631local function loadarch(arch) 631local function loadarch(arch)
632 if not match(arch, "^[%w_]+$") then return "bad arch name" end 632 if not match(arch, "^[%w_]+$") then return "bad arch name" end
633 _G._map_def = map_def
633 local ok, m_arch = pcall(require, "dasm_"..arch) 634 local ok, m_arch = pcall(require, "dasm_"..arch)
634 if not ok then return "cannot load module: "..m_arch end 635 if not ok then return "cannot load module: "..m_arch end
635 g_arch = m_arch 636 g_arch = m_arch
diff --git a/etc/luajit.pc b/etc/luajit.pc
index 9bac3a8b..39e1e577 100644
--- a/etc/luajit.pc
+++ b/etc/luajit.pc
@@ -1,8 +1,8 @@
1# Package information for LuaJIT to be used by pkg-config. 1# Package information for LuaJIT to be used by pkg-config.
2majver=2 2majver=2
3minver=0 3minver=1
4relver=5 4relver=0
5version=${majver}.${minver}.${relver} 5version=${majver}.${minver}.${relver}-beta3
6abiver=5.1 6abiver=5.1
7 7
8prefix=/usr/local 8prefix=/usr/local
diff --git a/src/.gitignore b/src/.gitignore
index fc94e82c..1a30573c 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -4,4 +4,4 @@ lj_ffdef.h
4lj_libdef.h 4lj_libdef.h
5lj_recdef.h 5lj_recdef.h
6lj_folddef.h 6lj_folddef.h
7lj_vm.s 7lj_vm.[sS]
diff --git a/src/Makefile b/src/Makefile
index 384614f1..2538503f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,8 +11,8 @@
11############################################################################## 11##############################################################################
12 12
13MAJVER= 2 13MAJVER= 2
14MINVER= 0 14MINVER= 1
15RELVER= 5 15RELVER= 0
16ABIVER= 5.1 16ABIVER= 5.1
17NODOTABIVER= 51 17NODOTABIVER= 51
18 18
@@ -44,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer
44# 44#
45# Target-specific compiler options: 45# Target-specific compiler options:
46# 46#
47# x86 only: it's recommended to compile at least for i686. Better yet,
48# compile for an architecture that has SSE2, too (-msse -msse2).
49#
50# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 47# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
51# the binaries to a different machine you could also use: -march=native 48# the binaries to a different machine you could also use: -march=native
52# 49#
53CCOPT_x86= -march=i686 50CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
54CCOPT_x64= 51CCOPT_x64=
55CCOPT_arm= 52CCOPT_arm=
53CCOPT_arm64=
56CCOPT_ppc= 54CCOPT_ppc=
57CCOPT_ppcspe=
58CCOPT_mips= 55CCOPT_mips=
59# 56#
60CCDEBUG= 57CCDEBUG=
@@ -113,6 +110,9 @@ XCFLAGS=
113#XCFLAGS+= -DLUAJIT_NUMMODE=1 110#XCFLAGS+= -DLUAJIT_NUMMODE=1
114#XCFLAGS+= -DLUAJIT_NUMMODE=2 111#XCFLAGS+= -DLUAJIT_NUMMODE=2
115# 112#
113# Disable LJ_GC64 mode for x64.
114#XCFLAGS+= -DLUAJIT_DISABLE_GC64
115#
116############################################################################## 116##############################################################################
117 117
118############################################################################## 118##############################################################################
@@ -124,15 +124,14 @@ XCFLAGS=
124# 124#
125# Use the system provided memory allocator (realloc) instead of the 125# Use the system provided memory allocator (realloc) instead of the
126# bundled memory allocator. This is slower, but sometimes helpful for 126# bundled memory allocator. This is slower, but sometimes helpful for
127# debugging. This option cannot be enabled on x64, since realloc usually 127# debugging. This option cannot be enabled on x64 without GC64, since
128# doesn't return addresses in the right address range. 128# realloc usually doesn't return addresses in the right address range.
129# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and 129# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
130# the only way to get useful results from it for all other architectures. 130# the only way to get useful results from it for all other architectures.
131#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 131#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
132# 132#
133# This define is required to run LuaJIT under Valgrind. The Valgrind 133# This define is required to run LuaJIT under Valgrind. The Valgrind
134# header files must be installed. You should enable debug information, too. 134# header files must be installed. You should enable debug information, too.
135# Use --suppressions=lj.supp to avoid some false positives.
136#XCFLAGS+= -DLUAJIT_USE_VALGRIND 135#XCFLAGS+= -DLUAJIT_USE_VALGRIND
137# 136#
138# This is the client for the GDB JIT API. GDB 7.0 or higher is required 137# This is the client for the GDB JIT API. GDB 7.0 or higher is required
@@ -189,7 +188,8 @@ endif
189# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows 188# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
190# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- 189# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
191 190
192CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) 191ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
192CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
193LDOPTIONS= $(CCDEBUG) $(LDFLAGS) 193LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
194 194
195HOST_CC= $(CC) 195HOST_CC= $(CC)
@@ -229,6 +229,7 @@ TARGET_XLDFLAGS=
229TARGET_XLIBS= -lm 229TARGET_XLIBS= -lm
230TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 230TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
231TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 231TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
232TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
232TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) 233TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
233TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) 234TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
234TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) 235TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -243,17 +244,29 @@ else
243ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) 244ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
244 TARGET_LJARCH= arm 245 TARGET_LJARCH= arm
245else 246else
247ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
248 ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
249 TARGET_ARCH= -D__AARCH64EB__=1
250 endif
251 TARGET_LJARCH= arm64
252else
246ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) 253ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
254 ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
255 TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
256 else
257 TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
258 endif
247 TARGET_LJARCH= ppc 259 TARGET_LJARCH= ppc
248else 260else
249ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
250 TARGET_LJARCH= ppcspe
251else
252ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) 261ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
253 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) 262 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
254 TARGET_ARCH= -D__MIPSEL__=1 263 TARGET_ARCH= -D__MIPSEL__=1
255 endif 264 endif
256 TARGET_LJARCH= mips 265 ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
266 TARGET_LJARCH= mips64
267 else
268 TARGET_LJARCH= mips
269 endif
257else 270else
258 $(error Unsupported target architecture) 271 $(error Unsupported target architecture)
259endif 272endif
@@ -267,6 +280,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
267 TARGET_SYS= PS3 280 TARGET_SYS= PS3
268 TARGET_ARCH+= -D__CELLOS_LV2__ 281 TARGET_ARCH+= -D__CELLOS_LV2__
269 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 282 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
283 TARGET_XLIBS+= -lpthread
270endif 284endif
271 285
272TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) 286TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -306,20 +320,27 @@ ifeq (Darwin,$(TARGET_SYS))
306 $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) 320 $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
307 endif 321 endif
308 TARGET_STRIP+= -x 322 TARGET_STRIP+= -x
323 TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
309 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC 324 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
310 TARGET_DYNXLDOPTS= 325 TARGET_DYNXLDOPTS=
311 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) 326 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
312 ifeq (x64,$(TARGET_LJARCH))
313 TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
314 TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
315 endif
316else 327else
317ifeq (iOS,$(TARGET_SYS)) 328ifeq (iOS,$(TARGET_SYS))
318 TARGET_STRIP+= -x 329 TARGET_STRIP+= -x
319 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC 330 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
320 TARGET_DYNXLDOPTS= 331 TARGET_DYNXLDOPTS=
321 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) 332 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
333 ifeq (arm64,$(TARGET_LJARCH))
334 TARGET_XCFLAGS+= -fno-omit-frame-pointer
335 endif
322else 336else
337 ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
338 # Find out whether the target toolchain always generates unwind tables.
339 TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
340 ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
341 TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
342 endif
343 endif
323 ifneq (SunOS,$(TARGET_SYS)) 344 ifneq (SunOS,$(TARGET_SYS))
324 ifneq (PS3,$(TARGET_SYS)) 345 ifneq (PS3,$(TARGET_SYS))
325 TARGET_XLDFLAGS+= -Wl,-E 346 TARGET_XLDFLAGS+= -Wl,-E
@@ -346,7 +367,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS))
346 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX 367 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
347 else 368 else
348 ifeq (iOS,$(TARGET_SYS)) 369 ifeq (iOS,$(TARGET_SYS))
349 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX 370 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1
350 else 371 else
351 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER 372 HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER
352 endif 373 endif
@@ -379,6 +400,11 @@ DASM_XFLAGS=
379DASM_AFLAGS= 400DASM_AFLAGS=
380DASM_ARCH= $(TARGET_LJARCH) 401DASM_ARCH= $(TARGET_LJARCH)
381 402
403ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
404 DASM_AFLAGS+= -D ENDIAN_LE
405else
406 DASM_AFLAGS+= -D ENDIAN_BE
407endif
382ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) 408ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
383 DASM_AFLAGS+= -D P64 409 DASM_AFLAGS+= -D P64
384endif 410endif
@@ -411,19 +437,19 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
411ifeq (Windows,$(TARGET_SYS)) 437ifeq (Windows,$(TARGET_SYS))
412 DASM_AFLAGS+= -D WIN 438 DASM_AFLAGS+= -D WIN
413endif 439endif
414ifeq (x86,$(TARGET_LJARCH))
415 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
416 DASM_AFLAGS+= -D SSE
417 endif
418else
419ifeq (x64,$(TARGET_LJARCH)) 440ifeq (x64,$(TARGET_LJARCH))
420 DASM_ARCH= x86 441 ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
442 DASM_ARCH= x86
443 endif
421else 444else
422ifeq (arm,$(TARGET_LJARCH)) 445ifeq (arm,$(TARGET_LJARCH))
423 ifeq (iOS,$(TARGET_SYS)) 446 ifeq (iOS,$(TARGET_SYS))
424 DASM_AFLAGS+= -D IOS 447 DASM_AFLAGS+= -D IOS
425 endif 448 endif
426else 449else
450ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
451 DASM_AFLAGS+= -D MIPSR6
452endif
427ifeq (ppc,$(TARGET_LJARCH)) 453ifeq (ppc,$(TARGET_LJARCH))
428 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) 454 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
429 DASM_AFLAGS+= -D SQRT 455 DASM_AFLAGS+= -D SQRT
@@ -431,7 +457,7 @@ ifeq (ppc,$(TARGET_LJARCH))
431 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) 457 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
432 DASM_AFLAGS+= -D ROUND 458 DASM_AFLAGS+= -D ROUND
433 endif 459 endif
434 ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH))) 460 ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
435 DASM_AFLAGS+= -D GPR64 461 DASM_AFLAGS+= -D GPR64
436 endif 462 endif
437 ifeq (PS3,$(TARGET_SYS)) 463 ifeq (PS3,$(TARGET_SYS))
@@ -440,7 +466,6 @@ ifeq (ppc,$(TARGET_LJARCH))
440endif 466endif
441endif 467endif
442endif 468endif
443endif
444 469
445DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) 470DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
446DASM_DASC= vm_$(DASM_ARCH).dasc 471DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -453,19 +478,22 @@ BUILDVM_X= $(BUILDVM_T)
453HOST_O= $(MINILUA_O) $(BUILDVM_O) 478HOST_O= $(MINILUA_O) $(BUILDVM_O)
454HOST_T= $(MINILUA_T) $(BUILDVM_T) 479HOST_T= $(MINILUA_T) $(BUILDVM_T)
455 480
456LJVM_S= lj_vm.s 481LJVM_S= lj_vm.S
457LJVM_O= lj_vm.o 482LJVM_O= lj_vm.o
458LJVM_BOUT= $(LJVM_S) 483LJVM_BOUT= $(LJVM_S)
459LJVM_MODE= elfasm 484LJVM_MODE= elfasm
460 485
461LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ 486LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
462 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o 487 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
488 lib_buffer.o
463LJLIB_C= $(LJLIB_O:.o=.c) 489LJLIB_C= $(LJLIB_O:.o=.c)
464 490
465LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ 491LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
466 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ 492 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
467 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ 493 lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
468 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ 494 lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
495 lj_api.o lj_profile.o \
496 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
469 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 497 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
470 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ 498 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
471 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ 499 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -580,12 +608,15 @@ E= @echo
580default all: $(TARGET_T) 608default all: $(TARGET_T)
581 609
582amalg: 610amalg:
583 @grep "^[+|]" ljamalg.c
584 $(MAKE) all "LJCORE_O=ljamalg.o" 611 $(MAKE) all "LJCORE_O=ljamalg.o"
585 612
586clean: 613clean:
587 $(HOST_RM) $(ALL_RM) 614 $(HOST_RM) $(ALL_RM)
588 615
616libbc:
617 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
618 $(MAKE) all
619
589depend: 620depend:
590 @for file in $(ALL_HDRGEN); do \ 621 @for file in $(ALL_HDRGEN); do \
591 test -f $$file || touch $$file; \ 622 test -f $$file || touch $$file; \
@@ -600,7 +631,7 @@ depend:
600 test -s $$file || $(HOST_RM) $$file; \ 631 test -s $$file || $(HOST_RM) $$file; \
601 done 632 done
602 633
603.PHONY: default all amalg clean depend 634.PHONY: default all amalg clean libbc depend
604 635
605############################################################################## 636##############################################################################
606# Rules for generated files. 637# Rules for generated files.
@@ -610,7 +641,7 @@ $(MINILUA_T): $(MINILUA_O)
610 $(E) "HOSTLINK $@" 641 $(E) "HOSTLINK $@"
611 $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) 642 $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
612 643
613host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) lj_arch.h lua.h luaconf.h 644host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h
614 $(E) "DYNASM $@" 645 $(E) "DYNASM $@"
615 $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) 646 $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
616 647
@@ -657,10 +688,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
657 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 688 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
658 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 689 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
659 690
660%.o: %.s 691%.o: %.S
661 $(E) "ASM $@" 692 $(E) "ASM $@"
662 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 693 $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
663 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 694 $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
664 695
665$(LUAJIT_O): 696$(LUAJIT_O):
666 $(E) "CC $@" 697 $(E) "CC $@"
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 9e14d617..1ad6701a 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -1,66 +1,79 @@
1lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 1lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
2 lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ 2 lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
3 lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h 3 lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h
4lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 4lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ 5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ 6 lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ 7 lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \
8 lj_lib.h lj_libdef.h 8 lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h
9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h 10 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
11 lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
12 lj_ffdef.h lj_lib.h lj_libdef.h
13lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
14 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
15 lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \
16 lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h
11lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 17lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
12 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ 18 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
13 lj_libdef.h 19 lj_libdef.h
14lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 20lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
15 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ 21 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
16 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ 22 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
17 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h 23 lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
24 lj_libdef.h
18lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h 25lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
19lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 26lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
20 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ 27 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
21 lj_ffdef.h lj_lib.h lj_libdef.h 28 lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
22lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 29lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
23 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ 30 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
24 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ 31 lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
25 lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ 32 lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
26 lj_libdef.h 33 lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
27lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 34lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
28 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h 35 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h
29lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 36lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
30 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 37 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
38 lj_libdef.h
31lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 39lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
32 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h 40 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
33lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 41lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
34 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 42 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
35 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ 43 lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
36 lj_lib.h lj_libdef.h 44 lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
37lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 45lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ 46 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
39 lj_libdef.h 47 lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
40lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h 48lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \
49 lj_prng.h
41lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 50lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
42 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ 51 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
43 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ 52 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
44 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h 53 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
45lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 54lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
46 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ 55 lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
47 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 56 lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
48 lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ 57 lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
49 lj_asm_*.h 58 lj_emit_*.h lj_asm_*.h
59lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
50lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ 60lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
51 lj_bcdef.h 61 lj_bcdef.h
52lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 62lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
53 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ 63 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
54 lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h 64 lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
65 lj_strfmt.h
55lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 66lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
56 lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ 67 lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
57 lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h 68 lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
69lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
70 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
58lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 71lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
59 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ 72 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
60 lj_cdata.h lj_carith.h 73 lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
61lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 74lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
62 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 75 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
63 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 76 lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
64 lj_traceerr.h 77 lj_traceerr.h
65lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ 78lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
66 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ 79 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -68,110 +81,127 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
68 lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ 81 lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
69 lj_traceerr.h lj_vm.h 82 lj_traceerr.h lj_vm.h
70lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 83lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
71 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ 84 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \
72 lj_ccallback.h 85 lj_cdata.h lj_cconv.h lj_ccallback.h
73lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 86lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
74 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 87 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
75 lj_cdata.h
76lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h 88lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
77lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 89lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
78 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ 90 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
79 lj_cdata.h lj_clib.h 91 lj_cdata.h lj_clib.h lj_strfmt.h
80lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 92lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
81 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ 93 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
82 lj_bc.h lj_vm.h lj_char.h lj_strscan.h 94 lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
83lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 95lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 96 lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
85 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ 97 lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
86 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 98 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
87 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ 99 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
88 lj_crecord.h 100 lj_crecord.h lj_strfmt.h
89lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 101lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h 102 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
103 lj_ccallback.h lj_buf.h
91lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 104lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ 105 lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
93 lj_bc.h lj_vm.h lj_jit.h lj_ir.h 106 lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
94lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 107lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
95 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ 108 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
96 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ 109 lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
97 lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 110 lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
98 lj_vm.h luajit.h 111 lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
99lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ 112lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
100 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ 113 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
101 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ 114 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
102 lj_traceerr.h lj_vm.h 115 lj_traceerr.h lj_vm.h lj_strfmt.h
103lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 116lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
104 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ 117 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
105 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 118 lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
106 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ 119 lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
107 lj_vm.h lj_strscan.h lj_recdef.h 120 lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
108lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 121lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 122 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
110 lj_traceerr.h lj_vm.h 123 lj_traceerr.h lj_vm.h
111lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 124lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ 125 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
113 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ 126 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
114 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 127 lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
115lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 128lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ 129 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
117 lj_ir.h lj_dispatch.h 130 lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
118lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 131lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
119 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 132 lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
120 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 133 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
121 lj_vm.h lj_strscan.h lj_lib.h 134 lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
122lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 135lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 136 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 137 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
138 lj_strfmt.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 139lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 140 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 141 lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \
142 lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 143lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 144 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 145 lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
131lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 146lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
132 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ 147 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
133 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h 148 lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 149lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 150 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
136 lj_vm.h lj_strscan.h 151 lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 152lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 153lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 154 lj_ir.h lj_jit.h lj_iropt.h
140lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 155lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
141 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 156 lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
142 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 157 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
143 lj_strscan.h lj_folddef.h 158 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
144lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 159lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
145 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 160 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
146 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 161 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
162 lj_vm.h
147lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 163lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
148 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 164 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h
149lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 165lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
150 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 166 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
151 lj_traceerr.h lj_vm.h lj_strscan.h 167 lj_traceerr.h lj_vm.h lj_strscan.h
152lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 168lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h 169 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
154lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 170lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
155 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 171 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
156 lj_iropt.h lj_vm.h 172 lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
157lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 173lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
158 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ 174 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
159 lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 175 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
176 lj_vm.h lj_vmevent.h
177lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
178lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
179 lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
180 lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
160lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 181lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
161 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 182 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
162 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 183 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
163 lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ 184 lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
164 lj_ffrecord.h lj_snap.h lj_vm.h 185 lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
186lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
187 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
188 lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
165lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 189lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
166 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ 190 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
167 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 191 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
168 lj_target_*.h lj_ctype.h lj_cdata.h 192 lj_target_*.h lj_ctype.h lj_cdata.h
169lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 193lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
170 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 194 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
171 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 195 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
172 lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h 196 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
197 lj_alloc.h luajit.h
173lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 198lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
174 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h 199 lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
200lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
201 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \
202 lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h
203lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
204 lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
175lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 205lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
176 lj_char.h lj_strscan.h 206 lj_char.h lj_strscan.h
177lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 207lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -180,35 +210,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
180 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ 210 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \
181 lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ 211 lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
182 lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ 212 lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
183 lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h 213 lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
184lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 214lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
185 lj_gc.h lj_udata.h 215 lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
186lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 216lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
187 lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ 217 lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
188 lj_vm.h lj_vmevent.h 218 lj_vm.h lj_vmevent.h
189lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 219lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_ir.h lj_vm.h 220 lj_ir.h lj_vm.h
191ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ 221ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
192 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ 222 lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \
193 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ 223 lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 224 lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 225 lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 226 lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 227 lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 228 lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 229 lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 230 lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 231 lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 232 lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 233 lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 234 lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 235 lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 236 lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
237 lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
238 lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
207 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ 239 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
208 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ 240 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
209 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ 241 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
210 lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ 242 lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
211 lib_init.c 243 lib_buffer.c lib_init.c
212luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h 244luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
213host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ 245host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
214 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ 246 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
@@ -220,7 +252,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 252host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 253 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 254host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 255 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
256 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 257host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 258 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 259host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 51222999..9dc328fc 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
59#include "../dynasm/dasm_x86.h" 59#include "../dynasm/dasm_x86.h"
60#elif LJ_TARGET_ARM 60#elif LJ_TARGET_ARM
61#include "../dynasm/dasm_arm.h" 61#include "../dynasm/dasm_arm.h"
62#elif LJ_TARGET_ARM64
63#include "../dynasm/dasm_arm64.h"
62#elif LJ_TARGET_PPC 64#elif LJ_TARGET_PPC
63#include "../dynasm/dasm_ppc.h" 65#include "../dynasm/dasm_ppc.h"
64#elif LJ_TARGET_PPCSPE
65#include "../dynasm/dasm_ppc.h"
66#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS
67#include "../dynasm/dasm_mips.h" 67#include "../dynasm/dasm_mips.h"
68#else 68#else
@@ -110,11 +110,11 @@ static const char *sym_decorate(BuildCtx *ctx,
110 if (p) { 110 if (p) {
111#if LJ_TARGET_X86ORX64 111#if LJ_TARGET_X86ORX64
112 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) 112 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
113 name[0] = '@'; 113 name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
114 else 114 else
115 *p = '\0'; 115 *p = '\0';
116#elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE 116#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
117 /* Keep @plt. */ 117 /* Keep @plt etc. */
118#else 118#else
119 *p = '\0'; 119 *p = '\0';
120#endif 120#endif
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx)
179 ctx->nreloc = 0; 179 ctx->nreloc = 0;
180 180
181 ctx->globnames = globnames; 181 ctx->globnames = globnames;
182 ctx->extnames = extnames;
182 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); 183 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
183 ctx->nrelocsym = 0; 184 ctx->nrelocsym = 0;
184 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; 185 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
320 char buf[80]; 321 char buf[80];
321 int i; 322 int i;
322 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); 323 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
323 fprintf(ctx->fp, "module(...)\n\n"); 324 fprintf(ctx->fp, "return {\n\n");
324 325
325 fprintf(ctx->fp, "bcnames = \""); 326 fprintf(ctx->fp, "bcnames = \"");
326 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); 327 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
327 fprintf(ctx->fp, "\"\n\n"); 328 fprintf(ctx->fp, "\",\n\n");
328 329
329 fprintf(ctx->fp, "irnames = \""); 330 fprintf(ctx->fp, "irnames = \"");
330 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); 331 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
331 fprintf(ctx->fp, "\"\n\n"); 332 fprintf(ctx->fp, "\",\n\n");
332 333
333 fprintf(ctx->fp, "irfpm = { [0]="); 334 fprintf(ctx->fp, "irfpm = { [0]=");
334 for (i = 0; irfpm_names[i]; i++) 335 for (i = 0; irfpm_names[i]; i++)
335 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); 336 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
336 fprintf(ctx->fp, "}\n\n"); 337 fprintf(ctx->fp, "},\n\n");
337 338
338 fprintf(ctx->fp, "irfield = { [0]="); 339 fprintf(ctx->fp, "irfield = { [0]=");
339 for (i = 0; irfield_names[i]; i++) { 340 for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
343 if (p) *p = '.'; 344 if (p) *p = '.';
344 fprintf(ctx->fp, "\"%s\", ", buf); 345 fprintf(ctx->fp, "\"%s\", ", buf);
345 } 346 }
346 fprintf(ctx->fp, "}\n\n"); 347 fprintf(ctx->fp, "},\n\n");
347 348
348 fprintf(ctx->fp, "ircall = {\n[0]="); 349 fprintf(ctx->fp, "ircall = {\n[0]=");
349 for (i = 0; ircall_names[i]; i++) 350 for (i = 0; ircall_names[i]; i++)
350 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); 351 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
351 fprintf(ctx->fp, "}\n\n"); 352 fprintf(ctx->fp, "},\n\n");
352 353
353 fprintf(ctx->fp, "traceerr = {\n[0]="); 354 fprintf(ctx->fp, "traceerr = {\n[0]=");
354 for (i = 0; trace_errors[i]; i++) 355 for (i = 0; trace_errors[i]; i++)
355 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 356 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
356 fprintf(ctx->fp, "}\n\n"); 357 fprintf(ctx->fp, "},\n\n");
357} 358}
358 359
359/* -- Argument parsing ---------------------------------------------------- */ 360/* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +491,7 @@ int main(int argc, char **argv)
490 case BUILD_vmdef: 491 case BUILD_vmdef:
491 emit_vmdef(ctx); 492 emit_vmdef(ctx);
492 emit_lib(ctx); 493 emit_lib(ctx);
494 fprintf(ctx->fp, "}\n\n");
493 break; 495 break;
494 case BUILD_ffdef: 496 case BUILD_ffdef:
495 case BUILD_libdef: 497 case BUILD_libdef:
diff --git a/src/host/buildvm.h b/src/host/buildvm.h
index 25750150..f81ef7e0 100644
--- a/src/host/buildvm.h
+++ b/src/host/buildvm.h
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
82 const char *beginsym; 82 const char *beginsym;
83 /* Strings generated by DynASM. */ 83 /* Strings generated by DynASM. */
84 const char *const *globnames; 84 const char *const *globnames;
85 const char *const *extnames;
85 const char *dasm_ident; 86 const char *dasm_ident;
86 const char *dasm_arch; 87 const char *dasm_arch;
87 /* Relocations. */ 88 /* Relocations. */
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 5e3fe021..01a1ba06 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" 51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
52}; 52};
53 53
54/* Emit relocation for the incredibly stupid OSX assembler. */ 54/* Emit x86/x64 text relocations. */
55static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, 55static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
56 const char *sym) 56 const char *sym)
57{ 57{
58 const char *opname = NULL; 58 const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
71 exit(1); 71 exit(1);
72 } 72 }
73 emit_asm_bytes(ctx, cp, n); 73 emit_asm_bytes(ctx, cp, n);
74 if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
75 /* Various fixups for external symbols outside of our binary. */
76 if (ctx->mode == BUILD_elfasm) {
77 if (LJ_32)
78 fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
79 fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
80 if (LJ_32)
81 fprintf(ctx->fp, "#endif\n");
82 return;
83 } else if (LJ_32 && ctx->mode == BUILD_machasm) {
84 fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
85 return;
86 }
87 }
74 fprintf(ctx->fp, "\t%s %s\n", opname, sym); 88 fprintf(ctx->fp, "\t%s %s\n", opname, sym);
75} 89}
76#else 90#else
@@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
79{ 93{
80 int i; 94 int i;
81 for (i = 0; i < n; i += 4) { 95 for (i = 0; i < n; i += 4) {
96 uint32_t ins = *(uint32_t *)(p+i);
97#if LJ_TARGET_ARM64 && LJ_BE
98 ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
99#endif
82 if ((i & 15) == 0) 100 if ((i & 15) == 0)
83 fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); 101 fprintf(ctx->fp, "\t.long 0x%08x", ins);
84 else 102 else
85 fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); 103 fprintf(ctx->fp, ",0x%08x", ins);
86 if ((i & 15) == 12) putc('\n', ctx->fp); 104 if ((i & 15) == 12) putc('\n', ctx->fp);
87 } 105 }
88 if ((n & 15) != 0) putc('\n', ctx->fp); 106 if ((n & 15) != 0) putc('\n', ctx->fp);
@@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
107 ins, sym); 125 ins, sym);
108 exit(1); 126 exit(1);
109 } 127 }
110#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 128#elif LJ_TARGET_ARM64
129 if ((ins >> 26) == 0x25u) {
130 fprintf(ctx->fp, "\tbl %s\n", sym);
131 } else {
132 fprintf(stderr,
133 "Error: unsupported opcode %08x for %s symbol relocation.\n",
134 ins, sym);
135 exit(1);
136 }
137#elif LJ_TARGET_PPC
111#if LJ_TARGET_PS3 138#if LJ_TARGET_PS3
112#define TOCPREFIX "." 139#define TOCPREFIX "."
113#else 140#else
@@ -228,11 +255,20 @@ void emit_asm(BuildCtx *ctx)
228 255
229#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND 256#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
230 /* This should really be moved into buildvm_arm.dasc. */ 257 /* This should really be moved into buildvm_arm.dasc. */
258#if LJ_ARCH_HASFPU
259 fprintf(ctx->fp,
260 ".fnstart\n"
261 ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
262 ".vsave {d8-d15}\n"
263 ".save {r4}\n"
264 ".pad #28\n");
265#else
231 fprintf(ctx->fp, 266 fprintf(ctx->fp,
232 ".fnstart\n" 267 ".fnstart\n"
233 ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" 268 ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
234 ".pad #28\n"); 269 ".pad #28\n");
235#endif 270#endif
271#endif
236#if LJ_TARGET_MIPS 272#if LJ_TARGET_MIPS
237 fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); 273 fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
238#endif 274#endif
@@ -255,8 +291,9 @@ void emit_asm(BuildCtx *ctx)
255 BuildReloc *r = &ctx->reloc[rel]; 291 BuildReloc *r = &ctx->reloc[rel];
256 int n = r->ofs - ofs; 292 int n = r->ofs - ofs;
257#if LJ_TARGET_X86ORX64 293#if LJ_TARGET_X86ORX64
258 if (ctx->mode == BUILD_machasm && r->type != 0) { 294 if (r->type != 0 &&
259 emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); 295 (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
296 emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
260 } else { 297 } else {
261 emit_asm_bytes(ctx, ctx->code+ofs, n); 298 emit_asm_bytes(ctx, ctx->code+ofs, n);
262 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); 299 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -290,10 +327,7 @@ void emit_asm(BuildCtx *ctx)
290#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) 327#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
291 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); 328 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
292#endif 329#endif
293#if LJ_TARGET_PPCSPE 330#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
294 /* Soft-float ABI + SPE. */
295 fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
296#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
297 /* Hard-float ABI. */ 331 /* Hard-float ABI. */
298 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); 332 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
299#endif 333#endif
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index 3d6f8153..20bb77cd 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
156static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
157{
158 uint32_t v = *p++;
159 if (v >= 0x80) {
160 int sh = 0; v &= 0x7f;
161 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
162 }
163 *vv = v;
164 return p;
165}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
@@ -326,6 +385,8 @@ void emit_lib(BuildCtx *ctx)
326 ok = LJ_HASJIT; 385 ok = LJ_HASJIT;
327 else if (!strcmp(buf, "#if LJ_HASFFI\n")) 386 else if (!strcmp(buf, "#if LJ_HASFFI\n"))
328 ok = LJ_HASFFI; 387 ok = LJ_HASFFI;
388 else if (!strcmp(buf, "#if LJ_HASBUFFER\n"))
389 ok = LJ_HASBUFFER;
329 if (!ok) { 390 if (!ok) {
330 int lvl = 1; 391 int lvl = 1;
331 while (fgets(buf, sizeof(buf), fp) != NULL) { 392 while (fgets(buf, sizeof(buf), fp) != NULL) {
@@ -373,7 +434,7 @@ void emit_lib(BuildCtx *ctx)
373 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", 434 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
374 ffasmfunc); 435 ffasmfunc);
375 } else if (ctx->mode == BUILD_vmdef) { 436 } else if (ctx->mode == BUILD_vmdef) {
376 fprintf(ctx->fp, "}\n\n"); 437 fprintf(ctx->fp, "},\n\n");
377 } else if (ctx->mode == BUILD_bcdef) { 438 } else if (ctx->mode == BUILD_bcdef) {
378 int i; 439 int i;
379 fprintf(ctx->fp, "\n};\n\n"); 440 fprintf(ctx->fp, "\n};\n\n");
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..b2600bd5
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,56 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
6#if LJ_FR2
70,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
80,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
916,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
100,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
11128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
120,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
130,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
140,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
158,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
160,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
170,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
182,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
193,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
200,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
2141,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
2218,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
236,252,127,76,4,2,0,0
24#else
250,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
260,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
2716,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
280,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
29128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
300,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
310,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
320,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
338,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
340,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
350,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
362,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
373,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
380,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
3941,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
4018,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
416,252,127,76,4,2,0,0
42#endif
43};
44
45static const struct { const char *name; int ofs; } libbc_map[] = {
46{"math_deg",0},
47{"math_rad",25},
48{"string_len",50},
49{"table_foreachi",69},
50{"table_foreach",136},
51{"table_getn",207},
52{"table_remove",226},
53{"table_move",355},
54{NULL,502}
55};
56
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
index e390d31d..aa061e6e 100644
--- a/src/host/buildvm_peobj.c
+++ b/src/host/buildvm_peobj.c
@@ -9,7 +9,7 @@
9#include "buildvm.h" 9#include "buildvm.h"
10#include "lj_bc.h" 10#include "lj_bc.h"
11 11
12#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 12#if LJ_TARGET_X86ORX64
13 13
14/* Context for PE object emitter. */ 14/* Context for PE object emitter. */
15static char *strtab; 15static char *strtab;
@@ -93,12 +93,6 @@ typedef struct PEsymaux {
93#define PEOBJ_RELOC_ADDR32NB 0x03 93#define PEOBJ_RELOC_ADDR32NB 0x03
94#define PEOBJ_RELOC_OFS 0 94#define PEOBJ_RELOC_OFS 0
95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ 95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
96#elif LJ_TARGET_PPC
97#define PEOBJ_ARCH_TARGET 0x01f2
98#define PEOBJ_RELOC_REL32 0x06
99#define PEOBJ_RELOC_DIR32 0x02
100#define PEOBJ_RELOC_OFS (-4)
101#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */
102#endif 96#endif
103 97
104/* Section numbers (0-based). */ 98/* Section numbers (0-based). */
@@ -109,6 +103,8 @@ enum {
109#if LJ_TARGET_X64 103#if LJ_TARGET_X64
110 PEOBJ_SECT_PDATA, 104 PEOBJ_SECT_PDATA,
111 PEOBJ_SECT_XDATA, 105 PEOBJ_SECT_XDATA,
106#elif LJ_TARGET_X86
107 PEOBJ_SECT_SXDATA,
112#endif 108#endif
113 PEOBJ_SECT_RDATA_Z, 109 PEOBJ_SECT_RDATA_Z,
114 PEOBJ_NSECTIONS 110 PEOBJ_NSECTIONS
@@ -208,6 +204,13 @@ void emit_peobj(BuildCtx *ctx)
208 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; 204 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
209 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ 205 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
210 pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; 206 pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
207#elif LJ_TARGET_X86
208 memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
209 pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
210 sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
211 pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
212 /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
213 pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
211#endif 214#endif
212 215
213 memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); 216 memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@@ -232,7 +235,7 @@ void emit_peobj(BuildCtx *ctx)
232 nrsym = ctx->nrelocsym; 235 nrsym = ctx->nrelocsym;
233 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; 236 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
234#if LJ_TARGET_X64 237#if LJ_TARGET_X64
235 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ 238 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
236#endif 239#endif
237 240
238 /* Write PE object header and all sections. */ 241 /* Write PE object header and all sections. */
@@ -242,15 +245,8 @@ void emit_peobj(BuildCtx *ctx)
242 /* Write .text section. */ 245 /* Write .text section. */
243 host_endian.u = 1; 246 host_endian.u = 1;
244 if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { 247 if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
245#if LJ_TARGET_PPC
246 uint32_t *p = (uint32_t *)ctx->code;
247 int n = (int)(ctx->codesz >> 2);
248 for (i = 0; i < n; i++, p++)
249 *p = lj_bswap(*p); /* Byteswap .text section. */
250#else
251 fprintf(stderr, "Error: different byte order for host and target\n"); 248 fprintf(stderr, "Error: different byte order for host and target\n");
252 exit(1); 249 exit(1);
253#endif
254 } 250 }
255 owrite(ctx, ctx->code, ctx->codesz); 251 owrite(ctx, ctx->code, ctx->codesz);
256 for (i = 0; i < ctx->nreloc; i++) { 252 for (i = 0; i < ctx->nreloc; i++) {
@@ -312,6 +308,19 @@ void emit_peobj(BuildCtx *ctx)
312 reloc.type = PEOBJ_RELOC_ADDR32NB; 308 reloc.type = PEOBJ_RELOC_ADDR32NB;
313 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); 309 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
314 } 310 }
311#elif LJ_TARGET_X86
312 /* Write .sxdata section. */
313 for (i = 0; i < nrsym; i++) {
314 if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
315 uint32_t symidx = 1+2+i;
316 owrite(ctx, &symidx, 4);
317 break;
318 }
319 }
320 if (i == nrsym) {
321 fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
322 exit(1);
323 }
315#endif 324#endif
316 325
317 /* Write .rdata$Z section. */ 326 /* Write .rdata$Z section. */
@@ -333,8 +342,10 @@ void emit_peobj(BuildCtx *ctx)
333#if LJ_TARGET_X64 342#if LJ_TARGET_X64
334 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); 343 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
335 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); 344 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
336 emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, 345 emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
337 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); 346 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
347#elif LJ_TARGET_X86
348 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
338#endif 349#endif
339 350
340 emit_peobj_sym(ctx, ctx->beginsym, 0, 351 emit_peobj_sym(ctx, ctx->beginsym, 0,
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..921769f7
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
16local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
17
18local function usage(arg)
19 io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
20 " [-o buildvm_libbc.h] lib_*.c\n")
21 os.exit(1)
22end
23
24local function parse_arg(arg)
25 local outfile = "-"
26 if not (arg and arg[1]) then
27 usage(arg)
28 end
29 if arg[1] == "-o" then
30 outfile = arg[2]
31 if not outfile then usage(arg) end
32 table.remove(arg, 1)
33 table.remove(arg, 1)
34 end
35 return outfile
36end
37
38local function read_files(names)
39 local src = ""
40 for _,name in ipairs(names) do
41 local fp = assert(io.open(name))
42 src = src .. fp:read("*a")
43 fp:close()
44 end
45 return src
46end
47
48local function transform_lua(code)
49 local fixup = {}
50 local n = -30000
51 code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
52 n = n + 1
53 fixup[n] = { "CHECK", tp }
54 return format("%s=%d", var, n)
55 end)
56 code = string.gsub(code, "PAIRS%((.-)%)", function(var)
57 fixup.PAIRS = true
58 return format("nil, %s, 0", var)
59 end)
60 return "return "..code, fixup
61end
62
63local function read_uleb128(p)
64 local v = p[0]; p = p + 1
65 if v >= 128 then
66 local sh = 7; v = v - 128
67 repeat
68 local r = p[0]
69 v = v + bit.lshift(bit.band(r, 127), sh)
70 sh = sh + 7
71 p = p + 1
72 until r < 128
73 end
74 return p, v
75end
76
77-- ORDER LJ_T
78local name2itype = {
79 str = 5, func = 9, tab = 12, int = 14, num = 15
80}
81
82local BC = {}
83for i=0,#bcnames/6-1 do
84 BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
85end
86local xop, xra = isbe and 3 or 0, isbe and 2 or 1
87local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
88
89local function fixup_dump(dump, fixup)
90 local buf = ffi.new("uint8_t[?]", #dump+1, dump)
91 local p = buf+5
92 local n, sizebc
93 p, n = read_uleb128(p)
94 local start = p
95 p = p + 4
96 p = read_uleb128(p)
97 p = read_uleb128(p)
98 p, sizebc = read_uleb128(p)
99 local rawtab = {}
100 for i=0,sizebc-1 do
101 local op = p[xop]
102 if op == BC.KSHORT then
103 local rd = p[xrc] + 256*p[xrb]
104 rd = bit.arshift(bit.lshift(rd, 16), 16)
105 local f = fixup[rd]
106 if f then
107 if f[1] == "CHECK" then
108 local tp = f[2]
109 if tp == "tab" then rawtab[p[xra]] = true end
110 p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
111 p[xrb] = 0
112 p[xrc] = name2itype[tp]
113 else
114 error("unhandled fixup type: "..f[1])
115 end
116 end
117 elseif op == BC.TGETV then
118 if rawtab[p[xrb]] then
119 p[xop] = BC.TGETR
120 end
121 elseif op == BC.TSETV then
122 if rawtab[p[xrb]] then
123 p[xop] = BC.TSETR
124 end
125 elseif op == BC.ITERC then
126 if fixup.PAIRS then
127 p[xop] = BC.ITERN
128 end
129 end
130 p = p + 4
131 end
132 return ffi.string(start, n)
133end
134
135local function find_defs(src)
136 local defs = {}
137 for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
138 local env = {}
139 local tcode, fixup = transform_lua(code)
140 local func = assert(load(tcode, "", nil, env))()
141 defs[name] = fixup_dump(string.dump(func, true), fixup)
142 defs[#defs+1] = name
143 end
144 return defs
145end
146
147local function gen_header(defs)
148 local t = {}
149 local function w(x) t[#t+1] = x end
150 w("/* This is a generated file. DO NOT EDIT! */\n\n")
151 w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
152 local s = ""
153 for _,name in ipairs(defs) do
154 s = s .. defs[name]
155 end
156 w("static const uint8_t libbc_code[] = {\n")
157 local n = 0
158 for i=1,#s do
159 local x = string.byte(s, i)
160 w(x); w(",")
161 n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
162 if n >= 75 then n = 0; w("\n") end
163 end
164 w("0\n};\n\n")
165 w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
166 local m = 0
167 for _,name in ipairs(defs) do
168 w('{"'); w(name); w('",'); w(m) w('},\n')
169 m = m + #defs[name]
170 end
171 w("{NULL,"); w(m); w("}\n};\n\n")
172 return table.concat(t)
173end
174
175local function write_file(name, data)
176 if name == "-" then
177 assert(io.write(data))
178 assert(io.flush())
179 else
180 local fp = io.open(name)
181 if fp then
182 local old = fp:read("*a")
183 fp:close()
184 if data == old then return end
185 end
186 fp = assert(io.open(name, "w"))
187 assert(fp:write(data))
188 assert(fp:close())
189 end
190end
191
192local outfile = parse_arg(arg)
193local src = read_files(arg)
194local defs = find_defs(src)
195local hdr = gen_header(defs)
196write_file(outfile, hdr)
197
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 37c4d3f6..e58a3fef 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
41 41
42-- Cache some library functions and objects. 42-- Cache some library functions and objects.
43local jit = require("jit") 43local jit = require("jit")
44assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util") 45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef") 46local vmdef = require("jit.vmdef")
47local bit = require("bit") 47local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
179end 179end
180 180
181-- Public module functions. 181-- Public module functions.
182module(...) 182return {
183 183 line = bcline,
184line = bcline 184 dump = bcdump,
185dump = bcdump 185 targets = bctargets,
186targets = bctargets 186 on = bcliston,
187 187 off = bclistoff,
188on = bcliston 188 start = bcliston -- For -j command line option.
189off = bclistoff 189}
190start = bcliston -- For -j command line option.
191 190
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 73654297..ab13667a 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,12 +11,16 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
18local LJBC_PREFIX = "luaJIT_BC_" 18local LJBC_PREFIX = "luaJIT_BC_"
19 19
20local type, assert = type, assert
21local format = string.format
22local tremove, tconcat = table.remove, table.concat
23
20------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
21 25
22local function usage() 26local function usage()
@@ -63,8 +67,18 @@ local map_type = {
63} 67}
64 68
65local map_arch = { 69local map_arch = {
66 x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, 70 x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
67 mips = true, mipsel = true, 71 x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
72 arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
73 arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
74 arm64be = { e = "be", b = 64, m = 183, },
75 ppc = { e = "be", b = 32, m = 20, },
76 mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
77 mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
78 mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
79 mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
80 mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
81 mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
68} 82}
69 83
70local map_os = { 84local map_os = {
@@ -73,33 +87,33 @@ local map_os = {
73} 87}
74 88
75local function checkarg(str, map, err) 89local function checkarg(str, map, err)
76 str = string.lower(str) 90 str = str:lower()
77 local s = check(map[str], "unknown ", err) 91 local s = check(map[str], "unknown ", err)
78 return s == true and str or s 92 return type(s) == "string" and s or str
79end 93end
80 94
81local function detecttype(str) 95local function detecttype(str)
82 local ext = string.match(string.lower(str), "%.(%a+)$") 96 local ext = str:lower():match("%.(%a+)$")
83 return map_type[ext] or "raw" 97 return map_type[ext] or "raw"
84end 98end
85 99
86local function checkmodname(str) 100local function checkmodname(str)
87 check(string.match(str, "^[%w_.%-]+$"), "bad module name") 101 check(str:match("^[%w_.%-]+$"), "bad module name")
88 return string.gsub(str, "[%.%-]", "_") 102 return str:gsub("[%.%-]", "_")
89end 103end
90 104
91local function detectmodname(str) 105local function detectmodname(str)
92 if type(str) == "string" then 106 if type(str) == "string" then
93 local tail = string.match(str, "[^/\\]+$") 107 local tail = str:match("[^/\\]+$")
94 if tail then str = tail end 108 if tail then str = tail end
95 local head = string.match(str, "^(.*)%.[^.]*$") 109 local head = str:match("^(.*)%.[^.]*$")
96 if head then str = head end 110 if head then str = head end
97 str = string.match(str, "^[%w_.%-]+") 111 str = str:match("^[%w_.%-]+")
98 else 112 else
99 str = nil 113 str = nil
100 end 114 end
101 check(str, "cannot derive module name, use -n name") 115 check(str, "cannot derive module name, use -n name")
102 return string.gsub(str, "[%.%-]", "_") 116 return str:gsub("[%.%-]", "_")
103end 117end
104 118
105------------------------------------------------------------------------------ 119------------------------------------------------------------------------------
@@ -118,19 +132,19 @@ end
118local function bcsave_c(ctx, output, s) 132local function bcsave_c(ctx, output, s)
119 local fp = savefile(output, "w") 133 local fp = savefile(output, "w")
120 if ctx.type == "c" then 134 if ctx.type == "c" then
121 fp:write(string.format([[ 135 fp:write(format([[
122#ifdef __cplusplus 136#ifdef __cplusplus
123extern "C" 137extern "C"
124#endif 138#endif
125#ifdef _WIN32 139#ifdef _WIN32
126__declspec(dllexport) 140__declspec(dllexport)
127#endif 141#endif
128const char %s%s[] = { 142const unsigned char %s%s[] = {
129]], LJBC_PREFIX, ctx.modname)) 143]], LJBC_PREFIX, ctx.modname))
130 else 144 else
131 fp:write(string.format([[ 145 fp:write(format([[
132#define %s%s_SIZE %d 146#define %s%s_SIZE %d
133static const char %s%s[] = { 147static const unsigned char %s%s[] = {
134]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) 148]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
135 end 149 end
136 local t, n, m = {}, 0, 0 150 local t, n, m = {}, 0, 0
@@ -138,13 +152,13 @@ static const char %s%s[] = {
138 local b = tostring(string.byte(s, i)) 152 local b = tostring(string.byte(s, i))
139 m = m + #b + 1 153 m = m + #b + 1
140 if m > 78 then 154 if m > 78 then
141 fp:write(table.concat(t, ",", 1, n), ",\n") 155 fp:write(tconcat(t, ",", 1, n), ",\n")
142 n, m = 0, #b + 1 156 n, m = 0, #b + 1
143 end 157 end
144 n = n + 1 158 n = n + 1
145 t[n] = b 159 t[n] = b
146 end 160 end
147 bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") 161 bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
148end 162end
149 163
150local function bcsave_elfobj(ctx, output, s, ffi) 164local function bcsave_elfobj(ctx, output, s, ffi)
@@ -199,12 +213,8 @@ typedef struct {
199} ELF64obj; 213} ELF64obj;
200]] 214]]
201 local symname = LJBC_PREFIX..ctx.modname 215 local symname = LJBC_PREFIX..ctx.modname
202 local is64, isbe = false, false 216 local ai = assert(map_arch[ctx.arch])
203 if ctx.arch == "x64" then 217 local is64, isbe = ai.b == 64, ai.e == "be"
204 is64 = true
205 elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
206 isbe = true
207 end
208 218
209 -- Handle different host/target endianess. 219 -- Handle different host/target endianess.
210 local function f32(x) return x end 220 local function f32(x) return x end
@@ -237,10 +247,8 @@ typedef struct {
237 hdr.eendian = isbe and 2 or 1 247 hdr.eendian = isbe and 2 or 1
238 hdr.eversion = 1 248 hdr.eversion = 1
239 hdr.type = f16(1) 249 hdr.type = f16(1)
240 hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) 250 hdr.machine = f16(ai.m)
241 if ctx.arch == "mips" or ctx.arch == "mipsel" then 251 hdr.flags = f32(ai.f or 0)
242 hdr.flags = f32(0x50001006)
243 end
244 hdr.version = f32(1) 252 hdr.version = f32(1)
245 hdr.shofs = fofs(ffi.offsetof(o, "sect")) 253 hdr.shofs = fofs(ffi.offsetof(o, "sect"))
246 hdr.ehsize = f16(ffi.sizeof(hdr)) 254 hdr.ehsize = f16(ffi.sizeof(hdr))
@@ -336,12 +344,8 @@ typedef struct {
336} PEobj; 344} PEobj;
337]] 345]]
338 local symname = LJBC_PREFIX..ctx.modname 346 local symname = LJBC_PREFIX..ctx.modname
339 local is64 = false 347 local ai = assert(map_arch[ctx.arch])
340 if ctx.arch == "x86" then 348 local is64 = ai.b == 64
341 symname = "_"..symname
342 elseif ctx.arch == "x64" then
343 is64 = true
344 end
345 local symexport = " /EXPORT:"..symname..",DATA " 349 local symexport = " /EXPORT:"..symname..",DATA "
346 350
347 -- The file format is always little-endian. Swap if the host is big-endian. 351 -- The file format is always little-endian. Swap if the host is big-endian.
@@ -355,7 +359,7 @@ typedef struct {
355 -- Create PE object and fill in header. 359 -- Create PE object and fill in header.
356 local o = ffi.new("PEobj") 360 local o = ffi.new("PEobj")
357 local hdr = o.hdr 361 local hdr = o.hdr
358 hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) 362 hdr.arch = f16(assert(ai.p))
359 hdr.nsects = f16(2) 363 hdr.nsects = f16(2)
360 hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) 364 hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
361 hdr.nsyms = f32(6) 365 hdr.nsyms = f32(6)
@@ -477,13 +481,13 @@ typedef struct {
477} mach_obj_64; 481} mach_obj_64;
478typedef struct { 482typedef struct {
479 mach_fat_header fat; 483 mach_fat_header fat;
480 mach_fat_arch fat_arch[4]; 484 mach_fat_arch fat_arch[2];
481 struct { 485 struct {
482 mach_header hdr; 486 mach_header hdr;
483 mach_segment_command seg; 487 mach_segment_command seg;
484 mach_section sec; 488 mach_section sec;
485 mach_symtab_command sym; 489 mach_symtab_command sym;
486 } arch[4]; 490 } arch[2];
487 mach_nlist sym_entry; 491 mach_nlist sym_entry;
488 uint8_t space[4096]; 492 uint8_t space[4096];
489} mach_fat_obj; 493} mach_fat_obj;
@@ -494,6 +498,8 @@ typedef struct {
494 is64, align, mobj = true, 8, "mach_obj_64" 498 is64, align, mobj = true, 8, "mach_obj_64"
495 elseif ctx.arch == "arm" then 499 elseif ctx.arch == "arm" then
496 isfat, mobj = true, "mach_fat_obj" 500 isfat, mobj = true, "mach_fat_obj"
501 elseif ctx.arch == "arm64" then
502 is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
497 else 503 else
498 check(ctx.arch == "x86", "unsupported architecture for OSX") 504 check(ctx.arch == "x86", "unsupported architecture for OSX")
499 end 505 end
@@ -503,8 +509,8 @@ typedef struct {
503 -- Create Mach-O object and fill in header. 509 -- Create Mach-O object and fill in header.
504 local o = ffi.new(mobj) 510 local o = ffi.new(mobj)
505 local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) 511 local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
506 local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] 512 local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
507 local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] 513 local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
508 if isfat then 514 if isfat then
509 o.fat.magic = be32(0xcafebabe) 515 o.fat.magic = be32(0xcafebabe)
510 o.fat.nfat_arch = be32(#cpusubtype) 516 o.fat.nfat_arch = be32(#cpusubtype)
@@ -603,16 +609,16 @@ local function docmd(...)
603 local n = 1 609 local n = 1
604 local list = false 610 local list = false
605 local ctx = { 611 local ctx = {
606 strip = true, arch = jit.arch, os = string.lower(jit.os), 612 strip = true, arch = jit.arch, os = jit.os:lower(),
607 type = false, modname = false, 613 type = false, modname = false,
608 } 614 }
609 while n <= #arg do 615 while n <= #arg do
610 local a = arg[n] 616 local a = arg[n]
611 if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then 617 if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
612 table.remove(arg, n) 618 tremove(arg, n)
613 if a == "--" then break end 619 if a == "--" then break end
614 for m=2,#a do 620 for m=2,#a do
615 local opt = string.sub(a, m, m) 621 local opt = a:sub(m, m)
616 if opt == "l" then 622 if opt == "l" then
617 list = true 623 list = true
618 elseif opt == "s" then 624 elseif opt == "s" then
@@ -625,13 +631,13 @@ local function docmd(...)
625 if n ~= 1 then usage() end 631 if n ~= 1 then usage() end
626 arg[1] = check(loadstring(arg[1])) 632 arg[1] = check(loadstring(arg[1]))
627 elseif opt == "n" then 633 elseif opt == "n" then
628 ctx.modname = checkmodname(table.remove(arg, n)) 634 ctx.modname = checkmodname(tremove(arg, n))
629 elseif opt == "t" then 635 elseif opt == "t" then
630 ctx.type = checkarg(table.remove(arg, n), map_type, "file type") 636 ctx.type = checkarg(tremove(arg, n), map_type, "file type")
631 elseif opt == "a" then 637 elseif opt == "a" then
632 ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") 638 ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
633 elseif opt == "o" then 639 elseif opt == "o" then
634 ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") 640 ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
635 else 641 else
636 usage() 642 usage()
637 end 643 end
@@ -653,7 +659,7 @@ end
653------------------------------------------------------------------------------ 659------------------------------------------------------------------------------
654 660
655-- Public module functions. 661-- Public module functions.
656module(...) 662return {
657 663 start = docmd -- Process -b command line option.
658start = docmd -- Process -b command line option. 664}
659 665
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index cd3acbd7..ba79c47e 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
658end 658end
659 659
660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
661local function create_(code, addr, out) 661local function create(code, addr, out)
662 local ctx = {} 662 local ctx = {}
663 ctx.code = code 663 ctx.code = code
664 ctx.addr = addr or 0 664 ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
670end 670end
671 671
672-- Simple API: disassemble code (a string) at address and output via out. 672-- Simple API: disassemble code (a string) at address and output via out.
673local function disass_(code, addr, out) 673local function disass(code, addr, out)
674 create_(code, addr, out):disass() 674 create(code, addr, out):disass()
675end 675end
676 676
677-- Return register name for RID. 677-- Return register name for RID.
678local function regname_(r) 678local function regname(r)
679 if r < 16 then return map_gpr[r] end 679 if r < 16 then return map_gpr[r] end
680 return "d"..(r-16) 680 return "d"..(r-16)
681end 681end
682 682
683-- Public module functions. 683-- Public module functions.
684module(...) 684return {
685 685 create = create,
686create = create_ 686 disass = disass,
687disass = disass_ 687 regname = regname
688regname = regname_ 688}
689 689
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
new file mode 100644
index 00000000..ad909fbd
--- /dev/null
+++ b/src/jit/dis_arm64.lua
@@ -0,0 +1,1216 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64 disassembler module.
3--
4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6--
7-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
8-- Sponsored by Cisco Systems, Inc.
9----------------------------------------------------------------------------
10-- This is a helper module used by the LuaJIT machine code dumper module.
11--
12-- It disassembles most user-mode AArch64 instructions.
13-- NYI: Advanced SIMD and VFP instructions.
14------------------------------------------------------------------------------
15
16local type = type
17local sub, byte, format = string.sub, string.byte, string.format
18local match, gmatch, gsub = string.match, string.gmatch, string.gsub
19local concat = table.concat
20local bit = require("bit")
21local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex
22local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
23local ror = bit.ror
24
25------------------------------------------------------------------------------
26-- Opcode maps
27------------------------------------------------------------------------------
28
-- Opcode map: disass_ins indexes it with band(rshift(op, shift), mask).
-- Here bit 31 selects the entry: 0 = adr, 1 = adrp.
29local map_adr = { -- PC-relative addressing.
30 shift = 31, mask = 1,
31 [0] = "adrDBx", "adrpDBx"
32}
33
-- Dispatches on bits 30:29 (shift 29, mask 3). In a pattern string the
-- leading lowercase word is the mnemonic, names after "|" are aliases and
-- the remaining characters are operand letters decoded by disass_ins.
34local map_addsubi = { -- Add/subtract immediate.
35 shift = 29, mask = 3,
36 [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
37}
38
-- Dispatches on bit 31 first. With bit 31 clear, bit 22 must also be
-- clear (index 1 is false, i.e. decoded as unknown). The innermost tables
-- select the operation from bits 30:29.
39local map_logi = { -- Logical immediate.
40 shift = 31, mask = 1,
41 [0] = {
42 shift = 22, mask = 1,
43 [0] = {
44 shift = 29, mask = 3,
45 [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
46 },
47 false -- unallocated
48 },
49 {
50 shift = 29, mask = 3,
51 [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
52 }
53}
54
-- Dispatches on bit 31 first. With bit 31 clear, bit 22 must also be
-- clear. The innermost tables select from bits 30:29; index 1 is false
-- (no instruction).
55local map_movwi = { -- Move wide immediate.
56 shift = 31, mask = 1,
57 [0] = {
58 shift = 22, mask = 1,
59 [0] = {
60 shift = 29, mask = 3,
61 [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
62 }, false -- unallocated
63 },
64 {
65 shift = 29, mask = 3,
66 [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
67 },
68}
69
-- Dispatches on bit 31, then bit 22, then bits 30:29. Only combinations
-- where bit 22 equals bit 31 have entries: the inner table of the second
-- branch is written without a [0] key and therefore sits at index 1.
-- Index 3 of bits 30:29 has no entry. The alias lists after "|" are
-- resolved by the "2" and "3" operand handlers in disass_ins.
70local map_bitf = { -- Bitfield.
71 shift = 31, mask = 1,
72 [0] = {
73 shift = 22, mask = 1,
74 [0] = {
75 shift = 29, mask = 3,
76 [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
77 "bfm|bfi|bfxilDN13w",
78 "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
79 }
80 },
81 {
82 shift = 22, mask = 1,
83 {
84 shift = 29, mask = 3,
85 [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
86 "bfm|bfi|bfxilDN13x",
87 "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
88 }
89 }
90}
91
-- Dispatches on bits 25:23 (shift 23, mask 7). Index 3 is false (no
-- instruction). Index 7 holds extr/ror and is matched with a sparse mask:
-- 0x1c0c1 at shift 15 tests opcode bits 15, 21, 22 and 29..31 together.
92local map_datai = { -- Data processing - immediate.
93 shift = 23, mask = 7,
94 [0] = map_adr, map_adr, map_addsubi, false,
95 map_logi, map_movwi, map_bitf,
96 {
97 shift = 15, mask = 0x1c0c1,
98 [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
99 [0x10081] = "extr|rorDNM4x"
100 }
101}
102
-- Dispatches on bit 31; for bit 31 clear, bit 15 must be clear too. The
-- operation comes from bits 30:29 and the final index is bits 23:21.
-- Entries at indices 6 and 7 of bits 23:21 omit the "S" operand letter, so
-- no shift amount is printed for them.
-- NOTE(review): indices 6/7 presumably correspond to the ror shift type,
-- for which map_shift has no name -- confirm before adding "S" there.
103local map_logsr = { -- Logical, shifted register.
104 shift = 31, mask = 1,
105 [0] = {
106 shift = 15, mask = 1,
107 [0] = {
108 shift = 29, mask = 3,
109 [0] = {
110 shift = 21, mask = 7,
111 [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
112 "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
113 },
114 {
115 shift = 21, mask = 7,
116 [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
117 "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
118 },
119 {
120 shift = 21, mask = 7,
121 [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
122 "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
123 },
124 {
125 shift = 21, mask = 7,
126 [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
127 "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
128 }
129 },
130 false -- unallocated
131 },
132 {
133 shift = 29, mask = 3,
134 [0] = {
135 shift = 21, mask = 7,
136 [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
137 "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
138 },
139 {
140 shift = 21, mask = 7,
141 [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
142 "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
143 },
144 {
145 shift = 21, mask = 7,
146 [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
147 "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
148 },
149 {
150 shift = 21, mask = 7,
151 [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
152 "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
153 }
154 }
155}
156
-- Add/subtract (shifted register) patterns, shared by map_addsubsh.
-- Dispatches on bit 31; for bit 31 clear, bit 15 must be clear too. The
-- operation comes from bits 30:29 and the final index is bits 23:22.
-- Index 3 of bits 23:22 omits the "S" (shift) operand letter.
157local map_assh = {
158 shift = 31, mask = 1,
159 [0] = {
160 shift = 15, mask = 1,
161 [0] = {
162 shift = 29, mask = 3,
163 [0] = {
164 shift = 22, mask = 3,
165 [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
166 },
167 {
168 shift = 22, mask = 3,
169 [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
170 "adds|cmnD0NMSg", "adds|cmnD0NMg"
171 },
172 {
173 shift = 22, mask = 3,
174 [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
175 },
176 {
177 shift = 22, mask = 3,
178 [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
179 "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
180 },
181 },
182 false -- unallocated
183 },
184 {
185 shift = 29, mask = 3,
186 [0] = {
187 shift = 22, mask = 3,
188 [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
189 },
190 {
191 shift = 22, mask = 3,
192 [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
193 "adds|cmnD0NMg"
194 },
195 {
196 shift = 22, mask = 3,
197 [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
198 },
199 {
200 shift = 22, mask = 3,
201 [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
202 "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
203 }
204 }
205}
206
-- Dispatches on bits 23:22. Values 0..2 all continue in map_assh; value 3
-- has no entry and is decoded as unknown.
207local map_addsubsh = { -- Add/subtract, shifted register.
208 shift = 22, mask = 3,
209 [0] = map_assh, map_assh, map_assh
210}
211
-- Only bits 23:22 == 0 has an entry; the operation then comes from
-- bits 30:29. The "X" operand letter prints the extend/shift modifier.
212local map_addsubex = { -- Add/subtract, extended register.
213 shift = 22, mask = 3,
214 [0] = {
215 shift = 29, mask = 3,
216 [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
217 }
218}
219
-- Only bits 15:10 == 0 has an entry; the operation then comes from
-- bits 30:29.
220local map_addsubc = { -- Add/subtract, with carry.
221 shift = 10, mask = 63,
222 [0] = {
223 shift = 29, mask = 3,
224 [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
225 }
226}
227
-- Conditional compare. Bit 4 must be clear. Bits 11:10 select the register
-- form (0) or the immediate form (2). The innermost tables have no [0]
-- key, so their entries sit at indices 1 (ccmn) and 3 (ccmp) of bits 30:29.
228local map_ccomp = {
229 shift = 4, mask = 1,
230 [0] = {
231 shift = 10, mask = 3,
232 [0] = { -- Conditional compare register.
233 shift = 29, mask = 3,
234 "ccmnNMVCg", false, "ccmpNMVCg",
235 },
236 [2] = { -- Conditional compare immediate.
237 shift = 29, mask = 3,
238 "ccmnN5VCg", false, "ccmpN5VCg",
239 }
240 }
241}
242
-- Bit 11 must be clear. Bit 10 then picks between the two inner tables,
-- which select from bits 30:29 (odd indices are false). Aliases after "|"
-- are resolved by the "c" operand handler in disass_ins.
243local map_csel = { -- Conditional select.
244 shift = 11, mask = 1,
245 [0] = {
246 shift = 10, mask = 1,
247 [0] = {
248 shift = 29, mask = 3,
249 [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false,
250 },
251 {
252 shift = 29, mask = 3,
253 [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false,
254 }
255 }
256}
257
-- Bit 29 must be clear. Bit 31 picks the inner table (the two differ only
-- at indices 2 and 3: rev/false vs. rev32/rev); the final index is
-- bits 20:10.
258local map_data1s = { -- Data processing, 1 source.
259 shift = 29, mask = 1,
260 [0] = {
261 shift = 31, mask = 1,
262 [0] = {
263 shift = 10, mask = 0x7ff,
264 [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg"
265 },
266 {
267 shift = 10, mask = 0x7ff,
268 [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg"
269 }
270 }
271}
272
-- Bit 29 must be clear; the final index is bits 15:10. The list has no
-- [0] key, so it starts at index 1: udiv = 2, sdiv = 3, lsl = 8, lsr = 9,
-- asr = 10, ror = 11.
273local map_data2s = { -- Data processing, 2 sources.
274 shift = 29, mask = 1,
275 [0] = {
276 shift = 10, mask = 63,
277 false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg",
278 "lsrDNMg", "asrDNMg", "rorDNMg"
279 }
280}
281
-- Dispatches on bits 31:29. Index 0 requires bits 23:21 == 0 and then
-- uses bit 15 to pick madd/msub. Indices 1..3 are false. Index 4 uses
-- bit 15 first and bits 23:21 second, which adds the long multiplies.
282local map_data3s = { -- Data processing, 3 sources.
283 shift = 29, mask = 7,
284 [0] = {
285 shift = 21, mask = 7,
286 [0] = {
287 shift = 15, mask = 1,
288 [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g"
289 }
290 }, false, false, false,
291 {
292 shift = 15, mask = 1,
293 [0] = {
294 shift = 21, mask = 7,
295 [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false,
296 false, "umaddl|umullDxNMwA0x", "umulhDNMx"
297 },
298 {
299 shift = 21, mask = 7,
300 [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false,
301 false, "umsubl|umneglDxNMwA0x"
302 }
303 }
304}
305
-- Dispatches on bit 28. With bit 28 clear, bit 24 selects logical (0) or
-- add/subtract (1), the latter split by bit 21 into shifted and extended
-- register forms. With bit 28 set, bits 24:21 select the group; index 6
-- is split by bit 30 into 2-source and 1-source operations and indices
-- 8..15 all lead to map_data3s.
306local map_datar = { -- Data processing, register.
307 shift = 28, mask = 1,
308 [0] = {
309 shift = 24, mask = 1,
310 [0] = map_logsr,
311 {
312 shift = 21, mask = 1,
313 [0] = map_addsubsh, map_addsubex
314 }
315 },
316 {
317 shift = 21, mask = 15,
318 [0] = map_addsubc, false, map_ccomp, false, map_csel, false,
319 {
320 shift = 30, mask = 1,
321 [0] = map_data2s, map_data1s
322 },
323 false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s,
324 map_data3s, map_data3s, map_data3s
325 }
326}
327
-- Bit 26 selects integer (0) or FP (1) destination registers; bits 31:30
-- then select the access variant. Missing indices decode as unknown.
328local map_lrl = { -- Load register, literal.
329 shift = 26, mask = 1,
330 [0] = {
331 shift = 30, mask = 3,
332 [0] = "ldrDwB", "ldrDxB", "ldrswDxB"
333 },
334 {
335 shift = 30, mask = 3,
336 [0] = "ldrDsB", "ldrDdB"
337 }
338}
339
-- Dispatches on bits 31:30, then bit 26, then bits 23:22. For
-- bits 31:30 = 0 and 1 only bit 26 == 0 has entries; for 2 and 3 both
-- values of bit 26 are covered. The "L" operand letter prints the
-- pre-/post-indexed address.
340local map_lsriind = { -- Load/store register, immediate pre/post-indexed.
341 shift = 30, mask = 3,
342 [0] = {
343 shift = 26, mask = 1,
344 [0] = {
345 shift = 22, mask = 3,
346 [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL"
347 }
348 },
349 {
350 shift = 26, mask = 1,
351 [0] = {
352 shift = 22, mask = 3,
353 [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL"
354 }
355 },
356 {
357 shift = 26, mask = 1,
358 [0] = {
359 shift = 22, mask = 3,
360 [0] = "strDwzL", "ldrDwzL", "ldrswDxzL"
361 },
362 {
363 shift = 22, mask = 3,
364 [0] = "strDszL", "ldrDszL"
365 }
366 },
367 {
368 shift = 26, mask = 1,
369 [0] = {
370 shift = 22, mask = 3,
371 [0] = "strDxzL", "ldrDxzL"
372 },
373 {
374 shift = 22, mask = 3,
375 [0] = "strDdzL", "ldrDdzL"
376 }
377 }
378}
379
-- Bit 21 selects immediate (0) or register-offset (1) load/store forms.
-- Immediate forms dispatch on bits 11:10: 0 = unscaled offset (only
-- bit 26 == 0 is listed), 1 and 3 = map_lsriind, 2 = false.
-- Register-offset forms only have an entry for bits 11:10 == 2; they
-- dispatch on bit 26, bits 31:30 and finally bits 23:22.
380local map_lsriro = {
381 shift = 21, mask = 1,
382 [0] = { -- Load/store register immediate.
383 shift = 10, mask = 3,
384 [0] = { -- Unscaled immediate.
385 shift = 26, mask = 1,
386 [0] = {
387 shift = 30, mask = 3,
388 [0] = {
389 shift = 22, mask = 3,
390 [0] = "sturbDwK", "ldurbDwK"
391 },
392 {
393 shift = 22, mask = 3,
394 [0] = "sturhDwK", "ldurhDwK"
395 },
396 {
397 shift = 22, mask = 3,
398 [0] = "sturDwK", "ldurDwK"
399 },
400 {
401 shift = 22, mask = 3,
402 [0] = "sturDxK", "ldurDxK"
403 }
404 }
405 }, map_lsriind, false, map_lsriind
406 },
407 { -- Load/store register, register offset.
408 shift = 10, mask = 3,
409 [2] = {
410 shift = 26, mask = 1,
411 [0] = {
412 shift = 30, mask = 3,
413 [0] = {
414 shift = 22, mask = 3,
415 [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO"
416 },
417 {
418 shift = 22, mask = 3,
419 [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO"
420 },
421 {
422 shift = 22, mask = 3,
423 [0] = "strDwO", "ldrDwO", "ldrswDxO"
424 },
425 {
426 shift = 22, mask = 3,
427 [0] = "strDxO", "ldrDxO"
428 }
429 },
430 {
431 shift = 30, mask = 3,
432 [2] = {
433 shift = 22, mask = 3,
434 [0] = "strDsO", "ldrDsO"
435 },
436 [3] = {
437 shift = 22, mask = 3,
438 [0] = "strDdO", "ldrDdO"
439 }
440 }
441 }
442 }
443}
444
-- Bit 22 selects stp (0) or ldp (1); bits 31:30 and then bit 26 select the
-- register class. The table holding only "stpDzAzdP" has no [0] key, so
-- that entry sits at index 1 (bit 26 set). The "P" operand letter prints
-- the address, including the indexing mode taken from bits 24:23.
445local map_lsp = { -- Load/store register pair, offset.
446 shift = 22, mask = 1,
447 [0] = {
448 shift = 30, mask = 3,
449 [0] = {
450 shift = 26, mask = 1,
451 [0] = "stpDzAzwP", "stpDzAzsP",
452 },
453 {
454 shift = 26, mask = 1,
455 "stpDzAzdP"
456 },
457 {
458 shift = 26, mask = 1,
459 [0] = "stpDzAzxP"
460 }
461 },
462 {
463 shift = 30, mask = 3,
464 [0] = {
465 shift = 26, mask = 1,
466 [0] = "ldpDzAzwP", "ldpDzAzsP",
467 },
468 {
469 shift = 26, mask = 1,
470 [0] = "ldpswDAxP", "ldpDzAzdP"
471 },
472 {
473 shift = 26, mask = 1,
474 [0] = "ldpDzAzxP"
475 }
476 }
477}
478
-- Keyed on opcode bits 29:28 and bit 24 together (shift 24, mask 0x31).
-- The two register-pair tables ([0x20], [0x21]) have no [0] key, so
-- map_lsp is reached for bits 24:23 == 1, 2 or 3. [0x31] holds the
-- unsigned-offset forms, dispatched on bit 26, bits 31:30 and bits 23:22.
479local map_ls = { -- Loads and stores.
480 shift = 24, mask = 0x31,
481 [0x10] = map_lrl, [0x30] = map_lsriro,
482 [0x20] = {
483 shift = 23, mask = 3,
484 map_lsp, map_lsp, map_lsp
485 },
486 [0x21] = {
487 shift = 23, mask = 3,
488 map_lsp, map_lsp, map_lsp
489 },
490 [0x31] = {
491 shift = 26, mask = 1,
492 [0] = {
493 shift = 30, mask = 3,
494 [0] = {
495 shift = 22, mask = 3,
496 [0] = "strbDwzU", "ldrbDwzU"
497 },
498 {
499 shift = 22, mask = 3,
500 [0] = "strhDwzU", "ldrhDwzU"
501 },
502 {
503 shift = 22, mask = 3,
504 [0] = "strDwzU", "ldrDwzU"
505 },
506 {
507 shift = 22, mask = 3,
508 [0] = "strDxzU", "ldrDxzU"
509 }
510 },
511 {
512 shift = 30, mask = 3,
513 [2] = {
514 shift = 22, mask = 3,
515 [0] = "strDszU", "ldrDszU"
516 },
517 [3] = {
518 shift = 22, mask = 3,
519 [0] = "strDdzU", "ldrDdzU"
520 }
521 }
522 },
523}
524
-- Only bits 30:28 == 1 has an entry (the outer list has no [0] key).
-- Below that:
--   bit 24 == 1: FP 3-source operations (bit 31 must be clear).
--   bit 24 == 0: bit 21 must be set; bits 11:10 then select
--     1 = conditional compare, 2 = 2-source ops, 3 = conditional select,
--     0 = a chain of single-bit tests on bits 12, 13, 14, 15:
--         bit 12 set -> FP immediate, else bit 13 set -> FP compare,
--         else bit 14 set -> 1-source ops, else bit 15 clear ->
--         FP/integer conversions (keyed on bits 23:16).
525local map_datafp = { -- Data processing, SIMD and FP.
526 shift = 28, mask = 7,
527 { -- 001
528 shift = 24, mask = 1,
529 [0] = {
530 shift = 21, mask = 1,
531 {
532 shift = 10, mask = 3,
533 [0] = {
534 shift = 12, mask = 1,
535 [0] = {
536 shift = 13, mask = 1,
537 [0] = {
538 shift = 14, mask = 1,
539 [0] = {
540 shift = 15, mask = 1,
541 [0] = { -- FP/int conversion.
542 shift = 31, mask = 1,
543 [0] = {
544 shift = 16, mask = 0xff,
545 [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
546 [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
547 [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
548 [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
549 [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
550 [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
551 [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
552 [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
553 [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
554 [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
555 [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
556 [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
557 [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
558 },
559 {
560 shift = 16, mask = 0xff,
561 [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
562 [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
563 [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
564 [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
565 [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
566 [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
567 [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
568 [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
569 [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
570 [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
571 [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
572 [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
573 [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
574 }
575 }
576 },
577 { -- FP data-processing, 1 source.
578 shift = 31, mask = 1,
579 [0] = {
580 shift = 22, mask = 3,
581 [0] = {
582 shift = 15, mask = 63,
583 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
584 "fsqrtDNf", false, "fcvtDdNs", false, false,
585 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
586 "frintaDNf", false, "frintxDNf", "frintiDNf",
587 },
588 {
589 shift = 15, mask = 63,
590 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
591 "fsqrtDNf", "fcvtDsNd", false, false, false,
592 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
593 "frintaDNf", false, "frintxDNf", "frintiDNf",
594 }
595 }
596 }
597 },
598 { -- FP compare.
599 shift = 31, mask = 1,
600 [0] = {
601 shift = 14, mask = 3,
602 [0] = {
603 shift = 23, mask = 1,
604 [0] = {
605 shift = 0, mask = 31,
606 [0] = "fcmpNMf", [8] = "fcmpNZf",
607 [16] = "fcmpeNMf", [24] = "fcmpeNZf",
608 }
609 }
610 }
611 }
612 },
613 { -- FP immediate.
614 shift = 31, mask = 1,
615 [0] = {
616 shift = 5, mask = 31,
617 [0] = {
618 shift = 23, mask = 1,
619 [0] = "fmovDFf"
620 }
621 }
622 }
623 },
624 { -- FP conditional compare.
625 shift = 31, mask = 1,
626 [0] = {
627 shift = 23, mask = 1,
628 [0] = {
629 shift = 4, mask = 1,
630 [0] = "fccmpNMVCf", "fccmpeNMVCf"
631 }
632 }
633 },
634 { -- FP data-processing, 2 sources.
635 shift = 31, mask = 1,
636 [0] = {
637 shift = 23, mask = 1,
638 [0] = {
639 shift = 12, mask = 15,
640 [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
641 "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
642 "fnmulDNMf"
643 }
644 }
645 },
646 { -- FP conditional select.
647 shift = 31, mask = 1,
648 [0] = {
649 shift = 23, mask = 1,
650 [0] = "fcselDNMCf"
651 }
652 }
653 }
654 },
655 { -- FP data-processing, 3 sources.
656 shift = 31, mask = 1,
657 [0] = {
658 shift = 15, mask = 1,
659 [0] = {
660 shift = 21, mask = 5,
661 [0] = "fmaddDNMAf", "fnmaddDNMAf"
662 },
663 {
664 shift = 21, mask = 5,
665 [0] = "fmsubDNMAf", "fnmsubDNMAf"
666 }
667 }
668 }
669 }
670}
671
-- Dispatches on bits 31:29: 0 = b, 1 and 5 = compare/test & branch (w and
-- x register variants), 2 = conditional branch, 3 = false, 4 = bl,
-- 6 = exception generation, system and register-branch instructions
-- (split by bits 25:24, then matched against fixed bit patterns).
-- Index 7 has no entry.
672local map_br = { -- Branches, exception generating and system instructions.
673 shift = 29, mask = 7,
674 [0] = "bB",
675 { -- Compare & branch, immediate.
676 shift = 24, mask = 3,
677 [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
678 },
679 { -- Conditional branch, immediate.
680 shift = 24, mask = 3,
681 [0] = {
682 shift = 4, mask = 1,
683 [0] = {
684 shift = 0, mask = 15,
685 [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
686 "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
687 }
688 }
689 }, false, "blB",
690 { -- Compare & branch, immediate.
691 shift = 24, mask = 3,
692 [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
693 },
694 {
695 shift = 24, mask = 3,
696 [0] = { -- Exception generation.
697 shift = 0, mask = 0xe0001f,
698 [0x200000] = "brkW"
699 },
700 { -- System instructions.
701 shift = 0, mask = 0x3fffff,
702 [0x03201f] = "nop"
703 },
704 { -- Unconditional branch, register.
705 shift = 0, mask = 0xfffc1f,
706 [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
707 [0x5f0000] = "retNx"
708 },
709 }
710}
711
-- Root of the opcode maps: disass_ins starts here and indexes with
-- bits 28:25 of the instruction word. Indices 0..3 are false, so such
-- words are printed as unknown.
712local map_init = {
713 shift = 25, mask = 15,
714 [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
715 map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
716}
717
718------------------------------------------------------------------------------
719
-- Register name tables, keyed by register class letter and then by
-- register number 0..31. Number 31 of the integer classes is named
-- "sp"/"wsp"; the FP classes use plain "d31"/"s31".
local map_regs = { x = {}, w = {}, d = {}, s = {} }

for class, names in pairs(map_regs) do
  for i = 0, 31 do
    names[i] = class..i
  end
end
map_regs.x[31] = "sp"
map_regs.w[31] = "wsp"
732
-- Condition names indexed by a 4-bit condition field (see the "C" and "c"
-- operand handlers in disass_ins). Index 15 has no entry.
733local map_cond = {
734 [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
735 "hi", "ls", "ge", "lt", "gt", "le", "al",
736}
737
-- Shift names indexed by bits 23:22 in the "S" operand handler of
-- disass_ins. Only values 0..2 are named; value 3 has no entry.
738local map_shift = { [0] = "lsl", "lsr", "asr", }
739
-- Extend names indexed by the 3-bit option field in bits 15:13 (used by
-- the "O" and "X" operand handlers in disass_ins).
740local map_extend = {
741 [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
742}
743
744------------------------------------------------------------------------------
745
-- Emit one formatted disassembly line through ctx.out: the address, the
-- opcode as hex (only if ctx.hexdump > 0), the mnemonic and the
-- comma-separated operands. If ctx.rel has an entry in ctx.symtab, the
-- symbol is appended after "->". Advances ctx.pos by one instruction.
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local sym = ctx.rel and ctx.symtab[ctx.rel]
  local extra = sym and "\t->"..sym or ""
  local args = concat(operands, ", ")
  local line
  if ctx.hexdump > 0 then
    line = format("%08x %s %-5s %s%s\n",
                  ctx.addr+pos, tohex(ctx.op), text, args, extra)
  else
    line = format("%08x %-5s %s%s\n", ctx.addr+pos, text, args, extra)
  end
  ctx.out(line)
  ctx.pos = pos + 4
end
765
-- Fallback for opcodes without a table entry: print the raw instruction
-- word as a ".long" data directive.
local function unknown(ctx)
  local raw = "0x"..tohex(ctx.op)
  return putop(ctx, ".long", { raw })
end
770
-- Resolve a register name for operand letter p when the pattern has no
-- global "g"/"f" marker: the register class is the first x/w/d/s letter
-- that follows p in the pattern (after the shortest run of alphanumerics).
local function match_reg(p, pat, regnum)
  local class = match(pat, p.."%w-([xwds])")
  local names = map_regs[class]
  return names[regnum]
end
774
-- Format a number as lowercase hex. Negative values go through tohex()
-- and are printed as 8 hex digits; all other values are printed by "%x"
-- without leading zeros.
local function fmt_hex32(x)
  if x < 0 then return tohex(x) end
  return format("%x", x)
end
782
-- Multipliers that replicate a narrow bit pattern across 32 bits, indexed
-- by the element size class "len" computed in decode_imm13.
local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }

-- Decode the immr/imms bitmask immediate of a logical-immediate
-- instruction into a hex string (without "0x" prefix).
-- With bit 22 set the value is built as a 64-bit quantity from two 32-bit
-- halves; otherwise a pattern of at most 32 bits is built and, if bit 31
-- of the opcode is set, printed twice to form the 64-bit value.
local function decode_imm13(op)
  local imms = band(rshift(op, 10), 63)
  local immr = band(rshift(op, 16), 63)

  if band(op, 0x00400000) ~= 0 then
    -- Run of imms+1 one-bits, split into low/high words.
    local lo, hi = -1, 0
    if imms < 32 then
      lo = rshift(-1, 31-imms)
    else
      hi = rshift(-1, 63-imms)
    end
    if immr ~= 0 then
      -- 64-bit rotate right: rotate both halves, then exchange the bits
      -- that crossed the word boundary.
      lo, hi = ror(lo, immr), ror(hi, immr)
      local cross = 0
      if immr ~= 32 then
        cross = band(bxor(lo, hi), lshift(-1, 32-immr))
      end
      lo, hi = bxor(lo, cross), bxor(hi, cross)
      if immr >= 32 then lo, hi = hi, lo end
    end
    if hi == 0 then return fmt_hex32(lo) end
    return fmt_hex32(hi)..tohex(lo)
  end

  -- The leading one-bits of imms select the element size class.
  local len
  if imms >= 60 then len = 1
  elseif imms >= 56 then len = 2
  elseif imms >= 48 then len = 3
  elseif imms >= 32 then len = 4
  else len = 5 end
  local l = lshift(1, len)-1
  local s = band(imms, l)
  local r = band(immr, l)
  local imm = ror(rshift(-1, 31-s), r)
  if len ~= 5 then
    -- Fold the bits rotated out at the top back into the narrow element.
    imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l)
  end
  imm = imm * imm13_rep[len]
  local ix = fmt_hex32(imm)
  if rshift(op, 31) ~= 0 then return ix..tohex(imm) end
  return ix
end
821
-- Extract the signed PC-relative byte offset of a branch or address
-- instruction, chosen by mnemonic:
--   b/bl      26-bit field at bits 25:0, times 4.
--   tbz/tbnz  14-bit field at bits 18:5, times 4.
--   adr/adrp  19-bit field at bits 23:5 times 4, OR-ed with the two low
--             bits from bits 30:29 (both names are decoded identically).
--   others    19-bit field at bits 23:5, times 4.
local function parse_immpc(op, name)
  if name == "b" or name == "bl" then
    return arshift(lshift(op, 6), 4)
  end
  if name == "tbz" or name == "tbnz" then
    return lshift(arshift(lshift(op, 13), 18), 2)
  end
  local imm19x4 = lshift(arshift(lshift(op, 8), 13), 2)
  if name == "adr" or name == "adrp" then
    return bor(imm19x4, band(rshift(op, 29), 3))
  end
  return imm19x4
end
835
-- Expand the 8-bit FP immediate in bits 20:13 into a Lua number.
-- Bit 20 is the sign, bits 19:17 form the exponent (bit 19 inverted) and
-- bits 16:13 are the fraction below an implicit leading one.
local function parse_fpimm8(op)
  local mant = 16 + band(rshift(op, 13), 15)
  local e = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
  local val = mant * 2^e
  if band(op, 0x100000) ~= 0 then val = -val end
  return val
end
842
-- Decide whether a bitfield move should be shown with the extract alias
-- (the second alias in the pattern) by the "2" operand handler.
-- Returns false when imms < immr, when imms is 31 or 63, or when immr is 0
-- and imms is 7 or 15 (or 31) in the cases listed below.
local function prefer_bfx(sf, uns, imms, immr)
  if imms < immr or imms == 31 or imms == 63 then return false end
  if immr ~= 0 then return true end
  local narrow = imms == 7 or imms == 15
  if sf == 0 then return not narrow end
  if uns == 0 then return not (narrow or imms == 31) end
  return true
end
857
-- Disassemble a single instruction.
--
-- Reads the 32-bit little-endian word at ctx.pos from ctx.code, walks the
-- opcode maps starting at map_init until a pattern string is found, and
-- interprets that pattern one character at a time to build the operand
-- list. The line is emitted via putop(), which advances ctx.pos by 4.
-- Words without a table entry are printed via unknown().
--
-- A pattern string has the form "name[|alias...]letters": the leading
-- lowercase word is the mnemonic, the names after "|" are aliases that
-- some operand handlers switch to, and each remaining character is an
-- operand letter handled in the loop below.
local function disass_ins(ctx)
  local pos = ctx.pos
  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
  local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
  local operands = {}
  local suffix = ""
  local last, name, pat
  local map_reg
  ctx.op = op
  ctx.rel = nil

  -- Descend through the nested maps; false or a missing entry is unknown.
  local opat = map_init[band(rshift(op, 25), 15)]
  while type(opat) ~= "string" do
    if not opat then return unknown(ctx) end
    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
  end
  name, pat = match(opat, "^([a-z0-9]*)(.*)")
  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
  if altname then pat = pat2 end
  if sub(pat, 1, 1) == "." then
    local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
    suffix = suffix..s2
    pat = p2
  end

  -- A "g" in the pattern selects x/w register names from bit 31, an "f"
  -- selects d/s register names from bit 22, for all D/N/M/A operands.
  local rt = match(pat, "[gf]")
  if rt then
    if rt == "g" then
      map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
    else
      map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
    end
  end

  local second0, immr

  for p in gmatch(pat, ".") do
    local x = nil
    if p == "D" then
      -- Register in bits 4:0.
      local regnum = band(op, 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "N" then
      -- Register in bits 9:5.
      local regnum = band(rshift(op, 5), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "M" then
      -- Register in bits 20:16.
      local regnum = band(rshift(op, 16), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "A" then
      -- Register in bits 14:10.
      local regnum = band(rshift(op, 10), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "B" then
      -- PC-relative target; remembered in ctx.rel for the symbol lookup.
      local addr = ctx.addr + pos + parse_immpc(op, name)
      ctx.rel = addr
      x = "0x"..tohex(addr)
    elseif p == "T" then
      -- Bit number for tbz/tbnz: bit 31 supplies the top bit.
      x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
    elseif p == "V" then
      -- Flags immediate in bits 3:0.
      x = band(op, 15)
    elseif p == "C" then
      -- Condition in bits 15:12.
      x = map_cond[band(rshift(op, 12), 15)]
    elseif p == "c" then
      -- Condition with alias handling: when both source registers are the
      -- same, drop them and show the inverted condition with an alias.
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      local cond = band(rshift(op, 12), 15)
      local invc = bxor(cond, 1)
      x = map_cond[cond]
      if altname and cond ~= 14 and cond ~= 15 then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if rn == rm then
          local n = #operands
          operands[n] = nil
          x = map_cond[invc]
          if rn ~= 31 then
            if a1 then name = a1 else name = altname end
          else
            operands[n-1] = nil
            name = a2
          end
        end
      end
    elseif p == "W" then
      -- 16-bit immediate in bits 20:5.
      x = band(rshift(op, 5), 0xffff)
    elseif p == "Y" then
      -- 16-bit immediate in bits 20:5, with alias selection.
      x = band(rshift(op, 5), 0xffff)
      local hw = band(rshift(op, 21), 3)
      if altname and (hw == 0 or x ~= 0) then
        name = altname
      end
    elseif p == "L" then
      -- Signed 9-bit offset, pre-indexed (bit 11 set) or post-indexed.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if band(op, 0x800) ~= 0 then
        x = "["..rn..", #"..imm9.."]!"
      else
        x = "["..rn.."], #"..imm9
      end
    elseif p == "U" then
      -- Unsigned 12-bit offset (bits 21:10), scaled by the access size in
      -- bits 31:30. The field is unsigned, so it must not be sign-extended:
      -- offsets with bit 21 set would otherwise print as negative numbers.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local sz = band(rshift(op, 30), 3)
      local imm12 = lshift(band(rshift(op, 10), 0xfff), sz)
      if imm12 ~= 0 then
        x = "["..rn..", #"..imm12.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "K" then
      -- Unscaled signed 9-bit offset.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if imm9 ~= 0 then
        x = "["..rn..", #"..imm9.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "O" then
      -- Register offset, optionally extended and/or scaled.
      local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
      local m = band(rshift(op, 13), 1)
      if m == 0 then
        rm = map_regs.w[band(rshift(op, 16), 31)]
      else
        rm = map_regs.x[band(rshift(op, 16), 31)]
      end
      x = "["..rn..", "..rm
      local opt = band(rshift(op, 13), 7)
      local s = band(rshift(op, 12), 1)
      local sz = band(rshift(op, 30), 3)
      -- extension to be applied
      if opt == 3 then
        if s == 0 then x = x.."]"
        else x = x..", lsl #"..sz.."]" end
      elseif opt == 2 or opt == 6 or opt == 7 then
        if s == 0 then x = x..", "..map_extend[opt].."]"
        else x = x..", "..map_extend[opt].." #"..sz.."]" end
      else
        x = x.."]"
      end
    elseif p == "P" then
      -- Register pair address: signed 7-bit offset (bits 21:15) scaled by
      -- the size of one register. opcv holds bits 31:26 of the opcode:
      --   0x0a (w), 0x0b (s), 0x1a (ldpsw)  -> scale 4
      --   0x1b (d), 0x2a (x)                -> scale 8
      --   0x2b                              -> scale 16
      -- The x-register pairs (0x2a) must use scale 8, not 16.
      local opcv, sh = rshift(op, 26), 2
      if opcv >= 0x2b then sh = 4 elseif opcv >= 0x1b then sh = 3 end
      local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local ind = band(rshift(op, 23), 3)
      if ind == 1 then
        x = "["..rn.."], #"..imm7
      elseif ind == 2 then
        if imm7 == 0 then
          x = "["..rn.."]"
        else
          x = "["..rn..", #"..imm7.."]"
        end
      elseif ind == 3 then
        x = "["..rn..", #"..imm7.."]!"
      end
    elseif p == "I" then
      -- Add/subtract immediate: 12 bits, optionally shifted left by 12.
      local shf = band(rshift(op, 22), 3)
      local imm12 = band(rshift(op, 10), 0x0fff)
      local rn, rd = band(rshift(op, 5), 31), band(op, 31)
      if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
        name = altname
        x = nil
      elseif shf == 0 then
        x = imm12
      elseif shf == 1 then
        x = imm12..", lsl #12"
      end
    elseif p == "i" then
      -- Logical (bitmask) immediate.
      x = "#0x"..decode_imm13(op)
    elseif p == "1" then
      -- immr field (bits 21:16); kept for the "2" and "3" handlers.
      immr = band(rshift(op, 16), 63)
      x = immr
    elseif p == "2" then
      -- imms field (bits 15:10) of sbfm/ubfm, with alias selection.
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2, a3, a4, a5, a6 =
          match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
        local sf = band(rshift(op, 26), 32)
        local uns = band(rshift(op, 30), 1)
        if prefer_bfx(sf, uns, x, immr) then
          name = a2
          x = x - immr + 1
        elseif immr == 0 and x == 7 then
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a6
          x = nil
        elseif immr == 0 and x == 15 then
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a5
          x = nil
        elseif x == 31 or x == 63 then
          if x == 31 and immr == 0 and name == "sbfm" then
            name = a4
            local n = #operands
            operands[n] = nil
            if sf ~= 0 then
              operands[n-1] = gsub(operands[n-1], "x", "w")
            end
            last = operands[n-1]
          else
            name = a3
          end
          x = nil
        elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
          name = a4
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = nil
        elseif x < immr then
          name = a1
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        end
      end
    elseif p == "3" then
      -- imms field (bits 15:10) of bfm, with alias selection.
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if x < immr then
          name = a1
          local sf = band(rshift(op, 26), 32)
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        else
          name = a2
          x = x - immr + 1
        end
      end
    elseif p == "4" then
      -- 6-bit immediate in bits 15:10; the alias is used when both source
      -- registers are identical and the duplicate operand is dropped.
      x = band(rshift(op, 10), 63)
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      if altname and rn == rm then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        name = altname
      end
    elseif p == "5" then
      -- 5-bit immediate in bits 20:16.
      x = band(rshift(op, 16), 31)
    elseif p == "S" then
      -- Shift type and amount; omitted when the amount is zero.
      x = band(rshift(op, 10), 63)
      if x == 0 then x = nil
      else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
    elseif p == "X" then
      local opt = band(rshift(op, 13), 7)
      -- Width specifier <R>.
      if opt ~= 3 and opt ~= 7 then
        last = map_regs.w[band(rshift(op, 16), 31)]
        operands[#operands] = last
      end
      x = band(rshift(op, 10), 7)
      -- Extension.
      if opt == 2 + band(rshift(op, 31), 1) and
         band(rshift(op, second0 and 5 or 0), 31) == 31 then
        if x == 0 then x = nil
        else x = "lsl #"..x end
      else
        if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
        else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
      end
    elseif p == "R" then
      -- Move-wide shift: bits 22:21 times 16; omitted when zero.
      x = band(rshift(op,21), 3)
      if x == 0 then x = nil
      else x = "lsl #"..x*16 end
    elseif p == "z" then
      -- Rename the previous operand from sp/wsp to the zero register.
      local n = #operands
      if operands[n] == "sp" then operands[n] = "xzr"
      elseif operands[n] == "wsp" then operands[n] = "wzr"
      end
    elseif p == "Z" then
      -- Literal zero operand.
      x = 0
    elseif p == "F" then
      -- 8-bit FP immediate.
      x = parse_fpimm8(op)
    elseif p == "g" or p == "f" or p == "x" or p == "w" or
           p == "d" or p == "s" then
      -- These are handled in D/N/M/A.
    elseif p == "0" then
      -- If the previous operand is register 31 (printed as sp/wsp), drop
      -- it and switch to an alias name.
      if last == "sp" or last == "wsp" then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        if altname then
          local a1, a2 = match(altname, "([^|]*)|(.*)")
          if not a1 then
            name = altname
          elseif second0 then
            name, altname = a2, a1
          else
            name, altname = a1, a2
          end
        end
      end
      second0 = true
    else
      assert(false)
    end
    if x then
      last = x
      if type(x) == "number" then x = "#"..x end
      operands[#operands+1] = x
    end
  end

  return putop(ctx, name..suffix, operands)
end
1175
1176------------------------------------------------------------------------------
1177
-- Disassemble a block of code: start at byte offset ofs (default 0) and
-- decode instructions until ctx.pos reaches ofs+len, or the end of
-- ctx.code when len is not given.
local function disass_block(ctx, ofs, len)
  ofs = ofs or 0
  local stop
  if len then
    stop = ofs + len
  else
    stop = #ctx.code
  end
  ctx.rel = nil
  ctx.pos = ofs
  while ctx.pos < stop do
    disass_ins(ctx)
  end
end
1186
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-- addr defaults to 0 and out defaults to io.write. The context starts with
-- an empty symbol table and hexdump set to 8.
local function create(code, addr, out)
  return {
    code = code,
    addr = addr or 0,
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 8,
  }
end
1198
-- Simple API: disassemble code (a string) at address and output via out.
-- Builds a fresh context and disassembles the whole string.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
1203
-- Return register name for RID: ids below 32 map to the x register names
-- (31 is "sp"), all other ids map to d register number r-32.
local function regname(r)
  if not (r < 32) then
    return map_regs.d[r-32]
  end
  return map_regs.x[r]
end
1209
1210-- Public module functions.
-- The module value: create() builds a disassembler context, disass() is
-- the one-shot entry point and regname() maps a register id to its name.
1211return {
1212 create = create,
1213 disass = disass,
1214 regname = regname
1215}
1216
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
new file mode 100644
index 00000000..edcbffa8
--- /dev/null
+++ b/src/jit/dis_arm64be.lua
@@ -0,0 +1,12 @@
----------------------------------------------------------------------------
-- LuaJIT ARM64BE disassembler wrapper module.
--
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- ARM64 instructions are always little-endian. So just forward to the
-- common ARM64 disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------

-- `...` is the name this module was required under; keep its package
-- prefix (everything up to the last dot) so the sibling module is found.
return require((string.match(..., ".*%.") or "").."dis_arm64")
12
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index 8f5734ed..6ad17f54 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift 19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
20 20
21------------------------------------------------------------------------------ 21------------------------------------------------------------------------------
22-- Primary and extended opcode maps 22-- Extended opcode maps common to all MIPS releases
23------------------------------------------------------------------------------ 23------------------------------------------------------------------------------
24 24
25local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
26local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } 25local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
27local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } 26local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
28 27
28local map_cop0 = {
29 shift = 25, mask = 1,
30 [0] = {
31 shift = 21, mask = 15,
32 [0] = "mfc0TDW", [4] = "mtc0TDW",
33 [10] = "rdpgprDT",
34 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
35 [14] = "wrpgprDT",
36 }, {
37 shift = 0, mask = 63,
38 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
39 [24] = "eret", [31] = "deret",
40 [32] = "wait",
41 },
42}
43
44------------------------------------------------------------------------------
45-- Primary and extended opcode maps for MIPS R1-R5
46------------------------------------------------------------------------------
47
48local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
49
29local map_special = { 50local map_special = {
30 shift = 0, mask = 63, 51 shift = 0, mask = 63,
31 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, 52 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
@@ -34,15 +55,17 @@ local map_special = {
34 "jrS", "jalrD1S", "movzDST", "movnDST", 55 "jrS", "jalrD1S", "movzDST", "movnDST",
35 "syscallY", "breakY", false, "sync", 56 "syscallY", "breakY", false, "sync",
36 "mfhiD", "mthiS", "mfloD", "mtloS", 57 "mfhiD", "mthiS", "mfloD", "mtloS",
37 false, false, false, false, 58 "dsllvDST", false, "dsrlvDST", "dsravDST",
38 "multST", "multuST", "divST", "divuST", 59 "multST", "multuST", "divST", "divuST",
39 false, false, false, false, 60 "dmultST", "dmultuST", "ddivST", "ddivuST",
40 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", 61 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
41 "andDST", "orDST", "xorDST", "nor|notDST0", 62 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
42 false, false, "sltDST", "sltuDST", 63 false, false, "sltDST", "sltuDST",
43 false, false, false, false, 64 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
44 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", 65 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
45 "teqSTZ", false, "tneSTZ", 66 "teqSTZ", false, "tneSTZ", false,
67 "dsllDTA", false, "dsrlDTA", "dsraDTA",
68 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
46} 69}
47 70
48local map_special2 = { 71local map_special2 = {
@@ -60,11 +83,17 @@ local map_bshfl = {
60 [24] = "sehDT", 83 [24] = "sehDT",
61} 84}
62 85
86local map_dbshfl = {
87 shift = 6, mask = 31,
88 [2] = "dsbhDT",
89 [5] = "dshdDT",
90}
91
63local map_special3 = { 92local map_special3 = {
64 shift = 0, mask = 63, 93 shift = 0, mask = 63,
65 [0] = "extTSAK", [4] = "insTSAL", 94 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
66 [32] = map_bshfl, 95 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
67 [59] = "rdhwrTD", 96 [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD",
68} 97}
69 98
70local map_regimm = { 99local map_regimm = {
@@ -79,22 +108,6 @@ local map_regimm = {
79 false, false, false, "synciSO", 108 false, false, false, "synciSO",
80} 109}
81 110
82local map_cop0 = {
83 shift = 25, mask = 1,
84 [0] = {
85 shift = 21, mask = 15,
86 [0] = "mfc0TDW", [4] = "mtc0TDW",
87 [10] = "rdpgprDT",
88 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
89 [14] = "wrpgprDT",
90 }, {
91 shift = 0, mask = 63,
92 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
93 [24] = "eret", [31] = "deret",
94 [32] = "wait",
95 },
96}
97
98local map_cop1s = { 111local map_cop1s = {
99 shift = 0, mask = 63, 112 shift = 0, mask = 63,
100 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", 113 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
@@ -178,8 +191,8 @@ local map_cop1bc = {
178 191
179local map_cop1 = { 192local map_cop1 = {
180 shift = 21, mask = 31, 193 shift = 21, mask = 31,
181 [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", 194 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
182 "mtc1TG", false, "ctc1TG", "mthc1TG", 195 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
183 map_cop1bc, false, false, false, 196 map_cop1bc, false, false, false,
184 false, false, false, false, 197 false, false, false, false,
185 map_cop1s, map_cop1d, false, false, 198 map_cop1s, map_cop1d, false, false,
@@ -213,16 +226,218 @@ local map_pri = {
213 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", 226 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU",
214 map_cop0, map_cop1, false, map_cop1x, 227 map_cop0, map_cop1, false, map_cop1x,
215 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", 228 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
216 false, false, false, false, 229 "daddiTSI", "daddiuTSI", false, false,
217 map_special2, false, false, map_special3, 230 map_special2, "jalxJ", false, map_special3,
218 "lbTSO", "lhTSO", "lwlTSO", "lwTSO", 231 "lbTSO", "lhTSO", "lwlTSO", "lwTSO",
219 "lbuTSO", "lhuTSO", "lwrTSO", false, 232 "lbuTSO", "lhuTSO", "lwrTSO", false,
220 "sbTSO", "shTSO", "swlTSO", "swTSO", 233 "sbTSO", "shTSO", "swlTSO", "swTSO",
221 false, false, "swrTSO", "cacheNSO", 234 false, false, "swrTSO", "cacheNSO",
222 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", 235 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO",
223 false, "ldc1HSO", "ldc2TSO", false, 236 false, "ldc1HSO", "ldc2TSO", "ldTSO",
224 "scTSO", "swc1HSO", "swc2TSO", false, 237 "scTSO", "swc1HSO", "swc2TSO", false,
225 false, "sdc1HSO", "sdc2TSO", false, 238 false, "sdc1HSO", "sdc2TSO", "sdTSO",
239}
240
241------------------------------------------------------------------------------
242-- Primary and extended opcode maps for MIPS R6
243------------------------------------------------------------------------------
244
245local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
246local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
247local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
248local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
249local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
250local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
251local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
252local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
253
254local map_special_r6 = {
255 shift = 0, mask = 63,
256 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
257 false, map_srl, "sraDTA",
258 "sllvDTS", false, map_srlv, "sravDTS",
259 "jrS", "jalrD1S", false, false,
260 "syscallY", "breakY", false, "sync",
261 "clzDS", "cloDS", "dclzDS", "dcloDS",
262 "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
263 map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
264 map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
265 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
266 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
267 false, false, "sltDST", "sltuDST",
268 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
269 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
270 "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
271 "dsllDTA", false, "dsrlDTA", "dsraDTA",
272 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
273}
274
275local map_bshfl_r6 = {
276 shift = 9, mask = 3,
277 [1] = "alignDSTa",
278 _ = {
279 shift = 6, mask = 31,
280 [0] = "bitswapDT",
281 [2] = "wsbhDT",
282 [16] = "sebDT",
283 [24] = "sehDT",
284 }
285}
286
287local map_dbshfl_r6 = {
288 shift = 9, mask = 3,
289 [1] = "dalignDSTa",
290 _ = {
291 shift = 6, mask = 31,
292 [0] = "dbitswapDT",
293 [2] = "dsbhDT",
294 [5] = "dshdDT",
295 }
296}
297
298local map_special3_r6 = {
299 shift = 0, mask = 63,
300 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
301 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
302 [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
303}
304
305local map_regimm_r6 = {
306 shift = 16, mask = 31,
307 [0] = "bltzSB", [1] = "bgezSB",
308 [6] = "dahiSI", [30] = "datiSI",
309 [23] = "sigrieI", [31] = "synciSO",
310}
311
312local map_pcrel_r6 = {
313 shift = 19, mask = 3,
314 [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
315 shift = 18, mask = 1,
316 [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
317 }
318}
319
320local map_cop1s_r6 = {
321 shift = 0, mask = 63,
322 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
323 "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
324 "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
325 "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
326 "sel.sFGH", false, false, false,
327 "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
328 "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
329 "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
330 false, "cvt.d.sFG", false, false,
331 "cvt.w.sFG", "cvt.l.sFG",
332}
333
334local map_cop1d_r6 = {
335 shift = 0, mask = 63,
336 [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
337 "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
338 "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
339 "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
340 "sel.dFGH", false, false, false,
341 "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
342 "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
343 "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
344 "cvt.s.dFG", false, false, false,
345 "cvt.w.dFG", "cvt.l.dFG",
346}
347
348local map_cop1w_r6 = {
349 shift = 0, mask = 63,
350 [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
351 "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
352 "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
353 "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
354 false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
355 false, false, false, false,
356 false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
357 false, false, false, false,
358 "cvt.s.wFG", "cvt.d.wFG",
359}
360
361local map_cop1l_r6 = {
362 shift = 0, mask = 63,
363 [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
364 "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
365 "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
366 "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
367 false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
368 false, false, false, false,
369 false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
370 false, false, false, false,
371 "cvt.s.lFG", "cvt.d.lFG",
372}
373
374local map_cop1_r6 = {
375 shift = 21, mask = 31,
376 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
377 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
378 false, "bc1eqzHB", false, false,
379 false, "bc1nezHB", false, false,
380 map_cop1s_r6, map_cop1d_r6, false, false,
381 map_cop1w_r6, map_cop1l_r6,
382}
383
-- Sub-opcode selector for opcode groups that are distinguished by their
-- rs/rt register fields (installed as `maprs` in map_pop06_r6,
-- map_pop07_r6, map_pop26_r6 and map_pop27_r6).
-- Returns 0 if rt == 0, else 1 if rs == 0, else 2 if rs == rt, else 3.
local function maprs_popTS(rs, rt)
  if rt == 0 then return 0 elseif rs == 0 then return 1
  elseif rs == rt then return 2 else return 3 end
end
388
389local map_pop06_r6 = {
390 maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
391}
392local map_pop07_r6 = {
393 maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
394}
395local map_pop26_r6 = {
396 maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
397}
398local map_pop27_r6 = {
399 maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
400}
401
-- Sub-opcode selector keyed on the rs field only (installed as `maprs` in
-- map_pop66_r6 and map_pop76_r6). rt is accepted for a uniform call
-- signature but is not inspected.
-- Returns 0 if rs == 0, else 1.
local function maprs_popS(rs, rt)
  if rs == 0 then return 0 else return 1 end
end
405
406local map_pop66_r6 = {
407 maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
408}
409local map_pop76_r6 = {
410 maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
411}
412
-- Sub-opcode selector comparing the rs and rt fields (installed as `maprs`
-- in map_pop10_r6 and map_pop30_r6).
-- Returns 0 if rs >= rt, else 1 if rs == 0, else 2.
local function maprs_popST(rs, rt)
  if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end
end
416
417local map_pop10_r6 = {
418 maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
419}
420local map_pop30_r6 = {
421 maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
422}
423
424local map_pri_r6 = {
425 [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
426 "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
427 map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
428 "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
429 map_cop0, map_cop1_r6, false, false,
430 false, false, map_pop26_r6, map_pop27_r6,
431 map_pop30_r6, "daddiuTSI", false, false,
432 false, "dauiTSI", false, map_special3_r6,
433 "lbTSO", "lhTSO", false, "lwTSO",
434 "lbuTSO", "lhuTSO", false, false,
435 "sbTSO", "shTSO", false, "swTSO",
436 false, false, false, false,
437 false, "lwc1HSO", "bc#", false,
438 false, "ldc1HSO", map_pop66_r6, "ldTSO",
439 false, "swc1HSO", "balc#", map_pcrel_r6,
440 false, "sdc1HSO", map_pop76_r6, "sdTSO",
226} 441}
227 442
228------------------------------------------------------------------------------ 443------------------------------------------------------------------------------
@@ -279,10 +494,14 @@ local function disass_ins(ctx)
279 ctx.op = op 494 ctx.op = op
280 ctx.rel = nil 495 ctx.rel = nil
281 496
282 local opat = map_pri[rshift(op, 26)] 497 local opat = ctx.map_pri[rshift(op, 26)]
283 while type(opat) ~= "string" do 498 while type(opat) ~= "string" do
284 if not opat then return unknown(ctx) end 499 if not opat then return unknown(ctx) end
285 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ 500 if opat.maprs then
501 opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
502 else
503 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
504 end
286 end 505 end
287 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") 506 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
288 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") 507 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
@@ -306,6 +525,10 @@ local function disass_ins(ctx)
306 x = "f"..band(rshift(op, 21), 31) 525 x = "f"..band(rshift(op, 21), 31)
307 elseif p == "A" then 526 elseif p == "A" then
308 x = band(rshift(op, 6), 31) 527 x = band(rshift(op, 6), 31)
528 elseif p == "a" then
529 x = band(rshift(op, 6), 7)
530 elseif p == "E" then
531 x = band(rshift(op, 6), 31) + 32
309 elseif p == "M" then 532 elseif p == "M" then
310 x = band(rshift(op, 11), 31) 533 x = band(rshift(op, 11), 31)
311 elseif p == "N" then 534 elseif p == "N" then
@@ -315,10 +538,18 @@ local function disass_ins(ctx)
315 if x == 0 then x = nil end 538 if x == 0 then x = nil end
316 elseif p == "K" then 539 elseif p == "K" then
317 x = band(rshift(op, 11), 31) + 1 540 x = band(rshift(op, 11), 31) + 1
541 elseif p == "P" then
542 x = band(rshift(op, 11), 31) + 33
318 elseif p == "L" then 543 elseif p == "L" then
319 x = band(rshift(op, 11), 31) - last + 1 544 x = band(rshift(op, 11), 31) - last + 1
545 elseif p == "Q" then
546 x = band(rshift(op, 11), 31) - last + 33
320 elseif p == "I" then 547 elseif p == "I" then
321 x = arshift(lshift(op, 16), 16) 548 x = arshift(lshift(op, 16), 16)
549 elseif p == "2" then
550 x = arshift(lshift(op, 13), 11)
551 elseif p == "3" then
552 x = arshift(lshift(op, 14), 11)
322 elseif p == "U" then 553 elseif p == "U" then
323 x = band(op, 0xffff) 554 x = band(op, 0xffff)
324 elseif p == "O" then 555 elseif p == "O" then
@@ -328,13 +559,22 @@ local function disass_ins(ctx)
328 local index = map_gpr[band(rshift(op, 16), 31)] 559 local index = map_gpr[band(rshift(op, 16), 31)]
329 operands[#operands] = format("%s(%s)", index, last) 560 operands[#operands] = format("%s(%s)", index, last)
330 elseif p == "B" then 561 elseif p == "B" then
331 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 562 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
563 ctx.rel = x
564 x = format("0x%08x", x)
565 elseif p == "b" then
566 x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
332 ctx.rel = x 567 ctx.rel = x
333 x = "0x"..tohex(x) 568 x = format("0x%08x", x)
569 elseif p == "#" then
570 x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
571 ctx.rel = x
572 x = format("0x%08x", x)
334 elseif p == "J" then 573 elseif p == "J" then
335 x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 574 local a = ctx.addr + ctx.pos
575 x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
336 ctx.rel = x 576 ctx.rel = x
337 x = "0x"..tohex(x) 577 x = format("0x%08x", x)
338 elseif p == "V" then 578 elseif p == "V" then
339 x = band(rshift(op, 8), 7) 579 x = band(rshift(op, 8), 7)
340 if x == 0 then x = nil end 580 if x == 0 then x = nil end
@@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len)
384end 624end
385 625
386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 626-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
387local function create_(code, addr, out) 627local function create(code, addr, out)
388 local ctx = {} 628 local ctx = {}
389 ctx.code = code 629 ctx.code = code
390 ctx.addr = addr or 0 630 ctx.addr = addr or 0
@@ -393,36 +633,62 @@ local function create_(code, addr, out)
393 ctx.disass = disass_block 633 ctx.disass = disass_block
394 ctx.hexdump = 8 634 ctx.hexdump = 8
395 ctx.get = get_be 635 ctx.get = get_be
636 ctx.map_pri = map_pri
637 return ctx
638end
639
-- Variant of create() exported as create_el: builds the default context,
-- then replaces its instruction fetcher (ctx.get) with get_le.
local function create_el(code, addr, out)
  local ctx = create(code, addr, out)
  ctx.get = get_le
  return ctx
end
645
646local function create_r6(code, addr, out)
647 local ctx = create(code, addr, out)
648 ctx.map_pri = map_pri_r6
396 return ctx 649 return ctx
397end 650end
398 651
399local function create_el_(code, addr, out) 652local function create_r6_el(code, addr, out)
400 local ctx = create_(code, addr, out) 653 local ctx = create(code, addr, out)
401 ctx.get = get_le 654 ctx.get = get_le
655 ctx.map_pri = map_pri_r6
402 return ctx 656 return ctx
403end 657end
404 658
405-- Simple API: disassemble code (a string) at address and output via out. 659-- Simple API: disassemble code (a string) at address and output via out.
406local function disass_(code, addr, out) 660local function disass(code, addr, out)
407 create_(code, addr, out):disass() 661 create(code, addr, out):disass()
662end
663
664local function disass_el(code, addr, out)
665 create_el(code, addr, out):disass()
408end 666end
409 667
410local function disass_el_(code, addr, out) 668local function disass_r6(code, addr, out)
411 create_el_(code, addr, out):disass() 669 create_r6(code, addr, out):disass()
670end
671
672local function disass_r6_el(code, addr, out)
673 create_r6_el(code, addr, out):disass()
412end 674end
413 675
414-- Return register name for RID. 676-- Return register name for RID.
415local function regname_(r) 677local function regname(r)
416 if r < 32 then return map_gpr[r] end 678 if r < 32 then return map_gpr[r] end
417 return "f"..(r-32) 679 return "f"..(r-32)
418end 680end
419 681
420-- Public module functions. 682-- Public module functions.
421module(...) 683return {
422 684 create = create,
423create = create_ 685 create_el = create_el,
424create_el = create_el_ 686 create_r6 = create_r6,
425disass = disass_ 687 create_r6_el = create_r6_el,
426disass_el = disass_el_ 688 disass = disass,
427regname = regname_ 689 disass_el = disass_el,
690 disass_r6 = disass_r6,
691 disass_r6_el = disass_r6_el,
692 regname = regname
693}
428 694
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
new file mode 100644
index 00000000..5ad48f8f
--- /dev/null
+++ b/src/jit/dis_mips64.lua
@@ -0,0 +1,17 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64 disassembler wrapper module.
--
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the big-endian functions from the
-- MIPS disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------

-- Load dis_mips from the same package this module was required under.
local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
return {
  create = dis_mips.create,
  disass = dis_mips.disass,
  regname = dis_mips.regname
}
17
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
new file mode 100644
index 00000000..d50e3a18
--- /dev/null
+++ b/src/jit/dis_mips64el.lua
@@ -0,0 +1,17 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64EL disassembler wrapper module.
--
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the little-endian functions from the
-- MIPS disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------

-- Load dis_mips from the same package this module was required under.
local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
return {
  create = dis_mips.create_el,
  disass = dis_mips.disass_el,
  regname = dis_mips.regname
}
17
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua
new file mode 100644
index 00000000..921b3cbe
--- /dev/null
+++ b/src/jit/dis_mips64r6.lua
@@ -0,0 +1,17 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64R6 disassembler wrapper module.
--
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the r6 big-endian functions from the
-- MIPS disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------

-- Load dis_mips from the same package this module was required under.
local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
return {
  create = dis_mips.create_r6,
  disass = dis_mips.disass_r6,
  regname = dis_mips.regname
}
17
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua
new file mode 100644
index 00000000..aadef9f3
--- /dev/null
+++ b/src/jit/dis_mips64r6el.lua
@@ -0,0 +1,17 @@
----------------------------------------------------------------------------
-- LuaJIT MIPS64R6EL disassembler wrapper module.
--
-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
-- This module just exports the r6 little-endian functions from the
-- MIPS disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------

-- Load dis_mips from the same package this module was required under.
local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
return {
  create = dis_mips.create_r6_el,
  disass = dis_mips.disass_r6_el,
  regname = dis_mips.regname
}
17
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index 5f3e1402..52cebefb 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
8-- MIPS disassembler module. All the interesting stuff is there. 8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12 12return {
13module(...) 13 create = dis_mips.create_el,
14 14 disass = dis_mips.disass_el,
15local dis_mips = require(_PACKAGE.."dis_mips") 15 regname = dis_mips.regname
16 16}
17create = dis_mips.create_el
18disass = dis_mips.disass_el
19regname = dis_mips.regname
20 17
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index eda1c4f9..08d742f1 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
560end 560end
561 561
562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
563local function create_(code, addr, out) 563local function create(code, addr, out)
564 local ctx = {} 564 local ctx = {}
565 ctx.code = code 565 ctx.code = code
566 ctx.addr = addr or 0 566 ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
572end 572end
573 573
574-- Simple API: disassemble code (a string) at address and output via out. 574-- Simple API: disassemble code (a string) at address and output via out.
575local function disass_(code, addr, out) 575local function disass(code, addr, out)
576 create_(code, addr, out):disass() 576 create(code, addr, out):disass()
577end 577end
578 578
579-- Return register name for RID. 579-- Return register name for RID.
580local function regname_(r) 580local function regname(r)
581 if r < 32 then return map_gpr[r] end 581 if r < 32 then return map_gpr[r] end
582 return "f"..(r-32) 582 return "f"..(r-32)
583end 583end
584 584
585-- Public module functions. 585-- Public module functions.
586module(...) 586return {
587 587 create = create,
588create = create_ 588 disass = disass,
589disass = disass_ 589 regname = regname
590regname = regname_ 590}
591 591
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index 9222c1d9..2d37423e 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
8-- x86/x64 disassembler module. All the interesting stuff is there. 8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
12 12return {
13module(...) 13 create = dis_x86.create64,
14 14 disass = dis_x86.disass64,
15local dis_x86 = require(_PACKAGE.."dis_x86") 15 regname = dis_x86.regname64
16 16}
17create = dis_x86.create64
18disass = dis_x86.disass64
19regname = dis_x86.regname64
20 17
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index f804476b..5480854c 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -15,19 +15,20 @@
15-- Intel and AMD manuals. The supported instruction set is quite extensive 15-- Intel and AMD manuals. The supported instruction set is quite extensive
16-- and reflects what a current generation Intel or AMD CPU implements in 16-- and reflects what a current generation Intel or AMD CPU implements in
17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, 17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
18-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) 18-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
19-- instructions. 19-- (VMX/SVM) instructions.
20-- 20--
21-- Notes: 21-- Notes:
22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. 22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
23-- * No attempt at optimization has been made -- it's fast enough for my needs. 23-- * No attempt at optimization has been made -- it's fast enough for my needs.
24-- * The public API may change when more architectures are added.
25------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
26 25
27local type = type 26local type = type
28local sub, byte, format = string.sub, string.byte, string.format 27local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub 28local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep 29local lower, rep = string.lower, string.rep
30local bit = require("bit")
31local tohex = bit.tohex
31 32
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 33-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = { 34local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
76"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", 77"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
77"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", 78"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
78--Cx 79--Cx
79"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", 80"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
80"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", 81"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
81--Dx 82--Dx
82"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", 83"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
101 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", 102 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
102 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", 103 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
103 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", 104 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
104 [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, 105 [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
105 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, 106 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
106}, { __index = map_opc1_32 }) 107}, { __index = map_opc1_32 })
107 108
@@ -112,12 +113,12 @@ local map_opc2 = {
112[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", 113[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
113"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", 114"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
114--1x 115--1x
115"movupsXrm|movssXrm|movupdXrm|movsdXrm", 116"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
116"movupsXmr|movssXmr|movupdXmr|movsdXmr", 117"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
117"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", 118"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
118"movlpsXmr||movlpdXmr", 119"movlpsXmr||movlpdXmr",
119"unpcklpsXrm||unpcklpdXrm", 120"unpcklpsXrvm||unpcklpdXrvm",
120"unpckhpsXrm||unpckhpdXrm", 121"unpckhpsXrvm||unpckhpdXrvm",
121"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", 122"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
122"movhpsXmr||movhpdXmr", 123"movhpsXmr||movhpdXmr",
123"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", 124"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
126"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, 127"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
127"movapsXrm||movapdXrm", 128"movapsXrm||movapdXrm",
128"movapsXmr||movapdXmr", 129"movapsXmr||movapdXmr",
129"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", 130"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
130"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", 131"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
131"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", 132"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
132"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", 133"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
142"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", 143"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
143--5x 144--5x
144"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", 145"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
145"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", 146"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
146"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", 147"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
147"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", 148"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
148"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", 149"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
149"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", 150"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
150"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", 151"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
151"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", 152"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
152"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", 153"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
153--6x 154--6x
154"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", 155"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
155"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", 156"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
156"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", 157"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
157"||punpcklqdqXrm","||punpckhqdqXrm", 158"||punpcklqdqXrvm","||punpckhqdqXrvm",
158"movPrVSm","movqMrm|movdquXrm|movdqaXrm", 159"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
159--7x 160--7x
160"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", 161"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
161"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", 162"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
162"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", 163"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
163"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", 164"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
164nil,nil, 165nil,nil,
165"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", 166"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
166"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", 167"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
167--8x 168--8x
168"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", 169"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
180"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", 181"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
181--Cx 182--Cx
182"xaddBmr","xaddVmr", 183"xaddBmr","xaddVmr",
183"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", 184"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
184"pinsrwPrWmu","pextrwDrPmu", 185"pinsrwPrvWmu","pextrwDrPmu",
185"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", 186"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
186"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", 187"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
187--Dx 188--Dx
188"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", 189"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
189"paddqPrm","pmullwPrm", 190"paddqPrvm","pmullwPrvm",
190"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", 191"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
191"psubusbPrm","psubuswPrm","pminubPrm","pandPrm", 192"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
192"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", 193"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
193--Ex 194--Ex
194"pavgbPrm","psrawPrm","psradPrm","pavgwPrm", 195"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
195"pmulhuwPrm","pmulhwPrm", 196"pmulhuwPrvm","pmulhwPrvm",
196"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", 197"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
197"psubsbPrm","psubswPrm","pminswPrm","porPrm", 198"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
198"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", 199"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
199--Fx 200--Fx
200"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", 201"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
201"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", 202"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
202"psubbPrm","psubwPrm","psubdPrm","psubqPrm", 203"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
203"paddbPrm","paddwPrm","padddPrm","ud", 204"paddbPrvm","paddwPrvm","padddPrvm","ud",
204} 205}
205assert(map_opc2[255] == "ud") 206assert(map_opc2[255] == "ud")
206 207
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud")
208local map_opc3 = { 209local map_opc3 = {
209["38"] = { -- [66] 0f 38 xx 210["38"] = { -- [66] 0f 38 xx
210--0x 211--0x
211[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", 212[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
212"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", 213"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
213"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", 214"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
214nil,nil,nil,nil, 215"||permilpsXrvm","||permilpdXrvm",nil,nil,
215--1x 216--1x
216"||pblendvbXrma",nil,nil,nil, 217"||pblendvbXrma",nil,nil,nil,
217"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", 218"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
218nil,nil,nil,nil, 219"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
219"pabsbPrm","pabswPrm","pabsdPrm",nil, 220"pabsbPrm","pabswPrm","pabsdPrm",nil,
220--2x 221--2x
221"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", 222"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
222"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, 223"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
223"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", 224"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
224nil,nil,nil,nil, 225"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
225--3x 226--3x
226"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", 227"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
227"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", 228"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
228"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", 229"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
229"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", 230"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
230--4x 231--4x
231"||pmulddXrm","||phminposuwXrm", 232"||pmulddXrvm","||phminposuwXrm",nil,nil,
233nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
234--5x
235[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
236[0x5a] = "||broadcasti128XrlXm",
237--7x
238[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
239--8x
240[0x8c] = "||pmaskmovXrvVSm",
241[0x8e] = "||pmaskmovVSmXvr",
242--9x
243[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
244[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
245[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
246[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
247[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
248--Ax
249[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
250[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
251[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
252[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
253[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
254--Bx
255[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
256[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
257[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
258[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
259[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
260--Dx
261[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
262[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
232--Fx 263--Fx
233[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", 264[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
265[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
234}, 266},
235 267
236["3a"] = { -- [66] 0f 3a xx 268["3a"] = { -- [66] 0f 3a xx
237--0x 269--0x
238[0x00]=nil,nil,nil,nil,nil,nil,nil,nil, 270[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
239"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", 271"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
240"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", 272"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
273"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
241--1x 274--1x
242nil,nil,nil,nil, 275nil,nil,nil,nil,
243"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", 276"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
244nil,nil,nil,nil,nil,nil,nil,nil, 277"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
278nil,nil,nil,nil,
245--2x 279--2x
246"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, 280"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
281--3x
282[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
247--4x 283--4x
248[0x40] = "||dppsXrmu", 284[0x40] = "||dppsXrvmu",
249[0x41] = "||dppdXrmu", 285[0x41] = "||dppdXrvmu",
250[0x42] = "||mpsadbwXrmu", 286[0x42] = "||mpsadbwXrvmu",
287[0x44] = "||pclmulqdqXrvmu",
288[0x46] = "||perm2i128Xrvmu",
289[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
290[0x4c] = "||pblendvbXrvmb",
251--6x 291--6x
252[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", 292[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
253[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", 293[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
294[0xdf] = "||aeskeygenassistXrmu",
295--Fx
296[0xf0] = "||| rorxVrmu",
254}, 297},
255} 298}
256 299
@@ -354,17 +397,19 @@ local map_regs = {
354 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! 397 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
355 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 398 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
356 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, 399 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
400 Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
401 "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
357} 402}
358local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } 403local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
359 404
360-- Maps for size names. 405-- Maps for size names.
361local map_sz2n = { 406local map_sz2n = {
362 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, 407 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
363} 408}
364local map_sz2prefix = { 409local map_sz2prefix = {
365 B = "byte", W = "word", D = "dword", 410 B = "byte", W = "word", D = "dword",
366 Q = "qword", 411 Q = "qword",
367 M = "qword", X = "xword", 412 M = "qword", X = "xword", Y = "yword",
368 F = "dword", G = "qword", -- No need for sizes/register names for these two. 413 F = "dword", G = "qword", -- No need for sizes/register names for these two.
369} 414}
370 415
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands)
387 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end 432 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
388 if ctx.rex then 433 if ctx.rex then
389 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. 434 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
390 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") 435 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
391 if t ~= "" then text = "rex."..t.." "..text end 436 (ctx.vexl and "l" or "")
437 if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
438 if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "")
439 elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end
392 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 440 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
393 ctx.rex = false 441 ctx.rex = false; ctx.vexl = false; ctx.vexv = false
394 end 442 end
395 if ctx.seg then 443 if ctx.seg then
396 local text2, n = gsub(text, "%[", "["..ctx.seg..":") 444 local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands)
405 end 453 end
406 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) 454 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text))
407 ctx.mrm = false 455 ctx.mrm = false
456 ctx.vexv = false
408 ctx.start = pos 457 ctx.start = pos
409 ctx.imm = nil 458 ctx.imm = nil
410end 459end
@@ -413,7 +462,7 @@ end
413local function clearprefixes(ctx) 462local function clearprefixes(ctx)
414 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false 463 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
415 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 464 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
416 ctx.rex = false; ctx.a32 = false 465 ctx.rex = false; ctx.a32 = false; ctx.vexl = false
417end 466end
418 467
419-- Fallback for incomplete opcodes at the end. 468-- Fallback for incomplete opcodes at the end.
@@ -450,9 +499,9 @@ end
450-- Process pattern string and generate the operands. 499-- Process pattern string and generate the operands.
451local function putpat(ctx, name, pat) 500local function putpat(ctx, name, pat)
452 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp 501 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
453 local code, pos, stop = ctx.code, ctx.pos, ctx.stop 502 local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
454 503
455 -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz 504 -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
456 for p in gmatch(pat, ".") do 505 for p in gmatch(pat, ".") do
457 local x = nil 506 local x = nil
458 if p == "V" or p == "U" then 507 if p == "V" or p == "U" then
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat)
467 elseif p == "B" then 516 elseif p == "B" then
468 sz = "B" 517 sz = "B"
469 regs = ctx.rex and map_regs.B64 or map_regs.B 518 regs = ctx.rex and map_regs.B64 or map_regs.B
470 elseif match(p, "[WDQMXFG]") then 519 elseif match(p, "[WDQMXYFG]") then
471 sz = p 520 sz = p
521 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
472 regs = map_regs[sz] 522 regs = map_regs[sz]
473 elseif p == "P" then 523 elseif p == "P" then
474 sz = ctx.o16 and "X" or "M"; ctx.o16 = false 524 sz = ctx.o16 and "X" or "M"; ctx.o16 = false
525 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
475 regs = map_regs[sz] 526 regs = map_regs[sz]
527 elseif p == "H" then
528 name = name..(ctx.rexw and "d" or "s")
529 ctx.rexw = false
476 elseif p == "S" then 530 elseif p == "S" then
477 name = name..lower(sz) 531 name = name..lower(sz)
478 elseif p == "s" then 532 elseif p == "s" then
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat)
484 local imm = getimm(ctx, pos, 1); if not imm then return end 538 local imm = getimm(ctx, pos, 1); if not imm then return end
485 x = format("0x%02x", imm) 539 x = format("0x%02x", imm)
486 pos = pos+1 540 pos = pos+1
541 elseif p == "b" then
542 local imm = getimm(ctx, pos, 1); if not imm then return end
543 x = regs[imm/16+1]
544 pos = pos+1
487 elseif p == "w" then 545 elseif p == "w" then
488 local imm = getimm(ctx, pos, 2); if not imm then return end 546 local imm = getimm(ctx, pos, 2); if not imm then return end
489 x = format("0x%x", imm) 547 x = format("0x%x", imm)
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat)
532 local lo = imm % 0x1000000 590 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) 591 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else 592 else
535 x = format("0x%08x", imm) 593 x = "0x"..tohex(imm)
536 end 594 end
537 elseif p == "R" then 595 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8 596 local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat)
616 else 674 else
617 x = "CR"..sp 675 x = "CR"..sp
618 end 676 end
677 elseif p == "v" then
678 if ctx.vexv then
679 x = regs[ctx.vexv+1]; ctx.vexv = false
680 end
619 elseif p == "y" then x = "DR"..sp 681 elseif p == "y" then x = "DR"..sp
620 elseif p == "z" then x = "TR"..sp 682 elseif p == "z" then x = "TR"..sp
683 elseif p == "l" then vexl = false
621 elseif p == "t" then 684 elseif p == "t" then
622 else 685 else
623 error("bad pattern `"..pat.."'") 686 error("bad pattern `"..pat.."'")
@@ -692,7 +755,8 @@ map_act = {
692 B = putpat, W = putpat, D = putpat, Q = putpat, 755 B = putpat, W = putpat, D = putpat, Q = putpat,
693 V = putpat, U = putpat, T = putpat, 756 V = putpat, U = putpat, T = putpat,
694 M = putpat, X = putpat, P = putpat, 757 M = putpat, X = putpat, P = putpat,
695 F = putpat, G = putpat, 758 F = putpat, G = putpat, Y = putpat,
759 H = putpat,
696 760
697 -- Collect prefixes. 761 -- Collect prefixes.
698 [":"] = function(ctx, name, pat) 762 [":"] = function(ctx, name, pat)
@@ -753,15 +817,68 @@ map_act = {
753 817
754 -- REX prefix. 818 -- REX prefix.
755 rex = function(ctx, name, pat) 819 rex = function(ctx, name, pat)
756 if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. 820 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
757 for p in gmatch(pat, ".") do ctx["rex"..p] = true end 821 for p in gmatch(pat, ".") do ctx["rex"..p] = true end
758 ctx.rex = true 822 ctx.rex = "rex"
823 end,
824
825 -- VEX prefix.
826 vex = function(ctx, name, pat)
827 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
828 ctx.rex = "vex"
829 local pos = ctx.pos
830 if ctx.mrm then
831 ctx.mrm = nil
832 pos = pos-1
833 end
834 local b = byte(ctx.code, pos, pos)
835 if not b then return incomplete(ctx) end
836 pos = pos+1
837 if b < 128 then ctx.rexr = true end
838 local m = 1
839 if pat == "3" then
840 m = b%32; b = (b-m)/32
841 local nb = b%2; b = (b-nb)/2
842 if nb == 0 then ctx.rexb = true end
843 local nx = b%2
844 if nx == 0 then ctx.rexx = true end
845 b = byte(ctx.code, pos, pos)
846 if not b then return incomplete(ctx) end
847 pos = pos+1
848 if b >= 128 then ctx.rexw = true end
849 end
850 ctx.pos = pos
851 local map
852 if m == 1 then map = map_opc2
853 elseif m == 2 then map = map_opc3["38"]
854 elseif m == 3 then map = map_opc3["3a"]
855 else return unknown(ctx) end
856 local p = b%4; b = (b-p)/4
857 if p == 1 then ctx.o16 = "o16"
858 elseif p == 2 then ctx.rep = "rep"
859 elseif p == 3 then ctx.rep = "repne" end
860 local l = b%2; b = (b-l)/2
861 if l ~= 0 then ctx.vexl = true end
862 ctx.vexv = (-1-b)%16
863 return dispatchmap(ctx, map)
759 end, 864 end,
760 865
761 -- Special case for nop with REX prefix. 866 -- Special case for nop with REX prefix.
762 nop = function(ctx, name, pat) 867 nop = function(ctx, name, pat)
763 return dispatch(ctx, ctx.rex and pat or "nop") 868 return dispatch(ctx, ctx.rex and pat or "nop")
764 end, 869 end,
870
871 -- Special case for 0F 77.
872 emms = function(ctx, name, pat)
873 if ctx.rex ~= "vex" then
874 return putop(ctx, "emms")
875 elseif ctx.vexl then
876 ctx.vexl = false
877 return putop(ctx, "zeroall")
878 else
879 return putop(ctx, "zeroupper")
880 end
881 end,
765} 882}
766 883
767------------------------------------------------------------------------------ 884------------------------------------------------------------------------------
@@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len)
782end 899end
783 900
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 901-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out) 902local function create(code, addr, out)
786 local ctx = {} 903 local ctx = {}
787 ctx.code = code 904 ctx.code = code
788 ctx.addr = (addr or 0) - 1 905 ctx.addr = (addr or 0) - 1
@@ -796,8 +913,8 @@ local function create_(code, addr, out)
796 return ctx 913 return ctx
797end 914end
798 915
799local function create64_(code, addr, out) 916local function create64(code, addr, out)
800 local ctx = create_(code, addr, out) 917 local ctx = create(code, addr, out)
801 ctx.x64 = true 918 ctx.x64 = true
802 ctx.map1 = map_opc1_64 919 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q 920 ctx.aregs = map_regs.Q
@@ -805,32 +922,32 @@ local function create64_(code, addr, out)
805end 922end
806 923
807-- Simple API: disassemble code (a string) at address and output via out. 924-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out) 925local function disass(code, addr, out)
809 create_(code, addr, out):disass() 926 create(code, addr, out):disass()
810end 927end
811 928
812local function disass64_(code, addr, out) 929local function disass64(code, addr, out)
813 create64_(code, addr, out):disass() 930 create64(code, addr, out):disass()
814end 931end
815 932
816-- Return register name for RID. 933-- Return register name for RID.
817local function regname_(r) 934local function regname(r)
818 if r < 8 then return map_regs.D[r+1] end 935 if r < 8 then return map_regs.D[r+1] end
819 return map_regs.X[r-7] 936 return map_regs.X[r-7]
820end 937end
821 938
822local function regname64_(r) 939local function regname64(r)
823 if r < 16 then return map_regs.Q[r+1] end 940 if r < 16 then return map_regs.Q[r+1] end
824 return map_regs.X[r-15] 941 return map_regs.X[r-15]
825end 942end
826 943
827-- Public module functions. 944-- Public module functions.
828module(...) 945return {
829 946 create = create,
830create = create_ 947 create64 = create64,
831create64 = create64_ 948 disass = disass,
832disass = disass_ 949 disass64 = disass64,
833disass64 = disass64_ 950 regname = regname,
834regname = regname_ 951 regname64 = regname64
835regname64 = regname64_ 952}
836 953
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 22568ac7..03140196 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -55,7 +55,7 @@
55 55
56-- Cache some library functions and objects. 56-- Cache some library functions and objects.
57local jit = require("jit") 57local jit = require("jit")
58assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 58assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
59local jutil = require("jit.util") 59local jutil = require("jit.util")
60local vmdef = require("jit.vmdef") 60local vmdef = require("jit.vmdef")
61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc 61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap 63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr 64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
65local bit = require("bit") 65local bit = require("bit")
66local band, shr = bit.band, bit.rshift 66local band, shr, tohex = bit.band, bit.rshift, bit.tohex
67local sub, gsub, format = string.sub, string.gsub, string.format 67local sub, gsub, format = string.sub, string.gsub, string.format
68local byte, rep = string.byte, string.rep 68local byte, rep = string.byte, string.rep
69local type, tostring = type, tostring 69local type, tostring = type, tostring
@@ -85,12 +85,13 @@ local nexitsym = 0
85local function fillsymtab_tr(tr, nexit) 85local function fillsymtab_tr(tr, nexit)
86 local t = {} 86 local t = {}
87 symtabmt.__index = t 87 symtabmt.__index = t
88 if jit.arch == "mips" or jit.arch == "mipsel" then 88 if jit.arch:sub(1, 4) == "mips" then
89 t[traceexitstub(tr, 0)] = "exit" 89 t[traceexitstub(tr, 0)] = "exit"
90 return 90 return
91 end 91 end
92 for i=0,nexit-1 do 92 for i=0,nexit-1 do
93 local addr = traceexitstub(tr, i) 93 local addr = traceexitstub(tr, i)
94 if addr < 0 then addr = addr + 2^32 end
94 t[addr] = tostring(i) 95 t[addr] = tostring(i)
95 end 96 end
96 local addr = traceexitstub(tr, nexit) 97 local addr = traceexitstub(tr, nexit)
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit)
104 local ircall = vmdef.ircall 105 local ircall = vmdef.ircall
105 for i=0,#ircall do 106 for i=0,#ircall do
106 local addr = ircalladdr(i) 107 local addr = ircalladdr(i)
107 if addr ~= 0 then t[addr] = ircall[i] end 108 if addr ~= 0 then
109 if addr < 0 then addr = addr + 2^32 end
110 t[addr] = ircall[i]
111 end
108 end 112 end
109 end 113 end
110 if nexitsym == 1000000 then -- Per-trace exit stubs. 114 if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit)
118 nexit = 1000000 122 nexit = 1000000
119 break 123 break
120 end 124 end
125 if addr < 0 then addr = addr + 2^32 end
121 t[addr] = tostring(i) 126 t[addr] = tostring(i)
122 end 127 end
123 nexitsym = nexit 128 nexitsym = nexit
@@ -136,6 +141,7 @@ local function dump_mcode(tr)
136 local mcode, addr, loop = tracemc(tr) 141 local mcode, addr, loop = tracemc(tr)
137 if not mcode then return end 142 if not mcode then return end
138 if not disass then disass = require("jit.dis_"..jit.arch) end 143 if not disass then disass = require("jit.dis_"..jit.arch) end
144 if addr < 0 then addr = addr + 2^32 end
139 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") 145 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
140 local ctx = disass.create(mcode, addr, dumpwrite) 146 local ctx = disass.create(mcode, addr, dumpwrite)
141 ctx.hexdump = 0 147 ctx.hexdump = 0
@@ -270,16 +276,20 @@ local litname = {
270 ["CONV "] = setmetatable({}, { __index = function(t, mode) 276 ["CONV "] = setmetatable({}, { __index = function(t, mode)
271 local s = irtype[band(mode, 31)] 277 local s = irtype[band(mode, 31)]
272 s = irtype[band(shr(mode, 5), 31)].."."..s 278 s = irtype[band(shr(mode, 5), 31)].."."..s
273 if band(mode, 0x400) ~= 0 then s = s.." trunc" 279 if band(mode, 0x800) ~= 0 then s = s.." sext" end
274 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
275 local c = shr(mode, 12) 280 local c = shr(mode, 12)
276 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 281 if c == 1 then s = s.." none"
282 elseif c == 2 then s = s.." index"
283 elseif c == 3 then s = s.." check" end
277 t[mode] = s 284 t[mode] = s
278 return s 285 return s
279 end}), 286 end}),
280 ["FLOAD "] = vmdef.irfield, 287 ["FLOAD "] = vmdef.irfield,
281 ["FREF "] = vmdef.irfield, 288 ["FREF "] = vmdef.irfield,
282 ["FPMATH"] = vmdef.irfpm, 289 ["FPMATH"] = vmdef.irfpm,
290 ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
291 ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
292 ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
283} 293}
284 294
285local function ctlsub(c) 295local function ctlsub(c)
@@ -303,15 +313,19 @@ local function fmtfunc(func, pc)
303 end 313 end
304end 314end
305 315
306local function formatk(tr, idx) 316local function formatk(tr, idx, sn)
307 local k, t, slot = tracek(tr, idx) 317 local k, t, slot = tracek(tr, idx)
308 local tn = type(k) 318 local tn = type(k)
309 local s 319 local s
310 if tn == "number" then 320 if tn == "number" then
311 if k == 2^52+2^51 then 321 if t < 12 then
322 s = k == 0 and "NULL" or format("[0x%08x]", k)
323 elseif band(sn or 0, 0x30000) ~= 0 then
324 s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
325 elseif k == 2^52+2^51 then
312 s = "bias" 326 s = "bias"
313 else 327 else
314 s = format("%+.14g", k) 328 s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
315 end 329 end
316 elseif tn == "string" then 330 elseif tn == "string" then
317 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) 331 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -329,6 +343,8 @@ local function formatk(tr, idx)
329 elseif t == 21 then -- int64_t 343 elseif t == 21 then -- int64_t
330 s = sub(tostring(k), 1, -3) 344 s = sub(tostring(k), 1, -3)
331 if sub(s, 1, 1) ~= "-" then s = "+"..s end 345 if sub(s, 1, 1) ~= "-" then s = "+"..s end
346 elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)
347 return "----" -- Special case for LJ_FR2 slot 1.
332 else 348 else
333 s = tostring(k) -- For primitives. 349 s = tostring(k) -- For primitives.
334 end 350 end
@@ -347,7 +363,7 @@ local function printsnap(tr, snap)
347 n = n + 1 363 n = n + 1
348 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS 364 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
349 if ref < 0 then 365 if ref < 0 then
350 out:write(formatk(tr, ref)) 366 out:write(formatk(tr, ref, sn))
351 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM 367 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
352 out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) 368 out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
353 else 369 else
@@ -545,7 +561,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
545 if what == "start" then 561 if what == "start" then
546 if dumpmode.H then out:write('<pre class="ljdump">\n') end 562 if dumpmode.H then out:write('<pre class="ljdump">\n') end
547 out:write("---- TRACE ", tr, " ", what) 563 out:write("---- TRACE ", tr, " ", what)
548 if otr then out:write(" ", otr, "/", oex) end 564 if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
549 out:write(" ", fmtfunc(func, pc), "\n") 565 out:write(" ", fmtfunc(func, pc), "\n")
550 elseif what == "stop" or what == "abort" then 566 elseif what == "stop" or what == "abort" then
551 out:write("---- TRACE ", tr, " ", what) 567 out:write("---- TRACE ", tr, " ", what)
@@ -595,23 +611,26 @@ end
595 611
596------------------------------------------------------------------------------ 612------------------------------------------------------------------------------
597 613
614local gpr64 = jit.arch:match("64")
615local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel"
616
598-- Dump taken trace exits. 617-- Dump taken trace exits.
599local function dump_texit(tr, ex, ngpr, nfpr, ...) 618local function dump_texit(tr, ex, ngpr, nfpr, ...)
600 out:write("---- TRACE ", tr, " exit ", ex, "\n") 619 out:write("---- TRACE ", tr, " exit ", ex, "\n")
601 if dumpmode.X then 620 if dumpmode.X then
602 local regs = {...} 621 local regs = {...}
603 if jit.arch == "x64" then 622 if gpr64 then
604 for i=1,ngpr do 623 for i=1,ngpr do
605 out:write(format(" %016x", regs[i])) 624 out:write(format(" %016x", regs[i]))
606 if i % 4 == 0 then out:write("\n") end 625 if i % 4 == 0 then out:write("\n") end
607 end 626 end
608 else 627 else
609 for i=1,ngpr do 628 for i=1,ngpr do
610 out:write(format(" %08x", regs[i])) 629 out:write(" ", tohex(regs[i]))
611 if i % 8 == 0 then out:write("\n") end 630 if i % 8 == 0 then out:write("\n") end
612 end 631 end
613 end 632 end
614 if jit.arch == "mips" or jit.arch == "mipsel" then 633 if fprmips32 then
615 for i=1,nfpr,2 do 634 for i=1,nfpr,2 do
616 out:write(format(" %+17.14g", regs[ngpr+i])) 635 out:write(format(" %+17.14g", regs[ngpr+i]))
617 if i % 8 == 7 then out:write("\n") end 636 if i % 8 == 7 then out:write("\n") end
@@ -692,9 +711,9 @@ local function dumpon(opt, outfile)
692end 711end
693 712
694-- Public module functions. 713-- Public module functions.
695module(...) 714return {
696 715 on = dumpon,
697on = dumpon 716 off = dumpoff,
698off = dumpoff 717 start = dumpon -- For -j command line option.
699start = dumpon -- For -j command line option. 718}
700 719
diff --git a/src/jit/p.lua b/src/jit/p.lua
new file mode 100644
index 00000000..c9ec1d8b
--- /dev/null
+++ b/src/jit/p.lua
@@ -0,0 +1,312 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler.
3--
4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module is a simple command line interface to the built-in
9-- low-overhead profiler of LuaJIT.
10--
11-- The lower-level API of the profiler is accessible via the "jit.profile"
12-- module or the luaJIT_profile_* C API.
13--
14-- Example usage:
15--
16-- luajit -jp myapp.lua
17-- luajit -jp=s myapp.lua
18-- luajit -jp=-s myapp.lua
19-- luajit -jp=vl myapp.lua
20-- luajit -jp=G,profile.txt myapp.lua
21--
22-- The following dump features are available:
23--
24-- f Stack dump: function name, otherwise module:line. Default mode.
25-- F Stack dump: ditto, but always prepend module.
26-- l Stack dump: module:line.
27-- <number> stack dump depth (callee < caller). Default: 1.
28-- -<number> Inverse stack dump depth (caller > callee).
29-- s Split stack dump after first stack level. Implies abs(depth) >= 2.
30-- p Show full path for module names.
31-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv.
32-- z Show zones. Can be combined with stack dumps, e.g. zf or fz.
33-- r Show raw sample counts. Default: show percentages.
34-- a Annotate excerpts from source code files.
35-- A Annotate complete source code files.
36-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
37-- m<number> Minimum sample percentage to be shown. Default: 3.
38-- i<number> Sampling interval in milliseconds. Default: 10.
39--
40----------------------------------------------------------------------------
41
42-- Cache some library functions and objects.
43local jit = require("jit")
44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local profile = require("jit.profile")
46local vmdef = require("jit.vmdef")
47local math = math
48local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
49local sort, format = table.sort, string.format
50local stdout = io.stdout
51local zone -- Load jit.zone module on demand.
52
53-- Output file handle.
54local out
55
56------------------------------------------------------------------------------
57
58local prof_ud
59local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
60local prof_ann, prof_count1, prof_count2, prof_samples
61
62local map_vmmode = {
63 N = "Compiled",
64 I = "Interpreted",
65 C = "C code",
66 G = "Garbage Collector",
67 J = "JIT Compiler",
68}
69
-- Profiler callback: invoked by jit.profile for every sampling event.
--   th      thread to take the stack dump from
--   samples number of samples this event accounts for
--   vmmode  single-letter VM state (see map_vmmode)
-- Builds up to two keys for the event and adds the sample count to
-- prof_count1[key1] and, if a second key exists, prof_count2[key1][key2].
local function prof_cb(th, samples, vmmode)
  prof_samples = prof_samples + samples

  -- State key: VM state name for "v", otherwise the current zone.
  local state
  if prof_states == "v" then
    state = map_vmmode[vmmode] or vmmode
  elseif prof_states then
    state = zone:get() or "(none)"
  end

  -- Stack key(s): only collected when a stack dump format is configured.
  local stack, stack2
  if prof_fmt then
    local dump = profile.dumpstack(th, prof_fmt, prof_depth)
    -- Replace "[builtin#N]" with the fast function name from vmdef.
    stack = (dump:gsub("%[builtin#(%d+)%]", function(id)
      return vmdef.ffnames[tonumber(id)]
    end))
    if prof_split == 2 then
      -- Split the dump at the first level separator.
      local head, tail = stack:match("(.-) [<>] (.*)")
      if tail then stack, stack2 = head, tail end
    elseif prof_split == 3 then
      stack2 = profile.dumpstack(th, "l", 1)
    end
  end

  -- Decide which key is the primary and which the secondary one.
  local primary, secondary
  if prof_split == 1 then
    if state then primary, secondary = state, stack end
  elseif stack then
    primary, secondary = stack, stack2 or state
  end
  if not primary then return end

  -- Accumulate the samples in one or two levels.
  prof_count1[primary] = (prof_count1[primary] or 0) + samples
  if secondary then
    local sub = prof_count2[primary]
    if not sub then
      sub = {}
      prof_count2[primary] = sub
    end
    sub[secondary] = (sub[secondary] or 0) + samples
  end
end
117
118------------------------------------------------------------------------------
119
-- Show top N list.
--   count1  table mapping key -> sample count
--   count2  optional table mapping key -> nested count table (or nil)
--   samples total used as the 100% reference for count1
--   indent  prefix written in front of every line
-- Entries are written to `out` in descending count order; output stops at
-- the first entry whose rounded percentage is below prof_min.
local function prof_top(count1, count2, samples, indent)
  local keys = {}
  for key in pairs(count1) do keys[#keys+1] = key end
  sort(keys, function(x, y) return count1[x] > count1[y] end)

  for _, key in ipairs(keys) do
    local hits = count1[key]
    local pct = floor(hits*100/samples + 0.5)
    if pct < prof_min then break end

    local text
    if prof_raw == "r" then
      text = format("%s%5d %s\n", indent, hits, key)
    elseif prof_raw then
      text = format("%s %d\n", key, hits)
    else
      text = format("%s%2d%% %s\n", indent, pct, key)
    end
    out:write(text)

    -- Second level: break down this entry, relative to its own count.
    local nested = count2 and count2[key]
    if nested then
      local subindent
      if prof_split == 3 or prof_split == 1 then
        subindent = " -- "
      elseif prof_depth < 0 then
        subindent = " -> "
      else
        subindent = " <- "
      end
      prof_top(nested, nil, hits, subindent)
    end
  end
end
149
-- Write the annotated listing of one already opened source file to `out`.
--   fp    open file handle, read line by line (not closed here)
--   fl    table mapping line number -> value shown in the gutter
--   ann   number of context lines around annotated lines; 0 = whole file
--   fmtv  format for a line with a value, fmtn for a line without one
local function prof_annotate_lines(fp, fl, ann, fmtv, fmtn)
  -- `show` is false while lines are suppressed, otherwise the number of the
  -- last line that keeps the listing open (compared against n below).
  local n, show = 1, false
  if ann ~= 0 then
    for i=1,ann do
      -- Must be a line number, not `true`: it is used in `show+ann < n`.
      if fl[i] then show = i; out:write("@@ 1 @@\n"); break end
    end
  end
  for line in fp:lines() do
    if line:byte() == 27 then  -- Leading ESC byte: not a source file.
      out:write("[Cannot annotate bytecode file]\n")
      break
    end
    local v = fl[n]
    local visible = true
    if ann ~= 0 then
      local v2 = fl[n+ann]  -- Look ahead to open the excerpt early.
      if show then
        if v2 then show = n+ann elseif v then show = n
        elseif show+ann < n then show = false end
      elseif v2 then
        show = n+ann
        out:write(format("@@ %d @@\n", n))
      end
      visible = show
    end
    if visible then
      if v then
        out:write(format(fmtv, v, line))
      else
        out:write(format(fmtn, line))
      end
    end
    n = n + 1
  end
end

-- Annotate source code.
--   count1  table mapping "file:line" (or a bare name) -> sample count
--   samples total number of samples, used as the 100% reference
-- Every key whose rounded percentage reaches prof_min is grouped by file;
-- the files are then listed in sorted order with per-line values (raw
-- counts if prof_raw is set, percentages otherwise).
local function prof_annotate(count1, samples)
  -- `files` doubles as a list of file names (array part) and as a map
  -- from file name to its per-line table.
  local files = {}
  local ms = 0  -- Largest single count, determines the raw gutter width.
  for k, v in pairs(count1) do
    local pct = floor(v*100/samples + 0.5)
    ms = math.max(ms, v)
    if pct >= prof_min then
      local file, line = k:match("^(.*):(%d+)$")
      if not file then file = k; line = 0 end
      local fl = files[file]
      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
      line = tonumber(line)
      fl[line] = prof_raw and v or pct
    end
  end
  sort(files)

  -- Both formats must produce a gutter of the same width.
  local fmtv, fmtn = " %3d%% | %s\n", (" "):rep(5).." | %s\n"
  if prof_raw then
    -- Exact number of decimal digits of the largest count (at least 5).
    local n = math.max(5, #format("%d", ms))
    fmtv = "%"..n.."d | %s\n"
    fmtn = (" "):rep(n).." | %s\n"
  end

  local ann = prof_ann
  for _, file in ipairs(files) do
    local f0 = file:byte()
    if f0 == 40 or f0 == 91 then  -- Name starts with "(" or "[".
      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
    else
      local fp, err = io.open(file)
      if not fp then
        -- Report the problem and carry on with the remaining files.
        out:write(format("====== ERROR: %s: %s\n", file, err))
      else
        out:write(format("\n====== %s ======\n", file))
        prof_annotate_lines(fp, files[file], ann, fmtv, fmtn)
        fp:close()
      end
    end
  end
end
221
222------------------------------------------------------------------------------
223
-- Finish profiling and dump result.
-- Does nothing unless a profiling run is active (prof_ud is set). Stops
-- the profiler, writes either the annotation or the top list to `out`,
-- then releases the collected data and closes `out` unless it is stdout.
-- The cleanup also runs when no samples were collected, so that a second
-- call (e.g. from the __gc handler installed by prof_start) is a no-op
-- and the output file is not left open.
local function prof_finish()
  if not prof_ud then return end
  profile.stop()
  local samples = prof_samples
  if samples == 0 then
    -- prof_raw == true is the graph mode: keep its output free of notices.
    if prof_raw ~= true then out:write("[No samples collected]\n") end
  elseif prof_ann then
    prof_annotate(prof_count1, samples)
  else
    prof_top(prof_count1, prof_count2, samples, "")
  end
  prof_count1 = nil
  prof_count2 = nil
  prof_ud = nil
  if out ~= stdout then out:close() end
end
244
-- Start profiling.
--   mode  option string as described in the header of this file
-- Parses the mode string into the prof_* settings, resets the counters,
-- starts the low-level profiler with prof_cb as callback and installs a
-- proxy userdata whose __gc handler calls prof_finish.
local function prof_start(mode)
  -- Peel the numeric options off the mode string. What remains is treated
  -- as a set of single-character flags.
  local interval, minpct, depth = "", 3, 1
  mode = (mode:gsub("i%d*", function(opt) interval = opt; return "" end))
  mode = (mode:gsub("m(%d+)", function(num)
    minpct = tonumber(num)
    return ""
  end))
  mode = (mode:gsub("%-?%d+", function(num)
    depth = tonumber(num)
    return ""
  end))
  prof_min, prof_depth = minpct, depth

  local has = {}
  for ch in mode:gmatch(".") do has[ch] = ch end

  prof_states = has.z or has.v
  if prof_states == "z" then zone = require("jit.zone") end

  -- Without an explicit scope flag: no stack dump if states are shown,
  -- function names otherwise.
  local scope = has.l or has.f or has.F
  if not scope then scope = prof_states and "" or "f" end
  local flags = has.p or ""
  prof_raw = has.r

  -- Select how samples are split into two levels.
  if has.s then
    prof_split = 2
    if prof_depth == -1 or has["-"] then
      prof_depth = -2
    elseif prof_depth == 1 then
      prof_depth = 2
    end
  elseif mode:find("[fF].*l") then
    scope, prof_split = "l", 3
  elseif scope == "" or mode:find("[zv].*[lfF]") then
    prof_split = 1
  else
    prof_split = 0
  end

  -- Select the stack dump format.
  prof_ann = has.A and 0 or (has.a and 3)
  if prof_ann then
    scope, prof_fmt, prof_split, prof_depth = "l", "pl", 0, 1
  elseif scope == "" then
    prof_fmt = false
  elseif has.G then
    prof_fmt = flags..scope.."Z;"
    prof_depth = -100
    prof_raw = true
    prof_min = 0
  else
    local sc = has.F or scope
    if prof_split == 3 and has.f then sc = has.f end
    local sep = "Z > "
    if prof_depth >= 0 then sep = "Z < " end
    prof_fmt = flags..sc..sep
  end

  prof_count1 = {}
  prof_count2 = {}
  prof_samples = 0
  profile.start(scope:lower()..interval, prof_cb)
  prof_ud = newproxy(true)
  getmetatable(prof_ud).__gc = prof_finish
end
294
295------------------------------------------------------------------------------
296
-- Entry point for the -jp command line option.
--   mode     profiler mode string, defaults to "f"
--   outfile  output file name; falls back to the LUAJIT_PROFILEFILE
--            environment variable. "-" or no name at all selects stdout.
-- Raises an error if the output file cannot be opened for writing.
local function start(mode, outfile)
  local target = outfile or os.getenv("LUAJIT_PROFILEFILE")
  if not target or target == "-" then
    out = stdout
  else
    out = assert(io.open(target, "w"))
  end
  prof_start(mode or "f")
end
306
307-- Public module functions.
308return {
309 start = start, -- For -j command line option.
310 stop = prof_finish
311}
312
diff --git a/src/jit/v.lua b/src/jit/v.lua
index a39bd203..83589143 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
59 59
60-- Cache some library functions and objects. 60-- Cache some library functions and objects.
61local jit = require("jit") 61local jit = require("jit")
62assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 62assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util") 63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef") 64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -99,7 +99,7 @@ end
99local function dump_trace(what, tr, func, pc, otr, oex) 99local function dump_trace(what, tr, func, pc, otr, oex)
100 if what == "start" then 100 if what == "start" then
101 startloc = fmtfunc(func, pc) 101 startloc = fmtfunc(func, pc)
102 startex = otr and "("..otr.."/"..oex..") " or "" 102 startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or ""
103 else 103 else
104 if what == "abort" then 104 if what == "abort" then
105 local loc = fmtfunc(func, pc) 105 local loc = fmtfunc(func, pc)
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
116 if ltype == "interpreter" then 116 if ltype == "interpreter" then
117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", 117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
118 tr, startex, startloc)) 118 tr, startex, startloc))
119 elseif ltype == "stitch" then
120 out:write(format("[TRACE %3s %s%s %s %s]\n",
121 tr, startex, startloc, ltype, fmtfunc(func, pc)))
119 elseif link == tr or link == 0 then 122 elseif link == tr or link == 0 then
120 out:write(format("[TRACE %3s %s%s %s]\n", 123 out:write(format("[TRACE %3s %s%s %s]\n",
121 tr, startex, startloc, ltype)) 124 tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
159end 162end
160 163
161-- Public module functions. 164-- Public module functions.
162module(...) 165return {
163 166 on = dumpon,
164on = dumpon 167 off = dumpoff,
165off = dumpoff 168 start = dumpon -- For -j command line option.
166start = dumpon -- For -j command line option. 169}
167 170
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
new file mode 100644
index 00000000..94357854
--- /dev/null
+++ b/src/jit/zone.lua
@@ -0,0 +1,45 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler zones.
3--
4-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module implements a simple hierarchical zone model.
9--
10-- Example usage:
11--
12-- local zone = require("jit.zone")
13-- zone("AI")
14-- ...
15-- zone("A*")
16-- ...
17-- print(zone:get()) --> "A*"
18-- ...
19-- zone()
20-- ...
21-- print(zone:get()) --> "AI"
22-- ...
23-- zone()
24--
25----------------------------------------------------------------------------
26
27local remove = table.remove
28
-- The module table is itself the zone stack: zone names live in its array
-- part, the methods below in its hash part.
local zone = {}

-- Remove all zones from the stack.
function zone.flush(t)
  for i = #t, 1, -1 do
    t[i] = nil
  end
end

-- Return the innermost (most recently pushed) zone, or nil if none.
function zone.get(t)
  return t[#t]
end

-- Calling the module with a name pushes that zone; calling it without one
-- pops and returns the innermost zone, raising an error on an empty stack.
local function pushpop(t, name)
  if not name then
    return (assert(remove(t), "empty zone stack"))
  end
  t[#t+1] = name
end

return setmetatable(zone, { __call = pushpop })
45
diff --git a/src/lauxlib.h b/src/lauxlib.h
index fed1491b..a44f0272 100644
--- a/src/lauxlib.h
+++ b/src/lauxlib.h
@@ -15,9 +15,6 @@
15#include "lua.h" 15#include "lua.h"
16 16
17 17
18#define luaL_getn(L,i) ((int)lua_objlen(L, i))
19#define luaL_setn(L,i,j) ((void)0) /* no op! */
20
21/* extra error code for `luaL_load' */ 18/* extra error code for `luaL_load' */
22#define LUA_ERRFILE (LUA_ERRERR+1) 19#define LUA_ERRFILE (LUA_ERRERR+1)
23 20
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
58LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, 55LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
59 const char *const lst[]); 56 const char *const lst[]);
60 57
58/* pre-defined references */
59#define LUA_NOREF (-2)
60#define LUA_REFNIL (-1)
61
61LUALIB_API int (luaL_ref) (lua_State *L, int t); 62LUALIB_API int (luaL_ref) (lua_State *L, int t);
62LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); 63LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
63 64
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz,
84 const char *name, const char *mode); 85 const char *name, const char *mode);
85LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, 86LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
86 int level); 87 int level);
88LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup);
89LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname,
90 int sizehint);
91LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname);
92LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname);
87 93
88 94
89/* 95/*
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
113 119
114#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) 120#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
115 121
122/* From Lua 5.2. */
123#define luaL_newlibtable(L, l) \
124 lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1)
125#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0))
126
116/* 127/*
117** {====================================================== 128** {======================================================
118** Generic Buffer manipulation 129** Generic Buffer manipulation
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
147 158
148/* }====================================================== */ 159/* }====================================================== */
149 160
150
151/* compatibility with ref system */
152
153/* pre-defined references */
154#define LUA_NOREF (-2)
155#define LUA_REFNIL (-1)
156
157#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
158 (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
159
160#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref))
161
162#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
163
164
165#define luaL_reg luaL_Reg
166
167#endif 161#endif
diff --git a/src/lib_aux.c b/src/lib_aux.c
index dde6b433..4ef55581 100644
--- a/src/lib_aux.c
+++ b/src/lib_aux.c
@@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
107static int libsize(const luaL_Reg *l) 107static int libsize(const luaL_Reg *l)
108{ 108{
109 int size = 0; 109 int size = 0;
110 for (; l->name; l++) size++; 110 for (; l && l->name; l++) size++;
111 return size; 111 return size;
112} 112}
113 113
114LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint)
115{
116 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
117 lua_getfield(L, -1, modname);
118 if (!lua_istable(L, -1)) {
119 lua_pop(L, 1);
120 if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL)
121 lj_err_callerv(L, LJ_ERR_BADMODN, modname);
122 lua_pushvalue(L, -1);
123 lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */
124 }
125 lua_remove(L, -2); /* Remove _LOADED table. */
126}
127
114LUALIB_API void luaL_openlib(lua_State *L, const char *libname, 128LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
115 const luaL_Reg *l, int nup) 129 const luaL_Reg *l, int nup)
116{ 130{
117 lj_lib_checkfpu(L); 131 lj_lib_checkfpu(L);
118 if (libname) { 132 if (libname) {
119 int size = libsize(l); 133 luaL_pushmodule(L, libname, libsize(l));
120 /* check whether lib already exists */ 134 lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */
121 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
122 lua_getfield(L, -1, libname); /* get _LOADED[libname] */
123 if (!lua_istable(L, -1)) { /* not found? */
124 lua_pop(L, 1); /* remove previous result */
125 /* try global variable (and create one if it does not exist) */
126 if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
127 lj_err_callerv(L, LJ_ERR_BADMODN, libname);
128 lua_pushvalue(L, -1);
129 lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
130 }
131 lua_remove(L, -2); /* remove _LOADED table */
132 lua_insert(L, -(nup+1)); /* move library table to below upvalues */
133 } 135 }
134 for (; l->name; l++) { 136 if (l)
135 int i; 137 luaL_setfuncs(L, l, nup);
136 for (i = 0; i < nup; i++) /* copy upvalues to the top */ 138 else
137 lua_pushvalue(L, -nup); 139 lua_pop(L, nup); /* Remove upvalues. */
138 lua_pushcclosure(L, l->func, nup);
139 lua_setfield(L, -(nup+2), l->name);
140 }
141 lua_pop(L, nup); /* remove upvalues */
142} 140}
143 141
144LUALIB_API void luaL_register(lua_State *L, const char *libname, 142LUALIB_API void luaL_register(lua_State *L, const char *libname,
@@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname,
147 luaL_openlib(L, libname, l, 0); 145 luaL_openlib(L, libname, l, 0);
148} 146}
149 147
148LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
149{
150 luaL_checkstack(L, nup, "too many upvalues");
151 for (; l->name; l++) {
152 int i;
153 for (i = 0; i < nup; i++) /* Copy upvalues to the top. */
154 lua_pushvalue(L, -nup);
155 lua_pushcclosure(L, l->func, nup);
156 lua_setfield(L, -(nup + 2), l->name);
157 }
158 lua_pop(L, nup); /* Remove upvalues. */
159}
160
150LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, 161LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
151 const char *p, const char *r) 162 const char *p, const char *r)
152{ 163{
@@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
207 218
208LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) 219LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
209{ 220{
210 while (l--) 221 if (l <= bufffree(B)) {
211 luaL_addchar(B, *s++); 222 memcpy(B->p, s, l);
223 B->p += l;
224 } else {
225 emptybuffer(B);
226 lua_pushlstring(B->L, s, l);
227 B->lvl++;
228 adjuststack(B);
229 }
212} 230}
213 231
214LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) 232LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
@@ -302,7 +320,7 @@ static int panic(lua_State *L)
302 320
303#ifdef LUAJIT_USE_SYSMALLOC 321#ifdef LUAJIT_USE_SYSMALLOC
304 322
305#if LJ_64 && !defined(LUAJIT_USE_VALGRIND) 323#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
306#error "Must use builtin allocator for 64 bit target" 324#error "Must use builtin allocator for 64 bit target"
307#endif 325#endif
308 326
@@ -327,23 +345,19 @@ LUALIB_API lua_State *luaL_newstate(void)
327 345
328#else 346#else
329 347
330#include "lj_alloc.h"
331
332LUALIB_API lua_State *luaL_newstate(void) 348LUALIB_API lua_State *luaL_newstate(void)
333{ 349{
334 lua_State *L; 350 lua_State *L;
335 void *ud = lj_alloc_create(); 351#if LJ_64 && !LJ_GC64
336 if (ud == NULL) return NULL; 352 L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL);
337#if LJ_64
338 L = lj_state_newstate(lj_alloc_f, ud);
339#else 353#else
340 L = lua_newstate(lj_alloc_f, ud); 354 L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL);
341#endif 355#endif
342 if (L) G(L)->panic = panic; 356 if (L) G(L)->panic = panic;
343 return L; 357 return L;
344} 358}
345 359
346#if LJ_64 360#if LJ_64 && !LJ_GC64
347LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) 361LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
348{ 362{
349 UNUSED(f); UNUSED(ud); 363 UNUSED(f); UNUSED(ud);
diff --git a/src/lib_base.c b/src/lib_base.c
index acdec16d..1c8816f0 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -19,10 +19,12 @@
19#include "lj_gc.h" 19#include "lj_gc.h"
20#include "lj_err.h" 20#include "lj_err.h"
21#include "lj_debug.h" 21#include "lj_debug.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_tab.h" 24#include "lj_tab.h"
24#include "lj_meta.h" 25#include "lj_meta.h"
25#include "lj_state.h" 26#include "lj_state.h"
27#include "lj_frame.h"
26#if LJ_HASFFI 28#if LJ_HASFFI
27#include "lj_ctype.h" 29#include "lj_ctype.h"
28#include "lj_cconv.h" 30#include "lj_cconv.h"
@@ -32,6 +34,7 @@
32#include "lj_dispatch.h" 34#include "lj_dispatch.h"
33#include "lj_char.h" 35#include "lj_char.h"
34#include "lj_strscan.h" 36#include "lj_strscan.h"
37#include "lj_strfmt.h"
35#include "lj_lib.h" 38#include "lj_lib.h"
36 39
37/* -- Base library: checks ------------------------------------------------ */ 40/* -- Base library: checks ------------------------------------------------ */
@@ -40,13 +43,13 @@
40 43
41LJLIB_ASM(assert) LJLIB_REC(.) 44LJLIB_ASM(assert) LJLIB_REC(.)
42{ 45{
43 GCstr *s;
44 lj_lib_checkany(L, 1); 46 lj_lib_checkany(L, 1);
45 s = lj_lib_optstr(L, 2); 47 if (L->top == L->base+1)
46 if (s)
47 lj_err_callermsg(L, strdata(s));
48 else
49 lj_err_caller(L, LJ_ERR_ASSERT); 48 lj_err_caller(L, LJ_ERR_ASSERT);
49 else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
50 lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
51 else
52 lj_err_run(L);
50 return FFH_UNREACHABLE; 53 return FFH_UNREACHABLE;
51} 54}
52 55
@@ -86,10 +89,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
86 cTValue *mo = lj_meta_lookup(L, o, mm); 89 cTValue *mo = lj_meta_lookup(L, o, mm);
87 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { 90 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
88 L->top = o+1; /* Only keep one argument. */ 91 L->top = o+1; /* Only keep one argument. */
89 copyTV(L, L->base-1, mo); /* Replace callable. */ 92 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
90 return FFH_TAILCALL; 93 return FFH_TAILCALL;
91 } else { 94 } else {
92 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); 95 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
96 if (LJ_FR2) { copyTV(L, o-1, o); o--; }
93 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); 97 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
94 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); 98 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
95 return FFH_RES(3); 99 return FFH_RES(3);
@@ -100,7 +104,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
100#endif 104#endif
101 105
102LJLIB_PUSH(lastcl) 106LJLIB_PUSH(lastcl)
103LJLIB_ASM(pairs) 107LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
104{ 108{
105 return ffh_pairs(L, MM_pairs); 109 return ffh_pairs(L, MM_pairs);
106} 110}
@@ -113,7 +117,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
113} 117}
114 118
115LJLIB_PUSH(lastcl) 119LJLIB_PUSH(lastcl)
116LJLIB_ASM(ipairs) LJLIB_REC(.) 120LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
117{ 121{
118 return ffh_pairs(L, MM_ipairs); 122 return ffh_pairs(L, MM_ipairs);
119} 123}
@@ -131,11 +135,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.)
131 lj_err_caller(L, LJ_ERR_PROTMT); 135 lj_err_caller(L, LJ_ERR_PROTMT);
132 setgcref(t->metatable, obj2gco(mt)); 136 setgcref(t->metatable, obj2gco(mt));
133 if (mt) { lj_gc_objbarriert(L, t, mt); } 137 if (mt) { lj_gc_objbarriert(L, t, mt); }
134 settabV(L, L->base-1, t); 138 settabV(L, L->base-1-LJ_FR2, t);
135 return FFH_RES(1); 139 return FFH_RES(1);
136} 140}
137 141
138LJLIB_CF(getfenv) 142LJLIB_CF(getfenv) LJLIB_REC(.)
139{ 143{
140 GCfunc *fn; 144 GCfunc *fn;
141 cTValue *o = L->base; 145 cTValue *o = L->base;
@@ -144,6 +148,7 @@ LJLIB_CF(getfenv)
144 o = lj_debug_frame(L, level, &level); 148 o = lj_debug_frame(L, level, &level);
145 if (o == NULL) 149 if (o == NULL)
146 lj_err_arg(L, 1, LJ_ERR_INVLVL); 150 lj_err_arg(L, 1, LJ_ERR_INVLVL);
151 if (LJ_FR2) o--;
147 } 152 }
148 fn = &gcval(o)->fn; 153 fn = &gcval(o)->fn;
149 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); 154 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +170,7 @@ LJLIB_CF(setfenv)
165 o = lj_debug_frame(L, level, &level); 170 o = lj_debug_frame(L, level, &level);
166 if (o == NULL) 171 if (o == NULL)
167 lj_err_arg(L, 1, LJ_ERR_INVLVL); 172 lj_err_arg(L, 1, LJ_ERR_INVLVL);
173 if (LJ_FR2) o--;
168 } 174 }
169 fn = &gcval(o)->fn; 175 fn = &gcval(o)->fn;
170 if (!isluafunc(fn)) 176 if (!isluafunc(fn))
@@ -259,7 +265,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
259 if (base == 10) { 265 if (base == 10) {
260 TValue *o = lj_lib_checkany(L, 1); 266 TValue *o = lj_lib_checkany(L, 1);
261 if (lj_strscan_numberobj(o)) { 267 if (lj_strscan_numberobj(o)) {
262 copyTV(L, L->base-1, o); 268 copyTV(L, L->base-1-LJ_FR2, o);
263 return FFH_RES(1); 269 return FFH_RES(1);
264 } 270 }
265#if LJ_HASFFI 271#if LJ_HASFFI
@@ -272,11 +278,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
272 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { 278 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
273 int32_t i; 279 int32_t i;
274 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); 280 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
275 setintV(L->base-1, i); 281 setintV(L->base-1-LJ_FR2, i);
276 return FFH_RES(1); 282 return FFH_RES(1);
277 } 283 }
278 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), 284 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
279 (uint8_t *)&(L->base-1)->n, o, 0); 285 (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
280 return FFH_RES(1); 286 return FFH_RES(1);
281 } 287 }
282 } 288 }
@@ -284,53 +290,46 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
284 } else { 290 } else {
285 const char *p = strdata(lj_lib_checkstr(L, 1)); 291 const char *p = strdata(lj_lib_checkstr(L, 1));
286 char *ep; 292 char *ep;
293 unsigned int neg = 0;
287 unsigned long ul; 294 unsigned long ul;
288 if (base < 2 || base > 36) 295 if (base < 2 || base > 36)
289 lj_err_arg(L, 2, LJ_ERR_BASERNG); 296 lj_err_arg(L, 2, LJ_ERR_BASERNG);
290 ul = strtoul(p, &ep, base); 297 while (lj_char_isspace((unsigned char)(*p))) p++;
291 if (p != ep) { 298 if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
292 while (lj_char_isspace((unsigned char)(*ep))) ep++; 299 if (lj_char_isalnum((unsigned char)(*p))) {
293 if (*ep == '\0') { 300 ul = strtoul(p, &ep, base);
294 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) 301 if (p != ep) {
295 setintV(L->base-1, (int32_t)ul); 302 while (lj_char_isspace((unsigned char)(*ep))) ep++;
296 else 303 if (*ep == '\0') {
297 setnumV(L->base-1, (lua_Number)ul); 304 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
298 return FFH_RES(1); 305 if (neg) ul = (unsigned long)-(long)ul;
306 setintV(L->base-1-LJ_FR2, (int32_t)ul);
307 } else {
308 lua_Number n = (lua_Number)ul;
309 if (neg) n = -n;
310 setnumV(L->base-1-LJ_FR2, n);
311 }
312 return FFH_RES(1);
313 }
299 } 314 }
300 } 315 }
301 } 316 }
302 setnilV(L->base-1); 317 setnilV(L->base-1-LJ_FR2);
303 return FFH_RES(1); 318 return FFH_RES(1);
304} 319}
305 320
306LJLIB_PUSH("nil")
307LJLIB_PUSH("false")
308LJLIB_PUSH("true")
309LJLIB_ASM(tostring) LJLIB_REC(.) 321LJLIB_ASM(tostring) LJLIB_REC(.)
310{ 322{
311 TValue *o = lj_lib_checkany(L, 1); 323 TValue *o = lj_lib_checkany(L, 1);
312 cTValue *mo; 324 cTValue *mo;
313 L->top = o+1; /* Only keep one argument. */ 325 L->top = o+1; /* Only keep one argument. */
314 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 326 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
315 copyTV(L, L->base-1, mo); /* Replace callable. */ 327 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
316 return FFH_TAILCALL; 328 return FFH_TAILCALL;
317 } else {
318 GCstr *s;
319 if (tvisnumber(o)) {
320 s = lj_str_fromnumber(L, o);
321 } else if (tvispri(o)) {
322 s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
323 } else {
324 if (tvisfunc(o) && isffunc(funcV(o)))
325 lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
326 else
327 lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
328 /* Note: lua_pushfstring calls the GC which may invalidate o. */
329 s = strV(L->top-1);
330 }
331 setstrV(L, L->base-1, s);
332 return FFH_RES(1);
333 } 329 }
330 lj_gc_check(L);
331 setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
332 return FFH_RES(1);
334} 333}
335 334
336/* -- Base library: throw and catch errors -------------------------------- */ 335/* -- Base library: throw and catch errors -------------------------------- */
@@ -359,7 +358,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
359 358
360static int load_aux(lua_State *L, int status, int envarg) 359static int load_aux(lua_State *L, int status, int envarg)
361{ 360{
362 if (status == 0) { 361 if (status == LUA_OK) {
363 if (tvistab(L->base+envarg-1)) { 362 if (tvistab(L->base+envarg-1)) {
364 GCfunc *fn = funcV(L->top-1); 363 GCfunc *fn = funcV(L->top-1);
365 GCtab *t = tabV(L->base+envarg-1); 364 GCtab *t = tabV(L->base+envarg-1);
@@ -408,10 +407,22 @@ LJLIB_CF(load)
408 GCstr *name = lj_lib_optstr(L, 2); 407 GCstr *name = lj_lib_optstr(L, 2);
409 GCstr *mode = lj_lib_optstr(L, 3); 408 GCstr *mode = lj_lib_optstr(L, 3);
410 int status; 409 int status;
411 if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) { 410 if (L->base < L->top &&
412 GCstr *s = lj_lib_checkstr(L, 1); 411 (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) {
412 const char *s;
413 MSize len;
414 if (tvisbuf(L->base)) {
415 SBufExt *sbx = bufV(L->base);
416 s = sbx->r;
417 len = sbufxlen(sbx);
418 if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */
419 } else {
420 GCstr *str = lj_lib_checkstr(L, 1);
421 s = strdata(str);
422 len = str->len;
423 }
413 lua_settop(L, 4); /* Ensure env arg exists. */ 424 lua_settop(L, 4); /* Ensure env arg exists. */
414 status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s), 425 status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s,
415 mode ? strdata(mode) : NULL); 426 mode ? strdata(mode) : NULL);
416 } else { 427 } else {
417 lj_lib_checkfunc(L, 1); 428 lj_lib_checkfunc(L, 1);
@@ -432,7 +443,7 @@ LJLIB_CF(dofile)
432 GCstr *fname = lj_lib_optstr(L, 1); 443 GCstr *fname = lj_lib_optstr(L, 1);
433 setnilV(L->top); 444 setnilV(L->top);
434 L->top = L->base+1; 445 L->top = L->base+1;
435 if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) 446 if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK)
436 lua_error(L); 447 lua_error(L);
437 lua_call(L, 0, LUA_MULTRET); 448 lua_call(L, 0, LUA_MULTRET);
438 return (int)(L->top - L->base) - 1; 449 return (int)(L->top - L->base) - 1;
@@ -442,20 +453,20 @@ LJLIB_CF(dofile)
442 453
443LJLIB_CF(gcinfo) 454LJLIB_CF(gcinfo)
444{ 455{
445 setintV(L->top++, (G(L)->gc.total >> 10)); 456 setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
446 return 1; 457 return 1;
447} 458}
448 459
449LJLIB_CF(collectgarbage) 460LJLIB_CF(collectgarbage)
450{ 461{
451 int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ 462 int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */
452 "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); 463 "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
453 int32_t data = lj_lib_optint(L, 2, 0); 464 int32_t data = lj_lib_optint(L, 2, 0);
454 if (opt == LUA_GCCOUNT) { 465 if (opt == LUA_GCCOUNT) {
455 setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); 466 setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
456 } else { 467 } else {
457 int res = lua_gc(L, opt, data); 468 int res = lua_gc(L, opt, data);
458 if (opt == LUA_GCSTEP) 469 if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
459 setboolV(L->top, res); 470 setboolV(L->top, res);
460 else 471 else
461 setintV(L->top, res); 472 setintV(L->top, res);
@@ -507,23 +518,14 @@ LJLIB_CF(print)
507 tv = L->top-1; 518 tv = L->top-1;
508 } 519 }
509 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) && 520 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) &&
510 !gcrefu(basemt_it(G(L), LJ_TNUMX)); 521 !gcrefu(basemt_it(G(L), LJ_TNUMX));
511 for (i = 0; i < nargs; i++) { 522 for (i = 0; i < nargs; i++) {
523 cTValue *o = &L->base[i];
512 const char *str; 524 const char *str;
513 size_t size; 525 size_t size;
514 cTValue *o = &L->base[i]; 526 MSize len;
515 if (shortcut && tvisstr(o)) { 527 if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) {
516 str = strVdata(o); 528 size = len;
517 size = strV(o)->len;
518 } else if (shortcut && tvisint(o)) {
519 char buf[LJ_STR_INTBUF];
520 char *p = lj_str_bufint(buf, intV(o));
521 size = (size_t)(buf+LJ_STR_INTBUF-p);
522 str = p;
523 } else if (shortcut && tvisnum(o)) {
524 char buf[LJ_STR_NUMBUF];
525 size = lj_str_bufnum(buf, o);
526 str = buf;
527 } else { 529 } else {
528 copyTV(L, L->top+1, o); 530 copyTV(L, L->top+1, o);
529 copyTV(L, L->top, L->top-1); 531 copyTV(L, L->top, L->top-1);
@@ -560,8 +562,8 @@ LJLIB_CF(coroutine_status)
560 co = threadV(L->base); 562 co = threadV(L->base);
561 if (co == L) s = "running"; 563 if (co == L) s = "running";
562 else if (co->status == LUA_YIELD) s = "suspended"; 564 else if (co->status == LUA_YIELD) s = "suspended";
563 else if (co->status != 0) s = "dead"; 565 else if (co->status != LUA_OK) s = "dead";
564 else if (co->base > tvref(co->stack)+1) s = "normal"; 566 else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
565 else if (co->top == co->base) s = "dead"; 567 else if (co->top == co->base) s = "dead";
566 else s = "suspended"; 568 else s = "suspended";
567 lua_pushstring(L, s); 569 lua_pushstring(L, s);
@@ -581,6 +583,12 @@ LJLIB_CF(coroutine_running)
581#endif 583#endif
582} 584}
583 585
586LJLIB_CF(coroutine_isyieldable)
587{
588 setboolV(L->top++, cframe_canyield(L->cframe));
589 return 1;
590}
591
584LJLIB_CF(coroutine_create) 592LJLIB_CF(coroutine_create)
585{ 593{
586 lua_State *L1; 594 lua_State *L1;
@@ -600,11 +608,11 @@ LJLIB_ASM(coroutine_yield)
600static int ffh_resume(lua_State *L, lua_State *co, int wrap) 608static int ffh_resume(lua_State *L, lua_State *co, int wrap)
601{ 609{
602 if (co->cframe != NULL || co->status > LUA_YIELD || 610 if (co->cframe != NULL || co->status > LUA_YIELD ||
603 (co->status == 0 && co->top == co->base)) { 611 (co->status == LUA_OK && co->top == co->base)) {
604 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; 612 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
605 if (wrap) lj_err_caller(L, em); 613 if (wrap) lj_err_caller(L, em);
606 setboolV(L->base-1, 0); 614 setboolV(L->base-1-LJ_FR2, 0);
607 setstrV(L, L->base, lj_err_str(L, em)); 615 setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
608 return FFH_RES(2); 616 return FFH_RES(2);
609 } 617 }
610 lj_state_growstack(co, (MSize)(L->top - L->base)); 618 lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -645,9 +653,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
645 653
646LJLIB_CF(coroutine_wrap) 654LJLIB_CF(coroutine_wrap)
647{ 655{
656 GCfunc *fn;
648 lj_cf_coroutine_create(L); 657 lj_cf_coroutine_create(L);
649 lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); 658 fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
650 setpc_wrap_aux(L, funcV(L->top-1)); 659 setpc_wrap_aux(L, fn);
651 return 1; 660 return 1;
652} 661}
653 662
diff --git a/src/lib_bit.c b/src/lib_bit.c
index 553beed8..6fb8ad47 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -12,26 +12,99 @@
12 12
13#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_str.h" 15#include "lj_buf.h"
16#include "lj_strscan.h"
17#include "lj_strfmt.h"
18#if LJ_HASFFI
19#include "lj_ctype.h"
20#include "lj_cdata.h"
21#include "lj_cconv.h"
22#include "lj_carith.h"
23#endif
24#include "lj_ff.h"
16#include "lj_lib.h" 25#include "lj_lib.h"
17 26
18/* ------------------------------------------------------------------------ */ 27/* ------------------------------------------------------------------------ */
19 28
20#define LJLIB_MODULE_bit 29#define LJLIB_MODULE_bit
21 30
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) 31#if LJ_HASFFI
32static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
23{ 33{
34 GCcdata *cd = lj_cdata_new_(L, id, 8);
35 *(uint64_t *)cdataptr(cd) = x;
36 setcdataV(L, L->base-1-LJ_FR2, cd);
37 return FFH_RES(1);
38}
39#else
40static int32_t bit_checkbit(lua_State *L, int narg)
41{
42 TValue *o = L->base + narg-1;
43 if (!(o < L->top && lj_strscan_numberobj(o)))
44 lj_err_argt(L, narg, LUA_TNUMBER);
45 if (LJ_LIKELY(tvisint(o))) {
46 return intV(o);
47 } else {
48 int32_t i = lj_num2bit(numV(o));
49 if (LJ_DUALNUM) setintV(o, i);
50 return i;
51 }
52}
53#endif
54
55LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
56{
57#if LJ_HASFFI
58 CTypeID id = 0;
59 setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
60 return FFH_RES(1);
61#else
62 lj_lib_checknumber(L, 1);
63 return FFH_RETRY;
64#endif
65}
66
67LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
68{
69#if LJ_HASFFI
70 CTypeID id = 0;
71 uint64_t x = lj_carith_check64(L, 1, &id);
72 return id ? bit_result64(L, id, ~x) : FFH_RETRY;
73#else
24 lj_lib_checknumber(L, 1); 74 lj_lib_checknumber(L, 1);
25 return FFH_RETRY; 75 return FFH_RETRY;
76#endif
77}
78
79LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
80{
81#if LJ_HASFFI
82 CTypeID id = 0;
83 uint64_t x = lj_carith_check64(L, 1, &id);
84 return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
85#else
86 lj_lib_checknumber(L, 1);
87 return FFH_RETRY;
88#endif
26} 89}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29 90
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) 91LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{ 92{
93#if LJ_HASFFI
94 CTypeID id = 0, id2 = 0;
95 uint64_t x = lj_carith_check64(L, 1, &id);
96 int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
97 if (id) {
98 x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
99 return bit_result64(L, id, x);
100 }
101 if (id2) setintV(L->base+1, sh);
102 return FFH_RETRY;
103#else
32 lj_lib_checknumber(L, 1); 104 lj_lib_checknumber(L, 1);
33 lj_lib_checkbit(L, 2); 105 bit_checkbit(L, 2);
34 return FFH_RETRY; 106 return FFH_RETRY;
107#endif
35} 108}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) 109LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) 110LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40 113
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) 114LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{ 115{
116#if LJ_HASFFI
117 CTypeID id = 0;
118 TValue *o = L->base, *top = L->top;
119 int i = 0;
120 do { lj_carith_check64(L, ++i, &id); } while (++o < top);
121 if (id) {
122 CTState *cts = ctype_cts(L);
123 CType *ct = ctype_get(cts, id);
124 int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
125 uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
126 o = L->base;
127 do {
128 lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
129 if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
130 } while (++o < top);
131 return bit_result64(L, id, y);
132 }
133 return FFH_RETRY;
134#else
43 int i = 0; 135 int i = 0;
44 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); 136 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY; 137 return FFH_RETRY;
138#endif
46} 139}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) 140LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) 141LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
49 142
50/* ------------------------------------------------------------------------ */ 143/* ------------------------------------------------------------------------ */
51 144
52LJLIB_CF(bit_tohex) 145LJLIB_CF(bit_tohex) LJLIB_REC(.)
53{ 146{
54 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); 147#if LJ_HASFFI
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); 148 CTypeID id = 0, id2 = 0;
56 const char *hexdigits = "0123456789abcdef"; 149 uint64_t b = lj_carith_check64(L, 1, &id);
57 char buf[8]; 150 int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
58 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } 151 (int32_t)lj_carith_check64(L, 2, &id2);
59 if (n > 8) n = 8; 152#else
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } 153 uint32_t b = (uint32_t)bit_checkbit(L, 1);
61 lua_pushlstring(L, buf, (size_t)n); 154 int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
155#endif
156 SBuf *sb = lj_buf_tmp_(L);
157 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
158 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
159 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
160#if LJ_HASFFI
161 if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
162#else
163 if (n < 8) b &= (1u << 4*n)-1;
164#endif
165 sb = lj_strfmt_putfxint(sb, sf, b);
166 setstrV(L, L->top-1, lj_buf_str(L, sb));
167 lj_gc_check(L);
62 return 1; 168 return 1;
63} 169}
64 170
diff --git a/src/lib_buffer.c b/src/lib_buffer.c
new file mode 100644
index 00000000..ae065759
--- /dev/null
+++ b/src/lib_buffer.c
@@ -0,0 +1,349 @@
1/*
2** Buffer library.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lib_buffer_c
7#define LUA_LIB
8
9#include "lua.h"
10#include "lauxlib.h"
11#include "lualib.h"
12
13#include "lj_obj.h"
14
15#if LJ_HASBUFFER
16#include "lj_gc.h"
17#include "lj_err.h"
18#include "lj_buf.h"
19#include "lj_str.h"
20#include "lj_tab.h"
21#include "lj_udata.h"
22#include "lj_meta.h"
23#if LJ_HASFFI
24#include "lj_ctype.h"
25#include "lj_cdata.h"
26#include "lj_cconv.h"
27#endif
28#include "lj_strfmt.h"
29#include "lj_serialize.h"
30#include "lj_lib.h"
31
32/* -- Helper functions ---------------------------------------------------- */
33
34/* Check that the first argument is a string buffer. */
35static SBufExt *buffer_tobuf(lua_State *L)
36{
37 if (!(L->base < L->top && tvisbuf(L->base)))
38 lj_err_argtype(L, 1, "buffer");
39 return bufV(L->base);
40}
41
42/* Ditto, but for writers. */
43static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
44{
45 SBufExt *sbx = buffer_tobuf(L);
46 setsbufXL_(sbx, L);
47 return sbx;
48}
49
50#define buffer_toudata(sbx) ((GCudata *)(sbx)-1)
51
52/* -- Buffer methods ------------------------------------------------------ */
53
54#define LJLIB_MODULE_buffer_method
55
56LJLIB_CF(buffer_method_free)
57{
58 SBufExt *sbx = buffer_tobuf(L);
59 lj_bufx_free(L, sbx);
60 L->top = L->base+1; /* Chain buffer object. */
61 return 1;
62}
63
64LJLIB_CF(buffer_method_reset) LJLIB_REC(.)
65{
66 SBufExt *sbx = buffer_tobuf(L);
67 lj_bufx_reset(sbx);
68 L->top = L->base+1; /* Chain buffer object. */
69 return 1;
70}
71
72LJLIB_CF(buffer_method_skip) LJLIB_REC(.)
73{
74 SBufExt *sbx = buffer_tobuf(L);
75 MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
76 MSize len = sbufxlen(sbx);
77 if (n < len) {
78 sbx->r += n;
79 } else {
80 sbx->r = sbx->w = sbx->b;
81 }
82 L->top = L->base+1; /* Chain buffer object. */
83 return 1;
84}
85
86LJLIB_CF(buffer_method_set) LJLIB_REC(.)
87{
88 SBufExt *sbx = buffer_tobuf(L);
89 GCobj *ref;
90 const char *p;
91 MSize len;
92#if LJ_HASFFI
93 if (tviscdata(L->base+1)) {
94 CTState *cts = ctype_cts(L);
95 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
96 L->base+1, CCF_ARG(2));
97 len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
98 } else
99#endif
100 {
101 GCstr *str = lj_lib_checkstrx(L, 2);
102 p = strdata(str);
103 len = str->len;
104 }
105 lj_bufx_free(L, sbx);
106 lj_bufx_set_cow(L, sbx, p, len);
107 ref = gcV(L->base+1);
108 setgcref(sbx->cowref, ref);
109 lj_gc_objbarrier(L, buffer_toudata(sbx), ref);
110 L->top = L->base+1; /* Chain buffer object. */
111 return 1;
112}
113
114LJLIB_CF(buffer_method_put) LJLIB_REC(.)
115{
116 SBufExt *sbx = buffer_tobufw(L);
117 ptrdiff_t arg, narg = L->top - L->base;
118 for (arg = 1; arg < narg; arg++) {
119 cTValue *o = &L->base[arg], *mo = NULL;
120 retry:
121 if (tvisstr(o)) {
122 lj_buf_putstr((SBuf *)sbx, strV(o));
123 } else if (tvisint(o)) {
124 lj_strfmt_putint((SBuf *)sbx, intV(o));
125 } else if (tvisnum(o)) {
126 lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o));
127 } else if (tvisbuf(o)) {
128 SBufExt *sbx2 = bufV(o);
129 if (sbx2 == sbx) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
130 lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2));
131 } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
132 /* Call __tostring metamethod inline. */
133 copyTV(L, L->top++, mo);
134 copyTV(L, L->top++, o);
135 lua_call(L, 1, 1);
136 o = &L->base[arg]; /* The stack may have been reallocated. */
137 copyTV(L, &L->base[arg], L->top-1);
138 L->top = L->base + narg;
139 goto retry; /* Retry with the result. */
140 } else {
141 lj_err_argtype(L, arg+1, "string/number/__tostring");
142 }
143 /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */
144 }
145 L->top = L->base+1; /* Chain buffer object. */
146 lj_gc_check(L);
147 return 1;
148}
149
150LJLIB_CF(buffer_method_putf) LJLIB_REC(.)
151{
152 SBufExt *sbx = buffer_tobufw(L);
153 lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
154 L->top = L->base+1; /* Chain buffer object. */
155 lj_gc_check(L);
156 return 1;
157}
158
159LJLIB_CF(buffer_method_get) LJLIB_REC(.)
160{
161 SBufExt *sbx = buffer_tobuf(L);
162 ptrdiff_t arg, narg = L->top - L->base;
163 if (narg == 1) {
164 narg++;
165 setnilV(L->top++); /* get() is the same as get(nil). */
166 }
167 for (arg = 1; arg < narg; arg++) {
168 TValue *o = &L->base[arg];
169 MSize n = tvisnil(o) ? LJ_MAX_BUF :
170 (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF);
171 MSize len = sbufxlen(sbx);
172 if (n > len) n = len;
173 setstrV(L, o, lj_str_new(L, sbx->r, n));
174 sbx->r += n;
175 }
176 if (sbx->r == sbx->w) sbx->r = sbx->w = sbx->b;
177 lj_gc_check(L);
178 return narg-1;
179}
180
181#if LJ_HASFFI
182LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.)
183{
184 SBufExt *sbx = buffer_tobufw(L);
185 const char *p;
186 MSize len;
187 if (tviscdata(L->base+1)) {
188 CTState *cts = ctype_cts(L);
189 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
190 L->base+1, CCF_ARG(2));
191 } else {
192 lj_err_argtype(L, 2, "cdata");
193 }
194 len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
195 lj_buf_putmem((SBuf *)sbx, p, len);
196 L->top = L->base+1; /* Chain buffer object. */
197 return 1;
198}
199
200LJLIB_CF(buffer_method_reserve) LJLIB_REC(.)
201{
202 SBufExt *sbx = buffer_tobufw(L);
203 MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
204 GCcdata *cd;
205 lj_buf_more((SBuf *)sbx, sz);
206 ctype_loadffi(L);
207 cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
208 *(void **)cdataptr(cd) = sbx->w;
209 setcdataV(L, L->top++, cd);
210 setintV(L->top++, sbufleft(sbx));
211 return 2;
212}
213
214LJLIB_CF(buffer_method_commit) LJLIB_REC(.)
215{
216 SBufExt *sbx = buffer_tobuf(L);
217 MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
218 if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG);
219 sbx->w += len;
220 L->top = L->base+1; /* Chain buffer object. */
221 return 1;
222}
223
224LJLIB_CF(buffer_method_ref) LJLIB_REC(.)
225{
226 SBufExt *sbx = buffer_tobuf(L);
227 GCcdata *cd;
228 ctype_loadffi(L);
229 cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
230 *(void **)cdataptr(cd) = sbx->r;
231 setcdataV(L, L->top++, cd);
232 setintV(L->top++, sbufxlen(sbx));
233 return 2;
234}
235#endif
236
237LJLIB_CF(buffer_method_encode) LJLIB_REC(.)
238{
239 SBufExt *sbx = buffer_tobufw(L);
240 cTValue *o = lj_lib_checkany(L, 2);
241 lj_serialize_put(sbx, o);
242 lj_gc_check(L);
243 L->top = L->base+1; /* Chain buffer object. */
244 return 1;
245}
246
247LJLIB_CF(buffer_method_decode) LJLIB_REC(.)
248{
249 SBufExt *sbx = buffer_tobufw(L);
250 setnilV(L->top++);
251 sbx->r = lj_serialize_get(sbx, L->top-1);
252 lj_gc_check(L);
253 return 1;
254}
255
256LJLIB_CF(buffer_method___gc)
257{
258 SBufExt *sbx = buffer_tobuf(L);
259 lj_bufx_free(L, sbx);
260 return 0;
261}
262
263LJLIB_CF(buffer_method___tostring) LJLIB_REC(.)
264{
265 SBufExt *sbx = buffer_tobuf(L);
266 setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));
267 lj_gc_check(L);
268 return 1;
269}
270
271LJLIB_CF(buffer_method___len) LJLIB_REC(.)
272{
273 SBufExt *sbx = buffer_tobuf(L);
274 setintV(L->top-1, (int32_t)sbufxlen(sbx));
275 return 1;
276}
277
278LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
279LJLIB_PUSH(top-1) LJLIB_SET(__index)
280
281/* -- Buffer library functions -------------------------------------------- */
282
283#define LJLIB_MODULE_buffer
284
285LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
286
287LJLIB_CF(buffer_new)
288{
289 MSize sz = 0;
290 int targ = 1;
291 GCtab *env, *dict = NULL;
292 GCudata *ud;
293 SBufExt *sbx;
294 if (L->base < L->top && !tvistab(L->base)) {
295 targ = 2;
296 if (!tvisnil(L->base))
297 sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
298 }
299 if (L->base+targ-1 < L->top) {
300 GCtab *options = lj_lib_checktab(L, targ);
301 cTValue *opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
302 if (opt_dict && tvistab(opt_dict)) {
303 dict = tabV(opt_dict);
304 lj_serialize_dict_prep(L, dict);
305 }
306 }
307 env = tabref(curr_func(L)->c.env);
308 ud = lj_udata_new(L, sizeof(SBufExt), env);
309 ud->udtype = UDTYPE_BUFFER;
310 /* NOBARRIER: The GCudata is new (marked white). */
311 setgcref(ud->metatable, obj2gco(env));
312 setudataV(L, L->top++, ud);
313 sbx = (SBufExt *)uddata(ud);
314 lj_bufx_init(L, sbx);
315 setgcref(sbx->dict, obj2gco(dict));
316 if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
317 return 1;
318}
319
320LJLIB_CF(buffer_encode) LJLIB_REC(.)
321{
322 cTValue *o = lj_lib_checkany(L, 1);
323 setstrV(L, L->top++, lj_serialize_encode(L, o));
324 lj_gc_check(L);
325 return 1;
326}
327
328LJLIB_CF(buffer_decode) LJLIB_REC(.)
329{
330 GCstr *str = lj_lib_checkstrx(L, 1);
331 setnilV(L->top++);
332 lj_serialize_decode(L, L->top-1, str);
333 return 1;
334}
335
336/* ------------------------------------------------------------------------ */
337
338#include "lj_libdef.h"
339
340int luaopen_string_buffer(lua_State *L)
341{
342 LJ_LIB_REG(L, NULL, buffer_method);
343 lua_getfield(L, -1, "__tostring");
344 lua_setfield(L, -2, "tostring");
345 LJ_LIB_REG(L, NULL, buffer);
346 return 1;
347}
348
349#endif
diff --git a/src/lib_debug.c b/src/lib_debug.c
index b25f26cf..a6acc6f2 100644
--- a/src/lib_debug.c
+++ b/src/lib_debug.c
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
29 return 1; 29 return 1;
30} 30}
31 31
32LJLIB_CF(debug_getmetatable) 32LJLIB_CF(debug_getmetatable) LJLIB_REC(.)
33{ 33{
34 lj_lib_checkany(L, 1); 34 lj_lib_checkany(L, 1);
35 if (!lua_getmetatable(L, 1)) { 35 if (!lua_getmetatable(L, 1)) {
@@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid)
231 int32_t n = lj_lib_checkint(L, 2) - 1; 231 int32_t n = lj_lib_checkint(L, 2) - 1;
232 if ((uint32_t)n >= fn->l.nupvalues) 232 if ((uint32_t)n >= fn->l.nupvalues)
233 lj_err_arg(L, 2, LJ_ERR_IDXRNG); 233 lj_err_arg(L, 2, LJ_ERR_IDXRNG);
234 setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : 234 lua_pushlightuserdata(L, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
235 (void *)&fn->c.upvalue[n]); 235 (void *)&fn->c.upvalue[n]);
236 return 1; 236 return 1;
237} 237}
238 238
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
283 283
284/* ------------------------------------------------------------------------ */ 284/* ------------------------------------------------------------------------ */
285 285
286static const char KEY_HOOK = 'h'; 286#define KEY_HOOK (U64x(80000000,00000000)|'h')
287 287
288static void hookf(lua_State *L, lua_Debug *ar) 288static void hookf(lua_State *L, lua_Debug *ar)
289{ 289{
290 static const char *const hooknames[] = 290 static const char *const hooknames[] =
291 {"call", "return", "line", "count", "tail return"}; 291 {"call", "return", "line", "count", "tail return"};
292 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 292 (L->top++)->u64 = KEY_HOOK;
293 lua_rawget(L, LUA_REGISTRYINDEX); 293 lua_rawget(L, LUA_REGISTRYINDEX);
294 if (lua_isfunction(L, -1)) { 294 if (lua_isfunction(L, -1)) {
295 lua_pushstring(L, hooknames[(int)ar->event]); 295 lua_pushstring(L, hooknames[(int)ar->event]);
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
334 count = luaL_optint(L, arg+3, 0); 334 count = luaL_optint(L, arg+3, 0);
335 func = hookf; mask = makemask(smask, count); 335 func = hookf; mask = makemask(smask, count);
336 } 336 }
337 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 337 (L->top++)->u64 = KEY_HOOK;
338 lua_pushvalue(L, arg+1); 338 lua_pushvalue(L, arg+1);
339 lua_rawset(L, LUA_REGISTRYINDEX); 339 lua_rawset(L, LUA_REGISTRYINDEX);
340 lua_sethook(L, func, mask, count); 340 lua_sethook(L, func, mask, count);
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
349 if (hook != NULL && hook != hookf) { /* external hook? */ 349 if (hook != NULL && hook != hookf) { /* external hook? */
350 lua_pushliteral(L, "external hook"); 350 lua_pushliteral(L, "external hook");
351 } else { 351 } else {
352 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 352 (L->top++)->u64 = KEY_HOOK;
353 lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ 353 lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */
354 } 354 }
355 lua_pushstring(L, unmakemask(mask, buff)); 355 lua_pushstring(L, unmakemask(mask, buff));
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index 4c86c1e1..a29014e5 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -29,6 +29,7 @@
29#include "lj_ccall.h" 29#include "lj_ccall.h"
30#include "lj_ccallback.h" 30#include "lj_ccallback.h"
31#include "lj_clib.h" 31#include "lj_clib.h"
32#include "lj_strfmt.h"
32#include "lj_ff.h" 33#include "lj_ff.h"
33#include "lj_lib.h" 34#include "lj_lib.h"
34 35
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
137 } 138 }
138 } 139 }
139 copyTV(L, base, L->top); 140 copyTV(L, base, L->top);
140 tv = L->top-1; 141 tv = L->top-1-LJ_FR2;
141 } 142 }
142 return lj_meta_tailcall(L, tv); 143 return lj_meta_tailcall(L, tv);
143} 144}
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
318 } 319 }
319 } 320 }
320 } 321 }
321 lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); 322 lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
322checkgc: 323checkgc:
323 lj_gc_check(L); 324 lj_gc_check(L);
324 return 1; 325 return 1;
@@ -504,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
504 } 505 }
505 if (sz == CTSIZE_INVALID) 506 if (sz == CTSIZE_INVALID)
506 lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); 507 lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
507 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) 508 cd = lj_cdata_newx(cts, id, sz, info);
508 cd = lj_cdata_new(cts, id, sz);
509 else
510 cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
511 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ 509 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
512 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), 510 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
513 o, (MSize)(L->top - o)); /* Initialize cdata. */ 511 o, (MSize)(L->top - o)); /* Initialize cdata. */
@@ -558,6 +556,31 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.)
558 return 1; 556 return 1;
559} 557}
560 558
559/* Internal and unsupported API. */
560LJLIB_CF(ffi_typeinfo)
561{
562 CTState *cts = ctype_cts(L);
563 CTypeID id = (CTypeID)ffi_checkint(L, 1);
564 if (id > 0 && id < cts->top) {
565 CType *ct = ctype_get(cts, id);
566 GCtab *t;
567 lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */
568 t = tabV(L->top-1);
569 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
570 if (ct->size != CTSIZE_INVALID)
571 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
572 if (ct->sib)
573 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
574 if (gcref(ct->name)) {
575 GCstr *s = gco2str(gcref(ct->name));
576 setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
577 }
578 lj_gc_check(L);
579 return 1;
580 }
581 return 0;
582}
583
561LJLIB_CF(ffi_istype) LJLIB_REC(.) 584LJLIB_CF(ffi_istype) LJLIB_REC(.)
562{ 585{
563 CTState *cts = ctype_cts(L); 586 CTState *cts = ctype_cts(L);
@@ -697,44 +720,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.)
697 return 0; 720 return 0;
698} 721}
699 722
700#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
701
702/* Test ABI string. */ 723/* Test ABI string. */
703LJLIB_CF(ffi_abi) LJLIB_REC(.) 724LJLIB_CF(ffi_abi) LJLIB_REC(.)
704{ 725{
705 GCstr *s = lj_lib_checkstr(L, 1); 726 GCstr *s = lj_lib_checkstr(L, 1);
706 int b = 0; 727 int b = lj_cparse_case(s,
707 switch (s->hash) {
708#if LJ_64 728#if LJ_64
709 case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ 729 "\00564bit"
710#else 730#else
711 case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ 731 "\00532bit"
712#endif 732#endif
713#if LJ_ARCH_HASFPU 733#if LJ_ARCH_HASFPU
714 case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ 734 "\003fpu"
715#endif 735#endif
716#if LJ_ABI_SOFTFP 736#if LJ_ABI_SOFTFP
717 case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ 737 "\006softfp"
718#else 738#else
719 case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ 739 "\006hardfp"
720#endif 740#endif
721#if LJ_ABI_EABI 741#if LJ_ABI_EABI
722 case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ 742 "\004eabi"
723#endif 743#endif
724#if LJ_ABI_WIN 744#if LJ_ABI_WIN
725 case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ 745 "\003win"
726#endif 746#endif
727 case H_(3af93066,1f001464): b = 1; break; /* le/be */ 747#if LJ_TARGET_UWP
728 default: 748 "\003uwp"
729 break; 749#endif
730 } 750#if LJ_LE
751 "\002le"
752#else
753 "\002be"
754#endif
755#if LJ_GC64
756 "\004gc64"
757#endif
758 ) >= 0;
731 setboolV(L->top-1, b); 759 setboolV(L->top-1, b);
732 setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ 760 setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */
733 return 1; 761 return 1;
734} 762}
735 763
736#undef H_
737
738LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ 764LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
739 765
740LJLIB_CF(ffi_metatype) 766LJLIB_CF(ffi_metatype)
@@ -768,19 +794,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.)
768 GCcdata *cd = ffi_checkcdata(L, 1); 794 GCcdata *cd = ffi_checkcdata(L, 1);
769 TValue *fin = lj_lib_checkany(L, 2); 795 TValue *fin = lj_lib_checkany(L, 2);
770 CTState *cts = ctype_cts(L); 796 CTState *cts = ctype_cts(L);
771 GCtab *t = cts->finalizer;
772 CType *ct = ctype_raw(cts, cd->ctypeid); 797 CType *ct = ctype_raw(cts, cd->ctypeid);
773 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || 798 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
774 ctype_isrefarray(ct->info))) 799 ctype_isrefarray(ct->info)))
775 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); 800 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
776 if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ 801 lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
777 copyTV(L, lj_tab_set(L, t, L->base), fin);
778 lj_gc_anybarriert(L, t);
779 if (!tvisnil(fin))
780 cd->marked |= LJ_GC_CDATA_FIN;
781 else
782 cd->marked &= ~LJ_GC_CDATA_FIN;
783 }
784 L->top = L->base+1; /* Pass through the cdata object. */ 802 L->top = L->base+1; /* Pass through the cdata object. */
785 return 1; 803 return 1;
786} 804}
diff --git a/src/lib_io.c b/src/lib_io.c
index 35c57d8b..b9d8cc75 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,8 +19,10 @@
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_state.h" 24#include "lj_state.h"
25#include "lj_strfmt.h"
24#include "lj_ff.h" 26#include "lj_ff.h"
25#include "lj_lib.h" 27#include "lj_lib.h"
26 28
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
84 IOFileUD *iof = io_file_new(L); 86 IOFileUD *iof = io_file_new(L);
85 iof->fp = fopen(fname, mode); 87 iof->fp = fopen(fname, mode);
86 if (iof->fp == NULL) 88 if (iof->fp == NULL)
87 luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); 89 luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
88 return iof; 90 return iof;
89} 91}
90 92
@@ -97,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
97 int stat = -1; 99 int stat = -1;
98#if LJ_TARGET_POSIX 100#if LJ_TARGET_POSIX
99 stat = pclose(iof->fp); 101 stat = pclose(iof->fp);
100#elif LJ_TARGET_WINDOWS 102#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
101 stat = _pclose(iof->fp); 103 stat = _pclose(iof->fp);
102#else
103 lua_assert(0);
104 return 0;
105#endif 104#endif
106#if LJ_52 105#if LJ_52
107 iof->fp = NULL; 106 iof->fp = NULL;
@@ -110,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
110 ok = (stat != -1); 109 ok = (stat != -1);
111#endif 110#endif
112 } else { 111 } else {
113 lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); 112 lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF,
113 "close of unknown FILE* type");
114 setnilV(L->top++); 114 setnilV(L->top++);
115 lua_pushliteral(L, "cannot close standard file"); 115 lua_pushliteral(L, "cannot close standard file");
116 return 2; 116 return 2;
@@ -145,7 +145,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; 145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
146 char *buf; 146 char *buf;
147 for (;;) { 147 for (;;) {
148 buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 148 buf = lj_buf_tmp(L, m);
149 if (fgets(buf+n, m-n, fp) == NULL) break; 149 if (fgets(buf+n, m-n, fp) == NULL) break;
150 n += (MSize)strlen(buf+n); 150 n += (MSize)strlen(buf+n);
151 ok |= n; 151 ok |= n;
@@ -161,7 +161,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
161{ 161{
162 MSize m, n; 162 MSize m, n;
163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { 163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
164 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 164 char *buf = lj_buf_tmp(L, m);
165 n += (MSize)fread(buf+n, 1, m-n, fp); 165 n += (MSize)fread(buf+n, 1, m-n, fp);
166 if (n != m) { 166 if (n != m) {
167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +174,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
174static int io_file_readlen(lua_State *L, FILE *fp, MSize m) 174static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
175{ 175{
176 if (m) { 176 if (m) {
177 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 177 char *buf = lj_buf_tmp(L, m);
178 MSize n = (MSize)fread(buf, 1, m, fp); 178 MSize n = (MSize)fread(buf, 1, m, fp);
179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
180 lj_gc_check(L); 180 lj_gc_check(L);
@@ -202,13 +202,12 @@ static int io_file_read(lua_State *L, IOFileUD *iof, int start)
202 for (n = start; nargs-- && ok; n++) { 202 for (n = start; nargs-- && ok; n++) {
203 if (tvisstr(L->base+n)) { 203 if (tvisstr(L->base+n)) {
204 const char *p = strVdata(L->base+n); 204 const char *p = strVdata(L->base+n);
205 if (p[0] != '*') 205 if (p[0] == '*') p++;
206 lj_err_arg(L, n+1, LJ_ERR_INVOPT); 206 if (p[0] == 'n')
207 if (p[1] == 'n')
208 ok = io_file_readnum(L, fp); 207 ok = io_file_readnum(L, fp);
209 else if ((p[1] & ~0x20) == 'L') 208 else if ((p[0] & ~0x20) == 'L')
210 ok = io_file_readline(L, fp, (p[1] == 'l')); 209 ok = io_file_readline(L, fp, (p[0] == 'l'));
211 else if (p[1] == 'a') 210 else if (p[0] == 'a')
212 io_file_readall(L, fp); 211 io_file_readall(L, fp);
213 else 212 else
214 lj_err_arg(L, n+1, LJ_ERR_INVFMT); 213 lj_err_arg(L, n+1, LJ_ERR_INVFMT);
@@ -232,19 +231,11 @@ static int io_file_write(lua_State *L, IOFileUD *iof, int start)
232 cTValue *tv; 231 cTValue *tv;
233 int status = 1; 232 int status = 1;
234 for (tv = L->base+start; tv < L->top; tv++) { 233 for (tv = L->base+start; tv < L->top; tv++) {
235 if (tvisstr(tv)) { 234 MSize len;
236 MSize len = strV(tv)->len; 235 const char *p = lj_strfmt_wstrnum(L, tv, &len);
237 status = status && (fwrite(strVdata(tv), 1, len, fp) == len); 236 if (!p)
238 } else if (tvisint(tv)) {
239 char buf[LJ_STR_INTBUF];
240 char *p = lj_str_bufint(buf, intV(tv));
241 size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
242 status = status && (fwrite(p, 1, len, fp) == len);
243 } else if (tvisnum(tv)) {
244 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
245 } else {
246 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); 237 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
247 } 238 status = status && (fwrite(p, 1, len, fp) == len);
248 } 239 }
249 if (LJ_52 && status) { 240 if (LJ_52 && status) {
250 L->top = L->base+1; 241 L->top = L->base+1;
@@ -319,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
319 return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); 310 return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
320} 311}
321 312
313#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24
314/* The Android NDK is such an unmatched marvel of engineering. */
315extern int fseeko32(FILE *, long int, int) __asm__("fseeko");
316extern long int ftello32(FILE *) __asm__("ftello");
317#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence)))
318#define ftello(fp) (ftello32((fp)))
319#endif
320
322LJLIB_CF(io_method_seek) 321LJLIB_CF(io_method_seek)
323{ 322{
324 FILE *fp = io_tofile(L)->fp; 323 FILE *fp = io_tofile(L)->fp;
@@ -419,7 +418,7 @@ LJLIB_CF(io_open)
419 418
420LJLIB_CF(io_popen) 419LJLIB_CF(io_popen)
421{ 420{
422#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS 421#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
423 const char *fname = strdata(lj_lib_checkstr(L, 1)); 422 const char *fname = strdata(lj_lib_checkstr(L, 1));
424 GCstr *s = lj_lib_optstr(L, 2); 423 GCstr *s = lj_lib_optstr(L, 2);
425 const char *mode = s ? strdata(s) : "r"; 424 const char *mode = s ? strdata(s) : "r";
diff --git a/src/lib_jit.c b/src/lib_jit.c
index bc7a4621..817c2967 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -10,13 +10,17 @@
10#include "lauxlib.h" 10#include "lauxlib.h"
11#include "lualib.h" 11#include "lualib.h"
12 12
13#include "lj_arch.h"
14#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_gc.h"
15#include "lj_err.h" 15#include "lj_err.h"
16#include "lj_debug.h" 16#include "lj_debug.h"
17#include "lj_str.h" 17#include "lj_str.h"
18#include "lj_tab.h" 18#include "lj_tab.h"
19#include "lj_state.h"
19#include "lj_bc.h" 20#include "lj_bc.h"
21#if LJ_HASFFI
22#include "lj_ctype.h"
23#endif
20#if LJ_HASJIT 24#if LJ_HASJIT
21#include "lj_ir.h" 25#include "lj_ir.h"
22#include "lj_jit.h" 26#include "lj_jit.h"
@@ -24,6 +28,7 @@
24#include "lj_iropt.h" 28#include "lj_iropt.h"
25#include "lj_target.h" 29#include "lj_target.h"
26#endif 30#endif
31#include "lj_trace.h"
27#include "lj_dispatch.h" 32#include "lj_dispatch.h"
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "lj_vmevent.h" 34#include "lj_vmevent.h"
@@ -99,8 +104,8 @@ LJLIB_CF(jit_status)
99 jit_State *J = L2J(L); 104 jit_State *J = L2J(L);
100 L->top = L->base; 105 L->top = L->base;
101 setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); 106 setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
102 flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); 107 flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
103 flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); 108 flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
104 return (int)(L->top - L->base); 109 return (int)(L->top - L->base);
105#else 110#else
106 setboolV(L->top++, 0); 111 setboolV(L->top++, 0);
@@ -108,6 +113,13 @@ LJLIB_CF(jit_status)
108#endif 113#endif
109} 114}
110 115
116LJLIB_CF(jit_security)
117{
118 int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING);
119 setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3));
120 return 1;
121}
122
111LJLIB_CF(jit_attach) 123LJLIB_CF(jit_attach)
112{ 124{
113#ifdef LUAJIT_DISABLE_VMEVENT 125#ifdef LUAJIT_DISABLE_VMEVENT
@@ -222,7 +234,7 @@ LJLIB_CF(jit_util_funcbc)
222 if (pc < pt->sizebc) { 234 if (pc < pt->sizebc) {
223 BCIns ins = proto_bc(pt)[pc]; 235 BCIns ins = proto_bc(pt)[pc];
224 BCOp op = bc_op(ins); 236 BCOp op = bc_op(ins);
225 lua_assert(op < BC__MAX); 237 lj_assertL(op < BC__MAX, "bad bytecode op %d", op);
226 setintV(L->top, ins); 238 setintV(L->top, ins);
227 setintV(L->top+1, lj_bc_mode[op]); 239 setintV(L->top+1, lj_bc_mode[op]);
228 L->top += 2; 240 L->top += 2;
@@ -280,7 +292,7 @@ static GCtrace *jit_checktrace(lua_State *L)
280/* Names of link types. ORDER LJ_TRLINK */ 292/* Names of link types. ORDER LJ_TRLINK */
281static const char *const jit_trlinkname[] = { 293static const char *const jit_trlinkname[] = {
282 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", 294 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
283 "interpreter", "return" 295 "interpreter", "return", "stitch"
284}; 296};
285 297
286/* local info = jit.util.traceinfo(tr) */ 298/* local info = jit.util.traceinfo(tr) */
@@ -333,6 +345,9 @@ LJLIB_CF(jit_util_tracek)
333 slot = ir->op2; 345 slot = ir->op2;
334 ir = &T->ir[ir->op1]; 346 ir = &T->ir[ir->op1];
335 } 347 }
348#if LJ_HASFFI
349 if (ir->o == IR_KINT64) ctype_loadffi(L);
350#endif
336 lj_ir_kvalue(L, L->top-2, ir); 351 lj_ir_kvalue(L, L->top-2, ir);
337 setintV(L->top-1, (int32_t)irt_type(ir->t)); 352 setintV(L->top-1, (int32_t)irt_type(ir->t));
338 if (slot == -1) 353 if (slot == -1)
@@ -417,6 +432,12 @@ LJLIB_CF(jit_util_ircalladdr)
417 432
418#include "lj_libdef.h" 433#include "lj_libdef.h"
419 434
435static int luaopen_jit_util(lua_State *L)
436{
437 LJ_LIB_REG(L, NULL, jit_util);
438 return 1;
439}
440
420/* -- jit.opt module ------------------------------------------------------ */ 441/* -- jit.opt module ------------------------------------------------------ */
421 442
422#if LJ_HASJIT 443#if LJ_HASJIT
@@ -453,7 +474,7 @@ static int jitopt_flag(jit_State *J, const char *str)
453 str += str[2] == '-' ? 3 : 2; 474 str += str[2] == '-' ? 3 : 2;
454 set = 0; 475 set = 0;
455 } 476 }
456 for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { 477 for (opt = JIT_F_OPT; ; opt <<= 1) {
457 size_t len = *(const uint8_t *)lst; 478 size_t len = *(const uint8_t *)lst;
458 if (len == 0) 479 if (len == 0)
459 break; 480 break;
@@ -473,7 +494,7 @@ static int jitopt_param(jit_State *J, const char *str)
473 int i; 494 int i;
474 for (i = 0; i < JIT_P__MAX; i++) { 495 for (i = 0; i < JIT_P__MAX; i++) {
475 size_t len = *(const uint8_t *)lst; 496 size_t len = *(const uint8_t *)lst;
476 lua_assert(len != 0); 497 lj_assertJ(len != 0, "bad JIT_P_STRING");
477 if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { 498 if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
478 int32_t n = 0; 499 int32_t n = 0;
479 const char *p = &str[len+1]; 500 const char *p = &str[len+1];
@@ -514,6 +535,104 @@ LJLIB_CF(jit_opt_start)
514 535
515#endif 536#endif
516 537
538/* -- jit.profile module -------------------------------------------------- */
539
540#if LJ_HASPROFILE
541
542#define LJLIB_MODULE_jit_profile
543
544/* Not loaded by default, use: local profile = require("jit.profile") */
545
546#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t')
547#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f')
548
549static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
550 int vmstate)
551{
552 TValue key;
553 cTValue *tv;
554 key.u64 = KEY_PROFILE_FUNC;
555 tv = lj_tab_get(L, tabV(registry(L)), &key);
556 if (tvisfunc(tv)) {
557 char vmst = (char)vmstate;
558 int status;
559 setfuncV(L2, L2->top++, funcV(tv));
560 setthreadV(L2, L2->top++, L);
561 setintV(L2->top++, samples);
562 setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
563 status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */
564 if (status) {
565 if (G(L2)->panic) G(L2)->panic(L2);
566 exit(EXIT_FAILURE);
567 }
568 lj_trace_abort(G(L2));
569 }
570}
571
572/* profile.start(mode, cb) */
573LJLIB_CF(jit_profile_start)
574{
575 GCtab *registry = tabV(registry(L));
576 GCstr *mode = lj_lib_optstr(L, 1);
577 GCfunc *func = lj_lib_checkfunc(L, 2);
578 lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
579 TValue key;
580 /* Anchor thread and function in registry. */
581 key.u64 = KEY_PROFILE_THREAD;
582 setthreadV(L, lj_tab_set(L, registry, &key), L2);
583 key.u64 = KEY_PROFILE_FUNC;
584 setfuncV(L, lj_tab_set(L, registry, &key), func);
585 lj_gc_anybarriert(L, registry);
586 luaJIT_profile_start(L, mode ? strdata(mode) : "",
587 (luaJIT_profile_callback)jit_profile_callback, L2);
588 return 0;
589}
590
591/* profile.stop() */
592LJLIB_CF(jit_profile_stop)
593{
594 GCtab *registry;
595 TValue key;
596 luaJIT_profile_stop(L);
597 registry = tabV(registry(L));
598 key.u64 = KEY_PROFILE_THREAD;
599 setnilV(lj_tab_set(L, registry, &key));
600 key.u64 = KEY_PROFILE_FUNC;
601 setnilV(lj_tab_set(L, registry, &key));
602 lj_gc_anybarriert(L, registry);
603 return 0;
604}
605
606/* dump = profile.dumpstack([thread,] fmt, depth) */
607LJLIB_CF(jit_profile_dumpstack)
608{
609 lua_State *L2 = L;
610 int arg = 0;
611 size_t len;
612 int depth;
613 GCstr *fmt;
614 const char *p;
615 if (L->top > L->base && tvisthread(L->base)) {
616 L2 = threadV(L->base);
617 arg = 1;
618 }
619 fmt = lj_lib_checkstr(L, arg+1);
620 depth = lj_lib_checkint(L, arg+2);
621 p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
622 lua_pushlstring(L, p, len);
623 return 1;
624}
625
626#include "lj_libdef.h"
627
628static int luaopen_jit_profile(lua_State *L)
629{
630 LJ_LIB_REG(L, NULL, jit_profile);
631 return 1;
632}
633
634#endif
635
517/* -- JIT compiler initialization ----------------------------------------- */ 636/* -- JIT compiler initialization ----------------------------------------- */
518 637
519#if LJ_HASJIT 638#if LJ_HASJIT
@@ -524,66 +643,41 @@ JIT_PARAMDEF(JIT_PARAMINIT)
524#undef JIT_PARAMINIT 643#undef JIT_PARAMINIT
525 0 644 0
526}; 645};
527#endif
528 646
529#if LJ_TARGET_ARM && LJ_TARGET_LINUX 647#if LJ_TARGET_ARM && LJ_TARGET_LINUX
530#include <sys/utsname.h> 648#include <sys/utsname.h>
531#endif 649#endif
532 650
533/* Arch-dependent CPU detection. */ 651/* Arch-dependent CPU feature detection. */
534static uint32_t jit_cpudetect(lua_State *L) 652static uint32_t jit_cpudetect(void)
535{ 653{
536 uint32_t flags = 0; 654 uint32_t flags = 0;
537#if LJ_TARGET_X86ORX64 655#if LJ_TARGET_X86ORX64
656
538 uint32_t vendor[4]; 657 uint32_t vendor[4];
539 uint32_t features[4]; 658 uint32_t features[4];
540 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 659 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
541#if !LJ_HASJIT
542#define JIT_F_CMOV 1
543#define JIT_F_SSE2 2
544#endif
545 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
546 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
547#if LJ_HASJIT
548 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 660 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
549 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 661 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
550 if (vendor[2] == 0x6c65746e) { /* Intel. */ 662 if (vendor[0] >= 7) {
551 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 663 uint32_t xfeatures[4];
552 flags |= JIT_F_P4; /* Currently unused. */ 664 lj_vm_cpuid(7, xfeatures);
553 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ 665 flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
554 flags |= JIT_F_LEA_AGU;
555 } else if (vendor[2] == 0x444d4163) { /* AMD. */
556 uint32_t fam = (features[0] & 0x0ff00f00);
557 if (fam == 0x00000f00) /* K8. */
558 flags |= JIT_F_SPLIT_XMM;
559 if (fam >= 0x00000f00) /* K8, K10. */
560 flags |= JIT_F_PREFER_IMUL;
561 } 666 }
562#endif
563 } 667 }
564 /* Check for required instruction set support on x86 (unnecessary on x64). */ 668 /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
565#if LJ_TARGET_X86 669
566#if !defined(LUAJIT_CPU_NOCMOV)
567 if (!(flags & JIT_F_CMOV))
568 luaL_error(L, "CPU not supported");
569#endif
570#if defined(LUAJIT_CPU_SSE2)
571 if (!(flags & JIT_F_SSE2))
572 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
573#endif
574#endif
575#elif LJ_TARGET_ARM 670#elif LJ_TARGET_ARM
576#if LJ_HASJIT 671
577 int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ 672 int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
578#if LJ_TARGET_LINUX 673#if LJ_TARGET_LINUX
579 if (ver < 70) { /* Runtime ARM CPU detection. */ 674 if (ver < 70) { /* Runtime ARM CPU detection. */
580 struct utsname ut; 675 struct utsname ut;
581 uname(&ut); 676 uname(&ut);
582 if (strncmp(ut.machine, "armv", 4) == 0) { 677 if (strncmp(ut.machine, "armv", 4) == 0) {
583 if (ut.machine[4] >= '7') 678 if (ut.machine[4] >= '8') ver = 80;
584 ver = 70; 679 else if (ut.machine[4] == '7') ver = 70;
585 else if (ut.machine[4] == '6') 680 else if (ut.machine[4] == '6') ver = 60;
586 ver = 60;
587 } 681 }
588 } 682 }
589#endif 683#endif
@@ -591,74 +685,77 @@ static uint32_t jit_cpudetect(lua_State *L)
591 ver >= 61 ? JIT_F_ARMV6T2_ : 685 ver >= 61 ? JIT_F_ARMV6T2_ :
592 ver >= 60 ? JIT_F_ARMV6_ : 0; 686 ver >= 60 ? JIT_F_ARMV6_ : 0;
593 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; 687 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
594#endif 688
689#elif LJ_TARGET_ARM64
690
691 /* No optional CPU features to detect (for now). */
692
595#elif LJ_TARGET_PPC 693#elif LJ_TARGET_PPC
596#if LJ_HASJIT 694
597#if LJ_ARCH_SQRT 695#if LJ_ARCH_SQRT
598 flags |= JIT_F_SQRT; 696 flags |= JIT_F_SQRT;
599#endif 697#endif
600#if LJ_ARCH_ROUND 698#if LJ_ARCH_ROUND
601 flags |= JIT_F_ROUND; 699 flags |= JIT_F_ROUND;
602#endif 700#endif
603#endif 701
604#elif LJ_TARGET_PPCSPE
605 /* Nothing to do. */
606#elif LJ_TARGET_MIPS 702#elif LJ_TARGET_MIPS
607#if LJ_HASJIT 703
608 /* Compile-time MIPS CPU detection. */ 704 /* Compile-time MIPS CPU detection. */
609#if LJ_ARCH_VERSION >= 20 705#if LJ_ARCH_VERSION >= 20
610 flags |= JIT_F_MIPS32R2; 706 flags |= JIT_F_MIPSXXR2;
611#endif 707#endif
612 /* Runtime MIPS CPU detection. */ 708 /* Runtime MIPS CPU detection. */
613#if defined(__GNUC__) 709#if defined(__GNUC__)
614 if (!(flags & JIT_F_MIPS32R2)) { 710 if (!(flags & JIT_F_MIPSXXR2)) {
615 int x; 711 int x;
712#ifdef __mips16
713 x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
714#else
616 /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ 715 /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
617 __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); 716 __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
618 if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */
619 }
620#endif 717#endif
718 if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
719 }
621#endif 720#endif
721
622#else 722#else
623#error "Missing CPU detection for this architecture" 723#error "Missing CPU detection for this architecture"
624#endif 724#endif
625 UNUSED(L);
626 return flags; 725 return flags;
627} 726}
628 727
629/* Initialize JIT compiler. */ 728/* Initialize JIT compiler. */
630static void jit_init(lua_State *L) 729static void jit_init(lua_State *L)
631{ 730{
632 uint32_t flags = jit_cpudetect(L);
633#if LJ_HASJIT
634 jit_State *J = L2J(L); 731 jit_State *J = L2J(L);
635#if LJ_TARGET_X86 732 J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
636 /* Silently turn off the JIT compiler on CPUs without SSE2. */
637 if ((flags & JIT_F_SSE2))
638#endif
639 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
640 memcpy(J->param, jit_param_default, sizeof(J->param)); 733 memcpy(J->param, jit_param_default, sizeof(J->param));
641 lj_dispatch_update(G(L)); 734 lj_dispatch_update(G(L));
642#else
643 UNUSED(flags);
644#endif
645} 735}
736#endif
646 737
647LUALIB_API int luaopen_jit(lua_State *L) 738LUALIB_API int luaopen_jit(lua_State *L)
648{ 739{
740#if LJ_HASJIT
741 jit_init(L);
742#endif
649 lua_pushliteral(L, LJ_OS_NAME); 743 lua_pushliteral(L, LJ_OS_NAME);
650 lua_pushliteral(L, LJ_ARCH_NAME); 744 lua_pushliteral(L, LJ_ARCH_NAME);
651 lua_pushinteger(L, LUAJIT_VERSION_NUM); 745 lua_pushinteger(L, LUAJIT_VERSION_NUM);
652 lua_pushliteral(L, LUAJIT_VERSION); 746 lua_pushliteral(L, LUAJIT_VERSION);
653 LJ_LIB_REG(L, LUA_JITLIBNAME, jit); 747 LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
748#if LJ_HASPROFILE
749 lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
750 tabref(L->env));
751#endif
654#ifndef LUAJIT_DISABLE_JITUTIL 752#ifndef LUAJIT_DISABLE_JITUTIL
655 LJ_LIB_REG(L, "jit.util", jit_util); 753 lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
656#endif 754#endif
657#if LJ_HASJIT 755#if LJ_HASJIT
658 LJ_LIB_REG(L, "jit.opt", jit_opt); 756 LJ_LIB_REG(L, "jit.opt", jit_opt);
659#endif 757#endif
660 L->top -= 2; 758 L->top -= 2;
661 jit_init(L);
662 return 1; 759 return 1;
663} 760}
664 761
diff --git a/src/lib_math.c b/src/lib_math.c
index 4c2c7753..e9a0b597 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -15,6 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_lib.h" 16#include "lj_lib.h"
17#include "lj_vm.h" 17#include "lj_vm.h"
18#include "lj_prng.h"
18 19
19/* ------------------------------------------------------------------------ */ 20/* ------------------------------------------------------------------------ */
20 21
@@ -33,25 +34,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT)
33 lj_lib_checknum(L, 1); 34 lj_lib_checknum(L, 1);
34 return FFH_RETRY; 35 return FFH_RETRY;
35} 36}
36LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) 37LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10)
37LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) 38LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp)
38LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) 39LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin)
39LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) 40LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos)
40LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) 41LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan)
41LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) 42LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin)
42LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) 43LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos)
43LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) 44LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan)
44LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) 45LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh)
45LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) 46LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh)
46LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) 47LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh)
47LJLIB_ASM_(math_frexp) 48LJLIB_ASM_(math_frexp)
48LJLIB_ASM_(math_modf) LJLIB_REC(.) 49LJLIB_ASM_(math_modf)
49
50LJLIB_PUSH(57.29577951308232)
51LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
52
53LJLIB_PUSH(0.017453292519943295)
54LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
55 50
56LJLIB_ASM(math_log) LJLIB_REC(math_log) 51LJLIB_ASM(math_log) LJLIB_REC(math_log)
57{ 52{
@@ -63,12 +58,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
63#else 58#else
64 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); 59 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
65#endif 60#endif
66 setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */ 61 setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */
67 return FFH_RES(1); 62 return FFH_RES(1);
68 } 63 }
69 return FFH_RETRY; 64 return FFH_RETRY;
70} 65}
71 66
67LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
68LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
69
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 70LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 71{
74 lj_lib_checknum(L, 1); 72 lj_lib_checknum(L, 1);
@@ -108,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge)
108** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. 106** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
109*/ 107*/
110 108
111/* PRNG state. */
112struct RandomState {
113 uint64_t gen[4]; /* State of the 4 LFSR generators. */
114 int valid; /* State is valid. */
115};
116
117/* Union needed for bit-pattern conversion between uint64_t and double. */ 109/* Union needed for bit-pattern conversion between uint64_t and double. */
118typedef union { uint64_t u64; double d; } U64double; 110typedef union { uint64_t u64; double d; } U64double;
119 111
120/* Update generator i and compute a running xor of all states. */ 112/* PRNG seeding function. */
121#define TW223_GEN(i, k, q, s) \ 113static void random_seed(PRNGState *rs, double d)
122 z = rs->gen[i]; \
123 z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
124 r ^= z; rs->gen[i] = z;
125
126/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
127LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
128{
129 uint64_t z, r = 0;
130 TW223_GEN(0, 63, 31, 18)
131 TW223_GEN(1, 58, 19, 28)
132 TW223_GEN(2, 55, 24, 7)
133 TW223_GEN(3, 47, 21, 8)
134 return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
135}
136
137/* PRNG initialization function. */
138static void random_init(RandomState *rs, double d)
139{ 114{
140 uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ 115 uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
141 int i; 116 int i;
@@ -144,24 +119,22 @@ static void random_init(RandomState *rs, double d)
144 uint32_t m = 1u << (r&255); 119 uint32_t m = 1u << (r&255);
145 r >>= 8; 120 r >>= 8;
146 u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; 121 u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
147 if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ 122 if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */
148 rs->gen[i] = u.u64; 123 rs->u[i] = u.u64;
149 } 124 }
150 rs->valid = 1;
151 for (i = 0; i < 10; i++) 125 for (i = 0; i < 10; i++)
152 lj_math_random_step(rs); 126 (void)lj_prng_u64(rs);
153} 127}
154 128
155/* PRNG extract function. */ 129/* PRNG extract function. */
156LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ 130LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
157LJLIB_CF(math_random) LJLIB_REC(.) 131LJLIB_CF(math_random) LJLIB_REC(.)
158{ 132{
159 int n = (int)(L->top - L->base); 133 int n = (int)(L->top - L->base);
160 RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); 134 PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
161 U64double u; 135 U64double u;
162 double d; 136 double d;
163 if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); 137 u.u64 = lj_prng_u64d(rs);
164 u.u64 = lj_math_random_step(rs);
165 d = u.d - 1.0; 138 d = u.d - 1.0;
166 if (n > 0) { 139 if (n > 0) {
167#if LJ_DUALNUM 140#if LJ_DUALNUM
@@ -206,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.)
206} 179}
207 180
208/* PRNG seed function. */ 181/* PRNG seed function. */
209LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ 182LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
210LJLIB_CF(math_randomseed) 183LJLIB_CF(math_randomseed)
211{ 184{
212 RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); 185 PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
213 random_init(rs, lj_lib_checknum(L, 1)); 186 random_seed(rs, lj_lib_checknum(L, 1));
214 return 0; 187 return 0;
215} 188}
216 189
@@ -220,14 +193,9 @@ LJLIB_CF(math_randomseed)
220 193
221LUALIB_API int luaopen_math(lua_State *L) 194LUALIB_API int luaopen_math(lua_State *L)
222{ 195{
223 RandomState *rs; 196 PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState));
224 rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); 197 lj_prng_seed_fixed(rs);
225 rs->valid = 0; /* Use lazy initialization to save some time on startup. */
226 LJ_LIB_REG(L, LUA_MATHLIBNAME, math); 198 LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
227#if defined(LUA_COMPAT_MOD) && !LJ_52
228 lua_getfield(L, -1, "fmod");
229 lua_setfield(L, -2, "mod");
230#endif
231 return 1; 199 return 1;
232} 200}
233 201
diff --git a/src/lib_os.c b/src/lib_os.c
index a5888eba..f19b831c 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -17,7 +17,10 @@
17#include "lualib.h" 17#include "lualib.h"
18 18
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h"
20#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
23#include "lj_str.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23#if LJ_TARGET_POSIX 26#if LJ_TARGET_POSIX
@@ -188,7 +191,7 @@ LJLIB_CF(os_date)
188#endif 191#endif
189 } 192 }
190 if (stm == NULL) { /* Invalid date? */ 193 if (stm == NULL) { /* Invalid date? */
191 setnilV(L->top-1); 194 setnilV(L->top++);
192 } else if (strcmp(s, "*t") == 0) { 195 } else if (strcmp(s, "*t") == 0) {
193 lua_createtable(L, 0, 9); /* 9 = number of fields */ 196 lua_createtable(L, 0, 9); /* 9 = number of fields */
194 setfield(L, "sec", stm->tm_sec); 197 setfield(L, "sec", stm->tm_sec);
@@ -200,23 +203,25 @@ LJLIB_CF(os_date)
200 setfield(L, "wday", stm->tm_wday+1); 203 setfield(L, "wday", stm->tm_wday+1);
201 setfield(L, "yday", stm->tm_yday+1); 204 setfield(L, "yday", stm->tm_yday+1);
202 setboolfield(L, "isdst", stm->tm_isdst); 205 setboolfield(L, "isdst", stm->tm_isdst);
203 } else { 206 } else if (*s) {
204 char cc[3]; 207 SBuf *sb = &G(L)->tmpbuf;
205 luaL_Buffer b; 208 MSize sz = 0, retry = 4;
206 cc[0] = '%'; cc[2] = '\0'; 209 const char *q;
207 luaL_buffinit(L, &b); 210 for (q = s; *q; q++)
208 for (; *s; s++) { 211 sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
209 if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ 212 setsbufL(sb, L);
210 luaL_addchar(&b, *s); 213 while (retry--) { /* Limit growth for invalid format or empty result. */
211 } else { 214 char *buf = lj_buf_need(sb, sz);
212 size_t reslen; 215 size_t len = strftime(buf, sbufsz(sb), s, stm);
213 char buff[200]; /* Should be big enough for any conversion result. */ 216 if (len) {
214 cc[1] = *(++s); 217 setstrV(L, L->top++, lj_str_new(L, buf, len));
215 reslen = strftime(buff, sizeof(buff), cc, stm); 218 lj_gc_check(L);
216 luaL_addlstring(&b, buff, reslen); 219 break;
217 } 220 }
221 sz += (sz|1);
218 } 222 }
219 luaL_pushresult(&b); 223 } else {
224 setstrV(L, L->top++, &G(L)->strempty);
220 } 225 }
221 return 1; 226 return 1;
222} 227}
diff --git a/src/lib_package.c b/src/lib_package.c
index b0274370..2068a098 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
76BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); 76BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
77#endif 77#endif
78 78
79#if LJ_TARGET_UWP
80void *LJ_WIN_LOADLIBA(const char *path)
81{
82 DWORD err = GetLastError();
83 wchar_t wpath[256];
84 HANDLE lib = NULL;
85 if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
86 lib = LoadPackagedLibrary(wpath, 0);
87 }
88 SetLastError(err);
89 return lib;
90}
91#endif
92
79#undef setprogdir 93#undef setprogdir
80 94
81static void setprogdir(lua_State *L) 95static void setprogdir(lua_State *L)
@@ -96,9 +110,17 @@ static void setprogdir(lua_State *L)
96static void pusherror(lua_State *L) 110static void pusherror(lua_State *L)
97{ 111{
98 DWORD error = GetLastError(); 112 DWORD error = GetLastError();
113#if LJ_TARGET_XBOXONE
114 wchar_t wbuffer[128];
115 char buffer[128*2];
116 if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
117 NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
118 WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
119#else
99 char buffer[128]; 120 char buffer[128];
100 if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, 121 if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
101 NULL, error, 0, buffer, sizeof(buffer), NULL)) 122 NULL, error, 0, buffer, sizeof(buffer), NULL))
123#endif
102 lua_pushstring(L, buffer); 124 lua_pushstring(L, buffer);
103 else 125 else
104 lua_pushfstring(L, "system error %d\n", error); 126 lua_pushfstring(L, "system error %d\n", error);
@@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib)
111 133
112static void *ll_load(lua_State *L, const char *path, int gl) 134static void *ll_load(lua_State *L, const char *path, int gl)
113{ 135{
114 HINSTANCE lib = LoadLibraryA(path); 136 HINSTANCE lib = LJ_WIN_LOADLIBA(path);
115 if (lib == NULL) pusherror(L); 137 if (lib == NULL) pusherror(L);
116 UNUSED(gl); 138 UNUSED(gl);
117 return lib; 139 return lib;
@@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
124 return f; 146 return f;
125} 147}
126 148
149#if LJ_TARGET_UWP
150EXTERN_C IMAGE_DOS_HEADER __ImageBase;
151#endif
152
127static const char *ll_bcsym(void *lib, const char *sym) 153static const char *ll_bcsym(void *lib, const char *sym)
128{ 154{
129 if (lib) { 155 if (lib) {
130 return (const char *)GetProcAddress((HINSTANCE)lib, sym); 156 return (const char *)GetProcAddress((HINSTANCE)lib, sym);
131 } else { 157 } else {
158#if LJ_TARGET_UWP
159 return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
160#else
132 HINSTANCE h = GetModuleHandleA(NULL); 161 HINSTANCE h = GetModuleHandleA(NULL);
133 const char *p = (const char *)GetProcAddress(h, sym); 162 const char *p = (const char *)GetProcAddress(h, sym);
134 if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, 163 if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
135 (const char *)ll_bcsym, &h)) 164 (const char *)ll_bcsym, &h))
136 p = (const char *)GetProcAddress(h, sym); 165 p = (const char *)GetProcAddress(h, sym);
137 return p; 166 return p;
167#endif
138 } 168 }
139} 169}
140 170
@@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path)
185 lua_pop(L, 1); 215 lua_pop(L, 1);
186 plib = (void **)lua_newuserdata(L, sizeof(void *)); 216 plib = (void **)lua_newuserdata(L, sizeof(void *));
187 *plib = NULL; 217 *plib = NULL;
188 luaL_getmetatable(L, "_LOADLIB"); 218 luaL_setmetatable(L, "_LOADLIB");
189 lua_setmetatable(L, -2);
190 lua_pushfstring(L, "LOADLIB: %s", path); 219 lua_pushfstring(L, "LOADLIB: %s", path);
191 lua_pushvalue(L, -2); 220 lua_pushvalue(L, -2);
192 lua_settable(L, LUA_REGISTRYINDEX); 221 lua_settable(L, LUA_REGISTRYINDEX);
@@ -396,8 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
396 425
397/* ------------------------------------------------------------------------ */ 426/* ------------------------------------------------------------------------ */
398 427
399static const int sentinel_ = 0; 428#define KEY_SENTINEL (U64x(80000000,00000000)|'s')
400#define sentinel ((void *)&sentinel_)
401 429
402static int lj_cf_package_require(lua_State *L) 430static int lj_cf_package_require(lua_State *L)
403{ 431{
@@ -407,7 +435,7 @@ static int lj_cf_package_require(lua_State *L)
407 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); 435 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
408 lua_getfield(L, 2, name); 436 lua_getfield(L, 2, name);
409 if (lua_toboolean(L, -1)) { /* is it there? */ 437 if (lua_toboolean(L, -1)) { /* is it there? */
410 if (lua_touserdata(L, -1) == sentinel) /* check loops */ 438 if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */
411 luaL_error(L, "loop or previous error loading module " LUA_QS, name); 439 luaL_error(L, "loop or previous error loading module " LUA_QS, name);
412 return 1; /* package is already loaded */ 440 return 1; /* package is already loaded */
413 } 441 }
@@ -430,14 +458,14 @@ static int lj_cf_package_require(lua_State *L)
430 else 458 else
431 lua_pop(L, 1); 459 lua_pop(L, 1);
432 } 460 }
433 lua_pushlightuserdata(L, sentinel); 461 (L->top++)->u64 = KEY_SENTINEL;
434 lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ 462 lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
435 lua_pushstring(L, name); /* pass name as argument to module */ 463 lua_pushstring(L, name); /* pass name as argument to module */
436 lua_call(L, 1, 1); /* run loaded module */ 464 lua_call(L, 1, 1); /* run loaded module */
437 if (!lua_isnil(L, -1)) /* non-nil return? */ 465 if (!lua_isnil(L, -1)) /* non-nil return? */
438 lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ 466 lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
439 lua_getfield(L, 2, name); 467 lua_getfield(L, 2, name);
440 if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */ 468 if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */
441 lua_pushboolean(L, 1); /* use true as result */ 469 lua_pushboolean(L, 1); /* use true as result */
442 lua_pushvalue(L, -1); /* extra copy to be returned */ 470 lua_pushvalue(L, -1); /* extra copy to be returned */
443 lua_setfield(L, 2, name); /* _LOADED[name] = true */ 471 lua_setfield(L, 2, name); /* _LOADED[name] = true */
@@ -487,29 +515,19 @@ static void modinit(lua_State *L, const char *modname)
487static int lj_cf_package_module(lua_State *L) 515static int lj_cf_package_module(lua_State *L)
488{ 516{
489 const char *modname = luaL_checkstring(L, 1); 517 const char *modname = luaL_checkstring(L, 1);
490 int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ 518 int lastarg = (int)(L->top - L->base);
491 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); 519 luaL_pushmodule(L, modname, 1);
492 lua_getfield(L, loaded, modname); /* get _LOADED[modname] */
493 if (!lua_istable(L, -1)) { /* not found? */
494 lua_pop(L, 1); /* remove previous result */
495 /* try global variable (and create one if it does not exist) */
496 if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
497 lj_err_callerv(L, LJ_ERR_BADMODN, modname);
498 lua_pushvalue(L, -1);
499 lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */
500 }
501 /* check whether table already has a _NAME field */
502 lua_getfield(L, -1, "_NAME"); 520 lua_getfield(L, -1, "_NAME");
503 if (!lua_isnil(L, -1)) { /* is table an initialized module? */ 521 if (!lua_isnil(L, -1)) { /* Module already initialized? */
504 lua_pop(L, 1); 522 lua_pop(L, 1);
505 } else { /* no; initialize it */ 523 } else {
506 lua_pop(L, 1); 524 lua_pop(L, 1);
507 modinit(L, modname); 525 modinit(L, modname);
508 } 526 }
509 lua_pushvalue(L, -1); 527 lua_pushvalue(L, -1);
510 setfenv(L); 528 setfenv(L);
511 dooptions(L, loaded - 1); 529 dooptions(L, lastarg);
512 return 0; 530 return LJ_52;
513} 531}
514 532
515static int lj_cf_package_seeall(lua_State *L) 533static int lj_cf_package_seeall(lua_State *L)
@@ -580,13 +598,16 @@ LUALIB_API int luaopen_package(lua_State *L)
580 lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); 598 lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
581 lua_setfield(L, -2, "__gc"); 599 lua_setfield(L, -2, "__gc");
582 luaL_register(L, LUA_LOADLIBNAME, package_lib); 600 luaL_register(L, LUA_LOADLIBNAME, package_lib);
583 lua_pushvalue(L, -1); 601 lua_copy(L, -1, LUA_ENVIRONINDEX);
584 lua_replace(L, LUA_ENVIRONINDEX);
585 lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); 602 lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
586 for (i = 0; package_loaders[i] != NULL; i++) { 603 for (i = 0; package_loaders[i] != NULL; i++) {
587 lj_lib_pushcf(L, package_loaders[i], 1); 604 lj_lib_pushcf(L, package_loaders[i], 1);
588 lua_rawseti(L, -2, i+1); 605 lua_rawseti(L, -2, i+1);
589 } 606 }
607#if LJ_52
608 lua_pushvalue(L, -1);
609 lua_setfield(L, -3, "searchers");
610#endif
590 lua_setfield(L, -2, "loaders"); 611 lua_setfield(L, -2, "loaders");
591 lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); 612 lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
592 noenv = lua_toboolean(L, -1); 613 noenv = lua_toboolean(L, -1);
diff --git a/src/lib_string.c b/src/lib_string.c
index e534326a..75d855d6 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -6,8 +6,6 @@
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h 6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 7*/
8 8
9#include <stdio.h>
10
11#define lib_string_c 9#define lib_string_c
12#define LUA_LIB 10#define LUA_LIB
13 11
@@ -18,6 +16,7 @@
18#include "lj_obj.h" 16#include "lj_obj.h"
19#include "lj_gc.h" 17#include "lj_gc.h"
20#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
21#include "lj_str.h" 20#include "lj_str.h"
22#include "lj_tab.h" 21#include "lj_tab.h"
23#include "lj_meta.h" 22#include "lj_meta.h"
@@ -25,17 +24,19 @@
25#include "lj_ff.h" 24#include "lj_ff.h"
26#include "lj_bcdump.h" 25#include "lj_bcdump.h"
27#include "lj_char.h" 26#include "lj_char.h"
27#include "lj_strfmt.h"
28#include "lj_lib.h" 28#include "lj_lib.h"
29 29
30/* ------------------------------------------------------------------------ */ 30/* ------------------------------------------------------------------------ */
31 31
32#define LJLIB_MODULE_string 32#define LJLIB_MODULE_string
33 33
34LJLIB_ASM(string_len) LJLIB_REC(.) 34LJLIB_LUA(string_len) /*
35{ 35 function(s)
36 lj_lib_checkstr(L, 1); 36 CHECK_str(s)
37 return FFH_RETRY; 37 return #s
38} 38 end
39*/
39 40
40LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) 41LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
41{ 42{
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
57 lj_state_checkstack(L, (MSize)n); 58 lj_state_checkstack(L, (MSize)n);
58 p = (const unsigned char *)strdata(s) + start; 59 p = (const unsigned char *)strdata(s) + start;
59 for (i = 0; i < n; i++) 60 for (i = 0; i < n; i++)
60 setintV(L->base + i-1, p[i]); 61 setintV(L->base + i-1-LJ_FR2, p[i]);
61 return FFH_RES(n); 62 return FFH_RES(n);
62} 63}
63 64
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char) LJLIB_REC(.)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); 68 char *buf = lj_buf_tmp(L, (MSize)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
71 lj_err_arg(L, i, LJ_ERR_BADVAL); 72 lj_err_arg(L, i, LJ_ERR_BADVAL);
72 buf[i-1] = (char)k; 73 buf[i-1] = (char)k;
73 } 74 }
74 setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); 75 setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
75 return FFH_RES(1); 76 return FFH_RES(1);
76} 77}
77 78
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
83 return FFH_RETRY; 84 return FFH_RETRY;
84} 85}
85 86
86LJLIB_ASM(string_rep) 87LJLIB_CF(string_rep) LJLIB_REC(.)
87{ 88{
88 GCstr *s = lj_lib_checkstr(L, 1); 89 GCstr *s = lj_lib_checkstr(L, 1);
89 int32_t k = lj_lib_checkint(L, 2); 90 int32_t rep = lj_lib_checkint(L, 2);
90 GCstr *sep = lj_lib_optstr(L, 3); 91 GCstr *sep = lj_lib_optstr(L, 3);
91 int32_t len = (int32_t)s->len; 92 SBuf *sb = lj_buf_tmp_(L);
92 global_State *g = G(L); 93 if (sep && rep > 1) {
93 int64_t tlen; 94 GCstr *s2 = lj_buf_cat2str(L, sep, s);
94 const char *src; 95 lj_buf_reset(sb);
95 char *buf; 96 lj_buf_putstr(sb, s);
96 if (k <= 0) { 97 s = s2;
97 empty: 98 rep--;
98 setstrV(L, L->base-1, &g->strempty);
99 return FFH_RES(1);
100 }
101 if (sep) {
102 tlen = (int64_t)len + sep->len;
103 if (tlen > LJ_MAX_STR)
104 lj_err_caller(L, LJ_ERR_STROV);
105 tlen *= k;
106 if (tlen > LJ_MAX_STR)
107 lj_err_caller(L, LJ_ERR_STROV);
108 } else {
109 tlen = (int64_t)k * len;
110 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV);
112 }
113 if (tlen == 0) goto empty;
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
115 src = strdata(s);
116 if (sep) {
117 tlen -= sep->len; /* Ignore trailing separator. */
118 if (k > 1) { /* Paste one string and one separator. */
119 int32_t i;
120 i = 0; while (i < len) *buf++ = src[i++];
121 src = strdata(sep); len = sep->len;
122 i = 0; while (i < len) *buf++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */
124 }
125 } 99 }
126 do { 100 sb = lj_buf_putstr_rep(sb, s, rep);
127 int32_t i = 0; 101 setstrV(L, L->top-1, lj_buf_str(L, sb));
128 do { *buf++ = src[i++]; } while (i < len); 102 lj_gc_check(L);
129 } while (--k > 0); 103 return 1;
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1);
132} 104}
133 105
134LJLIB_ASM(string_reverse) 106LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
135{ 107{
136 GCstr *s = lj_lib_checkstr(L, 1); 108 lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
138 return FFH_RETRY; 109 return FFH_RETRY;
139} 110}
140LJLIB_ASM_(string_lower) 111LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
141LJLIB_ASM_(string_upper) 112LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
142 113
143/* ------------------------------------------------------------------------ */ 114/* ------------------------------------------------------------------------ */
144 115
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 116static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 117{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 118 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 119 UNUSED(L);
149 return 0; 120 return 0;
150} 121}
@@ -153,19 +124,19 @@ LJLIB_CF(string_dump)
153{ 124{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 125 GCfunc *fn = lj_lib_checkfunc(L, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 126 int strip = L->base+1 < L->top && tvistruecond(L->base+1);
156 luaL_Buffer b; 127 SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
157 L->top = L->base+1; 128 L->top = L->base+1;
158 luaL_buffinit(L, &b); 129 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 130 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 131 setstrV(L, L->top-1, lj_buf_str(L, sb));
132 lj_gc_check(L);
162 return 1; 133 return 1;
163} 134}
164 135
165/* ------------------------------------------------------------------------ */ 136/* ------------------------------------------------------------------------ */
166 137
167/* macro to `unsign' a character */ 138/* macro to `unsign' a character */
168#define uchar(c) ((unsigned char)(c)) 139#define uchar(c) ((unsigned char)(c))
169 140
170#define CAP_UNFINISHED (-1) 141#define CAP_UNFINISHED (-1)
171#define CAP_POSITION (-2) 142#define CAP_POSITION (-2)
@@ -183,7 +154,6 @@ typedef struct MatchState {
183} MatchState; 154} MatchState;
184 155
185#define L_ESC '%' 156#define L_ESC '%'
186#define SPECIALS "^$*+?.([%-"
187 157
188static int check_capture(MatchState *ms, int l) 158static int check_capture(MatchState *ms, int l)
189{ 159{
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
450 return s; 420 return s;
451} 421}
452 422
453static const char *lmemfind(const char *s1, size_t l1,
454 const char *s2, size_t l2)
455{
456 if (l2 == 0) {
457 return s1; /* empty strings are everywhere */
458 } else if (l2 > l1) {
459 return NULL; /* avoids a negative `l1' */
460 } else {
461 const char *init; /* to search for a `*s2' inside `s1' */
462 l2--; /* 1st char will be checked by `memchr' */
463 l1 = l1-l2; /* `s2' cannot be found after that */
464 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
465 init++; /* 1st char is already checked */
466 if (memcmp(init, s2+1, l2) == 0) {
467 return init-1;
468 } else { /* correct `l1' and `s1' to try again */
469 l1 -= (size_t)(init-s1);
470 s1 = init;
471 }
472 }
473 return NULL; /* not found */
474 }
475}
476
477static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) 423static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
478{ 424{
479 if (i >= ms->level) { 425 if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
501 return nlevels; /* number of strings pushed */ 447 return nlevels; /* number of strings pushed */
502} 448}
503 449
504static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
505{
506 /* relative string position: negative means back from end */
507 if (pos < 0) pos += (ptrdiff_t)len + 1;
508 return (pos >= 0) ? pos : 0;
509}
510
511static int str_find_aux(lua_State *L, int find) 450static int str_find_aux(lua_State *L, int find)
512{ 451{
513 size_t l1, l2; 452 GCstr *s = lj_lib_checkstr(L, 1);
514 const char *s = luaL_checklstring(L, 1, &l1); 453 GCstr *p = lj_lib_checkstr(L, 2);
515 const char *p = luaL_checklstring(L, 2, &l2); 454 int32_t start = lj_lib_optint(L, 3, 1);
516 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; 455 MSize st;
517 if (init < 0) { 456 if (start < 0) start += (int32_t)s->len; else start--;
518 init = 0; 457 if (start < 0) start = 0;
519 } else if ((size_t)(init) > l1) { 458 st = (MSize)start;
459 if (st > s->len) {
520#if LJ_52 460#if LJ_52
521 setnilV(L->top-1); 461 setnilV(L->top-1);
522 return 1; 462 return 1;
523#else 463#else
524 init = (ptrdiff_t)l1; 464 st = s->len;
525#endif 465#endif
526 } 466 }
527 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 467 if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
528 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 468 !lj_str_haspattern(p))) { /* Search for fixed string. */
529 /* do a plain search */ 469 const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
530 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); 470 if (q) {
531 if (s2) { 471 setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
532 lua_pushinteger(L, s2-s+1); 472 setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
533 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
534 return 2; 473 return 2;
535 } 474 }
536 } else { 475 } else { /* Search for pattern. */
537 MatchState ms; 476 MatchState ms;
538 int anchor = (*p == '^') ? (p++, 1) : 0; 477 const char *pstr = strdata(p);
539 const char *s1=s+init; 478 const char *sstr = strdata(s) + st;
479 int anchor = 0;
480 if (*pstr == '^') { pstr++; anchor = 1; }
540 ms.L = L; 481 ms.L = L;
541 ms.src_init = s; 482 ms.src_init = strdata(s);
542 ms.src_end = s+l1; 483 ms.src_end = strdata(s) + s->len;
543 do { 484 do { /* Loop through string and try to match the pattern. */
544 const char *res; 485 const char *q;
545 ms.level = ms.depth = 0; 486 ms.level = ms.depth = 0;
546 if ((res=match(&ms, s1, p)) != NULL) { 487 q = match(&ms, sstr, pstr);
488 if (q) {
547 if (find) { 489 if (find) {
548 lua_pushinteger(L, s1-s+1); /* start */ 490 setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
549 lua_pushinteger(L, res-s); /* end */ 491 setintV(L->top++, (int32_t)(q-strdata(s)));
550 return push_captures(&ms, NULL, 0) + 2; 492 return push_captures(&ms, NULL, NULL) + 2;
551 } else { 493 } else {
552 return push_captures(&ms, s1, res); 494 return push_captures(&ms, sstr, q);
553 } 495 }
554 } 496 }
555 } while (s1++ < ms.src_end && !anchor); 497 } while (sstr++ < ms.src_end && !anchor);
556 } 498 }
557 lua_pushnil(L); /* not found */ 499 setnilV(L->top-1); /* Not found. */
558 return 1; 500 return 1;
559} 501}
560 502
561LJLIB_CF(string_find) 503LJLIB_CF(string_find) LJLIB_REC(.)
562{ 504{
563 return str_find_aux(L, 1); 505 return str_find_aux(L, 1);
564} 506}
@@ -698,222 +640,16 @@ LJLIB_CF(string_gsub)
698 640
699/* ------------------------------------------------------------------------ */ 641/* ------------------------------------------------------------------------ */
700 642
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 643LJLIB_CF(string_format) LJLIB_REC(.)
702#define MAX_FMTITEM 512
703/* valid flags in a format specification */
704#define FMT_FLAGS "-+ #0"
705/*
706** maximum size of each format specification (such as '%-099.99d')
707** (+10 accounts for %99.99x plus margin of error)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
712{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str);
716 luaL_addchar(b, '"');
717 while (len--) {
718 uint32_t c = uchar(*s);
719 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\');
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d;
723 luaL_addchar(b, '\\');
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens;
727 } else if (c >= 10) {
728 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
730 }
731 c += '0';
732 }
733 luaL_addchar(b, c);
734 s++;
735 }
736 luaL_addchar(b, '"');
737}
738
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
740{
741 const char *p = strfrmt;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
744 lj_err_caller(L, LJ_ERR_STRFMTR);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
747 if (*p == '.') {
748 p++;
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
751 }
752 if (lj_char_isdigit(uchar(*p)))
753 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
756 form += p - strfrmt + 1;
757 *form = '\0';
758 return p;
759}
760
761static void addintlen(char *form)
762{
763 size_t l = strlen(form);
764 char spec = form[l - 1];
765 strcpy(form + l - 1, LUA_INTFRMLEN);
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
768}
769
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
771{
772 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
774 } else {
775 cTValue *o;
776 lj_lib_checknumber(L, arg);
777 o = L->base+arg-1;
778 if (tvisint(o))
779 return (LUA_INTFRM_T)intV(o);
780 else
781 return (LUA_INTFRM_T)numV(o);
782 }
783}
784
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
786{ 644{
787 if (sizeof(LUA_INTFRM_T) == 4) { 645 int retry = 0;
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); 646 SBuf *sb;
789 } else { 647 do {
790 cTValue *o; 648 sb = lj_buf_tmp_(L);
791 lj_lib_checknumber(L, arg); 649 retry = lj_strfmt_putarg(L, sb, 1, -retry);
792 o = L->base+arg-1; 650 } while (retry > 0);
793 if (tvisint(o)) 651 setstrV(L, L->top-1, lj_buf_str(L, sb));
794 return (unsigned LUA_INTFRM_T)intV(o); 652 lj_gc_check(L);
795 else if ((int32_t)o->u32.hi < 0)
796 return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
797 else
798 return (unsigned LUA_INTFRM_T)numV(o);
799 }
800}
801
802static GCstr *meta_tostring(lua_State *L, int arg)
803{
804 TValue *o = L->base+arg-1;
805 cTValue *mo;
806 lua_assert(o < L->top); /* Caller already checks for existence. */
807 if (LJ_LIKELY(tvisstr(o)))
808 return strV(o);
809 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
810 copyTV(L, L->top++, mo);
811 copyTV(L, L->top++, o);
812 lua_call(L, 1, 1);
813 L->top--;
814 if (tvisstr(L->top))
815 return strV(L->top);
816 o = L->base+arg-1;
817 copyTV(L, o, L->top);
818 }
819 if (tvisnumber(o)) {
820 return lj_str_fromnumber(L, o);
821 } else if (tvisnil(o)) {
822 return lj_str_newlit(L, "nil");
823 } else if (tvisfalse(o)) {
824 return lj_str_newlit(L, "false");
825 } else if (tvistrue(o)) {
826 return lj_str_newlit(L, "true");
827 } else {
828 if (tvisfunc(o) && isffunc(funcV(o)))
829 lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
830 else
831 lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
832 L->top--;
833 return strV(L->top);
834 }
835}
836
837LJLIB_CF(string_format)
838{
839 int arg = 1, top = (int)(L->top - L->base);
840 GCstr *fmt = lj_lib_checkstr(L, arg);
841 const char *strfrmt = strdata(fmt);
842 const char *strfrmt_end = strfrmt + fmt->len;
843 luaL_Buffer b;
844 luaL_buffinit(L, &b);
845 while (strfrmt < strfrmt_end) {
846 if (*strfrmt != L_ESC) {
847 luaL_addchar(&b, *strfrmt++);
848 } else if (*++strfrmt == L_ESC) {
849 luaL_addchar(&b, *strfrmt++); /* %% */
850 } else { /* format item */
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */
852 char buff[MAX_FMTITEM]; /* to store the formatted item */
853 int n = 0;
854 if (++arg > top)
855 luaL_argerror(L, arg, lj_obj_typename[0]);
856 strfrmt = scanformat(L, strfrmt, form);
857 switch (*strfrmt++) {
858 case 'c':
859 n = sprintf(buff, form, lj_lib_checkint(L, arg));
860 break;
861 case 'd': case 'i':
862 addintlen(form);
863 n = sprintf(buff, form, num2intfrm(L, arg));
864 break;
865 case 'o': case 'u': case 'x': case 'X':
866 addintlen(form);
867 n = sprintf(buff, form, num2uintfrm(L, arg));
868 break;
869 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
870 TValue tv;
871 tv.n = lj_lib_checknum(L, arg);
872 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
873 /* Canonicalize output of non-finite values. */
874 char *p, nbuf[LJ_STR_NUMBUF];
875 size_t len = lj_str_bufnum(nbuf, &tv);
876 if (strfrmt[-1] < 'a') {
877 nbuf[len-3] = nbuf[len-3] - 0x20;
878 nbuf[len-2] = nbuf[len-2] - 0x20;
879 nbuf[len-1] = nbuf[len-1] - 0x20;
880 }
881 nbuf[len] = '\0';
882 for (p = form; *p < 'A' && *p != '.'; p++) ;
883 *p++ = 's'; *p = '\0';
884 n = sprintf(buff, form, nbuf);
885 break;
886 }
887 n = sprintf(buff, form, (double)tv.n);
888 break;
889 }
890 case 'q':
891 addquoted(L, &b, arg);
892 continue;
893 case 'p':
894 lj_str_pushf(L, "%p", lua_topointer(L, arg));
895 luaL_addvalue(&b);
896 continue;
897 case 's': {
898 GCstr *str = meta_tostring(L, arg);
899 if (!strchr(form, '.') && str->len >= 100) {
900 /* no precision and string is too long to be formatted;
901 keep original string */
902 setstrV(L, L->top++, str);
903 luaL_addvalue(&b);
904 continue;
905 }
906 n = sprintf(buff, form, strdata(str));
907 break;
908 }
909 default:
910 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
911 break;
912 }
913 luaL_addlstring(&b, buff, n);
914 }
915 }
916 luaL_pushresult(&b);
917 return 1; 653 return 1;
918} 654}
919 655
@@ -926,16 +662,15 @@ LUALIB_API int luaopen_string(lua_State *L)
926 GCtab *mt; 662 GCtab *mt;
927 global_State *g; 663 global_State *g;
928 LJ_LIB_REG(L, LUA_STRLIBNAME, string); 664 LJ_LIB_REG(L, LUA_STRLIBNAME, string);
929#if defined(LUA_COMPAT_GFIND) && !LJ_52
930 lua_getfield(L, -1, "gmatch");
931 lua_setfield(L, -2, "gfind");
932#endif
933 mt = lj_tab_new(L, 0, 1); 665 mt = lj_tab_new(L, 0, 1);
934 /* NOBARRIER: basemt is a GC root. */ 666 /* NOBARRIER: basemt is a GC root. */
935 g = G(L); 667 g = G(L);
936 setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); 668 setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
937 settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); 669 settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
938 mt->nomm = (uint8_t)(~(1u<<MM_index)); 670 mt->nomm = (uint8_t)(~(1u<<MM_index));
671#if LJ_HASBUFFER
672 lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
673#endif
939 return 1; 674 return 1;
940} 675}
941 676
diff --git a/src/lib_table.c b/src/lib_table.c
index 5471bc01..0214bb40 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,57 +16,43 @@
16#include "lj_obj.h" 16#include "lj_obj.h"
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
21#include "lj_ff.h"
20#include "lj_lib.h" 22#include "lj_lib.h"
21 23
22/* ------------------------------------------------------------------------ */ 24/* ------------------------------------------------------------------------ */
23 25
24#define LJLIB_MODULE_table 26#define LJLIB_MODULE_table
25 27
26LJLIB_CF(table_foreachi) 28LJLIB_LUA(table_foreachi) /*
27{ 29 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 30 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 31 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 32 for i=1,#t do
31 for (i = 1; i <= n; i++) { 33 local r = f(i, t[i])
32 cTValue *val; 34 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 35 end
34 setintV(L->top+1, i); 36 end
35 val = lj_tab_getint(t, (int32_t)i); 37*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 38
46LJLIB_CF(table_foreach) 39LJLIB_LUA(table_foreach) /*
47{ 40 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 41 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 42 CHECK_func(f)
50 L->top = L->base+3; 43 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 44 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 45 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 46 end
54 copyTV(L, L->top+1, L->top-1); 47 end
55 setfuncV(L, L->top, func); 48*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 49
65LJLIB_ASM(table_getn) LJLIB_REC(.) 50LJLIB_LUA(table_getn) /*
66{ 51 function(t)
67 lj_lib_checktab(L, 1); 52 CHECK_tab(t)
68 return FFH_UNREACHABLE; 53 return #t
69} 54 end
55*/
70 56
71LJLIB_CF(table_maxn) 57LJLIB_CF(table_maxn)
72{ 58{
@@ -119,52 +105,67 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
119 return 0; 105 return 0;
120} 106}
121 107
122LJLIB_CF(table_remove) LJLIB_REC(.) 108LJLIB_LUA(table_remove) /*
123{ 109 function(t, pos)
124 GCtab *t = lj_lib_checktab(L, 1); 110 CHECK_tab(t)
125 int32_t e = (int32_t)lj_tab_len(t); 111 local len = #t
126 int32_t pos = lj_lib_optint(L, 2, e); 112 if pos == nil then
127 if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ 113 if len ~= 0 then
128 return 0; 114 local old = t[len]
129 lua_rawgeti(L, 1, pos); /* Get previous value. */ 115 t[len] = nil
130 /* NOBARRIER: This just moves existing elements around. */ 116 return old
131 for (; pos < e; pos++) { 117 end
132 cTValue *src = lj_tab_getint(t, pos+1); 118 else
133 TValue *dst = lj_tab_setint(L, t, pos); 119 CHECK_int(pos)
134 if (src) { 120 if pos >= 1 and pos <= len then
135 copyTV(L, dst, src); 121 local old = t[pos]
136 } else { 122 for i=pos+1,len do
137 setnilV(dst); 123 t[i-1] = t[i]
138 } 124 end
139 } 125 t[len] = nil
140 setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ 126 return old
141 return 1; /* Return previous value. */ 127 end
142} 128 end
129 end
130*/
131
132LJLIB_LUA(table_move) /*
133 function(a1, f, e, t, a2)
134 CHECK_tab(a1)
135 CHECK_int(f)
136 CHECK_int(e)
137 CHECK_int(t)
138 if a2 == nil then a2 = a1 end
139 CHECK_tab(a2)
140 if e >= f then
141 local d = t - f
142 if t > e or t <= f or a2 ~= a1 then
143 for i=f,e do a2[i+d] = a1[i] end
144 else
145 for i=e,f,-1 do a2[i+d] = a1[i] end
146 end
147 end
148 return a2
149 end
150*/
143 151
144LJLIB_CF(table_concat) 152LJLIB_CF(table_concat) LJLIB_REC(.)
145{ 153{
146 luaL_Buffer b;
147 GCtab *t = lj_lib_checktab(L, 1); 154 GCtab *t = lj_lib_checktab(L, 1);
148 GCstr *sep = lj_lib_optstr(L, 2); 155 GCstr *sep = lj_lib_optstr(L, 2);
149 MSize seplen = sep ? sep->len : 0;
150 int32_t i = lj_lib_optint(L, 3, 1); 156 int32_t i = lj_lib_optint(L, 3, 1);
151 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? 157 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
152 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); 158 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
153 luaL_buffinit(L, &b); 159 SBuf *sb = lj_buf_tmp_(L);
154 if (i <= e) { 160 SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
155 for (;;) { 161 if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
156 cTValue *o; 162 int32_t idx = (int32_t)(intptr_t)sb->w;
157 lua_rawgeti(L, 1, i); 163 cTValue *o = lj_tab_getint(t, idx);
158 o = L->top-1; 164 lj_err_callerv(L, LJ_ERR_TABCAT,
159 if (!(tvisstr(o) || tvisnumber(o))) 165 lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
160 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
161 luaL_addvalue(&b);
162 if (i++ == e) break;
163 if (seplen)
164 luaL_addlstring(&b, strdata(sep), seplen);
165 }
166 } 166 }
167 luaL_pushresult(&b); 167 setstrV(L, L->top-1, lj_buf_str(L, sbx));
168 lj_gc_check(L);
168 return 1; 169 return 1;
169} 170}
170 171
@@ -284,6 +285,30 @@ LJLIB_CF(table_pack)
284} 285}
285#endif 286#endif
286 287
288LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.)
289{
290 int32_t a = lj_lib_checkint(L, 1);
291 int32_t h = lj_lib_checkint(L, 2);
292 lua_createtable(L, a, h);
293 return 1;
294}
295
296LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.)
297{
298 lj_tab_clear(lj_lib_checktab(L, 1));
299 return 0;
300}
301
302static int luaopen_table_new(lua_State *L)
303{
304 return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
305}
306
307static int luaopen_table_clear(lua_State *L)
308{
309 return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
310}
311
287/* ------------------------------------------------------------------------ */ 312/* ------------------------------------------------------------------------ */
288 313
289#include "lj_libdef.h" 314#include "lj_libdef.h"
@@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L)
295 lua_getglobal(L, "unpack"); 320 lua_getglobal(L, "unpack");
296 lua_setfield(L, -2, "unpack"); 321 lua_setfield(L, -2, "unpack");
297#endif 322#endif
323 lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
324 lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
298 return 1; 325 return 1;
299} 326}
300 327
diff --git a/src/lj.supp b/src/lj.supp
deleted file mode 100644
index 217f7c89..00000000
--- a/src/lj.supp
+++ /dev/null
@@ -1,41 +0,0 @@
1# Valgrind suppression file for LuaJIT 2.0.
2{
3 Optimized string compare
4 Memcheck:Addr4
5 fun:lj_str_cmp
6}
7{
8 Optimized string compare
9 Memcheck:Addr1
10 fun:lj_str_cmp
11}
12{
13 Optimized string compare
14 Memcheck:Addr4
15 fun:lj_str_new
16}
17{
18 Optimized string compare
19 Memcheck:Addr1
20 fun:lj_str_new
21}
22{
23 Optimized string compare
24 Memcheck:Cond
25 fun:lj_str_new
26}
27{
28 Optimized string compare
29 Memcheck:Addr4
30 fun:str_fastcmp
31}
32{
33 Optimized string compare
34 Memcheck:Addr1
35 fun:str_fastcmp
36}
37{
38 Optimized string compare
39 Memcheck:Cond
40 fun:str_fastcmp
41}
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 9adaa0e5..165203fa 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -31,6 +31,7 @@
31#include "lj_def.h" 31#include "lj_def.h"
32#include "lj_arch.h" 32#include "lj_arch.h"
33#include "lj_alloc.h" 33#include "lj_alloc.h"
34#include "lj_prng.h"
34 35
35#ifndef LUAJIT_USE_SYSMALLOC 36#ifndef LUAJIT_USE_SYSMALLOC
36 37
@@ -72,15 +73,58 @@
72 73
73#define IS_DIRECT_BIT (SIZE_T_ONE) 74#define IS_DIRECT_BIT (SIZE_T_ONE)
74 75
76
77/* Determine system-specific block allocation method. */
75#if LJ_TARGET_WINDOWS 78#if LJ_TARGET_WINDOWS
76 79
77#define WIN32_LEAN_AND_MEAN 80#define WIN32_LEAN_AND_MEAN
78#include <windows.h> 81#include <windows.h>
79 82
83#define LJ_ALLOC_VIRTUALALLOC 1
84
85#if LJ_64 && !LJ_GC64
86#define LJ_ALLOC_NTAVM 1
87#endif
88
89#else
90
91#include <errno.h>
92/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
93#include <sys/mman.h>
94
95#define LJ_ALLOC_MMAP 1
96
80#if LJ_64 97#if LJ_64
81 98
99#define LJ_ALLOC_MMAP_PROBE 1
100
101#if LJ_GC64
102#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
103#elif LJ_TARGET_X64 && LJ_HASJIT
104/* Due to limitations in the x64 compiler backend. */
105#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
106#else
107#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
108#endif
109
110#endif
111
112#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
113#define LJ_ALLOC_MMAP32 1
114#endif
115
116#if LJ_TARGET_LINUX
117#define LJ_ALLOC_MREMAP 1
118#endif
119
120#endif
121
122
123#if LJ_ALLOC_VIRTUALALLOC
124
125#if LJ_ALLOC_NTAVM
82/* Undocumented, but hey, that's what we all love so much about Windows. */ 126/* Undocumented, but hey, that's what we all love so much about Windows. */
83typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, 127typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits,
84 size_t *size, ULONG alloctype, ULONG prot); 128 size_t *size, ULONG alloctype, ULONG prot);
85static PNTAVM ntavm; 129static PNTAVM ntavm;
86 130
@@ -89,14 +133,15 @@ static PNTAVM ntavm;
89*/ 133*/
90#define NTAVM_ZEROBITS 1 134#define NTAVM_ZEROBITS 1
91 135
92static void INIT_MMAP(void) 136static void init_mmap(void)
93{ 137{
94 ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), 138 ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
95 "NtAllocateVirtualMemory"); 139 "NtAllocateVirtualMemory");
96} 140}
141#define INIT_MMAP() init_mmap()
97 142
98/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ 143/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
99static LJ_AINLINE void *CALL_MMAP(size_t size) 144static void *mmap_plain(size_t size)
100{ 145{
101 DWORD olderr = GetLastError(); 146 DWORD olderr = GetLastError();
102 void *ptr = NULL; 147 void *ptr = NULL;
@@ -107,7 +152,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
107} 152}
108 153
109/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ 154/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
110static LJ_AINLINE void *DIRECT_MMAP(size_t size) 155static void *direct_mmap(size_t size)
111{ 156{
112 DWORD olderr = GetLastError(); 157 DWORD olderr = GetLastError();
113 void *ptr = NULL; 158 void *ptr = NULL;
@@ -119,31 +164,32 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
119 164
120#else 165#else
121 166
122#define INIT_MMAP() ((void)0)
123
124/* Win32 MMAP via VirtualAlloc */ 167/* Win32 MMAP via VirtualAlloc */
125static LJ_AINLINE void *CALL_MMAP(size_t size) 168static void *mmap_plain(size_t size)
126{ 169{
127 DWORD olderr = GetLastError(); 170 DWORD olderr = GetLastError();
128 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); 171 void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
129 SetLastError(olderr); 172 SetLastError(olderr);
130 return ptr ? ptr : MFAIL; 173 return ptr ? ptr : MFAIL;
131} 174}
132 175
133/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ 176/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
134static LJ_AINLINE void *DIRECT_MMAP(size_t size) 177static void *direct_mmap(size_t size)
135{ 178{
136 DWORD olderr = GetLastError(); 179 DWORD olderr = GetLastError();
137 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, 180 void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
138 PAGE_READWRITE); 181 PAGE_READWRITE);
139 SetLastError(olderr); 182 SetLastError(olderr);
140 return ptr ? ptr : MFAIL; 183 return ptr ? ptr : MFAIL;
141} 184}
142 185
143#endif 186#endif
144 187
188#define CALL_MMAP(prng, size) mmap_plain(size)
189#define DIRECT_MMAP(prng, size) direct_mmap(size)
190
145/* This function supports releasing coalesed segments */ 191/* This function supports releasing coalesed segments */
146static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) 192static int CALL_MUNMAP(void *ptr, size_t size)
147{ 193{
148 DWORD olderr = GetLastError(); 194 DWORD olderr = GetLastError();
149 MEMORY_BASIC_INFORMATION minfo; 195 MEMORY_BASIC_INFORMATION minfo;
@@ -163,10 +209,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
163 return 0; 209 return 0;
164} 210}
165 211
166#else 212#elif LJ_ALLOC_MMAP
167
168#include <errno.h>
169#include <sys/mman.h>
170 213
171#define MMAP_PROT (PROT_READ|PROT_WRITE) 214#define MMAP_PROT (PROT_READ|PROT_WRITE)
172#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) 215#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@@ -174,105 +217,134 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
174#endif 217#endif
175#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) 218#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
176 219
177#if LJ_64 220#if LJ_ALLOC_MMAP_PROBE
178/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
179 221
180#if defined(MAP_32BIT) 222#ifdef MAP_TRYFIXED
181 223#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED)
182#if defined(__sun__)
183#define MMAP_REGION_START ((uintptr_t)0x1000)
184#else 224#else
185/* Actually this only gives us max. 1GB in current Linux kernels. */ 225#define MMAP_FLAGS_PROBE MMAP_FLAGS
186#define MMAP_REGION_START ((uintptr_t)0)
187#endif 226#endif
188 227
189static LJ_AINLINE void *CALL_MMAP(size_t size) 228#define LJ_ALLOC_MMAP_PROBE_MAX 30
229#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
230
231#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
232
233static void *mmap_probe(PRNGState *rs, size_t size)
190{ 234{
235 /* Hint for next allocation. Doesn't need to be thread-safe. */
236 static uintptr_t hint_addr = 0;
191 int olderr = errno; 237 int olderr = errno;
192 void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); 238 int retry;
239 for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
240 void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
241 uintptr_t addr = (uintptr_t)p;
242 if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
243 ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
244 /* We got a suitable address. Bump the hint address. */
245 hint_addr = addr + size;
246 errno = olderr;
247 return p;
248 }
249 if (p != MFAIL) {
250 munmap(p, size);
251 } else if (errno == ENOMEM) {
252 return MFAIL;
253 }
254 if (hint_addr) {
255 /* First, try linear probing. */
256 if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
257 hint_addr += 0x1000000;
258 if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
259 hint_addr = 0;
260 continue;
261 } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
262 /* Next, try a no-hint probe to get back an ASLR address. */
263 hint_addr = 0;
264 continue;
265 }
266 }
267 /* Finally, try pseudo-random probing. */
268 do {
269 hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE);
270 } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
271 }
193 errno = olderr; 272 errno = olderr;
194 return ptr; 273 return MFAIL;
195} 274}
196 275
197#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN 276#endif
277
278#if LJ_ALLOC_MMAP32
198 279
199/* OSX and FreeBSD mmap() use a naive first-fit linear search. 280#if LJ_TARGET_SOLARIS
200** That's perfect for us. Except that -pagezero_size must be set for OSX, 281#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
201** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
202** to be reduced to 250MB on FreeBSD.
203*/
204#if LJ_TARGET_OSX || defined(__DragonFly__)
205#define MMAP_REGION_START ((uintptr_t)0x10000)
206#elif LJ_TARGET_PS4
207#define MMAP_REGION_START ((uintptr_t)0x4000)
208#else 282#else
209#define MMAP_REGION_START ((uintptr_t)0x10000000) 283#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
210#endif 284#endif
211#define MMAP_REGION_END ((uintptr_t)0x80000000)
212 285
213#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 286#if LJ_ALLOC_MMAP_PROBE
214#include <sys/resource.h> 287static void *mmap_map32(PRNGState *rs, size_t size)
288#else
289static void *mmap_map32(size_t size)
215#endif 290#endif
216
217static LJ_AINLINE void *CALL_MMAP(size_t size)
218{ 291{
219 int olderr = errno; 292#if LJ_ALLOC_MMAP_PROBE
220 /* Hint for next allocation. Doesn't need to be thread-safe. */ 293 static int fallback = 0;
221 static uintptr_t alloc_hint = MMAP_REGION_START; 294 if (fallback)
222 int retry = 0; 295 return mmap_probe(rs, size);
223#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
224 static int rlimit_modified = 0;
225 if (LJ_UNLIKELY(rlimit_modified == 0)) {
226 struct rlimit rlim;
227 rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
228 setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */
229 rlimit_modified = 1;
230 }
231#endif 296#endif
232 for (;;) { 297 {
233 void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); 298 int olderr = errno;
234 if ((uintptr_t)p >= MMAP_REGION_START && 299 void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
235 (uintptr_t)p + size < MMAP_REGION_END) { 300 errno = olderr;
236 alloc_hint = (uintptr_t)p + size; 301 /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */
237 errno = olderr; 302#if LJ_ALLOC_MMAP_PROBE
238 return p; 303 if (ptr == MFAIL) {
304 fallback = 1;
305 return mmap_probe(rs, size);
239 } 306 }
240 if (p != CMFAIL) munmap(p, size);
241#if defined(__sun__) || defined(__DragonFly__)
242 alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */
243 if (alloc_hint + size < MMAP_REGION_END) continue;
244#endif 307#endif
245 if (retry) break; 308 return ptr;
246 retry = 1;
247 alloc_hint = MMAP_REGION_START;
248 } 309 }
249 errno = olderr;
250 return CMFAIL;
251} 310}
252 311
253#else
254
255#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
256
257#endif 312#endif
258 313
314#if LJ_ALLOC_MMAP32
315#if LJ_ALLOC_MMAP_PROBE
316#define CALL_MMAP(prng, size) mmap_map32(prng, size)
259#else 317#else
260 318#define CALL_MMAP(prng, size) mmap_map32(size)
261/* 32 bit mode is easy. */ 319#endif
262static LJ_AINLINE void *CALL_MMAP(size_t size) 320#elif LJ_ALLOC_MMAP_PROBE
321#define CALL_MMAP(prng, size) mmap_probe(prng, size)
322#else
323static void *mmap_plain(size_t size)
263{ 324{
264 int olderr = errno; 325 int olderr = errno;
265 void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); 326 void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
266 errno = olderr; 327 errno = olderr;
267 return ptr; 328 return ptr;
268} 329}
269 330#define CALL_MMAP(prng, size) mmap_plain(size)
270#endif 331#endif
271 332
272#define INIT_MMAP() ((void)0) 333#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
273#define DIRECT_MMAP(s) CALL_MMAP(s) 334
335#include <sys/resource.h>
274 336
275static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) 337static void init_mmap(void)
338{
339 struct rlimit rlim;
340 rlim.rlim_cur = rlim.rlim_max = 0x10000;
341 setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
342}
343#define INIT_MMAP() init_mmap()
344
345#endif
346
347static int CALL_MUNMAP(void *ptr, size_t size)
276{ 348{
277 int olderr = errno; 349 int olderr = errno;
278 int ret = munmap(ptr, size); 350 int ret = munmap(ptr, size);
@@ -280,10 +352,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
280 return ret; 352 return ret;
281} 353}
282 354
283#if LJ_TARGET_LINUX 355#if LJ_ALLOC_MREMAP
284/* Need to define _GNU_SOURCE to get the mremap prototype. */ 356/* Need to define _GNU_SOURCE to get the mremap prototype. */
285static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, 357static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
286 int flags)
287{ 358{
288 int olderr = errno; 359 int olderr = errno;
289 ptr = mremap(ptr, osz, nsz, flags); 360 ptr = mremap(ptr, osz, nsz, flags);
@@ -294,7 +365,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
294#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) 365#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
295#define CALL_MREMAP_NOMOVE 0 366#define CALL_MREMAP_NOMOVE 0
296#define CALL_MREMAP_MAYMOVE 1 367#define CALL_MREMAP_MAYMOVE 1
297#if LJ_64 368#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
298#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE 369#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
299#else 370#else
300#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE 371#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
@@ -303,6 +374,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
303 374
304#endif 375#endif
305 376
377
378#ifndef INIT_MMAP
379#define INIT_MMAP() ((void)0)
380#endif
381
382#ifndef DIRECT_MMAP
383#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s)
384#endif
385
306#ifndef CALL_MREMAP 386#ifndef CALL_MREMAP
307#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) 387#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
308#endif 388#endif
@@ -459,6 +539,7 @@ struct malloc_state {
459 mchunkptr smallbins[(NSMALLBINS+1)*2]; 539 mchunkptr smallbins[(NSMALLBINS+1)*2];
460 tbinptr treebins[NTREEBINS]; 540 tbinptr treebins[NTREEBINS];
461 msegment seg; 541 msegment seg;
542 PRNGState *prng;
462}; 543};
463 544
464typedef struct malloc_state *mstate; 545typedef struct malloc_state *mstate;
@@ -516,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss)
516 noncontiguous segments are added. 597 noncontiguous segments are added.
517*/ 598*/
518#define TOP_FOOT_SIZE\ 599#define TOP_FOOT_SIZE\
519 (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) 600 (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
520 601
521/* ---------------------------- Indexing Bins ---------------------------- */ 602/* ---------------------------- Indexing Bins ---------------------------- */
522 603
@@ -741,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss)
741 822
742/* ----------------------- Direct-mmapping chunks ----------------------- */ 823/* ----------------------- Direct-mmapping chunks ----------------------- */
743 824
744static void *direct_alloc(size_t nb) 825static void *direct_alloc(mstate m, size_t nb)
745{ 826{
746 size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); 827 size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
747 if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ 828 if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */
748 char *mm = (char *)(DIRECT_MMAP(mmsize)); 829 char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize));
749 if (mm != CMFAIL) { 830 if (mm != CMFAIL) {
750 size_t offset = align_offset(chunk2mem(mm)); 831 size_t offset = align_offset(chunk2mem(mm));
751 size_t psize = mmsize - offset - DIRECT_FOOT_PAD; 832 size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
@@ -757,6 +838,7 @@ static void *direct_alloc(size_t nb)
757 return chunk2mem(p); 838 return chunk2mem(p);
758 } 839 }
759 } 840 }
841 UNUSED(m);
760 return NULL; 842 return NULL;
761} 843}
762 844
@@ -905,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb)
905 987
906 /* Directly map large chunks */ 988 /* Directly map large chunks */
907 if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { 989 if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
908 void *mem = direct_alloc(nb); 990 void *mem = direct_alloc(m, nb);
909 if (mem != 0) 991 if (mem != 0)
910 return mem; 992 return mem;
911 } 993 }
@@ -914,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb)
914 size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; 996 size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
915 size_t rsize = granularity_align(req); 997 size_t rsize = granularity_align(req);
916 if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ 998 if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
917 char *mp = (char *)(CALL_MMAP(rsize)); 999 char *mp = (char *)(CALL_MMAP(m->prng, rsize));
918 if (mp != CMFAIL) { 1000 if (mp != CMFAIL) {
919 tbase = mp; 1001 tbase = mp;
920 tsize = rsize; 1002 tsize = rsize;
@@ -1141,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb)
1141 1223
1142/* ----------------------------------------------------------------------- */ 1224/* ----------------------------------------------------------------------- */
1143 1225
1144void *lj_alloc_create(void) 1226void *lj_alloc_create(PRNGState *rs)
1145{ 1227{
1146 size_t tsize = DEFAULT_GRANULARITY; 1228 size_t tsize = DEFAULT_GRANULARITY;
1147 char *tbase; 1229 char *tbase;
1148 INIT_MMAP(); 1230 INIT_MMAP();
1149 tbase = (char *)(CALL_MMAP(tsize)); 1231 UNUSED(rs);
1232 tbase = (char *)(CALL_MMAP(rs, tsize));
1150 if (tbase != CMFAIL) { 1233 if (tbase != CMFAIL) {
1151 size_t msize = pad_request(sizeof(struct malloc_state)); 1234 size_t msize = pad_request(sizeof(struct malloc_state));
1152 mchunkptr mn; 1235 mchunkptr mn;
@@ -1165,6 +1248,12 @@ void *lj_alloc_create(void)
1165 return NULL; 1248 return NULL;
1166} 1249}
1167 1250
1251void lj_alloc_setprng(void *msp, PRNGState *rs)
1252{
1253 mstate ms = (mstate)msp;
1254 ms->prng = rs;
1255}
1256
1168void lj_alloc_destroy(void *msp) 1257void lj_alloc_destroy(void *msp)
1169{ 1258{
1170 mstate ms = (mstate)msp; 1259 mstate ms = (mstate)msp;
diff --git a/src/lj_alloc.h b/src/lj_alloc.h
index f87a7cf3..669f50b7 100644
--- a/src/lj_alloc.h
+++ b/src/lj_alloc.h
@@ -9,7 +9,8 @@
9#include "lj_def.h" 9#include "lj_def.h"
10 10
11#ifndef LUAJIT_USE_SYSMALLOC 11#ifndef LUAJIT_USE_SYSMALLOC
12LJ_FUNC void *lj_alloc_create(void); 12LJ_FUNC void *lj_alloc_create(PRNGState *rs);
13LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs);
13LJ_FUNC void lj_alloc_destroy(void *msp); 14LJ_FUNC void lj_alloc_destroy(void *msp);
14LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); 15LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
15#endif 16#endif
diff --git a/src/lj_api.c b/src/lj_api.c
index 933bffb9..18a7ecbc 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -24,11 +24,12 @@
24#include "lj_trace.h" 24#include "lj_trace.h"
25#include "lj_vm.h" 25#include "lj_vm.h"
26#include "lj_strscan.h" 26#include "lj_strscan.h"
27#include "lj_strfmt.h"
27 28
28/* -- Common helper functions --------------------------------------------- */ 29/* -- Common helper functions --------------------------------------------- */
29 30
30#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) 31#define lj_checkapi_slot(idx) \
31#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) 32 lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx))
32 33
33static TValue *index2adr(lua_State *L, int idx) 34static TValue *index2adr(lua_State *L, int idx)
34{ 35{
@@ -36,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx)
36 TValue *o = L->base + (idx - 1); 37 TValue *o = L->base + (idx - 1);
37 return o < L->top ? o : niltv(L); 38 return o < L->top ? o : niltv(L);
38 } else if (idx > LUA_REGISTRYINDEX) { 39 } else if (idx > LUA_REGISTRYINDEX) {
39 api_check(L, idx != 0 && -idx <= L->top - L->base); 40 lj_checkapi(idx != 0 && -idx <= L->top - L->base,
41 "bad stack slot %d", idx);
40 return L->top + idx; 42 return L->top + idx;
41 } else if (idx == LUA_GLOBALSINDEX) { 43 } else if (idx == LUA_GLOBALSINDEX) {
42 TValue *o = &G(L)->tmptv; 44 TValue *o = &G(L)->tmptv;
@@ -46,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx)
46 return registry(L); 48 return registry(L);
47 } else { 49 } else {
48 GCfunc *fn = curr_func(L); 50 GCfunc *fn = curr_func(L);
49 api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); 51 lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn),
52 "calling frame is not a C function");
50 if (idx == LUA_ENVIRONINDEX) { 53 if (idx == LUA_ENVIRONINDEX) {
51 TValue *o = &G(L)->tmptv; 54 TValue *o = &G(L)->tmptv;
52 settabV(L, o, tabref(fn->c.env)); 55 settabV(L, o, tabref(fn->c.env));
@@ -58,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx)
58 } 61 }
59} 62}
60 63
61static TValue *stkindex2adr(lua_State *L, int idx) 64static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx)
65{
66 TValue *o = index2adr(L, idx);
67 lj_checkapi(o != niltv(L), "invalid stack slot %d", idx);
68 return o;
69}
70
71static TValue *index2adr_stack(lua_State *L, int idx)
62{ 72{
63 if (idx > 0) { 73 if (idx > 0) {
64 TValue *o = L->base + (idx - 1); 74 TValue *o = L->base + (idx - 1);
75 if (o < L->top) {
76 return o;
77 } else {
78 lj_checkapi(0, "invalid stack slot %d", idx);
79 return niltv(L);
80 }
65 return o < L->top ? o : niltv(L); 81 return o < L->top ? o : niltv(L);
66 } else { 82 } else {
67 api_check(L, idx != 0 && -idx <= L->top - L->base); 83 lj_checkapi(idx != 0 && -idx <= L->top - L->base,
84 "invalid stack slot %d", idx);
68 return L->top + idx; 85 return L->top + idx;
69 } 86 }
70} 87}
@@ -98,17 +115,24 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
98 lj_err_callerv(L, LJ_ERR_STKOVM, msg); 115 lj_err_callerv(L, LJ_ERR_STKOVM, msg);
99} 116}
100 117
101LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) 118LUA_API void lua_xmove(lua_State *L, lua_State *to, int n)
102{ 119{
103 TValue *f, *t; 120 TValue *f, *t;
104 if (from == to) return; 121 if (L == to) return;
105 api_checknelems(from, n); 122 lj_checkapi_slot(n);
106 api_check(from, G(from) == G(to)); 123 lj_checkapi(G(L) == G(to), "move across global states");
107 lj_state_checkstack(to, (MSize)n); 124 lj_state_checkstack(to, (MSize)n);
108 f = from->top; 125 f = L->top;
109 t = to->top = to->top + n; 126 t = to->top = to->top + n;
110 while (--n >= 0) copyTV(to, --t, --f); 127 while (--n >= 0) copyTV(to, --t, --f);
111 from->top = f; 128 L->top = f;
129}
130
131LUA_API const lua_Number *lua_version(lua_State *L)
132{
133 static const lua_Number version = LUA_VERSION_NUM;
134 UNUSED(L);
135 return &version;
112} 136}
113 137
114/* -- Stack manipulation -------------------------------------------------- */ 138/* -- Stack manipulation -------------------------------------------------- */
@@ -121,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L)
121LUA_API void lua_settop(lua_State *L, int idx) 145LUA_API void lua_settop(lua_State *L, int idx)
122{ 146{
123 if (idx >= 0) { 147 if (idx >= 0) {
124 api_check(L, idx <= tvref(L->maxstack) - L->base); 148 lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx);
125 if (L->base + idx > L->top) { 149 if (L->base + idx > L->top) {
126 if (L->base + idx >= tvref(L->maxstack)) 150 if (L->base + idx >= tvref(L->maxstack))
127 lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); 151 lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
@@ -130,51 +154,58 @@ LUA_API void lua_settop(lua_State *L, int idx)
130 L->top = L->base + idx; 154 L->top = L->base + idx;
131 } 155 }
132 } else { 156 } else {
133 api_check(L, -(idx+1) <= (L->top - L->base)); 157 lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx);
134 L->top += idx+1; /* Shrinks top (idx < 0). */ 158 L->top += idx+1; /* Shrinks top (idx < 0). */
135 } 159 }
136} 160}
137 161
138LUA_API void lua_remove(lua_State *L, int idx) 162LUA_API void lua_remove(lua_State *L, int idx)
139{ 163{
140 TValue *p = stkindex2adr(L, idx); 164 TValue *p = index2adr_stack(L, idx);
141 api_checkvalidindex(L, p);
142 while (++p < L->top) copyTV(L, p-1, p); 165 while (++p < L->top) copyTV(L, p-1, p);
143 L->top--; 166 L->top--;
144} 167}
145 168
146LUA_API void lua_insert(lua_State *L, int idx) 169LUA_API void lua_insert(lua_State *L, int idx)
147{ 170{
148 TValue *q, *p = stkindex2adr(L, idx); 171 TValue *q, *p = index2adr_stack(L, idx);
149 api_checkvalidindex(L, p);
150 for (q = L->top; q > p; q--) copyTV(L, q, q-1); 172 for (q = L->top; q > p; q--) copyTV(L, q, q-1);
151 copyTV(L, p, L->top); 173 copyTV(L, p, L->top);
152} 174}
153 175
154LUA_API void lua_replace(lua_State *L, int idx) 176static void copy_slot(lua_State *L, TValue *f, int idx)
155{ 177{
156 api_checknelems(L, 1);
157 if (idx == LUA_GLOBALSINDEX) { 178 if (idx == LUA_GLOBALSINDEX) {
158 api_check(L, tvistab(L->top-1)); 179 lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
159 /* NOBARRIER: A thread (i.e. L) is never black. */ 180 /* NOBARRIER: A thread (i.e. L) is never black. */
160 setgcref(L->env, obj2gco(tabV(L->top-1))); 181 setgcref(L->env, obj2gco(tabV(f)));
161 } else if (idx == LUA_ENVIRONINDEX) { 182 } else if (idx == LUA_ENVIRONINDEX) {
162 GCfunc *fn = curr_func(L); 183 GCfunc *fn = curr_func(L);
163 if (fn->c.gct != ~LJ_TFUNC) 184 if (fn->c.gct != ~LJ_TFUNC)
164 lj_err_msg(L, LJ_ERR_NOENV); 185 lj_err_msg(L, LJ_ERR_NOENV);
165 api_check(L, tvistab(L->top-1)); 186 lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
166 setgcref(fn->c.env, obj2gco(tabV(L->top-1))); 187 setgcref(fn->c.env, obj2gco(tabV(f)));
167 lj_gc_barrier(L, fn, L->top-1); 188 lj_gc_barrier(L, fn, f);
168 } else { 189 } else {
169 TValue *o = index2adr(L, idx); 190 TValue *o = index2adr_check(L, idx);
170 api_checkvalidindex(L, o); 191 copyTV(L, o, f);
171 copyTV(L, o, L->top-1);
172 if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ 192 if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
173 lj_gc_barrier(L, curr_func(L), L->top-1); 193 lj_gc_barrier(L, curr_func(L), f);
174 } 194 }
195}
196
197LUA_API void lua_replace(lua_State *L, int idx)
198{
199 lj_checkapi_slot(1);
200 copy_slot(L, L->top - 1, idx);
175 L->top--; 201 L->top--;
176} 202}
177 203
204LUA_API void lua_copy(lua_State *L, int fromidx, int toidx)
205{
206 copy_slot(L, index2adr(L, fromidx), toidx);
207}
208
178LUA_API void lua_pushvalue(lua_State *L, int idx) 209LUA_API void lua_pushvalue(lua_State *L, int idx)
179{ 210{
180 copyTV(L, L->top, index2adr(L, idx)); 211 copyTV(L, L->top, index2adr(L, idx));
@@ -188,7 +219,7 @@ LUA_API int lua_type(lua_State *L, int idx)
188 cTValue *o = index2adr(L, idx); 219 cTValue *o = index2adr(L, idx);
189 if (tvisnumber(o)) { 220 if (tvisnumber(o)) {
190 return LUA_TNUMBER; 221 return LUA_TNUMBER;
191#if LJ_64 222#if LJ_64 && !LJ_GC64
192 } else if (tvislightud(o)) { 223 } else if (tvislightud(o)) {
193 return LUA_TLIGHTUSERDATA; 224 return LUA_TLIGHTUSERDATA;
194#endif 225#endif
@@ -201,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx)
201#else 232#else
202 int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); 233 int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u);
203#endif 234#endif
204 lua_assert(tt != LUA_TNIL || tvisnil(o)); 235 lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion");
205 return tt; 236 return tt;
206 } 237 }
207} 238}
@@ -268,7 +299,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
268 return 0; 299 return 0;
269 } else if (tvispri(o1)) { 300 } else if (tvispri(o1)) {
270 return o1 != niltv(L) && o2 != niltv(L); 301 return o1 != niltv(L) && o2 != niltv(L);
271#if LJ_64 302#if LJ_64 && !LJ_GC64
272 } else if (tvislightud(o1)) { 303 } else if (tvislightud(o1)) {
273 return o1->u64 == o2->u64; 304 return o1->u64 == o2->u64;
274#endif 305#endif
@@ -283,8 +314,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
283 } else { 314 } else {
284 L->top = base+2; 315 L->top = base+2;
285 lj_vm_call(L, base, 1+1); 316 lj_vm_call(L, base, 1+1);
286 L->top -= 2; 317 L->top -= 2+LJ_FR2;
287 return tvistruecond(L->top+1); 318 return tvistruecond(L->top+1+LJ_FR2);
288 } 319 }
289 } 320 }
290} 321}
@@ -306,8 +337,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
306 } else { 337 } else {
307 L->top = base+2; 338 L->top = base+2;
308 lj_vm_call(L, base, 1+1); 339 lj_vm_call(L, base, 1+1);
309 L->top -= 2; 340 L->top -= 2+LJ_FR2;
310 return tvistruecond(L->top+1); 341 return tvistruecond(L->top+1+LJ_FR2);
311 } 342 }
312 } 343 }
313} 344}
@@ -324,6 +355,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
324 return 0; 355 return 0;
325} 356}
326 357
358LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok)
359{
360 cTValue *o = index2adr(L, idx);
361 TValue tmp;
362 if (LJ_LIKELY(tvisnumber(o))) {
363 if (ok) *ok = 1;
364 return numberVnum(o);
365 } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) {
366 if (ok) *ok = 1;
367 return numV(&tmp);
368 } else {
369 if (ok) *ok = 0;
370 return 0;
371 }
372}
373
327LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) 374LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
328{ 375{
329 cTValue *o = index2adr(L, idx); 376 cTValue *o = index2adr(L, idx);
@@ -361,7 +408,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
361 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) 408 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
362 return 0; 409 return 0;
363 if (tvisint(&tmp)) 410 if (tvisint(&tmp))
364 return (lua_Integer)intV(&tmp); 411 return intV(&tmp);
365 n = numV(&tmp); 412 n = numV(&tmp);
366 } 413 }
367#if LJ_64 414#if LJ_64
@@ -371,6 +418,35 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
371#endif 418#endif
372} 419}
373 420
421LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
422{
423 cTValue *o = index2adr(L, idx);
424 TValue tmp;
425 lua_Number n;
426 if (LJ_LIKELY(tvisint(o))) {
427 if (ok) *ok = 1;
428 return intV(o);
429 } else if (LJ_LIKELY(tvisnum(o))) {
430 n = numV(o);
431 } else {
432 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) {
433 if (ok) *ok = 0;
434 return 0;
435 }
436 if (tvisint(&tmp)) {
437 if (ok) *ok = 1;
438 return intV(&tmp);
439 }
440 n = numV(&tmp);
441 }
442 if (ok) *ok = 1;
443#if LJ_64
444 return (lua_Integer)n;
445#else
446 return lj_num2int(n);
447#endif
448}
449
374LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) 450LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
375{ 451{
376 cTValue *o = index2adr(L, idx); 452 cTValue *o = index2adr(L, idx);
@@ -434,7 +510,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
434 } else if (tvisnumber(o)) { 510 } else if (tvisnumber(o)) {
435 lj_gc_check(L); 511 lj_gc_check(L);
436 o = index2adr(L, idx); /* GC may move the stack. */ 512 o = index2adr(L, idx); /* GC may move the stack. */
437 s = lj_str_fromnumber(L, o); 513 s = lj_strfmt_number(L, o);
438 setstrV(L, o, s); 514 setstrV(L, o, s);
439 } else { 515 } else {
440 if (len != NULL) *len = 0; 516 if (len != NULL) *len = 0;
@@ -453,7 +529,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
453 } else if (tvisnumber(o)) { 529 } else if (tvisnumber(o)) {
454 lj_gc_check(L); 530 lj_gc_check(L);
455 o = index2adr(L, idx); /* GC may move the stack. */ 531 o = index2adr(L, idx); /* GC may move the stack. */
456 s = lj_str_fromnumber(L, o); 532 s = lj_strfmt_number(L, o);
457 setstrV(L, o, s); 533 setstrV(L, o, s);
458 } else { 534 } else {
459 lj_err_argt(L, idx, LUA_TSTRING); 535 lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +551,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
475 } else if (tvisnumber(o)) { 551 } else if (tvisnumber(o)) {
476 lj_gc_check(L); 552 lj_gc_check(L);
477 o = index2adr(L, idx); /* GC may move the stack. */ 553 o = index2adr(L, idx); /* GC may move the stack. */
478 s = lj_str_fromnumber(L, o); 554 s = lj_strfmt_number(L, o);
479 setstrV(L, o, s); 555 setstrV(L, o, s);
480 } else { 556 } else {
481 lj_err_argt(L, idx, LUA_TSTRING); 557 lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +583,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
507 } else if (tvisudata(o)) { 583 } else if (tvisudata(o)) {
508 return udataV(o)->len; 584 return udataV(o)->len;
509 } else if (tvisnumber(o)) { 585 } else if (tvisnumber(o)) {
510 GCstr *s = lj_str_fromnumber(L, o); 586 GCstr *s = lj_strfmt_number(L, o);
511 setstrV(L, o, s); 587 setstrV(L, o, s);
512 return s->len; 588 return s->len;
513 } else { 589 } else {
@@ -532,7 +608,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx)
532 if (tvisudata(o)) 608 if (tvisudata(o))
533 return uddata(udataV(o)); 609 return uddata(udataV(o));
534 else if (tvislightud(o)) 610 else if (tvislightud(o))
535 return lightudV(o); 611 return lightudV(G(L), o);
536 else 612 else
537 return NULL; 613 return NULL;
538} 614}
@@ -545,17 +621,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
545 621
546LUA_API const void *lua_topointer(lua_State *L, int idx) 622LUA_API const void *lua_topointer(lua_State *L, int idx)
547{ 623{
548 cTValue *o = index2adr(L, idx); 624 return lj_obj_ptr(G(L), index2adr(L, idx));
549 if (tvisudata(o))
550 return uddata(udataV(o));
551 else if (tvislightud(o))
552 return lightudV(o);
553 else if (tviscdata(o))
554 return cdataptr(cdataV(o));
555 else if (tvisgcv(o))
556 return gcV(o);
557 else
558 return NULL;
559} 625}
560 626
561/* -- Stack setters (object creation) ------------------------------------- */ 627/* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +672,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
606 va_list argp) 672 va_list argp)
607{ 673{
608 lj_gc_check(L); 674 lj_gc_check(L);
609 return lj_str_pushvf(L, fmt, argp); 675 return lj_strfmt_pushvf(L, fmt, argp);
610} 676}
611 677
612LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) 678LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +681,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
615 va_list argp; 681 va_list argp;
616 lj_gc_check(L); 682 lj_gc_check(L);
617 va_start(argp, fmt); 683 va_start(argp, fmt);
618 ret = lj_str_pushvf(L, fmt, argp); 684 ret = lj_strfmt_pushvf(L, fmt, argp);
619 va_end(argp); 685 va_end(argp);
620 return ret; 686 return ret;
621} 687}
@@ -624,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
624{ 690{
625 GCfunc *fn; 691 GCfunc *fn;
626 lj_gc_check(L); 692 lj_gc_check(L);
627 api_checknelems(L, n); 693 lj_checkapi_slot(n);
628 fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); 694 fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
629 fn->c.f = f; 695 fn->c.f = f;
630 L->top -= n; 696 L->top -= n;
631 while (n--) 697 while (n--)
632 copyTV(L, &fn->c.upvalue[n], L->top+n); 698 copyTV(L, &fn->c.upvalue[n], L->top+n);
633 setfuncV(L, L->top, fn); 699 setfuncV(L, L->top, fn);
634 lua_assert(iswhite(obj2gco(fn))); 700 lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white");
635 incr_top(L); 701 incr_top(L);
636} 702}
637 703
@@ -643,16 +709,17 @@ LUA_API void lua_pushboolean(lua_State *L, int b)
643 709
644LUA_API void lua_pushlightuserdata(lua_State *L, void *p) 710LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
645{ 711{
646 setlightudV(L->top, checklightudptr(L, p)); 712#if LJ_64
713 p = lj_lightud_intern(L, p);
714#endif
715 setrawlightudV(L->top, p);
647 incr_top(L); 716 incr_top(L);
648} 717}
649 718
650LUA_API void lua_createtable(lua_State *L, int narray, int nrec) 719LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
651{ 720{
652 GCtab *t;
653 lj_gc_check(L); 721 lj_gc_check(L);
654 t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); 722 settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
655 settabV(L, L->top, t);
656 incr_top(L); 723 incr_top(L);
657} 724}
658 725
@@ -703,7 +770,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size)
703 770
704LUA_API void lua_concat(lua_State *L, int n) 771LUA_API void lua_concat(lua_State *L, int n)
705{ 772{
706 api_checknelems(L, n); 773 lj_checkapi_slot(n);
707 if (n >= 2) { 774 if (n >= 2) {
708 n--; 775 n--;
709 do { 776 do {
@@ -715,8 +782,8 @@ LUA_API void lua_concat(lua_State *L, int n)
715 n -= (int)(L->top - top); 782 n -= (int)(L->top - top);
716 L->top = top+2; 783 L->top = top+2;
717 lj_vm_call(L, top, 1+1); 784 lj_vm_call(L, top, 1+1);
718 L->top--; 785 L->top -= 1+LJ_FR2;
719 copyTV(L, L->top-1, L->top); 786 copyTV(L, L->top-1, L->top+LJ_FR2);
720 } while (--n > 0); 787 } while (--n > 0);
721 } else if (n == 0) { /* Push empty string. */ 788 } else if (n == 0) { /* Push empty string. */
722 setstrV(L, L->top, &G(L)->strempty); 789 setstrV(L, L->top, &G(L)->strempty);
@@ -729,30 +796,28 @@ LUA_API void lua_concat(lua_State *L, int n)
729 796
730LUA_API void lua_gettable(lua_State *L, int idx) 797LUA_API void lua_gettable(lua_State *L, int idx)
731{ 798{
732 cTValue *v, *t = index2adr(L, idx); 799 cTValue *t = index2adr_check(L, idx);
733 api_checkvalidindex(L, t); 800 cTValue *v = lj_meta_tget(L, t, L->top-1);
734 v = lj_meta_tget(L, t, L->top-1);
735 if (v == NULL) { 801 if (v == NULL) {
736 L->top += 2; 802 L->top += 2;
737 lj_vm_call(L, L->top-2, 1+1); 803 lj_vm_call(L, L->top-2, 1+1);
738 L->top -= 2; 804 L->top -= 2+LJ_FR2;
739 v = L->top+1; 805 v = L->top+1+LJ_FR2;
740 } 806 }
741 copyTV(L, L->top-1, v); 807 copyTV(L, L->top-1, v);
742} 808}
743 809
744LUA_API void lua_getfield(lua_State *L, int idx, const char *k) 810LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
745{ 811{
746 cTValue *v, *t = index2adr(L, idx); 812 cTValue *v, *t = index2adr_check(L, idx);
747 TValue key; 813 TValue key;
748 api_checkvalidindex(L, t);
749 setstrV(L, &key, lj_str_newz(L, k)); 814 setstrV(L, &key, lj_str_newz(L, k));
750 v = lj_meta_tget(L, t, &key); 815 v = lj_meta_tget(L, t, &key);
751 if (v == NULL) { 816 if (v == NULL) {
752 L->top += 2; 817 L->top += 2;
753 lj_vm_call(L, L->top-2, 1+1); 818 lj_vm_call(L, L->top-2, 1+1);
754 L->top -= 2; 819 L->top -= 2+LJ_FR2;
755 v = L->top+1; 820 v = L->top+1+LJ_FR2;
756 } 821 }
757 copyTV(L, L->top, v); 822 copyTV(L, L->top, v);
758 incr_top(L); 823 incr_top(L);
@@ -761,14 +826,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
761LUA_API void lua_rawget(lua_State *L, int idx) 826LUA_API void lua_rawget(lua_State *L, int idx)
762{ 827{
763 cTValue *t = index2adr(L, idx); 828 cTValue *t = index2adr(L, idx);
764 api_check(L, tvistab(t)); 829 lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
765 copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); 830 copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
766} 831}
767 832
768LUA_API void lua_rawgeti(lua_State *L, int idx, int n) 833LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
769{ 834{
770 cTValue *v, *t = index2adr(L, idx); 835 cTValue *v, *t = index2adr(L, idx);
771 api_check(L, tvistab(t)); 836 lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
772 v = lj_tab_getint(tabV(t), n); 837 v = lj_tab_getint(tabV(t), n);
773 if (v) { 838 if (v) {
774 copyTV(L, L->top, v); 839 copyTV(L, L->top, v);
@@ -810,8 +875,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
810 875
811LUA_API void lua_getfenv(lua_State *L, int idx) 876LUA_API void lua_getfenv(lua_State *L, int idx)
812{ 877{
813 cTValue *o = index2adr(L, idx); 878 cTValue *o = index2adr_check(L, idx);
814 api_checkvalidindex(L, o);
815 if (tvisfunc(o)) { 879 if (tvisfunc(o)) {
816 settabV(L, L->top, tabref(funcV(o)->c.env)); 880 settabV(L, L->top, tabref(funcV(o)->c.env));
817 } else if (tvisudata(o)) { 881 } else if (tvisudata(o)) {
@@ -828,7 +892,7 @@ LUA_API int lua_next(lua_State *L, int idx)
828{ 892{
829 cTValue *t = index2adr(L, idx); 893 cTValue *t = index2adr(L, idx);
830 int more; 894 int more;
831 api_check(L, tvistab(t)); 895 lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
832 more = lj_tab_next(L, tabV(t), L->top-1); 896 more = lj_tab_next(L, tabV(t), L->top-1);
833 if (more) { 897 if (more) {
834 incr_top(L); /* Return new key and value slot. */ 898 incr_top(L); /* Return new key and value slot. */
@@ -854,7 +918,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n)
854{ 918{
855 GCfunc *fn = funcV(index2adr(L, idx)); 919 GCfunc *fn = funcV(index2adr(L, idx));
856 n--; 920 n--;
857 api_check(L, (uint32_t)n < fn->l.nupvalues); 921 lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n);
858 return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : 922 return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
859 (void *)&fn->c.upvalue[n]; 923 (void *)&fn->c.upvalue[n];
860} 924}
@@ -864,13 +928,15 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
864 GCfunc *fn1 = funcV(index2adr(L, idx1)); 928 GCfunc *fn1 = funcV(index2adr(L, idx1));
865 GCfunc *fn2 = funcV(index2adr(L, idx2)); 929 GCfunc *fn2 = funcV(index2adr(L, idx2));
866 n1--; n2--; 930 n1--; n2--;
867 api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues); 931 lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1);
868 api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues); 932 lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2);
933 lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1);
934 lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1);
869 setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); 935 setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]);
870 lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); 936 lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
871} 937}
872 938
873LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) 939LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname)
874{ 940{
875 cTValue *o = index2adr(L, idx); 941 cTValue *o = index2adr(L, idx);
876 if (tvisudata(o)) { 942 if (tvisudata(o)) {
@@ -879,8 +945,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
879 if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) 945 if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
880 return uddata(ud); 946 return uddata(ud);
881 } 947 }
882 lj_err_argtype(L, idx, tname); 948 return NULL; /* value is not a userdata with a metatable */
883 return NULL; /* unreachable */ 949}
950
951LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
952{
953 void *p = luaL_testudata(L, idx, tname);
954 if (!p) lj_err_argtype(L, idx, tname);
955 return p;
884} 956}
885 957
886/* -- Object setters ------------------------------------------------------ */ 958/* -- Object setters ------------------------------------------------------ */
@@ -888,19 +960,19 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
888LUA_API void lua_settable(lua_State *L, int idx) 960LUA_API void lua_settable(lua_State *L, int idx)
889{ 961{
890 TValue *o; 962 TValue *o;
891 cTValue *t = index2adr(L, idx); 963 cTValue *t = index2adr_check(L, idx);
892 api_checknelems(L, 2); 964 lj_checkapi_slot(2);
893 api_checkvalidindex(L, t);
894 o = lj_meta_tset(L, t, L->top-2); 965 o = lj_meta_tset(L, t, L->top-2);
895 if (o) { 966 if (o) {
896 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 967 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
897 copyTV(L, o, L->top-1);
898 L->top -= 2; 968 L->top -= 2;
969 copyTV(L, o, L->top+1);
899 } else { 970 } else {
900 L->top += 3; 971 TValue *base = L->top;
901 copyTV(L, L->top-1, L->top-6); 972 copyTV(L, base+2, base-3-2*LJ_FR2);
902 lj_vm_call(L, L->top-3, 0+1); 973 L->top = base+3;
903 L->top -= 3; 974 lj_vm_call(L, base, 0+1);
975 L->top -= 3+LJ_FR2;
904 } 976 }
905} 977}
906 978
@@ -908,20 +980,19 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
908{ 980{
909 TValue *o; 981 TValue *o;
910 TValue key; 982 TValue key;
911 cTValue *t = index2adr(L, idx); 983 cTValue *t = index2adr_check(L, idx);
912 api_checknelems(L, 1); 984 lj_checkapi_slot(1);
913 api_checkvalidindex(L, t);
914 setstrV(L, &key, lj_str_newz(L, k)); 985 setstrV(L, &key, lj_str_newz(L, k));
915 o = lj_meta_tset(L, t, &key); 986 o = lj_meta_tset(L, t, &key);
916 if (o) { 987 if (o) {
917 L->top--;
918 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 988 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
919 copyTV(L, o, L->top); 989 copyTV(L, o, --L->top);
920 } else { 990 } else {
921 L->top += 3; 991 TValue *base = L->top;
922 copyTV(L, L->top-1, L->top-6); 992 copyTV(L, base+2, base-3-2*LJ_FR2);
923 lj_vm_call(L, L->top-3, 0+1); 993 L->top = base+3;
924 L->top -= 2; 994 lj_vm_call(L, base, 0+1);
995 L->top -= 2+LJ_FR2;
925 } 996 }
926} 997}
927 998
@@ -929,7 +1000,7 @@ LUA_API void lua_rawset(lua_State *L, int idx)
929{ 1000{
930 GCtab *t = tabV(index2adr(L, idx)); 1001 GCtab *t = tabV(index2adr(L, idx));
931 TValue *dst, *key; 1002 TValue *dst, *key;
932 api_checknelems(L, 2); 1003 lj_checkapi_slot(2);
933 key = L->top-2; 1004 key = L->top-2;
934 dst = lj_tab_set(L, t, key); 1005 dst = lj_tab_set(L, t, key);
935 copyTV(L, dst, key+1); 1006 copyTV(L, dst, key+1);
@@ -941,7 +1012,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n)
941{ 1012{
942 GCtab *t = tabV(index2adr(L, idx)); 1013 GCtab *t = tabV(index2adr(L, idx));
943 TValue *dst, *src; 1014 TValue *dst, *src;
944 api_checknelems(L, 1); 1015 lj_checkapi_slot(1);
945 dst = lj_tab_setint(L, t, n); 1016 dst = lj_tab_setint(L, t, n);
946 src = L->top-1; 1017 src = L->top-1;
947 copyTV(L, dst, src); 1018 copyTV(L, dst, src);
@@ -953,13 +1024,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
953{ 1024{
954 global_State *g; 1025 global_State *g;
955 GCtab *mt; 1026 GCtab *mt;
956 cTValue *o = index2adr(L, idx); 1027 cTValue *o = index2adr_check(L, idx);
957 api_checknelems(L, 1); 1028 lj_checkapi_slot(1);
958 api_checkvalidindex(L, o);
959 if (tvisnil(L->top-1)) { 1029 if (tvisnil(L->top-1)) {
960 mt = NULL; 1030 mt = NULL;
961 } else { 1031 } else {
962 api_check(L, tvistab(L->top-1)); 1032 lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
963 mt = tabV(L->top-1); 1033 mt = tabV(L->top-1);
964 } 1034 }
965 g = G(L); 1035 g = G(L);
@@ -988,13 +1058,18 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
988 return 1; 1058 return 1;
989} 1059}
990 1060
1061LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
1062{
1063 lua_getfield(L, LUA_REGISTRYINDEX, tname);
1064 lua_setmetatable(L, -2);
1065}
1066
991LUA_API int lua_setfenv(lua_State *L, int idx) 1067LUA_API int lua_setfenv(lua_State *L, int idx)
992{ 1068{
993 cTValue *o = index2adr(L, idx); 1069 cTValue *o = index2adr_check(L, idx);
994 GCtab *t; 1070 GCtab *t;
995 api_checknelems(L, 1); 1071 lj_checkapi_slot(1);
996 api_checkvalidindex(L, o); 1072 lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
997 api_check(L, tvistab(L->top-1));
998 t = tabV(L->top-1); 1073 t = tabV(L->top-1);
999 if (tvisfunc(o)) { 1074 if (tvisfunc(o)) {
1000 setgcref(funcV(o)->c.env, obj2gco(t)); 1075 setgcref(funcV(o)->c.env, obj2gco(t));
@@ -1017,7 +1092,7 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
1017 TValue *val; 1092 TValue *val;
1018 GCobj *o; 1093 GCobj *o;
1019 const char *name; 1094 const char *name;
1020 api_checknelems(L, 1); 1095 lj_checkapi_slot(1);
1021 name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); 1096 name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o);
1022 if (name) { 1097 if (name) {
1023 L->top--; 1098 L->top--;
@@ -1029,11 +1104,25 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
1029 1104
1030/* -- Calls --------------------------------------------------------------- */ 1105/* -- Calls --------------------------------------------------------------- */
1031 1106
1107#if LJ_FR2
1108static TValue *api_call_base(lua_State *L, int nargs)
1109{
1110 TValue *o = L->top, *base = o - nargs;
1111 L->top = o+1;
1112 for (; o > base; o--) copyTV(L, o, o-1);
1113 setnilV(o);
1114 return o+1;
1115}
1116#else
1117#define api_call_base(L, nargs) (L->top - (nargs))
1118#endif
1119
1032LUA_API void lua_call(lua_State *L, int nargs, int nresults) 1120LUA_API void lua_call(lua_State *L, int nargs, int nresults)
1033{ 1121{
1034 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1122 lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
1035 api_checknelems(L, nargs+1); 1123 "thread called in wrong state %d", L->status);
1036 lj_vm_call(L, L->top - nargs, nresults+1); 1124 lj_checkapi_slot(nargs+1);
1125 lj_vm_call(L, api_call_base(L, nargs), nresults+1);
1037} 1126}
1038 1127
1039LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) 1128LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1042,16 +1131,16 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1042 uint8_t oldh = hook_save(g); 1131 uint8_t oldh = hook_save(g);
1043 ptrdiff_t ef; 1132 ptrdiff_t ef;
1044 int status; 1133 int status;
1045 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1134 lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
1046 api_checknelems(L, nargs+1); 1135 "thread called in wrong state %d", L->status);
1136 lj_checkapi_slot(nargs+1);
1047 if (errfunc == 0) { 1137 if (errfunc == 0) {
1048 ef = 0; 1138 ef = 0;
1049 } else { 1139 } else {
1050 cTValue *o = stkindex2adr(L, errfunc); 1140 cTValue *o = index2adr_stack(L, errfunc);
1051 api_checkvalidindex(L, o);
1052 ef = savestack(L, o); 1141 ef = savestack(L, o);
1053 } 1142 }
1054 status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); 1143 status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
1055 if (status) hook_restore(g, oldh); 1144 if (status) hook_restore(g, oldh);
1056 return status; 1145 return status;
1057} 1146}
@@ -1059,12 +1148,17 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1059static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) 1148static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
1060{ 1149{
1061 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); 1150 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
1151 TValue *top = L->top;
1062 fn->c.f = func; 1152 fn->c.f = func;
1063 setfuncV(L, L->top, fn); 1153 setfuncV(L, top++, fn);
1064 setlightudV(L->top+1, checklightudptr(L, ud)); 1154 if (LJ_FR2) setnilV(top++);
1155#if LJ_64
1156 ud = lj_lightud_intern(L, ud);
1157#endif
1158 setrawlightudV(top++, ud);
1065 cframe_nres(L->cframe) = 1+0; /* Zero results. */ 1159 cframe_nres(L->cframe) = 1+0; /* Zero results. */
1066 L->top += 2; 1160 L->top = top;
1067 return L->top-1; /* Now call the newly allocated C function. */ 1161 return top-1; /* Now call the newly allocated C function. */
1068} 1162}
1069 1163
1070LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) 1164LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1072,7 +1166,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
1072 global_State *g = G(L); 1166 global_State *g = G(L);
1073 uint8_t oldh = hook_save(g); 1167 uint8_t oldh = hook_save(g);
1074 int status; 1168 int status;
1075 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1169 lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
1170 "thread called in wrong state %d", L->status);
1076 status = lj_vm_cpcall(L, func, ud, cpcall); 1171 status = lj_vm_cpcall(L, func, ud, cpcall);
1077 if (status) hook_restore(g, oldh); 1172 if (status) hook_restore(g, oldh);
1078 return status; 1173 return status;
@@ -1081,10 +1176,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
1081LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) 1176LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
1082{ 1177{
1083 if (luaL_getmetafield(L, idx, field)) { 1178 if (luaL_getmetafield(L, idx, field)) {
1084 TValue *base = L->top--; 1179 TValue *top = L->top--;
1085 copyTV(L, base, index2adr(L, idx)); 1180 if (LJ_FR2) setnilV(top++);
1086 L->top = base+1; 1181 copyTV(L, top++, index2adr(L, idx));
1087 lj_vm_call(L, base, 1+1); 1182 L->top = top;
1183 lj_vm_call(L, top-1, 1+1);
1088 return 1; 1184 return 1;
1089 } 1185 }
1090 return 0; 1186 return 0;
@@ -1092,6 +1188,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
1092 1188
1093/* -- Coroutine yield and resume ------------------------------------------ */ 1189/* -- Coroutine yield and resume ------------------------------------------ */
1094 1190
1191LUA_API int lua_isyieldable(lua_State *L)
1192{
1193 return cframe_canyield(L->cframe);
1194}
1195
1095LUA_API int lua_yield(lua_State *L, int nresults) 1196LUA_API int lua_yield(lua_State *L, int nresults)
1096{ 1197{
1097 void *cf = L->cframe; 1198 void *cf = L->cframe;
@@ -1111,13 +1212,16 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1111 } else { /* Yield from hook: add a pseudo-frame. */ 1212 } else { /* Yield from hook: add a pseudo-frame. */
1112 TValue *top = L->top; 1213 TValue *top = L->top;
1113 hook_leave(g); 1214 hook_leave(g);
1114 top->u64 = cframe_multres(cf); 1215 (top++)->u64 = cframe_multres(cf);
1115 setcont(top+1, lj_cont_hook); 1216 setcont(top, lj_cont_hook);
1116 setframe_pc(top+1, cframe_pc(cf)-1); 1217 if (LJ_FR2) top++;
1117 setframe_gc(top+2, obj2gco(L)); 1218 setframe_pc(top, cframe_pc(cf)-1);
1118 setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT); 1219 top++;
1119 L->top = L->base = top+3; 1220 setframe_gc(top, obj2gco(L), LJ_TTHREAD);
1120#if LJ_TARGET_X64 1221 if (LJ_FR2) top++;
1222 setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
1223 L->top = L->base = top+1;
1224#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS
1121 lj_err_throw(L, LUA_YIELD); 1225 lj_err_throw(L, LUA_YIELD);
1122#else 1226#else
1123 L->cframe = NULL; 1227 L->cframe = NULL;
@@ -1133,7 +1237,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1133LUA_API int lua_resume(lua_State *L, int nargs) 1237LUA_API int lua_resume(lua_State *L, int nargs)
1134{ 1238{
1135 if (L->cframe == NULL && L->status <= LUA_YIELD) 1239 if (L->cframe == NULL && L->status <= LUA_YIELD)
1136 return lj_vm_resume(L, L->top - nargs, 0, 0); 1240 return lj_vm_resume(L,
1241 L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs,
1242 0, 0);
1137 L->top = L->base; 1243 L->top = L->base;
1138 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); 1244 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
1139 incr_top(L); 1245 incr_top(L);
@@ -1163,7 +1269,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
1163 res = (int)(g->gc.total & 0x3ff); 1269 res = (int)(g->gc.total & 0x3ff);
1164 break; 1270 break;
1165 case LUA_GCSTEP: { 1271 case LUA_GCSTEP: {
1166 MSize a = (MSize)data << 10; 1272 GCSize a = (GCSize)data << 10;
1167 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; 1273 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
1168 while (g->gc.total >= g->gc.threshold) 1274 while (g->gc.total >= g->gc.threshold)
1169 if (lj_gc_step(L) > 0) { 1275 if (lj_gc_step(L) > 0) {
@@ -1180,6 +1286,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
1180 res = (int)(g->gc.stepmul); 1286 res = (int)(g->gc.stepmul);
1181 g->gc.stepmul = (MSize)data; 1287 g->gc.stepmul = (MSize)data;
1182 break; 1288 break;
1289 case LUA_GCISRUNNING:
1290 res = (g->gc.threshold != LJ_MAX_MEM);
1291 break;
1183 default: 1292 default:
1184 res = -1; /* Invalid option. */ 1293 res = -1; /* Invalid option. */
1185 } 1294 }
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 8959c4f9..ae999467 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -8,6 +8,8 @@
8 8
9#include "lua.h" 9#include "lua.h"
10 10
11/* -- Target definitions -------------------------------------------------- */
12
11/* Target endianess. */ 13/* Target endianess. */
12#define LUAJIT_LE 0 14#define LUAJIT_LE 0
13#define LUAJIT_BE 1 15#define LUAJIT_BE 1
@@ -19,12 +21,16 @@
19#define LUAJIT_ARCH_x64 2 21#define LUAJIT_ARCH_x64 2
20#define LUAJIT_ARCH_ARM 3 22#define LUAJIT_ARCH_ARM 3
21#define LUAJIT_ARCH_arm 3 23#define LUAJIT_ARCH_arm 3
22#define LUAJIT_ARCH_PPC 4 24#define LUAJIT_ARCH_ARM64 4
23#define LUAJIT_ARCH_ppc 4 25#define LUAJIT_ARCH_arm64 4
24#define LUAJIT_ARCH_PPCSPE 5 26#define LUAJIT_ARCH_PPC 5
25#define LUAJIT_ARCH_ppcspe 5 27#define LUAJIT_ARCH_ppc 5
26#define LUAJIT_ARCH_MIPS 6 28#define LUAJIT_ARCH_MIPS 6
27#define LUAJIT_ARCH_mips 6 29#define LUAJIT_ARCH_mips 6
30#define LUAJIT_ARCH_MIPS32 6
31#define LUAJIT_ARCH_mips32 6
32#define LUAJIT_ARCH_MIPS64 7
33#define LUAJIT_ARCH_mips64 7
28 34
29/* Target OS. */ 35/* Target OS. */
30#define LUAJIT_OS_OTHER 0 36#define LUAJIT_OS_OTHER 0
@@ -34,6 +40,14 @@
34#define LUAJIT_OS_BSD 4 40#define LUAJIT_OS_BSD 4
35#define LUAJIT_OS_POSIX 5 41#define LUAJIT_OS_POSIX 5
36 42
43/* Number mode. */
44#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
45#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
46#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
47#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
48
49/* -- Target detection ---------------------------------------------------- */
50
37/* Select native target if no target defined. */ 51/* Select native target if no target defined. */
38#ifndef LUAJIT_TARGET 52#ifndef LUAJIT_TARGET
39 53
@@ -43,14 +57,14 @@
43#define LUAJIT_TARGET LUAJIT_ARCH_X64 57#define LUAJIT_TARGET LUAJIT_ARCH_X64
44#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) 58#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
45#define LUAJIT_TARGET LUAJIT_ARCH_ARM 59#define LUAJIT_TARGET LUAJIT_ARCH_ARM
60#elif defined(__aarch64__)
61#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
46#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) 62#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
47#ifdef __NO_FPRS__
48#define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE
49#else
50#define LUAJIT_TARGET LUAJIT_ARCH_PPC 63#define LUAJIT_TARGET LUAJIT_ARCH_PPC
51#endif 64#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
65#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
52#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) 66#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
53#define LUAJIT_TARGET LUAJIT_ARCH_MIPS 67#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
54#else 68#else
55#error "No support for this architecture (yet)" 69#error "No support for this architecture (yet)"
56#endif 70#endif
@@ -65,12 +79,16 @@
65#elif defined(__linux__) 79#elif defined(__linux__)
66#define LUAJIT_OS LUAJIT_OS_LINUX 80#define LUAJIT_OS LUAJIT_OS_LINUX
67#elif defined(__MACH__) && defined(__APPLE__) 81#elif defined(__MACH__) && defined(__APPLE__)
82#include "TargetConditionals.h"
68#define LUAJIT_OS LUAJIT_OS_OSX 83#define LUAJIT_OS LUAJIT_OS_OSX
69#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ 84#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
70 defined(__NetBSD__) || defined(__OpenBSD__) || \ 85 defined(__NetBSD__) || defined(__OpenBSD__) || \
71 defined(__DragonFly__)) && !defined(__ORBIS__) 86 defined(__DragonFly__)) && !defined(__ORBIS__)
72#define LUAJIT_OS LUAJIT_OS_BSD 87#define LUAJIT_OS LUAJIT_OS_BSD
73#elif (defined(__sun__) && defined(__svr4__)) 88#elif (defined(__sun__) && defined(__svr4__))
89#define LJ_TARGET_SOLARIS 1
90#define LUAJIT_OS LUAJIT_OS_POSIX
91#elif defined(__HAIKU__)
74#define LUAJIT_OS LUAJIT_OS_POSIX 92#define LUAJIT_OS LUAJIT_OS_POSIX
75#elif defined(__CYGWIN__) 93#elif defined(__CYGWIN__)
76#define LJ_TARGET_CYGWIN 1 94#define LJ_TARGET_CYGWIN 1
@@ -99,10 +117,16 @@
99#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) 117#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
100#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) 118#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
101#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) 119#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
102#define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM) 120#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
103#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) 121#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
104#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX 122#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
105 123
124#if TARGET_OS_IPHONE
125#define LJ_TARGET_IOS 1
126#else
127#define LJ_TARGET_IOS 0
128#endif
129
106#ifdef __CELLOS_LV2__ 130#ifdef __CELLOS_LV2__
107#define LJ_TARGET_PS3 1 131#define LJ_TARGET_PS3 1
108#define LJ_TARGET_CONSOLE 1 132#define LJ_TARGET_CONSOLE 1
@@ -125,10 +149,20 @@
125#define LJ_TARGET_CONSOLE 1 149#define LJ_TARGET_CONSOLE 1
126#endif 150#endif
127 151
128#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ 152#ifdef _DURANGO
129#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ 153#define LJ_TARGET_XBOXONE 1
130#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ 154#define LJ_TARGET_CONSOLE 1
131#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ 155#define LJ_TARGET_GC64 1
156#endif
157
158#ifdef _UWP
159#define LJ_TARGET_UWP 1
160#if LUAJIT_TARGET == LUAJIT_ARCH_X64
161#define LJ_TARGET_GC64 1
162#endif
163#endif
164
165/* -- Arch-specific settings ---------------------------------------------- */
132 166
133/* Set target architecture properties. */ 167/* Set target architecture properties. */
134#if LUAJIT_TARGET == LUAJIT_ARCH_X86 168#if LUAJIT_TARGET == LUAJIT_ARCH_X86
@@ -136,14 +170,10 @@
136#define LJ_ARCH_NAME "x86" 170#define LJ_ARCH_NAME "x86"
137#define LJ_ARCH_BITS 32 171#define LJ_ARCH_BITS 32
138#define LJ_ARCH_ENDIAN LUAJIT_LE 172#define LJ_ARCH_ENDIAN LUAJIT_LE
139#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
140#define LJ_ABI_WIN 1
141#else
142#define LJ_ABI_WIN 0
143#endif
144#define LJ_TARGET_X86 1 173#define LJ_TARGET_X86 1
145#define LJ_TARGET_X86ORX64 1 174#define LJ_TARGET_X86ORX64 1
146#define LJ_TARGET_EHRETREG 0 175#define LJ_TARGET_EHRETREG 0
176#define LJ_TARGET_EHRAREG 8
147#define LJ_TARGET_MASKSHIFT 1 177#define LJ_TARGET_MASKSHIFT 1
148#define LJ_TARGET_MASKROT 1 178#define LJ_TARGET_MASKROT 1
149#define LJ_TARGET_UNALIGNED 1 179#define LJ_TARGET_UNALIGNED 1
@@ -154,19 +184,20 @@
154#define LJ_ARCH_NAME "x64" 184#define LJ_ARCH_NAME "x64"
155#define LJ_ARCH_BITS 64 185#define LJ_ARCH_BITS 64
156#define LJ_ARCH_ENDIAN LUAJIT_LE 186#define LJ_ARCH_ENDIAN LUAJIT_LE
157#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
158#define LJ_ABI_WIN 1
159#else
160#define LJ_ABI_WIN 0
161#endif
162#define LJ_TARGET_X64 1 187#define LJ_TARGET_X64 1
163#define LJ_TARGET_X86ORX64 1 188#define LJ_TARGET_X86ORX64 1
164#define LJ_TARGET_EHRETREG 0 189#define LJ_TARGET_EHRETREG 0
190#define LJ_TARGET_EHRAREG 16
165#define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */ 191#define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */
166#define LJ_TARGET_MASKSHIFT 1 192#define LJ_TARGET_MASKSHIFT 1
167#define LJ_TARGET_MASKROT 1 193#define LJ_TARGET_MASKROT 1
168#define LJ_TARGET_UNALIGNED 1 194#define LJ_TARGET_UNALIGNED 1
169#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL 195#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
196#ifndef LUAJIT_DISABLE_GC64
197#define LJ_TARGET_GC64 1
198#elif LJ_TARGET_OSX
199#error "macOS requires GC64 -- don't disable it"
200#endif
170 201
171#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM 202#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
172 203
@@ -182,40 +213,105 @@
182#define LJ_ABI_EABI 1 213#define LJ_ABI_EABI 1
183#define LJ_TARGET_ARM 1 214#define LJ_TARGET_ARM 1
184#define LJ_TARGET_EHRETREG 0 215#define LJ_TARGET_EHRETREG 0
216#define LJ_TARGET_EHRAREG 14
185#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ 217#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
186#define LJ_TARGET_MASKSHIFT 0 218#define LJ_TARGET_MASKSHIFT 0
187#define LJ_TARGET_MASKROT 1 219#define LJ_TARGET_MASKROT 1
188#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 220#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
189#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL 221#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
190 222
191#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ 223#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
192#define LJ_ARCH_VERSION 80 224#define LJ_ARCH_VERSION 80
193#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ 225#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
194#define LJ_ARCH_VERSION 70 226#define LJ_ARCH_VERSION 70
195#elif __ARM_ARCH_6T2__ 227#elif __ARM_ARCH_6T2__
196#define LJ_ARCH_VERSION 61 228#define LJ_ARCH_VERSION 61
197#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ 229#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
198#define LJ_ARCH_VERSION 60 230#define LJ_ARCH_VERSION 60
199#else 231#else
200#define LJ_ARCH_VERSION 50 232#define LJ_ARCH_VERSION 50
201#endif 233#endif
202 234
235#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
236
237#define LJ_ARCH_BITS 64
238#if defined(__AARCH64EB__)
239#define LJ_ARCH_NAME "arm64be"
240#define LJ_ARCH_ENDIAN LUAJIT_BE
241#else
242#define LJ_ARCH_NAME "arm64"
243#define LJ_ARCH_ENDIAN LUAJIT_LE
244#endif
245#define LJ_TARGET_ARM64 1
246#define LJ_TARGET_EHRETREG 0
247#define LJ_TARGET_EHRAREG 30
248#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
249#define LJ_TARGET_MASKSHIFT 1
250#define LJ_TARGET_MASKROT 1
251#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
252#define LJ_TARGET_GC64 1
253#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
254
255#define LJ_ARCH_VERSION 80
256
203#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC 257#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
204 258
205#define LJ_ARCH_NAME "ppc" 259#ifndef LJ_ARCH_ENDIAN
260#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
261#define LJ_ARCH_ENDIAN LUAJIT_LE
262#else
263#define LJ_ARCH_ENDIAN LUAJIT_BE
264#endif
265#endif
266
206#if _LP64 267#if _LP64
207#define LJ_ARCH_BITS 64 268#define LJ_ARCH_BITS 64
269#if LJ_ARCH_ENDIAN == LUAJIT_LE
270#define LJ_ARCH_NAME "ppc64le"
271#else
272#define LJ_ARCH_NAME "ppc64"
273#endif
208#else 274#else
209#define LJ_ARCH_BITS 32 275#define LJ_ARCH_BITS 32
276#define LJ_ARCH_NAME "ppc"
277
278#if !defined(LJ_ARCH_HASFPU)
279#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
280#define LJ_ARCH_HASFPU 0
281#else
282#define LJ_ARCH_HASFPU 1
210#endif 283#endif
211#define LJ_ARCH_ENDIAN LUAJIT_BE 284#endif
285
286#if !defined(LJ_ABI_SOFTFP)
287#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
288#define LJ_ABI_SOFTFP 1
289#else
290#define LJ_ABI_SOFTFP 0
291#endif
292#endif
293#endif
294
295#if LJ_ABI_SOFTFP
296#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
297#else
298#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
299#endif
300
212#define LJ_TARGET_PPC 1 301#define LJ_TARGET_PPC 1
213#define LJ_TARGET_EHRETREG 3 302#define LJ_TARGET_EHRETREG 3
303#define LJ_TARGET_EHRAREG 65
214#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ 304#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
215#define LJ_TARGET_MASKSHIFT 0 305#define LJ_TARGET_MASKSHIFT 0
216#define LJ_TARGET_MASKROT 1 306#define LJ_TARGET_MASKROT 1
217#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ 307#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
218#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE 308
309#if LJ_TARGET_CONSOLE
310#define LJ_ARCH_PPC32ON64 1
311#define LJ_ARCH_NOFFI 1
312#elif LJ_ARCH_BITS == 64
313#error "No support for PPC64"
314#endif
219 315
220#if _ARCH_PWR7 316#if _ARCH_PWR7
221#define LJ_ARCH_VERSION 70 317#define LJ_ARCH_VERSION 70
@@ -230,10 +326,6 @@
230#else 326#else
231#define LJ_ARCH_VERSION 0 327#define LJ_ARCH_VERSION 0
232#endif 328#endif
233#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
234#define LJ_ARCH_PPC64 1
235#define LJ_ARCH_NOFFI 1
236#endif
237#if _ARCH_PPCSQ 329#if _ARCH_PPCSQ
238#define LJ_ARCH_SQRT 1 330#define LJ_ARCH_SQRT 1
239#endif 331#endif
@@ -247,44 +339,80 @@
247#define LJ_ARCH_XENON 1 339#define LJ_ARCH_XENON 1
248#endif 340#endif
249 341
250#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE 342#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
251
252#define LJ_ARCH_NAME "ppcspe"
253#define LJ_ARCH_BITS 32
254#define LJ_ARCH_ENDIAN LUAJIT_BE
255#ifndef LJ_ABI_SOFTFP
256#define LJ_ABI_SOFTFP 1
257#endif
258#define LJ_ABI_EABI 1
259#define LJ_TARGET_PPCSPE 1
260#define LJ_TARGET_EHRETREG 3
261#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
262#define LJ_TARGET_MASKSHIFT 0
263#define LJ_TARGET_MASKROT 1
264#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
265#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
266#define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */
267#define LJ_ARCH_NOJIT 1
268
269#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
270 343
271#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) 344#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
345#if __mips_isa_rev >= 6
346#define LJ_TARGET_MIPSR6 1
347#define LJ_TARGET_UNALIGNED 1
348#endif
349#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
350#if LJ_TARGET_MIPSR6
351#define LJ_ARCH_NAME "mips32r6el"
352#else
272#define LJ_ARCH_NAME "mipsel" 353#define LJ_ARCH_NAME "mipsel"
354#endif
355#else
356#if LJ_TARGET_MIPSR6
357#define LJ_ARCH_NAME "mips64r6el"
358#else
359#define LJ_ARCH_NAME "mips64el"
360#endif
361#endif
273#define LJ_ARCH_ENDIAN LUAJIT_LE 362#define LJ_ARCH_ENDIAN LUAJIT_LE
274#else 363#else
364#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
365#if LJ_TARGET_MIPSR6
366#define LJ_ARCH_NAME "mips32r6"
367#else
275#define LJ_ARCH_NAME "mips" 368#define LJ_ARCH_NAME "mips"
369#endif
370#else
371#if LJ_TARGET_MIPSR6
372#define LJ_ARCH_NAME "mips64r6"
373#else
374#define LJ_ARCH_NAME "mips64"
375#endif
376#endif
276#define LJ_ARCH_ENDIAN LUAJIT_BE 377#define LJ_ARCH_ENDIAN LUAJIT_BE
277#endif 378#endif
379
380#if !defined(LJ_ARCH_HASFPU)
381#ifdef __mips_soft_float
382#define LJ_ARCH_HASFPU 0
383#else
384#define LJ_ARCH_HASFPU 1
385#endif
386#endif
387
388#if !defined(LJ_ABI_SOFTFP)
389#ifdef __mips_soft_float
390#define LJ_ABI_SOFTFP 1
391#else
392#define LJ_ABI_SOFTFP 0
393#endif
394#endif
395
396#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
278#define LJ_ARCH_BITS 32 397#define LJ_ARCH_BITS 32
398#define LJ_TARGET_MIPS32 1
399#else
400#define LJ_ARCH_BITS 64
401#define LJ_TARGET_MIPS64 1
402#define LJ_TARGET_GC64 1
403#endif
279#define LJ_TARGET_MIPS 1 404#define LJ_TARGET_MIPS 1
280#define LJ_TARGET_EHRETREG 4 405#define LJ_TARGET_EHRETREG 4
406#define LJ_TARGET_EHRAREG 31
281#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ 407#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
282#define LJ_TARGET_MASKSHIFT 1 408#define LJ_TARGET_MASKSHIFT 1
283#define LJ_TARGET_MASKROT 1 409#define LJ_TARGET_MASKROT 1
284#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 410#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
285#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE 411#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
286 412
287#if _MIPS_ARCH_MIPS32R2 413#if LJ_TARGET_MIPSR6
414#define LJ_ARCH_VERSION 60
415#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
288#define LJ_ARCH_VERSION 20 416#define LJ_ARCH_VERSION 20
289#else 417#else
290#define LJ_ARCH_VERSION 10 418#define LJ_ARCH_VERSION 10
@@ -294,9 +422,7 @@
294#error "No target architecture defined" 422#error "No target architecture defined"
295#endif 423#endif
296 424
297#ifndef LJ_PAGESIZE 425/* -- Checks for requirements --------------------------------------------- */
298#define LJ_PAGESIZE 4096
299#endif
300 426
301/* Check for minimum required compiler versions. */ 427/* Check for minimum required compiler versions. */
302#if defined(__GNUC__) 428#if defined(__GNUC__)
@@ -312,6 +438,16 @@
312#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) 438#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
313#error "Need at least GCC 4.2 or newer" 439#error "Need at least GCC 4.2 or newer"
314#endif 440#endif
441#elif LJ_TARGET_ARM64
442#if __clang__
443#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
444#error "Need at least Clang 3.5 or newer"
445#endif
446#else
447#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
448#error "Need at least GCC 4.8 or newer"
449#endif
450#endif
315#elif !LJ_TARGET_PS3 451#elif !LJ_TARGET_PS3
316#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) 452#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
317#error "Need at least GCC 4.3 or newer" 453#error "Need at least GCC 4.3 or newer"
@@ -335,26 +471,35 @@
335#if !(__ARM_EABI__ || LJ_TARGET_IOS) 471#if !(__ARM_EABI__ || LJ_TARGET_IOS)
336#error "Only ARM EABI or iOS 3.0+ ABI is supported" 472#error "Only ARM EABI or iOS 3.0+ ABI is supported"
337#endif 473#endif
338#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 474#elif LJ_TARGET_ARM64
339#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) 475#if defined(_ILP32)
340#error "No support for PowerPC CPUs without double-precision FPU" 476#error "No support for ILP32 model on ARM64"
341#endif 477#endif
478#elif LJ_TARGET_PPC
342#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) 479#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
343#error "No support for little-endian PowerPC" 480#error "No support for little-endian PPC32"
481#endif
482#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
483#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
344#endif 484#endif
345#if defined(_LP64) 485#elif LJ_TARGET_MIPS32
346#error "No support for PowerPC 64 bit mode" 486#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
487#error "Only o32 ABI supported for MIPS32"
347#endif 488#endif
348#elif LJ_TARGET_MIPS 489#if LJ_TARGET_MIPSR6
349#if defined(__mips_soft_float) 490/* Not that useful, since most available r6 CPUs are 64 bit. */
350#error "No support for MIPS CPUs without FPU" 491#error "No support for MIPS32R6"
351#endif 492#endif
352#if defined(_LP64) 493#elif LJ_TARGET_MIPS64
353#error "No support for MIPS64" 494#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
495/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
496#error "Only n64 ABI supported for MIPS64"
354#endif 497#endif
355#endif 498#endif
356#endif 499#endif
357 500
501/* -- Derived defines ----------------------------------------------------- */
502
358/* Enable or disable the dual-number mode for the VM. */ 503/* Enable or disable the dual-number mode for the VM. */
359#if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ 504#if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \
360 (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) 505 (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1)
@@ -376,6 +521,20 @@
376#endif 521#endif
377#endif 522#endif
378 523
524/* 64 bit GC references. */
525#if LJ_TARGET_GC64
526#define LJ_GC64 1
527#else
528#define LJ_GC64 0
529#endif
530
531/* 2-slot frame info. */
532#if LJ_GC64
533#define LJ_FR2 1
534#else
535#define LJ_FR2 0
536#endif
537
379/* Disable or enable the JIT compiler. */ 538/* Disable or enable the JIT compiler. */
380#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) 539#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
381#define LJ_HASJIT 0 540#define LJ_HASJIT 0
@@ -390,6 +549,28 @@
390#define LJ_HASFFI 1 549#define LJ_HASFFI 1
391#endif 550#endif
392 551
552/* Disable or enable the string buffer extension. */
553#if defined(LUAJIT_DISABLE_BUFFER)
554#define LJ_HASBUFFER 0
555#else
556#define LJ_HASBUFFER 1
557#endif
558
559#if defined(LUAJIT_DISABLE_PROFILE)
560#define LJ_HASPROFILE 0
561#elif LJ_TARGET_POSIX
562#define LJ_HASPROFILE 1
563#define LJ_PROFILE_SIGPROF 1
564#elif LJ_TARGET_PS3
565#define LJ_HASPROFILE 1
566#define LJ_PROFILE_PTHREAD 1
567#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
568#define LJ_HASPROFILE 1
569#define LJ_PROFILE_WTHREAD 1
570#else
571#define LJ_HASPROFILE 0
572#endif
573
393#ifndef LJ_ARCH_HASFPU 574#ifndef LJ_ARCH_HASFPU
394#define LJ_ARCH_HASFPU 1 575#define LJ_ARCH_HASFPU 1
395#endif 576#endif
@@ -397,6 +578,7 @@
397#define LJ_ABI_SOFTFP 0 578#define LJ_ABI_SOFTFP 0
398#endif 579#endif
399#define LJ_SOFTFP (!LJ_ARCH_HASFPU) 580#define LJ_SOFTFP (!LJ_ARCH_HASFPU)
581#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
400 582
401#if LJ_ARCH_ENDIAN == LUAJIT_BE 583#if LJ_ARCH_ENDIAN == LUAJIT_BE
402#define LJ_LE 0 584#define LJ_LE 0
@@ -422,26 +604,52 @@
422#define LJ_TARGET_UNALIGNED 0 604#define LJ_TARGET_UNALIGNED 0
423#endif 605#endif
424 606
425/* Various workarounds for embedded operating systems. */ 607#ifndef LJ_PAGESIZE
426#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 608#define LJ_PAGESIZE 4096
427#define LUAJIT_NO_LOG2
428#endif 609#endif
429#if defined(__symbian__) 610
430#define LUAJIT_NO_EXP2 611/* Various workarounds for embedded operating systems or weak C runtimes. */
612#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
613#define LUAJIT_NO_LOG2
431#endif 614#endif
432#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) 615#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
433#define LJ_NO_SYSTEM 1 616#define LJ_NO_SYSTEM 1
434#endif 617#endif
435 618
436#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ 619#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
437/* NYI: no support for compact unwind specification, yet. */ 620#define LJ_ABI_WIN 1
438#define LUAJIT_NO_UNWIND 1 621#else
622#define LJ_ABI_WIN 0
439#endif 623#endif
440 624
441#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 625#if LJ_TARGET_WINDOWS
626#if LJ_TARGET_UWP
627#define LJ_WIN_VALLOC VirtualAllocFromApp
628#define LJ_WIN_VPROTECT VirtualProtectFromApp
629extern void *LJ_WIN_LOADLIBA(const char *path);
630#else
631#define LJ_WIN_VALLOC VirtualAlloc
632#define LJ_WIN_VPROTECT VirtualProtect
633#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
634#endif
635#endif
636
637#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
442#define LJ_NO_UNWIND 1 638#define LJ_NO_UNWIND 1
443#endif 639#endif
444 640
641#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
642#define LJ_UNWIND_EXT 1
643#else
644#define LJ_UNWIND_EXT 0
645#endif
646
647#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
648#define LJ_UNWIND_JIT 1
649#else
650#define LJ_UNWIND_JIT 0
651#endif
652
445/* Compatibility with Lua 5.1 vs. 5.2. */ 653/* Compatibility with Lua 5.1 vs. 5.2. */
446#ifdef LUAJIT_ENABLE_LUA52COMPAT 654#ifdef LUAJIT_ENABLE_LUA52COMPAT
447#define LJ_52 1 655#define LJ_52 1
@@ -449,4 +657,46 @@
449#define LJ_52 0 657#define LJ_52 0
450#endif 658#endif
451 659
660/* -- VM security --------------------------------------------------------- */
661
662/* Don't make any changes here. Instead build with:
663** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value"
664**
665** Important note to distro maintainers: DO NOT change the defaults for a
666** regular distro build -- neither upwards, nor downwards!
667** These build-time configurable security flags are intended for embedders
668** who may have specific needs wrt. security vs. performance.
669*/
670
671/* Security defaults. */
672#ifndef LUAJIT_SECURITY_PRNG
673/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */
674#define LUAJIT_SECURITY_PRNG 1
675#endif
676
677#ifndef LUAJIT_SECURITY_STRHASH
678/* String hash: 0 = sparse only, 1 = sparse + dense. */
679#define LUAJIT_SECURITY_STRHASH 1
680#endif
681
682#ifndef LUAJIT_SECURITY_STRID
683/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */
684#define LUAJIT_SECURITY_STRID 1
685#endif
686
687#ifndef LUAJIT_SECURITY_MCODE
688/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */
689#define LUAJIT_SECURITY_MCODE 1
690#endif
691
692#define LJ_SECURITY_MODE \
693 ( 0u \
694 | ((LUAJIT_SECURITY_PRNG & 3) << 0) \
695 | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \
696 | ((LUAJIT_SECURITY_STRID & 3) << 4) \
697 | ((LUAJIT_SECURITY_MCODE & 3) << 6) \
698 )
699#define LJ_SECURITY_MODESTRING \
700 "\004prng\007strhash\005strid\005mcode"
701
452#endif 702#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 41006873..b0e6d313 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_buf.h"
14#include "lj_str.h" 15#include "lj_str.h"
15#include "lj_tab.h" 16#include "lj_tab.h"
16#include "lj_frame.h" 17#include "lj_frame.h"
@@ -71,6 +72,7 @@ typedef struct ASMState {
71 IRRef snaprename; /* Rename highwater mark for snapshot check. */ 72 IRRef snaprename; /* Rename highwater mark for snapshot check. */
72 SnapNo snapno; /* Current snapshot number. */ 73 SnapNo snapno; /* Current snapshot number. */
73 SnapNo loopsnapno; /* Loop snapshot number. */ 74 SnapNo loopsnapno; /* Loop snapshot number. */
75 int snapalloc; /* Current snapshot needs allocation. */
74 BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ 76 BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */
75 77
76 IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ 78 IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
@@ -85,18 +87,25 @@ typedef struct ASMState {
85 87
86 MCode *mcbot; /* Bottom of reserved MCode. */ 88 MCode *mcbot; /* Bottom of reserved MCode. */
87 MCode *mctop; /* Top of generated MCode. */ 89 MCode *mctop; /* Top of generated MCode. */
90 MCode *mctoporig; /* Original top of generated MCode. */
88 MCode *mcloop; /* Pointer to loop MCode (or NULL). */ 91 MCode *mcloop; /* Pointer to loop MCode (or NULL). */
89 MCode *invmcp; /* Points to invertible loop branch (or NULL). */ 92 MCode *invmcp; /* Points to invertible loop branch (or NULL). */
90 MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ 93 MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
91 MCode *realign; /* Realign loop if not NULL. */ 94 MCode *realign; /* Realign loop if not NULL. */
92 95
93#ifdef RID_NUM_KREF 96#ifdef RID_NUM_KREF
94 int32_t krefk[RID_NUM_KREF]; 97 intptr_t krefk[RID_NUM_KREF];
95#endif 98#endif
96 IRRef1 phireg[RID_MAX]; /* PHI register references. */ 99 IRRef1 phireg[RID_MAX]; /* PHI register references. */
97 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ 100 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */
98} ASMState; 101} ASMState;
99 102
103#ifdef LUA_USE_ASSERT
104#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
105#else
106#define lj_assertA(c, ...) ((void)as)
107#endif
108
100#define IR(ref) (&as->ir[(ref)]) 109#define IR(ref) (&as->ir[(ref)])
101 110
102#define ASMREF_TMP1 REF_TRUE /* Temp. register. */ 111#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
128#ifdef LUA_USE_ASSERT 137#ifdef LUA_USE_ASSERT
129 if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { 138 if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
130 IRIns *ir = IR(as->curins+1); 139 IRIns *ir = IR(as->curins+1);
131 fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, 140 lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
132 as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); 141 as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
133 lua_assert(0);
134 } 142 }
135#endif 143#endif
136 if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); 144 if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
@@ -144,7 +152,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
144#define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) 152#define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref)))
145#define ra_krefk(as, ref) (as->krefk[(ref)]) 153#define ra_krefk(as, ref) (as->krefk[(ref)])
146 154
147static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) 155static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
148{ 156{
149 IRRef ref = (IRRef)(r - RID_MIN_KREF); 157 IRRef ref = (IRRef)(r - RID_MIN_KREF);
150 as->krefk[ref] = k; 158 as->krefk[ref] = k;
@@ -171,6 +179,8 @@ IRFLDEF(FLOFS)
171#include "lj_emit_x86.h" 179#include "lj_emit_x86.h"
172#elif LJ_TARGET_ARM 180#elif LJ_TARGET_ARM
173#include "lj_emit_arm.h" 181#include "lj_emit_arm.h"
182#elif LJ_TARGET_ARM64
183#include "lj_emit_arm64.h"
174#elif LJ_TARGET_PPC 184#elif LJ_TARGET_PPC
175#include "lj_emit_ppc.h" 185#include "lj_emit_ppc.h"
176#elif LJ_TARGET_MIPS 186#elif LJ_TARGET_MIPS
@@ -179,6 +189,12 @@ IRFLDEF(FLOFS)
179#error "Missing instruction emitter for target CPU" 189#error "Missing instruction emitter for target CPU"
180#endif 190#endif
181 191
192/* Generic load/store of register from/to stack slot. */
193#define emit_spload(as, ir, r, ofs) \
194 emit_loadofs(as, ir, (r), RID_SP, (ofs))
195#define emit_spstore(as, ir, r, ofs) \
196 emit_storeofs(as, ir, (r), RID_SP, (ofs))
197
182/* -- Register allocator debugging ---------------------------------------- */ 198/* -- Register allocator debugging ---------------------------------------- */
183 199
184/* #define LUAJIT_DEBUG_RA */ 200/* #define LUAJIT_DEBUG_RA */
@@ -236,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
236 *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; 252 *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
237 } else { 253 } else {
238 *p++ = '?'; 254 *p++ = '?';
239 lua_assert(0); 255 lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
240 } 256 }
241 } else if (e[1] == 'f' || e[1] == 'i') { 257 } else if (e[1] == 'f' || e[1] == 'i') {
242 IRRef ref; 258 IRRef ref;
@@ -254,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
254 } else if (e[1] == 'x') { 270 } else if (e[1] == 'x') {
255 p += sprintf(p, "%08x", va_arg(argp, int32_t)); 271 p += sprintf(p, "%08x", va_arg(argp, int32_t));
256 } else { 272 } else {
257 lua_assert(0); 273 lj_assertA(0, "bad debug format code");
258 } 274 }
259 fmt = e+2; 275 fmt = e+2;
260 } 276 }
@@ -313,37 +329,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
313 Reg r; 329 Reg r;
314 if (ra_iskref(ref)) { 330 if (ra_iskref(ref)) {
315 r = ra_krefreg(ref); 331 r = ra_krefreg(ref);
316 lua_assert(!rset_test(as->freeset, r)); 332 lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
317 ra_free(as, r); 333 ra_free(as, r);
318 ra_modified(as, r); 334 ra_modified(as, r);
335#if LJ_64
336 emit_loadu64(as, r, ra_krefk(as, ref));
337#else
319 emit_loadi(as, r, ra_krefk(as, ref)); 338 emit_loadi(as, r, ra_krefk(as, ref));
339#endif
320 return r; 340 return r;
321 } 341 }
322 ir = IR(ref); 342 ir = IR(ref);
323 r = ir->r; 343 r = ir->r;
324 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); 344 lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
345 lj_assertA(!ra_hasspill(ir->s),
346 "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
325 ra_free(as, r); 347 ra_free(as, r);
326 ra_modified(as, r); 348 ra_modified(as, r);
327 ir->r = RID_INIT; /* Do not keep any hint. */ 349 ir->r = RID_INIT; /* Do not keep any hint. */
328 RA_DBGX((as, "remat $i $r", ir, r)); 350 RA_DBGX((as, "remat $i $r", ir, r));
329#if !LJ_SOFTFP 351#if !LJ_SOFTFP32
330 if (ir->o == IR_KNUM) { 352 if (ir->o == IR_KNUM) {
331 emit_loadn(as, r, ir_knum(ir)); 353 emit_loadk64(as, r, ir);
332 } else 354 } else
333#endif 355#endif
334 if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { 356 if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
335 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ 357 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
336 emit_getgl(as, r, jit_base); 358 emit_getgl(as, r, jit_base);
337 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { 359 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
338 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ 360 /* REF_NIL stores ASMREF_L register. */
339 emit_getgl(as, r, jit_L); 361 lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
362 emit_getgl(as, r, cur_L);
340#if LJ_64 363#if LJ_64
341 } else if (ir->o == IR_KINT64) { 364 } else if (ir->o == IR_KINT64) {
342 emit_loadu64(as, r, ir_kint64(ir)->u64); 365 emit_loadu64(as, r, ir_kint64(ir)->u64);
366#if LJ_GC64
367 } else if (ir->o == IR_KGC) {
368 emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
369 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
370 emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
371#endif
343#endif 372#endif
344 } else { 373 } else {
345 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 374 lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
346 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 375 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
376 "rematk of bad IR op %d", ir->o);
347 emit_loadi(as, r, ir->i); 377 emit_loadi(as, r, ir->i);
348 } 378 }
349 return r; 379 return r;
@@ -353,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
353static int32_t ra_spill(ASMState *as, IRIns *ir) 383static int32_t ra_spill(ASMState *as, IRIns *ir)
354{ 384{
355 int32_t slot = ir->s; 385 int32_t slot = ir->s;
356 lua_assert(ir >= as->ir + REF_TRUE); 386 lj_assertA(ir >= as->ir + REF_TRUE,
387 "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
357 if (!ra_hasspill(slot)) { 388 if (!ra_hasspill(slot)) {
358 if (irt_is64(ir->t)) { 389 if (irt_is64(ir->t)) {
359 slot = as->evenspill; 390 slot = as->evenspill;
@@ -378,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
378{ 409{
379 IRIns *ir = IR(ref); 410 IRIns *ir = IR(ref);
380 Reg r = ir->r; 411 Reg r = ir->r;
381 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); 412 lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
413 lj_assertA(!ra_hasspill(ir->s),
414 "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
382 ra_free(as, r); 415 ra_free(as, r);
383 ra_modified(as, r); 416 ra_modified(as, r);
384 ir->r = RID_INIT; 417 ir->r = RID_INIT;
@@ -394,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
394 IRIns *ir = IR(ref); 427 IRIns *ir = IR(ref);
395 int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ 428 int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */
396 Reg r = ir->r; 429 Reg r = ir->r;
397 lua_assert(ra_hasreg(r)); 430 lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
398 ra_sethint(ir->r, r); /* Keep hint. */ 431 ra_sethint(ir->r, r); /* Keep hint. */
399 ra_free(as, r); 432 ra_free(as, r);
400 if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ 433 if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
@@ -423,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
423{ 456{
424 IRRef ref; 457 IRRef ref;
425 RegCost cost = ~(RegCost)0; 458 RegCost cost = ~(RegCost)0;
426 lua_assert(allow != RSET_EMPTY); 459 lj_assertA(allow != RSET_EMPTY, "evict from empty set");
427 if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { 460 if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
428 GPRDEF(MINCOST) 461 GPRDEF(MINCOST)
429 } else { 462 } else {
430 FPRDEF(MINCOST) 463 FPRDEF(MINCOST)
431 } 464 }
432 ref = regcost_ref(cost); 465 ref = regcost_ref(cost);
433 lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); 466 lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
467 "evict of out-of-range IR %04d", ref - REF_BIAS);
434 /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ 468 /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
435 if (!irref_isk(ref) && (as->weakset & allow)) { 469 if (!irref_isk(ref) && (as->weakset & allow)) {
436 IRIns *ir = IR(ref); 470 IRIns *ir = IR(ref);
@@ -512,7 +546,7 @@ static void ra_evictk(ASMState *as)
512 546
513#ifdef RID_NUM_KREF 547#ifdef RID_NUM_KREF
514/* Allocate a register for a constant. */ 548/* Allocate a register for a constant. */
515static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) 549static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
516{ 550{
517 /* First try to find a register which already holds the same constant. */ 551 /* First try to find a register which already holds the same constant. */
518 RegSet pick, work = ~as->freeset & RSET_GPR; 552 RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -521,9 +555,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
521 IRRef ref; 555 IRRef ref;
522 r = rset_pickbot(work); 556 r = rset_pickbot(work);
523 ref = regcost_ref(as->cost[r]); 557 ref = regcost_ref(as->cost[r]);
558#if LJ_64
559 if (ref < ASMREF_L) {
560 if (ra_iskref(ref)) {
561 if (k == ra_krefk(as, ref))
562 return r;
563 } else {
564 IRIns *ir = IR(ref);
565 if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
566#if LJ_GC64
567 (ir->o == IR_KINT && k == ir->i) ||
568 (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
569 ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
570 k == (intptr_t)ir_kptr(ir))
571#else
572 (ir->o != IR_KINT64 && k == ir->i)
573#endif
574 )
575 return r;
576 }
577 }
578#else
524 if (ref < ASMREF_L && 579 if (ref < ASMREF_L &&
525 k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) 580 k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
526 return r; 581 return r;
582#endif
527 rset_clear(work, r); 583 rset_clear(work, r);
528 } 584 }
529 pick = as->freeset & allow; 585 pick = as->freeset & allow;
@@ -543,7 +599,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
543} 599}
544 600
545/* Allocate a specific register for a constant. */ 601/* Allocate a specific register for a constant. */
546static void ra_allockreg(ASMState *as, int32_t k, Reg r) 602static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
547{ 603{
548 Reg kr = ra_allock(as, k, RID2RSET(r)); 604 Reg kr = ra_allock(as, k, RID2RSET(r));
549 if (kr != r) { 605 if (kr != r) {
@@ -566,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
566 IRIns *ir = IR(ref); 622 IRIns *ir = IR(ref);
567 RegSet pick = as->freeset & allow; 623 RegSet pick = as->freeset & allow;
568 Reg r; 624 Reg r;
569 lua_assert(ra_noreg(ir->r)); 625 lj_assertA(ra_noreg(ir->r),
626 "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
570 if (pick) { 627 if (pick) {
571 /* First check register hint from propagation or PHI. */ 628 /* First check register hint from propagation or PHI. */
572 if (ra_hashint(ir->r)) { 629 if (ra_hashint(ir->r)) {
@@ -613,15 +670,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
613 return r; 670 return r;
614} 671}
615 672
673/* Add a register rename to the IR. */
674static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
675{
676 IRRef ren;
677 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
678 ren = tref_ref(lj_ir_emit(as->J));
679 as->J->cur.ir[ren].r = (uint8_t)down;
680 as->J->cur.ir[ren].s = SPS_NONE;
681}
682
616/* Rename register allocation and emit move. */ 683/* Rename register allocation and emit move. */
617static void ra_rename(ASMState *as, Reg down, Reg up) 684static void ra_rename(ASMState *as, Reg down, Reg up)
618{ 685{
619 IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); 686 IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
620 IRIns *ir = IR(ref); 687 IRIns *ir = IR(ref);
621 ir->r = (uint8_t)up; 688 ir->r = (uint8_t)up;
622 as->cost[down] = 0; 689 as->cost[down] = 0;
623 lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); 690 lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
624 lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); 691 "rename between GPR/FPR %d and %d", down, up);
692 lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
693 lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
625 ra_free(as, down); /* 'down' is free ... */ 694 ra_free(as, down); /* 'down' is free ... */
626 ra_modified(as, down); 695 ra_modified(as, down);
627 rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ 696 rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
@@ -629,11 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
629 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); 698 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
630 emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ 699 emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
631 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ 700 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
632 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); 701 /*
633 ren = tref_ref(lj_ir_emit(as->J)); 702 ** The rename is effective at the subsequent (already emitted) exit
634 as->ir = as->T->ir; /* The IR may have been reallocated. */ 703 ** branch. This is for the current snapshot (as->snapno). Except if we
635 IR(ren)->r = (uint8_t)down; 704 ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
636 IR(ren)->s = SPS_NONE; 705 ** then it belongs to the next snapshot.
706 ** See also the discussion at asm_snap_checkrename().
707 */
708 ra_addrename(as, down, ref, as->snapno + as->snapalloc);
637 } 709 }
638} 710}
639 711
@@ -666,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
666{ 738{
667 Reg dest = ra_dest(as, ir, RID2RSET(r)); 739 Reg dest = ra_dest(as, ir, RID2RSET(r));
668 if (dest != r) { 740 if (dest != r) {
669 lua_assert(rset_test(as->freeset, r)); 741 lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
670 ra_modified(as, r); 742 ra_modified(as, r);
671 emit_movrr(as, ir, dest, r); 743 emit_movrr(as, ir, dest, r);
672 } 744 }
@@ -683,20 +755,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
683 if (ra_noreg(left)) { 755 if (ra_noreg(left)) {
684 if (irref_isk(lref)) { 756 if (irref_isk(lref)) {
685 if (ir->o == IR_KNUM) { 757 if (ir->o == IR_KNUM) {
686 cTValue *tv = ir_knum(ir);
687 /* FP remat needs a load except for +0. Still better than eviction. */ 758 /* FP remat needs a load except for +0. Still better than eviction. */
688 if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { 759 if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
689 emit_loadn(as, dest, tv); 760 emit_loadk64(as, dest, ir);
690 return; 761 return;
691 } 762 }
692#if LJ_64 763#if LJ_64
693 } else if (ir->o == IR_KINT64) { 764 } else if (ir->o == IR_KINT64) {
694 emit_loadu64(as, dest, ir_kint64(ir)->u64); 765 emit_loadk64(as, dest, ir);
766 return;
767#if LJ_GC64
768 } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
769 emit_loadk64(as, dest, ir);
695 return; 770 return;
696#endif 771#endif
697 } else { 772#endif
698 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 773 } else if (ir->o != IR_KPRI) {
699 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 774 lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
775 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
776 "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
700 emit_loadi(as, dest, ir->i); 777 emit_loadi(as, dest, ir->i);
701 return; 778 return;
702 } 779 }
@@ -841,11 +918,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
841#endif 918#endif
842 { /* Allocate stored values for TNEW, TDUP and CNEW. */ 919 { /* Allocate stored values for TNEW, TDUP and CNEW. */
843 IRIns *irs; 920 IRIns *irs;
844 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); 921 lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
922 "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
845 for (irs = IR(as->snapref-1); irs > ir; irs--) 923 for (irs = IR(as->snapref-1); irs > ir; irs--)
846 if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { 924 if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
847 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || 925 lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
848 irs->o == IR_FSTORE || irs->o == IR_XSTORE); 926 irs->o == IR_FSTORE || irs->o == IR_XSTORE,
927 "sunk store IR %04d has bad op %d",
928 (int)(irs - as->ir) - REF_BIAS, irs->o);
849 asm_snap_alloc1(as, irs->op2); 929 asm_snap_alloc1(as, irs->op2);
850 if (LJ_32 && (irs+1)->o == IR_HIOP) 930 if (LJ_32 && (irs+1)->o == IR_HIOP)
851 asm_snap_alloc1(as, (irs+1)->op2); 931 asm_snap_alloc1(as, (irs+1)->op2);
@@ -881,9 +961,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
881} 961}
882 962
883/* Allocate refs escaping to a snapshot. */ 963/* Allocate refs escaping to a snapshot. */
884static void asm_snap_alloc(ASMState *as) 964static void asm_snap_alloc(ASMState *as, int snapno)
885{ 965{
886 SnapShot *snap = &as->T->snap[as->snapno]; 966 SnapShot *snap = &as->T->snap[snapno];
887 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 967 SnapEntry *map = &as->T->snapmap[snap->mapofs];
888 MSize n, nent = snap->nent; 968 MSize n, nent = snap->nent;
889 as->snapfilt1 = as->snapfilt2 = 0; 969 as->snapfilt1 = as->snapfilt2 = 0;
@@ -893,7 +973,9 @@ static void asm_snap_alloc(ASMState *as)
893 if (!irref_isk(ref)) { 973 if (!irref_isk(ref)) {
894 asm_snap_alloc1(as, ref); 974 asm_snap_alloc1(as, ref);
895 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { 975 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
896 lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); 976 lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
977 "snap %d[%d] points to bad SOFTFP IR %04d",
978 snapno, n, ref - REF_BIAS);
897 asm_snap_alloc1(as, ref+1); 979 asm_snap_alloc1(as, ref+1);
898 } 980 }
899 } 981 }
@@ -919,67 +1001,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
919 return 0; /* Not found. */ 1001 return 0; /* Not found. */
920} 1002}
921 1003
922/* Prepare snapshot for next guard instruction. */ 1004/* Prepare snapshot for next guard or throwing instruction. */
923static void asm_snap_prep(ASMState *as) 1005static void asm_snap_prep(ASMState *as)
924{ 1006{
925 if (as->curins < as->snapref) { 1007 if (as->snapalloc) {
926 do { 1008 /* Alloc on first invocation for each snapshot. */
927 if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ 1009 as->snapalloc = 0;
928 as->snapno--; 1010 asm_snap_alloc(as, as->snapno);
929 as->snapref = as->T->snap[as->snapno].ref;
930 } while (as->curins < as->snapref);
931 asm_snap_alloc(as);
932 as->snaprename = as->T->nins; 1011 as->snaprename = as->T->nins;
933 } else { 1012 } else {
934 /* Process any renames above the highwater mark. */ 1013 /* Check any renames above the highwater mark. */
935 for (; as->snaprename < as->T->nins; as->snaprename++) { 1014 for (; as->snaprename < as->T->nins; as->snaprename++) {
936 IRIns *ir = IR(as->snaprename); 1015 IRIns *ir = &as->T->ir[as->snaprename];
937 if (asm_snap_checkrename(as, ir->op1)) 1016 if (asm_snap_checkrename(as, ir->op1))
938 ir->op2 = REF_BIAS-1; /* Kill rename. */ 1017 ir->op2 = REF_BIAS-1; /* Kill rename. */
939 } 1018 }
940 } 1019 }
941} 1020}
942 1021
943/* -- Miscellaneous helpers ----------------------------------------------- */ 1022/* Move to previous snapshot when we cross the current snapshot ref. */
944 1023static void asm_snap_prev(ASMState *as)
945/* Collect arguments from CALL* and CARG instructions. */
946static void asm_collectargs(ASMState *as, IRIns *ir,
947 const CCallInfo *ci, IRRef *args)
948{ 1024{
949 uint32_t n = CCI_NARGS(ci); 1025 if (as->curins < as->snapref) {
950 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ 1026 ptrdiff_t ofs = as->mctoporig - as->mcp;
951 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } 1027 if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
952 while (n-- > 1) { 1028 do {
953 ir = IR(ir->op1); 1029 if (as->snapno == 0) return;
954 lua_assert(ir->o == IR_CARG); 1030 as->snapno--;
955 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; 1031 as->snapref = as->T->snap[as->snapno].ref;
1032 as->T->snap[as->snapno].mcofs = ofs; /* Remember mcode offset. */
1033 } while (as->curins < as->snapref); /* May have no ins inbetween. */
1034 as->snapalloc = 1;
956 } 1035 }
957 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
958 lua_assert(IR(ir->op1)->o != IR_CARG);
959} 1036}
960 1037
961/* Reconstruct CCallInfo flags for CALLX*. */ 1038/* Fixup snapshot mcode offsetst. */
962static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) 1039static void asm_snap_fixup_mcofs(ASMState *as)
963{ 1040{
964 uint32_t nargs = 0; 1041 uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
965 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ 1042 SnapShot *snap = as->T->snap;
966 IRIns *ira = IR(ir->op1); 1043 SnapNo i;
967 nargs++; 1044 for (i = as->T->nsnap-1; i > 0; i--) {
968 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } 1045 /* Compute offset from mcode start and store in correct snapshot. */
1046 snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
969 } 1047 }
970#if LJ_HASFFI 1048 snap[0].mcofs = 0;
971 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
972 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
973 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
974 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
975#if LJ_TARGET_X86
976 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
977#endif
978 }
979#endif
980 return (nargs | (ir->t.irt << CCI_OTSHIFT));
981} 1049}
982 1050
1051/* -- Miscellaneous helpers ----------------------------------------------- */
1052
983/* Calculate stack adjustment. */ 1053/* Calculate stack adjustment. */
984static int32_t asm_stack_adjust(ASMState *as) 1054static int32_t asm_stack_adjust(ASMState *as)
985{ 1055{
@@ -989,21 +1059,26 @@ static int32_t asm_stack_adjust(ASMState *as)
989} 1059}
990 1060
991/* Must match with hash*() in lj_tab.c. */ 1061/* Must match with hash*() in lj_tab.c. */
992static uint32_t ir_khash(IRIns *ir) 1062static uint32_t ir_khash(ASMState *as, IRIns *ir)
993{ 1063{
994 uint32_t lo, hi; 1064 uint32_t lo, hi;
1065 UNUSED(as);
995 if (irt_isstr(ir->t)) { 1066 if (irt_isstr(ir->t)) {
996 return ir_kstr(ir)->hash; 1067 return ir_kstr(ir)->sid;
997 } else if (irt_isnum(ir->t)) { 1068 } else if (irt_isnum(ir->t)) {
998 lo = ir_knum(ir)->u32.lo; 1069 lo = ir_knum(ir)->u32.lo;
999 hi = ir_knum(ir)->u32.hi << 1; 1070 hi = ir_knum(ir)->u32.hi << 1;
1000 } else if (irt_ispri(ir->t)) { 1071 } else if (irt_ispri(ir->t)) {
1001 lua_assert(!irt_isnil(ir->t)); 1072 lj_assertA(!irt_isnil(ir->t), "hash of nil key");
1002 return irt_type(ir->t)-IRT_FALSE; 1073 return irt_type(ir->t)-IRT_FALSE;
1003 } else { 1074 } else {
1004 lua_assert(irt_isgcv(ir->t)); 1075 lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
1005 lo = u32ptr(ir_kgc(ir)); 1076 lo = u32ptr(ir_kgc(ir));
1077#if LJ_GC64
1078 hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
1079#else
1006 hi = lo + HASH_BIAS; 1080 hi = lo + HASH_BIAS;
1081#endif
1007 } 1082 }
1008 return hashrot(lo, hi); 1083 return hashrot(lo, hi);
1009} 1084}
@@ -1017,6 +1092,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
1017{ 1092{
1018 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; 1093 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
1019 IRRef args[3]; 1094 IRRef args[3];
1095 asm_snap_prep(as);
1020 args[0] = ASMREF_L; /* lua_State *L */ 1096 args[0] = ASMREF_L; /* lua_State *L */
1021 args[1] = ir->op1; /* const char *str */ 1097 args[1] = ir->op1; /* const char *str */
1022 args[2] = ir->op2; /* size_t len */ 1098 args[2] = ir->op2; /* size_t len */
@@ -1029,6 +1105,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
1029{ 1105{
1030 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; 1106 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
1031 IRRef args[2]; 1107 IRRef args[2];
1108 asm_snap_prep(as);
1032 args[0] = ASMREF_L; /* lua_State *L */ 1109 args[0] = ASMREF_L; /* lua_State *L */
1033 args[1] = ASMREF_TMP1; /* uint32_t ahsize */ 1110 args[1] = ASMREF_TMP1; /* uint32_t ahsize */
1034 as->gcsteps++; 1111 as->gcsteps++;
@@ -1041,6 +1118,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
1041{ 1118{
1042 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; 1119 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
1043 IRRef args[2]; 1120 IRRef args[2];
1121 asm_snap_prep(as);
1044 args[0] = ASMREF_L; /* lua_State *L */ 1122 args[0] = ASMREF_L; /* lua_State *L */
1045 args[1] = ir->op1; /* const GCtab *kt */ 1123 args[1] = ir->op1; /* const GCtab *kt */
1046 as->gcsteps++; 1124 as->gcsteps++;
@@ -1064,6 +1142,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1064 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1142 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1065} 1143}
1066 1144
1145/* -- Buffer operations --------------------------------------------------- */
1146
1147static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
1148#if LJ_HASBUFFER
1149static void asm_bufhdr_write(ASMState *as, Reg sb);
1150#endif
1151
1152static void asm_bufhdr(ASMState *as, IRIns *ir)
1153{
1154 Reg sb = ra_dest(as, ir, RSET_GPR);
1155 switch (ir->op2) {
1156 case IRBUFHDR_RESET: {
1157 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1158 IRIns irbp;
1159 irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */
1160 emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
1161 emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
1162 break;
1163 }
1164 case IRBUFHDR_APPEND: {
1165 /* Rematerialize const buffer pointer instead of likely spill. */
1166 IRIns *irp = IR(ir->op1);
1167 if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1168 (irp == ir-2 && !ra_used(ir-1)))) {
1169 while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
1170 irp = IR(irp->op1);
1171 if (irref_isk(irp->op1)) {
1172 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1173 ir = irp;
1174 }
1175 }
1176 break;
1177 }
1178#if LJ_HASBUFFER
1179 case IRBUFHDR_WRITE:
1180 asm_bufhdr_write(as, sb);
1181 break;
1182#endif
1183 default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
1184 }
1185#if LJ_TARGET_X86ORX64
1186 ra_left(as, sb, ir->op1);
1187#else
1188 ra_leftov(as, sb, ir->op1);
1189#endif
1190}
1191
1192static void asm_bufput(ASMState *as, IRIns *ir)
1193{
1194 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1195 IRRef args[3];
1196 IRIns *irs;
1197 int kchar = -129;
1198 args[0] = ir->op1; /* SBuf * */
1199 args[1] = ir->op2; /* GCstr * */
1200 irs = IR(ir->op2);
1201 lj_assertA(irt_isstr(irs->t),
1202 "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
1203 if (irs->o == IR_KGC) {
1204 GCstr *s = ir_kstr(irs);
1205 if (s->len == 1) { /* Optimize put of single-char string constant. */
1206 kchar = (int8_t)strdata(s)[0]; /* Signed! */
1207 args[1] = ASMREF_TMP1; /* int, truncated to char */
1208 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1209 }
1210 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1211 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1212 if (irs->op2 == IRTOSTR_NUM) {
1213 args[1] = ASMREF_TMP1; /* TValue * */
1214 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1215 } else {
1216 lj_assertA(irt_isinteger(IR(irs->op1)->t),
1217 "TOSTR of non-numeric IR %04d", irs->op1);
1218 args[1] = irs->op1; /* int */
1219 if (irs->op2 == IRTOSTR_INT)
1220 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1221 else
1222 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1223 }
1224 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1225 args[1] = irs->op1; /* const void * */
1226 args[2] = irs->op2; /* MSize */
1227 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1228 }
1229 }
1230 asm_setupresult(as, ir, ci); /* SBuf * */
1231 asm_gencall(as, ci, args);
1232 if (args[1] == ASMREF_TMP1) {
1233 Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1234 if (kchar == -129)
1235 asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
1236 else
1237 ra_allockreg(as, kchar, tmp);
1238 }
1239}
1240
1241static void asm_bufstr(ASMState *as, IRIns *ir)
1242{
1243 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1244 IRRef args[1];
1245 args[0] = ir->op1; /* SBuf *sb */
1246 as->gcsteps++;
1247 asm_setupresult(as, ir, ci); /* GCstr * */
1248 asm_gencall(as, ci, args);
1249}
1250
1251/* -- Type conversions ---------------------------------------------------- */
1252
1253static void asm_tostr(ASMState *as, IRIns *ir)
1254{
1255 const CCallInfo *ci;
1256 IRRef args[2];
1257 asm_snap_prep(as);
1258 args[0] = ASMREF_L;
1259 as->gcsteps++;
1260 if (ir->op2 == IRTOSTR_NUM) {
1261 args[1] = ASMREF_TMP1; /* cTValue * */
1262 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1263 } else {
1264 args[1] = ir->op1; /* int32_t k */
1265 if (ir->op2 == IRTOSTR_INT)
1266 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1267 else
1268 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1269 }
1270 asm_setupresult(as, ir, ci); /* GCstr * */
1271 asm_gencall(as, ci, args);
1272 if (ir->op2 == IRTOSTR_NUM)
1273 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
1274}
1275
1276#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1277static void asm_conv64(ASMState *as, IRIns *ir)
1278{
1279 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1280 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1281 IRCallID id;
1282 IRRef args[2];
1283 lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
1284 "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
1285 args[LJ_BE] = (ir-1)->op1;
1286 args[LJ_LE] = ir->op1;
1287 if (st == IRT_NUM || st == IRT_FLOAT) {
1288 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1289 ir--;
1290 } else {
1291 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1292 }
1293 {
1294#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1295 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1296 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1297#else
1298 const CCallInfo *ci = &lj_ir_callinfo[id];
1299#endif
1300 asm_setupresult(as, ir, ci);
1301 asm_gencall(as, ci, args);
1302 }
1303}
1304#endif
1305
1306/* -- Memory references --------------------------------------------------- */
1307
1308static void asm_newref(ASMState *as, IRIns *ir)
1309{
1310 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1311 IRRef args[3];
1312 if (ir->r == RID_SINK)
1313 return;
1314 asm_snap_prep(as);
1315 args[0] = ASMREF_L; /* lua_State *L */
1316 args[1] = ir->op1; /* GCtab *t */
1317 args[2] = ASMREF_TMP1; /* cTValue *key */
1318 asm_setupresult(as, ir, ci); /* TValue * */
1319 asm_gencall(as, ci, args);
1320 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
1321}
1322
1323static void asm_tmpref(ASMState *as, IRIns *ir)
1324{
1325 Reg r = ra_dest(as, ir, RSET_GPR);
1326 asm_tvptr(as, r, ir->op1, ir->op2);
1327}
1328
1329static void asm_lref(ASMState *as, IRIns *ir)
1330{
1331 Reg r = ra_dest(as, ir, RSET_GPR);
1332#if LJ_TARGET_X86ORX64
1333 ra_left(as, r, ASMREF_L);
1334#else
1335 ra_leftov(as, r, ASMREF_L);
1336#endif
1337}
1338
1339/* -- Calls --------------------------------------------------------------- */
1340
1341/* Collect arguments from CALL* and CARG instructions. */
1342static void asm_collectargs(ASMState *as, IRIns *ir,
1343 const CCallInfo *ci, IRRef *args)
1344{
1345 uint32_t n = CCI_XNARGS(ci);
1346 /* Account for split args. */
1347 lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
1348 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1349 while (n-- > 1) {
1350 ir = IR(ir->op1);
1351 lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
1352 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1353 }
1354 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1355 lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
1356}
1357
1358/* Reconstruct CCallInfo flags for CALLX*. */
1359static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1360{
1361 uint32_t nargs = 0;
1362 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1363 IRIns *ira = IR(ir->op1);
1364 nargs++;
1365 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1366 }
1367#if LJ_HASFFI
1368 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1369 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1370 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1371 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1372#if LJ_TARGET_X86
1373 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1374#endif
1375 }
1376#endif
1377 return (nargs | (ir->t.irt << CCI_OTSHIFT));
1378}
1379
1380static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1381{
1382 const CCallInfo *ci = &lj_ir_callinfo[id];
1383 IRRef args[2];
1384 args[0] = ir->op1;
1385 args[1] = ir->op2;
1386 asm_setupresult(as, ir, ci);
1387 asm_gencall(as, ci, args);
1388}
1389
1390static void asm_call(ASMState *as, IRIns *ir)
1391{
1392 IRRef args[CCI_NARGS_MAX];
1393 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1394 asm_collectargs(as, ir, ci, args);
1395 asm_setupresult(as, ir, ci);
1396 asm_gencall(as, ci, args);
1397}
1398
1067/* -- PHI and loop handling ----------------------------------------------- */ 1399/* -- PHI and loop handling ----------------------------------------------- */
1068 1400
1069/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1401/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1249,12 +1581,7 @@ static void asm_phi_fixup(ASMState *as)
1249 irt_clearmark(ir->t); 1581 irt_clearmark(ir->t);
1250 /* Left PHI gained a spill slot before the loop? */ 1582 /* Left PHI gained a spill slot before the loop? */
1251 if (ra_hasspill(ir->s)) { 1583 if (ra_hasspill(ir->s)) {
1252 IRRef ren; 1584 ra_addrename(as, r, lref, as->loopsnapno);
1253 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
1254 ren = tref_ref(lj_ir_emit(as->J));
1255 as->ir = as->T->ir; /* The IR may have been reallocated. */
1256 IR(ren)->r = (uint8_t)r;
1257 IR(ren)->s = SPS_NONE;
1258 } 1585 }
1259 } 1586 }
1260 rset_clear(work, r); 1587 rset_clear(work, r);
@@ -1329,6 +1656,8 @@ static void asm_loop(ASMState *as)
1329#include "lj_asm_x86.h" 1656#include "lj_asm_x86.h"
1330#elif LJ_TARGET_ARM 1657#elif LJ_TARGET_ARM
1331#include "lj_asm_arm.h" 1658#include "lj_asm_arm.h"
1659#elif LJ_TARGET_ARM64
1660#include "lj_asm_arm64.h"
1332#elif LJ_TARGET_PPC 1661#elif LJ_TARGET_PPC
1333#include "lj_asm_ppc.h" 1662#include "lj_asm_ppc.h"
1334#elif LJ_TARGET_MIPS 1663#elif LJ_TARGET_MIPS
@@ -1337,6 +1666,204 @@ static void asm_loop(ASMState *as)
1337#error "Missing assembler for target CPU" 1666#error "Missing assembler for target CPU"
1338#endif 1667#endif
1339 1668
1669/* -- Common instruction helpers ------------------------------------------ */
1670
1671#if !LJ_SOFTFP32
1672#if !LJ_TARGET_X86ORX64
1673#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1674#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1675#endif
1676
1677static void asm_pow(ASMState *as, IRIns *ir)
1678{
1679#if LJ_64 && LJ_HASFFI
1680 if (!irt_isnum(ir->t))
1681 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1682 IRCALL_lj_carith_powu64);
1683 else
1684#endif
1685 if (irt_isnum(IR(ir->op2)->t))
1686 asm_callid(as, ir, IRCALL_pow);
1687 else
1688 asm_fppowi(as, ir);
1689}
1690
1691static void asm_div(ASMState *as, IRIns *ir)
1692{
1693#if LJ_64 && LJ_HASFFI
1694 if (!irt_isnum(ir->t))
1695 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1696 IRCALL_lj_carith_divu64);
1697 else
1698#endif
1699 asm_fpdiv(as, ir);
1700}
1701#endif
1702
1703static void asm_mod(ASMState *as, IRIns *ir)
1704{
1705#if LJ_64 && LJ_HASFFI
1706 if (!irt_isint(ir->t))
1707 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1708 IRCALL_lj_carith_modu64);
1709 else
1710#endif
1711 asm_callid(as, ir, IRCALL_lj_vm_modi);
1712}
1713
1714static void asm_fuseequal(ASMState *as, IRIns *ir)
1715{
1716 /* Fuse HREF + EQ/NE. */
1717 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1718 as->curins--;
1719 asm_href(as, ir-1, (IROp)ir->o);
1720 } else {
1721 asm_equal(as, ir);
1722 }
1723}
1724
1725static void asm_alen(ASMState *as, IRIns *ir)
1726{
1727 asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
1728 IRCALL_lj_tab_len_hint);
1729}
1730
1731/* -- Instruction dispatch ------------------------------------------------ */
1732
1733/* Assemble a single instruction. */
1734static void asm_ir(ASMState *as, IRIns *ir)
1735{
1736 switch ((IROp)ir->o) {
1737 /* Miscellaneous ops. */
1738 case IR_LOOP: asm_loop(as); break;
1739 case IR_NOP: case IR_XBAR:
1740 lj_assertA(!ra_used(ir),
1741 "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
1742 break;
1743 case IR_USE:
1744 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1745 case IR_PHI: asm_phi(as, ir); break;
1746 case IR_HIOP: asm_hiop(as, ir); break;
1747 case IR_GCSTEP: asm_gcstep(as, ir); break;
1748 case IR_PROF: asm_prof(as, ir); break;
1749
1750 /* Guarded assertions. */
1751 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1752 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1753 case IR_ABC:
1754 asm_comp(as, ir);
1755 break;
1756 case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
1757
1758 case IR_RETF: asm_retf(as, ir); break;
1759
1760 /* Bit ops. */
1761 case IR_BNOT: asm_bnot(as, ir); break;
1762 case IR_BSWAP: asm_bswap(as, ir); break;
1763 case IR_BAND: asm_band(as, ir); break;
1764 case IR_BOR: asm_bor(as, ir); break;
1765 case IR_BXOR: asm_bxor(as, ir); break;
1766 case IR_BSHL: asm_bshl(as, ir); break;
1767 case IR_BSHR: asm_bshr(as, ir); break;
1768 case IR_BSAR: asm_bsar(as, ir); break;
1769 case IR_BROL: asm_brol(as, ir); break;
1770 case IR_BROR: asm_bror(as, ir); break;
1771
1772 /* Arithmetic ops. */
1773 case IR_ADD: asm_add(as, ir); break;
1774 case IR_SUB: asm_sub(as, ir); break;
1775 case IR_MUL: asm_mul(as, ir); break;
1776 case IR_MOD: asm_mod(as, ir); break;
1777 case IR_NEG: asm_neg(as, ir); break;
1778#if LJ_SOFTFP32
1779 case IR_DIV: case IR_POW: case IR_ABS:
1780 case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
1781 /* Unused for LJ_SOFTFP32. */
1782 lj_assertA(0, "IR %04d with unused op %d",
1783 (int)(ir - as->ir) - REF_BIAS, ir->o);
1784 break;
1785#else
1786 case IR_DIV: asm_div(as, ir); break;
1787 case IR_POW: asm_pow(as, ir); break;
1788 case IR_ABS: asm_abs(as, ir); break;
1789 case IR_LDEXP: asm_ldexp(as, ir); break;
1790 case IR_FPMATH: asm_fpmath(as, ir); break;
1791 case IR_TOBIT: asm_tobit(as, ir); break;
1792#endif
1793 case IR_MIN: asm_min(as, ir); break;
1794 case IR_MAX: asm_max(as, ir); break;
1795
1796 /* Overflow-checking arithmetic ops. */
1797 case IR_ADDOV: asm_addov(as, ir); break;
1798 case IR_SUBOV: asm_subov(as, ir); break;
1799 case IR_MULOV: asm_mulov(as, ir); break;
1800
1801 /* Memory references. */
1802 case IR_AREF: asm_aref(as, ir); break;
1803 case IR_HREF: asm_href(as, ir, 0); break;
1804 case IR_HREFK: asm_hrefk(as, ir); break;
1805 case IR_NEWREF: asm_newref(as, ir); break;
1806 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1807 case IR_FREF: asm_fref(as, ir); break;
1808 case IR_TMPREF: asm_tmpref(as, ir); break;
1809 case IR_STRREF: asm_strref(as, ir); break;
1810 case IR_LREF: asm_lref(as, ir); break;
1811
1812 /* Loads and stores. */
1813 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1814 asm_ahuvload(as, ir);
1815 break;
1816 case IR_FLOAD: asm_fload(as, ir); break;
1817 case IR_XLOAD: asm_xload(as, ir); break;
1818 case IR_SLOAD: asm_sload(as, ir); break;
1819 case IR_ALEN: asm_alen(as, ir); break;
1820
1821 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1822 case IR_FSTORE: asm_fstore(as, ir); break;
1823 case IR_XSTORE: asm_xstore(as, ir); break;
1824
1825 /* Allocations. */
1826 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1827 case IR_TNEW: asm_tnew(as, ir); break;
1828 case IR_TDUP: asm_tdup(as, ir); break;
1829 case IR_CNEW: case IR_CNEWI:
1830#if LJ_HASFFI
1831 asm_cnew(as, ir);
1832#else
1833 lj_assertA(0, "IR %04d with unused op %d",
1834 (int)(ir - as->ir) - REF_BIAS, ir->o);
1835#endif
1836 break;
1837
1838 /* Buffer operations. */
1839 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1840 case IR_BUFPUT: asm_bufput(as, ir); break;
1841 case IR_BUFSTR: asm_bufstr(as, ir); break;
1842
1843 /* Write barriers. */
1844 case IR_TBAR: asm_tbar(as, ir); break;
1845 case IR_OBAR: asm_obar(as, ir); break;
1846
1847 /* Type conversions. */
1848 case IR_CONV: asm_conv(as, ir); break;
1849 case IR_TOSTR: asm_tostr(as, ir); break;
1850 case IR_STRTO: asm_strto(as, ir); break;
1851
1852 /* Calls. */
1853 case IR_CALLA:
1854 as->gcsteps++;
1855 /* fallthrough */
1856 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1857 case IR_CALLXS: asm_callx(as, ir); break;
1858 case IR_CARG: break;
1859
1860 default:
1861 setintV(&as->J->errinfo, ir->o);
1862 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1863 break;
1864 }
1865}
1866
1340/* -- Head of trace ------------------------------------------------------- */ 1867/* -- Head of trace ------------------------------------------------------- */
1341 1868
1342/* Head of a root trace. */ 1869/* Head of a root trace. */
@@ -1373,8 +1900,7 @@ static void asm_head_side(ASMState *as)
1373 1900
1374 if (as->snapno && as->topslot > as->parent->topslot) { 1901 if (as->snapno && as->topslot > as->parent->topslot) {
1375 /* Force snap #0 alloc to prevent register overwrite in stack check. */ 1902 /* Force snap #0 alloc to prevent register overwrite in stack check. */
1376 as->snapno = 0; 1903 asm_snap_alloc(as, 0);
1377 asm_snap_alloc(as);
1378 } 1904 }
1379 allow = asm_head_side_base(as, irp, allow); 1905 allow = asm_head_side_base(as, irp, allow);
1380 1906
@@ -1382,8 +1908,10 @@ static void asm_head_side(ASMState *as)
1382 for (i = as->stopins; i > REF_BASE; i--) { 1908 for (i = as->stopins; i > REF_BASE; i--) {
1383 IRIns *ir = IR(i); 1909 IRIns *ir = IR(i);
1384 RegSP rs; 1910 RegSP rs;
1385 lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || 1911 lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
1386 (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); 1912 (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
1913 "IR %04d has bad parent op %d",
1914 (int)(ir - as->ir) - REF_BIAS, ir->o);
1387 rs = as->parentmap[i - REF_FIRST]; 1915 rs = as->parentmap[i - REF_FIRST];
1388 if (ra_hasreg(ir->r)) { 1916 if (ra_hasreg(ir->r)) {
1389 rset_clear(allow, ir->r); 1917 rset_clear(allow, ir->r);
@@ -1535,7 +2063,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
1535 SnapEntry sn = map[n-1]; 2063 SnapEntry sn = map[n-1];
1536 if ((sn & SNAP_FRAME)) { 2064 if ((sn & SNAP_FRAME)) {
1537 *gotframe = 1; 2065 *gotframe = 1;
1538 return snap_slot(sn); 2066 return snap_slot(sn) - LJ_FR2;
1539 } 2067 }
1540 } 2068 }
1541 return 0; 2069 return 0;
@@ -1555,19 +2083,23 @@ static void asm_tail_link(ASMState *as)
1555 2083
1556 if (as->T->link == 0) { 2084 if (as->T->link == 0) {
1557 /* Setup fixed registers for exit to interpreter. */ 2085 /* Setup fixed registers for exit to interpreter. */
1558 const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); 2086 const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
1559 int32_t mres; 2087 int32_t mres;
1560 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ 2088 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
1561 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; 2089 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
1562 if (bc_isret(bc_op(*retpc))) 2090 if (bc_isret(bc_op(*retpc)))
1563 pc = retpc; 2091 pc = retpc;
1564 } 2092 }
2093#if LJ_GC64
2094 emit_loadu64(as, RID_LPC, u64ptr(pc));
2095#else
1565 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); 2096 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
1566 ra_allockreg(as, i32ptr(pc), RID_LPC); 2097 ra_allockreg(as, i32ptr(pc), RID_LPC);
1567 mres = (int32_t)(snap->nslots - baseslot); 2098#endif
2099 mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
1568 switch (bc_op(*pc)) { 2100 switch (bc_op(*pc)) {
1569 case BC_CALLM: case BC_CALLMT: 2101 case BC_CALLM: case BC_CALLMT:
1570 mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; 2102 mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
1571 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; 2103 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
1572 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; 2104 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
1573 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; 2105 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1579,6 +2111,11 @@ static void asm_tail_link(ASMState *as)
1579 } 2111 }
1580 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); 2112 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
1581 2113
2114 if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */
2115 setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
2116 IR(as->J->ktrace)->o = IR_KGC;
2117 }
2118
1582 /* Sync the interpreter state with the on-trace state. */ 2119 /* Sync the interpreter state with the on-trace state. */
1583 asm_stack_restore(as, snap); 2120 asm_stack_restore(as, snap);
1584 2121
@@ -1602,22 +2139,32 @@ static void asm_setup_regsp(ASMState *as)
1602#endif 2139#endif
1603 2140
1604 ra_setup(as); 2141 ra_setup(as);
2142#if LJ_TARGET_ARM64
2143 ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
2144#endif
1605 2145
1606 /* Clear reg/sp for constants. */ 2146 /* Clear reg/sp for constants. */
1607 for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) 2147 for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
1608 ir->prev = REGSP_INIT; 2148 ir->prev = REGSP_INIT;
2149 if (irt_is64(ir->t) && ir->o != IR_KNULL) {
2150#if LJ_GC64
2151 /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
2152 ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
2153#else
2154 /* Make life easier for backends by putting address of constant in i. */
2155 ir->i = (int32_t)(intptr_t)(ir+1);
2156#endif
2157 ir++;
2158 }
2159 }
1609 2160
1610 /* REF_BASE is used for implicit references to the BASE register. */ 2161 /* REF_BASE is used for implicit references to the BASE register. */
1611 lastir->prev = REGSP_HINT(RID_BASE); 2162 lastir->prev = REGSP_HINT(RID_BASE);
1612 2163
1613 ir = IR(nins-1);
1614 if (ir->o == IR_RENAME) {
1615 do { ir--; nins--; } while (ir->o == IR_RENAME);
1616 T->nins = nins; /* Remove any renames left over from ASM restart. */
1617 }
1618 as->snaprename = nins; 2164 as->snaprename = nins;
1619 as->snapref = nins; 2165 as->snapref = nins;
1620 as->snapno = T->nsnap; 2166 as->snapno = T->nsnap;
2167 as->snapalloc = 0;
1621 2168
1622 as->stopins = REF_BASE; 2169 as->stopins = REF_BASE;
1623 as->orignins = nins; 2170 as->orignins = nins;
@@ -1627,7 +2174,7 @@ static void asm_setup_regsp(ASMState *as)
1627 ir = IR(REF_FIRST); 2174 ir = IR(REF_FIRST);
1628 if (as->parent) { 2175 if (as->parent) {
1629 uint16_t *p; 2176 uint16_t *p;
1630 lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); 2177 lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
1631 if (lastir - ir > LJ_MAX_JSLOTS) 2178 if (lastir - ir > LJ_MAX_JSLOTS)
1632 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 2179 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1633 as->stopins = (IRRef)((lastir-1) - as->ir); 2180 as->stopins = (IRRef)((lastir-1) - as->ir);
@@ -1666,6 +2213,10 @@ static void asm_setup_regsp(ASMState *as)
1666 ir->prev = (uint16_t)REGSP_HINT((rload & 15)); 2213 ir->prev = (uint16_t)REGSP_HINT((rload & 15));
1667 rload = lj_ror(rload, 4); 2214 rload = lj_ror(rload, 4);
1668 continue; 2215 continue;
2216 case IR_TMPREF:
2217 if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
2218 as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */
2219 break;
1669#endif 2220#endif
1670 case IR_CALLXS: { 2221 case IR_CALLXS: {
1671 CCallInfo ci; 2222 CCallInfo ci;
@@ -1675,7 +2226,7 @@ static void asm_setup_regsp(ASMState *as)
1675 as->modset |= RSET_SCRATCH; 2226 as->modset |= RSET_SCRATCH;
1676 continue; 2227 continue;
1677 } 2228 }
1678 case IR_CALLN: case IR_CALLL: case IR_CALLS: { 2229 case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
1679 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 2230 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1680 ir->prev = asm_setup_call_slots(as, ir, ci); 2231 ir->prev = asm_setup_call_slots(as, ir, ci);
1681 if (inloop) 2232 if (inloop)
@@ -1700,8 +2251,8 @@ static void asm_setup_regsp(ASMState *as)
1700 ir->prev = REGSP_HINT(RID_FPRET); 2251 ir->prev = REGSP_HINT(RID_FPRET);
1701 continue; 2252 continue;
1702 } 2253 }
1703 /* fallthrough */
1704#endif 2254#endif
2255 /* fallthrough */
1705 case IR_CALLN: case IR_CALLXS: 2256 case IR_CALLN: case IR_CALLXS:
1706#if LJ_SOFTFP 2257#if LJ_SOFTFP
1707 case IR_MIN: case IR_MAX: 2258 case IR_MIN: case IR_MAX:
@@ -1720,11 +2271,23 @@ static void asm_setup_regsp(ASMState *as)
1720#endif 2271#endif
1721 /* fallthrough */ 2272 /* fallthrough */
1722 /* C calls evict all scratch regs and return results in RID_RET. */ 2273 /* C calls evict all scratch regs and return results in RID_RET. */
1723 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2274 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1724 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2275 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1725 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2276 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
2277#if LJ_TARGET_X86 && LJ_HASFFI
2278 if (0) {
2279 case IR_CNEW:
2280 if (ir->op2 != REF_NIL && as->evenspill < 4)
2281 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2282 }
2283 /* fallthrough */
2284#else
1726 /* fallthrough */ 2285 /* fallthrough */
1727 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2286 case IR_CNEW:
2287#endif
2288 /* fallthrough */
2289 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2290 case IR_BUFSTR:
1728 ir->prev = REGSP_HINT(RID_RET); 2291 ir->prev = REGSP_HINT(RID_RET);
1729 if (inloop) 2292 if (inloop)
1730 as->modset = RSET_SCRATCH; 2293 as->modset = RSET_SCRATCH;
@@ -1733,58 +2296,73 @@ static void asm_setup_regsp(ASMState *as)
1733 if (inloop) 2296 if (inloop)
1734 as->modset = RSET_SCRATCH; 2297 as->modset = RSET_SCRATCH;
1735 break; 2298 break;
1736#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP 2299#if !LJ_SOFTFP
1737 case IR_ATAN2: case IR_LDEXP: 2300#if !LJ_TARGET_X86ORX64
2301 case IR_LDEXP:
2302#endif
1738#endif 2303#endif
2304 /* fallthrough */
1739 case IR_POW: 2305 case IR_POW:
1740 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2306 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1741#if LJ_TARGET_X86ORX64
1742 ir->prev = REGSP_HINT(RID_XMM0);
1743 if (inloop) 2307 if (inloop)
1744 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); 2308 as->modset |= RSET_SCRATCH;
2309#if LJ_TARGET_X86
2310 if (irt_isnum(IR(ir->op2)->t)) {
2311 if (as->evenspill < 4) /* Leave room to call pow(). */
2312 as->evenspill = 4;
2313 }
2314 break;
1745#else 2315#else
1746 ir->prev = REGSP_HINT(RID_FPRET); 2316 ir->prev = REGSP_HINT(RID_FPRET);
1747 if (inloop)
1748 as->modset |= RSET_SCRATCH;
1749#endif
1750 continue; 2317 continue;
2318#endif
1751 } 2319 }
1752 /* fallthrough */ /* for integer POW */ 2320 /* fallthrough */ /* for integer POW */
1753 case IR_DIV: case IR_MOD: 2321 case IR_DIV: case IR_MOD:
1754 if (!irt_isnum(ir->t)) { 2322 if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
1755 ir->prev = REGSP_HINT(RID_RET); 2323 ir->prev = REGSP_HINT(RID_RET);
1756 if (inloop) 2324 if (inloop)
1757 as->modset |= (RSET_SCRATCH & RSET_GPR); 2325 as->modset |= (RSET_SCRATCH & RSET_GPR);
1758 continue; 2326 continue;
1759 } 2327 }
1760 break; 2328 break;
1761 case IR_FPMATH: 2329#if LJ_64 && LJ_SOFTFP
1762#if LJ_TARGET_X86ORX64 2330 case IR_ADD: case IR_SUB: case IR_MUL:
1763 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2331 if (irt_isnum(ir->t)) {
1764 ir->prev = REGSP_HINT(RID_XMM0); 2332 ir->prev = REGSP_HINT(RID_RET);
1765#if !LJ_64
1766 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
1767 as->evenspill = 4;
1768#endif
1769 if (inloop)
1770 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1771 continue;
1772 } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
1773 ir->prev = REGSP_HINT(RID_XMM0);
1774 if (inloop) 2333 if (inloop)
1775 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); 2334 as->modset |= (RSET_SCRATCH & RSET_GPR);
1776 continue; 2335 continue;
1777 } 2336 }
1778 break; 2337 break;
1779#else 2338#endif
1780 ir->prev = REGSP_HINT(RID_FPRET); 2339 case IR_FPMATH:
2340#if LJ_TARGET_X86ORX64
2341 if (ir->op2 <= IRFPM_TRUNC) {
2342 if (!(as->flags & JIT_F_SSE4_1)) {
2343 ir->prev = REGSP_HINT(RID_XMM0);
2344 if (inloop)
2345 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2346 continue;
2347 }
2348 break;
2349 }
2350#endif
1781 if (inloop) 2351 if (inloop)
1782 as->modset |= RSET_SCRATCH; 2352 as->modset |= RSET_SCRATCH;
2353#if LJ_TARGET_X86
2354 break;
2355#else
2356 ir->prev = REGSP_HINT(RID_FPRET);
1783 continue; 2357 continue;
1784#endif 2358#endif
1785#if LJ_TARGET_X86ORX64 2359#if LJ_TARGET_X86ORX64
1786 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ 2360 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
1787 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 2361 case IR_BSHL: case IR_BSHR: case IR_BSAR:
2362 if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
2363 break;
2364 /* fallthrough */
2365 case IR_BROL: case IR_BROR:
1788 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { 2366 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
1789 IR(ir->op2)->r = REGSP_HINT(RID_ECX); 2367 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
1790 if (inloop) 2368 if (inloop)
@@ -1828,16 +2406,26 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1828{ 2406{
1829 ASMState as_; 2407 ASMState as_;
1830 ASMState *as = &as_; 2408 ASMState *as = &as_;
1831 MCode *origtop; 2409
2410 /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
2411 {
2412 IRRef nins = T->nins;
2413 IRIns *ir = &T->ir[nins-1];
2414 if (ir->o == IR_NOP || ir->o == IR_RENAME) {
2415 do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
2416 T->nins = nins;
2417 }
2418 }
1832 2419
1833 /* Ensure an initialized instruction beyond the last one for HIOP checks. */ 2420 /* Ensure an initialized instruction beyond the last one for HIOP checks. */
1834 J->cur.nins = lj_ir_nextins(J); 2421 /* This also allows one RENAME to be added without reallocating curfinal. */
1835 lj_ir_nop(&J->cur.ir[J->cur.nins]); 2422 as->orignins = lj_ir_nextins(J);
2423 lj_ir_nop(&J->cur.ir[as->orignins]);
1836 2424
1837 /* Setup initial state. Copy some fields to reduce indirections. */ 2425 /* Setup initial state. Copy some fields to reduce indirections. */
1838 as->J = J; 2426 as->J = J;
1839 as->T = T; 2427 as->T = T;
1840 as->ir = T->ir; 2428 J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
1841 as->flags = J->flags; 2429 as->flags = J->flags;
1842 as->loopref = J->loopref; 2430 as->loopref = J->loopref;
1843 as->realign = NULL; 2431 as->realign = NULL;
@@ -1845,17 +2433,46 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1845 as->parent = J->parent ? traceref(J, J->parent) : NULL; 2433 as->parent = J->parent ? traceref(J, J->parent) : NULL;
1846 2434
1847 /* Reserve MCode memory. */ 2435 /* Reserve MCode memory. */
1848 as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); 2436 as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
1849 as->mcp = as->mctop; 2437 as->mcp = as->mctop;
1850 as->mclim = as->mcbot + MCLIM_REDZONE; 2438 as->mclim = as->mcbot + MCLIM_REDZONE;
1851 asm_setup_target(as); 2439 asm_setup_target(as);
1852 2440
1853 do { 2441 /*
2442 ** This is a loop, because the MCode may have to be (re-)assembled
2443 ** multiple times:
2444 **
2445 ** 1. as->realign is set (and the assembly aborted), if the arch-specific
2446 ** backend wants the MCode to be aligned differently.
2447 **
2448 ** This is currently only the case on x86/x64, where small loops get
2449 ** an aligned loop body plus a short branch. Not much effort is wasted,
2450 ** because the abort happens very quickly and only once.
2451 **
2452 ** 2. The IR is immovable, since the MCode embeds pointers to various
2453 ** constants inside the IR. But RENAMEs may need to be added to the IR
2454 ** during assembly, which might grow and reallocate the IR. We check
2455 ** at the end if the IR (in J->cur.ir) has actually grown, resize the
2456 ** copy (in J->curfinal.ir) and try again.
2457 **
2458 ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
2459 ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
2460 ** always have one spare slot in the IR (see above), which means we
2461 ** have to redo the assembly for only ~2% of all traces.
2462 **
2463 ** Very, very rarely, this needs to be done repeatedly, since the
2464 ** location of constants inside the IR (actually, reachability from
2465 ** a global pointer) may affect register allocation and thus the
2466 ** number of RENAMEs.
2467 */
2468 for (;;) {
1854 as->mcp = as->mctop; 2469 as->mcp = as->mctop;
1855#ifdef LUA_USE_ASSERT 2470#ifdef LUA_USE_ASSERT
1856 as->mcp_prev = as->mcp; 2471 as->mcp_prev = as->mcp;
1857#endif 2472#endif
1858 as->curins = T->nins; 2473 as->ir = J->curfinal->ir; /* Use the copied IR. */
2474 as->curins = J->cur.nins = as->orignins;
2475
1859 RA_DBG_START(); 2476 RA_DBG_START();
1860 RA_DBGX((as, "===== STOP =====")); 2477 RA_DBGX((as, "===== STOP ====="));
1861 2478
@@ -1874,7 +2491,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1874 /* Assemble a trace in linear backwards order. */ 2491 /* Assemble a trace in linear backwards order. */
1875 for (as->curins--; as->curins > as->stopins; as->curins--) { 2492 for (as->curins--; as->curins > as->stopins; as->curins--) {
1876 IRIns *ir = IR(as->curins); 2493 IRIns *ir = IR(as->curins);
1877 lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ 2494 /* 64 bit types handled by SPLIT for 32 bit archs. */
2495 lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
2496 "IR %04d has unsplit 64 bit type",
2497 (int)(ir - as->ir) - REF_BIAS);
2498 asm_snap_prev(as);
1878 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) 2499 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
1879 continue; /* Dead-code elimination can be soooo easy. */ 2500 continue; /* Dead-code elimination can be soooo easy. */
1880 if (irt_isguard(ir->t)) 2501 if (irt_isguard(ir->t))
@@ -1883,22 +2504,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1883 checkmclim(as); 2504 checkmclim(as);
1884 asm_ir(as, ir); 2505 asm_ir(as, ir);
1885 } 2506 }
1886 } while (as->realign); /* Retry in case the MCode needs to be realigned. */
1887 2507
1888 /* Emit head of trace. */ 2508 if (as->realign && J->curfinal->nins >= T->nins)
1889 RA_DBG_REF(); 2509 continue; /* Retry in case only the MCode needs to be realigned. */
1890 checkmclim(as); 2510
1891 if (as->gcsteps > 0) { 2511 /* Emit head of trace. */
1892 as->curins = as->T->snap[0].ref; 2512 RA_DBG_REF();
1893 asm_snap_prep(as); /* The GC check is a guard. */ 2513 checkmclim(as);
1894 asm_gc_check(as); 2514 if (as->gcsteps > 0) {
2515 as->curins = as->T->snap[0].ref;
2516 asm_snap_prep(as); /* The GC check is a guard. */
2517 asm_gc_check(as);
2518 as->curins = as->stopins;
2519 }
2520 ra_evictk(as);
2521 if (as->parent)
2522 asm_head_side(as);
2523 else
2524 asm_head_root(as);
2525 asm_phi_fixup(as);
2526
2527 if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
2528 lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
2529 memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
2530 (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
2531 T->nins = J->curfinal->nins;
2532 /* Fill mcofs of any unprocessed snapshots. */
2533 as->curins = REF_FIRST;
2534 asm_snap_prev(as);
2535 break; /* Done. */
2536 }
2537
2538 /* Otherwise try again with a bigger IR. */
2539 lj_trace_free(J2G(J), J->curfinal);
2540 J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
2541 J->curfinal = lj_trace_alloc(J->L, T);
2542 as->realign = NULL;
1895 } 2543 }
1896 ra_evictk(as);
1897 if (as->parent)
1898 asm_head_side(as);
1899 else
1900 asm_head_root(as);
1901 asm_phi_fixup(as);
1902 2544
1903 RA_DBGX((as, "===== START ====")); 2545 RA_DBGX((as, "===== START ===="));
1904 RA_DBG_FLUSH(); 2546 RA_DBG_FLUSH();
@@ -1911,7 +2553,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1911 if (!as->loopref) 2553 if (!as->loopref)
1912 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ 2554 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
1913 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); 2555 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
1914 lj_mcode_sync(T->mcode, origtop); 2556 asm_snap_fixup_mcofs(as);
2557#if LJ_TARGET_MCODE_FIXUP
2558 asm_mcode_fixup(T->mcode, T->szmcode);
2559#endif
2560 lj_mcode_sync(T->mcode, as->mctoporig);
1915} 2561}
1916 2562
1917#undef IR 2563#undef IR
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 68327c38..96703d7d 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)
41 } 41 }
42 } 42 }
43 } 43 }
44 lua_assert(rset_test(RSET_GPREVEN, r)); 44 lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
45 ra_modified(as, r); 45 ra_modified(as, r);
46 ra_modified(as, r+1); 46 ra_modified(as, r+1);
47 RA_DBGX((as, "scratchpair $r $r", r, r+1)); 47 RA_DBGX((as, "scratchpair $r $r", r, r+1));
@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
185 *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ 185 *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */
186 return ra_allock(as, (ofs & ~255), allow); 186 return ra_allock(as, (ofs & ~255), allow);
187 } 187 }
188 } else if (ir->o == IR_TMPREF) {
189 *ofsp = 0;
190 return RID_SP;
188 } 191 }
189 } 192 }
190 *ofsp = 0; 193 *ofsp = 0;
@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
269 return; 272 return;
270 } 273 }
271 } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { 274 } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
272 lua_assert(ofs == 0); 275 lj_assertA(ofs == 0, "bad usage");
273 ofs = (int32_t)sizeof(GCstr); 276 ofs = (int32_t)sizeof(GCstr);
274 if (irref_isk(ir->op2)) { 277 if (irref_isk(ir->op2)) {
275 ofs += IR(ir->op2)->i; 278 ofs += IR(ir->op2)->i;
@@ -338,7 +341,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 341/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 342static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 343{
341 uint32_t n, nargs = CCI_NARGS(ci); 344 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 345 int32_t ofs = 0;
343#if LJ_SOFTFP 346#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 347 Reg gpr = REGARG_FIRSTGPR;
@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
389 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 392 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
390 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; 393 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
391 if (gpr <= REGARG_LASTGPR) { 394 if (gpr <= REGARG_LASTGPR) {
392 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ 395 lj_assertA(rset_test(as->freeset, gpr),
396 "reg %d not free", gpr); /* Must have been evicted. */
393 if (irt_isnum(ir->t)) { 397 if (irt_isnum(ir->t)) {
394 lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ 398 lj_assertA(rset_test(as->freeset, gpr+1),
399 "reg %d not free", gpr+1); /* Ditto. */
395 emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); 400 emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
396 gpr += 2; 401 gpr += 2;
397 } else { 402 } else {
@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
408#endif 413#endif
409 { 414 {
410 if (gpr <= REGARG_LASTGPR) { 415 if (gpr <= REGARG_LASTGPR) {
411 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ 416 lj_assertA(rset_test(as->freeset, gpr),
417 "reg %d not free", gpr); /* Must have been evicted. */
412 if (ref) ra_leftov(as, gpr, ref); 418 if (ref) ra_leftov(as, gpr, ref);
413 gpr++; 419 gpr++;
414 } else { 420 } else {
@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
433 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 439 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
434 ra_evictset(as, drop); /* Evictions must be performed first. */ 440 ra_evictset(as, drop); /* Evictions must be performed first. */
435 if (ra_used(ir)) { 441 if (ra_used(ir)) {
436 lua_assert(!irt_ispri(ir->t)); 442 lj_assertA(!irt_ispri(ir->t), "PRI dest");
437 if (!LJ_SOFTFP && irt_isfp(ir->t)) { 443 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
438 if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { 444 if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
439 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); 445 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
@@ -453,15 +459,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 459 UNUSED(ci);
454} 460}
455 461
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 462static void asm_callx(ASMState *as, IRIns *ir)
466{ 463{
467 IRRef args[CCI_NARGS_MAX*2]; 464 IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +487,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
490{ 487{
491 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 488 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
492 void *pc = ir_kptr(IR(ir->op2)); 489 void *pc = ir_kptr(IR(ir->op2));
493 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 490 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
494 as->topslot -= (BCReg)delta; 491 as->topslot -= (BCReg)delta;
495 if ((int32_t)as->topslot < 0) as->topslot = 0; 492 if ((int32_t)as->topslot < 0) as->topslot = 0;
496 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 493 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -504,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
504 emit_lso(as, ARMI_LDR, RID_TMP, base, -4); 501 emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
505} 502}
506 503
504/* -- Buffer operations --------------------------------------------------- */
505
506#if LJ_HASBUFFER
507static void asm_bufhdr_write(ASMState *as, Reg sb)
508{
509 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
510 IRIns irgc;
511 int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
512 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
513 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
514 if ((as->flags & JIT_F_ARMV6T2)) {
515 emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
516 } else {
517 emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
518 emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
519 }
520 emit_lso(as, ARMI_LDR, RID_TMP,
521 ra_allock(as, (addr & ~4095),
522 rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
523 (addr & 4095));
524 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
525}
526#endif
527
507/* -- Type conversions ---------------------------------------------------- */ 528/* -- Type conversions ---------------------------------------------------- */
508 529
509#if !LJ_SOFTFP 530#if !LJ_SOFTFP
@@ -539,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir)
539#endif 560#endif
540 IRRef lref = ir->op1; 561 IRRef lref = ir->op1;
541 /* 64 bit integer conversions are handled by SPLIT. */ 562 /* 64 bit integer conversions are handled by SPLIT. */
542 lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); 563 lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
564 "IR %04d has unsplit 64 bit type",
565 (int)(ir - as->ir) - REF_BIAS);
543#if LJ_SOFTFP 566#if LJ_SOFTFP
544 /* FP conversions are handled by SPLIT. */ 567 /* FP conversions are handled by SPLIT. */
545 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); 568 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
569 "IR %04d has FP type",
570 (int)(ir - as->ir) - REF_BIAS);
546 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ 571 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
547#else 572#else
548 lua_assert(irt_type(ir->t) != st); 573 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
549 if (irt_isfp(ir->t)) { 574 if (irt_isfp(ir->t)) {
550 Reg dest = ra_dest(as, ir, RSET_FPR); 575 Reg dest = ra_dest(as, ir, RSET_FPR);
551 if (stfp) { /* FP to FP conversion. */ 576 if (stfp) { /* FP to FP conversion. */
@@ -562,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
562 } else if (stfp) { /* FP to integer conversion. */ 587 } else if (stfp) { /* FP to integer conversion. */
563 if (irt_isguard(ir->t)) { 588 if (irt_isguard(ir->t)) {
564 /* Checked conversions are only supported from number to int. */ 589 /* Checked conversions are only supported from number to int. */
565 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 590 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
591 "bad type for checked CONV");
566 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 592 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
567 } else { 593 } else {
568 Reg left = ra_alloc1(as, lref, RSET_FPR); 594 Reg left = ra_alloc1(as, lref, RSET_FPR);
@@ -581,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
581 Reg dest = ra_dest(as, ir, RSET_GPR); 607 Reg dest = ra_dest(as, ir, RSET_GPR);
582 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 608 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
583 Reg left = ra_alloc1(as, lref, RSET_GPR); 609 Reg left = ra_alloc1(as, lref, RSET_GPR);
584 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 610 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
585 if ((as->flags & JIT_F_ARMV6)) { 611 if ((as->flags & JIT_F_ARMV6)) {
586 ARMIns ai = st == IRT_I8 ? ARMI_SXTB : 612 ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
587 st == IRT_U8 ? ARMI_UXTB : 613 st == IRT_U8 ? ARMI_UXTB :
@@ -601,31 +627,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
601 } 627 }
602} 628}
603 629
604#if !LJ_SOFTFP && LJ_HASFFI
605static void asm_conv64(ASMState *as, IRIns *ir)
606{
607 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
608 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
609 IRCallID id;
610 CCallInfo ci;
611 IRRef args[2];
612 args[0] = (ir-1)->op1;
613 args[1] = ir->op1;
614 if (st == IRT_NUM || st == IRT_FLOAT) {
615 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
616 ir--;
617 } else {
618 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
619 }
620 ci = lj_ir_callinfo[id];
621#if !LJ_ABI_SOFTFP
622 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
623#endif
624 asm_setupresult(as, ir, &ci);
625 asm_gencall(as, &ci, args);
626}
627#endif
628
629static void asm_strto(ASMState *as, IRIns *ir) 630static void asm_strto(ASMState *as, IRIns *ir)
630{ 631{
631 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 632 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,60 +690,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
689 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 690 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
690} 691}
691 692
693/* -- Memory references --------------------------------------------------- */
694
692/* Get pointer to TValue. */ 695/* Get pointer to TValue. */
693static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 696static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
694{ 697{
695 IRIns *ir = IR(ref); 698 if ((mode & IRTMPREF_IN1)) {
696 if (irt_isnum(ir->t)) { 699 IRIns *ir = IR(ref);
697 if (irref_isk(ref)) { 700 if (irt_isnum(ir->t)) {
698 /* Use the number constant itself as a TValue. */ 701 if ((mode & IRTMPREF_OUT1)) {
699 ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
700 } else {
701#if LJ_SOFTFP 702#if LJ_SOFTFP
702 lua_assert(0); 703 lj_assertA(irref_isk(ref), "unsplit FP op");
704 emit_dm(as, ARMI_MOV, dest, RID_SP);
705 emit_lso(as, ARMI_STR,
706 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
707 RID_SP, 0);
708 emit_lso(as, ARMI_STR,
709 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
710 RID_SP, 4);
703#else 711#else
704 /* Otherwise force a spill and use the spill slot. */ 712 Reg src = ra_alloc1(as, ref, RSET_FPR);
705 emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); 713 emit_dm(as, ARMI_MOV, dest, RID_SP);
714 emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
706#endif 715#endif
716 } else if (irref_isk(ref)) {
717 /* Use the number constant itself as a TValue. */
718 ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
719 } else {
720#if LJ_SOFTFP
721 lj_assertA(0, "unsplit FP op");
722#else
723 /* Otherwise force a spill and use the spill slot. */
724 emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
725#endif
726 }
727 } else {
728 /* Otherwise use [sp] and [sp+4] to hold the TValue.
729 ** This assumes the following call has max. 4 args.
730 */
731 Reg type;
732 emit_dm(as, ARMI_MOV, dest, RID_SP);
733 if (!irt_ispri(ir->t)) {
734 Reg src = ra_alloc1(as, ref, RSET_GPR);
735 emit_lso(as, ARMI_STR, src, RID_SP, 0);
736 }
737 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
738 type = ra_alloc1(as, ref+1, RSET_GPR);
739 else
740 type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
741 emit_lso(as, ARMI_STR, type, RID_SP, 4);
707 } 742 }
708 } else { 743 } else {
709 /* Otherwise use [sp] and [sp+4] to hold the TValue. */
710 RegSet allow = rset_exclude(RSET_GPR, dest);
711 Reg type;
712 emit_dm(as, ARMI_MOV, dest, RID_SP); 744 emit_dm(as, ARMI_MOV, dest, RID_SP);
713 if (!irt_ispri(ir->t)) {
714 Reg src = ra_alloc1(as, ref, allow);
715 emit_lso(as, ARMI_STR, src, RID_SP, 0);
716 }
717 if ((ir+1)->o == IR_HIOP)
718 type = ra_alloc1(as, ref+1, allow);
719 else
720 type = ra_allock(as, irt_toitype(ir->t), allow);
721 emit_lso(as, ARMI_STR, type, RID_SP, 4);
722 } 745 }
723} 746}
724 747
725static void asm_tostr(ASMState *as, IRIns *ir)
726{
727 IRRef args[2];
728 args[0] = ASMREF_L;
729 as->gcsteps++;
730 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
731 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
732 args[1] = ASMREF_TMP1; /* const lua_Number * */
733 asm_setupresult(as, ir, ci); /* GCstr * */
734 asm_gencall(as, ci, args);
735 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
736 } else {
737 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
738 args[1] = ir->op1; /* int32_t k */
739 asm_setupresult(as, ir, ci); /* GCstr * */
740 asm_gencall(as, ci, args);
741 }
742}
743
744/* -- Memory references --------------------------------------------------- */
745
746static void asm_aref(ASMState *as, IRIns *ir) 748static void asm_aref(ASMState *as, IRIns *ir)
747{ 749{
748 Reg dest = ra_dest(as, ir, RSET_GPR); 750 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -864,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
864 *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); 866 *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
865 867
866 /* Load main position relative to tab->node into dest. */ 868 /* Load main position relative to tab->node into dest. */
867 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 869 khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
868 if (khash == 0) { 870 if (khash == 0) {
869 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 871 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
870 } else { 872 } else {
871 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); 873 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
872 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); 874 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
873 if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ 875 if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */
874 emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); 876 emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
875 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 877 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
876 emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); 878 emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
877 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); 879 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
878 } else if (irref_isk(refkey)) { 880 } else if (irref_isk(refkey)) {
879 emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, 881 emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
@@ -920,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
920 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 922 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
921 Reg key = RID_NONE, type = RID_TMP, idx = node; 923 Reg key = RID_NONE, type = RID_TMP, idx = node;
922 RegSet allow = rset_exclude(RSET_GPR, node); 924 RegSet allow = rset_exclude(RSET_GPR, node);
923 lua_assert(ofs % sizeof(Node) == 0); 925 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
924 if (ofs > 4095) { 926 if (ofs > 4095) {
925 idx = dest; 927 idx = dest;
926 rset_clear(allow, dest); 928 rset_clear(allow, dest);
@@ -960,20 +962,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
960 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 962 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
961} 963}
962 964
963static void asm_newref(ASMState *as, IRIns *ir)
964{
965 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
966 IRRef args[3];
967 if (ir->r == RID_SINK)
968 return;
969 args[0] = ASMREF_L; /* lua_State *L */
970 args[1] = ir->op1; /* GCtab *t */
971 args[2] = ASMREF_TMP1; /* cTValue *key */
972 asm_setupresult(as, ir, ci); /* TValue * */
973 asm_gencall(as, ci, args);
974 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
975}
976
977static void asm_uref(ASMState *as, IRIns *ir) 965static void asm_uref(ASMState *as, IRIns *ir)
978{ 966{
979 Reg dest = ra_dest(as, ir, RSET_GPR); 967 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1001,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
1001static void asm_fref(ASMState *as, IRIns *ir) 989static void asm_fref(ASMState *as, IRIns *ir)
1002{ 990{
1003 UNUSED(as); UNUSED(ir); 991 UNUSED(as); UNUSED(ir);
1004 lua_assert(!ra_used(ir)); 992 lj_assertA(!ra_used(ir), "unfused FREF");
1005} 993}
1006 994
1007static void asm_strref(ASMState *as, IRIns *ir) 995static void asm_strref(ASMState *as, IRIns *ir)
@@ -1038,25 +1026,27 @@ static void asm_strref(ASMState *as, IRIns *ir)
1038 1026
1039/* -- Loads and stores ---------------------------------------------------- */ 1027/* -- Loads and stores ---------------------------------------------------- */
1040 1028
1041static ARMIns asm_fxloadins(IRIns *ir) 1029static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
1042{ 1030{
1031 UNUSED(as);
1043 switch (irt_type(ir->t)) { 1032 switch (irt_type(ir->t)) {
1044 case IRT_I8: return ARMI_LDRSB; 1033 case IRT_I8: return ARMI_LDRSB;
1045 case IRT_U8: return ARMI_LDRB; 1034 case IRT_U8: return ARMI_LDRB;
1046 case IRT_I16: return ARMI_LDRSH; 1035 case IRT_I16: return ARMI_LDRSH;
1047 case IRT_U16: return ARMI_LDRH; 1036 case IRT_U16: return ARMI_LDRH;
1048 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; 1037 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
1049 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ 1038 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
1050 default: return ARMI_LDR; 1039 default: return ARMI_LDR;
1051 } 1040 }
1052} 1041}
1053 1042
1054static ARMIns asm_fxstoreins(IRIns *ir) 1043static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
1055{ 1044{
1045 UNUSED(as);
1056 switch (irt_type(ir->t)) { 1046 switch (irt_type(ir->t)) {
1057 case IRT_I8: case IRT_U8: return ARMI_STRB; 1047 case IRT_I8: case IRT_U8: return ARMI_STRB;
1058 case IRT_I16: case IRT_U16: return ARMI_STRH; 1048 case IRT_I16: case IRT_U16: return ARMI_STRH;
1059 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; 1049 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
1060 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ 1050 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
1061 default: return ARMI_STR; 1051 default: return ARMI_STR;
1062 } 1052 }
@@ -1065,17 +1055,23 @@ static ARMIns asm_fxstoreins(IRIns *ir)
1065static void asm_fload(ASMState *as, IRIns *ir) 1055static void asm_fload(ASMState *as, IRIns *ir)
1066{ 1056{
1067 Reg dest = ra_dest(as, ir, RSET_GPR); 1057 Reg dest = ra_dest(as, ir, RSET_GPR);
1068 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1058 ARMIns ai = asm_fxloadins(as, ir);
1069 ARMIns ai = asm_fxloadins(ir); 1059 Reg idx;
1070 int32_t ofs; 1060 int32_t ofs;
1071 if (ir->op2 == IRFL_TAB_ARRAY) { 1061 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
1072 ofs = asm_fuseabase(as, ir->op1); 1062 idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
1073 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1063 ofs = 0;
1074 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); 1064 } else {
1075 return; 1065 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1066 if (ir->op2 == IRFL_TAB_ARRAY) {
1067 ofs = asm_fuseabase(as, ir->op1);
1068 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1069 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
1070 return;
1071 }
1076 } 1072 }
1073 ofs = field_ofs[ir->op2];
1077 } 1074 }
1078 ofs = field_ofs[ir->op2];
1079 if ((ai & 0x04000000)) 1075 if ((ai & 0x04000000))
1080 emit_lso(as, ai, dest, idx, ofs); 1076 emit_lso(as, ai, dest, idx, ofs);
1081 else 1077 else
@@ -1089,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
1089 IRIns *irf = IR(ir->op1); 1085 IRIns *irf = IR(ir->op1);
1090 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 1086 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
1091 int32_t ofs = field_ofs[irf->op2]; 1087 int32_t ofs = field_ofs[irf->op2];
1092 ARMIns ai = asm_fxstoreins(ir); 1088 ARMIns ai = asm_fxstoreins(as, ir);
1093 if ((ai & 0x04000000)) 1089 if ((ai & 0x04000000))
1094 emit_lso(as, ai, src, idx, ofs); 1090 emit_lso(as, ai, src, idx, ofs);
1095 else 1091 else
@@ -1101,20 +1097,22 @@ static void asm_xload(ASMState *as, IRIns *ir)
1101{ 1097{
1102 Reg dest = ra_dest(as, ir, 1098 Reg dest = ra_dest(as, ir,
1103 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1099 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1104 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1100 lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
1105 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1101 asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
1106} 1102}
1107 1103
1108static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1104static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1109{ 1105{
1110 if (ir->r != RID_SINK) { 1106 if (ir->r != RID_SINK) {
1111 Reg src = ra_alloc1(as, ir->op2, 1107 Reg src = ra_alloc1(as, ir->op2,
1112 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1108 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1113 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 1109 asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
1114 rset_exclude(RSET_GPR, src), ofs); 1110 rset_exclude(RSET_GPR, src), ofs);
1115 } 1111 }
1116} 1112}
1117 1113
1114#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1115
1118static void asm_ahuvload(ASMState *as, IRIns *ir) 1116static void asm_ahuvload(ASMState *as, IRIns *ir)
1119{ 1117{
1120 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1118 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1127,8 +1125,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1127 rset_clear(allow, type); 1125 rset_clear(allow, type);
1128 } 1126 }
1129 if (ra_used(ir)) { 1127 if (ra_used(ir)) {
1130 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1128 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
1131 irt_isint(ir->t) || irt_isaddr(ir->t)); 1129 irt_isint(ir->t) || irt_isaddr(ir->t),
1130 "bad load type %d", irt_type(ir->t));
1132 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1131 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
1133 rset_clear(allow, dest); 1132 rset_clear(allow, dest);
1134 } 1133 }
@@ -1194,10 +1193,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
1194 IRType t = hiop ? IRT_NUM : irt_type(ir->t); 1193 IRType t = hiop ? IRT_NUM : irt_type(ir->t);
1195 Reg dest = RID_NONE, type = RID_NONE, base; 1194 Reg dest = RID_NONE, type = RID_NONE, base;
1196 RegSet allow = RSET_GPR; 1195 RegSet allow = RSET_GPR;
1197 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1196 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1198 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1197 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1198 lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1199 "inconsistent SLOAD variant");
1199#if LJ_SOFTFP 1200#if LJ_SOFTFP
1200 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ 1201 lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
1202 "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
1201 if (hiop && ra_used(ir+1)) { 1203 if (hiop && ra_used(ir+1)) {
1202 type = ra_dest(as, ir+1, allow); 1204 type = ra_dest(as, ir+1, allow);
1203 rset_clear(allow, type); 1205 rset_clear(allow, type);
@@ -1213,8 +1215,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1213 Reg tmp = RID_NONE; 1215 Reg tmp = RID_NONE;
1214 if ((ir->op2 & IRSLOAD_CONVERT)) 1216 if ((ir->op2 & IRSLOAD_CONVERT))
1215 tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); 1217 tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
1216 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1218 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
1217 irt_isint(ir->t) || irt_isaddr(ir->t)); 1219 irt_isint(ir->t) || irt_isaddr(ir->t),
1220 "bad SLOAD type %d", irt_type(ir->t));
1218 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1221 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
1219 rset_clear(allow, dest); 1222 rset_clear(allow, dest);
1220 base = ra_alloc1(as, REF_BASE, allow); 1223 base = ra_alloc1(as, REF_BASE, allow);
@@ -1272,19 +1275,17 @@ dotypecheck:
1272static void asm_cnew(ASMState *as, IRIns *ir) 1275static void asm_cnew(ASMState *as, IRIns *ir)
1273{ 1276{
1274 CTState *cts = ctype_ctsG(J2G(as->J)); 1277 CTState *cts = ctype_ctsG(J2G(as->J));
1275 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1278 CTypeID id = (CTypeID)IR(ir->op1)->i;
1276 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1279 CTSize sz;
1277 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1280 CTInfo info = lj_ctype_info(cts, id, &sz);
1278 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1281 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1279 IRRef args[2]; 1282 IRRef args[4];
1280 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1283 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1281 RegSet drop = RSET_SCRATCH; 1284 RegSet drop = RSET_SCRATCH;
1282 lua_assert(sz != CTSIZE_INVALID); 1285 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1286 "bad CNEW/CNEWI operands");
1283 1287
1284 args[0] = ASMREF_L; /* lua_State *L */
1285 args[1] = ASMREF_TMP1; /* MSize size */
1286 as->gcsteps++; 1288 as->gcsteps++;
1287
1288 if (ra_hasreg(ir->r)) 1289 if (ra_hasreg(ir->r))
1289 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1290 rset_clear(drop, ir->r); /* Dest reg handled below. */
1290 ra_evictset(as, drop); 1291 ra_evictset(as, drop);
@@ -1294,10 +1295,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1294 /* Initialize immutable cdata object. */ 1295 /* Initialize immutable cdata object. */
1295 if (ir->o == IR_CNEWI) { 1296 if (ir->o == IR_CNEWI) {
1296 int32_t ofs = sizeof(GCcdata); 1297 int32_t ofs = sizeof(GCcdata);
1297 lua_assert(sz == 4 || sz == 8); 1298 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1298 if (sz == 8) { 1299 if (sz == 8) {
1299 ofs += 4; ir++; 1300 ofs += 4; ir++;
1300 lua_assert(ir->o == IR_HIOP); 1301 lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
1301 } 1302 }
1302 for (;;) { 1303 for (;;) {
1303 Reg r = ra_alloc1(as, ir->op2, allow); 1304 Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1306,22 +1307,32 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1306 if (ofs == sizeof(GCcdata)) break; 1307 if (ofs == sizeof(GCcdata)) break;
1307 ofs -= 4; ir--; 1308 ofs -= 4; ir--;
1308 } 1309 }
1310 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1311 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1312 args[0] = ASMREF_L; /* lua_State *L */
1313 args[1] = ir->op1; /* CTypeID id */
1314 args[2] = ir->op2; /* CTSize sz */
1315 args[3] = ASMREF_TMP1; /* CTSize align */
1316 asm_gencall(as, ci, args);
1317 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1318 return;
1309 } 1319 }
1320
1310 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1321 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1311 { 1322 {
1312 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1323 uint32_t k = emit_isk12(ARMI_MOV, id);
1313 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1324 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1314 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1325 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1315 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1326 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1316 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1327 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1317 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1328 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1318 } 1329 }
1330 args[0] = ASMREF_L; /* lua_State *L */
1331 args[1] = ASMREF_TMP1; /* MSize size */
1319 asm_gencall(as, ci, args); 1332 asm_gencall(as, ci, args);
1320 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1333 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1321 ra_releasetmp(as, ASMREF_TMP1)); 1334 ra_releasetmp(as, ASMREF_TMP1));
1322} 1335}
1323#else
1324#define asm_cnew(as, ir) ((void)0)
1325#endif 1336#endif
1326 1337
1327/* -- Write barriers ------------------------------------------------------ */ 1338/* -- Write barriers ------------------------------------------------------ */
@@ -1353,7 +1364,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1353 MCLabel l_end; 1364 MCLabel l_end;
1354 Reg obj, val, tmp; 1365 Reg obj, val, tmp;
1355 /* No need for other object barriers (yet). */ 1366 /* No need for other object barriers (yet). */
1356 lua_assert(IR(ir->op1)->o == IR_UREFC); 1367 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1357 ra_evictset(as, RSET_SCRATCH); 1368 ra_evictset(as, RSET_SCRATCH);
1358 l_end = emit_label(as); 1369 l_end = emit_label(as);
1359 args[0] = ASMREF_TMP1; /* global_State *g */ 1370 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1392,23 +1403,36 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1392 emit_dm(as, ai, (dest & 15), (left & 15)); 1403 emit_dm(as, ai, (dest & 15), (left & 15));
1393} 1404}
1394 1405
1395static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1406static void asm_callround(ASMState *as, IRIns *ir, int id)
1396{ 1407{
1397 IRIns *irp = IR(ir->op1); 1408 /* The modified regs must match with the *.dasc implementation. */
1398 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1409 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1399 IRIns *irpp = IR(irp->op1); 1410 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1400 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1411 RegSet of;
1401 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1412 Reg dest, src;
1402 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1413 ra_evictset(as, drop);
1403 IRRef args[2]; 1414 dest = ra_dest(as, ir, RSET_FPR);
1404 args[0] = irpp->op1; 1415 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1405 args[1] = irp->op2; 1416 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1406 asm_setupresult(as, ir, ci); 1417 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1407 asm_gencall(as, ci, args); 1418 (void *)lj_vm_trunc_sf);
1408 return 1; 1419 /* Workaround to protect argument GPRs from being used for remat. */
1409 } 1420 of = as->freeset;
1410 } 1421 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1411 return 0; 1422 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1423 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1424 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1425 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1426}
1427
1428static void asm_fpmath(ASMState *as, IRIns *ir)
1429{
1430 if (ir->op2 <= IRFPM_TRUNC)
1431 asm_callround(as, ir, ir->op2);
1432 else if (ir->op2 == IRFPM_SQRT)
1433 asm_fpunary(as, ir, ARMI_VSQRT_D);
1434 else
1435 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1412} 1436}
1413#endif 1437#endif
1414 1438
@@ -1474,19 +1498,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1474 asm_intop(as, ir, asm_drop_cmp0(as, ai)); 1498 asm_intop(as, ir, asm_drop_cmp0(as, ai));
1475} 1499}
1476 1500
1477static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1478{
1479 ai = asm_drop_cmp0(as, ai);
1480 if (ir->op2 == 0) {
1481 Reg dest = ra_dest(as, ir, RSET_GPR);
1482 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1483 emit_d(as, ai^m, dest);
1484 } else {
1485 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1486 asm_intop(as, ir, ai);
1487 }
1488}
1489
1490static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1501static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1491{ 1502{
1492 Reg dest = ra_dest(as, ir, RSET_GPR); 1503 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1552,6 +1563,15 @@ static void asm_mul(ASMState *as, IRIns *ir)
1552 asm_intmul(as, ir); 1563 asm_intmul(as, ir);
1553} 1564}
1554 1565
1566#define asm_addov(as, ir) asm_add(as, ir)
1567#define asm_subov(as, ir) asm_sub(as, ir)
1568#define asm_mulov(as, ir) asm_mul(as, ir)
1569
1570#if !LJ_SOFTFP
1571#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1572#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1573#endif
1574
1555static void asm_neg(ASMState *as, IRIns *ir) 1575static void asm_neg(ASMState *as, IRIns *ir)
1556{ 1576{
1557#if !LJ_SOFTFP 1577#if !LJ_SOFTFP
@@ -1563,41 +1583,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
1563 asm_intneg(as, ir, ARMI_RSB); 1583 asm_intneg(as, ir, ARMI_RSB);
1564} 1584}
1565 1585
1566static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1586static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1567{ 1587{
1568 const CCallInfo *ci = &lj_ir_callinfo[id]; 1588 ai = asm_drop_cmp0(as, ai);
1569 IRRef args[2]; 1589 if (ir->op2 == 0) {
1570 args[0] = ir->op1; 1590 Reg dest = ra_dest(as, ir, RSET_GPR);
1571 args[1] = ir->op2; 1591 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1572 asm_setupresult(as, ir, ci); 1592 emit_d(as, ai^m, dest);
1573 asm_gencall(as, ci, args); 1593 } else {
1594 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1595 asm_intop(as, ir, ai);
1596 }
1574} 1597}
1575 1598
1576#if !LJ_SOFTFP 1599#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1577static void asm_callround(ASMState *as, IRIns *ir, int id)
1578{
1579 /* The modified regs must match with the *.dasc implementation. */
1580 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1581 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1582 RegSet of;
1583 Reg dest, src;
1584 ra_evictset(as, drop);
1585 dest = ra_dest(as, ir, RSET_FPR);
1586 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1587 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1588 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1589 (void *)lj_vm_trunc_sf);
1590 /* Workaround to protect argument GPRs from being used for remat. */
1591 of = as->freeset;
1592 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1593 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1594 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1595 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1596 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1597}
1598#endif
1599 1600
1600static void asm_bitswap(ASMState *as, IRIns *ir) 1601static void asm_bswap(ASMState *as, IRIns *ir)
1601{ 1602{
1602 Reg dest = ra_dest(as, ir, RSET_GPR); 1603 Reg dest = ra_dest(as, ir, RSET_GPR);
1603 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1604 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1614,6 +1615,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1614 } 1615 }
1615} 1616}
1616 1617
1618#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1619#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1620#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1621
1617static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1622static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1618{ 1623{
1619 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1624 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1631,6 +1636,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1631 } 1636 }
1632} 1637}
1633 1638
1639#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1640#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1641#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1642#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1643#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
1644
1634static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1645static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1635{ 1646{
1636 uint32_t kcmp = 0, kmov = 0; 1647 uint32_t kcmp = 0, kmov = 0;
@@ -1704,6 +1715,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1704 asm_intmin_max(as, ir, cc); 1715 asm_intmin_max(as, ir, cc);
1705} 1716}
1706 1717
1718#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
1719#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
1720
1707/* -- Comparisons --------------------------------------------------------- */ 1721/* -- Comparisons --------------------------------------------------------- */
1708 1722
1709/* Map of comparisons to flags. ORDER IR. */ 1723/* Map of comparisons to flags. ORDER IR. */
@@ -1777,7 +1791,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
1777 Reg left; 1791 Reg left;
1778 uint32_t m; 1792 uint32_t m;
1779 int cmpprev0 = 0; 1793 int cmpprev0 = 0;
1780 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); 1794 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
1795 "bad comparison data type %d", irt_type(ir->t));
1781 if (asm_swapops(as, lref, rref)) { 1796 if (asm_swapops(as, lref, rref)) {
1782 Reg tmp = lref; lref = rref; rref = tmp; 1797 Reg tmp = lref; lref = rref; rref = tmp;
1783 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ 1798 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
@@ -1819,6 +1834,18 @@ notst:
1819 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1834 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1820} 1835}
1821 1836
1837static void asm_comp(ASMState *as, IRIns *ir)
1838{
1839#if !LJ_SOFTFP
1840 if (irt_isnum(ir->t))
1841 asm_fpcomp(as, ir);
1842 else
1843#endif
1844 asm_intcomp(as, ir);
1845}
1846
1847#define asm_equal(as, ir) asm_comp(as, ir)
1848
1822#if LJ_HASFFI 1849#if LJ_HASFFI
1823/* 64 bit integer comparisons. */ 1850/* 64 bit integer comparisons. */
1824static void asm_int64comp(ASMState *as, IRIns *ir) 1851static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1882,7 +1909,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1882 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { 1909 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1883 as->curins--; /* Always skip the loword min/max. */ 1910 as->curins--; /* Always skip the loword min/max. */
1884 if (uselo || usehi) 1911 if (uselo || usehi)
1885 asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); 1912 asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
1886 return; 1913 return;
1887#elif LJ_HASFFI 1914#elif LJ_HASFFI
1888 } else if ((ir-1)->o == IR_CONV) { 1915 } else if ((ir-1)->o == IR_CONV) {
@@ -1893,7 +1920,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1893#endif 1920#endif
1894 } else if ((ir-1)->o == IR_XSTORE) { 1921 } else if ((ir-1)->o == IR_XSTORE) {
1895 if ((ir-1)->r != RID_SINK) 1922 if ((ir-1)->r != RID_SINK)
1896 asm_xstore(as, ir, 4); 1923 asm_xstore_(as, ir, 4);
1897 return; 1924 return;
1898 } 1925 }
1899 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1926 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
@@ -1929,18 +1956,29 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1929 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 1956 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1930 break; 1957 break;
1931#if LJ_SOFTFP 1958#if LJ_SOFTFP
1932 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: 1959 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
1933#endif 1960#endif
1934 case IR_CNEWI: 1961 case IR_CNEWI:
1935 /* Nothing to do here. Handled by lo op itself. */ 1962 /* Nothing to do here. Handled by lo op itself. */
1936 break; 1963 break;
1937 default: lua_assert(0); break; 1964 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
1938 } 1965 }
1939#else 1966#else
1940 UNUSED(as); UNUSED(ir); lua_assert(0); 1967 /* Unused without SOFTFP or FFI. */
1968 UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
1941#endif 1969#endif
1942} 1970}
1943 1971
1972/* -- Profiling ----------------------------------------------------------- */
1973
1974static void asm_prof(ASMState *as, IRIns *ir)
1975{
1976 UNUSED(ir);
1977 asm_guardcc(as, CC_NE);
1978 emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
1979 emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1980}
1981
1944/* -- Stack handling ------------------------------------------------------ */ 1982/* -- Stack handling ------------------------------------------------------ */
1945 1983
1946/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1984/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1952,7 +1990,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1952 if (irp) { 1990 if (irp) {
1953 if (!ra_hasspill(irp->s)) { 1991 if (!ra_hasspill(irp->s)) {
1954 pbase = irp->r; 1992 pbase = irp->r;
1955 lua_assert(ra_hasreg(pbase)); 1993 lj_assertA(ra_hasreg(pbase), "base reg lost");
1956 } else if (allow) { 1994 } else if (allow) {
1957 pbase = rset_pickbot(allow); 1995 pbase = rset_pickbot(allow);
1958 } else { 1996 } else {
@@ -1964,13 +2002,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1964 } 2002 }
1965 emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); 2003 emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
1966 k = emit_isk12(0, (int32_t)(8*topslot)); 2004 k = emit_isk12(0, (int32_t)(8*topslot));
1967 lua_assert(k); 2005 lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
1968 emit_n(as, ARMI_CMP^k, RID_TMP); 2006 emit_n(as, ARMI_CMP^k, RID_TMP);
1969 emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); 2007 emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
1970 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, 2008 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
1971 (int32_t)offsetof(lua_State, maxstack)); 2009 (int32_t)offsetof(lua_State, maxstack));
1972 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 2010 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1973 int32_t i = i32ptr(&J2G(as->J)->jit_L); 2011 int32_t i = i32ptr(&J2G(as->J)->cur_L);
1974 if (ra_hasspill(irp->s)) 2012 if (ra_hasspill(irp->s))
1975 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); 2013 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
1976 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); 2014 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1978,7 +2016,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1978 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ 2016 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
1979 emit_loadi(as, RID_TMP, (i & ~4095)); 2017 emit_loadi(as, RID_TMP, (i & ~4095));
1980 } else { 2018 } else {
1981 emit_getgl(as, RID_TMP, jit_L); 2019 emit_getgl(as, RID_TMP, cur_L);
1982 } 2020 }
1983} 2021}
1984 2022
@@ -2001,7 +2039,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2001#if LJ_SOFTFP 2039#if LJ_SOFTFP
2002 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); 2040 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
2003 Reg tmp; 2041 Reg tmp;
2004 lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ 2042 /* LJ_SOFTFP: must be a number constant. */
2043 lj_assertA(irref_isk(ref), "unsplit FP op");
2005 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, 2044 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
2006 rset_exclude(RSET_GPREVEN, RID_BASE)); 2045 rset_exclude(RSET_GPREVEN, RID_BASE));
2007 emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); 2046 emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
@@ -2015,7 +2054,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2015 } else { 2054 } else {
2016 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); 2055 RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
2017 Reg type; 2056 Reg type;
2018 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2057 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2058 "restore of IR type %d", irt_type(ir->t));
2019 if (!irt_ispri(ir->t)) { 2059 if (!irt_ispri(ir->t)) {
2020 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); 2060 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
2021 emit_lso(as, ARMI_STR, src, RID_BASE, ofs); 2061 emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
@@ -2035,7 +2075,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2035 } 2075 }
2036 checkmclim(as); 2076 checkmclim(as);
2037 } 2077 }
2038 lua_assert(map + nent == flinks); 2078 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
2039} 2079}
2040 2080
2041/* -- GC handling --------------------------------------------------------- */ 2081/* -- GC handling --------------------------------------------------------- */
@@ -2091,13 +2131,13 @@ static void asm_loop_fixup(ASMState *as)
2091 2131
2092/* -- Head of trace ------------------------------------------------------- */ 2132/* -- Head of trace ------------------------------------------------------- */
2093 2133
2094/* Reload L register from g->jit_L. */ 2134/* Reload L register from g->cur_L. */
2095static void asm_head_lreg(ASMState *as) 2135static void asm_head_lreg(ASMState *as)
2096{ 2136{
2097 IRIns *ir = IR(ASMREF_L); 2137 IRIns *ir = IR(ASMREF_L);
2098 if (ra_used(ir)) { 2138 if (ra_used(ir)) {
2099 Reg r = ra_dest(as, ir, RSET_GPR); 2139 Reg r = ra_dest(as, ir, RSET_GPR);
2100 emit_getgl(as, r, jit_L); 2140 emit_getgl(as, r, cur_L);
2101 ra_evictk(as); 2141 ra_evictk(as);
2102 } 2142 }
2103} 2143}
@@ -2125,7 +2165,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
2125 rset_clear(allow, ra_dest(as, ir, allow)); 2165 rset_clear(allow, ra_dest(as, ir, allow));
2126 } else { 2166 } else {
2127 Reg r = irp->r; 2167 Reg r = irp->r;
2128 lua_assert(ra_hasreg(r)); 2168 lj_assertA(ra_hasreg(r), "base reg lost");
2129 rset_clear(allow, r); 2169 rset_clear(allow, r);
2130 if (r != ir->r && !rset_test(as->freeset, r)) 2170 if (r != ir->r && !rset_test(as->freeset, r))
2131 ra_restore(as, regcost_ref(as->cost[r])); 2171 ra_restore(as, regcost_ref(as->cost[r]));
@@ -2147,7 +2187,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2147 } else { 2187 } else {
2148 /* Patch stack adjustment. */ 2188 /* Patch stack adjustment. */
2149 uint32_t k = emit_isk12(ARMI_ADD, spadj); 2189 uint32_t k = emit_isk12(ARMI_ADD, spadj);
2150 lua_assert(k); 2190 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
2151 p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); 2191 p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
2152 } 2192 }
2153 /* Patch exit branch. */ 2193 /* Patch exit branch. */
@@ -2168,143 +2208,13 @@ static void asm_tail_prep(ASMState *as)
2168 *p = 0; /* Prevent load/store merging. */ 2208 *p = 0; /* Prevent load/store merging. */
2169} 2209}
2170 2210
2171/* -- Instruction dispatch ------------------------------------------------ */
2172
2173/* Assemble a single instruction. */
2174static void asm_ir(ASMState *as, IRIns *ir)
2175{
2176 switch ((IROp)ir->o) {
2177 /* Miscellaneous ops. */
2178 case IR_LOOP: asm_loop(as); break;
2179 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2180 case IR_USE:
2181 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2182 case IR_PHI: asm_phi(as, ir); break;
2183 case IR_HIOP: asm_hiop(as, ir); break;
2184 case IR_GCSTEP: asm_gcstep(as, ir); break;
2185
2186 /* Guarded assertions. */
2187 case IR_EQ: case IR_NE:
2188 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2189 as->curins--;
2190 asm_href(as, ir-1, (IROp)ir->o);
2191 break;
2192 }
2193 /* fallthrough */
2194 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2195 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2196 case IR_ABC:
2197#if !LJ_SOFTFP
2198 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2199#endif
2200 asm_intcomp(as, ir);
2201 break;
2202
2203 case IR_RETF: asm_retf(as, ir); break;
2204
2205 /* Bit ops. */
2206 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2207 case IR_BSWAP: asm_bitswap(as, ir); break;
2208
2209 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2210 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2211 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2212
2213 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2214 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2215 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2216 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2217 case IR_BROL: lua_assert(0); break;
2218
2219 /* Arithmetic ops. */
2220 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2221 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2222 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2223 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2224 case IR_NEG: asm_neg(as, ir); break;
2225
2226#if LJ_SOFTFP
2227 case IR_DIV: case IR_POW: case IR_ABS:
2228 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2229 lua_assert(0); /* Unused for LJ_SOFTFP. */
2230 break;
2231#else
2232 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2233 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2234 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2235 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2236 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2237 case IR_FPMATH:
2238 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2239 break;
2240 if (ir->op2 <= IRFPM_TRUNC)
2241 asm_callround(as, ir, ir->op2);
2242 else if (ir->op2 == IRFPM_SQRT)
2243 asm_fpunary(as, ir, ARMI_VSQRT_D);
2244 else
2245 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2246 break;
2247 case IR_TOBIT: asm_tobit(as, ir); break;
2248#endif
2249
2250 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2251 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2252
2253 /* Memory references. */
2254 case IR_AREF: asm_aref(as, ir); break;
2255 case IR_HREF: asm_href(as, ir, 0); break;
2256 case IR_HREFK: asm_hrefk(as, ir); break;
2257 case IR_NEWREF: asm_newref(as, ir); break;
2258 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2259 case IR_FREF: asm_fref(as, ir); break;
2260 case IR_STRREF: asm_strref(as, ir); break;
2261
2262 /* Loads and stores. */
2263 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2264 asm_ahuvload(as, ir);
2265 break;
2266 case IR_FLOAD: asm_fload(as, ir); break;
2267 case IR_XLOAD: asm_xload(as, ir); break;
2268 case IR_SLOAD: asm_sload(as, ir); break;
2269
2270 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2271 case IR_FSTORE: asm_fstore(as, ir); break;
2272 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2273
2274 /* Allocations. */
2275 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2276 case IR_TNEW: asm_tnew(as, ir); break;
2277 case IR_TDUP: asm_tdup(as, ir); break;
2278 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2279
2280 /* Write barriers. */
2281 case IR_TBAR: asm_tbar(as, ir); break;
2282 case IR_OBAR: asm_obar(as, ir); break;
2283
2284 /* Type conversions. */
2285 case IR_CONV: asm_conv(as, ir); break;
2286 case IR_TOSTR: asm_tostr(as, ir); break;
2287 case IR_STRTO: asm_strto(as, ir); break;
2288
2289 /* Calls. */
2290 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2291 case IR_CALLXS: asm_callx(as, ir); break;
2292 case IR_CARG: break;
2293
2294 default:
2295 setintV(&as->J->errinfo, ir->o);
2296 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2297 break;
2298 }
2299}
2300
2301/* -- Trace setup --------------------------------------------------------- */ 2211/* -- Trace setup --------------------------------------------------------- */
2302 2212
2303/* Ensure there are enough stack slots for call arguments. */ 2213/* Ensure there are enough stack slots for call arguments. */
2304static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2214static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2305{ 2215{
2306 IRRef args[CCI_NARGS_MAX*2]; 2216 IRRef args[CCI_NARGS_MAX*2];
2307 uint32_t i, nargs = (int)CCI_NARGS(ci); 2217 uint32_t i, nargs = CCI_XNARGS(ci);
2308 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2218 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2309 asm_collectargs(as, ir, ci, args); 2219 asm_collectargs(as, ir, ci, args);
2310 for (i = 0; i < nargs; i++) { 2220 for (i = 0; i < nargs; i++) {
@@ -2360,7 +2270,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2360 if (!cstart) cstart = p; 2270 if (!cstart) cstart = p;
2361 } 2271 }
2362 } 2272 }
2363 lua_assert(cstart != NULL); 2273 lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
2364 lj_mcode_sync(cstart, cend); 2274 lj_mcode_sync(cstart, cend);
2365 lj_mcode_patch(J, mcarea, 1); 2275 lj_mcode_patch(J, mcarea, 1);
2366} 2276}
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
new file mode 100644
index 00000000..dde52347
--- /dev/null
+++ b/src/lj_asm_arm64.h
@@ -0,0 +1,2039 @@
1/*
2** ARM64 IR assembler (SSA IR -> machine code).
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4**
5** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6** Sponsored by Cisco Systems, Inc.
7*/
8
9/* -- Register allocator extensions --------------------------------------- */
10
11/* Allocate a register with a hint.
**
** Returns the register holding IR reference `ref`. If the instruction has no
** register yet, one is allocated from `allow`. Before allocating, `hint` is
** stored on the instruction, unless it already carries a hint or
** iscrossref() is true for it. The result is always passed to ra_noweak().
*/
12static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
13{
14 Reg r = IR(ref)->r;
15 if (ra_noreg(r)) { /* Not in a register yet. */
16 if (!ra_hashint(r) && !iscrossref(as, ref))
17 ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
18 r = ra_allocref(as, ref, allow);
19 }
20 ra_noweak(as, r);
21 return r;
22}
23
24/* Allocate two source registers for three-operand instructions.
**
** Returns both registers packed into one value: the register for ir->op1 in
** the low 8 bits and the register for ir->op2 shifted left by 8. Operands
** that already own a register keep it. Whenever one register is allocated
** after the other one is known, the known one is excluded from `allow`.
*/
25static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
26{
27 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
28 Reg left = irl->r, right = irr->r;
29 if (ra_hasreg(left)) { /* op1 already has a register. */
30 ra_noweak(as, left);
31 if (ra_noreg(right))
32 right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
33 else
34 ra_noweak(as, right);
35 } else if (ra_hasreg(right)) { /* Only op2 has a register. */
36 ra_noweak(as, right);
37 left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
38 } else if (ra_hashint(right)) { /* Neither has one, op2 has a hint: allocate op2 first. */
39 right = ra_allocref(as, ir->op2, allow);
40 left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
41 } else { /* Neither has one, no hint on op2: allocate op1 first. */
42 left = ra_allocref(as, ir->op1, allow);
43 right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
44 }
45 return left | (right << 8); /* Callers unpack with (x >> 8) and (x & 255). */
46}
47
48/* -- Guard handling ------------------------------------------------------ */
49
50/* Setup all needed exit stubs.
**
** Writes a group of nexits+3 instruction words immediately below as->mctop
** and moves as->mctop down to the first of them. The words are stored from
** the highest address downwards, so the stores below run in the opposite
** order of the layout shown in the comment inside the function.
** asm_mclimit() is called first if the group plus MCLIM_REDZONE would reach
** below as->mclim.
*/
51static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
52{
53 ExitNo i;
54 MCode *mxp = as->mctop;
55 if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
56 asm_mclimit(as);
57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
58 for (i = nexits-1; (int32_t)i >= 0; i--) /* Signed compare ends the countdown after i == 0. */
59 *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); /* Word 3+i of the group: branch back 3+i words, i.e. to word 0. */
60 *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
61 mxp--; /* Step down first: the branch offset below is relative to this word. */
62 *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
63 *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
64 as->mctop = mxp;
65}
66
/* Return the address of the exit stub branch for exit `exitno`.
**
** Computed as as->mctop + exitno + 3. In asm_exitstub_setup() the per-exit
** branches are stored starting 3 words above the final as->mctop.
*/
67static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
68{
69 /* Keep this in-sync with exitstub_trace_addr(). */
70 return as->mctop + exitno + 3;
71}
72
73/* Emit conditional branch to exit for guard.
**
** The branch target is the exit stub for the current snapshot (as->snapno).
** If the current emit position equals as->invmcp, the word at that position
** is overwritten with an unconditional branch to the stub, a branch on the
** inverted condition (cc^1) to the word before it is emitted instead, and
** as->loopinv is set.
** NOTE(review): as->invmcp presumably marks the loop branch that is to be
** inverted -- confirm against the loop fixup code.
*/
74static void asm_guardcc(ASMState *as, A64CC cc)
75{
76 MCode *target = asm_exitstub_addr(as, as->snapno);
77 MCode *p = as->mcp;
78 if (LJ_UNLIKELY(p == as->invmcp)) {
79 as->loopinv = 1;
80 *p = A64I_B | A64F_S26(target-p); /* Word at p now jumps to the exit stub. */
81 emit_cond_branch(as, cc^1, p-1); /* Inverted condition, target is p-1. */
82 return;
83 }
84 emit_cond_branch(as, cc, target);
85}
86
87/* Emit test and branch instruction to exit for guard.
**
** Tests bit number `bit` of register `r` with instruction `ai` and branches
** to the exit stub for the current snapshot. Handles the as->invmcp case the
** same way as asm_guardcc(): the word at the emit position becomes an
** unconditional branch to the stub and the inverted test branches to p-1.
** The inversion flips bit 24 of the opcode.
** NOTE(review): bit 24 presumably selects TBZ vs. TBNZ in the A64 encoding
** -- confirm against the A64I_* definitions.
*/
88static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
89{
90 MCode *target = asm_exitstub_addr(as, as->snapno);
91 MCode *p = as->mcp;
92 if (LJ_UNLIKELY(p == as->invmcp)) {
93 as->loopinv = 1;
94 *p = A64I_B | A64F_S26(target-p); /* Word at p now jumps to the exit stub. */
95 emit_tnb(as, ai^0x01000000u, r, bit, p-1); /* Opposite test, target is p-1. */
96 return;
97 }
98 emit_tnb(as, ai, r, bit, target);
99}
100
101/* Emit compare and branch instruction to exit for guard.
**
** Compares register `r` using instruction `ai` and branches to the exit stub
** for the current snapshot. Handles the as->invmcp case the same way as
** asm_guardcc(): the word at the emit position becomes an unconditional
** branch to the stub and the inverted compare branches to p-1.
** The inversion flips bit 24 of the opcode.
** NOTE(review): bit 24 presumably selects CBZ vs. CBNZ in the A64 encoding
** -- confirm against the A64I_* definitions.
*/
102static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
103{
104 MCode *target = asm_exitstub_addr(as, as->snapno);
105 MCode *p = as->mcp;
106 if (LJ_UNLIKELY(p == as->invmcp)) {
107 as->loopinv = 1;
108 *p = A64I_B | A64F_S26(target-p); /* Word at p now jumps to the exit stub. */
109 emit_cnb(as, ai^0x01000000u, r, p-1); /* Opposite compare, target is p-1. */
110 return;
111 }
112 emit_cnb(as, ai, r, target);
113}
114
115/* -- Operand fusion ------------------------------------------------------ */
116
117/* Limit linear search to this distance. Avoids O(n^2) behavior. */
118#define CONFLICT_SEARCH_LIM 31
119
/* Try to get IR reference `ref` as a 32 bit constant.
**
** Returns 1 and stores the value in *k if `ref` is a constant that is either
** an IR_KNULL, not of a 64 bit type, or a 64 bit constant accepted by
** checki32(). Returns 0 and leaves *k untouched otherwise.
*/
120static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
121{
122 if (irref_isk(ref)) {
123 IRIns *ir = IR(ref);
124 if (ir->o == IR_KNULL || !irt_is64(ir->t)) { /* Value is taken from the 32 bit field ir->i. */
125 *k = ir->i;
126 return 1;
127 } else if (checki32((int64_t)ir_k64(ir)->u64)) { /* 64 bit constant that passes checki32(). */
128 *k = (int32_t)ir_k64(ir)->u64;
129 return 1;
130 }
131 }
132 return 0;
133}
134
135/* Check if there's no conflicting instruction between curins and ref.
**
** Scans the IR instructions strictly between `ref` and as->curins (both
** excluded) for opcode `conflict`. Returns 1 if none is found. Returns 0 if
** one is found, or without scanning if as->curins is more than
** CONFLICT_SEARCH_LIM instructions after `ref`.
*/
136static int noconflict(ASMState *as, IRRef ref, IROp conflict)
137{
138 IRIns *ir = as->ir;
139 IRRef i = as->curins;
140 if (i > ref + CONFLICT_SEARCH_LIM)
141 return 0; /* Give up, ref is too far away. */
142 while (--i > ref) /* Visits curins-1 down to ref+1. */
143 if (ir[i].o == conflict)
144 return 0; /* Conflict found. */
145 return 1; /* Ok, no conflict. */
146}
147
148/* Fuse the array base of colocated arrays.
**
** Returns sizeof(GCtab) if `ref` is an IR_TNEW whose op1 does not exceed
** LJ_MAX_COLOSIZE, fusion is not disabled (neverfuse) and no IR_NEWREF lies
** between `ref` and the current instruction. Returns 0 otherwise; callers
** treat 0 as "cannot fuse".
** NOTE(review): presumably the array part of such a table directly follows
** the GCtab header -- confirm against the table allocator.
*/
149static int32_t asm_fuseabase(ASMState *as, IRRef ref)
150{
151 IRIns *ir = IR(ref);
152 if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
153 !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
154 return (int32_t)sizeof(GCtab);
155 return 0;
156}
157
158#define FUSE_REG 0x40000000
159
160/* Fuse array/hash/upvalue reference into register+offset operand.
**
** Returns the base register and stores the offset in *ofsp. For an AREF with
** a non-constant index, *ofsp instead holds FUSE_REG or'ed with the index
** register. If nothing can be fused, or `ref` already has a register, the
** reference itself is allocated from `allow` and *ofsp is set to 0.
** `ins` is only used to check whether an offset is encodable
** (emit_checkofs).
*/
161static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
162 A64Ins ins)
163{
164 IRIns *ir = IR(ref);
165 if (ra_noreg(ir->r)) {
166 if (ir->o == IR_AREF) {
167 if (mayfuse(as, ref)) {
168 if (irref_isk(ir->op2)) { /* Constant index. */
169 IRRef tab = IR(ir->op1)->op1; /* NOTE(review): assumes op1 is a load whose own op1 is the table -- confirm. */
170 int32_t ofs = asm_fuseabase(as, tab);
171 IRRef refa = ofs ? tab : ir->op1; /* Base off the table itself when asm_fuseabase() returned an offset. */
172 ofs += 8*IR(ir->op2)->i; /* Index is scaled by 8. */
173 if (emit_checkofs(ins, ofs)) {
174 *ofsp = ofs;
175 return ra_alloc1(as, refa, allow);
176 }
177 } else { /* Variable index: base register plus index register. */
178 Reg base = ra_alloc1(as, ir->op1, allow);
179 *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
180 return base;
181 }
182 }
183 } else if (ir->o == IR_HREFK) {
184 if (mayfuse(as, ref)) {
185 int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); /* Byte offset: op2 of the referenced instruction times sizeof(Node). */
186 if (emit_checkofs(ins, ofs)) {
187 *ofsp = ofs;
188 return ra_alloc1(as, ir->op1, allow);
189 }
190 }
191 } else if (ir->o == IR_UREFC) {
192 if (irref_isk(ir->op1)) { /* Constant function: address the upvalue relative to RID_GL. */
193 GCfunc *fn = ir_kfunc(IR(ir->op1));
194 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; /* Upvalue index is op2 >> 8. */
195 int64_t ofs = glofs(as, &uv->tv);
196 if (emit_checkofs(ins, ofs)) {
197 *ofsp = (int32_t)ofs;
198 return RID_GL;
199 }
200 }
201 } else if (ir->o == IR_TMPREF) {
202 *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv); /* No emit_checkofs() here. NOTE(review): presumably always encodable -- confirm. */
203 return RID_GL;
204 }
205 }
206 *ofsp = 0; /* Fallback: no fusion, plain register with zero offset. */
207 return ra_alloc1(as, ref, allow);
208}
209
210/* Fuse m operand into arithmetic/logic instructions.
**
** Returns the operand bits to be combined with instruction `ai` for IR
** reference `ref`: an encoded immediate, a shifted or sign-extended register
** operand, or a plain A64F_M() register field. Falls back to allocating a
** register from `allow` when no fused form applies.
*/
211static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
212{
213 IRIns *ir = IR(ref);
214 if (ra_hasreg(ir->r)) { /* Already in a register: use it as is. */
215 ra_noweak(as, ir->r);
216 return A64F_M(ir->r);
217 } else if (irref_isk(ref)) { /* Constant: try an immediate encoding. */
218 uint32_t m;
219 int64_t k = get_k64val(as, ref);
220 if ((ai & 0x1f000000) == 0x0a000000) /* NOTE(review): presumably the logical-op encoding class -- confirm. */
221 m = emit_isk13(k, irt_is64(ir->t));
222 else
223 m = emit_isk12(k);
224 if (m) /* A zero result is treated as not encodable; falls through to the register case. */
225 return m;
226 } else if (mayfuse(as, ref)) {
227 if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
228 (ir->o == IR_ADD && ir->op1 == ir->op2)) { /* Shift by constant, or x+x handled as a left shift by 1. */
229 A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
230 ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
231 int shift = ir->o == IR_ADD ? 1 :
232 (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); /* Shift count masked to the operand width. */
233 IRIns *irl = IR(ir->op1);
234 if (sh == A64SH_LSL &&
235 irl->o == IR_CONV &&
236 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
237 shift <= 4 &&
238 canfuse(as, irl)) { /* Left shift (at most 4) of a sign-extended int: extended-register form. */
239 Reg m = ra_alloc1(as, irl->op1, allow);
240 return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
241 } else { /* Shifted-register form. */
242 Reg m = ra_alloc1(as, ir->op1, allow);
243 return A64F_M(m) | A64F_SH(sh, shift);
244 }
245 } else if (ir->o == IR_CONV &&
246 ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { /* Sign extension int -> i64 without shift. */
247 Reg m = ra_alloc1(as, ir->op1, allow);
248 return A64F_M(m) | A64F_EX(A64EX_SXTW);
249 }
250 }
251 return A64F_M(ra_allocref(as, ref, allow)); /* Fallback: plain register operand. */
252}
253
254/* Fuse XLOAD/XSTORE reference into load/store operand.
**
** Emits the load/store instruction `ai` for register `rd`, addressing the
** location given by IR reference `ref`. If `ref` has no register and
** canfuse() allows it, an IR_ADD or IR_STRREF address computation is folded
** into the addressing mode (immediate offset or register offset). Otherwise
** `ref` is allocated to a register from `allow` and used with offset 0.
** For IR_STRREF, sizeof(GCstr) is added to the offset.
** NOTE(review): where two emit_* calls follow each other, they appear in
** reverse of execution order (machine code seems to be generated
** backwards) -- confirm against the emit_* helpers.
*/
255static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
256 RegSet allow)
257{
258 IRIns *ir = IR(ref);
259 Reg base;
260 int32_t ofs = 0;
261 if (ra_noreg(ir->r) && canfuse(as, ir)) {
262 if (ir->o == IR_ADD) {
263 if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { /* base + encodable constant. */
264 ref = ir->op1;
265 } else { /* base + index: register-offset addressing. */
266 Reg rn, rm;
267 IRRef lref = ir->op1, rref = ir->op2;
268 IRIns *irl = IR(lref);
269 if (mayfuse(as, irl->op1)) {
270 unsigned int shift = 4; /* Initial value used when neither shift pattern below matches. */
271 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
272 shift = (IR(irl->op2)->i & 63);
273 } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { /* x+x handled as a left shift by 1. */
274 shift = 1;
275 }
276 if ((ai >> 30) == shift) { /* NOTE(review): ai >> 30 is presumably log2 of the access size -- confirm. */
277 lref = irl->op1;
278 irl = IR(lref);
279 ai |= A64I_LS_SH;
280 }
281 }
282 if (irl->o == IR_CONV &&
283 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
284 canfuse(as, irl)) { /* Index is a sign-extended int: fold the extension. */
285 lref = irl->op1;
286 ai |= A64I_LS_SXTWx;
287 } else {
288 ai |= A64I_LS_LSLx;
289 }
290 rm = ra_alloc1(as, lref, allow);
291 rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
292 emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
293 return;
294 }
295 } else if (ir->o == IR_STRREF) {
296 if (asm_isk32(as, ir->op2, &ofs)) { /* Constant second operand. */
297 ref = ir->op1;
298 } else if (asm_isk32(as, ir->op1, &ofs)) { /* Constant first operand. */
299 ref = ir->op2;
300 } else { /* No 32 bit constant operand: add both into rd, then load via rd. */
301 Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
302 Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
303 Reg rn = ra_alloc1(as, refv, allow);
304 IRIns *irr = IR(refk);
305 uint32_t m;
306 if (irr+1 == ir && !ra_used(irr) &&
307 irr->o == IR_ADD && irref_isk(irr->op2)) { /* Directly preceding, otherwise unused ADD with a constant. */
308 ofs = sizeof(GCstr) + IR(irr->op2)->i;
309 if (emit_checkofs(ai, ofs)) {
310 Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
311 m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
312 goto skipopm;
313 }
314 }
315 m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
316 ofs = sizeof(GCstr);
317 skipopm:
318 emit_lso(as, ai, rd, rd, ofs); /* Access at [rd + ofs]; rd doubles as the address register. */
319 emit_dn(as, A64I_ADDx^m, rd, rn); /* rd = rn + operand m. */
320 return;
321 }
322 ofs += sizeof(GCstr);
323 if (!emit_checkofs(ai, ofs)) { /* Offset not encodable: put it in a register instead. */
324 Reg rn = ra_alloc1(as, ref, allow);
325 Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
326 emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
327 return;
328 }
329 }
330 }
331 base = ra_alloc1(as, ref, allow);
332 emit_lso(as, ai, (rd & 31), base, ofs);
333}
334
335/* Fuse FP multiply-add/sub.
**
** Returns 1 after emitting a single four-operand instruction if one operand
** of `ir` is a fusable IR_MUL without a register; returns 0 without emitting
** anything otherwise. `ai` is used when the multiply is the left operand,
** `air` when it is the right operand. Nothing is fused if both operands are
** the same reference.
*/
336static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
337{
338 IRRef lref = ir->op1, rref = ir->op2;
339 IRIns *irm;
340 if (lref != rref &&
341 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
342 ra_noreg(irm->r)) ||
343 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
344 (rref = lref, ai = air, ra_noreg(irm->r))))) { /* On the right-operand path rref becomes the addend and ai the reversed opcode. */
345 Reg dest = ra_dest(as, ir, RSET_FPR);
346 Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); /* rref is the non-multiply operand here. */
347 Reg left = ra_alloc2(as, irm,
348 rset_exclude(rset_exclude(RSET_FPR, dest), add));
349 Reg right = (left >> 8); left &= 255; /* Unpack the two registers returned by ra_alloc2(). */
350 emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
351 return 1;
352 }
353 return 0;
354}
355
356/* Fuse BAND + BSHL/BSHR into UBFM.
**
** `ir` must be an IR_BAND. Returns 1 after emitting one UBFM if its left
** operand is a fusable BSHL/BSHR by a constant and its right operand is a
** constant mask which (after shifting it right for BSHL) consists of
** contiguous 1-bits starting at bit 0. Returns 0 without emitting anything
** otherwise.
*/
357static int asm_fuseandshift(ASMState *as, IRIns *ir)
358{
359 IRIns *irl = IR(ir->op1);
360 lj_assertA(ir->o == IR_BAND, "bad usage");
361 if (canfuse(as, irl) && irref_isk(ir->op2)) {
362 uint64_t mask = get_k64val(as, ir->op2);
363 if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
364 int32_t shmask = irt_is64(irl->t) ? 63 : 31;
365 int32_t shift = (IR(irl->op2)->i & shmask);
366 int32_t imms = shift;
367 if (irl->o == IR_BSHL) {
368 mask >>= shift; /* Drops the mask bits below the shift count. */
369 shift = (shmask-shift+1) & shmask; /* Operand width minus shift, wrapped to the width. */
370 imms = 0;
371 }
372 if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */
373 Reg dest = ra_dest(as, ir, RSET_GPR);
374 Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
375 A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
376 imms += 63 - emit_clz64(mask); /* Adds the index of the highest set mask bit. */
377 if (imms > shmask) imms = shmask; /* Clamp to the operand width. */
378 emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
379 return 1;
380 }
381 }
382 }
383 return 0;
384}
385
386/* Fuse BOR(BSHL, BSHR) into EXTR/ROR.
**
** `ir` must be an IR_BOR. Returns 1 after emitting one EXTR if its operands
** are a fusable BSHL and BSHR (in either order), both by constants whose sum
** equals the operand width (64 or 32). Returns 0 without emitting anything
** otherwise.
*/
387static int asm_fuseorshift(ASMState *as, IRIns *ir)
388{
389 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
390 lj_assertA(ir->o == IR_BOR, "bad usage");
391 if (canfuse(as, irl) && canfuse(as, irr) &&
392 ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
393 (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
394 if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
395 IRRef lref = irl->op1, rref = irr->op1;
396 uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
397 if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */
398 uint32_t tmp2;
399 IRRef tmp1 = lref; lref = rref; rref = tmp1; /* Swap the shifted references ... */
400 tmp2 = lshift; lshift = rshift; rshift = tmp2; /* ... and their shift counts. */
401 }
402 if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { /* Shift counts must add up to the operand width. */
403 A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
404 Reg dest = ra_dest(as, ir, RSET_GPR);
405 Reg left = ra_alloc1(as, lref, RSET_GPR);
406 Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
407 emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
408 return 1;
409 }
410 }
411 }
412 return 0;
413}
414
415/* -- Calls --------------------------------------------------------------- */
416
417/* Generate a call to a C function.
**
** Emits the call itself (only if ci->func is non-NULL) and the setup of the
** CCI_XNARGS(ci) arguments in `args`. FP arguments are assigned to the
** registers REGARG_FIRSTFPR..REGARG_LASTFPR, all others to
** REGARG_FIRSTGPR..REGARG_LASTGPR, in order. Once a register class is
** exhausted, further arguments of that class are stored to consecutive
** 8 byte stack slots starting at offset 0. Zero entries in `args` are
** skipped and consume neither a register nor a slot.
*/
418static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
419{
420 uint32_t n, nargs = CCI_XNARGS(ci);
421 int32_t ofs = 0;
422 Reg gpr, fpr = REGARG_FIRSTFPR;
423 if ((void *)ci->func) /* NULL func: the caller has emitted the call itself (see asm_callx). */
424 emit_call(as, (void *)ci->func);
425 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) /* Reset the cost entry of every GPR argument register. */
426 as->cost[gpr] = REGCOST(~0u, ASMREF_L);
427 gpr = REGARG_FIRSTGPR;
428 for (n = 0; n < nargs; n++) { /* Setup args. */
429 IRRef ref = args[n];
430 IRIns *ir = IR(ref);
431 if (ref) {
432 if (irt_isfp(ir->t)) {
433 if (fpr <= REGARG_LASTFPR) {
434 lj_assertA(rset_test(as->freeset, fpr),
435 "reg %d not free", fpr); /* Must have been evicted. */
436 ra_leftov(as, fpr, ref);
437 fpr++;
438 } else { /* Out of FPR argument registers: pass on the stack. */
439 Reg r = ra_alloc1(as, ref, RSET_FPR);
440 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); /* Big-endian: non-number FP values go at +4 within the slot. */
441 ofs += 8;
442 }
443 } else {
444 if (gpr <= REGARG_LASTGPR) {
445 lj_assertA(rset_test(as->freeset, gpr),
446 "reg %d not free", gpr); /* Must have been evicted. */
447 ra_leftov(as, gpr, ref);
448 gpr++;
449 } else { /* Out of GPR argument registers: pass on the stack. */
450 Reg r = ra_alloc1(as, ref, RSET_GPR);
451 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); /* Big-endian: non-64 bit values go at +4 within the slot. */
452 ofs += 8;
453 }
454 }
455 }
456 }
457}
458
459/* Setup result reg/sp for call. Evict scratch regs.
**
** Evicts all registers in RSET_SCRATCH except the one already assigned to
** `ir`. If the result of `ir` is used, it is bound to the return register:
** RID_FPRET for FP results, RID_RET for everything else. For FP results of
** calls flagged CCI_CASTU64 an FMOV from RID_RET into an FPR destination is
** emitted instead.
*/
460static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
461{
462 RegSet drop = RSET_SCRATCH;
463 if (ra_hasreg(ir->r))
464 rset_clear(drop, ir->r); /* Dest reg handled below. */
465 ra_evictset(as, drop); /* Evictions must be performed first. */
466 if (ra_used(ir)) {
467 lj_assertA(!irt_ispri(ir->t), "PRI dest");
468 if (irt_isfp(ir->t)) {
469 if (ci->flags & CCI_CASTU64) { /* FP value comes back in the integer return register. */
470 Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
471 emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
472 dest, RID_RET);
473 } else {
474 ra_destreg(as, ir, RID_FPRET);
475 }
476 } else {
477 ra_destreg(as, ir, RID_RET);
478 }
479 }
480 UNUSED(ci);
481}
482
/* Assemble a call to a function given by an IR reference.
**
** Builds a temporary CCallInfo: flags come from asm_callx_flags(), the
** function reference is ir->op2 (or the op1 of an IR_CARG found there).
** A constant function address is called via asm_gencall(). Otherwise the
** address is allocated to a register, an indirect call (BLR) is emitted here
** and ci.func is set to NULL so that asm_gencall() only sets up arguments.
*/
483static void asm_callx(ASMState *as, IRIns *ir)
484{
485 IRRef args[CCI_NARGS_MAX*2];
486 CCallInfo ci;
487 IRRef func;
488 IRIns *irf;
489 ci.flags = asm_callx_flags(as, ir);
490 asm_collectargs(as, ir, &ci, args);
491 asm_setupresult(as, ir, &ci);
492 func = ir->op2; irf = IR(func);
493 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } /* Look through a CARG to its op1. */
494 if (irref_isk(func)) { /* Call to constant address. */
495 ci.func = (ASMFunction)(ir_k64(irf)->u64);
496 } else { /* Need a non-argument register for indirect calls. */
497 Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
498 emit_n(as, A64I_BLR, freg);
499 ci.func = (ASMFunction)(void *)0; /* Tells asm_gencall() not to emit a call. */
500 }
501 asm_gencall(as, &ci, args);
502}
503
504/* -- Returns ------------------------------------------------------------- */
505
506/* Return to lower frame. Guard that it goes to the right spot.
**
** `delta` is 1+LJ_FR2 plus the A operand of the bytecode instruction just
** before the PC constant in ir->op2. as->topslot is lowered by `delta`
** (clamped at 0) and the base register is adjusted by -8*delta.
** NOTE(review): the emit_* calls appear in reverse of execution order
** (machine code seems to be generated backwards): load the word at base-8,
** compare it with the PC constant, guard on inequality, adjust base, store
** it to jit_base and to the spill slot of REF_BASE -- confirm.
*/
507static void asm_retf(ASMState *as, IRIns *ir)
508{
509 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
510 void *pc = ir_kptr(IR(ir->op2));
511 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
512 as->topslot -= (BCReg)delta;
513 if ((int32_t)as->topslot < 0) as->topslot = 0; /* Clamp after a wrap below zero. */
514 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
515 /* Need to force a spill on REF_BASE now to update the stack slot. */
516 emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
517 emit_setgl(as, base, jit_base);
518 emit_addptr(as, base, -8*delta);
519 asm_guardcc(as, CC_NE);
520 emit_nm(as, A64I_CMPx, RID_TMP,
521 ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
522 emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
523}
524
525/* -- Buffer operations --------------------------------------------------- */
526
527#if LJ_HASBUFFER
/* Emit code that rewrites the `L` field of the SBuf addressed by register
** `sb`, using one scratch GPR plus RID_TMP.
** NOTE(review): the emit_* calls appear in reverse of execution order
** (machine code seems to be generated backwards): load sb->L into tmp, load
** g->cur_L into RID_TMP, bitfield-move the low bits of tmp (up to bit
** lj_fls(SBUF_MASK_FLAG)) into RID_TMP, store RID_TMP back to sb->L.
** Presumably this keeps the flag bits while replacing the pointer -- confirm.
*/
528static void asm_bufhdr_write(ASMState *as, Reg sb)
529{
530 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
531 IRIns irgc;
532 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
533 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
534 emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);
535 emit_getgl(as, RID_TMP, cur_L);
536 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
537}
538#endif
539
540/* -- Type conversions ---------------------------------------------------- */
541
/* Guarded conversion of the FP number in register `left` to a 32 bit integer
** result for `ir`.
** NOTE(review): the emit_* calls appear in reverse of execution order
** (machine code seems to be generated backwards): convert `left` to int32 in
** dest, convert dest back to a double in tmp, compare tmp with `left`, take
** the guard exit if they are not equal -- confirm.
*/
542static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
543{
544 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
545 Reg dest = ra_dest(as, ir, RSET_GPR);
546 asm_guardcc(as, CC_NE);
547 emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
548 emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
549 emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
550}
551
/* Assemble IR_TOBIT: both operands are allocated to distinct FPRs, added
** into a scratch FPR, and an FMOV from that FPR to the GPR destination is
** emitted.
** NOTE(review): the emit_* calls appear in reverse of execution order
** (the FADD is presumably executed before the FMOV); op2 is presumably a
** bias constant that leaves the integer in the low 32 bits -- confirm.
*/
552static void asm_tobit(ASMState *as, IRIns *ir)
553{
554 RegSet allow = RSET_FPR;
555 Reg left = ra_alloc1(as, ir->op1, allow);
556 Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); /* rset_clear() updates `allow` in place. */
557 Reg tmp = ra_scratch(as, rset_clear(allow, right));
558 Reg dest = ra_dest(as, ir, RSET_GPR);
559 emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
560 emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
561}
562
/* Assemble IR_CONV (type conversion).
**
** The source type `st` is taken from ir->op2 (IRCONV_SRCMASK), the
** destination type from ir->t. Cases, in order:
**   - FP destination: FP<->FP or integer->FP conversion instruction.
**   - FP source, integer destination: guarded number->int via asm_tointg(),
**     or an unguarded FP->integer conversion instruction.
**   - 8/16 bit integer source: sign or zero extension to 32 bit.
**   - Remaining integer conversions: sign extension, 32 bit move, or no
**     instruction at all (only ra_leftov).
*/
563static void asm_conv(ASMState *as, IRIns *ir)
564{
565 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
566 int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
567 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
568 IRRef lref = ir->op1;
569 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
570 if (irt_isfp(ir->t)) {
571 Reg dest = ra_dest(as, ir, RSET_FPR);
572 if (stfp) { /* FP to FP conversion. */
573 emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
574 (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
575 } else { /* Integer to FP conversion. */
576 Reg left = ra_alloc1(as, lref, RSET_GPR);
577 A64Ins ai = irt_isfloat(ir->t) ?
578 (((IRT_IS64 >> st) & 1) ? /* IRT_IS64 is used as a bit set indexed by type. */
579 (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
580 (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
581 (((IRT_IS64 >> st) & 1) ?
582 (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
583 (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
584 emit_dn(as, ai, (dest & 31), left);
585 }
586 } else if (stfp) { /* FP to integer conversion. */
587 if (irt_isguard(ir->t)) {
588 /* Checked conversions are only supported from number to int. */
589 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
590 "bad type for checked CONV");
591 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
592 } else {
593 Reg left = ra_alloc1(as, lref, RSET_FPR);
594 Reg dest = ra_dest(as, ir, RSET_GPR);
595 A64Ins ai = irt_is64(ir->t) ? /* Selected by destination width, source FP type and signedness. */
596 (st == IRT_NUM ?
597 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
598 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
599 (st == IRT_NUM ?
600 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
601 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
602 emit_dn(as, ai, dest, (left & 31));
603 }
604 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
605 Reg dest = ra_dest(as, ir, RSET_GPR);
606 Reg left = ra_alloc1(as, lref, RSET_GPR);
607 A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
608 st == IRT_U8 ? A64I_UXTBw :
609 st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
610 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
611 emit_dn(as, ai, dest, left);
612 } else {
613 Reg dest = ra_dest(as, ir, RSET_GPR);
614 if (irt_is64(ir->t)) {
615 if (st64 || !(ir->op2 & IRCONV_SEXT)) {
616 /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
617 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
618 } else { /* 32 to 64 bit sign extension. */
619 Reg left = ra_alloc1(as, lref, RSET_GPR);
620 emit_dn(as, A64I_SXTW, dest, left);
621 }
622 } else {
623 if (st64 && !(ir->op2 & IRCONV_NONE)) {
624 /* This is either a 32 bit reg/reg mov which zeroes the hiword
625 ** or a load of the loword from a 64 bit address.
626 */
627 Reg left = ra_alloc1(as, lref, RSET_GPR);
628 emit_dm(as, A64I_MOVw, dest, left);
629 } else { /* 32/32 bit no-op (cast). */
630 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
631 }
632 }
633 }
634}
635
636static void asm_strto(ASMState *as, IRIns *ir)
637{
638 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
639 IRRef args[2];
640 Reg dest = 0, tmp;
641 int destused = ra_used(ir);
642 int32_t ofs = 0;
643 ra_evictset(as, RSET_SCRATCH);
644 if (destused) {
645 if (ra_hasspill(ir->s)) {
646 ofs = sps_scale(ir->s);
647 destused = 0;
648 if (ra_hasreg(ir->r)) {
649 ra_free(as, ir->r);
650 ra_modified(as, ir->r);
651 emit_spload(as, ir, ir->r, ofs);
652 }
653 } else {
654 dest = ra_dest(as, ir, RSET_FPR);
655 }
656 }
657 if (destused)
658 emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
659 asm_guardcnb(as, A64I_CBZ, RID_RET);
660 args[0] = ir->op1; /* GCstr *str */
661 args[1] = ASMREF_TMP1; /* TValue *n */
662 asm_gencall(as, ci, args);
663 tmp = ra_releasetmp(as, ASMREF_TMP1);
664 emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
665}
666
667/* -- Memory references --------------------------------------------------- */
668
669/* Store tagged value for ref at base+ofs. */
670static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
671{
672 RegSet allow = rset_exclude(RSET_GPR, base);
673 IRIns *ir = IR(ref);
674 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
675 "store of IR type %d", irt_type(ir->t));
676 if (irref_isk(ref)) {
677 TValue k;
678 lj_ir_kvalue(as->J->L, &k, ir);
679 emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
680 } else {
681 Reg src = ra_alloc1(as, ref, allow);
682 rset_clear(allow, src);
683 if (irt_isinteger(ir->t)) {
684 Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
685 emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
686 emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
687 } else {
688 Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
689 emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
690 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
691 }
692 }
693}
694
695/* Get pointer to TValue. */
696static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
697{
698 if ((mode & IRTMPREF_IN1)) {
699 IRIns *ir = IR(ref);
700 if (irt_isnum(ir->t)) {
701 if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
702 /* Use the number constant itself as a TValue. */
703 ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
704 return;
705 }
706 emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
707 } else {
708 asm_tvstore64(as, dest, 0, ref);
709 }
710 }
711 /* g->tmptv holds the TValue(s). */
712 emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
713}
714
715static void asm_aref(ASMState *as, IRIns *ir)
716{
717 Reg dest = ra_dest(as, ir, RSET_GPR);
718 Reg idx, base;
719 if (irref_isk(ir->op2)) {
720 IRRef tab = IR(ir->op1)->op1;
721 int32_t ofs = asm_fuseabase(as, tab);
722 IRRef refa = ofs ? tab : ir->op1;
723 uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
724 if (k) {
725 base = ra_alloc1(as, refa, RSET_GPR);
726 emit_dn(as, A64I_ADDx^k, dest, base);
727 return;
728 }
729 }
730 base = ra_alloc1(as, ir->op1, RSET_GPR);
731 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
732 emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
733}
734
735/* Inlined hash lookup. Specialized for key type and for const keys.
736** The equivalent C code is:
737** Node *n = hashkey(t, key);
738** do {
739** if (lj_obj_equal(&n->key, key)) return &n->val;
740** } while ((n = nextnode(n)));
741** return niltv(L);
742*/
743static void asm_href(ASMState *as, IRIns *ir, IROp merge)
744{
745 RegSet allow = RSET_GPR;
746 int destused = ra_used(ir);
747 Reg dest = ra_dest(as, ir, allow);
748 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
749 Reg key = 0, tmp = RID_TMP;
750 Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
751 IRRef refkey = ir->op2;
752 IRIns *irkey = IR(refkey);
753 int isk = irref_isk(ir->op2);
754 IRType1 kt = irkey->t;
755 uint32_t k = 0;
756 uint32_t khash;
757 MCLabel l_end, l_loop, l_next;
758 rset_clear(allow, tab);
759
760 if (!isk) {
761 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
762 rset_clear(allow, key);
763 if (!irt_isstr(kt)) {
764 tmp = ra_scratch(as, allow);
765 rset_clear(allow, tmp);
766 }
767 } else if (irt_isnum(kt)) {
768 int64_t val = (int64_t)ir_knum(irkey)->u64;
769 if (!(k = emit_isk12(val))) {
770 key = ra_allock(as, val, allow);
771 rset_clear(allow, key);
772 }
773 } else if (!irt_ispri(kt)) {
774 if (!(k = emit_isk12(irkey->i))) {
775 key = ra_alloc1(as, refkey, allow);
776 rset_clear(allow, key);
777 }
778 }
779
780 /* Allocate constants early. */
781 if (irt_isnum(kt)) {
782 if (!isk) {
783 tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
784 ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
785 rset_clear(allow, tisnum);
786 }
787 } else if (irt_isaddr(kt)) {
788 if (isk) {
789 int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
790 scr = ra_allock(as, kk, allow);
791 } else {
792 scr = ra_scratch(as, allow);
793 }
794 rset_clear(allow, scr);
795 } else {
796 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
797 type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
798 scr = ra_scratch(as, rset_clear(allow, type));
799 rset_clear(allow, scr);
800 }
801
802 /* Key not found in chain: jump to exit (if merged) or load niltv. */
803 l_end = emit_label(as);
804 as->invmcp = NULL;
805 if (merge == IR_NE)
806 asm_guardcc(as, CC_AL);
807 else if (destused)
808 emit_loada(as, dest, niltvg(J2G(as->J)));
809
810 /* Follow hash chain until the end. */
811 l_loop = --as->mcp;
812 emit_n(as, A64I_CMPx^A64I_K12^0, dest);
813 emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
814 l_next = emit_label(as);
815
816 /* Type and value comparison. */
817 if (merge == IR_EQ)
818 asm_guardcc(as, CC_EQ);
819 else
820 emit_cond_branch(as, CC_EQ, l_end);
821
822 if (irt_isnum(kt)) {
823 if (isk) {
824 /* Assumes -0.0 is already canonicalized to +0.0. */
825 if (k)
826 emit_n(as, A64I_CMPx^k, tmp);
827 else
828 emit_nm(as, A64I_CMPx, key, tmp);
829 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
830 } else {
831 emit_nm(as, A64I_FCMPd, key, ftmp);
832 emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
833 emit_cond_branch(as, CC_LO, l_next);
834 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
835 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
836 }
837 } else if (irt_isaddr(kt)) {
838 if (isk) {
839 emit_nm(as, A64I_CMPx, scr, tmp);
840 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
841 } else {
842 emit_nm(as, A64I_CMPx, tmp, scr);
843 emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
844 }
845 } else {
846 emit_nm(as, A64I_CMPw, scr, type);
847 emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
848 }
849
850 *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
851 if (!isk && irt_isaddr(kt)) {
852 type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
853 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
854 rset_clear(allow, type);
855 }
856 /* Load main position relative to tab->node into dest. */
857 khash = isk ? ir_khash(as, irkey) : 1;
858 if (khash == 0) {
859 emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
860 } else {
861 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
862 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
863 emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
864 if (isk) {
865 Reg tmphash = ra_allock(as, khash, allow);
866 emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
867 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
868 } else if (irt_isstr(kt)) {
869 /* Fetch of str->sid is cheaper than ra_allock. */
870 emit_dnm(as, A64I_ANDw, dest, dest, tmp);
871 emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
872 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
873 } else { /* Must match with hash*() in lj_tab.c. */
874 emit_dnm(as, A64I_ANDw, dest, dest, tmp);
875 emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
876 emit_dnm(as, A64I_SUBw, dest, dest, tmp);
877 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
878 emit_dnm(as, A64I_EORw, dest, dest, tmp);
879 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
880 emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
881 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
882 emit_dnm(as, A64I_EORw, tmp, tmp, dest);
883 if (irt_isnum(kt)) {
884 emit_dnm(as, A64I_ADDw, dest, dest, dest);
885 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
886 emit_dm(as, A64I_MOVw, tmp, dest);
887 emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
888 } else {
889 checkmclim(as);
890 emit_dm(as, A64I_MOVw, tmp, key);
891 emit_dnm(as, A64I_EORw, dest, dest,
892 ra_allock(as, irt_toitype(kt) << 15, allow));
893 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
894 emit_dm(as, A64I_MOVx, dest, key);
895 }
896 }
897 }
898}
899
900static void asm_hrefk(ASMState *as, IRIns *ir)
901{
902 IRIns *kslot = IR(ir->op2);
903 IRIns *irkey = IR(kslot->op1);
904 int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
905 int32_t kofs = ofs + (int32_t)offsetof(Node, key);
906 int bigofs = !emit_checkofs(A64I_LDRx, ofs);
907 Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
908 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
909 Reg key, idx = node;
910 RegSet allow = rset_exclude(RSET_GPR, node);
911 uint64_t k;
912 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
913 if (bigofs) {
914 idx = dest;
915 rset_clear(allow, dest);
916 kofs = (int32_t)offsetof(Node, key);
917 } else if (ra_hasreg(dest)) {
918 emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
919 }
920 asm_guardcc(as, CC_NE);
921 if (irt_ispri(irkey->t)) {
922 k = ~((int64_t)~irt_toitype(irkey->t) << 47);
923 } else if (irt_isnum(irkey->t)) {
924 k = ir_knum(irkey)->u64;
925 } else {
926 k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
927 }
928 key = ra_scratch(as, allow);
929 emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
930 emit_lso(as, A64I_LDRx, key, idx, kofs);
931 if (bigofs)
932 emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
933}
934
935static void asm_uref(ASMState *as, IRIns *ir)
936{
937 Reg dest = ra_dest(as, ir, RSET_GPR);
938 if (irref_isk(ir->op1)) {
939 GCfunc *fn = ir_kfunc(IR(ir->op1));
940 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
941 emit_lsptr(as, A64I_LDRx, dest, v);
942 } else {
943 Reg uv = ra_scratch(as, RSET_GPR);
944 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
945 if (ir->o == IR_UREFC) {
946 asm_guardcc(as, CC_NE);
947 emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
948 emit_opk(as, A64I_ADDx, dest, uv,
949 (int32_t)offsetof(GCupval, tv), RSET_GPR);
950 emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
951 } else {
952 emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
953 }
954 emit_lso(as, A64I_LDRx, uv, func,
955 (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
956 }
957}
958
959static void asm_fref(ASMState *as, IRIns *ir)
960{
961 UNUSED(as); UNUSED(ir);
962 lj_assertA(!ra_used(ir), "unfused FREF");
963}
964
965static void asm_strref(ASMState *as, IRIns *ir)
966{
967 RegSet allow = RSET_GPR;
968 Reg dest = ra_dest(as, ir, allow);
969 Reg base = ra_alloc1(as, ir->op1, allow);
970 IRIns *irr = IR(ir->op2);
971 int32_t ofs = sizeof(GCstr);
972 uint32_t m;
973 rset_clear(allow, base);
974 if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
975 emit_dn(as, A64I_ADDx^m, dest, base);
976 } else {
977 emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
978 emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
979 }
980}
981
982/* -- Loads and stores ---------------------------------------------------- */
983
984static A64Ins asm_fxloadins(IRIns *ir)
985{
986 switch (irt_type(ir->t)) {
987 case IRT_I8: return A64I_LDRB ^ A64I_LS_S;
988 case IRT_U8: return A64I_LDRB;
989 case IRT_I16: return A64I_LDRH ^ A64I_LS_S;
990 case IRT_U16: return A64I_LDRH;
991 case IRT_NUM: return A64I_LDRd;
992 case IRT_FLOAT: return A64I_LDRs;
993 default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw;
994 }
995}
996
997static A64Ins asm_fxstoreins(IRIns *ir)
998{
999 switch (irt_type(ir->t)) {
1000 case IRT_I8: case IRT_U8: return A64I_STRB;
1001 case IRT_I16: case IRT_U16: return A64I_STRH;
1002 case IRT_NUM: return A64I_STRd;
1003 case IRT_FLOAT: return A64I_STRs;
1004 default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw;
1005 }
1006}
1007
1008static void asm_fload(ASMState *as, IRIns *ir)
1009{
1010 Reg dest = ra_dest(as, ir, RSET_GPR);
1011 Reg idx;
1012 A64Ins ai = asm_fxloadins(ir);
1013 int32_t ofs;
1014 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
1015 idx = RID_GL;
1016 ofs = (ir->op2 << 2) - GG_OFS(g);
1017 } else {
1018 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1019 if (ir->op2 == IRFL_TAB_ARRAY) {
1020 ofs = asm_fuseabase(as, ir->op1);
1021 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1022 emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
1023 return;
1024 }
1025 }
1026 ofs = field_ofs[ir->op2];
1027 }
1028 emit_lso(as, ai, (dest & 31), idx, ofs);
1029}
1030
1031static void asm_fstore(ASMState *as, IRIns *ir)
1032{
1033 if (ir->r != RID_SINK) {
1034 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1035 IRIns *irf = IR(ir->op1);
1036 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
1037 int32_t ofs = field_ofs[irf->op2];
1038 emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
1039 }
1040}
1041
1042static void asm_xload(ASMState *as, IRIns *ir)
1043{
1044 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1045 lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
1046 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
1047}
1048
1049static void asm_xstore(ASMState *as, IRIns *ir)
1050{
1051 if (ir->r != RID_SINK) {
1052 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1053 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
1054 rset_exclude(RSET_GPR, src));
1055 }
1056}
1057
/* Emit a type-checked load of a tagged value through the reference in
** ir->op1. The raw 64 bit value is loaded into a GPR (tmp), its type is
** compared against the expected type under a guard and, if the result is
** used, the payload is unpacked into the destination register.
** NOTE(review): the emit_* calls appear to be issued in reverse execution
** order (the load at the bottom runs first) -- confirm against the emitter.
*/
1058static void asm_ahuvload(ASMState *as, IRIns *ir)
1059{
1060 Reg idx, tmp, type;
1061 int32_t ofs = 0;
1062 RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1063 lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1064 irt_isint(ir->t),
1065 "bad load type %d", irt_type(ir->t));
1066 if (ra_used(ir)) {
1067 Reg dest = ra_dest(as, ir, allow);
 /* Numbers are loaded via a GPR scratch; other types load into dest. */
1068 tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
1069 if (irt_isaddr(ir->t)) {
 /* Mask the loaded value with LJ_GCVMASK to get the plain address. */
1070 emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1071 } else if (irt_isnum(ir->t)) {
 /* Move the raw bits from the GPR scratch into the FPR result. */
1072 emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1073 } else if (irt_isint(ir->t)) {
 /* 32 bit move of dest onto itself, applied to the loaded value. */
1074 emit_dm(as, A64I_MOVw, dest, dest);
1075 }
1076 } else {
 /* Result unused: still need a register for the type check. */
1077 tmp = ra_scratch(as, gpr);
1078 }
 /* rset_clear() updates gpr in place, so tmp and type stay excluded below. */
1079 type = ra_scratch(as, rset_clear(gpr, tmp));
1080 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
1081 /* Always do the type check, even if the load result is unused. */
1082 asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
1083 if (irt_type(ir->t) >= IRT_NUM) {
1084 lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
1085 "bad load type %d", irt_type(ir->t));
 /* Compare (LJ_TISNUM << 15) against the upper 32 bits of the value. */
1086 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1087 ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
1088 } else if (irt_isaddr(ir->t)) {
 /* Extract the tag with an arithmetic shift by 47 and compare it. */
1089 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
1090 emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
1091 } else if (irt_isnil(ir->t)) {
 /* CMN tmp, #1 sets Z only when tmp is -1 (all bits set). */
1092 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1093 } else {
 /* Remaining primitive types: compare the upper 32 bits of the value. */
1094 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1095 ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
1096 }
 /* The load itself: register-offset form if the reference was fused so. */
1097 if (ofs & FUSE_REG)
1098 emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1099 else
1100 emit_lso(as, A64I_LDRx, tmp, idx, ofs);
1101}
1102
/* Emit a store of a tagged value through the reference in ir->op1.
** Numbers are stored directly from an FPR. Other values are combined with
** their type tag in a GPR first; primitives are stored as a precomputed
** constant. Nothing is emitted for a sunk store.
** NOTE(review): the emit_* calls appear to be issued in reverse execution
** order (the tagging ADD at the bottom runs before the store) -- confirm.
*/
1103static void asm_ahustore(ASMState *as, IRIns *ir)
1104{
1105 if (ir->r != RID_SINK) {
1106 RegSet allow = RSET_GPR;
1107 Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
1108 int32_t ofs = 0;
1109 if (irt_isnum(ir->t)) {
 /* Number: store the FPR as-is, no tagging needed. */
1110 src = ra_alloc1(as, ir->op2, RSET_FPR);
1111 idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
1112 if (ofs & FUSE_REG)
1113 emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31));
1114 else
1115 emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
1116 } else {
1117 if (!irt_ispri(ir->t)) {
 /* Value with payload: need the payload and a type constant. */
1118 src = ra_alloc1(as, ir->op2, allow);
1119 rset_clear(allow, src);
1120 if (irt_isinteger(ir->t))
 /* Integer tag is pre-shifted; the payload is added zero-extended. */
1121 type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
1122 else
 /* Other tags are shifted left by 47 by the ADD below. */
1123 type = ra_allock(as, irt_toitype(ir->t), allow);
1124 } else {
 /* Primitive: the whole tagged value is a constant, stored directly. */
1125 tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
1126 }
1127 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
1128 A64I_STRx);
1129 if (ofs & FUSE_REG)
1130 emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1131 else
1132 emit_lso(as, A64I_STRx, tmp, idx, ofs);
 /* Build the tagged value in tmp (RID_TMP) from payload and type. */
1133 if (ra_hasreg(src)) {
1134 if (irt_isinteger(ir->t)) {
1135 emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
1136 } else {
1137 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
1138 }
1139 }
1140 }
1141 }
1142}
1143
/* Emit code for IR_SLOAD: load a stack slot at offset 8*(op1-2) from the
** register holding REF_BASE. Flags in op2 select an optional type check
** (IRSLOAD_TYPECHECK) and an optional int<->number conversion
** (IRSLOAD_CONVERT). Parent loads (IRSLOAD_PARENT) are not handled here.
** NOTE(review): the emit_* calls appear to be issued in reverse execution
** order (the final load runs first) -- confirm against the emitter.
*/
1144static void asm_sload(ASMState *as, IRIns *ir)
1145{
1146 int32_t ofs = 8*((int32_t)ir->op1-2);
 /* Local copy of the type: it is rewritten below to the type to CHECK. */
1147 IRType1 t = ir->t;
1148 Reg dest = RID_NONE, base;
1149 RegSet allow = RSET_GPR;
1150 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1151 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1152 lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1153 "inconsistent SLOAD variant");
1154 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
 /* Guarded number to int conversion: delegated to asm_tointg(). */
1155 dest = ra_scratch(as, RSET_FPR);
1156 asm_tointg(as, ir, dest);
1157 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1158 } else if (ra_used(ir)) {
1159 Reg tmp = RID_NONE;
 /* Conversion needs a temporary of the opposite register class. */
1160 if ((ir->op2 & IRSLOAD_CONVERT))
1161 tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
1162 lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
1163 "bad SLOAD type %d", irt_type(t));
1164 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
1165 base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
1166 if (irt_isaddr(t)) {
 /* Mask the loaded value with LJ_GCVMASK to get the plain address. */
1167 emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1168 } else if ((ir->op2 & IRSLOAD_CONVERT)) {
1169 if (irt_isint(t)) {
 /* Slot holds a number: convert it to a 32 bit integer result. */
1170 emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
1171 /* If value is already loaded for type check, move it to FPR. */
1172 if ((ir->op2 & IRSLOAD_TYPECHECK))
1173 emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
1174 else
1175 dest = tmp;
1176 t.irt = IRT_NUM; /* Check for original type. */
1177 } else {
 /* Slot holds an integer: convert it to a number result. */
1178 emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
1179 dest = tmp;
1180 t.irt = IRT_INT; /* Check for original type. */
1181 }
1182 } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
 /* 32 bit move of dest onto itself, applied to the type-checked value. */
1183 emit_dm(as, A64I_MOVw, dest, dest);
1184 }
1185 goto dotypecheck;
1186 }
 /* Reached when base has not been allocated above. */
1187 base = ra_alloc1(as, REF_BASE, allow);
1188dotypecheck:
1189 rset_clear(allow, base);
1190 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1191 Reg tmp;
 /* Check in dest if it is a GPR, otherwise in a GPR scratch. */
1192 if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
1193 tmp = dest;
1194 } else {
1195 tmp = ra_scratch(as, allow);
1196 rset_clear(allow, tmp);
1197 }
 /* Plain number load: move the checked raw bits into the FPR result. */
1198 if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
1199 emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1200 /* Need type check, even if the load result is unused. */
1201 asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
1202 if (irt_type(t) >= IRT_NUM) {
1203 lj_assertA(irt_isinteger(t) || irt_isnum(t),
1204 "bad SLOAD type %d", irt_type(t));
 /* Compare (LJ_TISNUM << 15) against the upper 32 bits of the value. */
1205 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1206 ra_allock(as, LJ_TISNUM << 15, allow), tmp);
1207 } else if (irt_isnil(t)) {
 /* CMN tmp, #1 sets Z only when tmp is -1 (all bits set). */
1208 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1209 } else if (irt_ispri(t)) {
 /* Other primitives: compare against the full tagged constant. */
1210 emit_nm(as, A64I_CMPx,
1211 ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
1212 } else {
 /* Address types: extract the tag with ASR #47 and compare it. */
1213 Reg type = ra_scratch(as, allow);
1214 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
1215 emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
1216 }
1217 emit_lso(as, A64I_LDRx, tmp, base, ofs);
1218 return;
1219 }
 /* No type check: load straight into dest with the matching width. */
1220 if (ra_hasreg(dest)) {
1221 emit_lso(as, irt_isnum(t) ? A64I_LDRd :
1222 (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
1223 ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
1224 }
1225}
1226
1227/* -- Allocations --------------------------------------------------------- */
1228
1229#if LJ_HASFFI
/* Emit code for IR_CNEW/IR_CNEWI: allocate a cdata object.
** - IR_CNEWI: allocate via lj_mem_newgco() and store the 4 or 8 byte
**   initializer (op2) right after the GCcdata header.
** - IR_CNEW with op2 != REF_NIL: call lj_cdata_newv() with id, size and
**   alignment.
** - Otherwise: allocate via lj_mem_newgco() with a constant size.
** For the lj_mem_newgco() paths the gct and ctypeid header fields are
** initialized inline.
** NOTE(review): the emit_* calls appear to be issued in reverse execution
** order (the call setup at the bottom runs first) -- confirm.
*/
1230static void asm_cnew(ASMState *as, IRIns *ir)
1231{
1232 CTState *cts = ctype_ctsG(J2G(as->J));
1233 CTypeID id = (CTypeID)IR(ir->op1)->i;
1234 CTSize sz;
1235 CTInfo info = lj_ctype_info(cts, id, &sz);
1236 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1237 IRRef args[4];
 /* Operands needed after the call must not live in scratch registers. */
1238 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1239 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1240 "bad CNEW/CNEWI operands");
1241
1242 as->gcsteps++;
1243 asm_setupresult(as, ir, ci); /* GCcdata * */
1244 /* Initialize immutable cdata object. */
1245 if (ir->o == IR_CNEWI) {
1246 int32_t ofs = sizeof(GCcdata);
1247 Reg r = ra_alloc1(as, ir->op2, allow);
1248 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1249 emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
1250 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1251 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1252 args[0] = ASMREF_L; /* lua_State *L */
1253 args[1] = ir->op1; /* CTypeID id */
1254 args[2] = ir->op2; /* CTSize sz */
1255 args[3] = ASMREF_TMP1; /* CTSize align */
1256 asm_gencall(as, ci, args);
1257 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1258 return;
1259 }
1260
1261 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1262 {
 /* Ids below 65536 are materialized in RID_X1 with a single MOVZ. */
1263 Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
1264 emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1265 emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1266 emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
1267 if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
1268 }
1269 args[0] = ASMREF_L; /* lua_State *L */
1270 args[1] = ASMREF_TMP1; /* MSize size */
1271 asm_gencall(as, ci, args);
 /* Total allocation size: payload plus GCcdata header. */
1272 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1273 ra_releasetmp(as, ASMREF_TMP1));
1274}
1275#endif
1276
1277/* -- Write barriers ------------------------------------------------------ */
1278
1279static void asm_tbar(ASMState *as, IRIns *ir)
1280{
1281 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1282 Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1283 Reg mark = RID_TMP;
1284 MCLabel l_end = emit_label(as);
1285 emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
1286 emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1287 emit_setgl(as, tab, gc.grayagain);
1288 emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
1289 emit_getgl(as, link, gc.grayagain);
1290 emit_cond_branch(as, CC_EQ, l_end);
1291 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
1292 emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1293}
1294
/* Emit code for IR_OBAR: object write barrier, only for closed upvalues
** (op1 must be an IR_UREFC). Calls lj_gc_barrieruv(g, tv) when the stored
** value (op2) has a bit of LJ_GC_WHITES set and the upvalue has
** LJ_GC_BLACK set; otherwise the call is branched over.
** NOTE(review): the emit_* calls appear to be issued in reverse execution
** order (the mark loads at the bottom run first) -- confirm.
*/
1295static void asm_obar(ASMState *as, IRIns *ir)
1296{
1297 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
1298 IRRef args[2];
1299 MCLabel l_end;
1300 RegSet allow = RSET_GPR;
1301 Reg obj, val, tmp;
1302 /* No need for other object barriers (yet). */
1303 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1304 ra_evictset(as, RSET_SCRATCH);
1305 l_end = emit_label(as);
1306 args[0] = ASMREF_TMP1; /* global_State *g */
1307 args[1] = ir->op1; /* TValue *tv */
1308 asm_gencall(as, ci, args);
 /* First call argument: copy of RID_GL. */
1309 emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
 /* Register already assigned to the UREFC result (pointer to uv->tv). */
1310 obj = IR(ir->op1)->r;
1311 tmp = ra_scratch(as, rset_exclude(allow, obj));
 /* Skip the call unless the upvalue is black ... */
1312 emit_cond_branch(as, CC_EQ, l_end);
1313 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
 /* ... and unless the stored value is white. */
1314 emit_cond_branch(as, CC_EQ, l_end);
1315 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
1316 val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
 /* obj points at uv->tv, so 'marked' is addressed relative to that field. */
1317 emit_lso(as, A64I_LDRB, tmp, obj,
1318 (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
1319 emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
1320}
1321
1322/* -- Arithmetic and logic operations ------------------------------------- */
1323
1324static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
1325{
1326 Reg dest = ra_dest(as, ir, RSET_FPR);
1327 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1328 right = (left >> 8); left &= 255;
1329 emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));
1330}
1331
1332static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
1333{
1334 Reg dest = ra_dest(as, ir, RSET_FPR);
1335 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1336 emit_dn(as, ai, (dest & 31), (left & 31));
1337}
1338
1339static void asm_fpmath(ASMState *as, IRIns *ir)
1340{
1341 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1342 if (fpm == IRFPM_SQRT) {
1343 asm_fpunary(as, ir, A64I_FSQRTd);
1344 } else if (fpm <= IRFPM_TRUNC) {
1345 asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
1346 fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
1347 } else {
1348 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1349 }
1350}
1351
1352static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
1353{
1354 IRIns *ir;
1355 if (irref_isk(rref))
1356 return 0; /* Don't swap constants to the left. */
1357 if (irref_isk(lref))
1358 return 1; /* But swap constants to the right. */
1359 ir = IR(rref);
1360 if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
1361 (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1362 (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1363 return 0; /* Don't swap fusable operands to the left. */
1364 ir = IR(lref);
1365 if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
1366 (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1367 (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1368 return 1; /* But swap fusable operands to the right. */
1369 return 0; /* Otherwise don't swap. */
1370}
1371
1372static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
1373{
1374 IRRef lref = ir->op1, rref = ir->op2;
1375 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1376 uint32_t m;
1377 if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
1378 IRRef tmp = lref; lref = rref; rref = tmp;
1379 }
1380 left = ra_hintalloc(as, lref, dest, RSET_GPR);
1381 if (irt_is64(ir->t)) ai |= A64I_X;
1382 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1383 if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */
1384 asm_guardcc(as, CC_VS);
1385 ai |= A64I_S;
1386 }
1387 emit_dn(as, ai^m, dest, left);
1388}
1389
1390static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
1391{
1392 if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
1393 as->flagmcp = NULL;
1394 as->mcp++;
1395 ai |= A64I_S;
1396 }
1397 asm_intop(as, ir, ai);
1398}
1399
1400static void asm_intneg(ASMState *as, IRIns *ir)
1401{
1402 Reg dest = ra_dest(as, ir, RSET_GPR);
1403 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1404 emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
1405}
1406
1407/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
1408static void asm_intmul(ASMState *as, IRIns *ir)
1409{
1410 Reg dest = ra_dest(as, ir, RSET_GPR);
1411 Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
1412 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1413 if (irt_isguard(ir->t)) { /* IR_MULOV */
1414 asm_guardcc(as, CC_NE);
1415 emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
1416 emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
1417 emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
1418 emit_dnm(as, A64I_SMULL, dest, right, left);
1419 } else {
1420 emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
1421 }
1422}
1423
1424static void asm_add(ASMState *as, IRIns *ir)
1425{
1426 if (irt_isnum(ir->t)) {
1427 if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
1428 asm_fparith(as, ir, A64I_FADDd);
1429 return;
1430 }
1431 asm_intop_s(as, ir, A64I_ADDw);
1432}
1433
1434static void asm_sub(ASMState *as, IRIns *ir)
1435{
1436 if (irt_isnum(ir->t)) {
1437 if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
1438 asm_fparith(as, ir, A64I_FSUBd);
1439 return;
1440 }
1441 asm_intop_s(as, ir, A64I_SUBw);
1442}
1443
1444static void asm_mul(ASMState *as, IRIns *ir)
1445{
1446 if (irt_isnum(ir->t)) {
1447 asm_fparith(as, ir, A64I_FMULd);
1448 return;
1449 }
1450 asm_intmul(as, ir);
1451}
1452
/* The overflow-checking variants reuse the plain arithmetic handlers, which
** are passed the same instruction.
*/
1453#define asm_addov(as, ir) asm_add(as, ir)
1454#define asm_subov(as, ir) asm_sub(as, ir)
1455#define asm_mulov(as, ir) asm_mul(as, ir)
1456
/* FP division and absolute value each map to a single FP instruction. */
1457#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd)
1458#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
1459
1460static void asm_neg(ASMState *as, IRIns *ir)
1461{
1462 if (irt_isnum(ir->t)) {
1463 asm_fpunary(as, ir, A64I_FNEGd);
1464 return;
1465 }
1466 asm_intneg(as, ir);
1467}
1468
1469static void asm_band(ASMState *as, IRIns *ir)
1470{
1471 A64Ins ai = A64I_ANDw;
1472 if (asm_fuseandshift(as, ir))
1473 return;
1474 if (as->flagmcp == as->mcp) {
1475 /* Try to drop cmp r, #0. */
1476 as->flagmcp = NULL;
1477 as->mcp++;
1478 ai = A64I_ANDSw;
1479 }
1480 asm_intop(as, ir, ai);
1481}
1482
1483static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
1484{
1485 IRRef lref = ir->op1, rref = ir->op2;
1486 IRIns *irl = IR(lref), *irr = IR(rref);
1487 if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
1488 (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
1489 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1490 uint32_t m;
1491 if (irl->o == IR_BNOT) {
1492 IRRef tmp = lref; lref = rref; rref = tmp;
1493 }
1494 left = ra_alloc1(as, lref, RSET_GPR);
1495 ai |= A64I_ON;
1496 if (irt_is64(ir->t)) ai |= A64I_X;
1497 m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
1498 emit_dn(as, ai^m, dest, left);
1499 } else {
1500 asm_intop(as, ir, ai);
1501 }
1502}
1503
1504static void asm_bor(ASMState *as, IRIns *ir)
1505{
1506 if (asm_fuseorshift(as, ir))
1507 return;
1508 asm_borbxor(as, ir, A64I_ORRw);
1509}
1510
/* IR_BXOR uses the generic OR/XOR handler with the EOR instruction. */
1511#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw)
1512
/* Assemble an IR BNOT as A64I_MVNw (plus A64I_X for 64 bit types). The
** operand is obtained through asm_fuseopm and XORed into the opcode.
*/
1513static void asm_bnot(ASMState *as, IRIns *ir)
1514{
1515 A64Ins ai = A64I_MVNw;
1516 Reg dest = ra_dest(as, ir, RSET_GPR);
 /* NOTE(review): asm_fuseopm sees ai before A64I_X is ORed in, whereas
 ** asm_borbxor sets A64I_X first. Confirm asm_fuseopm does not depend on it.
 */
1517 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1518 if (irt_is64(ir->t)) ai |= A64I_X;
1519 emit_d(as, ai^m, dest);
1520}
1521
/* Assemble an IR BSWAP: emits A64I_REVx for 64 bit types and A64I_REVw
** otherwise, from the register holding op1 into the destination register.
*/
1522static void asm_bswap(ASMState *as, IRIns *ir)
1523{
1524 Reg dest = ra_dest(as, ir, RSET_GPR);
1525 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1526 emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
1527}
1528
/* Assemble a shift or rotate.
**   ai  base opcode as passed by the asm_bshl/bshr/bsar/bror macros
**       (the 32 bit "w" forms); only used for constant shift counts.
**   sh  kind of shift (A64SH_LSL/LSR/ASR/ROR).
** Constant counts are masked to the operand width and encoded as immediate
** fields of the instruction. Variable counts use A64I_SHRw/A64I_SHRx with
** the shift kind encoded through A64F_BSH.
*/
1529static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
1530{
1531 int32_t shmask = irt_is64(ir->t) ? 63 : 31;
1532 if (irref_isk(ir->op2)) { /* Constant shifts. */
1533 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1534 int32_t shift = (IR(ir->op2)->i & shmask);
1535 IRIns *irl = IR(ir->op1);
 /* Switch to the 64 bit form by adding the UBFM w->x opcode delta. This is
 ** applied to whatever base opcode was passed, so it assumes the SBFM and
 ** EXTR opcodes differ by the same delta -- TODO confirm in the target header.
 */
1536 if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;
1537
1538 /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
1539 if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
1540 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
1541 int32_t shift2 = (IR(irl->op2)->i & shmask);
 /* Net right shift, reduced field width, and take the BSHL's input as source. */
1542 shift = ((shift - shift2) & shmask);
1543 shmask -= shift2;
1544 ir = irl;
1545 }
1546 }
1547
1548 left = ra_alloc1(as, ir->op1, RSET_GPR);
1549 switch (sh) {
1550 case A64SH_LSL:
 /* Left shift: IMMS = width-1-shift, IMMR = (width-shift) mod width. */
1551 emit_dn(as, ai | A64F_IMMS(shmask-shift) |
1552 A64F_IMMR((shmask-shift+1)&shmask), dest, left);
1553 break;
1554 case A64SH_LSR: case A64SH_ASR:
 /* Right shift: IMMS = (possibly reduced) shmask, IMMR = shift. */
1555 emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
1556 break;
1557 case A64SH_ROR:
 /* Rotate: both source operands are the same register. */
1558 emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
1559 break;
1560 }
1561 } else { /* Variable-length shifts. */
1562 Reg dest = ra_dest(as, ir, RSET_GPR);
1563 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
 /* The count register must differ from the register holding the value. */
1564 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1565 emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
1566 }
1567}
1568
/* Shift/rotate handlers: each passes the 32 bit base opcode used for the
** constant-count encoding plus the shift kind to asm_bitshift. BROL has no
** handler here; reaching it only triggers the assertion below.
*/
1569#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
1570#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
1571#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
1572#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
1573#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
1574
/* Integer MIN/MAX: a compare (A64I_CMPw) of left and right followed by a
** conditional select (A64I_CSELw) with condition cc into dest. Both opcodes
** are the "w" forms regardless of ir->t.
** NOTE: the emit_* calls appear in reverse of execution order (select first,
** then the compare feeding it); this matches machine code being filled
** backwards (cf. the *--as->mcp store in asm_gc_check).
*/
1575static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
1576{
1577 Reg dest = ra_dest(as, ir, RSET_GPR);
1578 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1579 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1580 emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
1581 emit_nm(as, A64I_CMPw, left, right);
1582}
1583
/* FP MIN/MAX: an FP compare (A64I_FCMPd) of left and right followed by an FP
** conditional select (A64I_FCSELd) with condition fcc into dest.
** ra_alloc2 returns both registers packed into one value: the left register
** in the low byte, the right register starting at bit 8. Register numbers
** are masked with 31 before being used as instruction fields.
*/
1584static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
1585{
1586 Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
1587 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1588 right = ((left >> 8) & 31); left &= 31;
 /* Note the select takes (right, left) while the compare takes (left, right). */
1589 emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left);
1590 emit_nm(as, A64I_FCMPd, left, right);
1591}
1592
/* Dispatch MIN/MAX on the result type: number types use the FP variant with
** condition fcc, everything else uses the integer variant with condition cc.
*/
1593static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
1594{
1595 if (irt_isnum(ir->t))
1596 asm_fpmin_max(as, ir, fcc);
1597 else
1598 asm_intmin_max(as, ir, cc);
1599}
1600
/* Condition pairs (integer cc, FP cc) for MIN and MAX. The FP conditions
** apply to the swapped operand order used by the FP select in asm_fpmin_max.
*/
1601#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL)
1602#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE)
1603
1604/* -- Comparisons --------------------------------------------------------- */
1605
1606/* Map of comparisons to flags. ORDER IR. */
/* Indexed by IR opcode (ir->o). Each byte packs two condition codes:
**   low nibble  - condition used by asm_intcomp  (asm_compmap[o] & 15)
**   high nibble - condition used by asm_fpcomp   (asm_compmap[o] >> 4)
** Every entry holds the opposite of the named comparison (e.g. LT -> CC_GE),
** which suggests asm_guardcc takes the condition under which the guard
** fails -- confirm against asm_guardcc. Rows marked 'x' are the ones the
** table header labels "FP swp" (operands swapped for the FP compare).
*/
1607static const uint8_t asm_compmap[IR_ABC+1] = {
1608 /* op FP swp int cc FP cc */
1609 /* LT */ CC_GE + (CC_HS << 4),
1610 /* GE x */ CC_LT + (CC_HI << 4),
1611 /* LE */ CC_GT + (CC_HI << 4),
1612 /* GT x */ CC_LE + (CC_HS << 4),
1613 /* ULT x */ CC_HS + (CC_LS << 4),
1614 /* UGE */ CC_LO + (CC_LO << 4),
1615 /* ULE x */ CC_HI + (CC_LO << 4),
1616 /* UGT */ CC_LS + (CC_LS << 4),
1617 /* EQ */ CC_NE + (CC_NE << 4),
1618 /* NE */ CC_EQ + (CC_EQ << 4),
1619 /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */
1620};
1621
1622/* FP comparisons. */
/* Emits an FP compare followed by a guard on the FP condition taken from the
** high nibble of asm_compmap[ir->o]. Two forms exist:
**   - compare against zero (A64I_FCMPZd) when the operands are not swapped
**     and op2 is a constant whose 64 bit pattern is all zero;
**   - register/register compare (A64I_FCMPd) otherwise, with the operands
**     exchanged when swp is set.
*/
1623static void asm_fpcomp(ASMState *as, IRIns *ir)
1624{
1625 Reg left, right;
1626 A64Ins ai;
 /* Swap flag derived purely from opcode bits; expected to select the rows
 ** marked 'x' in asm_compmap (relies on ORDER IR numbering -- not visible here).
 */
1627 int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
1628 if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
1629 left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
1630 right = 0; /* No second register for the compare-with-zero form. */
1631 ai = A64I_FCMPZd;
1632 } else {
 /* ra_alloc2 packs both registers: op1 in the low byte, op2 from bit 8. */
1633 left = ra_alloc2(as, ir, RSET_FPR);
1634 if (swp) {
1635 right = (left & 31); left = ((left >> 8) & 31);
1636 } else {
1637 right = ((left >> 8) & 31); left &= 31;
1638 }
1639 ai = A64I_FCMPd;
1640 }
1641 asm_guardcc(as, (asm_compmap[ir->o] >> 4));
1642 emit_nm(as, ai, left, right);
1643}
1644
1645/* Integer comparisons. */
/* Emits a guard for an integer comparison, using the integer condition from
** the low nibble of asm_compmap[ir->o]. Besides the generic compare + guard
** it recognizes several cheaper forms when comparing against constant zero:
**   - BAND with a single-bit constant, EQ/NE  -> test-bit branch (TBZ/TBNZ)
**   - BAND directly preceding, result unused  -> TST + guard
**   - EQ/NE against zero                      -> compare-and-branch (CBZ/CBNZ)
** After a generic compare against zero of the previous instruction,
** as->flagmcp is set so that instruction's handler may drop the compare.
*/
1646static void asm_intcomp(ASMState *as, IRIns *ir)
1647{
1648 A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
1649 A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
1650 IRRef lref = ir->op1, rref = ir->op2;
1651 Reg left;
1652 uint32_t m;
1653 int cmpprev0 = 0;
1654 lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
1655 irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
1656 "bad comparison data type %d", irt_type(ir->t));
 /* Swapping the operands requires mirroring ordered conditions. */
1657 if (asm_swapops(as, lref, rref)) {
1658 IRRef tmp = lref; lref = rref; rref = tmp;
1659 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
1660 else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
1661 }
1662 oldcc = cc; /* Remember the condition before any PL/MI substitution below. */
1663 if (irref_isk(rref) && get_k64val(as, rref) == 0) {
1664 IRIns *irl = IR(lref);
 /* Against zero, GE/LT are replaced by PL/MI. */
1665 if (cc == CC_GE) cc = CC_PL;
1666 else if (cc == CC_LT) cc = CC_MI;
1667 else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */
1668 cmpprev0 = (irl+1 == ir); /* Left operand is the immediately preceding IR ins. */
1669 /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
1670 if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
1671 IRRef blref = irl->op1, brref = irl->op2;
1672 uint32_t m2 = 0;
1673 Reg bleft;
1674 if (asm_swapops(as, blref, brref)) {
 /* NOTE(review): tmp is declared Reg but holds an IRRef (blref/brref are
 ** IRRef); the swap above in this function uses IRRef. Harmless only if
 ** both typedefs have the same width -- confirm.
 */
1675 Reg tmp = blref; blref = brref; brref = tmp;
1676 }
1677 if (irref_isk(brref)) {
1678 uint64_t k = get_k64val(as, brref);
 /* k has exactly one bit set: branch on that bit (index from emit_ctz64). */
1679 if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
1680 asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
1681 ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
1682 return;
1683 }
 /* Otherwise try to encode k as an immediate; 0 means it does not fit. */
1684 m2 = emit_isk13(k, irt_is64(irl->t));
1685 }
1686 bleft = ra_alloc1(as, blref, RSET_GPR);
1687 ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
1688 if (!m2)
1689 m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
1690 asm_guardcc(as, cc);
1691 emit_n(as, ai^m2, bleft);
1692 return;
1693 }
1694 if (cc == CC_EQ || cc == CC_NE) {
1695 /* Combine cmp-bcc into cbz/cbnz. */
1696 ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
1697 if (irt_is64(ir->t)) ai |= A64I_X;
1698 asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
1699 return;
1700 }
1701 }
1702nocombine:
 /* Generic form: compare (second operand possibly fused) plus guard on cc. */
1703 left = ra_alloc1(as, lref, RSET_GPR);
1704 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1705 asm_guardcc(as, cc);
1706 emit_n(as, ai^m, left);
1707 /* Signed comparison with zero and referencing previous ins? */
1708 if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
1709 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1710}
1711
/* Dispatch a comparison on its type: number types go to asm_fpcomp,
** everything else to asm_intcomp.
*/
1712static void asm_comp(ASMState *as, IRIns *ir)
1713{
1714 if (irt_isnum(ir->t))
1715 asm_fpcomp(as, ir);
1716 else
1717 asm_intcomp(as, ir);
1718}
1719
/* Equality comparisons share the same handler. */
1720#define asm_equal(as, ir) asm_comp(as, ir)
1721
1722/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
1723
1724/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
/* On this target the handler is a stub: both arguments are ignored and
** reaching it trips the assertion.
*/
1725static void asm_hiop(ASMState *as, IRIns *ir)
1726{
1727 UNUSED(as); UNUSED(ir);
1728 lj_assertA(0, "unexpected HIOP"); /* Unused on 64 bit. */
1729}
1730
1731/* -- Profiling ----------------------------------------------------------- */
1732
/* Profiler hook check: loads the hookmask byte of the global state into
** RID_TMP, tests it against HOOK_PROFILE and guards on CC_NE (presumably
** leaving the trace when the bit is set -- confirm against asm_guardcc).
** The emit calls are listed in reverse of execution order (guard, test,
** load), as elsewhere in this file. ir is unused.
*/
1733static void asm_prof(ASMState *as, IRIns *ir)
1734{
 /* HOOK_PROFILE must be encodable as an immediate; 0 means it is not. */
1735 uint32_t k = emit_isk13(HOOK_PROFILE, 0);
1736 lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13");
1737 UNUSED(ir);
1738 asm_guardcc(as, CC_NE);
1739 emit_n(as, A64I_TSTw^k, RID_TMP);
1740 emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1741}
1742
1743/* -- Stack handling ------------------------------------------------------ */
1744
1745/* Check Lua stack size for overflow. Use exit handler as fallback. */
/* Parameters:
**   topslot  number of stack slots needed; compared as 8*topslot bytes.
**   irp      if non-NULL, the IR ins holding the base: the base register is
**            taken from irp (or reloaded from its spill slot) and cur_L is
**            loaded relative to RID_GL. If NULL, RID_BASE is used.
**   allow    registers that may be used to hold a reloaded base.
**   exitno   exit stub branched to (condition CC_LS) when the check fails.
** The emit calls below are listed in reverse of execution order: the last
** call emitted is the first instruction to execute (note that "Save temp
** register" is emitted last and "Restore temp register" first).
*/
1746static void asm_stack_check(ASMState *as, BCReg topslot,
1747 IRIns *irp, RegSet allow, ExitNo exitno)
1748{
1749 Reg pbase;
1750 uint32_t k;
1751 if (irp) {
1752 if (!ra_hasspill(irp->s)) {
1753 pbase = irp->r; /* Base lives in a register already. */
1754 lj_assertA(ra_hasreg(pbase), "base reg lost");
1755 } else if (allow) {
1756 pbase = rset_pickbot(allow); /* Reload the spilled base into an allowed register. */
1757 } else {
 /* No register available: borrow RID_RET, saved/restored via [sp, #0]. */
1758 pbase = RID_RET;
1759 emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
1760 }
1761 } else {
1762 pbase = RID_BASE;
1763 }
 /* Branch to the exit stub if (maxstack - base) <= 8*topslot (unsigned). */
1764 emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
1765 k = emit_isk12((8*topslot));
1766 lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
1767 emit_n(as, A64I_CMPx^k, RID_TMP);
1768 emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
 /* RID_TMP holds the lua_State pointer at this point; fetch its maxstack. */
1769 emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
1770 (int32_t)offsetof(lua_State, maxstack));
1771 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1772 if (ra_hasspill(irp->s))
1773 emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
1774 emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
1775 if (ra_hasspill(irp->s) && !allow)
1776 emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
1777 } else {
1778 emit_getgl(as, RID_TMP, cur_L);
1779 }
1780}
1781
1782/* Restore Lua stack from on-trace state. */
/* Walks the nent entries of the snapshot's map and emits a store for each
** slot relative to RID_BASE, skipping entries flagged SNAP_NORESTORE.
** Number-typed refs are stored with A64I_STRd from an FP register; all other
** refs go through asm_tvstore64.
*/
1783static void asm_stack_restore(ASMState *as, SnapShot *snap)
1784{
1785 SnapEntry *map = &as->T->snapmap[snap->mapofs];
 /* flinks only exists in assertion builds; its sole use is the lj_assertA
 ** at the end (presumably compiled out otherwise -- confirm).
 */
1786#ifdef LUA_USE_ASSERT
1787 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
1788#endif
1789 MSize n, nent = snap->nent;
1790 /* Store the value of all modified slots to the Lua stack. */
1791 for (n = 0; n < nent; n++) {
1792 SnapEntry sn = map[n];
1793 BCReg s = snap_slot(sn);
 /* Byte offset of slot s from RID_BASE: 8 bytes per slot. */
1794 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1795 IRRef ref = snap_ref(sn);
1796 IRIns *ir = IR(ref);
1797 if ((sn & SNAP_NORESTORE))
1798 continue;
1799 if (irt_isnum(ir->t)) {
1800 Reg src = ra_alloc1(as, ref, RSET_FPR);
1801 emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
1802 } else {
1803 asm_tvstore64(as, RID_BASE, ofs, ref);
1804 }
1805 checkmclim(as);
1806 }
 /* The slot entries must be followed directly by the frame links. */
1807 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
1808}
1809
1810/* -- GC handling --------------------------------------------------------- */
1811
1812/* Marker to prevent patching the GC check exit. */
/* Encoded as A64I_ORRx with destination and both sources set to RID_TMP.
** asm_gc_check stores it in the word before its CBNZ guard, and
** lj_asm_patchexit leaves a cbz/cbnz alone when the preceding word equals it.
*/
1813#define ARM64_NOPATCH_GC_CHECK \
1814 (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
1815
1816/* Check GC threshold and do one or more GC steps. */
/* Emits: load gc.total and gc.threshold, compare, branch to l_end on CC_LS,
** otherwise call lj_gc_step_jit(g, steps) with steps = as->gcsteps, then a
** CBNZ guard on RID_RET preceded by the no-patch marker word. The emit calls
** below are listed in reverse of that execution order. Resets as->gcsteps.
*/
1817static void asm_gc_check(ASMState *as)
1818{
1819 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
1820 IRRef args[2];
1821 MCLabel l_end;
1822 Reg tmp2;
1823 ra_evictset(as, RSET_SCRATCH);
1824 l_end = emit_label(as); /* Target for skipping the GC step. */
1825 /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
1826 asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
 /* Marker word placed right before the guard; see ARM64_NOPATCH_GC_CHECK. */
1827 *--as->mcp = ARM64_NOPATCH_GC_CHECK;
1828 args[0] = ASMREF_TMP1; /* global_State *g */
1829 args[1] = ASMREF_TMP2; /* MSize steps */
1830 asm_gencall(as, ci, args);
 /* First argument: copy of RID_GL. Second argument: the step count. */
1831 emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
1832 tmp2 = ra_releasetmp(as, ASMREF_TMP2);
1833 emit_loadi(as, tmp2, as->gcsteps);
1834 /* Jump around GC step if GC total < GC threshold. */
1835 emit_cond_branch(as, CC_LS, l_end);
1836 emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
1837 emit_getgl(as, tmp2, gc.threshold);
1838 emit_getgl(as, RID_TMP, gc.total);
1839 as->gcsteps = 0;
1840 checkmclim(as);
1841}
1842
1843/* -- Loop handling ------------------------------------------------------- */
1844
1845/* Fixup the loop branch. */
/* Patches the branch at the end of the trace (just below as->mctop) so that
** it jumps to as->mcp. With an inverted loop branch the conditional
** branch at p[-2] gets its offset field filled in; otherwise p[-1] is
** overwritten with an unconditional branch.
*/
1846static void asm_loop_fixup(ASMState *as)
1847{
1848 MCode *p = as->mctop;
1849 MCode *target = as->mcp;
1850 if (as->loopinv) { /* Inverted loop branch? */
 /* Offset field width depends on the instruction: 14 bits for the
 ** 0x36000000 pattern (the tbz/tbnz pattern in lj_asm_patchexit), else 19.
 */
1851 uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
1852 ptrdiff_t delta = target - (p - 2);
1853 /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
 /* ORs the offset in, so the field is expected to be zero beforehand. */
1854 p[-2] |= ((uint32_t)delta & mask) << 5;
1855 } else {
1856 ptrdiff_t delta = target - (p - 1);
1857 p[-1] = A64I_B | A64F_S26(delta);
1858 }
1859}
1860
1861/* -- Head of trace ------------------------------------------------------- */
1862
1863/* Reload L register from g->cur_L. */
/* Only acts when the ASMREF_L pseudo-instruction is in use: it is given a
** GPR destination, a load of cur_L into that register is emitted, and
** ra_evictk is called afterwards.
*/
1864static void asm_head_lreg(ASMState *as)
1865{
1866 IRIns *ir = IR(ASMREF_L);
1867 if (ra_used(ir)) {
1868 Reg r = ra_dest(as, ir, RSET_GPR);
1869 emit_getgl(as, r, cur_L);
1870 ra_evictk(as);
1871 }
1872}
1873
1874/* Coalesce BASE register for a root trace. */
/* Reloads L first (asm_head_lreg). If REF_BASE currently has a register that
** is in as->modset, or REF_BASE is marked, it is spilled. Finally
** REF_BASE is tied to RID_BASE via ra_destreg.
*/
1875static void asm_head_root_base(ASMState *as)
1876{
1877 IRIns *ir;
1878 asm_head_lreg(as);
1879 ir = IR(REF_BASE);
1880 if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
1881 ra_spill(as, ir);
1882 ra_destreg(as, ir, RID_BASE);
1883}
1884
1885/* Coalesce BASE register for a side trace. */
/* irp is the instruction holding the base on entry to the side trace
** (presumably the parent's REF_BASE -- confirm with the caller). Returns
** allow with the register chosen for the base removed from it.
*/
1886static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
1887{
1888 IRIns *ir;
1889 asm_head_lreg(as);
1890 ir = IR(REF_BASE);
 /* Same spill rule as for root traces. */
1891 if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
1892 ra_spill(as, ir);
1893 if (ra_hasspill(irp->s)) {
 /* Base is spilled in irp: take any allowed register as destination. */
1894 rset_clear(allow, ra_dest(as, ir, allow));
1895 } else {
 /* Base is in a register in irp: reuse exactly that register. */
1896 Reg r = irp->r;
1897 lj_assertA(ra_hasreg(r), "base reg lost");
1898 rset_clear(allow, r);
 /* If r is occupied by another ref, restore that ref before claiming r. */
1899 if (r != ir->r && !rset_test(as->freeset, r))
1900 ra_restore(as, regcost_ref(as->cost[r]));
1901 ra_destreg(as, ir, r);
1902 }
1903 return allow;
1904}
1905
1906/* -- Tail of trace ------------------------------------------------------- */
1907
1908/* Fixup the tail code. */
/* Fills in the two reserved words at the end of the trace: the stack pointer
** adjustment and the exit branch. lnk is the trace linked to; 0 means the
** tail branches to lj_vm_exit_interp instead.
*/
1909static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1910{
1911 MCode *p = as->mctop;
1912 MCode *target;
1913 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1914 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
1915 if (spadj == 0) {
 /* No adjustment needed: put a NOP in the last word and lower mctop by
 ** one, so the branch written below lands in the word before the NOP.
 */
1916 *--p = A64I_LE(A64I_NOP);
1917 as->mctop = p;
1918 } else {
1919 /* Patch stack adjustment. */
1920 uint32_t k = emit_isk12(spadj);
1921 lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
1922 p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
1923 }
1924 /* Patch exit branch. */
1925 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
 /* Offset is relative to the branch itself at p-1, hence (target-p)+1. */
1926 p[-1] = A64I_B | A64F_S26((target-p)+1);
1927}
1928
1929/* Prepare tail of code. */
/* Reserves words at the top of the machine code area before assembly
** starts: one word for the exit branch, plus one more for the stack pointer
** adjustment when there is no loop (as->loopref == 0). With a loop,
** as->invmcp records the position of the branch word; otherwise it is NULL.
*/
1930static void asm_tail_prep(ASMState *as)
1931{
1932 MCode *p = as->mctop - 1; /* Leave room for exit branch. */
1933 if (as->loopref) {
1934 as->invmcp = as->mcp = p;
1935 } else {
1936 as->mcp = p-1; /* Leave room for stack pointer adjustment. */
1937 as->invmcp = NULL;
1938 }
1939 *p = 0; /* Prevent load/store merging. */
1940}
1941
1942/* -- Trace setup --------------------------------------------------------- */
1943
1944/* Ensure there are enough stack slots for call arguments. */
/* Counts how many arguments of the call do not fit into the REGARG_NUMGPR
** general and REGARG_NUMFPR FP argument registers; each such argument
** accounts for 2 spill slots. as->evenspill is raised to that count if it is
** smaller. Always returns a register hint for RID_RET.
*/
1945static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1946{
1947 IRRef args[CCI_NARGS_MAX*2];
1948 uint32_t i, nargs = CCI_XNARGS(ci);
1949 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
1950 asm_collectargs(as, ir, ci, args);
1951 for (i = 0; i < nargs; i++) {
 /* A zero ref (no argument) is counted like a GPR argument. */
1952 if (args[i] && irt_isfp(IR(args[i])->t)) {
1953 if (nfpr > 0) nfpr--; else nslots += 2;
1954 } else {
1955 if (ngpr > 0) ngpr--; else nslots += 2;
1956 }
1957 }
1958 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
1959 as->evenspill = nslots;
1960 return REGSP_HINT(RID_RET);
1961}
1962
/* Target-specific setup before assembling a trace: sets up the exit stubs,
** one per snapshot, plus one extra when the trace has a parent.
*/
1963static void asm_setup_target(ASMState *as)
1964{
1965 /* May need extra exit for asm_stack_check on side traces. */
1966 asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
1967}
1968
1969#if LJ_BE
1970/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
/* Byte-swaps every MCode word in [mcode, mcode + size), where size is given
** in bytes. Only compiled for big-endian builds; LJ_TARGET_MCODE_FIXUP is
** defined alongside it.
*/
1971static void asm_mcode_fixup(MCode *mcode, MSize size)
1972{
1973 MCode *pe = (MCode *)((char *)mcode + size);
1974 while (mcode < pe) {
1975 MCode ins = *mcode;
1976 *mcode++ = lj_bswap(ins);
1977 }
1978}
1979#define LJ_TARGET_MCODE_FIXUP 1
1980#endif
1981
1982/* -- Trace patching ------------------------------------------------------ */
1983
1984/* Patch exit jumps of existing machine code to a new target. */
/* Scans the machine code of trace T for branches that point at the exit stub
** of exit exitno (px) and redirects them to target where the branch's offset
** range allows it. Unless suppressed by the GC check marker, the branch in
** the exit stub itself is redirected too, which covers branches that stayed
** pointed at the stub. The area is made writable via lj_mcode_patch(J, p, 0)
** and handed back via lj_mcode_patch(J, mcarea, 1) at the end; the modified
** range is synced with lj_mcode_sync.
*/
1985void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1986{
1987 MCode *p = T->mcode;
1988 MCode *pe = (MCode *)((char *)p + T->szmcode);
1989 MCode *cstart = NULL; /* Lowest patched address, start of the sync range. */
1990 MCode *mcarea = lj_mcode_patch(J, p, 0);
1991 MCode *px = exitstub_trace_addr(T, exitno);
1992 int patchlong = 1;
1993 /* Note: this assumes a trace exit is only ever patched once. */
1994 for (; p < pe; p++) {
1995 /* Look for exitstub branch, replace with branch to target. */
1996 ptrdiff_t delta = target - p;
1997 MCode ins = A64I_LE(*p);
 /* Each case matches an opcode pattern and checks that the encoded offset
 ** equals px-p, i.e. that the branch currently targets the exit stub.
 */
1998 if ((ins & 0xff000000u) == 0x54000000u &&
1999 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
2000 /* Patch bcc, if within range. */
2001 if (A64F_S_OK(delta, 19)) {
2002 *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
2003 if (!cstart) cstart = p;
2004 }
2005 } else if ((ins & 0xfc000000u) == 0x14000000u &&
2006 ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
2007 /* Patch b. */
2008 lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
2009 *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
2010 if (!cstart) cstart = p;
2011 } else if ((ins & 0x7e000000u) == 0x34000000u &&
2012 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
2013 /* Patch cbz/cbnz, if within range. */
 /* A GC check guard is never patched, and then the stub must stay as is.
 ** NOTE(review): p[-1] is compared without A64I_LE, unlike the read of *p
 ** above -- confirm this is intended for big-endian builds.
 */
2014 if (p[-1] == ARM64_NOPATCH_GC_CHECK) {
2015 patchlong = 0;
2016 } else if (A64F_S_OK(delta, 19)) {
2017 *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
2018 if (!cstart) cstart = p;
2019 }
2020 } else if ((ins & 0x7e000000u) == 0x36000000u &&
2021 ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
2022 /* Patch tbz/tbnz, if within range. */
2023 if (A64F_S_OK(delta, 14)) {
2024 *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
2025 if (!cstart) cstart = p;
2026 }
2027 }
2028 }
2029 /* Always patch long-range branch in exit stub itself. Except, if we can't. */
2030 if (patchlong) {
2031 ptrdiff_t delta = target - px;
2032 lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
 /* NOTE(review): written without A64I_LE, unlike the stores in the loop. */
2033 *px = A64I_B | A64F_S26(delta);
2034 if (!cstart) cstart = px;
2035 }
2036 if (cstart) lj_mcode_sync(cstart, px+1);
2037 lj_mcode_patch(J, mcarea, 1);
2038}
2039
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index b55596e6..22aa88bf 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
23{ 23{
24 Reg r = IR(ref)->r; 24 Reg r = IR(ref)->r;
25 if (ra_noreg(r)) { 25 if (ra_noreg(r)) {
26 if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) 26 if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
27 return RID_ZERO; 27 return RID_ZERO;
28 r = ra_allocref(as, ref, allow); 28 r = ra_allocref(as, ref, allow);
29 } else { 29 } else {
@@ -64,17 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
64/* Setup spare long-range jump slots per mcarea. */ 64/* Setup spare long-range jump slots per mcarea. */
65static void asm_sparejump_setup(ASMState *as) 65static void asm_sparejump_setup(ASMState *as)
66{ 66{
67 MCode *mxp = as->mcbot; 67 MCode *mxp = as->mctop;
68 if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { 68 if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
69 lua_assert(MIPSI_NOP == 0); 69 mxp -= MIPS_SPAREJUMP*2;
70 lj_assertA(MIPSI_NOP == 0, "bad NOP");
70 memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); 71 memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
71 mxp += MIPS_SPAREJUMP*2; 72 as->mctop = mxp;
72 lua_assert(mxp < as->mctop); 73 }
73 lj_mcode_sync(as->mcbot, mxp); 74}
74 lj_mcode_commitbot(as->J, mxp); 75
75 as->mcbot = mxp; 76static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
76 as->mclim = as->mcbot + MCLIM_REDZONE; 77{
78 MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
79 int slot = MIPS_SPAREJUMP;
80 while (slot--) {
81 mxp -= 2;
82 if (*mxp == tjump) {
83 return mxp;
84 } else if (*mxp == MIPSI_NOP) {
85 *mxp = tjump;
86 return mxp;
87 }
77 } 88 }
89 return NULL;
78} 90}
79 91
80/* Setup exit stub after the end of each trace. */ 92/* Setup exit stub after the end of each trace. */
@@ -84,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as)
84 /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ 96 /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
85 *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; 97 *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
86 *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); 98 *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
87 lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); 99 lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0,
100 "branch target out of range");
88 *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; 101 *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
89 as->mctop = mxp; 102 as->mctop = mxp;
90} 103}
@@ -101,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
101 as->invmcp = NULL; 114 as->invmcp = NULL;
102 as->loopinv = 1; 115 as->loopinv = 1;
103 as->mcp = p+1; 116 as->mcp = p+1;
117#if !LJ_TARGET_MIPSR6
104 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ 118 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */
119#else
120 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u :
121 (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */
122#endif
105 target = p; /* Patch target later in asm_loop_fixup. */ 123 target = p; /* Patch target later in asm_loop_fixup. */
106 } 124 }
107 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); 125 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
@@ -165,9 +183,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
165 } else if (ir->o == IR_UREFC) { 183 } else if (ir->o == IR_UREFC) {
166 if (irref_isk(ir->op1)) { 184 if (irref_isk(ir->op1)) {
167 GCfunc *fn = ir_kfunc(IR(ir->op1)); 185 GCfunc *fn = ir_kfunc(IR(ir->op1));
168 int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); 186 intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv;
169 int32_t jgl = (intptr_t)J2G(as->J); 187 intptr_t jgl = (intptr_t)J2G(as->J);
170 if ((uint32_t)(ofs-jgl) < 65536) { 188 if ((uintptr_t)(ofs-jgl) < 65536) {
171 *ofsp = ofs-jgl-32768; 189 *ofsp = ofs-jgl-32768;
172 return RID_JGL; 190 return RID_JGL;
173 } else { 191 } else {
@@ -175,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
175 return ra_allock(as, ofs-(int16_t)ofs, allow); 193 return ra_allock(as, ofs-(int16_t)ofs, allow);
176 } 194 }
177 } 195 }
196 } else if (ir->o == IR_TMPREF) {
197 *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
198 return RID_JGL;
178 } 199 }
179 } 200 }
180 *ofsp = 0; 201 *ofsp = 0;
@@ -189,20 +210,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
189 Reg base; 210 Reg base;
190 if (ra_noreg(ir->r) && canfuse(as, ir)) { 211 if (ra_noreg(ir->r) && canfuse(as, ir)) {
191 if (ir->o == IR_ADD) { 212 if (ir->o == IR_ADD) {
192 int32_t ofs2; 213 intptr_t ofs2;
193 if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { 214 if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
215 checki16(ofs2))) {
194 ref = ir->op1; 216 ref = ir->op1;
195 ofs = ofs2; 217 ofs = (int32_t)ofs2;
196 } 218 }
197 } else if (ir->o == IR_STRREF) { 219 } else if (ir->o == IR_STRREF) {
198 int32_t ofs2 = 65536; 220 intptr_t ofs2 = 65536;
199 lua_assert(ofs == 0); 221 lj_assertA(ofs == 0, "bad usage");
200 ofs = (int32_t)sizeof(GCstr); 222 ofs = (int32_t)sizeof(GCstr);
201 if (irref_isk(ir->op2)) { 223 if (irref_isk(ir->op2)) {
202 ofs2 = ofs + IR(ir->op2)->i; 224 ofs2 = ofs + get_kval(as, ir->op2);
203 ref = ir->op1; 225 ref = ir->op1;
204 } else if (irref_isk(ir->op1)) { 226 } else if (irref_isk(ir->op1)) {
205 ofs2 = ofs + IR(ir->op1)->i; 227 ofs2 = ofs + get_kval(as, ir->op1);
206 ref = ir->op2; 228 ref = ir->op2;
207 } 229 }
208 if (!checki16(ofs2)) { 230 if (!checki16(ofs2)) {
@@ -210,7 +232,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
210 Reg right, left = ra_alloc2(as, ir, allow); 232 Reg right, left = ra_alloc2(as, ir, allow);
211 right = (left >> 8); left &= 255; 233 right = (left >> 8); left &= 255;
212 emit_hsi(as, mi, rt, RID_TMP, ofs); 234 emit_hsi(as, mi, rt, RID_TMP, ofs);
213 emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); 235 emit_dst(as, MIPSI_AADDU, RID_TMP, left, right);
214 return; 236 return;
215 } 237 }
216 ofs = ofs2; 238 ofs = ofs2;
@@ -225,29 +247,43 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
225/* Generate a call to a C function. */ 247/* Generate a call to a C function. */
226static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 248static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
227{ 249{
228 uint32_t n, nargs = CCI_NARGS(ci); 250 uint32_t n, nargs = CCI_XNARGS(ci);
229 int32_t ofs = 16; 251 int32_t ofs = LJ_32 ? 16 : 0;
252#if LJ_SOFTFP
253 Reg gpr = REGARG_FIRSTGPR;
254#else
230 Reg gpr, fpr = REGARG_FIRSTFPR; 255 Reg gpr, fpr = REGARG_FIRSTFPR;
256#endif
231 if ((void *)ci->func) 257 if ((void *)ci->func)
232 emit_call(as, (void *)ci->func); 258 emit_call(as, (void *)ci->func, 1);
259#if !LJ_SOFTFP
233 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) 260 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
234 as->cost[gpr] = REGCOST(~0u, ASMREF_L); 261 as->cost[gpr] = REGCOST(~0u, ASMREF_L);
235 gpr = REGARG_FIRSTGPR; 262 gpr = REGARG_FIRSTGPR;
263#endif
236 for (n = 0; n < nargs; n++) { /* Setup args. */ 264 for (n = 0; n < nargs; n++) { /* Setup args. */
237 IRRef ref = args[n]; 265 IRRef ref = args[n];
238 if (ref) { 266 if (ref) {
239 IRIns *ir = IR(ref); 267 IRIns *ir = IR(ref);
268#if !LJ_SOFTFP
240 if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && 269 if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
241 !(ci->flags & CCI_VARARG)) { 270 !(ci->flags & CCI_VARARG)) {
242 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ 271 lj_assertA(rset_test(as->freeset, fpr),
272 "reg %d not free", fpr); /* Already evicted. */
243 ra_leftov(as, fpr, ref); 273 ra_leftov(as, fpr, ref);
244 fpr += 2; 274 fpr += LJ_32 ? 2 : 1;
245 gpr += irt_isnum(ir->t) ? 2 : 1; 275 gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1;
246 } else { 276 } else
277#endif
278 {
279#if LJ_32 && !LJ_SOFTFP
247 fpr = REGARG_LASTFPR+1; 280 fpr = REGARG_LASTFPR+1;
248 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; 281#endif
282 if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
249 if (gpr <= REGARG_LASTGPR) { 283 if (gpr <= REGARG_LASTGPR) {
250 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ 284 lj_assertA(rset_test(as->freeset, gpr),
285 "reg %d not free", gpr); /* Already evicted. */
286#if !LJ_SOFTFP
251 if (irt_isfp(ir->t)) { 287 if (irt_isfp(ir->t)) {
252 RegSet of = as->freeset; 288 RegSet of = as->freeset;
253 Reg r; 289 Reg r;
@@ -256,31 +292,56 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
256 r = ra_alloc1(as, ref, RSET_FPR); 292 r = ra_alloc1(as, ref, RSET_FPR);
257 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 293 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
258 if (irt_isnum(ir->t)) { 294 if (irt_isnum(ir->t)) {
295#if LJ_32
259 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); 296 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
260 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); 297 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
261 lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ 298 lj_assertA(rset_test(as->freeset, gpr+1),
299 "reg %d not free", gpr+1); /* Already evicted. */
262 gpr += 2; 300 gpr += 2;
301#else
302 emit_tg(as, MIPSI_DMFC1, gpr, r);
303 gpr++; fpr++;
304#endif
263 } else if (irt_isfloat(ir->t)) { 305 } else if (irt_isfloat(ir->t)) {
264 emit_tg(as, MIPSI_MFC1, gpr, r); 306 emit_tg(as, MIPSI_MFC1, gpr, r);
265 gpr++; 307 gpr++;
308#if LJ_64
309 fpr++;
310#endif
266 } 311 }
267 } else { 312 } else
313#endif
314 {
268 ra_leftov(as, gpr, ref); 315 ra_leftov(as, gpr, ref);
269 gpr++; 316 gpr++;
317#if LJ_64 && !LJ_SOFTFP
318 fpr++;
319#endif
270 } 320 }
271 } else { 321 } else {
272 Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 322 Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
323#if LJ_32
273 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; 324 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
274 emit_spstore(as, ir, r, ofs); 325 emit_spstore(as, ir, r, ofs);
275 ofs += irt_isnum(ir->t) ? 8 : 4; 326 ofs += irt_isnum(ir->t) ? 8 : 4;
327#else
328 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0));
329 ofs += 8;
330#endif
276 } 331 }
277 } 332 }
278 } else { 333 } else {
334#if !LJ_SOFTFP
279 fpr = REGARG_LASTFPR+1; 335 fpr = REGARG_LASTFPR+1;
280 if (gpr <= REGARG_LASTGPR) 336#endif
337 if (gpr <= REGARG_LASTGPR) {
281 gpr++; 338 gpr++;
282 else 339#if LJ_64 && !LJ_SOFTFP
283 ofs += 4; 340 fpr++;
341#endif
342 } else {
343 ofs += LJ_32 ? 4 : 8;
344 }
284 } 345 }
285 checkmclim(as); 346 checkmclim(as);
286 } 347 }
@@ -290,50 +351,57 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
290static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) 351static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
291{ 352{
292 RegSet drop = RSET_SCRATCH; 353 RegSet drop = RSET_SCRATCH;
354#if LJ_32
293 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 355 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
356#endif
357#if !LJ_SOFTFP
294 if ((ci->flags & CCI_NOFPRCLOBBER)) 358 if ((ci->flags & CCI_NOFPRCLOBBER))
295 drop &= ~RSET_FPR; 359 drop &= ~RSET_FPR;
360#endif
296 if (ra_hasreg(ir->r)) 361 if (ra_hasreg(ir->r))
297 rset_clear(drop, ir->r); /* Dest reg handled below. */ 362 rset_clear(drop, ir->r); /* Dest reg handled below. */
363#if LJ_32
298 if (hiop && ra_hasreg((ir+1)->r)) 364 if (hiop && ra_hasreg((ir+1)->r))
299 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 365 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
366#endif
300 ra_evictset(as, drop); /* Evictions must be performed first. */ 367 ra_evictset(as, drop); /* Evictions must be performed first. */
301 if (ra_used(ir)) { 368 if (ra_used(ir)) {
302 lua_assert(!irt_ispri(ir->t)); 369 lj_assertA(!irt_ispri(ir->t), "PRI dest");
303 if (irt_isfp(ir->t)) { 370 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
304 if ((ci->flags & CCI_CASTU64)) { 371 if ((ci->flags & CCI_CASTU64)) {
305 int32_t ofs = sps_scale(ir->s); 372 int32_t ofs = sps_scale(ir->s);
306 Reg dest = ir->r; 373 Reg dest = ir->r;
307 if (ra_hasreg(dest)) { 374 if (ra_hasreg(dest)) {
308 ra_free(as, dest); 375 ra_free(as, dest);
309 ra_modified(as, dest); 376 ra_modified(as, dest);
377#if LJ_32
310 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); 378 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
311 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); 379 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
380#else
381 emit_tg(as, MIPSI_DMTC1, RID_RET, dest);
382#endif
312 } 383 }
313 if (ofs) { 384 if (ofs) {
385#if LJ_32
314 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); 386 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
315 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); 387 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
388#else
389 emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs);
390#endif
316 } 391 }
317 } else { 392 } else {
318 ra_destreg(as, ir, RID_FPRET); 393 ra_destreg(as, ir, RID_FPRET);
319 } 394 }
395#if LJ_32
320 } else if (hiop) { 396 } else if (hiop) {
321 ra_destpair(as, ir); 397 ra_destpair(as, ir);
398#endif
322 } else { 399 } else {
323 ra_destreg(as, ir, RID_RET); 400 ra_destreg(as, ir, RID_RET);
324 } 401 }
325 } 402 }
326} 403}
327 404
328static void asm_call(ASMState *as, IRIns *ir)
329{
330 IRRef args[CCI_NARGS_MAX];
331 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
332 asm_collectargs(as, ir, ci, args);
333 asm_setupresult(as, ir, ci);
334 asm_gencall(as, ci, args);
335}
336
337static void asm_callx(ASMState *as, IRIns *ir) 405static void asm_callx(ASMState *as, IRIns *ir)
338{ 406{
339 IRRef args[CCI_NARGS_MAX*2]; 407 IRRef args[CCI_NARGS_MAX*2];
@@ -346,7 +414,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
346 func = ir->op2; irf = IR(func); 414 func = ir->op2; irf = IR(func);
347 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 415 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
348 if (irref_isk(func)) { /* Call to constant address. */ 416 if (irref_isk(func)) { /* Call to constant address. */
349 ci.func = (ASMFunction)(void *)(irf->i); 417 ci.func = (ASMFunction)(void *)get_kval(as, func);
350 } else { /* Need specific register for indirect calls. */ 418 } else { /* Need specific register for indirect calls. */
351 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); 419 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
352 MCode *p = as->mcp; 420 MCode *p = as->mcp;
@@ -361,27 +429,23 @@ static void asm_callx(ASMState *as, IRIns *ir)
361 asm_gencall(as, &ci, args); 429 asm_gencall(as, &ci, args);
362} 430}
363 431
364static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 432#if !LJ_SOFTFP
365{
366 const CCallInfo *ci = &lj_ir_callinfo[id];
367 IRRef args[2];
368 args[0] = ir->op1;
369 args[1] = ir->op2;
370 asm_setupresult(as, ir, ci);
371 asm_gencall(as, ci, args);
372}
373
374static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) 433static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
375{ 434{
376 /* The modified regs must match with the *.dasc implementation. */ 435 /* The modified regs must match with the *.dasc implementation. */
377 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| 436 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
378 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); 437 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
438#if LJ_TARGET_MIPSR6
439 |RID2RSET(RID_F21)
440#endif
441 ;
379 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); 442 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
380 ra_evictset(as, drop); 443 ra_evictset(as, drop);
381 ra_destreg(as, ir, RID_FPRET); 444 ra_destreg(as, ir, RID_FPRET);
382 emit_call(as, (void *)lj_ir_callinfo[id].func); 445 emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
383 ra_leftov(as, REGARG_FIRSTFPR, ir->op1); 446 ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
384} 447}
448#endif
385 449
386/* -- Returns ------------------------------------------------------------- */ 450/* -- Returns ------------------------------------------------------------- */
387 451
@@ -390,25 +454,52 @@ static void asm_retf(ASMState *as, IRIns *ir)
390{ 454{
391 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 455 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
392 void *pc = ir_kptr(IR(ir->op2)); 456 void *pc = ir_kptr(IR(ir->op2));
393 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 457 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
394 as->topslot -= (BCReg)delta; 458 as->topslot -= (BCReg)delta;
395 if ((int32_t)as->topslot < 0) as->topslot = 0; 459 if ((int32_t)as->topslot < 0) as->topslot = 0;
396 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 460 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
397 emit_setgl(as, base, jit_base); 461 emit_setgl(as, base, jit_base);
398 emit_addptr(as, base, -8*delta); 462 emit_addptr(as, base, -8*delta);
399 asm_guard(as, MIPSI_BNE, RID_TMP, 463 asm_guard(as, MIPSI_BNE, RID_TMP,
400 ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); 464 ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
401 emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); 465 emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
402} 466}
403 467
468/* -- Buffer operations --------------------------------------------------- */
469
470#if LJ_HASBUFFER
471static void asm_bufhdr_write(ASMState *as, Reg sb)
472{
473 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
474 IRIns irgc;
475 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
476 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
477 if ((as->flags & JIT_F_MIPSXXR2)) {
478 emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp,
479 lj_fls(SBUF_MASK_FLAG), 0);
480 } else {
481 emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
482 emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
483 }
484 emit_getgl(as, RID_TMP, cur_L);
485 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
486}
487#endif
488
404/* -- Type conversions ---------------------------------------------------- */ 489/* -- Type conversions ---------------------------------------------------- */
405 490
491#if !LJ_SOFTFP
406static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 492static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
407{ 493{
408 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 494 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
409 Reg dest = ra_dest(as, ir, RSET_GPR); 495 Reg dest = ra_dest(as, ir, RSET_GPR);
496#if !LJ_TARGET_MIPSR6
410 asm_guard(as, MIPSI_BC1F, 0, 0); 497 asm_guard(as, MIPSI_BC1F, 0, 0);
411 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); 498 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
499#else
500 asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31));
501 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left);
502#endif
412 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); 503 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
413 emit_tg(as, MIPSI_MFC1, dest, tmp); 504 emit_tg(as, MIPSI_MFC1, dest, tmp);
414 emit_fg(as, MIPSI_CVT_W_D, tmp, left); 505 emit_fg(as, MIPSI_CVT_W_D, tmp, left);
@@ -424,15 +515,57 @@ static void asm_tobit(ASMState *as, IRIns *ir)
424 emit_tg(as, MIPSI_MFC1, dest, tmp); 515 emit_tg(as, MIPSI_MFC1, dest, tmp);
425 emit_fgh(as, MIPSI_ADD_D, tmp, left, right); 516 emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
426} 517}
518#elif LJ_64 /* && LJ_SOFTFP */
519static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
520{
521 /* The modified regs must match with the *.dasc implementation. */
522 RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
523 RID2RSET(RID_R1)|RID2RSET(RID_R12);
524 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
525 ra_evictset(as, drop);
526 /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
527 ra_destreg(as, ir, RID_RET);
528 asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO);
529 emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0);
530 if (r == RID_NONE)
531 ra_leftov(as, REGARG_FIRSTGPR, ir->op1);
532 else if (r != REGARG_FIRSTGPR)
533 emit_move(as, REGARG_FIRSTGPR, r);
534}
535
536static void asm_tobit(ASMState *as, IRIns *ir)
537{
538 Reg dest = ra_dest(as, ir, RSET_GPR);
539 emit_dta(as, MIPSI_SLL, dest, dest, 0);
540 asm_callid(as, ir, IRCALL_lj_vm_tobit);
541}
542#endif
427 543
428static void asm_conv(ASMState *as, IRIns *ir) 544static void asm_conv(ASMState *as, IRIns *ir)
429{ 545{
430 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 546 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
547#if !LJ_SOFTFP32
431 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 548 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
549#endif
550#if LJ_64
551 int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
552#endif
432 IRRef lref = ir->op1; 553 IRRef lref = ir->op1;
433 lua_assert(irt_type(ir->t) != st); 554#if LJ_32
434 lua_assert(!(irt_isint64(ir->t) || 555 /* 64 bit integer conversions are handled by SPLIT. */
435 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ 556 lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
557 "IR %04d has unsplit 64 bit type",
558 (int)(ir - as->ir) - REF_BIAS);
559#endif
560#if LJ_SOFTFP32
561 /* FP conversions are handled by SPLIT. */
562 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
563 "IR %04d has FP type",
564 (int)(ir - as->ir) - REF_BIAS);
565 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
566#else
567 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
568#if !LJ_SOFTFP
436 if (irt_isfp(ir->t)) { 569 if (irt_isfp(ir->t)) {
437 Reg dest = ra_dest(as, ir, RSET_FPR); 570 Reg dest = ra_dest(as, ir, RSET_FPR);
438 if (stfp) { /* FP to FP conversion. */ 571 if (stfp) { /* FP to FP conversion. */
@@ -448,27 +581,56 @@ static void asm_conv(ASMState *as, IRIns *ir)
448 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); 581 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
449 emit_fg(as, MIPSI_CVT_D_W, dest, dest); 582 emit_fg(as, MIPSI_CVT_D_W, dest, dest);
450 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 583 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
451 (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), 584 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
452 RSET_GPR);
453 emit_tg(as, MIPSI_MTC1, RID_TMP, dest); 585 emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
454 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); 586 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
455 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 587 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
588#if LJ_64
589 } else if(st == IRT_U64) { /* U64 to FP conversion. */
590 /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
591 Reg left = ra_alloc1(as, lref, RSET_GPR);
592 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
593 MCLabel l_end = emit_label(as);
594 if (irt_isfloat(ir->t)) {
595 emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp);
596 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63],
597 rset_exclude(RSET_GPR, left));
598 emit_fg(as, MIPSI_CVT_S_L, dest, dest);
599 } else {
600 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
601 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63],
602 rset_exclude(RSET_GPR, left));
603 emit_fg(as, MIPSI_CVT_D_L, dest, dest);
604 }
605 emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end);
606 emit_tg(as, MIPSI_DMTC1, RID_TMP, dest);
607 emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0);
608#endif
456 } else { /* Integer to FP conversion. */ 609 } else { /* Integer to FP conversion. */
457 Reg left = ra_alloc1(as, lref, RSET_GPR); 610 Reg left = ra_alloc1(as, lref, RSET_GPR);
611#if LJ_32
458 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, 612 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
459 dest, dest); 613 dest, dest);
460 emit_tg(as, MIPSI_MTC1, left, dest); 614 emit_tg(as, MIPSI_MTC1, left, dest);
615#else
616 MIPSIns mi = irt_isfloat(ir->t) ?
617 (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) :
618 (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W);
619 emit_fg(as, mi, dest, dest);
620 emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest);
621#endif
461 } 622 }
462 } else if (stfp) { /* FP to integer conversion. */ 623 } else if (stfp) { /* FP to integer conversion. */
463 if (irt_isguard(ir->t)) { 624 if (irt_isguard(ir->t)) {
464 /* Checked conversions are only supported from number to int. */ 625 /* Checked conversions are only supported from number to int. */
465 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 626 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
627 "bad type for checked CONV");
466 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 628 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
467 } else { 629 } else {
468 Reg dest = ra_dest(as, ir, RSET_GPR); 630 Reg dest = ra_dest(as, ir, RSET_GPR);
469 Reg left = ra_alloc1(as, lref, RSET_FPR); 631 Reg left = ra_alloc1(as, lref, RSET_FPR);
470 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 632 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
471 if (irt_isu32(ir->t)) { 633 if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
472 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ 634 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
473 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); 635 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
474 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 636 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
@@ -479,25 +641,112 @@ static void asm_conv(ASMState *as, IRIns *ir)
479 tmp, left, tmp); 641 tmp, left, tmp);
480 if (st == IRT_FLOAT) 642 if (st == IRT_FLOAT)
481 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), 643 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
482 (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), 644 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
483 RSET_GPR);
484 else 645 else
485 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 646 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
486 (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), 647 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
487 RSET_GPR); 648#if LJ_64
649 } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
650 MCLabel l_end;
651 emit_tg(as, MIPSI_DMFC1, dest, tmp);
652 l_end = emit_label(as);
653 /* For inputs >= 2^63 add -2^64 and convert again. */
654 if (st == IRT_NUM) {
655 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
656 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
657 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
658 (void *)&as->J->k64[LJ_K64_M2P64],
659 rset_exclude(RSET_GPR, dest));
660 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
661#if !LJ_TARGET_MIPSR6
662 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
663 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
664#else
665 emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
666 emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
667#endif
668 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
669 (void *)&as->J->k64[LJ_K64_2P63],
670 rset_exclude(RSET_GPR, dest));
671 } else {
672 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
673 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
674 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
675 (void *)&as->J->k32[LJ_K32_M2P64],
676 rset_exclude(RSET_GPR, dest));
677 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
678#if !LJ_TARGET_MIPSR6
679 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
680 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
681#else
682 emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
683 emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
684#endif
685 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
686 (void *)&as->J->k32[LJ_K32_2P63],
687 rset_exclude(RSET_GPR, dest));
688 }
689#endif
488 } else { 690 } else {
691#if LJ_32
489 emit_tg(as, MIPSI_MFC1, dest, tmp); 692 emit_tg(as, MIPSI_MFC1, dest, tmp);
490 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, 693 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
491 tmp, left); 694 tmp, left);
695#else
696 MIPSIns mi = irt_is64(ir->t) ?
697 (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
698 (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
699 emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
700 emit_fg(as, mi, left, left);
701#endif
492 } 702 }
493 } 703 }
494 } else { 704 } else
705#else
706 if (irt_isfp(ir->t)) {
707#if LJ_64 && LJ_HASFFI
708 if (stfp) { /* FP to FP conversion. */
709 asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d :
710 IRCALL_softfp_d2f);
711 } else { /* Integer to FP conversion. */
712 IRCallID cid = ((IRT_IS64 >> st) & 1) ?
713 (irt_isnum(ir->t) ?
714 (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) :
715 (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) :
716 (irt_isnum(ir->t) ?
717 (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) :
718 (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f));
719 asm_callid(as, ir, cid);
720 }
721#else
722 asm_callid(as, ir, IRCALL_softfp_i2d);
723#endif
724 } else if (stfp) { /* FP to integer conversion. */
725 if (irt_isguard(ir->t)) {
726 /* Checked conversions are only supported from number to int. */
727 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
728 "bad type for checked CONV");
729 asm_tointg(as, ir, RID_NONE);
730 } else {
731 IRCallID cid = irt_is64(ir->t) ?
732 ((st == IRT_NUM) ?
733 (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
734 (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
735 ((st == IRT_NUM) ?
736 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
737 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
738 asm_callid(as, ir, cid);
739 }
740 } else
741#endif
742#endif
743 {
495 Reg dest = ra_dest(as, ir, RSET_GPR); 744 Reg dest = ra_dest(as, ir, RSET_GPR);
496 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 745 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
497 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 746 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
498 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 747 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
499 if ((ir->op2 & IRCONV_SEXT)) { 748 if ((ir->op2 & IRCONV_SEXT)) {
500 if ((as->flags & JIT_F_MIPS32R2)) { 749 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
501 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); 750 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
502 } else { 751 } else {
503 uint32_t shift = st == IRT_I8 ? 24 : 16; 752 uint32_t shift = st == IRT_I8 ? 24 : 16;
@@ -509,94 +758,171 @@ static void asm_conv(ASMState *as, IRIns *ir)
509 (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); 758 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
510 } 759 }
511 } else { /* 32/64 bit integer conversions. */ 760 } else { /* 32/64 bit integer conversions. */
761#if LJ_32
512 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ 762 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
513 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ 763 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
764#else
765 if (irt_is64(ir->t)) {
766 if (st64) {
767 /* 64/64 bit no-op (cast)*/
768 ra_leftov(as, dest, lref);
769 } else {
770 Reg left = ra_alloc1(as, lref, RSET_GPR);
771 if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
772 emit_dta(as, MIPSI_SLL, dest, left, 0);
773 } else { /* 32 to 64 bit zero extension. */
774 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
775 }
776 }
777 } else {
778 if (st64 && !(ir->op2 & IRCONV_NONE)) {
779 /* This is either a 32 bit reg/reg mov which zeroes the hiword
780 ** or a load of the loword from a 64 bit address.
781 */
782 Reg left = ra_alloc1(as, lref, RSET_GPR);
783 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
784 } else { /* 32/32 bit no-op (cast). */
785 /* Do nothing, but may need to move regs. */
786 ra_leftov(as, dest, lref);
787 }
788 }
789#endif
514 } 790 }
515 } 791 }
516} 792}
517 793
518#if LJ_HASFFI
519static void asm_conv64(ASMState *as, IRIns *ir)
520{
521 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
522 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
523 IRCallID id;
524 const CCallInfo *ci;
525 IRRef args[2];
526 args[LJ_BE?0:1] = ir->op1;
527 args[LJ_BE?1:0] = (ir-1)->op1;
528 if (st == IRT_NUM || st == IRT_FLOAT) {
529 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
530 ir--;
531 } else {
532 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
533 }
534 ci = &lj_ir_callinfo[id];
535 asm_setupresult(as, ir, ci);
536 asm_gencall(as, ci, args);
537}
538#endif
539
540static void asm_strto(ASMState *as, IRIns *ir) 794static void asm_strto(ASMState *as, IRIns *ir)
541{ 795{
542 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 796 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
543 IRRef args[2]; 797 IRRef args[2];
798 int32_t ofs = 0;
799#if LJ_SOFTFP32
800 ra_evictset(as, RSET_SCRATCH);
801 if (ra_used(ir)) {
802 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
803 (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
804 int i;
805 for (i = 0; i < 2; i++) {
806 Reg r = (ir+i)->r;
807 if (ra_hasreg(r)) {
808 ra_free(as, r);
809 ra_modified(as, r);
810 emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
811 }
812 }
813 ofs = sps_scale(ir->s & ~1);
814 } else {
815 Reg rhi = ra_dest(as, ir+1, RSET_GPR);
816 Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
817 emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4));
818 emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0));
819 }
820 }
821#else
544 RegSet drop = RSET_SCRATCH; 822 RegSet drop = RSET_SCRATCH;
545 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ 823 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
546 ra_evictset(as, drop); 824 ra_evictset(as, drop);
825 ofs = sps_scale(ir->s);
826#endif
547 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ 827 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */
548 args[0] = ir->op1; /* GCstr *str */ 828 args[0] = ir->op1; /* GCstr *str */
549 args[1] = ASMREF_TMP1; /* TValue *n */ 829 args[1] = ASMREF_TMP1; /* TValue *n */
550 asm_gencall(as, ci, args); 830 asm_gencall(as, ci, args);
551 /* Store the result to the spill slot or temp slots. */ 831 /* Store the result to the spill slot or temp slots. */
552 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), 832 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1),
553 RID_SP, sps_scale(ir->s)); 833 RID_SP, ofs);
554} 834}
555 835
556/* Get pointer to TValue. */ 836/* -- Memory references --------------------------------------------------- */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 837
838#if LJ_64
839/* Store tagged value for ref at base+ofs. */
840static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
558{ 841{
842 RegSet allow = rset_exclude(RSET_GPR, base);
559 IRIns *ir = IR(ref); 843 IRIns *ir = IR(ref);
560 if (irt_isnum(ir->t)) { 844 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
561 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ 845 "store of IR type %d", irt_type(ir->t));
562 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 846 if (irref_isk(ref)) {
563 else /* Otherwise force a spill and use the spill slot. */ 847 TValue k;
564 emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); 848 lj_ir_kvalue(as->J->L, &k, ir);
849 emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
565 } else { 850 } else {
566 /* Otherwise use g->tmptv to hold the TValue. */ 851 Reg src = ra_alloc1(as, ref, allow);
567 RegSet allow = rset_exclude(RSET_GPR, dest); 852 Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47,
568 Reg type; 853 rset_exclude(allow, src));
569 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 854 emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs);
570 if (!irt_ispri(ir->t)) { 855 if (irt_isinteger(ir->t)) {
571 Reg src = ra_alloc1(as, ref, allow); 856 emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type);
572 emit_setgl(as, src, tmptv.gcr); 857 emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0);
858 } else {
859 emit_dst(as, MIPSI_DADDU, RID_TMP, src, type);
573 } 860 }
574 type = ra_allock(as, irt_toitype(ir->t), allow);
575 emit_setgl(as, type, tmptv.it);
576 } 861 }
577} 862}
863#endif
578 864
579static void asm_tostr(ASMState *as, IRIns *ir) 865/* Get pointer to TValue. */
866static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
580{ 867{
581 IRRef args[2]; 868 int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
582 args[0] = ASMREF_L; 869 if ((mode & IRTMPREF_IN1)) {
583 as->gcsteps++; 870 IRIns *ir = IR(ref);
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { 871 if (irt_isnum(ir->t)) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 872 if ((mode & IRTMPREF_OUT1)) {
586 args[1] = ASMREF_TMP1; /* const lua_Number * */ 873#if LJ_SOFTFP
587 asm_setupresult(as, ir, ci); /* GCstr * */ 874 emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
588 asm_gencall(as, ci, args); 875#if LJ_64
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); 876 emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64);
877#else
878 lj_assertA(irref_isk(ref), "unsplit FP op");
879 emit_setgl(as,
880 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
881 tmptv.u32.lo);
882 emit_setgl(as,
883 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
884 tmptv.u32.hi);
885#endif
886#else
887 Reg src = ra_alloc1(as, ref, RSET_FPR);
888 emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
889 emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs);
890#endif
891 } else if (irref_isk(ref)) {
892 /* Use the number constant itself as a TValue. */
893 ra_allockreg(as, igcptr(ir_knum(ir)), dest);
894 } else {
895#if LJ_SOFTFP
896 lj_assertA(0, "unsplit FP op");
897#else
898 /* Otherwise force a spill and use the spill slot. */
899 emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
900#endif
901 }
902 } else {
903 /* Otherwise use g->tmptv to hold the TValue. */
904#if LJ_32
905 Reg type;
906 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs);
907 if (!irt_ispri(ir->t)) {
908 Reg src = ra_alloc1(as, ref, RSET_GPR);
909 emit_setgl(as, src, tmptv.gcr);
910 }
911 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
912 type = ra_alloc1(as, ref+1, RSET_GPR);
913 else
914 type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR);
915 emit_setgl(as, type, tmptv.it);
916#else
917 asm_tvstore64(as, dest, 0, ref);
918 emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs);
919#endif
920 }
590 } else { 921 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 922 emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 } 923 }
596} 924}
597 925
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 926static void asm_aref(ASMState *as, IRIns *ir)
601{ 927{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 928 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -608,14 +934,18 @@ static void asm_aref(ASMState *as, IRIns *ir)
608 ofs += 8*IR(ir->op2)->i; 934 ofs += 8*IR(ir->op2)->i;
609 if (checki16(ofs)) { 935 if (checki16(ofs)) {
610 base = ra_alloc1(as, refa, RSET_GPR); 936 base = ra_alloc1(as, refa, RSET_GPR);
611 emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); 937 emit_tsi(as, MIPSI_AADDIU, dest, base, ofs);
612 return; 938 return;
613 } 939 }
614 } 940 }
615 base = ra_alloc1(as, ir->op1, RSET_GPR); 941 base = ra_alloc1(as, ir->op1, RSET_GPR);
616 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); 942 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
617 emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); 943#if !LJ_TARGET_MIPSR6
944 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base);
618 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); 945 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
946#else
947 emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base);
948#endif
619} 949}
620 950
621/* Inlined hash lookup. Specialized for key type and for const keys. 951/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -626,51 +956,109 @@ static void asm_aref(ASMState *as, IRIns *ir)
626** } while ((n = nextnode(n))); 956** } while ((n = nextnode(n)));
627** return niltv(L); 957** return niltv(L);
628*/ 958*/
629static void asm_href(ASMState *as, IRIns *ir) 959static void asm_href(ASMState *as, IRIns *ir, IROp merge)
630{ 960{
631 RegSet allow = RSET_GPR; 961 RegSet allow = RSET_GPR;
632 int destused = ra_used(ir); 962 int destused = ra_used(ir);
633 Reg dest = ra_dest(as, ir, allow); 963 Reg dest = ra_dest(as, ir, allow);
634 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 964 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
635 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; 965 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
966#if LJ_64
967 Reg cmp64 = RID_NONE;
968#endif
636 IRRef refkey = ir->op2; 969 IRRef refkey = ir->op2;
637 IRIns *irkey = IR(refkey); 970 IRIns *irkey = IR(refkey);
971 int isk = irref_isk(refkey);
638 IRType1 kt = irkey->t; 972 IRType1 kt = irkey->t;
639 uint32_t khash; 973 uint32_t khash;
640 MCLabel l_end, l_loop, l_next; 974 MCLabel l_end, l_loop, l_next;
641 975
642 rset_clear(allow, tab); 976 rset_clear(allow, tab);
643 if (irt_isnum(kt)) { 977#if LJ_SOFTFP32
978 if (!isk) {
979 key = ra_alloc1(as, refkey, allow);
980 rset_clear(allow, key);
981 if (irkey[1].o == IR_HIOP) {
982 if (ra_hasreg((irkey+1)->r)) {
983 type = tmpnum = (irkey+1)->r;
984 tmp1 = ra_scratch(as, allow);
985 rset_clear(allow, tmp1);
986 ra_noweak(as, tmpnum);
987 } else {
988 type = tmpnum = ra_allocref(as, refkey+1, allow);
989 }
990 rset_clear(allow, tmpnum);
991 } else {
992 type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
993 rset_clear(allow, type);
994 }
995 }
996#else
997 if (!LJ_SOFTFP && irt_isnum(kt)) {
644 key = ra_alloc1(as, refkey, RSET_FPR); 998 key = ra_alloc1(as, refkey, RSET_FPR);
645 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); 999 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
646 } else if (!irt_ispri(kt)) { 1000 } else if (!irt_ispri(kt)) {
647 key = ra_alloc1(as, refkey, allow); 1001 key = ra_alloc1(as, refkey, allow);
648 rset_clear(allow, key); 1002 rset_clear(allow, key);
649 type = ra_allock(as, irt_toitype(irkey->t), allow); 1003#if LJ_32
1004 type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
650 rset_clear(allow, type); 1005 rset_clear(allow, type);
1006#endif
651 } 1007 }
1008#endif
652 tmp2 = ra_scratch(as, allow); 1009 tmp2 = ra_scratch(as, allow);
653 rset_clear(allow, tmp2); 1010 rset_clear(allow, tmp2);
1011#if LJ_64
1012 if (LJ_SOFTFP || !irt_isnum(kt)) {
1013 /* Allocate cmp64 register used for 64-bit comparisons */
1014 if (LJ_SOFTFP && irt_isnum(kt)) {
1015 cmp64 = key;
1016 } else if (!isk && irt_isaddr(kt)) {
1017 cmp64 = tmp2;
1018 } else {
1019 int64_t k;
1020 if (isk && irt_isaddr(kt)) {
1021 k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
1022 } else {
1023 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
1024 k = ~((int64_t)~irt_toitype(ir->t) << 47);
1025 }
1026 cmp64 = ra_allock(as, k, allow);
1027 rset_clear(allow, cmp64);
1028 }
1029 }
1030#endif
654 1031
655 /* Key not found in chain: load niltv. */ 1032 /* Key not found in chain: jump to exit (if merged) or load niltv. */
656 l_end = emit_label(as); 1033 l_end = emit_label(as);
657 if (destused) 1034 as->invmcp = NULL;
1035 if (merge == IR_NE)
1036 asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
1037 else if (destused)
658 emit_loada(as, dest, niltvg(J2G(as->J))); 1038 emit_loada(as, dest, niltvg(J2G(as->J)));
659 else
660 *--as->mcp = MIPSI_NOP;
661 /* Follow hash chain until the end. */ 1039 /* Follow hash chain until the end. */
662 emit_move(as, dest, tmp1); 1040 emit_move(as, dest, tmp1);
663 l_loop = --as->mcp; 1041 l_loop = --as->mcp;
664 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); 1042 emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next));
665 l_next = emit_label(as); 1043 l_next = emit_label(as);
666 1044
667 /* Type and value comparison. */ 1045 /* Type and value comparison. */
668 if (irt_isnum(kt)) { 1046 if (merge == IR_EQ) { /* Must match asm_guard(). */
1047 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
1048 l_end = asm_exitstub_addr(as);
1049 }
1050 if (!LJ_SOFTFP && irt_isnum(kt)) {
1051#if !LJ_TARGET_MIPSR6
669 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 1052 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
670 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 1053 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
671 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 1054#else
1055 emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end);
1056 emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key);
1057#endif
1058 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
672 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); 1059 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
673 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); 1060 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
1061#if LJ_32
674 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); 1062 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
675 } else { 1063 } else {
676 if (irt_ispri(kt)) { 1064 if (irt_ispri(kt)) {
@@ -683,36 +1071,52 @@ static void asm_href(ASMState *as, IRIns *ir)
683 } 1071 }
684 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); 1072 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
685 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); 1073 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
1074#else
1075 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
1076 emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
1077 emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
1078 } else {
1079 emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end);
1080 emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
1081 }
1082 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
1083 if (!isk && irt_isaddr(kt)) {
1084 type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
1085 emit_dst(as, MIPSI_DADDU, tmp2, key, type);
1086 rset_clear(allow, type);
1087 }
1088#endif
686 1089
687 /* Load main position relative to tab->node into dest. */ 1090 /* Load main position relative to tab->node into dest. */
688 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 1091 khash = isk ? ir_khash(as, irkey) : 1;
689 if (khash == 0) { 1092 if (khash == 0) {
690 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 1093 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
691 } else { 1094 } else {
692 Reg tmphash = tmp1; 1095 Reg tmphash = tmp1;
693 if (irref_isk(refkey)) 1096 if (isk)
694 tmphash = ra_allock(as, khash, allow); 1097 tmphash = ra_allock(as, khash, allow);
695 emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); 1098 emit_dst(as, MIPSI_AADDU, dest, dest, tmp1);
696 lua_assert(sizeof(Node) == 24); 1099 lj_assertA(sizeof(Node) == 24, "bad Node size");
697 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); 1100 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
698 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); 1101 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
699 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); 1102 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
700 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); 1103 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
701 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 1104 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
702 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); 1105 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
703 if (irref_isk(refkey)) { 1106 if (isk) {
704 /* Nothing to do. */ 1107 /* Nothing to do. */
705 } else if (irt_isstr(kt)) { 1108 } else if (irt_isstr(kt)) {
706 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); 1109 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid));
707 } else { /* Must match with hash*() in lj_tab.c. */ 1110 } else { /* Must match with hash*() in lj_tab.c. */
708 emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); 1111 emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
709 emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); 1112 emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
710 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); 1113 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
711 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); 1114 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
712 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); 1115 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
713 if (irt_isnum(kt)) { 1116#if LJ_32
1117 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
714 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); 1118 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
715 if ((as->flags & JIT_F_MIPS32R2)) { 1119 if ((as->flags & JIT_F_MIPSXXR2)) {
716 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); 1120 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
717 } else { 1121 } else {
718 emit_dst(as, MIPSI_OR, dest, dest, tmp1); 1122 emit_dst(as, MIPSI_OR, dest, dest, tmp1);
@@ -720,13 +1124,35 @@ static void asm_href(ASMState *as, IRIns *ir)
720 emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); 1124 emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
721 } 1125 }
722 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); 1126 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
1127#if LJ_SOFTFP
1128 emit_ds(as, MIPSI_MOVE, tmp1, type);
1129 emit_ds(as, MIPSI_MOVE, tmp2, key);
1130#else
723 emit_tg(as, MIPSI_MFC1, tmp2, key); 1131 emit_tg(as, MIPSI_MFC1, tmp2, key);
724 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 1132 emit_tg(as, MIPSI_MFC1, tmp1, key+1);
1133#endif
725 } else { 1134 } else {
726 emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); 1135 emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
727 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); 1136 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
728 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); 1137 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
729 } 1138 }
1139#else
1140 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
1141 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
1142 if (irt_isnum(kt)) {
1143 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
1144 emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0);
1145 emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
1146#if !LJ_SOFTFP
1147 emit_tg(as, MIPSI_DMFC1, tmp1, key);
1148#endif
1149 } else {
1150 checkmclim(as);
1151 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
1152 emit_dta(as, MIPSI_SLL, tmp2, key, 0);
1153 emit_dst(as, MIPSI_DADDU, tmp1, key, type);
1154 }
1155#endif
730 } 1156 }
731 } 1157 }
732} 1158}
@@ -739,17 +1165,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
739 int32_t kofs = ofs + (int32_t)offsetof(Node, key); 1165 int32_t kofs = ofs + (int32_t)offsetof(Node, key);
740 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; 1166 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
741 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 1167 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
742 Reg key = RID_NONE, type = RID_TMP, idx = node;
743 RegSet allow = rset_exclude(RSET_GPR, node); 1168 RegSet allow = rset_exclude(RSET_GPR, node);
1169 Reg idx = node;
1170#if LJ_32
1171 Reg key = RID_NONE, type = RID_TMP;
744 int32_t lo, hi; 1172 int32_t lo, hi;
745 lua_assert(ofs % sizeof(Node) == 0); 1173#else
1174 Reg key = ra_scratch(as, allow);
1175 int64_t k;
1176#endif
1177 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
746 if (ofs > 32736) { 1178 if (ofs > 32736) {
747 idx = dest; 1179 idx = dest;
748 rset_clear(allow, dest); 1180 rset_clear(allow, dest);
749 kofs = (int32_t)offsetof(Node, key); 1181 kofs = (int32_t)offsetof(Node, key);
750 } else if (ra_hasreg(dest)) { 1182 } else if (ra_hasreg(dest)) {
751 emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); 1183 emit_tsi(as, MIPSI_AADDIU, dest, node, ofs);
752 } 1184 }
1185#if LJ_32
753 if (!irt_ispri(irkey->t)) { 1186 if (!irt_ispri(irkey->t)) {
754 key = ra_scratch(as, allow); 1187 key = ra_scratch(as, allow);
755 rset_clear(allow, key); 1188 rset_clear(allow, key);
@@ -768,22 +1201,20 @@ nolo:
768 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); 1201 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
769 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); 1202 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
770 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); 1203 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
771 if (ofs > 32736) 1204#else
772 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); 1205 if (irt_ispri(irkey->t)) {
773} 1206 lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
774 1207 k = ~((int64_t)~irt_toitype(irkey->t) << 47);
775static void asm_newref(ASMState *as, IRIns *ir) 1208 } else if (irt_isnum(irkey->t)) {
776{ 1209 k = (int64_t)ir_knum(irkey)->u64;
777 if (ir->r != RID_SINK) { 1210 } else {
778 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; 1211 k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
779 IRRef args[3];
780 args[0] = ASMREF_L; /* lua_State *L */
781 args[1] = ir->op1; /* GCtab *t */
782 args[2] = ASMREF_TMP1; /* cTValue *key */
783 asm_setupresult(as, ir, ci); /* TValue * */
784 asm_gencall(as, ci, args);
785 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
786 } 1212 }
1213 asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow));
1214 emit_tsi(as, MIPSI_LD, key, idx, kofs);
1215#endif
1216 if (ofs > 32736)
1217 emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow));
787} 1218}
788 1219
789static void asm_uref(ASMState *as, IRIns *ir) 1220static void asm_uref(ASMState *as, IRIns *ir)
@@ -792,30 +1223,31 @@ static void asm_uref(ASMState *as, IRIns *ir)
792 if (irref_isk(ir->op1)) { 1223 if (irref_isk(ir->op1)) {
793 GCfunc *fn = ir_kfunc(IR(ir->op1)); 1224 GCfunc *fn = ir_kfunc(IR(ir->op1));
794 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 1225 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
795 emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); 1226 emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
796 } else { 1227 } else {
797 Reg uv = ra_scratch(as, RSET_GPR); 1228 Reg uv = ra_scratch(as, RSET_GPR);
798 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 1229 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
799 if (ir->o == IR_UREFC) { 1230 if (ir->o == IR_UREFC) {
800 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1231 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
801 emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); 1232 emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
802 emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); 1233 emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
803 } else { 1234 } else {
804 emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); 1235 emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
805 } 1236 }
806 emit_tsi(as, MIPSI_LW, uv, func, 1237 emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
807 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); 1238 (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
808 } 1239 }
809} 1240}
810 1241
811static void asm_fref(ASMState *as, IRIns *ir) 1242static void asm_fref(ASMState *as, IRIns *ir)
812{ 1243{
813 UNUSED(as); UNUSED(ir); 1244 UNUSED(as); UNUSED(ir);
814 lua_assert(!ra_used(ir)); 1245 lj_assertA(!ra_used(ir), "unfused FREF");
815} 1246}
816 1247
817static void asm_strref(ASMState *as, IRIns *ir) 1248static void asm_strref(ASMState *as, IRIns *ir)
818{ 1249{
1250#if LJ_32
819 Reg dest = ra_dest(as, ir, RSET_GPR); 1251 Reg dest = ra_dest(as, ir, RSET_GPR);
820 IRRef ref = ir->op2, refk = ir->op1; 1252 IRRef ref = ir->op2, refk = ir->op1;
821 int32_t ofs = (int32_t)sizeof(GCstr); 1253 int32_t ofs = (int32_t)sizeof(GCstr);
@@ -847,49 +1279,79 @@ static void asm_strref(ASMState *as, IRIns *ir)
847 else 1279 else
848 emit_dst(as, MIPSI_ADDU, dest, r, 1280 emit_dst(as, MIPSI_ADDU, dest, r,
849 ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); 1281 ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
1282#else
1283 RegSet allow = RSET_GPR;
1284 Reg dest = ra_dest(as, ir, allow);
1285 Reg base = ra_alloc1(as, ir->op1, allow);
1286 IRIns *irr = IR(ir->op2);
1287 int32_t ofs = sizeof(GCstr);
1288 rset_clear(allow, base);
1289 if (irref_isk(ir->op2) && checki16(ofs + irr->i)) {
1290 emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i);
1291 } else {
1292 emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs);
1293 emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow));
1294 }
1295#endif
850} 1296}
851 1297
852/* -- Loads and stores ---------------------------------------------------- */ 1298/* -- Loads and stores ---------------------------------------------------- */
853 1299
854static MIPSIns asm_fxloadins(IRIns *ir) 1300static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir)
855{ 1301{
1302 UNUSED(as);
856 switch (irt_type(ir->t)) { 1303 switch (irt_type(ir->t)) {
857 case IRT_I8: return MIPSI_LB; 1304 case IRT_I8: return MIPSI_LB;
858 case IRT_U8: return MIPSI_LBU; 1305 case IRT_U8: return MIPSI_LBU;
859 case IRT_I16: return MIPSI_LH; 1306 case IRT_I16: return MIPSI_LH;
860 case IRT_U16: return MIPSI_LHU; 1307 case IRT_U16: return MIPSI_LHU;
861 case IRT_NUM: return MIPSI_LDC1; 1308 case IRT_NUM:
862 case IRT_FLOAT: return MIPSI_LWC1; 1309 lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
863 default: return MIPSI_LW; 1310 if (!LJ_SOFTFP) return MIPSI_LDC1;
1311 /* fallthrough */
1312 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
1313 /* fallthrough */
1314 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
864 } 1315 }
865} 1316}
866 1317
867static MIPSIns asm_fxstoreins(IRIns *ir) 1318static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir)
868{ 1319{
1320 UNUSED(as);
869 switch (irt_type(ir->t)) { 1321 switch (irt_type(ir->t)) {
870 case IRT_I8: case IRT_U8: return MIPSI_SB; 1322 case IRT_I8: case IRT_U8: return MIPSI_SB;
871 case IRT_I16: case IRT_U16: return MIPSI_SH; 1323 case IRT_I16: case IRT_U16: return MIPSI_SH;
872 case IRT_NUM: return MIPSI_SDC1; 1324 case IRT_NUM:
873 case IRT_FLOAT: return MIPSI_SWC1; 1325 lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
874 default: return MIPSI_SW; 1326 if (!LJ_SOFTFP) return MIPSI_SDC1;
1327 /* fallthrough */
1328 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
1329 /* fallthrough */
1330 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
875 } 1331 }
876} 1332}
877 1333
878static void asm_fload(ASMState *as, IRIns *ir) 1334static void asm_fload(ASMState *as, IRIns *ir)
879{ 1335{
880 Reg dest = ra_dest(as, ir, RSET_GPR); 1336 Reg dest = ra_dest(as, ir, RSET_GPR);
881 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1337 MIPSIns mi = asm_fxloadins(as, ir);
882 MIPSIns mi = asm_fxloadins(ir); 1338 Reg idx;
883 int32_t ofs; 1339 int32_t ofs;
884 if (ir->op2 == IRFL_TAB_ARRAY) { 1340 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
885 ofs = asm_fuseabase(as, ir->op1); 1341 idx = RID_JGL;
886 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1342 ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
887 emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); 1343 } else {
888 return; 1344 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1345 if (ir->op2 == IRFL_TAB_ARRAY) {
1346 ofs = asm_fuseabase(as, ir->op1);
1347 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1348 emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs);
1349 return;
1350 }
889 } 1351 }
1352 ofs = field_ofs[ir->op2];
890 } 1353 }
891 ofs = field_ofs[ir->op2]; 1354 lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
892 lua_assert(!irt_isfp(ir->t));
893 emit_tsi(as, mi, dest, idx, ofs); 1355 emit_tsi(as, mi, dest, idx, ofs);
894} 1356}
895 1357
@@ -900,51 +1362,89 @@ static void asm_fstore(ASMState *as, IRIns *ir)
900 IRIns *irf = IR(ir->op1); 1362 IRIns *irf = IR(ir->op1);
901 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 1363 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
902 int32_t ofs = field_ofs[irf->op2]; 1364 int32_t ofs = field_ofs[irf->op2];
903 MIPSIns mi = asm_fxstoreins(ir); 1365 MIPSIns mi = asm_fxstoreins(as, ir);
904 lua_assert(!irt_isfp(ir->t)); 1366 lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
905 emit_tsi(as, mi, src, idx, ofs); 1367 emit_tsi(as, mi, src, idx, ofs);
906 } 1368 }
907} 1369}
908 1370
909static void asm_xload(ASMState *as, IRIns *ir) 1371static void asm_xload(ASMState *as, IRIns *ir)
910{ 1372{
911 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 1373 Reg dest = ra_dest(as, ir,
912 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1374 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
913 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1375 lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
1376 "unaligned XLOAD");
1377 asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
914} 1378}
915 1379
916static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1380static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
917{ 1381{
918 if (ir->r != RID_SINK) { 1382 if (ir->r != RID_SINK) {
919 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 1383 Reg src = ra_alloc1z(as, ir->op2,
920 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 1384 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1385 asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
921 rset_exclude(RSET_GPR, src), ofs); 1386 rset_exclude(RSET_GPR, src), ofs);
922 } 1387 }
923} 1388}
924 1389
1390#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1391
925static void asm_ahuvload(ASMState *as, IRIns *ir) 1392static void asm_ahuvload(ASMState *as, IRIns *ir)
926{ 1393{
927 IRType1 t = ir->t; 1394 int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
928 Reg dest = RID_NONE, type = RID_TMP, idx; 1395 Reg dest = RID_NONE, type = RID_TMP, idx;
929 RegSet allow = RSET_GPR; 1396 RegSet allow = RSET_GPR;
930 int32_t ofs = 0; 1397 int32_t ofs = 0;
1398 IRType1 t = ir->t;
1399 if (hiop) {
1400 t.irt = IRT_NUM;
1401 if (ra_used(ir+1)) {
1402 type = ra_dest(as, ir+1, allow);
1403 rset_clear(allow, type);
1404 }
1405 }
931 if (ra_used(ir)) { 1406 if (ra_used(ir)) {
932 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1407 lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
933 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1408 irt_isint(ir->t) || irt_isaddr(ir->t),
1409 "bad load type %d", irt_type(ir->t));
1410 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
934 rset_clear(allow, dest); 1411 rset_clear(allow, dest);
1412#if LJ_64
1413 if (irt_isaddr(t))
1414 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1415 else if (irt_isint(t))
1416 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1417#endif
935 } 1418 }
936 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1419 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
937 rset_clear(allow, idx); 1420 rset_clear(allow, idx);
938 if (irt_isnum(t)) { 1421 if (irt_isnum(t)) {
939 asm_guard(as, MIPSI_BEQ, type, RID_ZERO); 1422 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
940 emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM); 1423 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
941 if (ra_hasreg(dest))
942 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
943 } else { 1424 } else {
944 asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow)); 1425 asm_guard(as, MIPSI_BNE, type,
945 if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); 1426 ra_allock(as, (int32_t)irt_toitype(t), allow));
1427 }
1428#if LJ_32
1429 if (ra_hasreg(dest)) {
1430 if (!LJ_SOFTFP && irt_isnum(t))
1431 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
1432 else
1433 emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
946 } 1434 }
947 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); 1435 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
1436#else
1437 if (ra_hasreg(dest)) {
1438 if (!LJ_SOFTFP && irt_isnum(t)) {
1439 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
1440 dest = type;
1441 }
1442 } else {
1443 dest = type;
1444 }
1445 emit_dta(as, MIPSI_DSRA32, type, dest, 15);
1446 emit_tsi(as, MIPSI_LD, dest, idx, ofs);
1447#endif
948} 1448}
949 1449
950static void asm_ahustore(ASMState *as, IRIns *ir) 1450static void asm_ahustore(ASMState *as, IRIns *ir)
@@ -954,81 +1454,180 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
954 int32_t ofs = 0; 1454 int32_t ofs = 0;
955 if (ir->r == RID_SINK) 1455 if (ir->r == RID_SINK)
956 return; 1456 return;
957 if (irt_isnum(ir->t)) { 1457 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
958 src = ra_alloc1(as, ir->op2, RSET_FPR); 1458 src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
1459 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1460 emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs);
959 } else { 1461 } else {
1462#if LJ_32
960 if (!irt_ispri(ir->t)) { 1463 if (!irt_ispri(ir->t)) {
961 src = ra_alloc1(as, ir->op2, allow); 1464 src = ra_alloc1(as, ir->op2, allow);
962 rset_clear(allow, src); 1465 rset_clear(allow, src);
963 } 1466 }
964 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1467 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
1468 type = ra_alloc1(as, (ir+1)->op2, allow);
1469 else
1470 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
965 rset_clear(allow, type); 1471 rset_clear(allow, type);
966 } 1472 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
967 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
968 if (irt_isnum(ir->t)) {
969 emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
970 } else {
971 if (ra_hasreg(src)) 1473 if (ra_hasreg(src))
972 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); 1474 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
973 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); 1475 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
1476#else
1477 Reg tmp = RID_TMP;
1478 if (irt_ispri(ir->t)) {
1479 tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
1480 rset_clear(allow, tmp);
1481 } else {
1482 src = ra_alloc1(as, ir->op2, allow);
1483 rset_clear(allow, src);
1484 type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
1485 rset_clear(allow, type);
1486 }
1487 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1488 emit_tsi(as, MIPSI_SD, tmp, idx, ofs);
1489 if (ra_hasreg(src)) {
1490 if (irt_isinteger(ir->t)) {
1491 emit_dst(as, MIPSI_DADDU, tmp, tmp, type);
1492 emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0);
1493 } else {
1494 emit_dst(as, MIPSI_DADDU, tmp, src, type);
1495 }
1496 }
1497#endif
974 } 1498 }
975} 1499}
976 1500
977static void asm_sload(ASMState *as, IRIns *ir) 1501static void asm_sload(ASMState *as, IRIns *ir)
978{ 1502{
979 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
980 IRType1 t = ir->t;
981 Reg dest = RID_NONE, type = RID_NONE, base; 1503 Reg dest = RID_NONE, type = RID_NONE, base;
982 RegSet allow = RSET_GPR; 1504 RegSet allow = RSET_GPR;
983 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1505 IRType1 t = ir->t;
984 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1506#if LJ_32
985 lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1507 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1508 int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
1509 if (hiop)
1510 t.irt = IRT_NUM;
1511#else
1512 int32_t ofs = 8*((int32_t)ir->op1-2);
1513#endif
1514 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1515 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1516 lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1517 "inconsistent SLOAD variant");
1518#if LJ_SOFTFP32
1519 lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
1520 "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
1521 if (hiop && ra_used(ir+1)) {
1522 type = ra_dest(as, ir+1, allow);
1523 rset_clear(allow, type);
1524 }
1525#else
986 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1526 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
987 dest = ra_scratch(as, RSET_FPR); 1527 dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR);
988 asm_tointg(as, ir, dest); 1528 asm_tointg(as, ir, dest);
989 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1529 t.irt = IRT_NUM; /* Continue with a regular number type check. */
990 } else if (ra_used(ir)) { 1530 } else
991 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1531#endif
992 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1532 if (ra_used(ir)) {
1533 lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
1534 irt_isint(ir->t) || irt_isaddr(ir->t),
1535 "bad SLOAD type %d", irt_type(ir->t));
1536 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
993 rset_clear(allow, dest); 1537 rset_clear(allow, dest);
994 base = ra_alloc1(as, REF_BASE, allow); 1538 base = ra_alloc1(as, REF_BASE, allow);
995 rset_clear(allow, base); 1539 rset_clear(allow, base);
996 if ((ir->op2 & IRSLOAD_CONVERT)) { 1540 if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) {
997 if (irt_isint(t)) { 1541 if (irt_isint(t)) {
998 Reg tmp = ra_scratch(as, RSET_FPR); 1542 Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
1543#if LJ_SOFTFP
1544 ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
1545 ra_destreg(as, ir, RID_RET);
1546 emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0);
1547 if (tmp != REGARG_FIRSTGPR)
1548 emit_move(as, REGARG_FIRSTGPR, tmp);
1549#else
999 emit_tg(as, MIPSI_MFC1, dest, tmp); 1550 emit_tg(as, MIPSI_MFC1, dest, tmp);
1000 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 1551 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1552#endif
1001 dest = tmp; 1553 dest = tmp;
1002 t.irt = IRT_NUM; /* Check for original type. */ 1554 t.irt = IRT_NUM; /* Check for original type. */
1003 } else { 1555 } else {
1004 Reg tmp = ra_scratch(as, RSET_GPR); 1556 Reg tmp = ra_scratch(as, RSET_GPR);
1557#if LJ_SOFTFP
1558 ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
1559 ra_destreg(as, ir, RID_RET);
1560 emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0);
1561 emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0);
1562#else
1005 emit_fg(as, MIPSI_CVT_D_W, dest, dest); 1563 emit_fg(as, MIPSI_CVT_D_W, dest, dest);
1006 emit_tg(as, MIPSI_MTC1, tmp, dest); 1564 emit_tg(as, MIPSI_MTC1, tmp, dest);
1565#endif
1007 dest = tmp; 1566 dest = tmp;
1008 t.irt = IRT_INT; /* Check for original type. */ 1567 t.irt = IRT_INT; /* Check for original type. */
1009 } 1568 }
1010 } 1569 }
1570#if LJ_64
1571 else if (irt_isaddr(t)) {
1572 /* Clear type from pointers. */
1573 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1574 } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
1575 /* Sign-extend integers. */
1576 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1577 }
1578#endif
1011 goto dotypecheck; 1579 goto dotypecheck;
1012 } 1580 }
1013 base = ra_alloc1(as, REF_BASE, allow); 1581 base = ra_alloc1(as, REF_BASE, allow);
1014 rset_clear(allow, base); 1582 rset_clear(allow, base);
1015dotypecheck: 1583dotypecheck:
1016 if (irt_isnum(t)) { 1584#if LJ_32
1017 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1585 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1018 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1586 if (ra_noreg(type))
1019 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
1020 type = RID_TMP; 1587 type = RID_TMP;
1021 } 1588 if (irt_isnum(t)) {
1022 if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); 1589 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1023 } else { 1590 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
1024 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1591 } else {
1025 Reg ktype = ra_allock(as, irt_toitype(t), allow); 1592 Reg ktype = ra_allock(as, irt_toitype(t), allow);
1026 asm_guard(as, MIPSI_BNE, RID_TMP, ktype); 1593 asm_guard(as, MIPSI_BNE, type, ktype);
1027 type = RID_TMP;
1028 } 1594 }
1029 if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
1030 } 1595 }
1031 if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); 1596 if (ra_hasreg(dest)) {
1597 if (!LJ_SOFTFP && irt_isnum(t))
1598 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1599 else
1600 emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
1601 }
1602 if (ra_hasreg(type))
1603 emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
1604#else
1605 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1606 type = dest < RID_MAX_GPR ? dest : RID_TMP;
1607 if (irt_ispri(t)) {
1608 asm_guard(as, MIPSI_BNE, type,
1609 ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
1610 } else {
1611 if (irt_isnum(t)) {
1612 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1613 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
1614 if (!LJ_SOFTFP && ra_hasreg(dest))
1615 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1616 } else {
1617 asm_guard(as, MIPSI_BNE, RID_TMP,
1618 ra_allock(as, (int32_t)irt_toitype(t), allow));
1619 }
1620 emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15);
1621 }
1622 emit_tsi(as, MIPSI_LD, type, base, ofs);
1623 } else if (ra_hasreg(dest)) {
1624 if (!LJ_SOFTFP && irt_isnum(t))
1625 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1626 else
1627 emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
1628 ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0));
1629 }
1630#endif
1032} 1631}
1033 1632
1034/* -- Allocations --------------------------------------------------------- */ 1633/* -- Allocations --------------------------------------------------------- */
@@ -1037,19 +1636,16 @@ dotypecheck:
1037static void asm_cnew(ASMState *as, IRIns *ir) 1636static void asm_cnew(ASMState *as, IRIns *ir)
1038{ 1637{
1039 CTState *cts = ctype_ctsG(J2G(as->J)); 1638 CTState *cts = ctype_ctsG(J2G(as->J));
1040 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1639 CTypeID id = (CTypeID)IR(ir->op1)->i;
1041 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1640 CTSize sz;
1042 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1641 CTInfo info = lj_ctype_info(cts, id, &sz);
1043 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1642 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1044 IRRef args[2]; 1643 IRRef args[4];
1045 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1046 RegSet drop = RSET_SCRATCH; 1644 RegSet drop = RSET_SCRATCH;
1047 lua_assert(sz != CTSIZE_INVALID); 1645 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1646 "bad CNEW/CNEWI operands");
1048 1647
1049 args[0] = ASMREF_L; /* lua_State *L */
1050 args[1] = ASMREF_TMP1; /* MSize size */
1051 as->gcsteps++; 1648 as->gcsteps++;
1052
1053 if (ra_hasreg(ir->r)) 1649 if (ra_hasreg(ir->r))
1054 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1650 rset_clear(drop, ir->r); /* Dest reg handled below. */
1055 ra_evictset(as, drop); 1651 ra_evictset(as, drop);
@@ -1058,11 +1654,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1058 1654
1059 /* Initialize immutable cdata object. */ 1655 /* Initialize immutable cdata object. */
1060 if (ir->o == IR_CNEWI) { 1656 if (ir->o == IR_CNEWI) {
1657 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1658#if LJ_32
1061 int32_t ofs = sizeof(GCcdata); 1659 int32_t ofs = sizeof(GCcdata);
1062 lua_assert(sz == 4 || sz == 8);
1063 if (sz == 8) { 1660 if (sz == 8) {
1064 ofs += 4; 1661 ofs += 4;
1065 lua_assert((ir+1)->o == IR_HIOP); 1662 lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
1066 if (LJ_LE) ir++; 1663 if (LJ_LE) ir++;
1067 } 1664 }
1068 for (;;) { 1665 for (;;) {
@@ -1072,18 +1669,33 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1072 if (ofs == sizeof(GCcdata)) break; 1669 if (ofs == sizeof(GCcdata)) break;
1073 ofs -= 4; if (LJ_BE) ir++; else ir--; 1670 ofs -= 4; if (LJ_BE) ir++; else ir--;
1074 } 1671 }
1672#else
1673 emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow),
1674 RID_RET, sizeof(GCcdata));
1675#endif
1676 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1677 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1678 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1679 args[0] = ASMREF_L; /* lua_State *L */
1680 args[1] = ir->op1; /* CTypeID id */
1681 args[2] = ir->op2; /* CTSize sz */
1682 args[3] = ASMREF_TMP1; /* CTSize align */
1683 asm_gencall(as, ci, args);
1684 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1685 return;
1075 } 1686 }
1687
1076 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1688 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1077 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1689 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1078 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1690 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1079 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); 1691 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
1080 emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1692 emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1693 args[0] = ASMREF_L; /* lua_State *L */
1694 args[1] = ASMREF_TMP1; /* MSize size */
1081 asm_gencall(as, ci, args); 1695 asm_gencall(as, ci, args);
1082 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1696 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1083 ra_releasetmp(as, ASMREF_TMP1)); 1697 ra_releasetmp(as, ASMREF_TMP1));
1084} 1698}
1085#else
1086#define asm_cnew(as, ir) ((void)0)
1087#endif 1699#endif
1088 1700
1089/* -- Write barriers ------------------------------------------------------ */ 1701/* -- Write barriers ------------------------------------------------------ */
@@ -1094,7 +1706,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1094 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1706 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1095 Reg link = RID_TMP; 1707 Reg link = RID_TMP;
1096 MCLabel l_end = emit_label(as); 1708 MCLabel l_end = emit_label(as);
1097 emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); 1709 emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist));
1098 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1710 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
1099 emit_setgl(as, tab, gc.grayagain); 1711 emit_setgl(as, tab, gc.grayagain);
1100 emit_getgl(as, link, gc.grayagain); 1712 emit_getgl(as, link, gc.grayagain);
@@ -1111,13 +1723,13 @@ static void asm_obar(ASMState *as, IRIns *ir)
1111 MCLabel l_end; 1723 MCLabel l_end;
1112 Reg obj, val, tmp; 1724 Reg obj, val, tmp;
1113 /* No need for other object barriers (yet). */ 1725 /* No need for other object barriers (yet). */
1114 lua_assert(IR(ir->op1)->o == IR_UREFC); 1726 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1115 ra_evictset(as, RSET_SCRATCH); 1727 ra_evictset(as, RSET_SCRATCH);
1116 l_end = emit_label(as); 1728 l_end = emit_label(as);
1117 args[0] = ASMREF_TMP1; /* global_State *g */ 1729 args[0] = ASMREF_TMP1; /* global_State *g */
1118 args[1] = ir->op1; /* TValue *tv */ 1730 args[1] = ir->op1; /* TValue *tv */
1119 asm_gencall(as, ci, args); 1731 asm_gencall(as, ci, args);
1120 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 1732 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1121 obj = IR(ir->op1)->r; 1733 obj = IR(ir->op1)->r;
1122 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); 1734 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
1123 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); 1735 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
@@ -1132,6 +1744,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1132 1744
1133/* -- Arithmetic and logic operations ------------------------------------- */ 1745/* -- Arithmetic and logic operations ------------------------------------- */
1134 1746
1747#if !LJ_SOFTFP
1135static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) 1748static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
1136{ 1749{
1137 Reg dest = ra_dest(as, ir, RSET_FPR); 1750 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1146,83 +1759,147 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1146 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); 1759 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1147 emit_fg(as, mi, dest, left); 1760 emit_fg(as, mi, dest, left);
1148} 1761}
1762#endif
1149 1763
1150static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1764#if !LJ_SOFTFP32
1151{ 1765static void asm_fpmath(ASMState *as, IRIns *ir)
1152 IRIns *irp = IR(ir->op1); 1766{
1153 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1767#if !LJ_SOFTFP
1154 IRIns *irpp = IR(irp->op1); 1768 if (ir->op2 <= IRFPM_TRUNC)
1155 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1769 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1156 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1770 else if (ir->op2 == IRFPM_SQRT)
1157 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1771 asm_fpunary(as, ir, MIPSI_SQRT_D);
1158 IRRef args[2]; 1772 else
1159 args[0] = irpp->op1; 1773#endif
1160 args[1] = irp->op2; 1774 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1161 asm_setupresult(as, ir, ci);
1162 asm_gencall(as, ci, args);
1163 return 1;
1164 }
1165 }
1166 return 0;
1167} 1775}
1776#endif
1777
1778#if !LJ_SOFTFP
1779#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D)
1780#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D)
1781#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D)
1782#elif LJ_64 /* && LJ_SOFTFP */
1783#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add)
1784#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub)
1785#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul)
1786#endif
1168 1787
1169static void asm_add(ASMState *as, IRIns *ir) 1788static void asm_add(ASMState *as, IRIns *ir)
1170{ 1789{
1171 if (irt_isnum(ir->t)) { 1790 IRType1 t = ir->t;
1172 asm_fparith(as, ir, MIPSI_ADD_D); 1791#if !LJ_SOFTFP32
1173 } else { 1792 if (irt_isnum(t)) {
1793 asm_fpadd(as, ir);
1794 } else
1795#endif
1796 {
1797 /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */
1174 Reg dest = ra_dest(as, ir, RSET_GPR); 1798 Reg dest = ra_dest(as, ir, RSET_GPR);
1175 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1799 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1176 if (irref_isk(ir->op2)) { 1800 if (irref_isk(ir->op2)) {
1177 int32_t k = IR(ir->op2)->i; 1801 intptr_t k = get_kval(as, ir->op2);
1178 if (checki16(k)) { 1802 if (checki16(k)) {
1179 emit_tsi(as, MIPSI_ADDIU, dest, left, k); 1803 emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest,
1804 left, k);
1180 return; 1805 return;
1181 } 1806 }
1182 } 1807 }
1183 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); 1808 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1184 emit_dst(as, MIPSI_ADDU, dest, left, right); 1809 emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest,
1810 left, right);
1185 } 1811 }
1186} 1812}
1187 1813
1188static void asm_sub(ASMState *as, IRIns *ir) 1814static void asm_sub(ASMState *as, IRIns *ir)
1189{ 1815{
1816#if !LJ_SOFTFP32
1190 if (irt_isnum(ir->t)) { 1817 if (irt_isnum(ir->t)) {
1191 asm_fparith(as, ir, MIPSI_SUB_D); 1818 asm_fpsub(as, ir);
1192 } else { 1819 } else
1820#endif
1821 {
1193 Reg dest = ra_dest(as, ir, RSET_GPR); 1822 Reg dest = ra_dest(as, ir, RSET_GPR);
1194 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1823 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1195 right = (left >> 8); left &= 255; 1824 right = (left >> 8); left &= 255;
1196 emit_dst(as, MIPSI_SUBU, dest, left, right); 1825 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1826 left, right);
1197 } 1827 }
1198} 1828}
1199 1829
1200static void asm_mul(ASMState *as, IRIns *ir) 1830static void asm_mul(ASMState *as, IRIns *ir)
1201{ 1831{
1832#if !LJ_SOFTFP32
1202 if (irt_isnum(ir->t)) { 1833 if (irt_isnum(ir->t)) {
1203 asm_fparith(as, ir, MIPSI_MUL_D); 1834 asm_fpmul(as, ir);
1204 } else { 1835 } else
1836#endif
1837 {
1205 Reg dest = ra_dest(as, ir, RSET_GPR); 1838 Reg dest = ra_dest(as, ir, RSET_GPR);
1206 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1839 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1207 right = (left >> 8); left &= 255; 1840 right = (left >> 8); left &= 255;
1208 emit_dst(as, MIPSI_MUL, dest, left, right); 1841 if (LJ_64 && irt_is64(ir->t)) {
1842#if !LJ_TARGET_MIPSR6
1843 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1844 emit_dst(as, MIPSI_DMULT, 0, left, right);
1845#else
1846 emit_dst(as, MIPSI_DMUL, dest, left, right);
1847#endif
1848 } else {
1849 emit_dst(as, MIPSI_MUL, dest, left, right);
1850 }
1209 } 1851 }
1210} 1852}
1211 1853
1854#if !LJ_SOFTFP32
1855static void asm_fpdiv(ASMState *as, IRIns *ir)
1856{
1857#if !LJ_SOFTFP
1858 asm_fparith(as, ir, MIPSI_DIV_D);
1859#else
1860 asm_callid(as, ir, IRCALL_softfp_div);
1861#endif
1862}
1863#endif
1864
1212static void asm_neg(ASMState *as, IRIns *ir) 1865static void asm_neg(ASMState *as, IRIns *ir)
1213{ 1866{
1867#if !LJ_SOFTFP
1214 if (irt_isnum(ir->t)) { 1868 if (irt_isnum(ir->t)) {
1215 asm_fpunary(as, ir, MIPSI_NEG_D); 1869 asm_fpunary(as, ir, MIPSI_NEG_D);
1216 } else { 1870 } else
1871#elif LJ_64 /* && LJ_SOFTFP */
1872 if (irt_isnum(ir->t)) {
1873 Reg dest = ra_dest(as, ir, RSET_GPR);
1874 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1875 emit_dst(as, MIPSI_XOR, dest, left,
1876 ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest)));
1877 } else
1878#endif
1879 {
1217 Reg dest = ra_dest(as, ir, RSET_GPR); 1880 Reg dest = ra_dest(as, ir, RSET_GPR);
1218 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1881 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1219 emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); 1882 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1883 RID_ZERO, left);
1220 } 1884 }
1221} 1885}
1222 1886
1887#if !LJ_SOFTFP
1888#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
1889#elif LJ_64 /* && LJ_SOFTFP */
1890static void asm_abs(ASMState *as, IRIns *ir)
1891{
1892 Reg dest = ra_dest(as, ir, RSET_GPR);
1893 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1894 emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0);
1895}
1896#endif
1897
1223static void asm_arithov(ASMState *as, IRIns *ir) 1898static void asm_arithov(ASMState *as, IRIns *ir)
1224{ 1899{
1900 /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
1225 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1901 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
1902 lj_assertA(!irt_is64(ir->t), "bad usage");
1226 if (irref_isk(ir->op2)) { 1903 if (irref_isk(ir->op2)) {
1227 int k = IR(ir->op2)->i; 1904 int k = IR(ir->op2)->i;
1228 if (ir->o == IR_SUBOV) k = -k; 1905 if (ir->o == IR_SUBOV) k = -k;
@@ -1253,16 +1930,29 @@ static void asm_arithov(ASMState *as, IRIns *ir)
1253 emit_move(as, RID_TMP, dest == left ? left : right); 1930 emit_move(as, RID_TMP, dest == left ? left : right);
1254} 1931}
1255 1932
1933#define asm_addov(as, ir) asm_arithov(as, ir)
1934#define asm_subov(as, ir) asm_arithov(as, ir)
1935
1256static void asm_mulov(ASMState *as, IRIns *ir) 1936static void asm_mulov(ASMState *as, IRIns *ir)
1257{ 1937{
1258#if LJ_DUALNUM 1938 Reg dest = ra_dest(as, ir, RSET_GPR);
1259#error "NYI: MULOV" 1939 Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
1940 right = (left >> 8); left &= 255;
1941 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
1942 right), dest));
1943 asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
1944 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
1945#if !LJ_TARGET_MIPSR6
1946 emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
1947 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1948 emit_dst(as, MIPSI_MULT, 0, left, right);
1260#else 1949#else
1261 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ 1950 emit_dst(as, MIPSI_MUL, dest, left, right);
1951 emit_dst(as, MIPSI_MUH, tmp, left, right);
1262#endif 1952#endif
1263} 1953}
1264 1954
1265#if LJ_HASFFI 1955#if LJ_32 && LJ_HASFFI
1266static void asm_add64(ASMState *as, IRIns *ir) 1956static void asm_add64(ASMState *as, IRIns *ir)
1267{ 1957{
1268 Reg dest = ra_dest(as, ir, RSET_GPR); 1958 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1346,7 +2036,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1346} 2036}
1347#endif 2037#endif
1348 2038
1349static void asm_bitnot(ASMState *as, IRIns *ir) 2039static void asm_bnot(ASMState *as, IRIns *ir)
1350{ 2040{
1351 Reg left, right, dest = ra_dest(as, ir, RSET_GPR); 2041 Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
1352 IRIns *irl = IR(ir->op1); 2042 IRIns *irl = IR(ir->op1);
@@ -1360,11 +2050,12 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
1360 emit_dst(as, MIPSI_NOR, dest, left, right); 2050 emit_dst(as, MIPSI_NOR, dest, left, right);
1361} 2051}
1362 2052
1363static void asm_bitswap(ASMState *as, IRIns *ir) 2053static void asm_bswap(ASMState *as, IRIns *ir)
1364{ 2054{
1365 Reg dest = ra_dest(as, ir, RSET_GPR); 2055 Reg dest = ra_dest(as, ir, RSET_GPR);
1366 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 2056 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1367 if ((as->flags & JIT_F_MIPS32R2)) { 2057#if LJ_32
2058 if ((as->flags & JIT_F_MIPSXXR2)) {
1368 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); 2059 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
1369 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); 2060 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
1370 } else { 2061 } else {
@@ -1379,6 +2070,15 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1379 emit_dta(as, MIPSI_SRL, tmp, left, 24); 2070 emit_dta(as, MIPSI_SRL, tmp, left, 24);
1380 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); 2071 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
1381 } 2072 }
2073#else
2074 if (irt_is64(ir->t)) {
2075 emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP);
2076 emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left);
2077 } else {
2078 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
2079 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
2080 }
2081#endif
1382} 2082}
1383 2083
1384static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 2084static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
@@ -1386,7 +2086,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1386 Reg dest = ra_dest(as, ir, RSET_GPR); 2086 Reg dest = ra_dest(as, ir, RSET_GPR);
1387 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 2087 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1388 if (irref_isk(ir->op2)) { 2088 if (irref_isk(ir->op2)) {
1389 int32_t k = IR(ir->op2)->i; 2089 intptr_t k = get_kval(as, ir->op2);
1390 if (checku16(k)) { 2090 if (checku16(k)) {
1391 emit_tsi(as, mik, dest, left, k); 2091 emit_tsi(as, mik, dest, left, k);
1392 return; 2092 return;
@@ -1396,22 +2096,34 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1396 emit_dst(as, mi, dest, left, right); 2096 emit_dst(as, mi, dest, left, right);
1397} 2097}
1398 2098
2099#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
2100#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
2101#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
2102
1399static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 2103static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1400{ 2104{
1401 Reg dest = ra_dest(as, ir, RSET_GPR); 2105 Reg dest = ra_dest(as, ir, RSET_GPR);
1402 if (irref_isk(ir->op2)) { /* Constant shifts. */ 2106 if (irref_isk(ir->op2)) { /* Constant shifts. */
1403 uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); 2107 uint32_t shift = (uint32_t)IR(ir->op2)->i;
1404 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); 2108 if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D;
2109 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR),
2110 (shift & 31));
1405 } else { 2111 } else {
1406 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 2112 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1407 right = (left >> 8); left &= 255; 2113 right = (left >> 8); left &= 255;
2114 if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV;
1408 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ 2115 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */
1409 } 2116 }
1410} 2117}
1411 2118
1412static void asm_bitror(ASMState *as, IRIns *ir) 2119#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
2120#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
2121#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
2122#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
2123
2124static void asm_bror(ASMState *as, IRIns *ir)
1413{ 2125{
1414 if ((as->flags & JIT_F_MIPS32R2)) { 2126 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
1415 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 2127 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
1416 } else { 2128 } else {
1417 Reg dest = ra_dest(as, ir, RSET_GPR); 2129 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1430,55 +2142,182 @@ static void asm_bitror(ASMState *as, IRIns *ir)
1430 } 2142 }
1431} 2143}
1432 2144
2145#if LJ_SOFTFP
2146static void asm_sfpmin_max(ASMState *as, IRIns *ir)
2147{
2148 CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
2149#if LJ_64
2150 IRRef args[2];
2151 args[0] = ir->op1;
2152 args[1] = ir->op2;
2153#else
2154 IRRef args[4];
2155 args[0^LJ_BE] = ir->op1;
2156 args[1^LJ_BE] = (ir+1)->op1;
2157 args[2^LJ_BE] = ir->op2;
2158 args[3^LJ_BE] = (ir+1)->op2;
2159#endif
2160 asm_setupresult(as, ir, &ci);
2161 emit_call(as, (void *)ci.func, 0);
2162 ci.func = NULL;
2163 asm_gencall(as, &ci, args);
2164}
2165#endif
2166
1433static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 2167static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1434{ 2168{
1435 if (irt_isnum(ir->t)) { 2169 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2170#if LJ_SOFTFP
2171 asm_sfpmin_max(as, ir);
2172#else
1436 Reg dest = ra_dest(as, ir, RSET_FPR); 2173 Reg dest = ra_dest(as, ir, RSET_FPR);
1437 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2174 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1438 right = (left >> 8); left &= 255; 2175 right = (left >> 8); left &= 255;
2176#if !LJ_TARGET_MIPSR6
1439 if (dest == left) { 2177 if (dest == left) {
1440 emit_fg(as, MIPSI_MOVT_D, dest, right); 2178 emit_fg(as, MIPSI_MOVF_D, dest, right);
1441 } else { 2179 } else {
1442 emit_fg(as, MIPSI_MOVF_D, dest, left); 2180 emit_fg(as, MIPSI_MOVT_D, dest, left);
1443 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); 2181 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
1444 } 2182 }
1445 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); 2183 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right);
2184#else
2185 emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right);
2186#endif
2187#endif
1446 } else { 2188 } else {
1447 Reg dest = ra_dest(as, ir, RSET_GPR); 2189 Reg dest = ra_dest(as, ir, RSET_GPR);
1448 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 2190 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1449 right = (left >> 8); left &= 255; 2191 right = (left >> 8); left &= 255;
1450 if (dest == left) { 2192 if (left == right) {
1451 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); 2193 if (dest != left) emit_move(as, dest, left);
1452 } else { 2194 } else {
1453 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); 2195#if !LJ_TARGET_MIPSR6
1454 if (dest != right) emit_move(as, dest, right); 2196 if (dest == left) {
2197 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
2198 } else {
2199 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
2200 if (dest != right) emit_move(as, dest, right);
2201 }
2202#else
2203 emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
2204 if (dest != right) {
2205 emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP);
2206 emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP);
2207 } else {
2208 emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP);
2209 emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP);
2210 }
2211#endif
2212 emit_dst(as, MIPSI_SLT, RID_TMP,
2213 ismax ? left : right, ismax ? right : left);
1455 } 2214 }
1456 emit_dst(as, MIPSI_SLT, RID_TMP,
1457 ismax ? left : right, ismax ? right : left);
1458 } 2215 }
1459} 2216}
1460 2217
2218#define asm_min(as, ir) asm_min_max(as, ir, 0)
2219#define asm_max(as, ir) asm_min_max(as, ir, 1)
2220
1461/* -- Comparisons --------------------------------------------------------- */ 2221/* -- Comparisons --------------------------------------------------------- */
1462 2222
2223#if LJ_SOFTFP
2224/* SFP comparisons. */
2225static void asm_sfpcomp(ASMState *as, IRIns *ir)
2226{
2227 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
2228 RegSet drop = RSET_SCRATCH;
2229 Reg r;
2230#if LJ_64
2231 IRRef args[2];
2232 args[0] = ir->op1;
2233 args[1] = ir->op2;
2234#else
2235 IRRef args[4];
2236 args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
2237 args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
2238#endif
2239
2240 for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) {
2241 if (!rset_test(as->freeset, r) &&
2242 regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
2243 rset_clear(drop, r);
2244 }
2245 ra_evictset(as, drop);
2246
2247 asm_setupresult(as, ir, ci);
2248
2249 switch ((IROp)ir->o) {
2250 case IR_LT:
2251 asm_guard(as, MIPSI_BGEZ, RID_RET, 0);
2252 break;
2253 case IR_ULT:
2254 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2255 emit_loadi(as, RID_TMP, 1);
2256 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);
2257 break;
2258 case IR_GE:
2259 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2260 emit_loadi(as, RID_TMP, 2);
2261 asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
2262 break;
2263 case IR_LE:
2264 asm_guard(as, MIPSI_BGTZ, RID_RET, 0);
2265 break;
2266 case IR_GT:
2267 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2268 emit_loadi(as, RID_TMP, 2);
2269 asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
2270 break;
2271 case IR_UGE:
2272 asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
2273 break;
2274 case IR_ULE:
2275 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2276 emit_loadi(as, RID_TMP, 1);
2277 break;
2278 case IR_UGT: case IR_ABC:
2279 asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
2280 break;
2281 case IR_EQ: case IR_NE:
2282 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO);
2283 default:
2284 break;
2285 }
2286 asm_gencall(as, ci, args);
2287}
2288#endif
2289
1463static void asm_comp(ASMState *as, IRIns *ir) 2290static void asm_comp(ASMState *as, IRIns *ir)
1464{ 2291{
1465 /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ 2292 /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
1466 IROp op = ir->o; 2293 IROp op = ir->o;
1467 if (irt_isnum(ir->t)) { 2294 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2295#if LJ_SOFTFP
2296 asm_sfpcomp(as, ir);
2297#else
2298#if !LJ_TARGET_MIPSR6
1468 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2299 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1469 right = (left >> 8); left &= 255; 2300 right = (left >> 8); left &= 255;
1470 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2301 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1471 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); 2302 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
2303#else
2304 Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR);
2305 right = (left >> 8); left &= 255;
2306 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2307 asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2308 emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right);
2309#endif
2310#endif
1472 } else { 2311 } else {
1473 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); 2312 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
1474 if (op == IR_ABC) op = IR_UGT; 2313 if (op == IR_ABC) op = IR_UGT;
1475 if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { 2314 if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) {
1476 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : 2315 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
1477 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); 2316 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
1478 asm_guard(as, mi, left, 0); 2317 asm_guard(as, mi, left, 0);
1479 } else { 2318 } else {
1480 if (irref_isk(ir->op2)) { 2319 if (irref_isk(ir->op2)) {
1481 int32_t k = IR(ir->op2)->i; 2320 intptr_t k = get_kval(as, ir->op2);
1482 if ((op&2)) k++; 2321 if ((op&2)) k++;
1483 if (checki16(k)) { 2322 if (checki16(k)) {
1484 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); 2323 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -1495,19 +2334,28 @@ static void asm_comp(ASMState *as, IRIns *ir)
1495 } 2334 }
1496} 2335}
1497 2336
1498static void asm_compeq(ASMState *as, IRIns *ir) 2337static void asm_equal(ASMState *as, IRIns *ir)
1499{ 2338{
1500 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); 2339 Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
2340 RSET_FPR : RSET_GPR);
1501 right = (left >> 8); left &= 255; 2341 right = (left >> 8); left &= 255;
1502 if (irt_isnum(ir->t)) { 2342 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2343#if LJ_SOFTFP
2344 asm_sfpcomp(as, ir);
2345#elif !LJ_TARGET_MIPSR6
1503 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2346 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1504 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); 2347 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
2348#else
2349 Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2350 asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2351 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right);
2352#endif
1505 } else { 2353 } else {
1506 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); 2354 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
1507 } 2355 }
1508} 2356}
1509 2357
1510#if LJ_HASFFI 2358#if LJ_32 && LJ_HASFFI
1511/* 64 bit integer comparisons. */ 2359/* 64 bit integer comparisons. */
1512static void asm_comp64(ASMState *as, IRIns *ir) 2360static void asm_comp64(ASMState *as, IRIns *ir)
1513{ 2361{
@@ -1549,51 +2397,101 @@ static void asm_comp64eq(ASMState *as, IRIns *ir)
1549/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 2397/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1550static void asm_hiop(ASMState *as, IRIns *ir) 2398static void asm_hiop(ASMState *as, IRIns *ir)
1551{ 2399{
1552#if LJ_HASFFI 2400#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
1553 /* HIOP is marked as a store because it needs its own DCE logic. */ 2401 /* HIOP is marked as a store because it needs its own DCE logic. */
1554 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2402 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1555 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2403 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1556 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2404 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
1557 as->curins--; /* Always skip the CONV. */ 2405 as->curins--; /* Always skip the CONV. */
2406#if LJ_HASFFI && !LJ_SOFTFP
1558 if (usehi || uselo) 2407 if (usehi || uselo)
1559 asm_conv64(as, ir); 2408 asm_conv64(as, ir);
1560 return; 2409 return;
2410#endif
1561 } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ 2411 } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */
1562 as->curins--; /* Always skip the loword comparison. */ 2412 as->curins--; /* Always skip the loword comparison. */
2413#if LJ_SOFTFP
2414 if (!irt_isint(ir->t)) {
2415 asm_sfpcomp(as, ir-1);
2416 return;
2417 }
2418#endif
2419#if LJ_HASFFI
1563 asm_comp64(as, ir); 2420 asm_comp64(as, ir);
2421#endif
1564 return; 2422 return;
1565 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2423 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
1566 as->curins--; /* Always skip the loword comparison. */ 2424 as->curins--; /* Always skip the loword comparison. */
2425#if LJ_SOFTFP
2426 if (!irt_isint(ir->t)) {
2427 asm_sfpcomp(as, ir-1);
2428 return;
2429 }
2430#endif
2431#if LJ_HASFFI
1567 asm_comp64eq(as, ir); 2432 asm_comp64eq(as, ir);
2433#endif
2434 return;
2435#if LJ_SOFTFP
2436 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
2437 as->curins--; /* Always skip the loword min/max. */
2438 if (uselo || usehi)
2439 asm_sfpmin_max(as, ir-1);
1568 return; 2440 return;
2441#endif
1569 } else if ((ir-1)->o == IR_XSTORE) { 2442 } else if ((ir-1)->o == IR_XSTORE) {
1570 as->curins--; /* Handle both stores here. */ 2443 as->curins--; /* Handle both stores here. */
1571 if ((ir-1)->r != RID_SINK) { 2444 if ((ir-1)->r != RID_SINK) {
1572 asm_xstore(as, ir, LJ_LE ? 4 : 0); 2445 asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1573 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 2446 asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1574 } 2447 }
1575 return; 2448 return;
1576 } 2449 }
1577 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 2450 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1578 switch ((ir-1)->o) { 2451 switch ((ir-1)->o) {
2452#if LJ_HASFFI
1579 case IR_ADD: as->curins--; asm_add64(as, ir); break; 2453 case IR_ADD: as->curins--; asm_add64(as, ir); break;
1580 case IR_SUB: as->curins--; asm_sub64(as, ir); break; 2454 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1581 case IR_NEG: as->curins--; asm_neg64(as, ir); break; 2455 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
2456#endif
2457#if LJ_SOFTFP
2458 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2459 case IR_STRTO:
2460 if (!uselo)
2461 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
2462 break;
2463#endif
1582 case IR_CALLN: 2464 case IR_CALLN:
2465 case IR_CALLS:
1583 case IR_CALLXS: 2466 case IR_CALLXS:
1584 if (!uselo) 2467 if (!uselo)
1585 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 2468 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1586 break; 2469 break;
2470#if LJ_SOFTFP
2471 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
2472#endif
1587 case IR_CNEWI: 2473 case IR_CNEWI:
1588 /* Nothing to do here. Handled by lo op itself. */ 2474 /* Nothing to do here. Handled by lo op itself. */
1589 break; 2475 break;
1590 default: lua_assert(0); break; 2476 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
1591 } 2477 }
1592#else 2478#else
1593 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ 2479 /* Unused on MIPS64 or without SOFTFP or FFI. */
2480 UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
1594#endif 2481#endif
1595} 2482}
1596 2483
2484/* -- Profiling ----------------------------------------------------------- */
2485
2486static void asm_prof(ASMState *as, IRIns *ir)
2487{
2488 UNUSED(ir);
2489 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
2490 emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
2491 emit_lsglptr(as, MIPSI_LBU, RID_TMP,
2492 (int32_t)offsetof(global_State, hookmask));
2493}
2494
1597/* -- Stack handling ------------------------------------------------------ */ 2495/* -- Stack handling ------------------------------------------------------ */
1598 2496
1599/* Check Lua stack size for overflow. Use exit handler as fallback. */ 2497/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1604,47 +2502,70 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1604 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; 2502 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
1605 ExitNo oldsnap = as->snapno; 2503 ExitNo oldsnap = as->snapno;
1606 rset_clear(allow, pbase); 2504 rset_clear(allow, pbase);
2505#if LJ_32
1607 tmp = allow ? rset_pickbot(allow) : 2506 tmp = allow ? rset_pickbot(allow) :
1608 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); 2507 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
2508#else
2509 tmp = allow ? rset_pickbot(allow) : RID_RET;
2510#endif
1609 as->snapno = exitno; 2511 as->snapno = exitno;
1610 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); 2512 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1611 as->snapno = oldsnap; 2513 as->snapno = oldsnap;
1612 if (allow == RSET_EMPTY) /* Restore temp. register. */ 2514 if (allow == RSET_EMPTY) /* Restore temp. register. */
1613 emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); 2515 emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0);
1614 else 2516 else
1615 ra_modified(as, tmp); 2517 ra_modified(as, tmp);
1616 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); 2518 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
1617 emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); 2519 emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase);
1618 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); 2520 emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack));
1619 if (pbase == RID_TMP) 2521 if (pbase == RID_TMP)
1620 emit_getgl(as, RID_TMP, jit_base); 2522 emit_getgl(as, RID_TMP, jit_base);
1621 emit_getgl(as, tmp, jit_L); 2523 emit_getgl(as, tmp, cur_L);
1622 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2524 if (allow == RSET_EMPTY) /* Spill temp. register. */
1623 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); 2525 emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0);
1624} 2526}
1625 2527
1626/* Restore Lua stack from on-trace state. */ 2528/* Restore Lua stack from on-trace state. */
1627static void asm_stack_restore(ASMState *as, SnapShot *snap) 2529static void asm_stack_restore(ASMState *as, SnapShot *snap)
1628{ 2530{
1629 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2531 SnapEntry *map = &as->T->snapmap[snap->mapofs];
1630 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2532#if LJ_32 || defined(LUA_USE_ASSERT)
2533 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2534#endif
1631 MSize n, nent = snap->nent; 2535 MSize n, nent = snap->nent;
1632 /* Store the value of all modified slots to the Lua stack. */ 2536 /* Store the value of all modified slots to the Lua stack. */
1633 for (n = 0; n < nent; n++) { 2537 for (n = 0; n < nent; n++) {
1634 SnapEntry sn = map[n]; 2538 SnapEntry sn = map[n];
1635 BCReg s = snap_slot(sn); 2539 BCReg s = snap_slot(sn);
1636 int32_t ofs = 8*((int32_t)s-1); 2540 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1637 IRRef ref = snap_ref(sn); 2541 IRRef ref = snap_ref(sn);
1638 IRIns *ir = IR(ref); 2542 IRIns *ir = IR(ref);
1639 if ((sn & SNAP_NORESTORE)) 2543 if ((sn & SNAP_NORESTORE))
1640 continue; 2544 continue;
1641 if (irt_isnum(ir->t)) { 2545 if (irt_isnum(ir->t)) {
2546#if LJ_SOFTFP32
2547 Reg tmp;
2548 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2549 /* LJ_SOFTFP: must be a number constant. */
2550 lj_assertA(irref_isk(ref), "unsplit FP op");
2551 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2552 emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2553 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2554 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2555 emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2556#elif LJ_SOFTFP /* && LJ_64 */
2557 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2558 emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs);
2559#else
1642 Reg src = ra_alloc1(as, ref, RSET_FPR); 2560 Reg src = ra_alloc1(as, ref, RSET_FPR);
1643 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); 2561 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
2562#endif
1644 } else { 2563 } else {
1645 Reg type; 2564#if LJ_32
1646 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2565 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1647 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2566 Reg type;
2567 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2568 "restore of IR type %d", irt_type(ir->t));
1648 if (!irt_ispri(ir->t)) { 2569 if (!irt_ispri(ir->t)) {
1649 Reg src = ra_alloc1(as, ref, allow); 2570 Reg src = ra_alloc1(as, ref, allow);
1650 rset_clear(allow, src); 2571 rset_clear(allow, src);
@@ -1653,14 +2574,21 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1653 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2574 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1654 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2575 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1655 type = ra_allock(as, (int32_t)(*flinks--), allow); 2576 type = ra_allock(as, (int32_t)(*flinks--), allow);
2577#if LJ_SOFTFP
2578 } else if ((sn & SNAP_SOFTFPNUM)) {
2579 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2580#endif
1656 } else { 2581 } else {
1657 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2582 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1658 } 2583 }
1659 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); 2584 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
2585#else
2586 asm_tvstore64(as, RID_BASE, ofs, ref);
2587#endif
1660 } 2588 }
1661 checkmclim(as); 2589 checkmclim(as);
1662 } 2590 }
1663 lua_assert(map + nent == flinks); 2591 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
1664} 2592}
1665 2593
1666/* -- GC handling --------------------------------------------------------- */ 2594/* -- GC handling --------------------------------------------------------- */
@@ -1684,7 +2612,7 @@ static void asm_gc_check(ASMState *as)
1684 args[1] = ASMREF_TMP2; /* MSize steps */ 2612 args[1] = ASMREF_TMP2; /* MSize steps */
1685 asm_gencall(as, ci, args); 2613 asm_gencall(as, ci, args);
1686 l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ 2614 l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */
1687 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 2615 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1688 tmp = ra_releasetmp(as, ASMREF_TMP2); 2616 tmp = ra_releasetmp(as, ASMREF_TMP2);
1689 emit_loadi(as, tmp, as->gcsteps); 2617 emit_loadi(as, tmp, as->gcsteps);
1690 /* Jump around GC step if GC total < GC threshold. */ 2618 /* Jump around GC step if GC total < GC threshold. */
@@ -1759,7 +2687,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1759 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; 2687 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
1760 int32_t spadj = as->T->spadjust; 2688 int32_t spadj = as->T->spadjust;
1761 MCode *p = as->mctop-1; 2689 MCode *p = as->mctop-1;
1762 *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; 2690 *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
1763 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); 2691 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
1764} 2692}
1765 2693
@@ -1770,139 +2698,26 @@ static void asm_tail_prep(ASMState *as)
1770 as->invmcp = as->loopref ? as->mcp : NULL; 2698 as->invmcp = as->loopref ? as->mcp : NULL;
1771} 2699}
1772 2700
1773/* -- Instruction dispatch ------------------------------------------------ */
1774
1775/* Assemble a single instruction. */
1776static void asm_ir(ASMState *as, IRIns *ir)
1777{
1778 switch ((IROp)ir->o) {
1779 /* Miscellaneous ops. */
1780 case IR_LOOP: asm_loop(as); break;
1781 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1782 case IR_USE:
1783 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1784 case IR_PHI: asm_phi(as, ir); break;
1785 case IR_HIOP: asm_hiop(as, ir); break;
1786 case IR_GCSTEP: asm_gcstep(as, ir); break;
1787
1788 /* Guarded assertions. */
1789 case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
1790 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1791 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1792 case IR_ABC:
1793 asm_comp(as, ir);
1794 break;
1795
1796 case IR_RETF: asm_retf(as, ir); break;
1797
1798 /* Bit ops. */
1799 case IR_BNOT: asm_bitnot(as, ir); break;
1800 case IR_BSWAP: asm_bitswap(as, ir); break;
1801
1802 case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
1803 case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
1804 case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
1805
1806 case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
1807 case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
1808 case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
1809 case IR_BROL: lua_assert(0); break;
1810 case IR_BROR: asm_bitror(as, ir); break;
1811
1812 /* Arithmetic ops. */
1813 case IR_ADD: asm_add(as, ir); break;
1814 case IR_SUB: asm_sub(as, ir); break;
1815 case IR_MUL: asm_mul(as, ir); break;
1816 case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
1817 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1818 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1819 case IR_NEG: asm_neg(as, ir); break;
1820
1821 case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
1822 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1823 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1824 case IR_MIN: asm_min_max(as, ir, 0); break;
1825 case IR_MAX: asm_min_max(as, ir, 1); break;
1826 case IR_FPMATH:
1827 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1828 break;
1829 if (ir->op2 <= IRFPM_TRUNC)
1830 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1831 else if (ir->op2 == IRFPM_SQRT)
1832 asm_fpunary(as, ir, MIPSI_SQRT_D);
1833 else
1834 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1835 break;
1836
1837 /* Overflow-checking arithmetic ops. */
1838 case IR_ADDOV: asm_arithov(as, ir); break;
1839 case IR_SUBOV: asm_arithov(as, ir); break;
1840 case IR_MULOV: asm_mulov(as, ir); break;
1841
1842 /* Memory references. */
1843 case IR_AREF: asm_aref(as, ir); break;
1844 case IR_HREF: asm_href(as, ir); break;
1845 case IR_HREFK: asm_hrefk(as, ir); break;
1846 case IR_NEWREF: asm_newref(as, ir); break;
1847 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1848 case IR_FREF: asm_fref(as, ir); break;
1849 case IR_STRREF: asm_strref(as, ir); break;
1850
1851 /* Loads and stores. */
1852 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1853 asm_ahuvload(as, ir);
1854 break;
1855 case IR_FLOAD: asm_fload(as, ir); break;
1856 case IR_XLOAD: asm_xload(as, ir); break;
1857 case IR_SLOAD: asm_sload(as, ir); break;
1858
1859 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1860 case IR_FSTORE: asm_fstore(as, ir); break;
1861 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1862
1863 /* Allocations. */
1864 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1865 case IR_TNEW: asm_tnew(as, ir); break;
1866 case IR_TDUP: asm_tdup(as, ir); break;
1867 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1868
1869 /* Write barriers. */
1870 case IR_TBAR: asm_tbar(as, ir); break;
1871 case IR_OBAR: asm_obar(as, ir); break;
1872
1873 /* Type conversions. */
1874 case IR_CONV: asm_conv(as, ir); break;
1875 case IR_TOBIT: asm_tobit(as, ir); break;
1876 case IR_TOSTR: asm_tostr(as, ir); break;
1877 case IR_STRTO: asm_strto(as, ir); break;
1878
1879 /* Calls. */
1880 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1881 case IR_CALLXS: asm_callx(as, ir); break;
1882 case IR_CARG: break;
1883
1884 default:
1885 setintV(&as->J->errinfo, ir->o);
1886 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1887 break;
1888 }
1889}
1890
1891/* -- Trace setup --------------------------------------------------------- */ 2701/* -- Trace setup --------------------------------------------------------- */
1892 2702
1893/* Ensure there are enough stack slots for call arguments. */ 2703/* Ensure there are enough stack slots for call arguments. */
1894static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2704static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1895{ 2705{
1896 IRRef args[CCI_NARGS_MAX*2]; 2706 IRRef args[CCI_NARGS_MAX*2];
1897 uint32_t i, nargs = (int)CCI_NARGS(ci); 2707 uint32_t i, nargs = CCI_XNARGS(ci);
2708#if LJ_32
1898 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2709 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2710#else
2711 int nslots = 0, ngpr = REGARG_NUMGPR;
2712#endif
1899 asm_collectargs(as, ir, ci, args); 2713 asm_collectargs(as, ir, ci, args);
1900 for (i = 0; i < nargs; i++) { 2714 for (i = 0; i < nargs; i++) {
1901 if (args[i] && irt_isfp(IR(args[i])->t) && 2715#if LJ_32
2716 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) &&
1902 nfpr > 0 && !(ci->flags & CCI_VARARG)) { 2717 nfpr > 0 && !(ci->flags & CCI_VARARG)) {
1903 nfpr--; 2718 nfpr--;
1904 ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; 2719 ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
1905 } else if (args[i] && irt_isnum(IR(args[i])->t)) { 2720 } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) {
1906 nfpr = 0; 2721 nfpr = 0;
1907 ngpr = ngpr & ~1; 2722 ngpr = ngpr & ~1;
1908 if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; 2723 if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
@@ -1910,6 +2725,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1910 nfpr = 0; 2725 nfpr = 0;
1911 if (ngpr > 0) ngpr--; else nslots++; 2726 if (ngpr > 0) ngpr--; else nslots++;
1912 } 2727 }
2728#else
2729 if (ngpr > 0) ngpr--; else nslots += 2;
2730#endif
1913 } 2731 }
1914 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2732 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
1915 as->evenspill = nslots; 2733 as->evenspill = nslots;
@@ -1940,35 +2758,35 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1940 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && 2758 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
1941 ((p[-1] & 0xf0000000u) == MIPSI_BEQ || 2759 ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
1942 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || 2760 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
1943 (p[-1] & 0xffe00000u) == MIPSI_BC1F) && 2761#if !LJ_TARGET_MIPSR6
1944 p[-2] != MIPS_NOPATCH_GC_CHECK) { 2762 (p[-1] & 0xffe00000u) == MIPSI_BC1F
2763#else
2764 (p[-1] & 0xff600000u) == MIPSI_BC1EQZ
2765#endif
2766 ) && p[-2] != MIPS_NOPATCH_GC_CHECK) {
1945 ptrdiff_t delta = target - p; 2767 ptrdiff_t delta = target - p;
1946 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ 2768 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
1947 patchbranch: 2769 patchbranch:
1948 p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); 2770 p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
1949 *p = MIPSI_NOP; /* Replace the load of the exit number. */ 2771 *p = MIPSI_NOP; /* Replace the load of the exit number. */
1950 cstop = p; 2772 cstop = p+1;
1951 if (!cstart) cstart = p-1; 2773 if (!cstart) cstart = p-1;
1952 } else { /* Branch out of range. Use spare jump slot in mcarea. */ 2774 } else { /* Branch out of range. Use spare jump slot in mcarea. */
1953 int i; 2775 MCode *mcjump = asm_sparejump_use(mcarea, tjump);
1954 for (i = (int)(sizeof(MCLink)/sizeof(MCode)); 2776 if (mcjump) {
1955 i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2); 2777 lj_mcode_sync(mcjump, mcjump+1);
1956 i += 2) { 2778 delta = mcjump - p;
1957 if (mcarea[i] == tjump) { 2779 if (((delta + 0x8000) >> 16) == 0) {
1958 delta = mcarea+i - p;
1959 goto patchbranch;
1960 } else if (mcarea[i] == MIPSI_NOP) {
1961 mcarea[i] = tjump;
1962 cstart = mcarea+i;
1963 delta = mcarea+i - p;
1964 goto patchbranch; 2780 goto patchbranch;
2781 } else {
2782 lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
1965 } 2783 }
1966 } 2784 }
1967 /* Ignore jump slot overflow. Child trace is simply not attached. */ 2785 /* Ignore jump slot overflow. Child trace is simply not attached. */
1968 } 2786 }
1969 } else if (p+1 == pe) { 2787 } else if (p+1 == pe) {
1970 /* Patch NOP after code for inverted loop branch. Use of J is ok. */ 2788 /* Patch NOP after code for inverted loop branch. Use of J is ok. */
1971 lua_assert(p[1] == MIPSI_NOP); 2789 lj_assertJ(p[1] == MIPSI_NOP, "expected NOP");
1972 p[1] = tjump; 2790 p[1] = tjump;
1973 *p = MIPSI_NOP; /* Replace the load of the exit number. */ 2791 *p = MIPSI_NOP; /* Replace the load of the exit number. */
1974 cstop = p+2; 2792 cstop = p+2;
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 8f91d7ce..c27ee71c 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
156 return ra_allock(as, ofs-(int16_t)ofs, allow); 156 return ra_allock(as, ofs-(int16_t)ofs, allow);
157 } 157 }
158 } 158 }
159 } else if (ir->o == IR_TMPREF) {
160 *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
161 return RID_JGL;
159 } 162 }
160 } 163 }
161 *ofsp = 0; 164 *ofsp = 0;
@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
181 return; 184 return;
182 } 185 }
183 } else if (ir->o == IR_STRREF) { 186 } else if (ir->o == IR_STRREF) {
184 lua_assert(ofs == 0); 187 lj_assertA(ofs == 0, "bad usage");
185 ofs = (int32_t)sizeof(GCstr); 188 ofs = (int32_t)sizeof(GCstr);
186 if (irref_isk(ir->op2)) { 189 if (irref_isk(ir->op2)) {
187 ofs += IR(ir->op2)->i; 190 ofs += IR(ir->op2)->i;
@@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
226 emit_tab(as, pi, rt, left, right); 229 emit_tab(as, pi, rt, left, right);
227} 230}
228 231
232#if !LJ_SOFTFP
229/* Fuse to multiply-add/sub instruction. */ 233/* Fuse to multiply-add/sub instruction. */
230static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) 234static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
231{ 235{
@@ -245,24 +249,30 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
245 } 249 }
246 return 0; 250 return 0;
247} 251}
252#endif
248 253
249/* -- Calls --------------------------------------------------------------- */ 254/* -- Calls --------------------------------------------------------------- */
250 255
251/* Generate a call to a C function. */ 256/* Generate a call to a C function. */
252static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 257static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
253{ 258{
254 uint32_t n, nargs = CCI_NARGS(ci); 259 uint32_t n, nargs = CCI_XNARGS(ci);
255 int32_t ofs = 8; 260 int32_t ofs = 8;
256 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; 261 Reg gpr = REGARG_FIRSTGPR;
262#if !LJ_SOFTFP
263 Reg fpr = REGARG_FIRSTFPR;
264#endif
257 if ((void *)ci->func) 265 if ((void *)ci->func)
258 emit_call(as, (void *)ci->func); 266 emit_call(as, (void *)ci->func);
259 for (n = 0; n < nargs; n++) { /* Setup args. */ 267 for (n = 0; n < nargs; n++) { /* Setup args. */
260 IRRef ref = args[n]; 268 IRRef ref = args[n];
261 if (ref) { 269 if (ref) {
262 IRIns *ir = IR(ref); 270 IRIns *ir = IR(ref);
271#if !LJ_SOFTFP
263 if (irt_isfp(ir->t)) { 272 if (irt_isfp(ir->t)) {
264 if (fpr <= REGARG_LASTFPR) { 273 if (fpr <= REGARG_LASTFPR) {
265 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ 274 lj_assertA(rset_test(as->freeset, fpr),
275 "reg %d not free", fpr); /* Already evicted. */
266 ra_leftov(as, fpr, ref); 276 ra_leftov(as, fpr, ref);
267 fpr++; 277 fpr++;
268 } else { 278 } else {
@@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
271 emit_spstore(as, ir, r, ofs); 281 emit_spstore(as, ir, r, ofs);
272 ofs += irt_isnum(ir->t) ? 8 : 4; 282 ofs += irt_isnum(ir->t) ? 8 : 4;
273 } 283 }
274 } else { 284 } else
285#endif
286 {
275 if (gpr <= REGARG_LASTGPR) { 287 if (gpr <= REGARG_LASTGPR) {
276 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ 288 lj_assertA(rset_test(as->freeset, gpr),
289 "reg %d not free", gpr); /* Already evicted. */
277 ra_leftov(as, gpr, ref); 290 ra_leftov(as, gpr, ref);
278 gpr++; 291 gpr++;
279 } else { 292 } else {
@@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
290 } 303 }
291 checkmclim(as); 304 checkmclim(as);
292 } 305 }
306#if !LJ_SOFTFP
293 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ 307 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
294 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); 308 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
309#endif
295} 310}
296 311
297/* Setup result reg/sp for call. Evict scratch regs. */ 312/* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
299{ 314{
300 RegSet drop = RSET_SCRATCH; 315 RegSet drop = RSET_SCRATCH;
301 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 316 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
317#if !LJ_SOFTFP
302 if ((ci->flags & CCI_NOFPRCLOBBER)) 318 if ((ci->flags & CCI_NOFPRCLOBBER))
303 drop &= ~RSET_FPR; 319 drop &= ~RSET_FPR;
320#endif
304 if (ra_hasreg(ir->r)) 321 if (ra_hasreg(ir->r))
305 rset_clear(drop, ir->r); /* Dest reg handled below. */ 322 rset_clear(drop, ir->r); /* Dest reg handled below. */
306 if (hiop && ra_hasreg((ir+1)->r)) 323 if (hiop && ra_hasreg((ir+1)->r))
307 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 324 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
308 ra_evictset(as, drop); /* Evictions must be performed first. */ 325 ra_evictset(as, drop); /* Evictions must be performed first. */
309 if (ra_used(ir)) { 326 if (ra_used(ir)) {
310 lua_assert(!irt_ispri(ir->t)); 327 lj_assertA(!irt_ispri(ir->t), "PRI dest");
311 if (irt_isfp(ir->t)) { 328 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
312 if ((ci->flags & CCI_CASTU64)) { 329 if ((ci->flags & CCI_CASTU64)) {
313 /* Use spill slot or temp slots. */ 330 /* Use spill slot or temp slots. */
314 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; 331 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -323,23 +340,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
323 } else { 340 } else {
324 ra_destreg(as, ir, RID_FPRET); 341 ra_destreg(as, ir, RID_FPRET);
325 } 342 }
343#if LJ_32
326 } else if (hiop) { 344 } else if (hiop) {
327 ra_destpair(as, ir); 345 ra_destpair(as, ir);
346#endif
328 } else { 347 } else {
329 ra_destreg(as, ir, RID_RET); 348 ra_destreg(as, ir, RID_RET);
330 } 349 }
331 } 350 }
332} 351}
333 352
334static void asm_call(ASMState *as, IRIns *ir)
335{
336 IRRef args[CCI_NARGS_MAX];
337 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
338 asm_collectargs(as, ir, ci, args);
339 asm_setupresult(as, ir, ci);
340 asm_gencall(as, ci, args);
341}
342
343static void asm_callx(ASMState *as, IRIns *ir) 353static void asm_callx(ASMState *as, IRIns *ir)
344{ 354{
345 IRRef args[CCI_NARGS_MAX*2]; 355 IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +362,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
352 func = ir->op2; irf = IR(func); 362 func = ir->op2; irf = IR(func);
353 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 363 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
354 if (irref_isk(func)) { /* Call to constant address. */ 364 if (irref_isk(func)) { /* Call to constant address. */
355 ci.func = (ASMFunction)(void *)(irf->i); 365 ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
356 } else { /* Need a non-argument register for indirect calls. */ 366 } else { /* Need a non-argument register for indirect calls. */
357 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); 367 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
358 Reg freg = ra_alloc1(as, func, allow); 368 Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +373,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
363 asm_gencall(as, &ci, args); 373 asm_gencall(as, &ci, args);
364} 374}
365 375
366static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
367{
368 const CCallInfo *ci = &lj_ir_callinfo[id];
369 IRRef args[2];
370 args[0] = ir->op1;
371 args[1] = ir->op2;
372 asm_setupresult(as, ir, ci);
373 asm_gencall(as, ci, args);
374}
375
376/* -- Returns ------------------------------------------------------------- */ 376/* -- Returns ------------------------------------------------------------- */
377 377
378/* Return to lower frame. Guard that it goes to the right spot. */ 378/* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +380,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
380{ 380{
381 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 381 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
382 void *pc = ir_kptr(IR(ir->op2)); 382 void *pc = ir_kptr(IR(ir->op2));
383 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 383 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
384 as->topslot -= (BCReg)delta; 384 as->topslot -= (BCReg)delta;
385 if ((int32_t)as->topslot < 0) as->topslot = 0; 385 if ((int32_t)as->topslot < 0) as->topslot = 0;
386 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 386 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -392,8 +392,24 @@ static void asm_retf(ASMState *as, IRIns *ir)
392 emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); 392 emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
393} 393}
394 394
395/* -- Buffer operations --------------------------------------------------- */
396
397#if LJ_HASBUFFER
398static void asm_bufhdr_write(ASMState *as, Reg sb)
399{
400 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
401 IRIns irgc;
402 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
403 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
404 emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
405 emit_getgl(as, RID_TMP, cur_L);
406 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
407}
408#endif
409
395/* -- Type conversions ---------------------------------------------------- */ 410/* -- Type conversions ---------------------------------------------------- */
396 411
412#if !LJ_SOFTFP
397static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 413static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
398{ 414{
399 RegSet allow = RSET_FPR; 415 RegSet allow = RSET_FPR;
@@ -410,8 +426,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
410 emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); 426 emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
411 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 427 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
412 emit_lsptr(as, PPCI_LFS, (fbias & 31), 428 emit_lsptr(as, PPCI_LFS, (fbias & 31),
413 (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), 429 (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
414 RSET_GPR);
415 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 430 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
416 emit_fb(as, PPCI_FCTIWZ, tmp, left); 431 emit_fb(as, PPCI_FCTIWZ, tmp, left);
417} 432}
@@ -427,15 +442,27 @@ static void asm_tobit(ASMState *as, IRIns *ir)
427 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 442 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
428 emit_fab(as, PPCI_FADD, tmp, left, right); 443 emit_fab(as, PPCI_FADD, tmp, left, right);
429} 444}
445#endif
430 446
431static void asm_conv(ASMState *as, IRIns *ir) 447static void asm_conv(ASMState *as, IRIns *ir)
432{ 448{
433 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 449 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
450#if !LJ_SOFTFP
434 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 451 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
452#endif
435 IRRef lref = ir->op1; 453 IRRef lref = ir->op1;
436 lua_assert(irt_type(ir->t) != st); 454 /* 64 bit integer conversions are handled by SPLIT. */
437 lua_assert(!(irt_isint64(ir->t) || 455 lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
438 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ 456 "IR %04d has unsplit 64 bit type",
457 (int)(ir - as->ir) - REF_BIAS);
458#if LJ_SOFTFP
459 /* FP conversions are handled by SPLIT. */
460 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
461 "IR %04d has FP type",
462 (int)(ir - as->ir) - REF_BIAS);
463 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
464#else
465 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
439 if (irt_isfp(ir->t)) { 466 if (irt_isfp(ir->t)) {
440 Reg dest = ra_dest(as, ir, RSET_FPR); 467 Reg dest = ra_dest(as, ir, RSET_FPR);
441 if (stfp) { /* FP to FP conversion. */ 468 if (stfp) { /* FP to FP conversion. */
@@ -450,13 +477,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
450 Reg left = ra_alloc1(as, lref, allow); 477 Reg left = ra_alloc1(as, lref, allow);
451 Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); 478 Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
452 Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); 479 Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
453 const float *kbias;
454 if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); 480 if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
455 emit_fab(as, PPCI_FSUB, dest, dest, fbias); 481 emit_fab(as, PPCI_FSUB, dest, dest, fbias);
456 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); 482 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
457 kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); 483 emit_lsptr(as, PPCI_LFS, (fbias & 31),
458 if (st == IRT_U32) kbias++; 484 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
459 emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
460 rset_clear(allow, hibias)); 485 rset_clear(allow, hibias));
461 emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, 486 emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
462 RID_SP, SPOFS_TMPLO); 487 RID_SP, SPOFS_TMPLO);
@@ -466,7 +491,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
466 } else if (stfp) { /* FP to integer conversion. */ 491 } else if (stfp) { /* FP to integer conversion. */
467 if (irt_isguard(ir->t)) { 492 if (irt_isguard(ir->t)) {
468 /* Checked conversions are only supported from number to int. */ 493 /* Checked conversions are only supported from number to int. */
469 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 494 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
495 "bad type for checked CONV");
470 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 496 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
471 } else { 497 } else {
472 Reg dest = ra_dest(as, ir, RSET_GPR); 498 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -489,19 +515,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
489 emit_fb(as, PPCI_FCTIWZ, tmp, tmp); 515 emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
490 emit_fab(as, PPCI_FSUB, tmp, left, tmp); 516 emit_fab(as, PPCI_FSUB, tmp, left, tmp);
491 emit_lsptr(as, PPCI_LFS, (tmp & 31), 517 emit_lsptr(as, PPCI_LFS, (tmp & 31),
492 (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), 518 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
493 RSET_GPR);
494 } else { 519 } else {
495 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 520 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
496 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 521 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
497 emit_fb(as, PPCI_FCTIWZ, tmp, left); 522 emit_fb(as, PPCI_FCTIWZ, tmp, left);
498 } 523 }
499 } 524 }
500 } else { 525 } else
526#endif
527 {
501 Reg dest = ra_dest(as, ir, RSET_GPR); 528 Reg dest = ra_dest(as, ir, RSET_GPR);
502 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 529 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
503 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 530 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
504 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 531 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
505 if ((ir->op2 & IRCONV_SEXT)) 532 if ((ir->op2 & IRCONV_SEXT))
506 emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); 533 emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
507 else 534 else
@@ -513,90 +540,102 @@ static void asm_conv(ASMState *as, IRIns *ir)
513 } 540 }
514} 541}
515 542
516#if LJ_HASFFI
517static void asm_conv64(ASMState *as, IRIns *ir)
518{
519 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
520 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
521 IRCallID id;
522 const CCallInfo *ci;
523 IRRef args[2];
524 args[0] = ir->op1;
525 args[1] = (ir-1)->op1;
526 if (st == IRT_NUM || st == IRT_FLOAT) {
527 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
528 ir--;
529 } else {
530 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
531 }
532 ci = &lj_ir_callinfo[id];
533 asm_setupresult(as, ir, ci);
534 asm_gencall(as, ci, args);
535}
536#endif
537
538static void asm_strto(ASMState *as, IRIns *ir) 543static void asm_strto(ASMState *as, IRIns *ir)
539{ 544{
540 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 545 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
541 IRRef args[2]; 546 IRRef args[2];
542 int32_t ofs; 547 int32_t ofs = SPOFS_TMP;
548#if LJ_SOFTFP
549 ra_evictset(as, RSET_SCRATCH);
550 if (ra_used(ir)) {
551 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
552 (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
553 int i;
554 for (i = 0; i < 2; i++) {
555 Reg r = (ir+i)->r;
556 if (ra_hasreg(r)) {
557 ra_free(as, r);
558 ra_modified(as, r);
559 emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
560 }
561 }
562 ofs = sps_scale(ir->s & ~1);
563 } else {
564 Reg rhi = ra_dest(as, ir+1, RSET_GPR);
565 Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
566 emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
567 emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
568 }
569 }
570#else
543 RegSet drop = RSET_SCRATCH; 571 RegSet drop = RSET_SCRATCH;
544 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ 572 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
545 ra_evictset(as, drop); 573 ra_evictset(as, drop);
574 if (ir->s) ofs = sps_scale(ir->s);
575#endif
546 asm_guardcc(as, CC_EQ); 576 asm_guardcc(as, CC_EQ);
547 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ 577 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
548 args[0] = ir->op1; /* GCstr *str */ 578 args[0] = ir->op1; /* GCstr *str */
549 args[1] = ASMREF_TMP1; /* TValue *n */ 579 args[1] = ASMREF_TMP1; /* TValue *n */
550 asm_gencall(as, ci, args); 580 asm_gencall(as, ci, args);
551 /* Store the result to the spill slot or temp slots. */ 581 /* Store the result to the spill slot or temp slots. */
552 ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
553 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); 582 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
554} 583}
555 584
585/* -- Memory references --------------------------------------------------- */
586
556/* Get pointer to TValue. */ 587/* Get pointer to TValue. */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 588static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
558{ 589{
559 IRIns *ir = IR(ref); 590 int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
560 if (irt_isnum(ir->t)) { 591 if ((mode & IRTMPREF_IN1)) {
561 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ 592 IRIns *ir = IR(ref);
562 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 593 if (irt_isnum(ir->t)) {
563 else /* Otherwise force a spill and use the spill slot. */ 594 if ((mode & IRTMPREF_OUT1)) {
564 emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); 595#if LJ_SOFTFP
565 } else { 596 lj_assertA(irref_isk(ref), "unsplit FP op");
566 /* Otherwise use g->tmptv to hold the TValue. */ 597 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
567 RegSet allow = rset_exclude(RSET_GPR, dest); 598 emit_setgl(as,
568 Reg type; 599 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
569 emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 600 tmptv.u32.lo);
570 if (!irt_ispri(ir->t)) { 601 emit_setgl(as,
571 Reg src = ra_alloc1(as, ref, allow); 602 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
572 emit_setgl(as, src, tmptv.gcr); 603 tmptv.u32.hi);
604#else
605 Reg src = ra_alloc1(as, ref, RSET_FPR);
606 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
607 emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
608#endif
609 } else if (irref_isk(ref)) {
610 /* Use the number constant itself as a TValue. */
611 ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
612 } else {
613#if LJ_SOFTFP
614 lj_assertA(0, "unsplit FP op");
615#else
616 /* Otherwise force a spill and use the spill slot. */
617 emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
618#endif
619 }
620 } else {
621 /* Otherwise use g->tmptv to hold the TValue. */
622 Reg type;
623 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
624 if (!irt_ispri(ir->t)) {
625 Reg src = ra_alloc1(as, ref, RSET_GPR);
626 emit_setgl(as, src, tmptv.gcr);
627 }
628 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
629 type = ra_alloc1(as, ref+1, RSET_GPR);
630 else
631 type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
632 emit_setgl(as, type, tmptv.it);
573 } 633 }
574 type = ra_allock(as, irt_toitype(ir->t), allow);
575 emit_setgl(as, type, tmptv.it);
576 }
577}
578
579static void asm_tostr(ASMState *as, IRIns *ir)
580{
581 IRRef args[2];
582 args[0] = ASMREF_L;
583 as->gcsteps++;
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
586 args[1] = ASMREF_TMP1; /* const lua_Number * */
587 asm_setupresult(as, ir, ci); /* GCstr * */
588 asm_gencall(as, ci, args);
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
590 } else { 634 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 635 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 } 636 }
596} 637}
597 638
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 639static void asm_aref(ASMState *as, IRIns *ir)
601{ 640{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 641 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -636,11 +675,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
636 Reg tisnum = RID_NONE, tmpnum = RID_NONE; 675 Reg tisnum = RID_NONE, tmpnum = RID_NONE;
637 IRRef refkey = ir->op2; 676 IRRef refkey = ir->op2;
638 IRIns *irkey = IR(refkey); 677 IRIns *irkey = IR(refkey);
678 int isk = irref_isk(refkey);
639 IRType1 kt = irkey->t; 679 IRType1 kt = irkey->t;
640 uint32_t khash; 680 uint32_t khash;
641 MCLabel l_end, l_loop, l_next; 681 MCLabel l_end, l_loop, l_next;
642 682
643 rset_clear(allow, tab); 683 rset_clear(allow, tab);
684#if LJ_SOFTFP
685 if (!isk) {
686 key = ra_alloc1(as, refkey, allow);
687 rset_clear(allow, key);
688 if (irkey[1].o == IR_HIOP) {
689 if (ra_hasreg((irkey+1)->r)) {
690 tmpnum = (irkey+1)->r;
691 ra_noweak(as, tmpnum);
692 } else {
693 tmpnum = ra_allocref(as, refkey+1, allow);
694 }
695 rset_clear(allow, tmpnum);
696 }
697 }
698#else
644 if (irt_isnum(kt)) { 699 if (irt_isnum(kt)) {
645 key = ra_alloc1(as, refkey, RSET_FPR); 700 key = ra_alloc1(as, refkey, RSET_FPR);
646 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); 701 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -650,6 +705,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
650 key = ra_alloc1(as, refkey, allow); 705 key = ra_alloc1(as, refkey, allow);
651 rset_clear(allow, key); 706 rset_clear(allow, key);
652 } 707 }
708#endif
653 tmp2 = ra_scratch(as, allow); 709 tmp2 = ra_scratch(as, allow);
654 rset_clear(allow, tmp2); 710 rset_clear(allow, tmp2);
655 711
@@ -672,7 +728,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
672 asm_guardcc(as, CC_EQ); 728 asm_guardcc(as, CC_EQ);
673 else 729 else
674 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); 730 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
675 if (irt_isnum(kt)) { 731 if (!LJ_SOFTFP && irt_isnum(kt)) {
676 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); 732 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
677 emit_condbranch(as, PPCI_BC, CC_GE, l_next); 733 emit_condbranch(as, PPCI_BC, CC_GE, l_next);
678 emit_ab(as, PPCI_CMPLW, tmp1, tisnum); 734 emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -682,7 +738,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
682 emit_ab(as, PPCI_CMPW, tmp2, key); 738 emit_ab(as, PPCI_CMPW, tmp2, key);
683 emit_condbranch(as, PPCI_BC, CC_NE, l_next); 739 emit_condbranch(as, PPCI_BC, CC_NE, l_next);
684 } 740 }
685 emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); 741 if (LJ_SOFTFP && ra_hasreg(tmpnum))
742 emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
743 else
744 emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
686 if (!irt_ispri(kt)) 745 if (!irt_ispri(kt))
687 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); 746 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
688 } 747 }
@@ -691,35 +750,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
691 (((char *)as->mcp-(char *)l_loop) & 0xffffu); 750 (((char *)as->mcp-(char *)l_loop) & 0xffffu);
692 751
693 /* Load main position relative to tab->node into dest. */ 752 /* Load main position relative to tab->node into dest. */
694 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 753 khash = isk ? ir_khash(as, irkey) : 1;
695 if (khash == 0) { 754 if (khash == 0) {
696 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); 755 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
697 } else { 756 } else {
698 Reg tmphash = tmp1; 757 Reg tmphash = tmp1;
699 if (irref_isk(refkey)) 758 if (isk)
700 tmphash = ra_allock(as, khash, allow); 759 tmphash = ra_allock(as, khash, allow);
701 emit_tab(as, PPCI_ADD, dest, dest, tmp1); 760 emit_tab(as, PPCI_ADD, dest, dest, tmp1);
702 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); 761 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
703 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); 762 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
704 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); 763 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
705 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); 764 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
706 if (irref_isk(refkey)) { 765 if (isk) {
707 /* Nothing to do. */ 766 /* Nothing to do. */
708 } else if (irt_isstr(kt)) { 767 } else if (irt_isstr(kt)) {
709 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); 768 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid));
710 } else { /* Must match with hash*() in lj_tab.c. */ 769 } else { /* Must match with hash*() in lj_tab.c. */
711 emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); 770 emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
712 emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); 771 emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
713 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); 772 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
714 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); 773 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
715 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); 774 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
716 if (irt_isnum(kt)) { 775 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
776#if LJ_SOFTFP
777 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
778 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
779 emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
780#else
717 int32_t ofs = ra_spill(as, irkey); 781 int32_t ofs = ra_spill(as, irkey);
718 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); 782 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
719 emit_rotlwi(as, dest, tmp1, HASH_ROT1); 783 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
720 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); 784 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
721 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); 785 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
722 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); 786 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
787#endif
723 } else { 788 } else {
724 emit_asb(as, PPCI_XOR, tmp2, key, tmp1); 789 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
725 emit_rotlwi(as, dest, tmp1, HASH_ROT1); 790 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -740,7 +805,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
740 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 805 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
741 Reg key = RID_NONE, type = RID_TMP, idx = node; 806 Reg key = RID_NONE, type = RID_TMP, idx = node;
742 RegSet allow = rset_exclude(RSET_GPR, node); 807 RegSet allow = rset_exclude(RSET_GPR, node);
743 lua_assert(ofs % sizeof(Node) == 0); 808 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
744 if (ofs > 32736) { 809 if (ofs > 32736) {
745 idx = dest; 810 idx = dest;
746 rset_clear(allow, dest); 811 rset_clear(allow, dest);
@@ -773,20 +838,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
773 } 838 }
774} 839}
775 840
776static void asm_newref(ASMState *as, IRIns *ir)
777{
778 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
779 IRRef args[3];
780 if (ir->r == RID_SINK)
781 return;
782 args[0] = ASMREF_L; /* lua_State *L */
783 args[1] = ir->op1; /* GCtab *t */
784 args[2] = ASMREF_TMP1; /* cTValue *key */
785 asm_setupresult(as, ir, ci); /* TValue * */
786 asm_gencall(as, ci, args);
787 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
788}
789
790static void asm_uref(ASMState *as, IRIns *ir) 841static void asm_uref(ASMState *as, IRIns *ir)
791{ 842{
792 Reg dest = ra_dest(as, ir, RSET_GPR); 843 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -813,7 +864,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
813static void asm_fref(ASMState *as, IRIns *ir) 864static void asm_fref(ASMState *as, IRIns *ir)
814{ 865{
815 UNUSED(as); UNUSED(ir); 866 UNUSED(as); UNUSED(ir);
816 lua_assert(!ra_used(ir)); 867 lj_assertA(!ra_used(ir), "unfused FREF");
817} 868}
818 869
819static void asm_strref(ASMState *as, IRIns *ir) 870static void asm_strref(ASMState *as, IRIns *ir)
@@ -853,26 +904,28 @@ static void asm_strref(ASMState *as, IRIns *ir)
853 904
854/* -- Loads and stores ---------------------------------------------------- */ 905/* -- Loads and stores ---------------------------------------------------- */
855 906
856static PPCIns asm_fxloadins(IRIns *ir) 907static PPCIns asm_fxloadins(ASMState *as, IRIns *ir)
857{ 908{
909 UNUSED(as);
858 switch (irt_type(ir->t)) { 910 switch (irt_type(ir->t)) {
859 case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ 911 case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */
860 case IRT_U8: return PPCI_LBZ; 912 case IRT_U8: return PPCI_LBZ;
861 case IRT_I16: return PPCI_LHA; 913 case IRT_I16: return PPCI_LHA;
862 case IRT_U16: return PPCI_LHZ; 914 case IRT_U16: return PPCI_LHZ;
863 case IRT_NUM: return PPCI_LFD; 915 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD;
864 case IRT_FLOAT: return PPCI_LFS; 916 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
865 default: return PPCI_LWZ; 917 default: return PPCI_LWZ;
866 } 918 }
867} 919}
868 920
869static PPCIns asm_fxstoreins(IRIns *ir) 921static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir)
870{ 922{
923 UNUSED(as);
871 switch (irt_type(ir->t)) { 924 switch (irt_type(ir->t)) {
872 case IRT_I8: case IRT_U8: return PPCI_STB; 925 case IRT_I8: case IRT_U8: return PPCI_STB;
873 case IRT_I16: case IRT_U16: return PPCI_STH; 926 case IRT_I16: case IRT_U16: return PPCI_STH;
874 case IRT_NUM: return PPCI_STFD; 927 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD;
875 case IRT_FLOAT: return PPCI_STFS; 928 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
876 default: return PPCI_STW; 929 default: return PPCI_STW;
877 } 930 }
878} 931}
@@ -880,18 +933,24 @@ static PPCIns asm_fxstoreins(IRIns *ir)
880static void asm_fload(ASMState *as, IRIns *ir) 933static void asm_fload(ASMState *as, IRIns *ir)
881{ 934{
882 Reg dest = ra_dest(as, ir, RSET_GPR); 935 Reg dest = ra_dest(as, ir, RSET_GPR);
883 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 936 PPCIns pi = asm_fxloadins(as, ir);
884 PPCIns pi = asm_fxloadins(ir); 937 Reg idx;
885 int32_t ofs; 938 int32_t ofs;
886 if (ir->op2 == IRFL_TAB_ARRAY) { 939 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
887 ofs = asm_fuseabase(as, ir->op1); 940 idx = RID_JGL;
888 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 941 ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
889 emit_tai(as, PPCI_ADDI, dest, idx, ofs); 942 } else {
890 return; 943 idx = ra_alloc1(as, ir->op1, RSET_GPR);
944 if (ir->op2 == IRFL_TAB_ARRAY) {
945 ofs = asm_fuseabase(as, ir->op1);
946 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
947 emit_tai(as, PPCI_ADDI, dest, idx, ofs);
948 return;
949 }
891 } 950 }
951 ofs = field_ofs[ir->op2];
892 } 952 }
893 ofs = field_ofs[ir->op2]; 953 lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8");
894 lua_assert(!irt_isi8(ir->t));
895 emit_tai(as, pi, dest, idx, ofs); 954 emit_tai(as, pi, dest, idx, ofs);
896} 955}
897 956
@@ -902,21 +961,22 @@ static void asm_fstore(ASMState *as, IRIns *ir)
902 IRIns *irf = IR(ir->op1); 961 IRIns *irf = IR(ir->op1);
903 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 962 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
904 int32_t ofs = field_ofs[irf->op2]; 963 int32_t ofs = field_ofs[irf->op2];
905 PPCIns pi = asm_fxstoreins(ir); 964 PPCIns pi = asm_fxstoreins(as, ir);
906 emit_tai(as, pi, src, idx, ofs); 965 emit_tai(as, pi, src, idx, ofs);
907 } 966 }
908} 967}
909 968
910static void asm_xload(ASMState *as, IRIns *ir) 969static void asm_xload(ASMState *as, IRIns *ir)
911{ 970{
912 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 971 Reg dest = ra_dest(as, ir,
913 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 972 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
973 lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
914 if (irt_isi8(ir->t)) 974 if (irt_isi8(ir->t))
915 emit_as(as, PPCI_EXTSB, dest, dest); 975 emit_as(as, PPCI_EXTSB, dest, dest);
916 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 976 asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
917} 977}
918 978
919static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 979static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
920{ 980{
921 IRIns *irb; 981 IRIns *irb;
922 if (ir->r == RID_SINK) 982 if (ir->r == RID_SINK)
@@ -927,22 +987,35 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
927 Reg src = ra_alloc1(as, irb->op1, RSET_GPR); 987 Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
928 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); 988 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
929 } else { 989 } else {
930 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 990 Reg src = ra_alloc1(as, ir->op2,
931 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 991 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
992 asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
932 rset_exclude(RSET_GPR, src), ofs); 993 rset_exclude(RSET_GPR, src), ofs);
933 } 994 }
934} 995}
935 996
997#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
998
936static void asm_ahuvload(ASMState *as, IRIns *ir) 999static void asm_ahuvload(ASMState *as, IRIns *ir)
937{ 1000{
938 IRType1 t = ir->t; 1001 IRType1 t = ir->t;
939 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; 1002 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
940 RegSet allow = RSET_GPR; 1003 RegSet allow = RSET_GPR;
941 int32_t ofs = AHUREF_LSX; 1004 int32_t ofs = AHUREF_LSX;
1005 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
1006 t.irt = IRT_NUM;
1007 if (ra_used(ir+1)) {
1008 type = ra_dest(as, ir+1, allow);
1009 rset_clear(allow, type);
1010 }
1011 ofs = 0;
1012 }
942 if (ra_used(ir)) { 1013 if (ra_used(ir)) {
943 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1014 lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
944 if (!irt_isnum(t)) ofs = 0; 1015 irt_isint(ir->t) || irt_isaddr(ir->t),
945 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1016 "bad load type %d", irt_type(ir->t));
1017 if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
1018 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
946 rset_clear(allow, dest); 1019 rset_clear(allow, dest);
947 } 1020 }
948 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1021 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
@@ -951,12 +1024,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
951 asm_guardcc(as, CC_GE); 1024 asm_guardcc(as, CC_GE);
952 emit_ab(as, PPCI_CMPLW, type, tisnum); 1025 emit_ab(as, PPCI_CMPLW, type, tisnum);
953 if (ra_hasreg(dest)) { 1026 if (ra_hasreg(dest)) {
954 if (ofs == AHUREF_LSX) { 1027 if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
955 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, 1028 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
956 (idx&255)), (idx>>8))); 1029 (idx&255)), (idx>>8)));
957 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); 1030 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
958 } else { 1031 } else {
959 emit_fai(as, PPCI_LFD, dest, idx, ofs); 1032 emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
1033 ofs+4*LJ_SOFTFP);
960 } 1034 }
961 } 1035 }
962 } else { 1036 } else {
@@ -979,7 +1053,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
979 int32_t ofs = AHUREF_LSX; 1053 int32_t ofs = AHUREF_LSX;
980 if (ir->r == RID_SINK) 1054 if (ir->r == RID_SINK)
981 return; 1055 return;
982 if (irt_isnum(ir->t)) { 1056 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
983 src = ra_alloc1(as, ir->op2, RSET_FPR); 1057 src = ra_alloc1(as, ir->op2, RSET_FPR);
984 } else { 1058 } else {
985 if (!irt_ispri(ir->t)) { 1059 if (!irt_ispri(ir->t)) {
@@ -987,11 +1061,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
987 rset_clear(allow, src); 1061 rset_clear(allow, src);
988 ofs = 0; 1062 ofs = 0;
989 } 1063 }
990 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1064 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
1065 type = ra_alloc1(as, (ir+1)->op2, allow);
1066 else
1067 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
991 rset_clear(allow, type); 1068 rset_clear(allow, type);
992 } 1069 }
993 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1070 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
994 if (irt_isnum(ir->t)) { 1071 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
995 if (ofs == AHUREF_LSX) { 1072 if (ofs == AHUREF_LSX) {
996 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); 1073 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
997 emit_slwi(as, RID_TMP, (idx>>8), 3); 1074 emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -1016,21 +1093,38 @@ static void asm_sload(ASMState *as, IRIns *ir)
1016 IRType1 t = ir->t; 1093 IRType1 t = ir->t;
1017 Reg dest = RID_NONE, type = RID_NONE, base; 1094 Reg dest = RID_NONE, type = RID_NONE, base;
1018 RegSet allow = RSET_GPR; 1095 RegSet allow = RSET_GPR;
1019 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1096 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
1020 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1097 if (hiop)
1021 lua_assert(LJ_DUALNUM || 1098 t.irt = IRT_NUM;
1022 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1099 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1100 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1101 lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1102 "inconsistent SLOAD variant");
1103 lj_assertA(LJ_DUALNUM ||
1104 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)),
1105 "bad SLOAD type");
1106#if LJ_SOFTFP
1107 lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
1108 "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
1109 if (hiop && ra_used(ir+1)) {
1110 type = ra_dest(as, ir+1, allow);
1111 rset_clear(allow, type);
1112 }
1113#else
1023 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1114 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1024 dest = ra_scratch(as, RSET_FPR); 1115 dest = ra_scratch(as, RSET_FPR);
1025 asm_tointg(as, ir, dest); 1116 asm_tointg(as, ir, dest);
1026 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1117 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1027 } else if (ra_used(ir)) { 1118 } else
1028 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1119#endif
1029 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1120 if (ra_used(ir)) {
1121 lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
1122 "bad SLOAD type %d", irt_type(ir->t));
1123 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
1030 rset_clear(allow, dest); 1124 rset_clear(allow, dest);
1031 base = ra_alloc1(as, REF_BASE, allow); 1125 base = ra_alloc1(as, REF_BASE, allow);
1032 rset_clear(allow, base); 1126 rset_clear(allow, base);
1033 if ((ir->op2 & IRSLOAD_CONVERT)) { 1127 if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
1034 if (irt_isint(t)) { 1128 if (irt_isint(t)) {
1035 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 1129 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
1036 dest = ra_scratch(as, RSET_FPR); 1130 dest = ra_scratch(as, RSET_FPR);
@@ -1044,7 +1138,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1044 emit_fab(as, PPCI_FSUB, dest, dest, fbias); 1138 emit_fab(as, PPCI_FSUB, dest, dest, fbias);
1045 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); 1139 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
1046 emit_lsptr(as, PPCI_LFS, (fbias & 31), 1140 emit_lsptr(as, PPCI_LFS, (fbias & 31),
1047 (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), 1141 (void *)&as->J->k32[LJ_K32_2P52_2P31],
1048 rset_clear(allow, hibias)); 1142 rset_clear(allow, hibias));
1049 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); 1143 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
1050 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); 1144 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -1062,10 +1156,13 @@ dotypecheck:
1062 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1156 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1063 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); 1157 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
1064 asm_guardcc(as, CC_GE); 1158 asm_guardcc(as, CC_GE);
1065 emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); 1159#if !LJ_SOFTFP
1066 type = RID_TMP; 1160 type = RID_TMP;
1161#endif
1162 emit_ab(as, PPCI_CMPLW, type, tisnum);
1067 } 1163 }
1068 if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); 1164 if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
1165 base, ofs-(LJ_SOFTFP?0:4));
1069 } else { 1166 } else {
1070 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1167 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1071 asm_guardcc(as, CC_NE); 1168 asm_guardcc(as, CC_NE);
@@ -1083,19 +1180,16 @@ dotypecheck:
1083static void asm_cnew(ASMState *as, IRIns *ir) 1180static void asm_cnew(ASMState *as, IRIns *ir)
1084{ 1181{
1085 CTState *cts = ctype_ctsG(J2G(as->J)); 1182 CTState *cts = ctype_ctsG(J2G(as->J));
1086 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1183 CTypeID id = (CTypeID)IR(ir->op1)->i;
1087 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1184 CTSize sz;
1088 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1185 CTInfo info = lj_ctype_info(cts, id, &sz);
1089 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1186 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1090 IRRef args[2]; 1187 IRRef args[4];
1091 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1092 RegSet drop = RSET_SCRATCH; 1188 RegSet drop = RSET_SCRATCH;
1093 lua_assert(sz != CTSIZE_INVALID); 1189 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1190 "bad CNEW/CNEWI operands");
1094 1191
1095 args[0] = ASMREF_L; /* lua_State *L */
1096 args[1] = ASMREF_TMP1; /* MSize size */
1097 as->gcsteps++; 1192 as->gcsteps++;
1098
1099 if (ra_hasreg(ir->r)) 1193 if (ra_hasreg(ir->r))
1100 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1194 rset_clear(drop, ir->r); /* Dest reg handled below. */
1101 ra_evictset(as, drop); 1195 ra_evictset(as, drop);
@@ -1104,11 +1198,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1104 1198
1105 /* Initialize immutable cdata object. */ 1199 /* Initialize immutable cdata object. */
1106 if (ir->o == IR_CNEWI) { 1200 if (ir->o == IR_CNEWI) {
1201 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1107 int32_t ofs = sizeof(GCcdata); 1202 int32_t ofs = sizeof(GCcdata);
1108 lua_assert(sz == 4 || sz == 8); 1203 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1109 if (sz == 8) { 1204 if (sz == 8) {
1110 ofs += 4; 1205 ofs += 4;
1111 lua_assert((ir+1)->o == IR_HIOP); 1206 lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
1112 } 1207 }
1113 for (;;) { 1208 for (;;) {
1114 Reg r = ra_alloc1(as, ir->op2, allow); 1209 Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1117,18 +1212,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1117 if (ofs == sizeof(GCcdata)) break; 1212 if (ofs == sizeof(GCcdata)) break;
1118 ofs -= 4; ir++; 1213 ofs -= 4; ir++;
1119 } 1214 }
1215 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1216 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1217 args[0] = ASMREF_L; /* lua_State *L */
1218 args[1] = ir->op1; /* CTypeID id */
1219 args[2] = ir->op2; /* CTSize sz */
1220 args[3] = ASMREF_TMP1; /* CTSize align */
1221 asm_gencall(as, ci, args);
1222 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1223 return;
1120 } 1224 }
1225
1121 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1226 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1122 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1227 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1123 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1228 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1124 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); 1229 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
1125 emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1230 emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1231 args[0] = ASMREF_L; /* lua_State *L */
1232 args[1] = ASMREF_TMP1; /* MSize size */
1126 asm_gencall(as, ci, args); 1233 asm_gencall(as, ci, args);
1127 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1234 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1128 ra_releasetmp(as, ASMREF_TMP1)); 1235 ra_releasetmp(as, ASMREF_TMP1));
1129} 1236}
1130#else
1131#define asm_cnew(as, ir) ((void)0)
1132#endif 1237#endif
1133 1238
1134/* -- Write barriers ------------------------------------------------------ */ 1239/* -- Write barriers ------------------------------------------------------ */
@@ -1142,7 +1247,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1142 emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); 1247 emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
1143 emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1248 emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
1144 emit_setgl(as, tab, gc.grayagain); 1249 emit_setgl(as, tab, gc.grayagain);
1145 lua_assert(LJ_GC_BLACK == 0x04); 1250 lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK");
1146 emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ 1251 emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */
1147 emit_getgl(as, link, gc.grayagain); 1252 emit_getgl(as, link, gc.grayagain);
1148 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); 1253 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
@@ -1157,7 +1262,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1157 MCLabel l_end; 1262 MCLabel l_end;
1158 Reg obj, val, tmp; 1263 Reg obj, val, tmp;
1159 /* No need for other object barriers (yet). */ 1264 /* No need for other object barriers (yet). */
1160 lua_assert(IR(ir->op1)->o == IR_UREFC); 1265 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1161 ra_evictset(as, RSET_SCRATCH); 1266 ra_evictset(as, RSET_SCRATCH);
1162 l_end = emit_label(as); 1267 l_end = emit_label(as);
1163 args[0] = ASMREF_TMP1; /* global_State *g */ 1268 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1178,6 +1283,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1178 1283
1179/* -- Arithmetic and logic operations ------------------------------------- */ 1284/* -- Arithmetic and logic operations ------------------------------------- */
1180 1285
1286#if !LJ_SOFTFP
1181static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) 1287static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
1182{ 1288{
1183 Reg dest = ra_dest(as, ir, RSET_FPR); 1289 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1196,31 +1302,24 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1196 emit_fb(as, pi, dest, left); 1302 emit_fb(as, pi, dest, left);
1197} 1303}
1198 1304
1199static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1305static void asm_fpmath(ASMState *as, IRIns *ir)
1200{ 1306{
1201 IRIns *irp = IR(ir->op1); 1307 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1202 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1308 asm_fpunary(as, ir, PPCI_FSQRT);
1203 IRIns *irpp = IR(irp->op1); 1309 else
1204 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1310 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1205 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1206 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1207 IRRef args[2];
1208 args[0] = irpp->op1;
1209 args[1] = irp->op2;
1210 asm_setupresult(as, ir, ci);
1211 asm_gencall(as, ci, args);
1212 return 1;
1213 }
1214 }
1215 return 0;
1216} 1311}
1312#endif
1217 1313
1218static void asm_add(ASMState *as, IRIns *ir) 1314static void asm_add(ASMState *as, IRIns *ir)
1219{ 1315{
1316#if !LJ_SOFTFP
1220 if (irt_isnum(ir->t)) { 1317 if (irt_isnum(ir->t)) {
1221 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) 1318 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
1222 asm_fparith(as, ir, PPCI_FADD); 1319 asm_fparith(as, ir, PPCI_FADD);
1223 } else { 1320 } else
1321#endif
1322 {
1224 Reg dest = ra_dest(as, ir, RSET_GPR); 1323 Reg dest = ra_dest(as, ir, RSET_GPR);
1225 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1324 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1226 PPCIns pi; 1325 PPCIns pi;
@@ -1259,10 +1358,13 @@ static void asm_add(ASMState *as, IRIns *ir)
1259 1358
1260static void asm_sub(ASMState *as, IRIns *ir) 1359static void asm_sub(ASMState *as, IRIns *ir)
1261{ 1360{
1361#if !LJ_SOFTFP
1262 if (irt_isnum(ir->t)) { 1362 if (irt_isnum(ir->t)) {
1263 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) 1363 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
1264 asm_fparith(as, ir, PPCI_FSUB); 1364 asm_fparith(as, ir, PPCI_FSUB);
1265 } else { 1365 } else
1366#endif
1367 {
1266 PPCIns pi = PPCI_SUBF; 1368 PPCIns pi = PPCI_SUBF;
1267 Reg dest = ra_dest(as, ir, RSET_GPR); 1369 Reg dest = ra_dest(as, ir, RSET_GPR);
1268 Reg left, right; 1370 Reg left, right;
@@ -1288,9 +1390,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
1288 1390
1289static void asm_mul(ASMState *as, IRIns *ir) 1391static void asm_mul(ASMState *as, IRIns *ir)
1290{ 1392{
1393#if !LJ_SOFTFP
1291 if (irt_isnum(ir->t)) { 1394 if (irt_isnum(ir->t)) {
1292 asm_fparith(as, ir, PPCI_FMUL); 1395 asm_fparith(as, ir, PPCI_FMUL);
1293 } else { 1396 } else
1397#endif
1398 {
1294 PPCIns pi = PPCI_MULLW; 1399 PPCIns pi = PPCI_MULLW;
1295 Reg dest = ra_dest(as, ir, RSET_GPR); 1400 Reg dest = ra_dest(as, ir, RSET_GPR);
1296 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1401 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1312,11 +1417,16 @@ static void asm_mul(ASMState *as, IRIns *ir)
1312 } 1417 }
1313} 1418}
1314 1419
1420#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1421
1315static void asm_neg(ASMState *as, IRIns *ir) 1422static void asm_neg(ASMState *as, IRIns *ir)
1316{ 1423{
1424#if !LJ_SOFTFP
1317 if (irt_isnum(ir->t)) { 1425 if (irt_isnum(ir->t)) {
1318 asm_fpunary(as, ir, PPCI_FNEG); 1426 asm_fpunary(as, ir, PPCI_FNEG);
1319 } else { 1427 } else
1428#endif
1429 {
1320 Reg dest, left; 1430 Reg dest, left;
1321 PPCIns pi = PPCI_NEG; 1431 PPCIns pi = PPCI_NEG;
1322 if (as->flagmcp == as->mcp) { 1432 if (as->flagmcp == as->mcp) {
@@ -1330,6 +1440,8 @@ static void asm_neg(ASMState *as, IRIns *ir)
1330 } 1440 }
1331} 1441}
1332 1442
1443#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1444
1333static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1445static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1334{ 1446{
1335 Reg dest, left, right; 1447 Reg dest, left, right;
@@ -1345,6 +1457,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1345 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1457 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1346} 1458}
1347 1459
1460#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1461#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1462#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1463
1348#if LJ_HASFFI 1464#if LJ_HASFFI
1349static void asm_add64(ASMState *as, IRIns *ir) 1465static void asm_add64(ASMState *as, IRIns *ir)
1350{ 1466{
@@ -1424,7 +1540,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1424} 1540}
1425#endif 1541#endif
1426 1542
1427static void asm_bitnot(ASMState *as, IRIns *ir) 1543static void asm_bnot(ASMState *as, IRIns *ir)
1428{ 1544{
1429 Reg dest, left, right; 1545 Reg dest, left, right;
1430 PPCIns pi = PPCI_NOR; 1546 PPCIns pi = PPCI_NOR;
@@ -1451,7 +1567,7 @@ nofuse:
1451 emit_asb(as, pi, dest, left, right); 1567 emit_asb(as, pi, dest, left, right);
1452} 1568}
1453 1569
1454static void asm_bitswap(ASMState *as, IRIns *ir) 1570static void asm_bswap(ASMState *as, IRIns *ir)
1455{ 1571{
1456 Reg dest = ra_dest(as, ir, RSET_GPR); 1572 Reg dest = ra_dest(as, ir, RSET_GPR);
1457 IRIns *irx; 1573 IRIns *irx;
@@ -1472,32 +1588,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1472 } 1588 }
1473} 1589}
1474 1590
1475static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1476{
1477 Reg dest = ra_dest(as, ir, RSET_GPR);
1478 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1479 if (irref_isk(ir->op2)) {
1480 int32_t k = IR(ir->op2)->i;
1481 Reg tmp = left;
1482 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1483 if (!checku16(k)) {
1484 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1485 if ((k & 0xffff) == 0) return;
1486 }
1487 emit_asi(as, pik, dest, left, k);
1488 return;
1489 }
1490 }
1491 /* May fail due to spills/restores above, but simplifies the logic. */
1492 if (as->flagmcp == as->mcp) {
1493 as->flagmcp = NULL;
1494 as->mcp++;
1495 pi |= PPCF_DOT;
1496 }
1497 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1498 emit_asb(as, pi, dest, left, right);
1499}
1500
1501/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1591/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1502static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1592static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1503{ 1593{
@@ -1528,7 +1618,7 @@ nofuse:
1528 *--as->mcp = pi | PPCF_T(left); 1618 *--as->mcp = pi | PPCF_T(left);
1529} 1619}
1530 1620
1531static void asm_bitand(ASMState *as, IRIns *ir) 1621static void asm_band(ASMState *as, IRIns *ir)
1532{ 1622{
1533 Reg dest, left, right; 1623 Reg dest, left, right;
1534 IRRef lref = ir->op1; 1624 IRRef lref = ir->op1;
@@ -1583,6 +1673,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1583 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1673 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1584} 1674}
1585 1675
1676static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1677{
1678 Reg dest = ra_dest(as, ir, RSET_GPR);
1679 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1680 if (irref_isk(ir->op2)) {
1681 int32_t k = IR(ir->op2)->i;
1682 Reg tmp = left;
1683 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1684 if (!checku16(k)) {
1685 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1686 if ((k & 0xffff) == 0) return;
1687 }
1688 emit_asi(as, pik, dest, left, k);
1689 return;
1690 }
1691 }
1692 /* May fail due to spills/restores above, but simplifies the logic. */
1693 if (as->flagmcp == as->mcp) {
1694 as->flagmcp = NULL;
1695 as->mcp++;
1696 pi |= PPCF_DOT;
1697 }
1698 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1699 emit_asb(as, pi, dest, left, right);
1700}
1701
1702#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1703#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1704
1586static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1705static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1587{ 1706{
1588 Reg dest, left; 1707 Reg dest, left;
@@ -1608,9 +1727,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1608 } 1727 }
1609} 1728}
1610 1729
1730#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1731#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1732#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1733#define asm_brol(as, ir) \
1734 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1735 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1736#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR")
1737
1738#if LJ_SOFTFP
1739static void asm_sfpmin_max(ASMState *as, IRIns *ir)
1740{
1741 CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
1742 IRRef args[4];
1743 MCLabel l_right, l_end;
1744 Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
1745 Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
1746 Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
1747 PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
1748 righthi = (lefthi >> 8); lefthi &= 255;
1749 rightlo = (leftlo >> 8); leftlo &= 255;
1750 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1751 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1752 l_end = emit_label(as);
1753 if (desthi != righthi) emit_mr(as, desthi, righthi);
1754 if (destlo != rightlo) emit_mr(as, destlo, rightlo);
1755 l_right = emit_label(as);
1756 if (l_end != l_right) emit_jmp(as, l_end);
1757 if (desthi != lefthi) emit_mr(as, desthi, lefthi);
1758 if (destlo != leftlo) emit_mr(as, destlo, leftlo);
1759 if (l_right == as->mcp+1) {
1760 cond ^= 4; l_right = l_end; ++as->mcp;
1761 }
1762 emit_condbranch(as, PPCI_BC, cond, l_right);
1763 ra_evictset(as, RSET_SCRATCH);
1764 emit_cmpi(as, RID_RET, 1);
1765 asm_gencall(as, &ci, args);
1766}
1767#endif
1768
1611static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1769static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1612{ 1770{
1613 if (irt_isnum(ir->t)) { 1771 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1614 Reg dest = ra_dest(as, ir, RSET_FPR); 1772 Reg dest = ra_dest(as, ir, RSET_FPR);
1615 Reg tmp = dest; 1773 Reg tmp = dest;
1616 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1774 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1618,9 +1776,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1618 if (tmp == left || tmp == right) 1776 if (tmp == left || tmp == right)
1619 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, 1777 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
1620 dest), left), right)); 1778 dest), left), right));
1621 emit_facb(as, PPCI_FSEL, dest, tmp, 1779 emit_facb(as, PPCI_FSEL, dest, tmp, left, right);
1622 ismax ? left : right, ismax ? right : left); 1780 emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left);
1623 emit_fab(as, PPCI_FSUB, tmp, left, right);
1624 } else { 1781 } else {
1625 Reg dest = ra_dest(as, ir, RSET_GPR); 1782 Reg dest = ra_dest(as, ir, RSET_GPR);
1626 Reg tmp1 = RID_TMP, tmp2 = dest; 1783 Reg tmp1 = RID_TMP, tmp2 = dest;
@@ -1638,6 +1795,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1638 } 1795 }
1639} 1796}
1640 1797
1798#define asm_min(as, ir) asm_min_max(as, ir, 0)
1799#define asm_max(as, ir) asm_min_max(as, ir, 1)
1800
1641/* -- Comparisons --------------------------------------------------------- */ 1801/* -- Comparisons --------------------------------------------------------- */
1642 1802
1643#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1803#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1695,7 +1855,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
1695static void asm_comp(ASMState *as, IRIns *ir) 1855static void asm_comp(ASMState *as, IRIns *ir)
1696{ 1856{
1697 PPCCC cc = asm_compmap[ir->o]; 1857 PPCCC cc = asm_compmap[ir->o];
1698 if (irt_isnum(ir->t)) { 1858 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1699 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1859 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1700 right = (left >> 8); left &= 255; 1860 right = (left >> 8); left &= 255;
1701 asm_guardcc(as, (cc >> 4)); 1861 asm_guardcc(as, (cc >> 4));
@@ -1714,6 +1874,46 @@ static void asm_comp(ASMState *as, IRIns *ir)
1714 } 1874 }
1715} 1875}
1716 1876
1877#define asm_equal(as, ir) asm_comp(as, ir)
1878
1879#if LJ_SOFTFP
1880/* SFP comparisons. */
1881static void asm_sfpcomp(ASMState *as, IRIns *ir)
1882{
1883 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
1884 RegSet drop = RSET_SCRATCH;
1885 Reg r;
1886 IRRef args[4];
1887 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1888 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1889
1890 for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
1891 if (!rset_test(as->freeset, r) &&
1892 regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
1893 rset_clear(drop, r);
1894 }
1895 ra_evictset(as, drop);
1896 asm_setupresult(as, ir, ci);
1897 switch ((IROp)ir->o) {
1898 case IR_ULT:
1899 asm_guardcc(as, CC_EQ);
1900 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1901 case IR_ULE:
1902 asm_guardcc(as, CC_EQ);
1903 emit_ai(as, PPCI_CMPWI, RID_RET, 1);
1904 break;
1905 case IR_GE: case IR_GT:
1906 asm_guardcc(as, CC_EQ);
1907 emit_ai(as, PPCI_CMPWI, RID_RET, 2);
1908 default:
1909 asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
1910 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1911 break;
1912 }
1913 asm_gencall(as, ci, args);
1914}
1915#endif
1916
1717#if LJ_HASFFI 1917#if LJ_HASFFI
1718/* 64 bit integer comparisons. */ 1918/* 64 bit integer comparisons. */
1719static void asm_comp64(ASMState *as, IRIns *ir) 1919static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1743,47 +1943,89 @@ static void asm_comp64(ASMState *as, IRIns *ir)
1743/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 1943/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1744static void asm_hiop(ASMState *as, IRIns *ir) 1944static void asm_hiop(ASMState *as, IRIns *ir)
1745{ 1945{
1746#if LJ_HASFFI 1946#if LJ_HASFFI || LJ_SOFTFP
1747 /* HIOP is marked as a store because it needs its own DCE logic. */ 1947 /* HIOP is marked as a store because it needs its own DCE logic. */
1748 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 1948 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1749 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 1949 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1750 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 1950 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
1751 as->curins--; /* Always skip the CONV. */ 1951 as->curins--; /* Always skip the CONV. */
1952#if LJ_HASFFI && !LJ_SOFTFP
1752 if (usehi || uselo) 1953 if (usehi || uselo)
1753 asm_conv64(as, ir); 1954 asm_conv64(as, ir);
1754 return; 1955 return;
1956#endif
1755 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 1957 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
1756 as->curins--; /* Always skip the loword comparison. */ 1958 as->curins--; /* Always skip the loword comparison. */
1959#if LJ_SOFTFP
1960 if (!irt_isint(ir->t)) {
1961 asm_sfpcomp(as, ir-1);
1962 return;
1963 }
1964#endif
1965#if LJ_HASFFI
1757 asm_comp64(as, ir); 1966 asm_comp64(as, ir);
1967#endif
1758 return; 1968 return;
1969#if LJ_SOFTFP
1970 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1971 as->curins--; /* Always skip the loword min/max. */
1972 if (uselo || usehi)
1973 asm_sfpmin_max(as, ir-1);
1974 return;
1975#endif
1759 } else if ((ir-1)->o == IR_XSTORE) { 1976 } else if ((ir-1)->o == IR_XSTORE) {
1760 as->curins--; /* Handle both stores here. */ 1977 as->curins--; /* Handle both stores here. */
1761 if ((ir-1)->r != RID_SINK) { 1978 if ((ir-1)->r != RID_SINK) {
1762 asm_xstore(as, ir, 0); 1979 asm_xstore_(as, ir, 0);
1763 asm_xstore(as, ir-1, 4); 1980 asm_xstore_(as, ir-1, 4);
1764 } 1981 }
1765 return; 1982 return;
1766 } 1983 }
1767 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1984 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1768 switch ((ir-1)->o) { 1985 switch ((ir-1)->o) {
1986#if LJ_HASFFI
1769 case IR_ADD: as->curins--; asm_add64(as, ir); break; 1987 case IR_ADD: as->curins--; asm_add64(as, ir); break;
1770 case IR_SUB: as->curins--; asm_sub64(as, ir); break; 1988 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1771 case IR_NEG: as->curins--; asm_neg64(as, ir); break; 1989 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
1990#endif
1991#if LJ_SOFTFP
1992 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1993 case IR_STRTO:
1994 if (!uselo)
1995 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
1996 break;
1997#endif
1772 case IR_CALLN: 1998 case IR_CALLN:
1999 case IR_CALLS:
1773 case IR_CALLXS: 2000 case IR_CALLXS:
1774 if (!uselo) 2001 if (!uselo)
1775 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 2002 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1776 break; 2003 break;
2004#if LJ_SOFTFP
2005 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
2006#endif
1777 case IR_CNEWI: 2007 case IR_CNEWI:
1778 /* Nothing to do here. Handled by lo op itself. */ 2008 /* Nothing to do here. Handled by lo op itself. */
1779 break; 2009 break;
1780 default: lua_assert(0); break; 2010 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
1781 } 2011 }
1782#else 2012#else
1783 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ 2013 /* Unused without SOFTFP or FFI. */
2014 UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
1784#endif 2015#endif
1785} 2016}
1786 2017
2018/* -- Profiling ----------------------------------------------------------- */
2019
2020static void asm_prof(ASMState *as, IRIns *ir)
2021{
2022 UNUSED(ir);
2023 asm_guardcc(as, CC_NE);
2024 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
2025 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
2026 (int32_t)offsetof(global_State, hookmask));
2027}
2028
1787/* -- Stack handling ------------------------------------------------------ */ 2029/* -- Stack handling ------------------------------------------------------ */
1788 2030
1789/* Check Lua stack size for overflow. Use exit handler as fallback. */ 2031/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1805,7 +2047,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1805 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 2047 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1806 if (pbase == RID_TMP) 2048 if (pbase == RID_TMP)
1807 emit_getgl(as, RID_TMP, jit_base); 2049 emit_getgl(as, RID_TMP, jit_base);
1808 emit_getgl(as, tmp, jit_L); 2050 emit_getgl(as, tmp, cur_L);
1809 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2051 if (allow == RSET_EMPTY) /* Spill temp. register. */
1810 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 2052 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1811} 2053}
@@ -1826,12 +2068,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1826 if ((sn & SNAP_NORESTORE)) 2068 if ((sn & SNAP_NORESTORE))
1827 continue; 2069 continue;
1828 if (irt_isnum(ir->t)) { 2070 if (irt_isnum(ir->t)) {
2071#if LJ_SOFTFP
2072 Reg tmp;
2073 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2074 /* LJ_SOFTFP: must be a number constant. */
2075 lj_assertA(irref_isk(ref), "unsplit FP op");
2076 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2077 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2078 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2079 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2080 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2081#else
1829 Reg src = ra_alloc1(as, ref, RSET_FPR); 2082 Reg src = ra_alloc1(as, ref, RSET_FPR);
1830 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); 2083 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
2084#endif
1831 } else { 2085 } else {
1832 Reg type; 2086 Reg type;
1833 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2087 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1834 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2088 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
2089 "restore of IR type %d", irt_type(ir->t));
1835 if (!irt_ispri(ir->t)) { 2090 if (!irt_ispri(ir->t)) {
1836 Reg src = ra_alloc1(as, ref, allow); 2091 Reg src = ra_alloc1(as, ref, allow);
1837 rset_clear(allow, src); 2092 rset_clear(allow, src);
@@ -1840,6 +2095,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1840 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2095 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1841 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2096 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1842 type = ra_allock(as, (int32_t)(*flinks--), allow); 2097 type = ra_allock(as, (int32_t)(*flinks--), allow);
2098#if LJ_SOFTFP
2099 } else if ((sn & SNAP_SOFTFPNUM)) {
2100 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2101#endif
1843 } else { 2102 } else {
1844 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2103 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1845 } 2104 }
@@ -1847,7 +2106,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1847 } 2106 }
1848 checkmclim(as); 2107 checkmclim(as);
1849 } 2108 }
1850 lua_assert(map + nent == flinks); 2109 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
1851} 2110}
1852 2111
1853/* -- GC handling --------------------------------------------------------- */ 2112/* -- GC handling --------------------------------------------------------- */
@@ -1949,7 +2208,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1949 as->mctop = p; 2208 as->mctop = p;
1950 } else { 2209 } else {
1951 /* Patch stack adjustment. */ 2210 /* Patch stack adjustment. */
1952 lua_assert(checki16(CFRAME_SIZE+spadj)); 2211 lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
1953 p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); 2212 p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
1954 p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; 2213 p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
1955 } 2214 }
@@ -1970,147 +2229,25 @@ static void asm_tail_prep(ASMState *as)
1970 } 2229 }
1971} 2230}
1972 2231
1973/* -- Instruction dispatch ------------------------------------------------ */
1974
1975/* Assemble a single instruction. */
1976static void asm_ir(ASMState *as, IRIns *ir)
1977{
1978 switch ((IROp)ir->o) {
1979 /* Miscellaneous ops. */
1980 case IR_LOOP: asm_loop(as); break;
1981 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1982 case IR_USE:
1983 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1984 case IR_PHI: asm_phi(as, ir); break;
1985 case IR_HIOP: asm_hiop(as, ir); break;
1986 case IR_GCSTEP: asm_gcstep(as, ir); break;
1987
1988 /* Guarded assertions. */
1989 case IR_EQ: case IR_NE:
1990 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1991 as->curins--;
1992 asm_href(as, ir-1, (IROp)ir->o);
1993 break;
1994 }
1995 /* fallthrough */
1996 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1997 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1998 case IR_ABC:
1999 asm_comp(as, ir);
2000 break;
2001
2002 case IR_RETF: asm_retf(as, ir); break;
2003
2004 /* Bit ops. */
2005 case IR_BNOT: asm_bitnot(as, ir); break;
2006 case IR_BSWAP: asm_bitswap(as, ir); break;
2007
2008 case IR_BAND: asm_bitand(as, ir); break;
2009 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2010 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2011
2012 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2013 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2014 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2015 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2016 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2017 case IR_BROR: lua_assert(0); break;
2018
2019 /* Arithmetic ops. */
2020 case IR_ADD: asm_add(as, ir); break;
2021 case IR_SUB: asm_sub(as, ir); break;
2022 case IR_MUL: asm_mul(as, ir); break;
2023 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2024 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2025 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2026 case IR_NEG: asm_neg(as, ir); break;
2027
2028 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2029 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2030 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2031 case IR_MIN: asm_min_max(as, ir, 0); break;
2032 case IR_MAX: asm_min_max(as, ir, 1); break;
2033 case IR_FPMATH:
2034 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2035 break;
2036 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2037 asm_fpunary(as, ir, PPCI_FSQRT);
2038 else
2039 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2040 break;
2041
2042 /* Overflow-checking arithmetic ops. */
2043 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2044 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2045 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2046
2047 /* Memory references. */
2048 case IR_AREF: asm_aref(as, ir); break;
2049 case IR_HREF: asm_href(as, ir, 0); break;
2050 case IR_HREFK: asm_hrefk(as, ir); break;
2051 case IR_NEWREF: asm_newref(as, ir); break;
2052 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2053 case IR_FREF: asm_fref(as, ir); break;
2054 case IR_STRREF: asm_strref(as, ir); break;
2055
2056 /* Loads and stores. */
2057 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2058 asm_ahuvload(as, ir);
2059 break;
2060 case IR_FLOAD: asm_fload(as, ir); break;
2061 case IR_XLOAD: asm_xload(as, ir); break;
2062 case IR_SLOAD: asm_sload(as, ir); break;
2063
2064 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2065 case IR_FSTORE: asm_fstore(as, ir); break;
2066 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2067
2068 /* Allocations. */
2069 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2070 case IR_TNEW: asm_tnew(as, ir); break;
2071 case IR_TDUP: asm_tdup(as, ir); break;
2072 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2073
2074 /* Write barriers. */
2075 case IR_TBAR: asm_tbar(as, ir); break;
2076 case IR_OBAR: asm_obar(as, ir); break;
2077
2078 /* Type conversions. */
2079 case IR_CONV: asm_conv(as, ir); break;
2080 case IR_TOBIT: asm_tobit(as, ir); break;
2081 case IR_TOSTR: asm_tostr(as, ir); break;
2082 case IR_STRTO: asm_strto(as, ir); break;
2083
2084 /* Calls. */
2085 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2086 case IR_CALLXS: asm_callx(as, ir); break;
2087 case IR_CARG: break;
2088
2089 default:
2090 setintV(&as->J->errinfo, ir->o);
2091 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2092 break;
2093 }
2094}
2095
2096/* -- Trace setup --------------------------------------------------------- */ 2232/* -- Trace setup --------------------------------------------------------- */
2097 2233
2098/* Ensure there are enough stack slots for call arguments. */ 2234/* Ensure there are enough stack slots for call arguments. */
2099static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2235static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2100{ 2236{
2101 IRRef args[CCI_NARGS_MAX*2]; 2237 IRRef args[CCI_NARGS_MAX*2];
2102 uint32_t i, nargs = (int)CCI_NARGS(ci); 2238 uint32_t i, nargs = CCI_XNARGS(ci);
2103 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2239 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2104 asm_collectargs(as, ir, ci, args); 2240 asm_collectargs(as, ir, ci, args);
2105 for (i = 0; i < nargs; i++) 2241 for (i = 0; i < nargs; i++)
2106 if (args[i] && irt_isfp(IR(args[i])->t)) { 2242 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
2107 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; 2243 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
2108 } else { 2244 } else {
2109 if (ngpr > 0) ngpr--; else nslots++; 2245 if (ngpr > 0) ngpr--; else nslots++;
2110 } 2246 }
2111 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2247 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2112 as->evenspill = nslots; 2248 as->evenspill = nslots;
2113 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); 2249 return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
2250 REGSP_HINT(RID_RET);
2114} 2251}
2115 2252
2116static void asm_setup_target(ASMState *as) 2253static void asm_setup_target(ASMState *as)
@@ -2150,7 +2287,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2150 } else if ((ins & 0xfc000000u) == PPCI_B && 2287 } else if ((ins & 0xfc000000u) == PPCI_B &&
2151 ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { 2288 ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
2152 ptrdiff_t delta = (char *)target - (char *)p; 2289 ptrdiff_t delta = (char *)target - (char *)p;
2153 lua_assert(((delta + 0x02000000) >> 26) == 0); 2290 lj_assertJ(((delta + 0x02000000) >> 26) == 0,
2291 "branch target out of range");
2154 *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); 2292 *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
2155 if (!cstart) cstart = p; 2293 if (!cstart) cstart = p;
2156 } 2294 }
@@ -2158,7 +2296,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2158 /* Always patch long-range branch in exit stub itself. Except, if we can't. */ 2296 /* Always patch long-range branch in exit stub itself. Except, if we can't. */
2159 if (patchlong) { 2297 if (patchlong) {
2160 ptrdiff_t delta = (char *)target - (char *)px - clearso; 2298 ptrdiff_t delta = (char *)target - (char *)px - clearso;
2161 lua_assert(((delta + 0x02000000) >> 26) == 0); 2299 lj_assertJ(((delta + 0x02000000) >> 26) == 0,
2300 "branch target out of range");
2162 *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); 2301 *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
2163 } 2302 }
2164 if (!cstart) cstart = px; 2303 if (!cstart) cstart = px;
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 7b75a66e..c8ed46d2 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -21,15 +21,17 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
21 } 21 }
22 /* Push the high byte of the exitno for each exit stub group. */ 22 /* Push the high byte of the exitno for each exit stub group. */
23 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); 23 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
24#if !LJ_GC64
24 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ 25 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
25 *mxp++ = XI_MOVmi; 26 *mxp++ = XI_MOVmi;
26 *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); 27 *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
27 *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); 28 *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
28 *mxp++ = 2*sizeof(void *); 29 *mxp++ = 2*sizeof(void *);
29 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; 30 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
31#endif
30 /* Jump to exit handler which fills in the ExitState. */ 32 /* Jump to exit handler which fills in the ExitState. */
31 *mxp++ = XI_JMP; mxp += 4; 33 *mxp++ = XI_JMP; mxp += 4;
32 *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); 34 *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler);
33 /* Commit the code for this group (even if assembly fails later on). */ 35 /* Commit the code for this group (even if assembly fails later on). */
34 lj_mcode_commitbot(as->J, mxp); 36 lj_mcode_commitbot(as->J, mxp);
35 as->mcbot = mxp; 37 as->mcbot = mxp;
@@ -58,14 +60,18 @@ static void asm_guardcc(ASMState *as, int cc)
58 MCode *p = as->mcp; 60 MCode *p = as->mcp;
59 if (LJ_UNLIKELY(p == as->invmcp)) { 61 if (LJ_UNLIKELY(p == as->invmcp)) {
60 as->loopinv = 1; 62 as->loopinv = 1;
61 *(int32_t *)(p+1) = jmprel(p+5, target); 63 *(int32_t *)(p+1) = jmprel(as->J, p+5, target);
62 target = p; 64 target = p;
63 cc ^= 1; 65 cc ^= 1;
64 if (as->realign) { 66 if (as->realign) {
67 if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
68 as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */
65 emit_sjcc(as, cc, target); 69 emit_sjcc(as, cc, target);
66 return; 70 return;
67 } 71 }
68 } 72 }
73 if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
74 as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */
69 emit_jcc(as, cc, target); 75 emit_jcc(as, cc, target);
70} 76}
71 77
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
79{ 85{
80 if (irref_isk(ref)) { 86 if (irref_isk(ref)) {
81 IRIns *ir = IR(ref); 87 IRIns *ir = IR(ref);
88#if LJ_GC64
89 if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
90 *k = ir->i;
91 return 1;
92 } else if (checki32((int64_t)ir_k64(ir)->u64)) {
93 *k = (int32_t)ir_k64(ir)->u64;
94 return 1;
95 }
96#else
82 if (ir->o != IR_KINT64) { 97 if (ir->o != IR_KINT64) {
83 *k = ir->i; 98 *k = ir->i;
84 return 1; 99 return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
86 *k = (int32_t)ir_kint64(ir)->u64; 101 *k = (int32_t)ir_kint64(ir)->u64;
87 return 1; 102 return 1;
88 } 103 }
104#endif
89 } 105 }
90 return 0; 106 return 0;
91} 107}
@@ -115,7 +131,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
115 as->mrm.ofs = 0; 131 as->mrm.ofs = 0;
116 if (irb->o == IR_FLOAD) { 132 if (irb->o == IR_FLOAD) {
117 IRIns *ira = IR(irb->op1); 133 IRIns *ira = IR(irb->op1);
118 lua_assert(irb->op2 == IRFL_TAB_ARRAY); 134 lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
119 /* We can avoid the FLOAD of t->array for colocated arrays. */ 135 /* We can avoid the FLOAD of t->array for colocated arrays. */
120 if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && 136 if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
121 !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { 137 !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
@@ -134,7 +150,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
134static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) 150static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
135{ 151{
136 IRIns *irx; 152 IRIns *irx;
137 lua_assert(ir->o == IR_AREF); 153 lj_assertA(ir->o == IR_AREF, "expected AREF");
138 as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); 154 as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow);
139 irx = IR(ir->op2); 155 irx = IR(ir->op2);
140 if (irref_isk(ir->op2)) { 156 if (irref_isk(ir->op2)) {
@@ -185,14 +201,35 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
185 if (irref_isk(ir->op1)) { 201 if (irref_isk(ir->op1)) {
186 GCfunc *fn = ir_kfunc(IR(ir->op1)); 202 GCfunc *fn = ir_kfunc(IR(ir->op1));
187 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; 203 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
204#if LJ_GC64
205 int64_t ofs = dispofs(as, &uv->tv);
206 if (checki32(ofs) && checki32(ofs+4)) {
207 as->mrm.ofs = (int32_t)ofs;
208 as->mrm.base = RID_DISPATCH;
209 as->mrm.idx = RID_NONE;
210 return;
211 }
212#else
188 as->mrm.ofs = ptr2addr(&uv->tv); 213 as->mrm.ofs = ptr2addr(&uv->tv);
189 as->mrm.base = as->mrm.idx = RID_NONE; 214 as->mrm.base = as->mrm.idx = RID_NONE;
190 return; 215 return;
216#endif
191 } 217 }
192 break; 218 break;
219 case IR_TMPREF:
220#if LJ_GC64
221 as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv);
222 as->mrm.base = RID_DISPATCH;
223 as->mrm.idx = RID_NONE;
224#else
225 as->mrm.ofs = igcptr(&J2G(as->J)->tmptv);
226 as->mrm.base = as->mrm.idx = RID_NONE;
227#endif
228 return;
193 default: 229 default:
194 lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO || 230 lj_assertA(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
195 ir->o == IR_KKPTR); 231 ir->o == IR_KKPTR,
232 "bad IR op %d", ir->o);
196 break; 233 break;
197 } 234 }
198 } 235 }
@@ -204,26 +241,53 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
204/* Fuse FLOAD/FREF reference into memory operand. */ 241/* Fuse FLOAD/FREF reference into memory operand. */
205static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) 242static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
206{ 243{
207 lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); 244 lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF,
208 as->mrm.ofs = field_ofs[ir->op2]; 245 "bad IR op %d", ir->o);
209 as->mrm.idx = RID_NONE; 246 as->mrm.idx = RID_NONE;
247 if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
248#if LJ_GC64
249 as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch);
250 as->mrm.base = RID_DISPATCH;
251#else
252 as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J));
253 as->mrm.base = RID_NONE;
254#endif
255 return;
256 }
257 as->mrm.ofs = field_ofs[ir->op2];
210 if (irref_isk(ir->op1)) { 258 if (irref_isk(ir->op1)) {
211 as->mrm.ofs += IR(ir->op1)->i; 259 IRIns *op1 = IR(ir->op1);
260#if LJ_GC64
261 if (ir->op1 == REF_NIL) {
262 as->mrm.ofs -= GG_OFS(dispatch);
263 as->mrm.base = RID_DISPATCH;
264 return;
265 } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
266 intptr_t ofs = dispofs(as, ir_kptr(op1));
267 if (checki32(as->mrm.ofs + ofs)) {
268 as->mrm.ofs += (int32_t)ofs;
269 as->mrm.base = RID_DISPATCH;
270 return;
271 }
272 }
273#else
274 as->mrm.ofs += op1->i;
212 as->mrm.base = RID_NONE; 275 as->mrm.base = RID_NONE;
213 } else { 276 return;
214 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); 277#endif
215 } 278 }
279 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
216} 280}
217 281
218/* Fuse string reference into memory operand. */ 282/* Fuse string reference into memory operand. */
219static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) 283static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
220{ 284{
221 IRIns *irr; 285 IRIns *irr;
222 lua_assert(ir->o == IR_STRREF); 286 lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o);
223 as->mrm.base = as->mrm.idx = RID_NONE; 287 as->mrm.base = as->mrm.idx = RID_NONE;
224 as->mrm.scale = XM_SCALE1; 288 as->mrm.scale = XM_SCALE1;
225 as->mrm.ofs = sizeof(GCstr); 289 as->mrm.ofs = sizeof(GCstr);
226 if (irref_isk(ir->op1)) { 290 if (!LJ_GC64 && irref_isk(ir->op1)) {
227 as->mrm.ofs += IR(ir->op1)->i; 291 as->mrm.ofs += IR(ir->op1)->i;
228 } else { 292 } else {
229 Reg r = ra_alloc1(as, ir->op1, allow); 293 Reg r = ra_alloc1(as, ir->op1, allow);
@@ -255,10 +319,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
255 IRIns *ir = IR(ref); 319 IRIns *ir = IR(ref);
256 as->mrm.idx = RID_NONE; 320 as->mrm.idx = RID_NONE;
257 if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { 321 if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
322#if LJ_GC64
323 intptr_t ofs = dispofs(as, ir_kptr(ir));
324 if (checki32(ofs)) {
325 as->mrm.ofs = (int32_t)ofs;
326 as->mrm.base = RID_DISPATCH;
327 return;
328 }
329 } if (0) {
330#else
258 as->mrm.ofs = ir->i; 331 as->mrm.ofs = ir->i;
259 as->mrm.base = RID_NONE; 332 as->mrm.base = RID_NONE;
260 } else if (ir->o == IR_STRREF) { 333 } else if (ir->o == IR_STRREF) {
261 asm_fusestrref(as, ir, allow); 334 asm_fusestrref(as, ir, allow);
335#endif
262 } else { 336 } else {
263 as->mrm.ofs = 0; 337 as->mrm.ofs = 0;
264 if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { 338 if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -301,7 +375,47 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
301 } 375 }
302} 376}
303 377
304/* Fuse load into memory operand. */ 378/* Fuse load of 64 bit IR constant into memory operand. */
379static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
380{
381 const uint64_t *k = &ir_k64(ir)->u64;
382 if (!LJ_GC64 || checki32((intptr_t)k)) {
383 as->mrm.ofs = ptr2addr(k);
384 as->mrm.base = RID_NONE;
385#if LJ_GC64
386 } else if (checki32(dispofs(as, k))) {
387 as->mrm.ofs = (int32_t)dispofs(as, k);
388 as->mrm.base = RID_DISPATCH;
389 } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
390 checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
391 as->mrm.ofs = (int32_t)mcpofs(as, k);
392 as->mrm.base = RID_RIP;
393 } else { /* Intern 64 bit constant at bottom of mcode. */
394 if (ir->i) {
395 lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
396 "bad interned 64 bit constant");
397 } else {
398 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
399 *(uint64_t*)as->mcbot = *k;
400 ir->i = (int32_t)(as->mctop - as->mcbot);
401 as->mcbot += 8;
402 as->mclim = as->mcbot + MCLIM_REDZONE;
403 lj_mcode_commitbot(as->J, as->mcbot);
404 }
405 as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
406 as->mrm.base = RID_RIP;
407#endif
408 }
409 as->mrm.idx = RID_NONE;
410 return RID_MRM;
411}
412
413/* Fuse load into memory operand.
414**
415** Important caveat: this may emit RIP-relative loads! So don't place any
416** code emitters between this function and the use of its result.
417** The only permitted exception is asm_guardcc().
418*/
305static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) 419static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
306{ 420{
307 IRIns *ir = IR(ref); 421 IRIns *ir = IR(ref);
@@ -319,27 +433,36 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
319 } 433 }
320 if (ir->o == IR_KNUM) { 434 if (ir->o == IR_KNUM) {
321 RegSet avail = as->freeset & ~as->modset & RSET_FPR; 435 RegSet avail = as->freeset & ~as->modset & RSET_FPR;
322 lua_assert(allow != RSET_EMPTY); 436 lj_assertA(allow != RSET_EMPTY, "no register allowed");
323 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ 437 if (!(avail & (avail-1))) /* Fuse if less than two regs available. */
324 as->mrm.ofs = ptr2addr(ir_knum(ir)); 438 return asm_fuseloadk64(as, ir);
325 as->mrm.base = as->mrm.idx = RID_NONE;
326 return RID_MRM;
327 }
328 } else if (ref == REF_BASE || ir->o == IR_KINT64) { 439 } else if (ref == REF_BASE || ir->o == IR_KINT64) {
329 RegSet avail = as->freeset & ~as->modset & RSET_GPR; 440 RegSet avail = as->freeset & ~as->modset & RSET_GPR;
330 lua_assert(allow != RSET_EMPTY); 441 lj_assertA(allow != RSET_EMPTY, "no register allowed");
331 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ 442 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
332 as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir)); 443 if (ref == REF_BASE) {
333 as->mrm.base = as->mrm.idx = RID_NONE; 444#if LJ_GC64
334 return RID_MRM; 445 as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base);
446 as->mrm.base = RID_DISPATCH;
447#else
448 as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base);
449 as->mrm.base = RID_NONE;
450#endif
451 as->mrm.idx = RID_NONE;
452 return RID_MRM;
453 } else {
454 return asm_fuseloadk64(as, ir);
455 }
335 } 456 }
336 } else if (mayfuse(as, ref)) { 457 } else if (mayfuse(as, ref)) {
337 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; 458 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
338 if (ir->o == IR_SLOAD) { 459 if (ir->o == IR_SLOAD) {
339 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && 460 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
340 noconflict(as, ref, IR_RETF, 0)) { 461 noconflict(as, ref, IR_RETF, 0) &&
462 !(LJ_GC64 && irt_isaddr(ir->t))) {
341 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); 463 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
342 as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); 464 as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
465 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
343 as->mrm.idx = RID_NONE; 466 as->mrm.idx = RID_NONE;
344 return RID_MRM; 467 return RID_MRM;
345 } 468 }
@@ -351,7 +474,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
351 return RID_MRM; 474 return RID_MRM;
352 } 475 }
353 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { 476 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
354 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { 477 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
478 !(LJ_GC64 && irt_isaddr(ir->t))) {
355 asm_fuseahuref(as, ir->op1, xallow); 479 asm_fuseahuref(as, ir->op1, xallow);
356 return RID_MRM; 480 return RID_MRM;
357 } 481 }
@@ -364,11 +488,15 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
364 asm_fusexref(as, ir->op1, xallow); 488 asm_fusexref(as, ir->op1, xallow);
365 return RID_MRM; 489 return RID_MRM;
366 } 490 }
367 } else if (ir->o == IR_VLOAD) { 491 } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
368 asm_fuseahuref(as, ir->op1, xallow); 492 asm_fuseahuref(as, ir->op1, xallow);
369 return RID_MRM; 493 return RID_MRM;
370 } 494 }
371 } 495 }
496 if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) {
497 asm_fusefref(as, ir, RSET_EMPTY);
498 return RID_MRM;
499 }
372 if (!(as->freeset & allow) && !emit_canremat(ref) && 500 if (!(as->freeset & allow) && !emit_canremat(ref) &&
373 (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) 501 (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
374 goto fusespill; 502 goto fusespill;
@@ -392,7 +520,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
392/* Count the required number of stack slots for a call. */ 520/* Count the required number of stack slots for a call. */
393static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) 521static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
394{ 522{
395 uint32_t i, nargs = CCI_NARGS(ci); 523 uint32_t i, nargs = CCI_XNARGS(ci);
396 int nslots = 0; 524 int nslots = 0;
397#if LJ_64 525#if LJ_64
398 if (LJ_ABI_WIN) { 526 if (LJ_ABI_WIN) {
@@ -425,7 +553,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
425/* Generate a call to a C function. */ 553/* Generate a call to a C function. */
426static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 554static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
427{ 555{
428 uint32_t n, nargs = CCI_NARGS(ci); 556 uint32_t n, nargs = CCI_XNARGS(ci);
429 int32_t ofs = STACKARG_OFS; 557 int32_t ofs = STACKARG_OFS;
430#if LJ_64 558#if LJ_64
431 uint32_t gprs = REGARG_GPRS; 559 uint32_t gprs = REGARG_GPRS;
@@ -485,13 +613,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
485 if (r) { /* Argument is in a register. */ 613 if (r) { /* Argument is in a register. */
486 if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { 614 if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
487#if LJ_64 615#if LJ_64
488 if (ir->o == IR_KINT64) 616 if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64)
489 emit_loadu64(as, r, ir_kint64(ir)->u64); 617 emit_loadu64(as, r, ir_k64(ir)->u64);
490 else 618 else
491#endif 619#endif
492 emit_loadi(as, r, ir->i); 620 emit_loadi(as, r, ir->i);
493 } else { 621 } else {
494 lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ 622 /* Must have been evicted. */
623 lj_assertA(rset_test(as->freeset, r), "reg %d not free", r);
495 if (ra_hasreg(ir->r)) { 624 if (ra_hasreg(ir->r)) {
496 ra_noweak(as, ir->r); 625 ra_noweak(as, ir->r);
497 emit_movrr(as, ir, r, ir->r); 626 emit_movrr(as, ir, r, ir->r);
@@ -500,7 +629,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
500 } 629 }
501 } 630 }
502 } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ 631 } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */
503 lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ 632 lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)),
633 "unexpected float constant");
504 if (LJ_32 && (ofs & 4) && irref_isk(ref)) { 634 if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
505 /* Split stores for unaligned FP consts. */ 635 /* Split stores for unaligned FP consts. */
506 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); 636 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
@@ -560,7 +690,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
560 if (ra_hasreg(dest)) { 690 if (ra_hasreg(dest)) {
561 ra_free(as, dest); 691 ra_free(as, dest);
562 ra_modified(as, dest); 692 ra_modified(as, dest);
563 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 693 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
564 dest, RID_ESP, ofs); 694 dest, RID_ESP, ofs);
565 } 695 }
566 if ((ci->flags & CCI_CASTU64)) { 696 if ((ci->flags & CCI_CASTU64)) {
@@ -576,7 +706,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
576 ra_destpair(as, ir); 706 ra_destpair(as, ir);
577#endif 707#endif
578 } else { 708 } else {
579 lua_assert(!irt_ispri(ir->t)); 709 lj_assertA(!irt_ispri(ir->t), "PRI dest");
580 ra_destreg(as, ir, RID_RET); 710 ra_destreg(as, ir, RID_RET);
581 } 711 }
582 } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { 712 } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
@@ -584,15 +714,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
584 } 714 }
585} 715}
586 716
587static void asm_call(ASMState *as, IRIns *ir)
588{
589 IRRef args[CCI_NARGS_MAX];
590 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
591 asm_collectargs(as, ir, ci, args);
592 asm_setupresult(as, ir, ci);
593 asm_gencall(as, ci, args);
594}
595
596/* Return a constant function pointer or NULL for indirect calls. */ 717/* Return a constant function pointer or NULL for indirect calls. */
597static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) 718static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
598{ 719{
@@ -651,17 +772,40 @@ static void asm_callx(ASMState *as, IRIns *ir)
651static void asm_retf(ASMState *as, IRIns *ir) 772static void asm_retf(ASMState *as, IRIns *ir)
652{ 773{
653 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 774 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
775#if LJ_FR2
776 Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
777#endif
654 void *pc = ir_kptr(IR(ir->op2)); 778 void *pc = ir_kptr(IR(ir->op2));
655 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 779 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
656 as->topslot -= (BCReg)delta; 780 as->topslot -= (BCReg)delta;
657 if ((int32_t)as->topslot < 0) as->topslot = 0; 781 if ((int32_t)as->topslot < 0) as->topslot = 0;
658 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 782 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
659 emit_setgl(as, base, jit_base); 783 emit_setgl(as, base, jit_base);
660 emit_addptr(as, base, -8*delta); 784 emit_addptr(as, base, -8*delta);
661 asm_guardcc(as, CC_NE); 785 asm_guardcc(as, CC_NE);
786#if LJ_FR2
787 emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
788 emit_loadu64(as, rpc, u64ptr(pc));
789#else
662 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); 790 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
791#endif
663} 792}
664 793
794/* -- Buffer operations --------------------------------------------------- */
795
796#if LJ_HASBUFFER
797static void asm_bufhdr_write(ASMState *as, Reg sb)
798{
799 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
800 IRIns irgc;
801 irgc.ot = IRT(0, IRT_PGC); /* GC type. */
802 emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
803 emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L);
804 emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG);
805 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
806}
807#endif
808
665/* -- Type conversions ---------------------------------------------------- */ 809/* -- Type conversions ---------------------------------------------------- */
666 810
667static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 811static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
@@ -672,8 +816,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
672 asm_guardcc(as, CC_NE); 816 asm_guardcc(as, CC_NE);
673 emit_rr(as, XO_UCOMISD, left, tmp); 817 emit_rr(as, XO_UCOMISD, left, tmp);
674 emit_rr(as, XO_CVTSI2SD, tmp, dest); 818 emit_rr(as, XO_CVTSI2SD, tmp, dest);
675 if (!(as->flags & JIT_F_SPLIT_XMM)) 819 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
676 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
677 emit_rr(as, XO_CVTTSD2SI, dest, left); 820 emit_rr(as, XO_CVTTSD2SI, dest, left);
678 /* Can't fuse since left is needed twice. */ 821 /* Can't fuse since left is needed twice. */
679} 822}
@@ -684,8 +827,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
684 Reg tmp = ra_noreg(IR(ir->op1)->r) ? 827 Reg tmp = ra_noreg(IR(ir->op1)->r) ?
685 ra_alloc1(as, ir->op1, RSET_FPR) : 828 ra_alloc1(as, ir->op1, RSET_FPR) :
686 ra_scratch(as, RSET_FPR); 829 ra_scratch(as, RSET_FPR);
687 Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); 830 Reg right;
688 emit_rr(as, XO_MOVDto, tmp, dest); 831 emit_rr(as, XO_MOVDto, tmp, dest);
832 right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
689 emit_mrm(as, XO_ADDSD, tmp, right); 833 emit_mrm(as, XO_ADDSD, tmp, right);
690 ra_left(as, tmp, ir->op1); 834 ra_left(as, tmp, ir->op1);
691} 835}
@@ -696,8 +840,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
696 int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); 840 int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64));
697 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 841 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
698 IRRef lref = ir->op1; 842 IRRef lref = ir->op1;
699 lua_assert(irt_type(ir->t) != st); 843 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
700 lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ 844 lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)),
845 "IR %04d has unsplit 64 bit type",
846 (int)(ir - as->ir) - REF_BIAS);
701 if (irt_isfp(ir->t)) { 847 if (irt_isfp(ir->t)) {
702 Reg dest = ra_dest(as, ir, RSET_FPR); 848 Reg dest = ra_dest(as, ir, RSET_FPR);
703 if (stfp) { /* FP to FP conversion. */ 849 if (stfp) { /* FP to FP conversion. */
@@ -706,13 +852,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
706 if (left == dest) return; /* Avoid the XO_XORPS. */ 852 if (left == dest) return; /* Avoid the XO_XORPS. */
707 } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ 853 } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
708 /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ 854 /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
709 cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); 855 cTValue *k = &as->J->k64[LJ_K64_TOBIT];
710 Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); 856 Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
711 if (irt_isfloat(ir->t)) 857 if (irt_isfloat(ir->t))
712 emit_rr(as, XO_CVTSD2SS, dest, dest); 858 emit_rr(as, XO_CVTSD2SS, dest, dest);
713 emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ 859 emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
714 emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ 860 emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
715 emit_loadn(as, bias, k); 861 emit_rma(as, XO_MOVSD, bias, k);
716 emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); 862 emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
717 return; 863 return;
718 } else { /* Integer to FP conversion. */ 864 } else { /* Integer to FP conversion. */
@@ -721,7 +867,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
721 asm_fuseloadm(as, lref, RSET_GPR, st64); 867 asm_fuseloadm(as, lref, RSET_GPR, st64);
722 if (LJ_64 && st == IRT_U64) { 868 if (LJ_64 && st == IRT_U64) {
723 MCLabel l_end = emit_label(as); 869 MCLabel l_end = emit_label(as);
724 const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); 870 cTValue *k = &as->J->k64[LJ_K64_2P64];
725 emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ 871 emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
726 emit_sjcc(as, CC_NS, l_end); 872 emit_sjcc(as, CC_NS, l_end);
727 emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ 873 emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
@@ -729,18 +875,16 @@ static void asm_conv(ASMState *as, IRIns *ir)
729 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 875 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
730 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 876 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
731 } 877 }
732 if (!(as->flags & JIT_F_SPLIT_XMM)) 878 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
733 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
734 } else if (stfp) { /* FP to integer conversion. */ 879 } else if (stfp) { /* FP to integer conversion. */
735 if (irt_isguard(ir->t)) { 880 if (irt_isguard(ir->t)) {
736 /* Checked conversions are only supported from number to int. */ 881 /* Checked conversions are only supported from number to int. */
737 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 882 lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
883 "bad type for checked CONV");
738 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 884 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
739 } else { 885 } else {
740 Reg dest = ra_dest(as, ir, RSET_GPR); 886 Reg dest = ra_dest(as, ir, RSET_GPR);
741 x86Op op = st == IRT_NUM ? 887 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
742 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
743 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
744 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 888 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
745 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 889 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
746 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 890 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -751,30 +895,27 @@ static void asm_conv(ASMState *as, IRIns *ir)
751 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); 895 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
752 emit_rr(as, op, dest|REX_64, tmp); 896 emit_rr(as, op, dest|REX_64, tmp);
753 if (st == IRT_NUM) 897 if (st == IRT_NUM)
754 emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, 898 emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
755 LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
756 else 899 else
757 emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, 900 emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
758 LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
759 emit_sjcc(as, CC_NS, l_end); 901 emit_sjcc(as, CC_NS, l_end);
760 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ 902 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
761 emit_rr(as, op, dest|REX_64, tmp); 903 emit_rr(as, op, dest|REX_64, tmp);
762 ra_left(as, tmp, lref); 904 ra_left(as, tmp, lref);
763 } else { 905 } else {
764 Reg left = asm_fuseload(as, lref, RSET_FPR);
765 if (LJ_64 && irt_isu32(ir->t)) 906 if (LJ_64 && irt_isu32(ir->t))
766 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ 907 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
767 emit_mrm(as, op, 908 emit_mrm(as, op,
768 dest|((LJ_64 && 909 dest|((LJ_64 &&
769 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), 910 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
770 left); 911 asm_fuseload(as, lref, RSET_FPR));
771 } 912 }
772 } 913 }
773 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 914 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
774 Reg left, dest = ra_dest(as, ir, RSET_GPR); 915 Reg left, dest = ra_dest(as, ir, RSET_GPR);
775 RegSet allow = RSET_GPR; 916 RegSet allow = RSET_GPR;
776 x86Op op; 917 x86Op op;
777 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 918 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
778 if (st == IRT_I8) { 919 if (st == IRT_I8) {
779 op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; 920 op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX;
780 } else if (st == IRT_U8) { 921 } else if (st == IRT_U8) {
@@ -808,7 +949,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
808 } 949 }
809 } else { 950 } else {
810 Reg dest = ra_dest(as, ir, RSET_GPR); 951 Reg dest = ra_dest(as, ir, RSET_GPR);
811 if (st64) { 952 if (st64 && !(ir->op2 & IRCONV_NONE)) {
812 Reg left = asm_fuseload(as, lref, RSET_GPR); 953 Reg left = asm_fuseload(as, lref, RSET_GPR);
813 /* This is either a 32 bit reg/reg mov which zeroes the hiword 954 /* This is either a 32 bit reg/reg mov which zeroes the hiword
814 ** or a load of the loword from a 64 bit address. 955 ** or a load of the loword from a 64 bit address.
@@ -834,20 +975,18 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
834 if (ra_hasreg(dest)) { 975 if (ra_hasreg(dest)) {
835 ra_free(as, dest); 976 ra_free(as, dest);
836 ra_modified(as, dest); 977 ra_modified(as, dest);
837 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 978 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
838 dest, RID_ESP, ofs);
839 } 979 }
840 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 980 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
841 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 981 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
842 if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { 982 if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
843 /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ 983 /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
844 MCLabel l_end = emit_label(as); 984 MCLabel l_end = emit_label(as);
845 emit_rma(as, XO_FADDq, XOg_FADDq, 985 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]);
846 lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
847 emit_sjcc(as, CC_NS, l_end); 986 emit_sjcc(as, CC_NS, l_end);
848 emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ 987 emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
849 } else { 988 } else {
850 lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); 989 lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV");
851 } 990 }
852 emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); 991 emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
853 /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ 992 /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
@@ -861,9 +1000,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
861 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); 1000 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
862 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); 1001 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
863 Reg lo, hi; 1002 Reg lo, hi;
864 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 1003 lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
865 lua_assert(dt == IRT_I64 || dt == IRT_U64); 1004 lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
866 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
867 hi = ra_dest(as, ir, RSET_GPR); 1005 hi = ra_dest(as, ir, RSET_GPR);
868 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 1006 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
869 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 1007 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -884,8 +1022,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
884 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); 1022 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
885 else 1023 else
886 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); 1024 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
887 emit_rma(as, XO_FADDq, XOg_FADDq, 1025 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
888 lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
889 emit_sjcc(as, CC_NS, l_pop); 1026 emit_sjcc(as, CC_NS, l_pop);
890 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ 1027 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
891 } 1028 }
@@ -906,6 +1043,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
906 st == IRT_NUM ? XOg_FLDq: XOg_FLDd, 1043 st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
907 asm_fuseload(as, ir->op1, RSET_EMPTY)); 1044 asm_fuseload(as, ir->op1, RSET_EMPTY));
908} 1045}
1046
1047static void asm_conv64(ASMState *as, IRIns *ir)
1048{
1049 if (irt_isfp(ir->t))
1050 asm_conv_fp_int64(as, ir);
1051 else
1052 asm_conv_int64_fp(as, ir);
1053}
909#endif 1054#endif
910 1055
911static void asm_strto(ASMState *as, IRIns *ir) 1056static void asm_strto(ASMState *as, IRIns *ir)
@@ -927,54 +1072,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
927 RID_ESP, sps_scale(ir->s)); 1072 RID_ESP, sps_scale(ir->s));
928} 1073}
929 1074
930static void asm_tostr(ASMState *as, IRIns *ir) 1075/* -- Memory references --------------------------------------------------- */
1076
1077/* Get pointer to TValue. */
1078static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
931{ 1079{
932 IRIns *irl = IR(ir->op1); 1080 if ((mode & IRTMPREF_IN1)) {
933 IRRef args[2]; 1081 IRIns *ir = IR(ref);
934 args[0] = ASMREF_L; 1082 if (irt_isnum(ir->t)) {
935 as->gcsteps++; 1083 if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
936 if (irt_isnum(irl->t)) { 1084 /* Use the number constant itself as a TValue. */
937 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 1085 emit_loada(as, dest, ir_knum(ir));
938 args[1] = ASMREF_TMP1; /* const lua_Number * */ 1086 return;
939 asm_setupresult(as, ir, ci); /* GCstr * */ 1087 }
940 asm_gencall(as, ci, args); 1088 emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0);
941 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64, 1089 } else {
942 RID_ESP, ra_spill(as, irl)); 1090#if LJ_GC64
943 } else { 1091 if (irref_isk(ref)) {
944 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 1092 TValue k;
945 args[1] = ir->op1; /* int32_t k */ 1093 lj_ir_kvalue(as->J->L, &k, ir);
946 asm_setupresult(as, ir, ci); /* GCstr * */ 1094 emit_movmroi(as, dest, 4, k.u32.hi);
947 asm_gencall(as, ci, args); 1095 emit_movmroi(as, dest, 0, k.u32.lo);
1096 } else {
1097 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1098 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
1099 if (irt_is64(ir->t)) {
1100 emit_u32(as, irt_toitype(ir->t) << 15);
1101 emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
1102 } else {
1103 emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
1104 }
1105 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
1106 }
1107#else
1108 if (!irref_isk(ref)) {
1109 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
1110 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
1111 } else if (!irt_ispri(ir->t)) {
1112 emit_movmroi(as, dest, 0, ir->i);
1113 }
1114 if (!(LJ_64 && irt_islightud(ir->t)))
1115 emit_movmroi(as, dest, 4, irt_toitype(ir->t));
1116#endif
1117 }
948 } 1118 }
1119 emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */
949} 1120}
950 1121
951/* -- Memory references --------------------------------------------------- */
952
953static void asm_aref(ASMState *as, IRIns *ir) 1122static void asm_aref(ASMState *as, IRIns *ir)
954{ 1123{
955 Reg dest = ra_dest(as, ir, RSET_GPR); 1124 Reg dest = ra_dest(as, ir, RSET_GPR);
956 asm_fusearef(as, ir, RSET_GPR); 1125 asm_fusearef(as, ir, RSET_GPR);
957 if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) 1126 if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
958 emit_mrm(as, XO_LEA, dest, RID_MRM); 1127 emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
959 else if (as->mrm.base != dest) 1128 else if (as->mrm.base != dest)
960 emit_rr(as, XO_MOV, dest, as->mrm.base); 1129 emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
961}
962
963/* Merge NE(HREF, niltv) check. */
964static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
965{
966 /* Assumes nothing else generates NE of HREF. */
967 if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
968 ra_hasreg(ir->r)) {
969 MCode *p = as->mcp;
970 p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
971 /* Ensure no loop branch inversion happened. */
972 if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
973 as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */
974 return p + *(int32_t *)(p-4); /* Return exit address. */
975 }
976 }
977 return NULL;
978} 1130}
979 1131
980/* Inlined hash lookup. Specialized for key type and for const keys. 1132/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -985,10 +1137,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
985** } while ((n = nextnode(n))); 1137** } while ((n = nextnode(n)));
986** return niltv(L); 1138** return niltv(L);
987*/ 1139*/
988static void asm_href(ASMState *as, IRIns *ir) 1140static void asm_href(ASMState *as, IRIns *ir, IROp merge)
989{ 1141{
990 MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
991 RegSet allow = RSET_GPR; 1142 RegSet allow = RSET_GPR;
1143 int destused = ra_used(ir);
992 Reg dest = ra_dest(as, ir, allow); 1144 Reg dest = ra_dest(as, ir, allow);
993 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 1145 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
994 Reg key = RID_NONE, tmp = RID_NONE; 1146 Reg key = RID_NONE, tmp = RID_NONE;
@@ -1001,28 +1153,26 @@ static void asm_href(ASMState *as, IRIns *ir)
1001 if (!isk) { 1153 if (!isk) {
1002 rset_clear(allow, tab); 1154 rset_clear(allow, tab);
1003 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); 1155 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
1004 if (!irt_isstr(kt)) 1156 if (LJ_GC64 || !irt_isstr(kt))
1005 tmp = ra_scratch(as, rset_exclude(allow, key)); 1157 tmp = ra_scratch(as, rset_exclude(allow, key));
1006 } 1158 }
1007 1159
1008 /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ 1160 /* Key not found in chain: jump to exit (if merged) or load niltv. */
1009 l_end = emit_label(as); 1161 l_end = emit_label(as);
1010 if (nilexit && ir[1].o == IR_NE) { 1162 if (merge == IR_NE)
1011 emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ 1163 asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
1012 nilexit = NULL; 1164 else if (destused)
1013 } else {
1014 emit_loada(as, dest, niltvg(J2G(as->J))); 1165 emit_loada(as, dest, niltvg(J2G(as->J)));
1015 }
1016 1166
1017 /* Follow hash chain until the end. */ 1167 /* Follow hash chain until the end. */
1018 l_loop = emit_sjcc_label(as, CC_NZ); 1168 l_loop = emit_sjcc_label(as, CC_NZ);
1019 emit_rr(as, XO_TEST, dest, dest); 1169 emit_rr(as, XO_TEST, dest|REX_GC64, dest);
1020 emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); 1170 emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
1021 l_next = emit_label(as); 1171 l_next = emit_label(as);
1022 1172
1023 /* Type and value comparison. */ 1173 /* Type and value comparison. */
1024 if (nilexit) 1174 if (merge == IR_EQ)
1025 emit_jcc(as, CC_E, nilexit); 1175 asm_guardcc(as, CC_E);
1026 else 1176 else
1027 emit_sjcc(as, CC_E, l_end); 1177 emit_sjcc(as, CC_E, l_end);
1028 if (irt_isnum(kt)) { 1178 if (irt_isnum(kt)) {
@@ -1038,7 +1188,7 @@ static void asm_href(ASMState *as, IRIns *ir)
1038 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); 1188 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
1039 emit_sjcc(as, CC_AE, l_next); 1189 emit_sjcc(as, CC_AE, l_next);
1040 /* The type check avoids NaN penalties and complaints from Valgrind. */ 1190 /* The type check avoids NaN penalties and complaints from Valgrind. */
1041#if LJ_64 1191#if LJ_64 && !LJ_GC64
1042 emit_u32(as, LJ_TISNUM); 1192 emit_u32(as, LJ_TISNUM);
1043 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); 1193 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1044#else 1194#else
@@ -1046,13 +1196,31 @@ static void asm_href(ASMState *as, IRIns *ir)
1046 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); 1196 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1047#endif 1197#endif
1048 } 1198 }
1049#if LJ_64 1199#if LJ_64 && !LJ_GC64
1050 } else if (irt_islightud(kt)) { 1200 } else if (irt_islightud(kt)) {
1051 emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); 1201 emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
1052#endif 1202#endif
1203#if LJ_GC64
1204 } else if (irt_isaddr(kt)) {
1205 if (isk) {
1206 TValue k;
1207 k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
1208 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
1209 k.u32.lo);
1210 emit_sjcc(as, CC_NE, l_next);
1211 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
1212 k.u32.hi);
1213 } else {
1214 emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
1215 }
1216 } else {
1217 lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
1218 emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
1219 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1220#else
1053 } else { 1221 } else {
1054 if (!irt_ispri(kt)) { 1222 if (!irt_ispri(kt)) {
1055 lua_assert(irt_isaddr(kt)); 1223 lj_assertA(irt_isaddr(kt), "bad HREF key type");
1056 if (isk) 1224 if (isk)
1057 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), 1225 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
1058 ptr2addr(ir_kgc(irkey))); 1226 ptr2addr(ir_kgc(irkey)));
@@ -1060,31 +1228,33 @@ static void asm_href(ASMState *as, IRIns *ir)
1060 emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); 1228 emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
1061 emit_sjcc(as, CC_NE, l_next); 1229 emit_sjcc(as, CC_NE, l_next);
1062 } 1230 }
1063 lua_assert(!irt_isnil(kt)); 1231 lj_assertA(!irt_isnil(kt), "bad HREF key type");
1064 emit_i8(as, irt_toitype(kt)); 1232 emit_i8(as, irt_toitype(kt));
1065 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); 1233 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1234#endif
1066 } 1235 }
1067 emit_sfixup(as, l_loop); 1236 emit_sfixup(as, l_loop);
1068 checkmclim(as); 1237 checkmclim(as);
1238#if LJ_GC64
1239 if (!isk && irt_isaddr(kt)) {
1240 emit_rr(as, XO_OR, tmp|REX_64, key);
1241 emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
1242 }
1243#endif
1069 1244
1070 /* Load main position relative to tab->node into dest. */ 1245 /* Load main position relative to tab->node into dest. */
1071 khash = isk ? ir_khash(irkey) : 1; 1246 khash = isk ? ir_khash(as, irkey) : 1;
1072 if (khash == 0) { 1247 if (khash == 0) {
1073 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); 1248 emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
1074 } else { 1249 } else {
1075 emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); 1250 emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
1076 if ((as->flags & JIT_F_PREFER_IMUL)) { 1251 emit_shifti(as, XOg_SHL, dest, 3);
1077 emit_i8(as, sizeof(Node)); 1252 emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1078 emit_rr(as, XO_IMULi8, dest, dest);
1079 } else {
1080 emit_shifti(as, XOg_SHL, dest, 3);
1081 emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1082 }
1083 if (isk) { 1253 if (isk) {
1084 emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); 1254 emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
1085 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); 1255 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1086 } else if (irt_isstr(kt)) { 1256 } else if (irt_isstr(kt)) {
1087 emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); 1257 emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid));
1088 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); 1258 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1089 } else { /* Must match with hashrot() in lj_tab.c. */ 1259 } else { /* Must match with hashrot() in lj_tab.c. */
1090 emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); 1260 emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
@@ -1107,7 +1277,19 @@ static void asm_href(ASMState *as, IRIns *ir)
1107#endif 1277#endif
1108 } else { 1278 } else {
1109 emit_rr(as, XO_MOV, tmp, key); 1279 emit_rr(as, XO_MOV, tmp, key);
1280#if LJ_GC64
1281 checkmclim(as);
1282 emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
1283 if ((as->flags & JIT_F_BMI2)) {
1284 emit_i8(as, 32);
1285 emit_mrm(as, XV_RORX|VEX_64, dest, key);
1286 } else {
1287 emit_shifti(as, XOg_SHR|REX_64, dest, 32);
1288 emit_rr(as, XO_MOV, dest|REX_64, key|REX_64);
1289 }
1290#else
1110 emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); 1291 emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
1292#endif
1111 } 1293 }
1112 } 1294 }
1113 } 1295 }
@@ -1123,15 +1305,15 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1123#if !LJ_64 1305#if !LJ_64
1124 MCLabel l_exit; 1306 MCLabel l_exit;
1125#endif 1307#endif
1126 lua_assert(ofs % sizeof(Node) == 0); 1308 lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
1127 if (ra_hasreg(dest)) { 1309 if (ra_hasreg(dest)) {
1128 if (ofs != 0) { 1310 if (ofs != 0) {
1129 if (dest == node && !(as->flags & JIT_F_LEA_AGU)) 1311 if (dest == node)
1130 emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); 1312 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
1131 else 1313 else
1132 emit_rmro(as, XO_LEA, dest, node, ofs); 1314 emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
1133 } else if (dest != node) { 1315 } else if (dest != node) {
1134 emit_rr(as, XO_MOV, dest, node); 1316 emit_rr(as, XO_MOV, dest|REX_GC64, node);
1135 } 1317 }
1136 } 1318 }
1137 asm_guardcc(as, CC_NE); 1319 asm_guardcc(as, CC_NE);
@@ -1140,16 +1322,28 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1140 Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); 1322 Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
1141 emit_rmro(as, XO_CMP, key|REX_64, node, 1323 emit_rmro(as, XO_CMP, key|REX_64, node,
1142 ofs + (int32_t)offsetof(Node, key.u64)); 1324 ofs + (int32_t)offsetof(Node, key.u64));
1143 lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); 1325 lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t),
1326 "bad HREFK key type");
1144 /* Assumes -0.0 is already canonicalized to +0.0. */ 1327 /* Assumes -0.0 is already canonicalized to +0.0. */
1145 emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : 1328 emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
1329#if LJ_GC64
1330 ((uint64_t)irt_toitype(irkey->t) << 47) |
1331 (uint64_t)ir_kgc(irkey));
1332#else
1146 ((uint64_t)irt_toitype(irkey->t) << 32) | 1333 ((uint64_t)irt_toitype(irkey->t) << 32) |
1147 (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); 1334 (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
1335#endif
1148 } else { 1336 } else {
1149 lua_assert(!irt_isnil(irkey->t)); 1337 lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
1338#if LJ_GC64
1339 emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
1340 emit_rmro(as, XO_ARITHi, XOg_CMP, node,
1341 ofs + (int32_t)offsetof(Node, key.it));
1342#else
1150 emit_i8(as, irt_toitype(irkey->t)); 1343 emit_i8(as, irt_toitype(irkey->t));
1151 emit_rmro(as, XO_ARITHi8, XOg_CMP, node, 1344 emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1152 ofs + (int32_t)offsetof(Node, key.it)); 1345 ofs + (int32_t)offsetof(Node, key.it));
1346#endif
1153 } 1347 }
1154#else 1348#else
1155 l_exit = emit_label(as); 1349 l_exit = emit_label(as);
@@ -1164,13 +1358,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1164 (int32_t)ir_knum(irkey)->u32.hi); 1358 (int32_t)ir_knum(irkey)->u32.hi);
1165 } else { 1359 } else {
1166 if (!irt_ispri(irkey->t)) { 1360 if (!irt_ispri(irkey->t)) {
1167 lua_assert(irt_isgcv(irkey->t)); 1361 lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type");
1168 emit_gmroi(as, XG_ARITHi(XOg_CMP), node, 1362 emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1169 ofs + (int32_t)offsetof(Node, key.gcr), 1363 ofs + (int32_t)offsetof(Node, key.gcr),
1170 ptr2addr(ir_kgc(irkey))); 1364 ptr2addr(ir_kgc(irkey)));
1171 emit_sjcc(as, CC_NE, l_exit); 1365 emit_sjcc(as, CC_NE, l_exit);
1172 } 1366 }
1173 lua_assert(!irt_isnil(irkey->t)); 1367 lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
1174 emit_i8(as, irt_toitype(irkey->t)); 1368 emit_i8(as, irt_toitype(irkey->t));
1175 emit_rmro(as, XO_ARITHi8, XOg_CMP, node, 1369 emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1176 ofs + (int32_t)offsetof(Node, key.it)); 1370 ofs + (int32_t)offsetof(Node, key.it));
@@ -1178,61 +1372,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1178#endif 1372#endif
1179} 1373}
1180 1374
1181static void asm_newref(ASMState *as, IRIns *ir)
1182{
1183 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1184 IRRef args[3];
1185 IRIns *irkey;
1186 Reg tmp;
1187 if (ir->r == RID_SINK)
1188 return;
1189 args[0] = ASMREF_L; /* lua_State *L */
1190 args[1] = ir->op1; /* GCtab *t */
1191 args[2] = ASMREF_TMP1; /* cTValue *key */
1192 asm_setupresult(as, ir, ci); /* TValue * */
1193 asm_gencall(as, ci, args);
1194 tmp = ra_releasetmp(as, ASMREF_TMP1);
1195 irkey = IR(ir->op2);
1196 if (irt_isnum(irkey->t)) {
1197 /* For numbers use the constant itself or a spill slot as a TValue. */
1198 if (irref_isk(ir->op2))
1199 emit_loada(as, tmp, ir_knum(irkey));
1200 else
1201 emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
1202 } else {
1203 /* Otherwise use g->tmptv to hold the TValue. */
1204 if (!irref_isk(ir->op2)) {
1205 Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
1206 emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
1207 } else if (!irt_ispri(irkey->t)) {
1208 emit_movmroi(as, tmp, 0, irkey->i);
1209 }
1210 if (!(LJ_64 && irt_islightud(irkey->t)))
1211 emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
1212 emit_loada(as, tmp, &J2G(as->J)->tmptv);
1213 }
1214}
1215
1216static void asm_uref(ASMState *as, IRIns *ir) 1375static void asm_uref(ASMState *as, IRIns *ir)
1217{ 1376{
1218 Reg dest = ra_dest(as, ir, RSET_GPR); 1377 Reg dest = ra_dest(as, ir, RSET_GPR);
1219 if (irref_isk(ir->op1)) { 1378 if (irref_isk(ir->op1)) {
1220 GCfunc *fn = ir_kfunc(IR(ir->op1)); 1379 GCfunc *fn = ir_kfunc(IR(ir->op1));
1221 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 1380 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
1222 emit_rma(as, XO_MOV, dest, v); 1381 emit_rma(as, XO_MOV, dest|REX_GC64, v);
1223 } else { 1382 } else {
1224 Reg uv = ra_scratch(as, RSET_GPR); 1383 Reg uv = ra_scratch(as, RSET_GPR);
1225 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 1384 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
1226 if (ir->o == IR_UREFC) { 1385 if (ir->o == IR_UREFC) {
1227 emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); 1386 emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
1228 asm_guardcc(as, CC_NE); 1387 asm_guardcc(as, CC_NE);
1229 emit_i8(as, 1); 1388 emit_i8(as, 1);
1230 emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); 1389 emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
1231 } else { 1390 } else {
1232 emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); 1391 emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
1233 } 1392 }
1234 emit_rmro(as, XO_MOV, uv, func, 1393 emit_rmro(as, XO_MOV, uv|REX_GC64, func,
1235 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); 1394 (int32_t)offsetof(GCfuncL, uvptr) +
1395 (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
1236 } 1396 }
1237} 1397}
1238 1398
@@ -1250,9 +1410,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
1250 if (as->mrm.base == RID_NONE) 1410 if (as->mrm.base == RID_NONE)
1251 emit_loadi(as, dest, as->mrm.ofs); 1411 emit_loadi(as, dest, as->mrm.ofs);
1252 else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) 1412 else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
1253 emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); 1413 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
1254 else 1414 else
1255 emit_mrm(as, XO_LEA, dest, RID_MRM); 1415 emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
1256} 1416}
1257 1417
1258/* -- Loads and stores ---------------------------------------------------- */ 1418/* -- Loads and stores ---------------------------------------------------- */
@@ -1271,19 +1431,23 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1271 case IRT_U8: xo = XO_MOVZXb; break; 1431 case IRT_U8: xo = XO_MOVZXb; break;
1272 case IRT_I16: xo = XO_MOVSXw; break; 1432 case IRT_I16: xo = XO_MOVSXw; break;
1273 case IRT_U16: xo = XO_MOVZXw; break; 1433 case IRT_U16: xo = XO_MOVZXw; break;
1274 case IRT_NUM: xo = XMM_MOVRM(as); break; 1434 case IRT_NUM: xo = XO_MOVSD; break;
1275 case IRT_FLOAT: xo = XO_MOVSS; break; 1435 case IRT_FLOAT: xo = XO_MOVSS; break;
1276 default: 1436 default:
1277 if (LJ_64 && irt_is64(ir->t)) 1437 if (LJ_64 && irt_is64(ir->t))
1278 dest |= REX_64; 1438 dest |= REX_64;
1279 else 1439 else
1280 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); 1440 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
1441 "unsplit 64 bit load");
1281 xo = XO_MOV; 1442 xo = XO_MOV;
1282 break; 1443 break;
1283 } 1444 }
1284 emit_mrm(as, xo, dest, RID_MRM); 1445 emit_mrm(as, xo, dest, RID_MRM);
1285} 1446}
1286 1447
1448#define asm_fload(as, ir) asm_fxload(as, ir)
1449#define asm_xload(as, ir) asm_fxload(as, ir)
1450
1287static void asm_fxstore(ASMState *as, IRIns *ir) 1451static void asm_fxstore(ASMState *as, IRIns *ir)
1288{ 1452{
1289 RegSet allow = RSET_GPR; 1453 RegSet allow = RSET_GPR;
@@ -1318,14 +1482,17 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1318 case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; 1482 case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
1319 case IRT_NUM: xo = XO_MOVSDto; break; 1483 case IRT_NUM: xo = XO_MOVSDto; break;
1320 case IRT_FLOAT: xo = XO_MOVSSto; break; 1484 case IRT_FLOAT: xo = XO_MOVSSto; break;
1321#if LJ_64 1485#if LJ_64 && !LJ_GC64
1322 case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ 1486 case IRT_LIGHTUD:
1487 /* NYI: mask 64 bit lightuserdata. */
1488 lj_assertA(0, "store of lightuserdata");
1323#endif 1489#endif
1324 default: 1490 default:
1325 if (LJ_64 && irt_is64(ir->t)) 1491 if (LJ_64 && irt_is64(ir->t))
1326 src |= REX_64; 1492 src |= REX_64;
1327 else 1493 else
1328 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); 1494 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
1495 "unsplit 64 bit store");
1329 xo = XO_MOVto; 1496 xo = XO_MOVto;
1330 break; 1497 break;
1331 } 1498 }
@@ -1339,15 +1506,18 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1339 emit_i8(as, k); 1506 emit_i8(as, k);
1340 emit_mrm(as, XO_MOVmib, 0, RID_MRM); 1507 emit_mrm(as, XO_MOVmib, 0, RID_MRM);
1341 } else { 1508 } else {
1342 lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || 1509 lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
1343 irt_isaddr(ir->t)); 1510 irt_isaddr(ir->t), "bad store type");
1344 emit_i32(as, k); 1511 emit_i32(as, k);
1345 emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); 1512 emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
1346 } 1513 }
1347 } 1514 }
1348} 1515}
1349 1516
1350#if LJ_64 1517#define asm_fstore(as, ir) asm_fxstore(as, ir)
1518#define asm_xstore(as, ir) asm_fxstore(as, ir)
1519
1520#if LJ_64 && !LJ_GC64
1351static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) 1521static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1352{ 1522{
1353 if (ra_used(ir) || typecheck) { 1523 if (ra_used(ir) || typecheck) {
@@ -1369,9 +1539,13 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1369 1539
1370static void asm_ahuvload(ASMState *as, IRIns *ir) 1540static void asm_ahuvload(ASMState *as, IRIns *ir)
1371{ 1541{
1372 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || 1542#if LJ_GC64
1373 (LJ_DUALNUM && irt_isint(ir->t))); 1543 Reg tmp = RID_NONE;
1374#if LJ_64 1544#endif
1545 lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1546 (LJ_DUALNUM && irt_isint(ir->t)),
1547 "bad load type %d", irt_type(ir->t));
1548#if LJ_64 && !LJ_GC64
1375 if (irt_islightud(ir->t)) { 1549 if (irt_islightud(ir->t)) {
1376 Reg dest = asm_load_lightud64(as, ir, 1); 1550 Reg dest = asm_load_lightud64(as, ir, 1);
1377 if (ra_hasreg(dest)) { 1551 if (ra_hasreg(dest)) {
@@ -1385,20 +1559,65 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1385 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1559 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1386 Reg dest = ra_dest(as, ir, allow); 1560 Reg dest = ra_dest(as, ir, allow);
1387 asm_fuseahuref(as, ir->op1, RSET_GPR); 1561 asm_fuseahuref(as, ir->op1, RSET_GPR);
1388 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1562#if LJ_GC64
1563 if (irt_isaddr(ir->t)) {
1564 emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1565 asm_guardcc(as, CC_NE);
1566 emit_i8(as, irt_toitype(ir->t));
1567 emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1568 emit_i8(as, XI_O16);
1569 if ((as->flags & JIT_F_BMI2)) {
1570 emit_i8(as, 47);
1571 emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
1572 } else {
1573 emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1574 emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
1575 }
1576 return;
1577 } else
1578#endif
1579 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1389 } else { 1580 } else {
1390 asm_fuseahuref(as, ir->op1, RSET_GPR); 1581 RegSet gpr = RSET_GPR;
1582#if LJ_GC64
1583 if (irt_isaddr(ir->t)) {
1584 tmp = ra_scratch(as, RSET_GPR);
1585 gpr = rset_exclude(gpr, tmp);
1586 }
1587#endif
1588 asm_fuseahuref(as, ir->op1, gpr);
1391 } 1589 }
1392 /* Always do the type check, even if the load result is unused. */ 1590 /* Always do the type check, even if the load result is unused. */
1393 as->mrm.ofs += 4; 1591 as->mrm.ofs += 4;
1394 asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); 1592 asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
1395 if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { 1593 if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
1396 lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); 1594 lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
1595 "bad load type %d", irt_type(ir->t));
1596#if LJ_GC64
1597 emit_u32(as, LJ_TISNUM << 15);
1598#else
1397 emit_u32(as, LJ_TISNUM); 1599 emit_u32(as, LJ_TISNUM);
1600#endif
1398 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); 1601 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1602#if LJ_GC64
1603 } else if (irt_isaddr(ir->t)) {
1604 as->mrm.ofs -= 4;
1605 emit_i8(as, irt_toitype(ir->t));
1606 emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
1607 emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1608 emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
1609 } else if (irt_isnil(ir->t)) {
1610 as->mrm.ofs -= 4;
1611 emit_i8(as, -1);
1612 emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
1613 } else {
1614 emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
1615 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1616#else
1399 } else { 1617 } else {
1400 emit_i8(as, irt_toitype(ir->t)); 1618 emit_i8(as, irt_toitype(ir->t));
1401 emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); 1619 emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
1620#endif
1402 } 1621 }
1403} 1622}
1404 1623
@@ -1410,12 +1629,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1410 Reg src = ra_alloc1(as, ir->op2, RSET_FPR); 1629 Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
1411 asm_fuseahuref(as, ir->op1, RSET_GPR); 1630 asm_fuseahuref(as, ir->op1, RSET_GPR);
1412 emit_mrm(as, XO_MOVSDto, src, RID_MRM); 1631 emit_mrm(as, XO_MOVSDto, src, RID_MRM);
1413#if LJ_64 1632#if LJ_64 && !LJ_GC64
1414 } else if (irt_islightud(ir->t)) { 1633 } else if (irt_islightud(ir->t)) {
1415 Reg src = ra_alloc1(as, ir->op2, RSET_GPR); 1634 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1416 asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); 1635 asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
1417 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); 1636 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1418#endif 1637#endif
1638#if LJ_GC64
1639 } else if (irref_isk(ir->op2)) {
1640 TValue k;
1641 lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
1642 asm_fuseahuref(as, ir->op1, RSET_GPR);
1643 if (tvisnil(&k)) {
1644 emit_i32(as, -1);
1645 emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
1646 } else {
1647 emit_u32(as, k.u32.lo);
1648 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1649 as->mrm.ofs += 4;
1650 emit_u32(as, k.u32.hi);
1651 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1652 }
1653#endif
1419 } else { 1654 } else {
1420 IRIns *irr = IR(ir->op2); 1655 IRIns *irr = IR(ir->op2);
1421 RegSet allow = RSET_GPR; 1656 RegSet allow = RSET_GPR;
@@ -1426,34 +1661,55 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1426 } 1661 }
1427 asm_fuseahuref(as, ir->op1, allow); 1662 asm_fuseahuref(as, ir->op1, allow);
1428 if (ra_hasreg(src)) { 1663 if (ra_hasreg(src)) {
1664#if LJ_GC64
1665 if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
1666 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1667 as->mrm.ofs += 4;
1668 emit_u32(as, irt_toitype(ir->t) << 15);
1669 emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
1670 as->mrm.ofs -= 4;
1671 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1672 return;
1673 }
1674#endif
1429 emit_mrm(as, XO_MOVto, src, RID_MRM); 1675 emit_mrm(as, XO_MOVto, src, RID_MRM);
1430 } else if (!irt_ispri(irr->t)) { 1676 } else if (!irt_ispri(irr->t)) {
1431 lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); 1677 lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)),
1678 "bad store type");
1432 emit_i32(as, irr->i); 1679 emit_i32(as, irr->i);
1433 emit_mrm(as, XO_MOVmi, 0, RID_MRM); 1680 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1434 } 1681 }
1435 as->mrm.ofs += 4; 1682 as->mrm.ofs += 4;
1683#if LJ_GC64
1684 lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type");
1685 emit_i32(as, LJ_TNUMX << 15);
1686#else
1436 emit_i32(as, (int32_t)irt_toitype(ir->t)); 1687 emit_i32(as, (int32_t)irt_toitype(ir->t));
1688#endif
1437 emit_mrm(as, XO_MOVmi, 0, RID_MRM); 1689 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1438 } 1690 }
1439} 1691}
1440 1692
1441static void asm_sload(ASMState *as, IRIns *ir) 1693static void asm_sload(ASMState *as, IRIns *ir)
1442{ 1694{
1443 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); 1695 int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
1696 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1444 IRType1 t = ir->t; 1697 IRType1 t = ir->t;
1445 Reg base; 1698 Reg base;
1446 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1699 lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
1447 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1700 "bad parent SLOAD"); /* Handled by asm_head_side(). */
1448 lua_assert(LJ_DUALNUM || 1701 lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
1449 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1702 "inconsistent SLOAD variant");
1703 lj_assertA(LJ_DUALNUM ||
1704 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)),
1705 "bad SLOAD type");
1450 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1706 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1451 Reg left = ra_scratch(as, RSET_FPR); 1707 Reg left = ra_scratch(as, RSET_FPR);
1452 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1708 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1453 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1709 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1454 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1710 emit_rmro(as, XO_MOVSD, left, base, ofs);
1455 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1711 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1456#if LJ_64 1712#if LJ_64 && !LJ_GC64
1457 } else if (irt_islightud(t)) { 1713 } else if (irt_islightud(t)) {
1458 Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); 1714 Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
1459 if (ra_hasreg(dest)) { 1715 if (ra_hasreg(dest)) {
@@ -1466,14 +1722,43 @@ static void asm_sload(ASMState *as, IRIns *ir)
1466 RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; 1722 RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR;
1467 Reg dest = ra_dest(as, ir, allow); 1723 Reg dest = ra_dest(as, ir, allow);
1468 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1724 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1469 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1725 lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
1726 "bad SLOAD type %d", irt_type(t));
1470 if ((ir->op2 & IRSLOAD_CONVERT)) { 1727 if ((ir->op2 & IRSLOAD_CONVERT)) {
1471 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1728 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1472 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1729 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1473 } else if (irt_isnum(t)) {
1474 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1475 } else { 1730 } else {
1476 emit_rmro(as, XO_MOV, dest, base, ofs); 1731#if LJ_GC64
1732 if (irt_isaddr(t)) {
1733 /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
1734 **
1735 ** mov r64, [addr] rorx r64, [addr], 47
1736 ** ror r64, 47
1737 ** cmp r16, itype cmp r16, itype
1738 ** jne ->exit jne ->exit
1739 ** shr r64, 16 shr r64, 16
1740 */
1741 emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1742 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1743 asm_guardcc(as, CC_NE);
1744 emit_i8(as, irt_toitype(t));
1745 emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1746 emit_i8(as, XI_O16);
1747 }
1748 if ((as->flags & JIT_F_BMI2)) {
1749 emit_i8(as, 47);
1750 emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
1751 } else {
1752 if ((ir->op2 & IRSLOAD_TYPECHECK))
1753 emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1754 else
1755 emit_shifti(as, XOg_SHL|REX_64, dest, 17);
1756 emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
1757 }
1758 return;
1759 } else
1760#endif
1761 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1477 } 1762 }
1478 } else { 1763 } else {
1479 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1764 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1484,12 +1769,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
1484 /* Need type check, even if the load result is unused. */ 1769 /* Need type check, even if the load result is unused. */
1485 asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); 1770 asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
1486 if (LJ_64 && irt_type(t) >= IRT_NUM) { 1771 if (LJ_64 && irt_type(t) >= IRT_NUM) {
1487 lua_assert(irt_isinteger(t) || irt_isnum(t)); 1772 lj_assertA(irt_isinteger(t) || irt_isnum(t),
1773 "bad SLOAD type %d", irt_type(t));
1774#if LJ_GC64
1775 emit_u32(as, LJ_TISNUM << 15);
1776#else
1488 emit_u32(as, LJ_TISNUM); 1777 emit_u32(as, LJ_TISNUM);
1778#endif
1779 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1780#if LJ_GC64
1781 } else if (irt_isnil(t)) {
1782 /* LJ_GC64 type check for nil:
1783 **
1784 ** cmp qword [addr], -1
1785 ** jne ->exit
1786 */
1787 emit_i8(as, -1);
1788 emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
1789 } else if (irt_ispri(t)) {
1790 emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
1489 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); 1791 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1490 } else { 1792 } else {
1793 /* LJ_GC64 type check only:
1794 **
1795 ** mov r64, [addr]
1796 ** sar r64, 47
1797 ** cmp r32, itype
1798 ** jne ->exit
1799 */
1800 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
1801 emit_i8(as, irt_toitype(t));
1802 emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
1803 emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1804 emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs);
1805#else
1806 } else {
1491 emit_i8(as, irt_toitype(t)); 1807 emit_i8(as, irt_toitype(t));
1492 emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); 1808 emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
1809#endif
1493 } 1810 }
1494 } 1811 }
1495} 1812}
@@ -1500,15 +1817,14 @@ static void asm_sload(ASMState *as, IRIns *ir)
1500static void asm_cnew(ASMState *as, IRIns *ir) 1817static void asm_cnew(ASMState *as, IRIns *ir)
1501{ 1818{
1502 CTState *cts = ctype_ctsG(J2G(as->J)); 1819 CTState *cts = ctype_ctsG(J2G(as->J));
1503 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1820 CTypeID id = (CTypeID)IR(ir->op1)->i;
1504 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1821 CTSize sz;
1505 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1822 CTInfo info = lj_ctype_info(cts, id, &sz);
1506 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1823 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1507 IRRef args[2]; 1824 IRRef args[4];
1508 lua_assert(sz != CTSIZE_INVALID); 1825 lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
1826 "bad CNEW/CNEWI operands");
1509 1827
1510 args[0] = ASMREF_L; /* lua_State *L */
1511 args[1] = ASMREF_TMP1; /* MSize size */
1512 as->gcsteps++; 1828 as->gcsteps++;
1513 asm_setupresult(as, ir, ci); /* GCcdata * */ 1829 asm_setupresult(as, ir, ci); /* GCcdata * */
1514 1830
@@ -1519,8 +1835,9 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1519 Reg r64 = sz == 8 ? REX_64 : 0; 1835 Reg r64 = sz == 8 ? REX_64 : 0;
1520 if (irref_isk(ir->op2)) { 1836 if (irref_isk(ir->op2)) {
1521 IRIns *irk = IR(ir->op2); 1837 IRIns *irk = IR(ir->op2);
1522 uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : 1838 uint64_t k = (irk->o == IR_KINT64 ||
1523 (uint64_t)(uint32_t)irk->i; 1839 (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ?
1840 ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i;
1524 if (sz == 4 || checki32((int64_t)k)) { 1841 if (sz == 4 || checki32((int64_t)k)) {
1525 emit_i32(as, (int32_t)k); 1842 emit_i32(as, (int32_t)k);
1526 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); 1843 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
@@ -1536,7 +1853,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1536 int32_t ofs = sizeof(GCcdata); 1853 int32_t ofs = sizeof(GCcdata);
1537 if (sz == 8) { 1854 if (sz == 8) {
1538 ofs += 4; ir++; 1855 ofs += 4; ir++;
1539 lua_assert(ir->o == IR_HIOP); 1856 lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP");
1540 } 1857 }
1541 do { 1858 do {
1542 if (irref_isk(ir->op2)) { 1859 if (irref_isk(ir->op2)) {
@@ -1550,21 +1867,30 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1550 ofs -= 4; ir--; 1867 ofs -= 4; ir--;
1551 } while (1); 1868 } while (1);
1552#endif 1869#endif
1553 lua_assert(sz == 4 || sz == 8); 1870 lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
1871 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1872 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1873 args[0] = ASMREF_L; /* lua_State *L */
1874 args[1] = ir->op1; /* CTypeID id */
1875 args[2] = ir->op2; /* CTSize sz */
1876 args[3] = ASMREF_TMP1; /* CTSize align */
1877 asm_gencall(as, ci, args);
1878 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1879 return;
1554 } 1880 }
1555 1881
1556 /* Combine initialization of marked, gct and ctypeid. */ 1882 /* Combine initialization of marked, gct and ctypeid. */
1557 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); 1883 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
1558 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, 1884 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
1559 (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); 1885 (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
1560 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); 1886 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
1561 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); 1887 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
1562 1888
1889 args[0] = ASMREF_L; /* lua_State *L */
1890 args[1] = ASMREF_TMP1; /* MSize size */
1563 asm_gencall(as, ci, args); 1891 asm_gencall(as, ci, args);
1564 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); 1892 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1565} 1893}
1566#else
1567#define asm_cnew(as, ir) ((void)0)
1568#endif 1894#endif
1569 1895
1570/* -- Write barriers ------------------------------------------------------ */ 1896/* -- Write barriers ------------------------------------------------------ */
@@ -1574,7 +1900,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1574 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); 1900 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1575 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1901 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1576 MCLabel l_end = emit_label(as); 1902 MCLabel l_end = emit_label(as);
1577 emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); 1903 emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
1578 emit_setgl(as, tab, gc.grayagain); 1904 emit_setgl(as, tab, gc.grayagain);
1579 emit_getgl(as, tmp, gc.grayagain); 1905 emit_getgl(as, tmp, gc.grayagain);
1580 emit_i8(as, ~LJ_GC_BLACK); 1906 emit_i8(as, ~LJ_GC_BLACK);
@@ -1591,7 +1917,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1591 MCLabel l_end; 1917 MCLabel l_end;
1592 Reg obj; 1918 Reg obj;
1593 /* No need for other object barriers (yet). */ 1919 /* No need for other object barriers (yet). */
1594 lua_assert(IR(ir->op1)->o == IR_UREFC); 1920 lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
1595 ra_evictset(as, RSET_SCRATCH); 1921 ra_evictset(as, RSET_SCRATCH);
1596 l_end = emit_label(as); 1922 l_end = emit_label(as);
1597 args[0] = ASMREF_TMP1; /* global_State *g */ 1923 args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1637,36 +1963,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
1637 } 1963 }
1638} 1964}
1639 1965
1640/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
1641static int fpmjoin_pow(ASMState *as, IRIns *ir)
1642{
1643 IRIns *irp = IR(ir->op1);
1644 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1645 IRIns *irpp = IR(irp->op1);
1646 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1647 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1648 /* The modified regs must match with the *.dasc implementation. */
1649 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1650 IRIns *irx;
1651 if (ra_hasreg(ir->r))
1652 rset_clear(drop, ir->r); /* Dest reg handled below. */
1653 ra_evictset(as, drop);
1654 ra_destreg(as, ir, RID_XMM0);
1655 emit_call(as, lj_vm_pow_sse);
1656 irx = IR(irpp->op1);
1657 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1658 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1659 ra_left(as, RID_XMM0, irpp->op1);
1660 ra_left(as, RID_XMM1, irp->op2);
1661 return 1;
1662 }
1663 }
1664 return 0;
1665}
1666
1667static void asm_fpmath(ASMState *as, IRIns *ir) 1966static void asm_fpmath(ASMState *as, IRIns *ir)
1668{ 1967{
1669 IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; 1968 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1670 if (fpm == IRFPM_SQRT) { 1969 if (fpm == IRFPM_SQRT) {
1671 Reg dest = ra_dest(as, ir, RSET_FPR); 1970 Reg dest = ra_dest(as, ir, RSET_FPR);
1672 Reg left = asm_fuseload(as, ir->op1, RSET_FPR); 1971 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1697,51 +1996,25 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1697 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 1996 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1698 ra_left(as, RID_XMM0, ir->op1); 1997 ra_left(as, RID_XMM0, ir->op1);
1699 } 1998 }
1700 } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { 1999 } else {
1701 /* Rejoined to pow(). */ 2000 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1702 } else { /* Handle x87 ops. */ 2001 }
1703 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ 2002}
1704 Reg dest = ir->r; 2003
1705 if (ra_hasreg(dest)) { 2004static void asm_ldexp(ASMState *as, IRIns *ir)
1706 ra_free(as, dest); 2005{
1707 ra_modified(as, dest); 2006 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
1708 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 2007 Reg dest = ir->r;
1709 } 2008 if (ra_hasreg(dest)) {
1710 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 2009 ra_free(as, dest);
1711 switch (fpm) { /* st0 = lj_vm_*(st0) */ 2010 ra_modified(as, dest);
1712 case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; 2011 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1713 case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
1714 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
1715 case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
1716 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
1717 case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
1718 /* Note: the use of fyl2xp1 would be pointless here. When computing
1719 ** log(1.0+eps) the precision is already lost after 1.0 is added.
1720 ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
1721 */
1722 emit_x87op(as, XI_FYL2X); break;
1723 case IRFPM_OTHER:
1724 switch (ir->o) {
1725 case IR_ATAN2:
1726 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
1727 case IR_LDEXP:
1728 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
1729 default: lua_assert(0); break;
1730 }
1731 break;
1732 default: lua_assert(0); break;
1733 }
1734 asm_x87load(as, ir->op1);
1735 switch (fpm) {
1736 case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
1737 case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
1738 case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
1739 case IRFPM_OTHER:
1740 if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
1741 break;
1742 default: break;
1743 }
1744 } 2012 }
2013 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
2014 emit_x87op(as, XI_FPOP1);
2015 emit_x87op(as, XI_FSCALE);
2016 asm_x87load(as, ir->op1);
2017 asm_x87load(as, ir->op2);
1745} 2018}
1746 2019
1747static void asm_fppowi(ASMState *as, IRIns *ir) 2020static void asm_fppowi(ASMState *as, IRIns *ir)
@@ -1757,33 +2030,11 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
1757 ra_left(as, RID_EAX, ir->op2); 2030 ra_left(as, RID_EAX, ir->op2);
1758} 2031}
1759 2032
1760#if LJ_64 && LJ_HASFFI
1761static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
1762{
1763 const CCallInfo *ci = &lj_ir_callinfo[id];
1764 IRRef args[2];
1765 args[0] = ir->op1;
1766 args[1] = ir->op2;
1767 asm_setupresult(as, ir, ci);
1768 asm_gencall(as, ci, args);
1769}
1770#endif
1771
1772static void asm_intmod(ASMState *as, IRIns *ir)
1773{
1774 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
1775 IRRef args[2];
1776 args[0] = ir->op1;
1777 args[1] = ir->op2;
1778 asm_setupresult(as, ir, ci);
1779 asm_gencall(as, ci, args);
1780}
1781
1782static int asm_swapops(ASMState *as, IRIns *ir) 2033static int asm_swapops(ASMState *as, IRIns *ir)
1783{ 2034{
1784 IRIns *irl = IR(ir->op1); 2035 IRIns *irl = IR(ir->op1);
1785 IRIns *irr = IR(ir->op2); 2036 IRIns *irr = IR(ir->op2);
1786 lua_assert(ra_noreg(irr->r)); 2037 lj_assertA(ra_noreg(irr->r), "bad usage");
1787 if (!irm_iscomm(lj_ir_mode[ir->o])) 2038 if (!irm_iscomm(lj_ir_mode[ir->o]))
1788 return 0; /* Can't swap non-commutative operations. */ 2039 return 0; /* Can't swap non-commutative operations. */
1789 if (irref_isk(ir->op2)) 2040 if (irref_isk(ir->op2))
@@ -1955,11 +2206,28 @@ static void asm_add(ASMState *as, IRIns *ir)
1955{ 2206{
1956 if (irt_isnum(ir->t)) 2207 if (irt_isnum(ir->t))
1957 asm_fparith(as, ir, XO_ADDSD); 2208 asm_fparith(as, ir, XO_ADDSD);
1958 else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || 2209 else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir))
1959 irt_is64(ir->t) || !asm_lea(as, ir))
1960 asm_intarith(as, ir, XOg_ADD); 2210 asm_intarith(as, ir, XOg_ADD);
1961} 2211}
1962 2212
2213static void asm_sub(ASMState *as, IRIns *ir)
2214{
2215 if (irt_isnum(ir->t))
2216 asm_fparith(as, ir, XO_SUBSD);
2217 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2218 asm_intarith(as, ir, XOg_SUB);
2219}
2220
2221static void asm_mul(ASMState *as, IRIns *ir)
2222{
2223 if (irt_isnum(ir->t))
2224 asm_fparith(as, ir, XO_MULSD);
2225 else
2226 asm_intarith(as, ir, XOg_X_IMUL);
2227}
2228
2229#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD)
2230
1963static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 2231static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1964{ 2232{
1965 Reg dest = ra_dest(as, ir, RSET_GPR); 2233 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1967,7 +2235,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1967 ra_left(as, dest, ir->op1); 2235 ra_left(as, dest, ir->op1);
1968} 2236}
1969 2237
1970static void asm_min_max(ASMState *as, IRIns *ir, int cc) 2238static void asm_neg(ASMState *as, IRIns *ir)
2239{
2240 if (irt_isnum(ir->t))
2241 asm_fparith(as, ir, XO_XORPS);
2242 else
2243 asm_neg_not(as, ir, XOg_NEG);
2244}
2245
2246#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
2247
2248static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1971{ 2249{
1972 Reg right, dest = ra_dest(as, ir, RSET_GPR); 2250 Reg right, dest = ra_dest(as, ir, RSET_GPR);
1973 IRRef lref = ir->op1, rref = ir->op2; 2251 IRRef lref = ir->op1, rref = ir->op2;
@@ -1978,7 +2256,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
1978 ra_left(as, dest, lref); 2256 ra_left(as, dest, lref);
1979} 2257}
1980 2258
1981static void asm_bitswap(ASMState *as, IRIns *ir) 2259static void asm_min(ASMState *as, IRIns *ir)
2260{
2261 if (irt_isnum(ir->t))
2262 asm_fparith(as, ir, XO_MINSD);
2263 else
2264 asm_intmin_max(as, ir, CC_G);
2265}
2266
2267static void asm_max(ASMState *as, IRIns *ir)
2268{
2269 if (irt_isnum(ir->t))
2270 asm_fparith(as, ir, XO_MAXSD);
2271 else
2272 asm_intmin_max(as, ir, CC_L);
2273}
2274
2275/* Note: don't use LEA for overflow-checking arithmetic! */
2276#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
2277#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
2278#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
2279
2280#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
2281
2282static void asm_bswap(ASMState *as, IRIns *ir)
1982{ 2283{
1983 Reg dest = ra_dest(as, ir, RSET_GPR); 2284 Reg dest = ra_dest(as, ir, RSET_GPR);
1984 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), 2285 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1986,7 +2287,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1986 ra_left(as, dest, ir->op1); 2287 ra_left(as, dest, ir->op1);
1987} 2288}
1988 2289
1989static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 2290#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
2291#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
2292#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
2293
2294static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
1990{ 2295{
1991 IRRef rref = ir->op2; 2296 IRRef rref = ir->op2;
1992 IRIns *irr = IR(rref); 2297 IRIns *irr = IR(rref);
@@ -1995,17 +2300,33 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1995 int shift; 2300 int shift;
1996 dest = ra_dest(as, ir, RSET_GPR); 2301 dest = ra_dest(as, ir, RSET_GPR);
1997 shift = irr->i & (irt_is64(ir->t) ? 63 : 31); 2302 shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
2303 if (!xv && shift && (as->flags & JIT_F_BMI2)) {
2304 Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
2305 if (left != dest) { /* BMI2 rotate right by constant. */
2306 emit_i8(as, xs == XOg_ROL ? -shift : shift);
2307 emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
2308 return;
2309 }
2310 }
1998 switch (shift) { 2311 switch (shift) {
1999 case 0: break; 2312 case 0: break;
2000 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; 2313 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
2001 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; 2314 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
2002 } 2315 }
2316 } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
2317 Reg left, right;
2318 dest = ra_dest(as, ir, RSET_GPR);
2319 right = ra_alloc1(as, rref, RSET_GPR);
2320 left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
2321 irt_is64(ir->t));
2322 emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
2323 return;
2003 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ 2324 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
2004 Reg right; 2325 Reg right;
2005 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); 2326 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
2006 if (dest == RID_ECX) { 2327 if (dest == RID_ECX) {
2007 dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX)); 2328 dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
2008 emit_rr(as, XO_MOV, RID_ECX, dest); 2329 emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest);
2009 } 2330 }
2010 right = irr->r; 2331 right = irr->r;
2011 if (ra_noreg(right)) 2332 if (ra_noreg(right))
@@ -2025,6 +2346,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2025 */ 2346 */
2026} 2347}
2027 2348
2349#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
2350#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
2351#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
2352#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
2353#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
2354
2028/* -- Comparisons --------------------------------------------------------- */ 2355/* -- Comparisons --------------------------------------------------------- */
2029 2356
2030/* Virtual flags for unordered FP comparisons. */ 2357/* Virtual flags for unordered FP comparisons. */
@@ -2051,8 +2378,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
2051}; 2378};
2052 2379
2053/* FP and integer comparisons. */ 2380/* FP and integer comparisons. */
2054static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) 2381static void asm_comp(ASMState *as, IRIns *ir)
2055{ 2382{
2383 uint32_t cc = asm_compmap[ir->o];
2056 if (irt_isnum(ir->t)) { 2384 if (irt_isnum(ir->t)) {
2057 IRRef lref = ir->op1; 2385 IRRef lref = ir->op1;
2058 IRRef rref = ir->op2; 2386 IRRef rref = ir->op2;
@@ -2073,7 +2401,6 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2073 cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ 2401 cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
2074 } 2402 }
2075 left = ra_alloc1(as, lref, RSET_FPR); 2403 left = ra_alloc1(as, lref, RSET_FPR);
2076 right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2077 l_around = emit_label(as); 2404 l_around = emit_label(as);
2078 asm_guardcc(as, cc >> 4); 2405 asm_guardcc(as, cc >> 4);
2079 if (cc & VCC_P) { /* Extra CC_P branch required? */ 2406 if (cc & VCC_P) { /* Extra CC_P branch required? */
@@ -2090,14 +2417,16 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2090 emit_jcc(as, CC_P, as->mcp); 2417 emit_jcc(as, CC_P, as->mcp);
2091 } 2418 }
2092 } 2419 }
2420 right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2093 emit_mrm(as, XO_UCOMISD, left, right); 2421 emit_mrm(as, XO_UCOMISD, left, right);
2094 } else { 2422 } else {
2095 IRRef lref = ir->op1, rref = ir->op2; 2423 IRRef lref = ir->op1, rref = ir->op2;
2096 IROp leftop = (IROp)(IR(lref)->o); 2424 IROp leftop = (IROp)(IR(lref)->o);
2097 Reg r64 = REX_64IR(ir, 0); 2425 Reg r64 = REX_64IR(ir, 0);
2098 int32_t imm = 0; 2426 int32_t imm = 0;
2099 lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || 2427 lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
2100 irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); 2428 irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
2429 "bad comparison data type %d", irt_type(ir->t));
2101 /* Swap constants (only for ABC) and fusable loads to the right. */ 2430 /* Swap constants (only for ABC) and fusable loads to the right. */
2102 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { 2431 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
2103 if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ 2432 if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */
@@ -2179,7 +2508,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2179 /* Use test r,r instead of cmp r,0. */ 2508 /* Use test r,r instead of cmp r,0. */
2180 x86Op xo = XO_TEST; 2509 x86Op xo = XO_TEST;
2181 if (irt_isu8(ir->t)) { 2510 if (irt_isu8(ir->t)) {
2182 lua_assert(ir->o == IR_EQ || ir->o == IR_NE); 2511 lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage");
2183 xo = XO_TESTb; 2512 xo = XO_TESTb;
2184 if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { 2513 if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) {
2185 if (LJ_64) { 2514 if (LJ_64) {
@@ -2207,6 +2536,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2207 } 2536 }
2208} 2537}
2209 2538
2539#define asm_equal(as, ir) asm_comp(as, ir)
2540
2210#if LJ_32 && LJ_HASFFI 2541#if LJ_32 && LJ_HASFFI
2211/* 64 bit integer comparisons in 32 bit mode. */ 2542/* 64 bit integer comparisons in 32 bit mode. */
2212static void asm_comp_int64(ASMState *as, IRIns *ir) 2543static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2289,13 +2620,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2289 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2620 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
2290 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2621 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2291 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2622 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
2292 if (usehi || uselo) {
2293 if (irt_isfp(ir->t))
2294 asm_conv_fp_int64(as, ir);
2295 else
2296 asm_conv_int64_fp(as, ir);
2297 }
2298 as->curins--; /* Always skip the CONV. */ 2623 as->curins--; /* Always skip the CONV. */
2624 if (usehi || uselo)
2625 asm_conv64(as, ir);
2299 return; 2626 return;
2300 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2627 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
2301 asm_comp_int64(as, ir); 2628 asm_comp_int64(as, ir);
@@ -2337,13 +2664,24 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2337 case IR_CNEWI: 2664 case IR_CNEWI:
2338 /* Nothing to do here. Handled by CNEWI itself. */ 2665 /* Nothing to do here. Handled by CNEWI itself. */
2339 break; 2666 break;
2340 default: lua_assert(0); break; 2667 default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
2341 } 2668 }
2342#else 2669#else
2343 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ 2670 /* Unused on x64 or without FFI. */
2671 UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
2344#endif 2672#endif
2345} 2673}
2346 2674
2675/* -- Profiling ----------------------------------------------------------- */
2676
2677static void asm_prof(ASMState *as, IRIns *ir)
2678{
2679 UNUSED(ir);
2680 asm_guardcc(as, CC_NE);
2681 emit_i8(as, HOOK_PROFILE);
2682 emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
2683}
2684
2347/* -- Stack handling ------------------------------------------------------ */ 2685/* -- Stack handling ------------------------------------------------------ */
2348 2686
2349/* Check Lua stack size for overflow. Use exit handler as fallback. */ 2687/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2358,14 +2696,19 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2358 emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); 2696 emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
2359 else 2697 else
2360 ra_modified(as, r); 2698 ra_modified(as, r);
2361 emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); 2699 emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
2362 if (ra_hasreg(pbase) && pbase != r) 2700 if (ra_hasreg(pbase) && pbase != r)
2363 emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); 2701 emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
2364 else 2702 else
2703#if LJ_GC64
2704 emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
2705 (int32_t)dispofs(as, &J2G(as->J)->jit_base));
2706#else
2365 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, 2707 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2366 ptr2addr(&J2G(as->J)->jit_base)); 2708 ptr2addr(&J2G(as->J)->jit_base));
2367 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); 2709#endif
2368 emit_getgl(as, r, jit_L); 2710 emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
2711 emit_getgl(as, r, cur_L);
2369 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2712 if (allow == RSET_EMPTY) /* Spill temp. register. */
2370 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); 2713 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
2371} 2714}
@@ -2374,13 +2717,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2374static void asm_stack_restore(ASMState *as, SnapShot *snap) 2717static void asm_stack_restore(ASMState *as, SnapShot *snap)
2375{ 2718{
2376 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2719 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2377 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2720#if !LJ_FR2 || defined(LUA_USE_ASSERT)
2721 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2722#endif
2378 MSize n, nent = snap->nent; 2723 MSize n, nent = snap->nent;
2379 /* Store the value of all modified slots to the Lua stack. */ 2724 /* Store the value of all modified slots to the Lua stack. */
2380 for (n = 0; n < nent; n++) { 2725 for (n = 0; n < nent; n++) {
2381 SnapEntry sn = map[n]; 2726 SnapEntry sn = map[n];
2382 BCReg s = snap_slot(sn); 2727 BCReg s = snap_slot(sn);
2383 int32_t ofs = 8*((int32_t)s-1); 2728 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
2384 IRRef ref = snap_ref(sn); 2729 IRRef ref = snap_ref(sn);
2385 IRIns *ir = IR(ref); 2730 IRIns *ir = IR(ref);
2386 if ((sn & SNAP_NORESTORE)) 2731 if ((sn & SNAP_NORESTORE))
@@ -2389,25 +2734,54 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2389 Reg src = ra_alloc1(as, ref, RSET_FPR); 2734 Reg src = ra_alloc1(as, ref, RSET_FPR);
2390 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); 2735 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
2391 } else { 2736 } else {
2392 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || 2737 lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
2393 (LJ_DUALNUM && irt_isinteger(ir->t))); 2738 (LJ_DUALNUM && irt_isinteger(ir->t)),
2739 "restore of IR type %d", irt_type(ir->t));
2394 if (!irref_isk(ref)) { 2740 if (!irref_isk(ref)) {
2395 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); 2741 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2742#if LJ_GC64
2743 if (irt_is64(ir->t)) {
2744 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
2745 emit_u32(as, irt_toitype(ir->t) << 15);
2746 emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
2747 } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
2748 emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
2749 } else {
2750 emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
2751 }
2752#endif
2396 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); 2753 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
2754#if LJ_GC64
2755 } else {
2756 TValue k;
2757 lj_ir_kvalue(as->J->L, &k, ir);
2758 if (tvisnil(&k)) {
2759 emit_i32(as, -1);
2760 emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
2761 } else {
2762 emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
2763 emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
2764 }
2765#else
2397 } else if (!irt_ispri(ir->t)) { 2766 } else if (!irt_ispri(ir->t)) {
2398 emit_movmroi(as, RID_BASE, ofs, ir->i); 2767 emit_movmroi(as, RID_BASE, ofs, ir->i);
2768#endif
2399 } 2769 }
2400 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2770 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
2771#if !LJ_FR2
2401 if (s != 0) /* Do not overwrite link to previous frame. */ 2772 if (s != 0) /* Do not overwrite link to previous frame. */
2402 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); 2773 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
2774#endif
2775#if !LJ_GC64
2403 } else { 2776 } else {
2404 if (!(LJ_64 && irt_islightud(ir->t))) 2777 if (!(LJ_64 && irt_islightud(ir->t)))
2405 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); 2778 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
2779#endif
2406 } 2780 }
2407 } 2781 }
2408 checkmclim(as); 2782 checkmclim(as);
2409 } 2783 }
2410 lua_assert(map + nent == flinks); 2784 lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
2411} 2785}
2412 2786
2413/* -- GC handling --------------------------------------------------------- */ 2787/* -- GC handling --------------------------------------------------------- */
@@ -2428,11 +2802,15 @@ static void asm_gc_check(ASMState *as)
2428 args[1] = ASMREF_TMP2; /* MSize steps */ 2802 args[1] = ASMREF_TMP2; /* MSize steps */
2429 asm_gencall(as, ci, args); 2803 asm_gencall(as, ci, args);
2430 tmp = ra_releasetmp(as, ASMREF_TMP1); 2804 tmp = ra_releasetmp(as, ASMREF_TMP1);
2805#if LJ_GC64
2806 emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
2807#else
2431 emit_loada(as, tmp, J2G(as->J)); 2808 emit_loada(as, tmp, J2G(as->J));
2809#endif
2432 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); 2810 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
2433 /* Jump around GC step if GC total < GC threshold. */ 2811 /* Jump around GC step if GC total < GC threshold. */
2434 emit_sjcc(as, CC_B, l_end); 2812 emit_sjcc(as, CC_B, l_end);
2435 emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); 2813 emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
2436 emit_getgl(as, tmp, gc.total); 2814 emit_getgl(as, tmp, gc.total);
2437 as->gcsteps = 0; 2815 as->gcsteps = 0;
2438 checkmclim(as); 2816 checkmclim(as);
@@ -2447,16 +2825,16 @@ static void asm_loop_fixup(ASMState *as)
2447 MCode *target = as->mcp; 2825 MCode *target = as->mcp;
2448 if (as->realign) { /* Realigned loops use short jumps. */ 2826 if (as->realign) { /* Realigned loops use short jumps. */
2449 as->realign = NULL; /* Stop another retry. */ 2827 as->realign = NULL; /* Stop another retry. */
2450 lua_assert(((intptr_t)target & 15) == 0); 2828 lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed");
2451 if (as->loopinv) { /* Inverted loop branch? */ 2829 if (as->loopinv) { /* Inverted loop branch? */
2452 p -= 5; 2830 p -= 5;
2453 p[0] = XI_JMP; 2831 p[0] = XI_JMP;
2454 lua_assert(target - p >= -128); 2832 lj_assertA(target - p >= -128, "loop realign failed");
2455 p[-1] = (MCode)(target - p); /* Patch sjcc. */ 2833 p[-1] = (MCode)(target - p); /* Patch sjcc. */
2456 if (as->loopinv == 2) 2834 if (as->loopinv == 2)
2457 p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ 2835 p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */
2458 } else { 2836 } else {
2459 lua_assert(target - p >= -128); 2837 lj_assertA(target - p >= -128, "loop realign failed");
2460 p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ 2838 p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */
2461 p[-2] = XI_JMPs; 2839 p[-2] = XI_JMPs;
2462 } 2840 }
@@ -2497,7 +2875,7 @@ static void asm_head_root_base(ASMState *as)
2497 if (rset_test(as->modset, r) || irt_ismarked(ir->t)) 2875 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
2498 ir->r = RID_INIT; /* No inheritance for modified BASE register. */ 2876 ir->r = RID_INIT; /* No inheritance for modified BASE register. */
2499 if (r != RID_BASE) 2877 if (r != RID_BASE)
2500 emit_rr(as, XO_MOV, r, RID_BASE); 2878 emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
2501 } 2879 }
2502} 2880}
2503 2881
@@ -2513,8 +2891,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
2513 if (irp->r == r) { 2891 if (irp->r == r) {
2514 rset_clear(allow, r); /* Mark same BASE register as coalesced. */ 2892 rset_clear(allow, r); /* Mark same BASE register as coalesced. */
2515 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { 2893 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
2894 /* Move from coalesced parent reg. */
2516 rset_clear(allow, irp->r); 2895 rset_clear(allow, irp->r);
2517 emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ 2896 emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
2518 } else { 2897 } else {
2519 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ 2898 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
2520 } 2899 }
@@ -2532,7 +2911,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2532 MCode *target, *q; 2911 MCode *target, *q;
2533 int32_t spadj = as->T->spadjust; 2912 int32_t spadj = as->T->spadjust;
2534 if (spadj == 0) { 2913 if (spadj == 0) {
2535 p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); 2914 p -= LJ_64 ? 7 : 6;
2536 } else { 2915 } else {
2537 MCode *p1; 2916 MCode *p1;
2538 /* Patch stack adjustment. */ 2917 /* Patch stack adjustment. */
@@ -2544,24 +2923,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2544 p1 = p-9; 2923 p1 = p-9;
2545 *(int32_t *)p1 = spadj; 2924 *(int32_t *)p1 = spadj;
2546 } 2925 }
2547 if ((as->flags & JIT_F_LEA_AGU)) {
2548#if LJ_64
2549 p1[-4] = 0x48;
2550#endif
2551 p1[-3] = (MCode)XI_LEA;
2552 p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
2553 p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
2554 } else {
2555#if LJ_64 2926#if LJ_64
2556 p1[-3] = 0x48; 2927 p1[-3] = 0x48;
2557#endif 2928#endif
2558 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); 2929 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2559 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); 2930 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
2560 }
2561 } 2931 }
2562 /* Patch exit branch. */ 2932 /* Patch exit branch. */
2563 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; 2933 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
2564 *(int32_t *)(p-4) = jmprel(p, target); 2934 *(int32_t *)(p-4) = jmprel(as->J, p, target);
2565 p[-5] = XI_JMP; 2935 p[-5] = XI_JMP;
2566 /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ 2936 /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
2567 for (q = as->mctop-1; q >= p; q--) 2937 for (q = as->mctop-1; q >= p; q--)
@@ -2588,168 +2958,11 @@ static void asm_tail_prep(ASMState *as)
2588 as->invmcp = as->mcp = p; 2958 as->invmcp = as->mcp = p;
2589 } else { 2959 } else {
2590 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ 2960 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
2591 as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); 2961 as->mcp = p - (LJ_64 ? 7 : 6);
2592 as->invmcp = NULL; 2962 as->invmcp = NULL;
2593 } 2963 }
2594} 2964}
2595 2965
2596/* -- Instruction dispatch ------------------------------------------------ */
2597
2598/* Assemble a single instruction. */
2599static void asm_ir(ASMState *as, IRIns *ir)
2600{
2601 switch ((IROp)ir->o) {
2602 /* Miscellaneous ops. */
2603 case IR_LOOP: asm_loop(as); break;
2604 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2605 case IR_USE:
2606 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2607 case IR_PHI: asm_phi(as, ir); break;
2608 case IR_HIOP: asm_hiop(as, ir); break;
2609 case IR_GCSTEP: asm_gcstep(as, ir); break;
2610
2611 /* Guarded assertions. */
2612 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2613 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2614 case IR_EQ: case IR_NE: case IR_ABC:
2615 asm_comp(as, ir, asm_compmap[ir->o]);
2616 break;
2617
2618 case IR_RETF: asm_retf(as, ir); break;
2619
2620 /* Bit ops. */
2621 case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
2622 case IR_BSWAP: asm_bitswap(as, ir); break;
2623
2624 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
2625 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
2626 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
2627
2628 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
2629 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
2630 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
2631 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
2632 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
2633
2634 /* Arithmetic ops. */
2635 case IR_ADD: asm_add(as, ir); break;
2636 case IR_SUB:
2637 if (irt_isnum(ir->t))
2638 asm_fparith(as, ir, XO_SUBSD);
2639 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2640 asm_intarith(as, ir, XOg_SUB);
2641 break;
2642 case IR_MUL:
2643 if (irt_isnum(ir->t))
2644 asm_fparith(as, ir, XO_MULSD);
2645 else
2646 asm_intarith(as, ir, XOg_X_IMUL);
2647 break;
2648 case IR_DIV:
2649#if LJ_64 && LJ_HASFFI
2650 if (!irt_isnum(ir->t))
2651 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2652 IRCALL_lj_carith_divu64);
2653 else
2654#endif
2655 asm_fparith(as, ir, XO_DIVSD);
2656 break;
2657 case IR_MOD:
2658#if LJ_64 && LJ_HASFFI
2659 if (!irt_isint(ir->t))
2660 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2661 IRCALL_lj_carith_modu64);
2662 else
2663#endif
2664 asm_intmod(as, ir);
2665 break;
2666
2667 case IR_NEG:
2668 if (irt_isnum(ir->t))
2669 asm_fparith(as, ir, XO_XORPS);
2670 else
2671 asm_neg_not(as, ir, XOg_NEG);
2672 break;
2673 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
2674
2675 case IR_MIN:
2676 if (irt_isnum(ir->t))
2677 asm_fparith(as, ir, XO_MINSD);
2678 else
2679 asm_min_max(as, ir, CC_G);
2680 break;
2681 case IR_MAX:
2682 if (irt_isnum(ir->t))
2683 asm_fparith(as, ir, XO_MAXSD);
2684 else
2685 asm_min_max(as, ir, CC_L);
2686 break;
2687
2688 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
2689 asm_fpmath(as, ir);
2690 break;
2691 case IR_POW:
2692#if LJ_64 && LJ_HASFFI
2693 if (!irt_isnum(ir->t))
2694 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
2695 IRCALL_lj_carith_powu64);
2696 else
2697#endif
2698 asm_fppowi(as, ir);
2699 break;
2700
2701 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
2702 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
2703 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
2704 case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
2705
2706 /* Memory references. */
2707 case IR_AREF: asm_aref(as, ir); break;
2708 case IR_HREF: asm_href(as, ir); break;
2709 case IR_HREFK: asm_hrefk(as, ir); break;
2710 case IR_NEWREF: asm_newref(as, ir); break;
2711 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2712 case IR_FREF: asm_fref(as, ir); break;
2713 case IR_STRREF: asm_strref(as, ir); break;
2714
2715 /* Loads and stores. */
2716 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2717 asm_ahuvload(as, ir);
2718 break;
2719 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
2720 case IR_SLOAD: asm_sload(as, ir); break;
2721
2722 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2723 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
2724
2725 /* Allocations. */
2726 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2727 case IR_TNEW: asm_tnew(as, ir); break;
2728 case IR_TDUP: asm_tdup(as, ir); break;
2729 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2730
2731 /* Write barriers. */
2732 case IR_TBAR: asm_tbar(as, ir); break;
2733 case IR_OBAR: asm_obar(as, ir); break;
2734
2735 /* Type conversions. */
2736 case IR_TOBIT: asm_tobit(as, ir); break;
2737 case IR_CONV: asm_conv(as, ir); break;
2738 case IR_TOSTR: asm_tostr(as, ir); break;
2739 case IR_STRTO: asm_strto(as, ir); break;
2740
2741 /* Calls. */
2742 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2743 case IR_CALLXS: asm_callx(as, ir); break;
2744 case IR_CARG: break;
2745
2746 default:
2747 setintV(&as->J->errinfo, ir->o);
2748 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2749 break;
2750 }
2751}
2752
2753/* -- Trace setup --------------------------------------------------------- */ 2966/* -- Trace setup --------------------------------------------------------- */
2754 2967
2755/* Ensure there are enough stack slots for call arguments. */ 2968/* Ensure there are enough stack slots for call arguments. */
@@ -2772,6 +2985,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2772static void asm_setup_target(ASMState *as) 2985static void asm_setup_target(ASMState *as)
2773{ 2986{
2774 asm_exitstub_setup(as, as->T->nsnap); 2987 asm_exitstub_setup(as, as->T->nsnap);
2988 as->mrm.base = 0;
2775} 2989}
2776 2990
2777/* -- Trace patching ------------------------------------------------------ */ 2991/* -- Trace patching ------------------------------------------------------ */
@@ -2885,18 +3099,24 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2885 MCode *px = exitstub_addr(J, exitno) - 6; 3099 MCode *px = exitstub_addr(J, exitno) - 6;
2886 MCode *pe = p+len-6; 3100 MCode *pe = p+len-6;
2887 MCode *pgc = NULL; 3101 MCode *pgc = NULL;
2888 uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); 3102#if LJ_GC64
3103 uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
3104#else
3105 uint32_t statei = u32ptr(&J2G(J)->vmstate);
3106#endif
2889 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) 3107 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
2890 *(int32_t *)(p+len-4) = jmprel(p+len, target); 3108 *(int32_t *)(p+len-4) = jmprel(J, p+len, target);
2891 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ 3109 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
2892 for (; p < pe; p += asm_x86_inslen(p)) 3110 for (; p < pe; p += asm_x86_inslen(p)) {
2893 if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) 3111 intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
3112 if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
2894 break; 3113 break;
2895 lua_assert(p < pe); 3114 }
3115 lj_assertJ(p < pe, "instruction length decoder failed");
2896 for (; p < pe; p += asm_x86_inslen(p)) { 3116 for (; p < pe; p += asm_x86_inslen(p)) {
2897 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px && 3117 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px &&
2898 p != pgc) { 3118 p != pgc) {
2899 *(int32_t *)(p+2) = jmprel(p+6, target); 3119 *(int32_t *)(p+2) = jmprel(J, p+6, target);
2900 } else if (*p == XI_CALL && 3120 } else if (*p == XI_CALL &&
2901 (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { 3121 (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
2902 pgc = p+7; /* Do not patch GC check exit. */ 3122 pgc = p+7; /* Do not patch GC check exit. */
diff --git a/src/lj_assert.c b/src/lj_assert.c
new file mode 100644
index 00000000..35a63ce3
--- /dev/null
+++ b/src/lj_assert.c
@@ -0,0 +1,28 @@
1/*
2** Internal assertions.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_assert_c
7#define LUA_CORE
8
9#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
10
11#include <stdio.h>
12
13#include "lj_obj.h"
14
15void lj_assert_fail(global_State *g, const char *file, int line,
16 const char *func, const char *fmt, ...)
17{
18 va_list argp;
19 va_start(argp, fmt);
20 fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func);
21 vfprintf(stderr, fmt, argp);
22 fputc('\n', stderr);
23 va_end(argp);
24 UNUSED(g); /* May be NULL. TODO: optionally dump state. */
25 abort();
26}
27
28#endif
diff --git a/src/lj_bc.h b/src/lj_bc.h
index b78c5147..ad517b6b 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index 08e44573..d968d3f4 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,14 +36,15 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
43#define BCDUMP_F_STRIP 0x02 43#define BCDUMP_F_STRIP 0x02
44#define BCDUMP_F_FFI 0x04 44#define BCDUMP_F_FFI 0x04
45#define BCDUMP_F_FR2 0x08
45 46
46#define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1) 47#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
47 48
48/* Type codes for the GC constants of a prototype. Plus length for strings. */ 49/* Type codes for the GC constants of a prototype. Plus length for strings. */
49enum { 50enum {
@@ -61,6 +62,7 @@ enum {
61 62
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 63LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 64 void *data, int strip);
65LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 66LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 67
66#endif 68#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 0d3c8a75..298e6c45 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_bc.h" 15#include "lj_bc.h"
@@ -20,6 +21,7 @@
20#include "lj_lex.h" 21#include "lj_lex.h"
21#include "lj_bcdump.h" 22#include "lj_bcdump.h"
22#include "lj_state.h" 23#include "lj_state.h"
24#include "lj_strfmt.h"
23 25
24/* Reuse some lexer fields for our own purposes. */ 26/* Reuse some lexer fields for our own purposes. */
25#define bcread_flags(ls) ls->level 27#define bcread_flags(ls) ls->level
@@ -38,85 +40,74 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38 const char *name = ls->chunkarg; 40 const char *name = ls->chunkarg;
39 if (*name == BCDUMP_HEAD1) name = "(binary)"; 41 if (*name == BCDUMP_HEAD1) name = "(binary)";
40 else if (*name == '@' || *name == '=') name++; 42 else if (*name == '@' || *name == '=') name++;
41 lj_str_pushf(L, "%s: %s", name, err2msg(em)); 43 lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
42 lj_err_throw(L, LUA_ERRSYNTAX); 44 lj_err_throw(L, LUA_ERRSYNTAX);
43} 45}
44 46
45/* Resize input buffer. */ 47/* Refill buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) 48static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{ 49{
59 lua_assert(len != 0); 50 lj_assertLS(len != 0, "empty refill");
60 if (len > LJ_MAX_MEM || ls->current < 0) 51 if (len > LJ_MAX_BUF || ls->c < 0)
61 bcread_error(ls, LJ_ERR_BCBAD); 52 bcread_error(ls, LJ_ERR_BCBAD);
62 do { 53 do {
63 const char *buf; 54 const char *buf;
64 size_t size; 55 size_t sz;
65 if (ls->n) { /* Copy remainder to buffer. */ 56 char *p = ls->sb.b;
66 if (ls->sb.n) { /* Move down in buffer. */ 57 MSize n = (MSize)(ls->pe - ls->p);
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); 58 if (n) { /* Copy remainder to buffer. */
68 if (ls->n != ls->sb.n) 59 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
69 memmove(ls->sb.buf, ls->p, ls->n); 60 lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
61 if (ls->p != p) memmove(p, ls->p, n);
70 } else { /* Copy from buffer provided by reader. */ 62 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len); 63 p = lj_buf_need(&ls->sb, len);
72 memcpy(ls->sb.buf, ls->p, ls->n); 64 memcpy(p, ls->p, n);
73 } 65 }
74 ls->p = ls->sb.buf; 66 ls->p = p;
67 ls->pe = p + n;
75 } 68 }
76 ls->sb.n = ls->n; 69 ls->sb.w = p + n;
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ 70 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */ 71 if (buf == NULL || sz == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD); 72 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */ 73 ls->c = -1; /* Only bad if we get called again. */
81 break; 74 break;
82 } 75 }
83 if (size >= LJ_MAX_MEM - ls->sb.n) lj_err_mem(ls->L); 76 if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
84 if (ls->sb.n) { /* Append to buffer. */ 77 if (n) { /* Append to buffer. */
85 MSize n = ls->sb.n + (MSize)size; 78 n += (MSize)sz;
86 bcread_resize(ls, n < len ? len : n); 79 p = lj_buf_need(&ls->sb, n < len ? len : n);
87 memcpy(ls->sb.buf + ls->sb.n, buf, size); 80 memcpy(ls->sb.w, buf, sz);
88 ls->n = ls->sb.n = n; 81 ls->sb.w = p + n;
89 ls->p = ls->sb.buf; 82 ls->p = p;
83 ls->pe = p + n;
90 } else { /* Return buffer provided by reader. */ 84 } else { /* Return buffer provided by reader. */
91 ls->n = (MSize)size;
92 ls->p = buf; 85 ls->p = buf;
86 ls->pe = buf + sz;
93 } 87 }
94 } while (ls->n < len); 88 } while ((MSize)(ls->pe - ls->p) < len);
95} 89}
96 90
97/* Need a certain number of bytes. */ 91/* Need a certain number of bytes. */
98static LJ_AINLINE void bcread_need(LexState *ls, MSize len) 92static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
99{ 93{
100 if (LJ_UNLIKELY(ls->n < len)) 94 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
101 bcread_fill(ls, len, 1); 95 bcread_fill(ls, len, 1);
102} 96}
103 97
104/* Want to read up to a certain number of bytes, but may need less. */ 98/* Want to read up to a certain number of bytes, but may need less. */
105static LJ_AINLINE void bcread_want(LexState *ls, MSize len) 99static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
106{ 100{
107 if (LJ_UNLIKELY(ls->n < len)) 101 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
108 bcread_fill(ls, len, 0); 102 bcread_fill(ls, len, 0);
109} 103}
110 104
111#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
112#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
113
114/* Return memory block from buffer. */ 105/* Return memory block from buffer. */
115static uint8_t *bcread_mem(LexState *ls, MSize len) 106static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
116{ 107{
117 uint8_t *p = (uint8_t *)ls->p; 108 uint8_t *p = (uint8_t *)ls->p;
118 bcread_consume(ls, len); 109 ls->p += len;
119 ls->p = (char *)p + len; 110 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
120 return p; 111 return p;
121} 112}
122 113
@@ -129,25 +120,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
129/* Read byte from buffer. */ 120/* Read byte from buffer. */
130static LJ_AINLINE uint32_t bcread_byte(LexState *ls) 121static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
131{ 122{
132 bcread_dec(ls); 123 lj_assertLS(ls->p < ls->pe, "buffer read overflow");
133 return (uint32_t)(uint8_t)*ls->p++; 124 return (uint32_t)(uint8_t)*ls->p++;
134} 125}
135 126
136/* Read ULEB128 value from buffer. */ 127/* Read ULEB128 value from buffer. */
137static uint32_t bcread_uleb128(LexState *ls) 128static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
138{ 129{
139 const uint8_t *p = (const uint8_t *)ls->p; 130 uint32_t v = lj_buf_ruleb128(&ls->p);
140 uint32_t v = *p++; 131 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
141 if (LJ_UNLIKELY(v >= 0x80)) {
142 int sh = 0;
143 v &= 0x7f;
144 do {
145 v |= ((*p & 0x7f) << (sh += 7));
146 bcread_dec(ls);
147 } while (*p++ >= 0x80);
148 }
149 bcread_dec(ls);
150 ls->p = (char *)p;
151 return v; 132 return v;
152} 133}
153 134
@@ -161,11 +142,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
161 v &= 0x3f; 142 v &= 0x3f;
162 do { 143 do {
163 v |= ((*p & 0x7f) << (sh += 7)); 144 v |= ((*p & 0x7f) << (sh += 7));
164 bcread_dec(ls);
165 } while (*p++ >= 0x80); 145 } while (*p++ >= 0x80);
166 } 146 }
167 bcread_dec(ls);
168 ls->p = (char *)p; 147 ls->p = (char *)p;
148 lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
169 return v; 149 return v;
170} 150}
171 151
@@ -212,8 +192,8 @@ static void bcread_ktabk(LexState *ls, TValue *o)
212 o->u32.lo = bcread_uleb128(ls); 192 o->u32.lo = bcread_uleb128(ls);
213 o->u32.hi = bcread_uleb128(ls); 193 o->u32.hi = bcread_uleb128(ls);
214 } else { 194 } else {
215 lua_assert(tp <= BCDUMP_KTAB_TRUE); 195 lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
216 setitype(o, ~tp); 196 setpriV(o, ~tp);
217 } 197 }
218} 198}
219 199
@@ -234,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls)
234 for (i = 0; i < nhash; i++) { 214 for (i = 0; i < nhash; i++) {
235 TValue key; 215 TValue key;
236 bcread_ktabk(ls, &key); 216 bcread_ktabk(ls, &key);
237 lua_assert(!tvisnil(&key)); 217 lj_assertLS(!tvisnil(&key), "nil key");
238 bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); 218 bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
239 } 219 }
240 } 220 }
@@ -271,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
271#endif 251#endif
272 } else { 252 } else {
273 lua_State *L = ls->L; 253 lua_State *L = ls->L;
274 lua_assert(tp == BCDUMP_KGC_CHILD); 254 lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp);
275 if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ 255 if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
276 bcread_error(ls, LJ_ERR_BCBAD); 256 bcread_error(ls, LJ_ERR_BCBAD);
277 L->top--; 257 L->top--;
@@ -327,25 +307,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
327} 307}
328 308
329/* Read a prototype. */ 309/* Read a prototype. */
330static GCproto *bcread_proto(LexState *ls) 310GCproto *lj_bcread_proto(LexState *ls)
331{ 311{
332 GCproto *pt; 312 GCproto *pt;
333 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 313 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
334 MSize ofsk, ofsuv, ofsdbg; 314 MSize ofsk, ofsuv, ofsdbg;
335 MSize sizedbg = 0; 315 MSize sizedbg = 0;
336 BCLine firstline = 0, numline = 0; 316 BCLine firstline = 0, numline = 0;
337 MSize len, startn;
338
339 /* Read length. */
340 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
341 ls->n--; ls->p++;
342 return NULL;
343 }
344 bcread_want(ls, 5);
345 len = bcread_uleb128(ls);
346 if (!len) return NULL; /* EOF */
347 bcread_need(ls, len);
348 startn = ls->n;
349 317
350 /* Read prototype header. */ 318 /* Read prototype header. */
351 flags = bcread_byte(ls); 319 flags = bcread_byte(ls);
@@ -414,9 +382,6 @@ static GCproto *bcread_proto(LexState *ls)
414 setmref(pt->uvinfo, NULL); 382 setmref(pt->uvinfo, NULL);
415 setmref(pt->varinfo, NULL); 383 setmref(pt->varinfo, NULL);
416 } 384 }
417
418 if (len != startn - ls->n)
419 bcread_error(ls, LJ_ERR_BCBAD);
420 return pt; 385 return pt;
421} 386}
422 387
@@ -430,14 +395,11 @@ static int bcread_header(LexState *ls)
430 bcread_byte(ls) != BCDUMP_VERSION) return 0; 395 bcread_byte(ls) != BCDUMP_VERSION) return 0;
431 bcread_flags(ls) = flags = bcread_uleb128(ls); 396 bcread_flags(ls) = flags = bcread_uleb128(ls);
432 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; 397 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
398 if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
433 if ((flags & BCDUMP_F_FFI)) { 399 if ((flags & BCDUMP_F_FFI)) {
434#if LJ_HASFFI 400#if LJ_HASFFI
435 lua_State *L = ls->L; 401 lua_State *L = ls->L;
436 if (!ctype_ctsG(G(L))) { 402 ctype_loadffi(L);
437 ptrdiff_t oldtop = savestack(L, L->top);
438 luaopen_ffi(L); /* Load FFI library on-demand. */
439 L->top = restorestack(L, oldtop);
440 }
441#else 403#else
442 return 0; 404 return 0;
443#endif 405#endif
@@ -456,19 +418,33 @@ static int bcread_header(LexState *ls)
456GCproto *lj_bcread(LexState *ls) 418GCproto *lj_bcread(LexState *ls)
457{ 419{
458 lua_State *L = ls->L; 420 lua_State *L = ls->L;
459 lua_assert(ls->current == BCDUMP_HEAD1); 421 lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header");
460 bcread_savetop(L, ls, L->top); 422 bcread_savetop(L, ls, L->top);
461 lj_str_resetbuf(&ls->sb); 423 lj_buf_reset(&ls->sb);
462 /* Check for a valid bytecode dump header. */ 424 /* Check for a valid bytecode dump header. */
463 if (!bcread_header(ls)) 425 if (!bcread_header(ls))
464 bcread_error(ls, LJ_ERR_BCFMT); 426 bcread_error(ls, LJ_ERR_BCFMT);
465 for (;;) { /* Process all prototypes in the bytecode dump. */ 427 for (;;) { /* Process all prototypes in the bytecode dump. */
466 GCproto *pt = bcread_proto(ls); 428 GCproto *pt;
467 if (!pt) break; 429 MSize len;
430 const char *startp;
431 /* Read length. */
432 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
433 ls->p++;
434 break;
435 }
436 bcread_want(ls, 5);
437 len = bcread_uleb128(ls);
438 if (!len) break; /* EOF */
439 bcread_need(ls, len);
440 startp = ls->p;
441 pt = lj_bcread_proto(ls);
442 if (ls->p != startp + len)
443 bcread_error(ls, LJ_ERR_BCBAD);
468 setprotoV(L, L->top, pt); 444 setprotoV(L, L->top, pt);
469 incr_top(L); 445 incr_top(L);
470 } 446 }
471 if ((ls->n && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) 447 if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
472 bcread_error(ls, LJ_ERR_BCBAD); 448 bcread_error(ls, LJ_ERR_BCBAD);
473 /* Pop off last prototype. */ 449 /* Pop off last prototype. */
474 L->top--; 450 L->top--;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index 41b4e10e..c5c042e0 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_str.h" 11#include "lj_buf.h"
12#include "lj_bc.h" 12#include "lj_bc.h"
13#if LJ_HASFFI 13#if LJ_HASFFI
14#include "lj_ctype.h" 14#include "lj_ctype.h"
@@ -17,99 +17,67 @@
17#include "lj_dispatch.h" 17#include "lj_dispatch.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#endif 19#endif
20#include "lj_strfmt.h"
20#include "lj_bcdump.h" 21#include "lj_bcdump.h"
21#include "lj_vm.h" 22#include "lj_vm.h"
22 23
23/* Context for bytecode writer. */ 24/* Context for bytecode writer. */
24typedef struct BCWriteCtx { 25typedef struct BCWriteCtx {
25 SBuf sb; /* Output buffer. */ 26 SBuf sb; /* Output buffer. */
26 lua_State *L; /* Lua state. */
27 GCproto *pt; /* Root prototype. */ 27 GCproto *pt; /* Root prototype. */
28 lua_Writer wfunc; /* Writer callback. */ 28 lua_Writer wfunc; /* Writer callback. */
29 void *wdata; /* Writer callback data. */ 29 void *wdata; /* Writer callback data. */
30 int strip; /* Strip debug info. */ 30 int strip; /* Strip debug info. */
31 int status; /* Status from writer callback. */ 31 int status; /* Status from writer callback. */
32#ifdef LUA_USE_ASSERT
33 global_State *g;
34#endif
32} BCWriteCtx; 35} BCWriteCtx;
33 36
34/* -- Output buffer handling ---------------------------------------------- */ 37#ifdef LUA_USE_ASSERT
35 38#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__)
36/* Resize buffer if needed. */ 39#else
37static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len) 40#define lj_assertBCW(c, ...) ((void)ctx)
38{ 41#endif
39 MSize sz = ctx->sb.sz * 2;
40 while (ctx->sb.n + len > sz) sz = sz * 2;
41 lj_str_resizebuf(ctx->L, &ctx->sb, sz);
42}
43
44/* Need a certain amount of buffer space. */
45static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
46{
47 if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
48 bcwrite_resize(ctx, len);
49}
50
51/* Add memory block to buffer. */
52static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
53{
54 uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
55 MSize i;
56 ctx->sb.n += len;
57 for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
58}
59
60/* Add byte to buffer. */
61static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
62{
63 ctx->sb.buf[ctx->sb.n++] = b;
64}
65
66/* Add ULEB128 value to buffer. */
67static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
68{
69 MSize n = ctx->sb.n;
70 uint8_t *p = (uint8_t *)ctx->sb.buf;
71 for (; v >= 0x80; v >>= 7)
72 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
73 p[n++] = (uint8_t)v;
74 ctx->sb.n = n;
75}
76 42
77/* -- Bytecode writer ----------------------------------------------------- */ 43/* -- Bytecode writer ----------------------------------------------------- */
78 44
79/* Write a single constant key/value of a template table. */ 45/* Write a single constant key/value of a template table. */
80static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) 46static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
81{ 47{
82 bcwrite_need(ctx, 1+10); 48 char *p = lj_buf_more(&ctx->sb, 1+10);
83 if (tvisstr(o)) { 49 if (tvisstr(o)) {
84 const GCstr *str = strV(o); 50 const GCstr *str = strV(o);
85 MSize len = str->len; 51 MSize len = str->len;
86 bcwrite_need(ctx, 5+len); 52 p = lj_buf_more(&ctx->sb, 5+len);
87 bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); 53 p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
88 bcwrite_block(ctx, strdata(str), len); 54 p = lj_buf_wmem(p, strdata(str), len);
89 } else if (tvisint(o)) { 55 } else if (tvisint(o)) {
90 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 56 *p++ = BCDUMP_KTAB_INT;
91 bcwrite_uleb128(ctx, intV(o)); 57 p = lj_strfmt_wuleb128(p, intV(o));
92 } else if (tvisnum(o)) { 58 } else if (tvisnum(o)) {
93 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 59 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
94 lua_Number num = numV(o); 60 lua_Number num = numV(o);
95 int32_t k = lj_num2int(num); 61 int32_t k = lj_num2int(num);
96 if (num == (lua_Number)k) { /* -0 is never a constant. */ 62 if (num == (lua_Number)k) { /* -0 is never a constant. */
97 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 63 *p++ = BCDUMP_KTAB_INT;
98 bcwrite_uleb128(ctx, k); 64 p = lj_strfmt_wuleb128(p, k);
65 ctx->sb.w = p;
99 return; 66 return;
100 } 67 }
101 } 68 }
102 bcwrite_byte(ctx, BCDUMP_KTAB_NUM); 69 *p++ = BCDUMP_KTAB_NUM;
103 bcwrite_uleb128(ctx, o->u32.lo); 70 p = lj_strfmt_wuleb128(p, o->u32.lo);
104 bcwrite_uleb128(ctx, o->u32.hi); 71 p = lj_strfmt_wuleb128(p, o->u32.hi);
105 } else { 72 } else {
106 lua_assert(tvispri(o)); 73 lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
107 bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); 74 *p++ = BCDUMP_KTAB_NIL+~itype(o);
108 } 75 }
76 ctx->sb.w = p;
109} 77}
110 78
111/* Write a template table. */ 79/* Write a template table. */
112static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) 80static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
113{ 81{
114 MSize narray = 0, nhash = 0; 82 MSize narray = 0, nhash = 0;
115 if (t->asize > 0) { /* Determine max. length of array part. */ 83 if (t->asize > 0) { /* Determine max. length of array part. */
@@ -127,8 +95,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
127 nhash += !tvisnil(&node[i].val); 95 nhash += !tvisnil(&node[i].val);
128 } 96 }
129 /* Write number of array slots and hash slots. */ 97 /* Write number of array slots and hash slots. */
130 bcwrite_uleb128(ctx, narray); 98 p = lj_strfmt_wuleb128(p, narray);
131 bcwrite_uleb128(ctx, nhash); 99 p = lj_strfmt_wuleb128(p, nhash);
100 ctx->sb.w = p;
132 if (narray) { /* Write array entries (may contain nil). */ 101 if (narray) { /* Write array entries (may contain nil). */
133 MSize i; 102 MSize i;
134 TValue *o = tvref(t->array); 103 TValue *o = tvref(t->array);
@@ -155,12 +124,13 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
155 for (i = 0; i < sizekgc; i++, kr++) { 124 for (i = 0; i < sizekgc; i++, kr++) {
156 GCobj *o = gcref(*kr); 125 GCobj *o = gcref(*kr);
157 MSize tp, need = 1; 126 MSize tp, need = 1;
127 char *p;
158 /* Determine constant type and needed size. */ 128 /* Determine constant type and needed size. */
159 if (o->gch.gct == ~LJ_TSTR) { 129 if (o->gch.gct == ~LJ_TSTR) {
160 tp = BCDUMP_KGC_STR + gco2str(o)->len; 130 tp = BCDUMP_KGC_STR + gco2str(o)->len;
161 need = 5+gco2str(o)->len; 131 need = 5+gco2str(o)->len;
162 } else if (o->gch.gct == ~LJ_TPROTO) { 132 } else if (o->gch.gct == ~LJ_TPROTO) {
163 lua_assert((pt->flags & PROTO_CHILD)); 133 lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child");
164 tp = BCDUMP_KGC_CHILD; 134 tp = BCDUMP_KGC_CHILD;
165#if LJ_HASFFI 135#if LJ_HASFFI
166 } else if (o->gch.gct == ~LJ_TCDATA) { 136 } else if (o->gch.gct == ~LJ_TCDATA) {
@@ -171,34 +141,38 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
171 } else if (id == CTID_UINT64) { 141 } else if (id == CTID_UINT64) {
172 tp = BCDUMP_KGC_U64; 142 tp = BCDUMP_KGC_U64;
173 } else { 143 } else {
174 lua_assert(id == CTID_COMPLEX_DOUBLE); 144 lj_assertBCW(id == CTID_COMPLEX_DOUBLE,
145 "bad cdata constant CTID %d", id);
175 tp = BCDUMP_KGC_COMPLEX; 146 tp = BCDUMP_KGC_COMPLEX;
176 } 147 }
177#endif 148#endif
178 } else { 149 } else {
179 lua_assert(o->gch.gct == ~LJ_TTAB); 150 lj_assertBCW(o->gch.gct == ~LJ_TTAB,
151 "bad constant GC type %d", o->gch.gct);
180 tp = BCDUMP_KGC_TAB; 152 tp = BCDUMP_KGC_TAB;
181 need = 1+2*5; 153 need = 1+2*5;
182 } 154 }
183 /* Write constant type. */ 155 /* Write constant type. */
184 bcwrite_need(ctx, need); 156 p = lj_buf_more(&ctx->sb, need);
185 bcwrite_uleb128(ctx, tp); 157 p = lj_strfmt_wuleb128(p, tp);
186 /* Write constant data (if any). */ 158 /* Write constant data (if any). */
187 if (tp >= BCDUMP_KGC_STR) { 159 if (tp >= BCDUMP_KGC_STR) {
188 bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); 160 p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
189 } else if (tp == BCDUMP_KGC_TAB) { 161 } else if (tp == BCDUMP_KGC_TAB) {
190 bcwrite_ktab(ctx, gco2tab(o)); 162 bcwrite_ktab(ctx, p, gco2tab(o));
163 continue;
191#if LJ_HASFFI 164#if LJ_HASFFI
192 } else if (tp != BCDUMP_KGC_CHILD) { 165 } else if (tp != BCDUMP_KGC_CHILD) {
193 cTValue *p = (TValue *)cdataptr(gco2cd(o)); 166 cTValue *q = (TValue *)cdataptr(gco2cd(o));
194 bcwrite_uleb128(ctx, p[0].u32.lo); 167 p = lj_strfmt_wuleb128(p, q[0].u32.lo);
195 bcwrite_uleb128(ctx, p[0].u32.hi); 168 p = lj_strfmt_wuleb128(p, q[0].u32.hi);
196 if (tp == BCDUMP_KGC_COMPLEX) { 169 if (tp == BCDUMP_KGC_COMPLEX) {
197 bcwrite_uleb128(ctx, p[1].u32.lo); 170 p = lj_strfmt_wuleb128(p, q[1].u32.lo);
198 bcwrite_uleb128(ctx, p[1].u32.hi); 171 p = lj_strfmt_wuleb128(p, q[1].u32.hi);
199 } 172 }
200#endif 173#endif
201 } 174 }
175 ctx->sb.w = p;
202 } 176 }
203} 177}
204 178
@@ -207,7 +181,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
207{ 181{
208 MSize i, sizekn = pt->sizekn; 182 MSize i, sizekn = pt->sizekn;
209 cTValue *o = mref(pt->k, TValue); 183 cTValue *o = mref(pt->k, TValue);
210 bcwrite_need(ctx, 10*sizekn); 184 char *p = lj_buf_more(&ctx->sb, 10*sizekn);
211 for (i = 0; i < sizekn; i++, o++) { 185 for (i = 0; i < sizekn; i++, o++) {
212 int32_t k; 186 int32_t k;
213 if (tvisint(o)) { 187 if (tvisint(o)) {
@@ -220,55 +194,55 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
220 k = lj_num2int(num); 194 k = lj_num2int(num);
221 if (num == (lua_Number)k) { /* -0 is never a constant. */ 195 if (num == (lua_Number)k) { /* -0 is never a constant. */
222 save_int: 196 save_int:
223 bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); 197 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
224 if (k < 0) { 198 if (k < 0)
225 char *p = &ctx->sb.buf[ctx->sb.n-1]; 199 p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
226 *p = (*p & 7) | ((k>>27) & 0x18);
227 }
228 continue; 200 continue;
229 } 201 }
230 } 202 }
231 bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); 203 p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
232 if (o->u32.lo >= 0x80000000u) { 204 if (o->u32.lo >= 0x80000000u)
233 char *p = &ctx->sb.buf[ctx->sb.n-1]; 205 p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
234 *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); 206 p = lj_strfmt_wuleb128(p, o->u32.hi);
235 }
236 bcwrite_uleb128(ctx, o->u32.hi);
237 } 207 }
238 } 208 }
209 ctx->sb.w = p;
239} 210}
240 211
241/* Write bytecode instructions. */ 212/* Write bytecode instructions. */
242static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) 213static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
243{ 214{
244 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ 215 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
245#if LJ_HASJIT 216#if LJ_HASJIT
246 uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; 217 uint8_t *q = (uint8_t *)p;
247#endif 218#endif
248 bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); 219 p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
220 UNUSED(ctx);
249#if LJ_HASJIT 221#if LJ_HASJIT
250 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ 222 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
251 if ((pt->flags & PROTO_ILOOP) || pt->trace) { 223 if ((pt->flags & PROTO_ILOOP) || pt->trace) {
252 jit_State *J = L2J(ctx->L); 224 jit_State *J = L2J(sbufL(&ctx->sb));
253 MSize i; 225 MSize i;
254 for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { 226 for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
255 BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; 227 BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
256 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || 228 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
257 op == BC_JFORI) { 229 op == BC_JFORI) {
258 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); 230 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
259 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { 231 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
260 BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); 232 BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
261 memcpy(p, &traceref(J, rd)->startins, 4); 233 memcpy(q, &traceref(J, rd)->startins, 4);
262 } 234 }
263 } 235 }
264 } 236 }
265#endif 237#endif
238 return p;
266} 239}
267 240
268/* Write prototype. */ 241/* Write prototype. */
269static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) 242static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
270{ 243{
271 MSize sizedbg = 0; 244 MSize sizedbg = 0;
245 char *p;
272 246
273 /* Recursively write children of prototype. */ 247 /* Recursively write children of prototype. */
274 if ((pt->flags & PROTO_CHILD)) { 248 if ((pt->flags & PROTO_CHILD)) {
@@ -282,31 +256,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
282 } 256 }
283 257
284 /* Start writing the prototype info to a buffer. */ 258 /* Start writing the prototype info to a buffer. */
285 lj_str_resetbuf(&ctx->sb); 259 p = lj_buf_need(&ctx->sb,
286 ctx->sb.n = 5; /* Leave room for final size. */ 260 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
287 bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); 261 p += 5; /* Leave room for final size. */
288 262
289 /* Write prototype header. */ 263 /* Write prototype header. */
290 bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); 264 *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
291 bcwrite_byte(ctx, pt->numparams); 265 *p++ = pt->numparams;
292 bcwrite_byte(ctx, pt->framesize); 266 *p++ = pt->framesize;
293 bcwrite_byte(ctx, pt->sizeuv); 267 *p++ = pt->sizeuv;
294 bcwrite_uleb128(ctx, pt->sizekgc); 268 p = lj_strfmt_wuleb128(p, pt->sizekgc);
295 bcwrite_uleb128(ctx, pt->sizekn); 269 p = lj_strfmt_wuleb128(p, pt->sizekn);
296 bcwrite_uleb128(ctx, pt->sizebc-1); 270 p = lj_strfmt_wuleb128(p, pt->sizebc-1);
297 if (!ctx->strip) { 271 if (!ctx->strip) {
298 if (proto_lineinfo(pt)) 272 if (proto_lineinfo(pt))
299 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); 273 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
300 bcwrite_uleb128(ctx, sizedbg); 274 p = lj_strfmt_wuleb128(p, sizedbg);
301 if (sizedbg) { 275 if (sizedbg) {
302 bcwrite_uleb128(ctx, pt->firstline); 276 p = lj_strfmt_wuleb128(p, pt->firstline);
303 bcwrite_uleb128(ctx, pt->numline); 277 p = lj_strfmt_wuleb128(p, pt->numline);
304 } 278 }
305 } 279 }
306 280
307 /* Write bytecode instructions and upvalue refs. */ 281 /* Write bytecode instructions and upvalue refs. */
308 bcwrite_bytecode(ctx, pt); 282 p = bcwrite_bytecode(ctx, p, pt);
309 bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); 283 p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
284 ctx->sb.w = p;
310 285
311 /* Write constants. */ 286 /* Write constants. */
312 bcwrite_kgc(ctx, pt); 287 bcwrite_kgc(ctx, pt);
@@ -314,18 +289,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
314 289
315 /* Write debug info, if not stripped. */ 290 /* Write debug info, if not stripped. */
316 if (sizedbg) { 291 if (sizedbg) {
317 bcwrite_need(ctx, sizedbg); 292 p = lj_buf_more(&ctx->sb, sizedbg);
318 bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); 293 p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
294 ctx->sb.w = p;
319 } 295 }
320 296
321 /* Pass buffer to writer function. */ 297 /* Pass buffer to writer function. */
322 if (ctx->status == 0) { 298 if (ctx->status == 0) {
323 MSize n = ctx->sb.n - 5; 299 MSize n = sbuflen(&ctx->sb) - 5;
324 MSize nn = (lj_fls(n)+8)*9 >> 6; 300 MSize nn = (lj_fls(n)+8)*9 >> 6;
325 ctx->sb.n = 5 - nn; 301 char *q = ctx->sb.b + (5 - nn);
326 bcwrite_uleb128(ctx, n); /* Fill in final size. */ 302 p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
327 lua_assert(ctx->sb.n == 5); 303 lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write");
328 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); 304 ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
329 } 305 }
330} 306}
331 307
@@ -335,20 +311,21 @@ static void bcwrite_header(BCWriteCtx *ctx)
335 GCstr *chunkname = proto_chunkname(ctx->pt); 311 GCstr *chunkname = proto_chunkname(ctx->pt);
336 const char *name = strdata(chunkname); 312 const char *name = strdata(chunkname);
337 MSize len = chunkname->len; 313 MSize len = chunkname->len;
338 lj_str_resetbuf(&ctx->sb); 314 char *p = lj_buf_need(&ctx->sb, 5+5+len);
339 bcwrite_need(ctx, 5+5+len); 315 *p++ = BCDUMP_HEAD1;
340 bcwrite_byte(ctx, BCDUMP_HEAD1); 316 *p++ = BCDUMP_HEAD2;
341 bcwrite_byte(ctx, BCDUMP_HEAD2); 317 *p++ = BCDUMP_HEAD3;
342 bcwrite_byte(ctx, BCDUMP_HEAD3); 318 *p++ = BCDUMP_VERSION;
343 bcwrite_byte(ctx, BCDUMP_VERSION); 319 *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
344 bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + 320 LJ_BE*BCDUMP_F_BE +
345 (LJ_BE ? BCDUMP_F_BE : 0) + 321 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
346 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0)); 322 LJ_FR2*BCDUMP_F_FR2;
347 if (!ctx->strip) { 323 if (!ctx->strip) {
348 bcwrite_uleb128(ctx, len); 324 p = lj_strfmt_wuleb128(p, len);
349 bcwrite_block(ctx, name, len); 325 p = lj_buf_wmem(p, name, len);
350 } 326 }
351 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); 327 ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b,
328 (MSize)(p - ctx->sb.b), ctx->wdata);
352} 329}
353 330
354/* Write footer of bytecode dump. */ 331/* Write footer of bytecode dump. */
@@ -356,7 +333,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
356{ 333{
357 if (ctx->status == 0) { 334 if (ctx->status == 0) {
358 uint8_t zero = 0; 335 uint8_t zero = 0;
359 ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); 336 ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
360 } 337 }
361} 338}
362 339
@@ -364,8 +341,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
364static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) 341static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
365{ 342{
366 BCWriteCtx *ctx = (BCWriteCtx *)ud; 343 BCWriteCtx *ctx = (BCWriteCtx *)ud;
367 UNUSED(dummy); 344 UNUSED(L); UNUSED(dummy);
368 lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ 345 lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
369 bcwrite_header(ctx); 346 bcwrite_header(ctx);
370 bcwrite_proto(ctx, ctx->pt); 347 bcwrite_proto(ctx, ctx->pt);
371 bcwrite_footer(ctx); 348 bcwrite_footer(ctx);
@@ -378,16 +355,18 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
378{ 355{
379 BCWriteCtx ctx; 356 BCWriteCtx ctx;
380 int status; 357 int status;
381 ctx.L = L;
382 ctx.pt = pt; 358 ctx.pt = pt;
383 ctx.wfunc = writer; 359 ctx.wfunc = writer;
384 ctx.wdata = data; 360 ctx.wdata = data;
385 ctx.strip = strip; 361 ctx.strip = strip;
386 ctx.status = 0; 362 ctx.status = 0;
387 lj_str_initbuf(&ctx.sb); 363#ifdef LUA_USE_ASSERT
364 ctx.g = G(L);
365#endif
366 lj_buf_init(L, &ctx.sb);
388 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); 367 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
389 if (status == 0) status = ctx.status; 368 if (status == 0) status = ctx.status;
390 lj_str_freebuf(G(ctx.L), &ctx.sb); 369 lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
391 return status; 370 return status;
392} 371}
393 372
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..d31bd99e
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,305 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_buf_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_err.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_tab.h"
15#include "lj_strfmt.h"
16
17/* -- Buffer management --------------------------------------------------- */
18
19static void buf_grow(SBuf *sb, MSize sz)
20{
21 MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
22 char *b;
23 GCSize flag;
24 if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
25 while (nsz < sz) nsz += nsz;
26 flag = sbufflag(sb);
27 if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */
28 lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW");
29 b = (char *)lj_mem_new(sbufL(sb), nsz);
30 setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW);
31 setgcrefnull(sbufX(sb)->cowref);
32 memcpy(b, sb->b, osz);
33 } else {
34 b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz);
35 }
36 if ((flag & SBUF_FLAG_EXT)) {
37 sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */
38 }
39 /* Adjust buffer pointers. */
40 sb->b = b;
41 sb->w = b + len;
42 sb->e = b + nsz;
43 if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */
44 SBuf *bsb = mref(sbufX(sb)->bsb, SBuf);
45 bsb->b = b;
46 bsb->w = b + len;
47 bsb->e = b + nsz;
48 }
49}
50
51LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
52{
53 lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow");
54 if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
55 lj_err_mem(sbufL(sb));
56 buf_grow(sb, sz);
57 return sb->b;
58}
59
60LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
61{
62 if (sbufisext(sb)) {
63 SBufExt *sbx = (SBufExt *)sb;
64 MSize len = sbufxlen(sbx);
65 if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
66 lj_err_mem(sbufL(sbx));
67 if (len + sz > sbufsz(sbx)) { /* Must grow. */
68 buf_grow((SBuf *)sbx, len + sz);
69 } else if (sbufxslack(sbx) < (sbufsz(sbx) >> 3)) {
70 /* Also grow to avoid excessive compactions, if slack < size/8. */
71 buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */
72 return sbx->w;
73 }
74 if (sbx->r != sbx->b) { /* Compact by moving down. */
75 memmove(sbx->b, sbx->r, len);
76 sbx->r = sbx->b;
77 sbx->w = sbx->b + len;
78 lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact");
79 }
80 } else {
81 MSize len = sbuflen(sb);
82 lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
83 if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
84 lj_err_mem(sbufL(sb));
85 buf_grow(sb, len + sz);
86 }
87 return sb->w;
88}
89
90void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
91{
92 char *b = sb->b;
93 MSize osz = (MSize)(sb->e - b);
94 if (osz > 2*LJ_MIN_SBUF) {
95 MSize n = (MSize)(sb->w - b);
96 b = lj_mem_realloc(L, b, osz, (osz >> 1));
97 sb->b = b;
98 sb->w = b + n;
99 sb->e = b + (osz >> 1);
100 }
101 lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");
102}
103
104char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
105{
106 SBuf *sb = &G(L)->tmpbuf;
107 setsbufL(sb, L);
108 return lj_buf_need(sb, sz);
109}
110
111#if LJ_HASBUFFER && LJ_HASJIT
112void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref)
113{
114 lua_State *L = sbufL(sbx);
115 lj_bufx_free(L, sbx);
116 lj_bufx_set_cow(L, sbx, p, len);
117 setgcref(sbx->cowref, ref);
118 lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref);
119}
120
121#if LJ_HASFFI
122MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz)
123{
124 lj_buf_more((SBuf *)sbx, sz);
125 return sbufleft(sbx);
126}
127#endif
128#endif
129
130/* -- Low-level buffer put operations ------------------------------------- */
131
132SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
133{
134 char *w = lj_buf_more(sb, len);
135 w = lj_buf_wmem(w, q, len);
136 sb->w = w;
137 return sb;
138}
139
140#if LJ_HASJIT || LJ_HASFFI
141static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c)
142{
143 char *w = lj_buf_more2(sb, 1);
144 *w++ = (char)c;
145 sb->w = w;
146 return sb;
147}
148
149SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
150{
151 char *w = sb->w;
152 if (LJ_LIKELY(w < sb->e)) {
153 *w++ = (char)c;
154 sb->w = w;
155 return sb;
156 }
157 return lj_buf_putchar2(sb, c);
158}
159#endif
160
161SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
162{
163 MSize len = s->len;
164 char *w = lj_buf_more(sb, len);
165 w = lj_buf_wmem(w, strdata(s), len);
166 sb->w = w;
167 return sb;
168}
169
170/* -- High-level buffer put operations ------------------------------------ */
171
172SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
173{
174 MSize len = s->len;
175 char *w = lj_buf_more(sb, len), *e = w+len;
176 const char *q = strdata(s)+len-1;
177 while (w < e)
178 *w++ = *q--;
179 sb->w = w;
180 return sb;
181}
182
183SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
184{
185 MSize len = s->len;
186 char *w = lj_buf_more(sb, len), *e = w+len;
187 const char *q = strdata(s);
188 for (; w < e; w++, q++) {
189 uint32_t c = *(unsigned char *)q;
190#if LJ_TARGET_PPC
191 *w = c + ((c >= 'A' && c <= 'Z') << 5);
192#else
193 if (c >= 'A' && c <= 'Z') c += 0x20;
194 *w = c;
195#endif
196 }
197 sb->w = w;
198 return sb;
199}
200
201SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
202{
203 MSize len = s->len;
204 char *w = lj_buf_more(sb, len), *e = w+len;
205 const char *q = strdata(s);
206 for (; w < e; w++, q++) {
207 uint32_t c = *(unsigned char *)q;
208#if LJ_TARGET_PPC
209 *w = c - ((c >= 'a' && c <= 'z') << 5);
210#else
211 if (c >= 'a' && c <= 'z') c -= 0x20;
212 *w = c;
213#endif
214 }
215 sb->w = w;
216 return sb;
217}
218
219SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
220{
221 MSize len = s->len;
222 if (rep > 0 && len) {
223 uint64_t tlen = (uint64_t)rep * len;
224 char *w;
225 if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
226 lj_err_mem(sbufL(sb));
227 w = lj_buf_more(sb, (MSize)tlen);
228 if (len == 1) { /* Optimize a common case. */
229 uint32_t c = strdata(s)[0];
230 do { *w++ = c; } while (--rep > 0);
231 } else {
232 const char *e = strdata(s) + len;
233 do {
234 const char *q = strdata(s);
235 do { *w++ = *q++; } while (q < e);
236 } while (--rep > 0);
237 }
238 sb->w = w;
239 }
240 return sb;
241}
242
243SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
244{
245 MSize seplen = sep ? sep->len : 0;
246 if (i <= e) {
247 for (;;) {
248 cTValue *o = lj_tab_getint(t, i);
249 char *w;
250 if (!o) {
251 badtype: /* Error: bad element type. */
252 sb->w = (char *)(intptr_t)i; /* Store failing index. */
253 return NULL;
254 } else if (tvisstr(o)) {
255 MSize len = strV(o)->len;
256 w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
257 } else if (tvisint(o)) {
258 w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
259 } else if (tvisnum(o)) {
260 w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
261 } else {
262 goto badtype;
263 }
264 if (i++ == e) {
265 sb->w = w;
266 break;
267 }
268 if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen);
269 sb->w = w;
270 }
271 }
272 return sb;
273}
274
275/* -- Miscellaneous buffer operations ------------------------------------- */
276
277GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
278{
279 return lj_str_new(sbufL(sb), sb->b, sbuflen(sb));
280}
281
282/* Concatenate two strings. */
283GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
284{
285 MSize len1 = s1->len, len2 = s2->len;
286 char *buf = lj_buf_tmp(L, len1 + len2);
287 memcpy(buf, strdata(s1), len1);
288 memcpy(buf+len1, strdata(s2), len2);
289 return lj_str_new(L, buf, len1 + len2);
290}
291
292/* Read ULEB128 from buffer. */
293uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
294{
295 const uint8_t *w = (const uint8_t *)*pp;
296 uint32_t v = *w++;
297 if (LJ_UNLIKELY(v >= 0x80)) {
298 int sh = 0;
299 v &= 0x7f;
300 do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80);
301 }
302 *pp = (const char *)w;
303 return v;
304}
305
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..4ace2685
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,197 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BUF_H
7#define _LJ_BUF_H
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_str.h"
12
13/* Resizable string buffers. */
14
15/* The SBuf struct definition is in lj_obj.h:
16** char *w; Write pointer.
17** char *e; End pointer.
18** char *b; Base pointer.
19** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB.
20*/
21
22/* Extended string buffer. */
23typedef struct SBufExt {
24 SBufHeader;
25 union {
26 GCRef cowref; /* Copy-on-write object reference. */
27 MRef bsb; /* Borrowed string buffer. */
28 };
29 char *r; /* Read pointer. */
30 GCRef dict; /* Serialization string dictionary table. */
31 int depth; /* Remaining recursion depth. */
32} SBufExt;
33
34#define sbufsz(sb) ((MSize)((sb)->e - (sb)->b))
35#define sbuflen(sb) ((MSize)((sb)->w - (sb)->b))
36#define sbufleft(sb) ((MSize)((sb)->e - (sb)->w))
37#define sbufxlen(sbx) ((MSize)((sbx)->w - (sbx)->r))
38#define sbufxslack(sbx) ((MSize)((sbx)->r - (sbx)->b))
39
40#define SBUF_MASK_FLAG (7)
41#define SBUF_MASK_L (~(GCSize)SBUF_MASK_FLAG)
42#define SBUF_FLAG_EXT 1 /* Extended string buffer. */
43#define SBUF_FLAG_COW 2 /* Copy-on-write buffer. */
44#define SBUF_FLAG_BORROW 4 /* Borrowed string buffer. */
45
46#define sbufL(sb) \
47 ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L))
48#define setsbufL(sb, l) (setmref((sb)->L, (l)))
49#define setsbufXL(sb, l, flag) \
50 (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag)))
51#define setsbufXL_(sb, l) \
52 (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG)))
53
54#define sbufflag(sb) (mrefu((sb)->L))
55#define sbufisext(sb) (sbufflag((sb)) & SBUF_FLAG_EXT)
56#define sbufiscow(sb) (sbufflag((sb)) & SBUF_FLAG_COW)
57#define sbufisborrow(sb) (sbufflag((sb)) & SBUF_FLAG_BORROW)
58#define sbufiscoworborrow(sb) (sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
59#define sbufX(sb) \
60 (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
61#define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag)))
62
63#define tvisbuf(o) \
64 (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER)
65#define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o))))
66
67/* Buffer management */
68LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
69LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
70LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
71LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
72
73static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
74{
75 setsbufL(sb, L);
76 sb->w = sb->e = sb->b = NULL;
77}
78
79static LJ_AINLINE void lj_buf_reset(SBuf *sb)
80{
81 sb->w = sb->b;
82}
83
84static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
85{
86 SBuf *sb = &G(L)->tmpbuf;
87 setsbufL(sb, L);
88 lj_buf_reset(sb);
89 return sb;
90}
91
92static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
93{
94 lj_assertG(!sbufisext(sb), "bad free of SBufExt");
95 lj_mem_free(g, sb->b, sbufsz(sb));
96}
97
98static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
99{
100 if (LJ_UNLIKELY(sz > sbufsz(sb)))
101 return lj_buf_need2(sb, sz);
102 return sb->b;
103}
104
105static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
106{
107 if (LJ_UNLIKELY(sz > sbufleft(sb)))
108 return lj_buf_more2(sb, sz);
109 return sb->w;
110}
111
112/* Extended buffer management */
113static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx)
114{
115 memset(sbx, 0, sizeof(SBufExt));
116 setsbufXL(sbx, L, SBUF_FLAG_EXT);
117}
118
119static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
120{
121 setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW);
122 setmref(sbx->bsb, sb);
123 sbx->r = sbx->w = sbx->b = sb->b;
124 sbx->e = sb->e;
125}
126
127static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx,
128 const char *p, MSize len)
129{
130 setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW);
131 sbx->r = sbx->b = (char *)p;
132 sbx->w = sbx->e = (char *)p + len;
133}
134
135static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
136{
137 if (sbufiscow(sbx)) {
138 setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW));
139 setgcrefnull(sbx->cowref);
140 sbx->b = sbx->e = NULL;
141 }
142 sbx->r = sbx->w = sbx->b;
143}
144
145static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
146{
147 if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
148 setsbufXL(sbx, L, SBUF_FLAG_EXT);
149 setgcrefnull(sbx->cowref);
150 sbx->r = sbx->w = sbx->b = sbx->e = NULL;
151}
152
153#if LJ_HASBUFFER && LJ_HASJIT
154LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o);
155#if LJ_HASFFI
156LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz);
157#endif
158#endif
159
160/* Low-level buffer put operations */
161LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
162#if LJ_HASJIT || LJ_HASFFI
163LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
164#endif
165LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
166
167static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
168{
169 return (char *)memcpy(p, q, len) + len;
170}
171
172static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
173{
174 char *w = lj_buf_more(sb, 1);
175 *w++ = (char)c;
176 sb->w = w;
177}
178
179/* High-level buffer put operations */
180LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
181LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
182LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
183LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
184LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
185 int32_t i, int32_t e);
186
187/* Miscellaneous buffer operations */
188LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
189LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
190LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
191
192static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
193{
194 return lj_str_new(L, sb->b, sbuflen(sb));
195}
196
197#endif
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 3eaa9322..dc745a37 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_tab.h" 12#include "lj_tab.h"
13#include "lj_meta.h" 13#include "lj_meta.h"
14#include "lj_ir.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_cconv.h" 16#include "lj_cconv.h"
16#include "lj_cdata.h" 17#include "lj_cdata.h"
17#include "lj_carith.h" 18#include "lj_carith.h"
19#include "lj_strscan.h"
18 20
19/* -- C data arithmetic --------------------------------------------------- */ 21/* -- C data arithmetic --------------------------------------------------- */
20 22
@@ -120,7 +122,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
120 setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); 122 setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2));
121 return 1; 123 return 1;
122 } else { 124 } else {
123 lua_assert(mm == MM_le); 125 lj_assertL(mm == MM_le, "bad metamethod %d", mm);
124 setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); 126 setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2));
125 return 1; 127 return 1;
126 } 128 }
@@ -206,7 +208,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
206 *up = lj_carith_powu64(u0, u1); 208 *up = lj_carith_powu64(u0, u1);
207 break; 209 break;
208 case MM_unm: *up = (uint64_t)-(int64_t)u0; break; 210 case MM_unm: *up = (uint64_t)-(int64_t)u0; break;
209 default: lua_assert(0); break; 211 default:
212 lj_assertL(0, "bad metamethod %d", mm);
213 break;
210 } 214 }
211 lj_gc_check(L); 215 lj_gc_check(L);
212 return 1; 216 return 1;
@@ -272,6 +276,81 @@ int lj_carith_op(lua_State *L, MMS mm)
272 return lj_carith_meta(L, cts, &ca, mm); 276 return lj_carith_meta(L, cts, &ca, mm);
273} 277}
274 278
279/* -- 64 bit bit operations helpers --------------------------------------- */
280
281#if LJ_64
282#define B64DEF(name) \
283 static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
284#else
285/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
286#define B64DEF(name) \
287 uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
288#endif
289
290B64DEF(shl64) { return x << (sh&63); }
291B64DEF(shr64) { return x >> (sh&63); }
292B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }
293B64DEF(rol64) { return lj_rol(x, (sh&63)); }
294B64DEF(ror64) { return lj_ror(x, (sh&63)); }
295
296#undef B64DEF
297
298uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
299{
300 switch (op) {
301 case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
302 case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
303 case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
304 case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
305 case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
306 default:
307 lj_assertX(0, "bad shift op %d", op);
308 break;
309 }
310 return x;
311}
312
313/* Equivalent to lj_lib_checkbit(), but handles cdata. */
314uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
315{
316 TValue *o = L->base + narg-1;
317 if (o >= L->top) {
318 err:
319 lj_err_argt(L, narg, LUA_TNUMBER);
320 } else if (LJ_LIKELY(tvisnumber(o))) {
321 /* Handled below. */
322 } else if (tviscdata(o)) {
323 CTState *cts = ctype_cts(L);
324 uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
325 CTypeID sid = cdataV(o)->ctypeid;
326 CType *s = ctype_get(cts, sid);
327 uint64_t x;
328 if (ctype_isref(s->info)) {
329 sp = *(void **)sp;
330 sid = ctype_cid(s->info);
331 }
332 s = ctype_raw(cts, sid);
333 if (ctype_isenum(s->info)) s = ctype_child(cts, s);
334 if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
335 CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
336 *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
337 else if (!*id)
338 *id = CTID_INT64; /* Use int64_t, unless already set. */
339 lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
340 (uint8_t *)&x, sp, CCF_ARG(narg));
341 return x;
342 } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
343 goto err;
344 }
345 if (LJ_LIKELY(tvisint(o))) {
346 return (uint32_t)intV(o);
347 } else {
348 int32_t i = lj_num2bit(numV(o));
349 if (LJ_DUALNUM) setintV(o, i);
350 return (uint32_t)i;
351 }
352}
353
275/* -- 64 bit integer arithmetic helpers ----------------------------------- */ 354/* -- 64 bit integer arithmetic helpers ----------------------------------- */
276 355
277#if LJ_32 && LJ_HASJIT 356#if LJ_32 && LJ_HASJIT
diff --git a/src/lj_carith.h b/src/lj_carith.h
index 119291b4..2fa5c657 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,16 @@
12 12
13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); 13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
14 14
15#if LJ_32
16LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
17LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
18LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
19LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
20LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
21#endif
22LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
23LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
24
15#if LJ_32 && LJ_HASJIT 25#if LJ_32 && LJ_HASJIT
16LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); 26LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
17#endif 27#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index da7fa459..3c029823 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -291,56 +290,85 @@
291#define CCALL_HANDLE_RET \ 290#define CCALL_HANDLE_RET \
292 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; 291 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
293 292
294#elif LJ_TARGET_PPC 293#elif LJ_TARGET_ARM64
295/* -- PPC calling conventions --------------------------------------------- */ 294/* -- ARM64 calling conventions ------------------------------------------- */
296 295
297#define CCALL_HANDLE_STRUCTRET \ 296#define CCALL_HANDLE_STRUCTRET \
298 cc->retref = 1; /* Return all structs by reference. */ \ 297 cc->retref = !ccall_classify_struct(cts, ctr); \
299 cc->gpr[ngpr++] = (GPRArg)dp; 298 if (cc->retref) cc->retp = dp;
299
300#define CCALL_HANDLE_STRUCTRET2 \
301 unsigned int cl = ccall_classify_struct(cts, ctr); \
302 if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
303 CTSize i = (cl >> 8) - 1; \
304 do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
305 } else { \
306 if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
307 memcpy(dp, sp, ctr->size); \
308 }
300 309
301#define CCALL_HANDLE_COMPLEXRET \ 310#define CCALL_HANDLE_COMPLEXRET \
302 /* Complex values are returned in 2 or 4 GPRs. */ \ 311 /* Complex values are returned in one or two FPRs. */ \
303 cc->retref = 0; 312 cc->retref = 0;
304 313
305#define CCALL_HANDLE_COMPLEXRET2 \ 314#define CCALL_HANDLE_COMPLEXRET2 \
306 memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ 315 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
316 ((float *)dp)[0] = cc->fpr[0].f; \
317 ((float *)dp)[1] = cc->fpr[1].f; \
318 } else { /* Copy complex double from FPRs. */ \
319 ((double *)dp)[0] = cc->fpr[0].d; \
320 ((double *)dp)[1] = cc->fpr[1].d; \
321 }
307 322
308#define CCALL_HANDLE_STRUCTARG \ 323#define CCALL_HANDLE_STRUCTARG \
309 rp = cdataptr(lj_cdata_new(cts, did, sz)); \ 324 unsigned int cl = ccall_classify_struct(cts, d); \
310 sz = CTSIZE_PTR; /* Pass all structs by reference. */ 325 if (cl == 0) { /* Pass struct by reference. */ \
326 rp = cdataptr(lj_cdata_new(cts, did, sz)); \
327 sz = CTSIZE_PTR; \
328 } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \
329 isfp = (cl & 4) ? 2 : 1; \
330 } /* else: Pass struct in GPRs or on stack. */
311 331
312#define CCALL_HANDLE_COMPLEXARG \ 332#define CCALL_HANDLE_COMPLEXARG \
313 /* Pass complex by value in 2 or 4 GPRs. */ 333 /* Pass complex by value in separate (!) FPRs or on stack. */ \
334 isfp = sz == 2*sizeof(float) ? 2 : 1;
314 335
315#define CCALL_HANDLE_REGARG \ 336#define CCALL_HANDLE_REGARG \
316 if (isfp) { /* Try to pass argument in FPRs. */ \ 337 if (LJ_TARGET_OSX && isva) { \
317 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 338 /* IOS: All variadic arguments are on the stack. */ \
339 } else if (isfp) { /* Try to pass argument in FPRs. */ \
340 int n2 = ctype_isvector(d->info) ? 1 : \
341 isfp == 1 ? n : (d->size >> (4-isfp)); \
342 if (nfpr + n2 <= CCALL_NARG_FPR) { \
318 dp = &cc->fpr[nfpr]; \ 343 dp = &cc->fpr[nfpr]; \
319 nfpr += 1; \ 344 nfpr += n2; \
320 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
321 goto done; \ 345 goto done; \
346 } else { \
347 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
348 if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
322 } \ 349 } \
323 } else { /* Try to pass argument in GPRs. */ \ 350 } else { /* Try to pass argument in GPRs. */ \
324 if (n > 1) { \ 351 if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
325 lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ 352 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
326 if (ctype_isinteger(d->info)) \
327 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
328 else if (ngpr + n > maxgpr) \
329 ngpr = maxgpr; /* Prevent reordering. */ \
330 } \
331 if (ngpr + n <= maxgpr) { \ 353 if (ngpr + n <= maxgpr) { \
332 dp = &cc->gpr[ngpr]; \ 354 dp = &cc->gpr[ngpr]; \
333 ngpr += n; \ 355 ngpr += n; \
334 goto done; \ 356 goto done; \
357 } else { \
358 ngpr = maxgpr; /* Prevent reordering. */ \
359 if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
335 } \ 360 } \
336 } 361 }
337 362
363#if LJ_BE
338#define CCALL_HANDLE_RET \ 364#define CCALL_HANDLE_RET \
339 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 365 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
340 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ 366 sp = (uint8_t *)&cc->fpr[0].f;
367#endif
341 368
342#elif LJ_TARGET_PPCSPE 369
343/* -- PPC/SPE calling conventions ----------------------------------------- */ 370#elif LJ_TARGET_PPC
371/* -- PPC calling conventions --------------------------------------------- */
344 372
345#define CCALL_HANDLE_STRUCTRET \ 373#define CCALL_HANDLE_STRUCTRET \
346 cc->retref = 1; /* Return all structs by reference. */ \ 374 cc->retref = 1; /* Return all structs by reference. */ \
@@ -360,12 +388,13 @@
360#define CCALL_HANDLE_COMPLEXARG \ 388#define CCALL_HANDLE_COMPLEXARG \
361 /* Pass complex by value in 2 or 4 GPRs. */ 389 /* Pass complex by value in 2 or 4 GPRs. */
362 390
363/* PPC/SPE has a softfp ABI. */ 391#define CCALL_HANDLE_GPR \
364#define CCALL_HANDLE_REGARG \ 392 /* Try to pass argument in GPRs. */ \
365 if (n > 1) { /* Doesn't fit in a single GPR? */ \ 393 if (n > 1) { \
366 lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \ 394 /* int64_t or complex (float). */ \
367 if (n == 2) \ 395 lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \
368 ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \ 396 if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
397 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
369 else if (ngpr + n > maxgpr) \ 398 else if (ngpr + n > maxgpr) \
370 ngpr = maxgpr; /* Prevent reordering. */ \ 399 ngpr = maxgpr; /* Prevent reordering. */ \
371 } \ 400 } \
@@ -373,10 +402,32 @@
373 dp = &cc->gpr[ngpr]; \ 402 dp = &cc->gpr[ngpr]; \
374 ngpr += n; \ 403 ngpr += n; \
375 goto done; \ 404 goto done; \
405 } \
406
407#if LJ_ABI_SOFTFP
408#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
409#else
410#define CCALL_HANDLE_REGARG \
411 if (isfp) { /* Try to pass argument in FPRs. */ \
412 if (nfpr + 1 <= CCALL_NARG_FPR) { \
413 dp = &cc->fpr[nfpr]; \
414 nfpr += 1; \
415 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
416 goto done; \
417 } \
418 } else { \
419 CCALL_HANDLE_GPR \
376 } 420 }
421#endif
377 422
378#elif LJ_TARGET_MIPS 423#if !LJ_ABI_SOFTFP
379/* -- MIPS calling conventions -------------------------------------------- */ 424#define CCALL_HANDLE_RET \
425 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
426 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
427#endif
428
429#elif LJ_TARGET_MIPS32
430/* -- MIPS o32 calling conventions ---------------------------------------- */
380 431
381#define CCALL_HANDLE_STRUCTRET \ 432#define CCALL_HANDLE_STRUCTRET \
382 cc->retref = 1; /* Return all structs by reference. */ \ 433 cc->retref = 1; /* Return all structs by reference. */ \
@@ -386,6 +437,18 @@
386 /* Complex values are returned in 1 or 2 FPRs. */ \ 437 /* Complex values are returned in 1 or 2 FPRs. */ \
387 cc->retref = 0; 438 cc->retref = 0;
388 439
440#if LJ_ABI_SOFTFP
441#define CCALL_HANDLE_COMPLEXRET2 \
442 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
443 ((intptr_t *)dp)[0] = cc->gpr[0]; \
444 ((intptr_t *)dp)[1] = cc->gpr[1]; \
445 } else { /* Copy complex double from GPRs. */ \
446 ((intptr_t *)dp)[0] = cc->gpr[0]; \
447 ((intptr_t *)dp)[1] = cc->gpr[1]; \
448 ((intptr_t *)dp)[2] = cc->gpr[2]; \
449 ((intptr_t *)dp)[3] = cc->gpr[3]; \
450 }
451#else
389#define CCALL_HANDLE_COMPLEXRET2 \ 452#define CCALL_HANDLE_COMPLEXRET2 \
390 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ 453 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
391 ((float *)dp)[0] = cc->fpr[0].f; \ 454 ((float *)dp)[0] = cc->fpr[0].f; \
@@ -394,6 +457,7 @@
394 ((double *)dp)[0] = cc->fpr[0].d; \ 457 ((double *)dp)[0] = cc->fpr[0].d; \
395 ((double *)dp)[1] = cc->fpr[1].d; \ 458 ((double *)dp)[1] = cc->fpr[1].d; \
396 } 459 }
460#endif
397 461
398#define CCALL_HANDLE_STRUCTARG \ 462#define CCALL_HANDLE_STRUCTARG \
399 /* Pass all structs by value in registers and/or on the stack. */ 463 /* Pass all structs by value in registers and/or on the stack. */
@@ -401,6 +465,22 @@
401#define CCALL_HANDLE_COMPLEXARG \ 465#define CCALL_HANDLE_COMPLEXARG \
402 /* Pass complex by value in 2 or 4 GPRs. */ 466 /* Pass complex by value in 2 or 4 GPRs. */
403 467
468#define CCALL_HANDLE_GPR \
469 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
470 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
471 if (ngpr < maxgpr) { \
472 dp = &cc->gpr[ngpr]; \
473 if (ngpr + n > maxgpr) { \
474 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
475 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
476 ngpr = maxgpr; \
477 } else { \
478 ngpr += n; \
479 } \
480 goto done; \
481 }
482
483#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
404#define CCALL_HANDLE_REGARG \ 484#define CCALL_HANDLE_REGARG \
405 if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ 485 if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
406 /* Try to pass argument in FPRs. */ \ 486 /* Try to pass argument in FPRs. */ \
@@ -409,25 +489,91 @@
409 goto done; \ 489 goto done; \
410 } else { /* Try to pass argument in GPRs. */ \ 490 } else { /* Try to pass argument in GPRs. */ \
411 nfpr = CCALL_NARG_FPR; \ 491 nfpr = CCALL_NARG_FPR; \
412 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ 492 CCALL_HANDLE_GPR \
413 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 493 }
414 if (ngpr < maxgpr) { \ 494#else /* MIPS32 soft-float */
415 dp = &cc->gpr[ngpr]; \ 495#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
416 if (ngpr + n > maxgpr) { \ 496#endif
417 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ 497
418 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ 498#if !LJ_ABI_SOFTFP
419 ngpr = maxgpr; \ 499/* On MIPS64 soft-float, position of float return values is endian-dependant. */
420 } else { \ 500#define CCALL_HANDLE_RET \
421 ngpr += n; \ 501 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
422 } \ 502 sp = (uint8_t *)&cc->fpr[0].f;
423 goto done; \ 503#endif
424 } \ 504
505#elif LJ_TARGET_MIPS64
506/* -- MIPS n64 calling conventions ---------------------------------------- */
507
508#define CCALL_HANDLE_STRUCTRET \
509 cc->retref = !(sz <= 16); \
510 if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
511
512#define CCALL_HANDLE_STRUCTRET2 \
513 ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
514
515#define CCALL_HANDLE_COMPLEXRET \
516 /* Complex values are returned in 1 or 2 FPRs. */ \
517 cc->retref = 0;
518
519#if LJ_ABI_SOFTFP /* MIPS64 soft-float */
520
521#define CCALL_HANDLE_COMPLEXRET2 \
522 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
523 ((intptr_t *)dp)[0] = cc->gpr[0]; \
524 } else { /* Copy complex double from GPRs. */ \
525 ((intptr_t *)dp)[0] = cc->gpr[0]; \
526 ((intptr_t *)dp)[1] = cc->gpr[1]; \
527 }
528
529#define CCALL_HANDLE_COMPLEXARG \
530 /* Pass complex by value in 2 or 4 GPRs. */
531
532/* Position of soft-float 'float' return value depends on endianess. */
533#define CCALL_HANDLE_RET \
534 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
535 sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
536
537#else /* MIPS64 hard-float */
538
539#define CCALL_HANDLE_COMPLEXRET2 \
540 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
541 ((float *)dp)[0] = cc->fpr[0].f; \
542 ((float *)dp)[1] = cc->fpr[1].f; \
543 } else { /* Copy complex double from FPRs. */ \
544 ((double *)dp)[0] = cc->fpr[0].d; \
545 ((double *)dp)[1] = cc->fpr[1].d; \
546 }
547
548#define CCALL_HANDLE_COMPLEXARG \
549 if (sz == 2*sizeof(float)) { \
550 isfp = 2; \
551 if (ngpr < maxgpr) \
552 sz *= 2; \
425 } 553 }
426 554
427#define CCALL_HANDLE_RET \ 555#define CCALL_HANDLE_RET \
428 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 556 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
429 sp = (uint8_t *)&cc->fpr[0].f; 557 sp = (uint8_t *)&cc->fpr[0].f;
430 558
559#endif
560
561#define CCALL_HANDLE_STRUCTARG \
562 /* Pass all structs by value in registers and/or on the stack. */
563
564#define CCALL_HANDLE_REGARG \
565 if (ngpr < maxgpr) { \
566 dp = &cc->gpr[ngpr]; \
567 if (ngpr + n > maxgpr) { \
568 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
569 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
570 ngpr = maxgpr; \
571 } else { \
572 ngpr += n; \
573 } \
574 goto done; \
575 }
576
431#else 577#else
432#error "Missing calling convention definitions for this architecture" 578#error "Missing calling convention definitions for this architecture"
433#endif 579#endif
@@ -497,7 +643,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
497 ccall_classify_struct(cts, ct, rcl, ofs); 643 ccall_classify_struct(cts, ct, rcl, ofs);
498 } else { 644 } else {
499 int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; 645 int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT;
500 lua_assert(ctype_hassize(ct->info)); 646 lj_assertCTS(ctype_hassize(ct->info),
647 "classify ctype %08x without size", ct->info);
501 if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ 648 if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */
502 rcl[(ofs >= 8)] |= cl; 649 rcl[(ofs >= 8)] |= cl;
503 } 650 }
@@ -522,12 +669,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
522} 669}
523 670
524/* Try to split up a small struct into registers. */ 671/* Try to split up a small struct into registers. */
525static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl) 672static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl)
526{ 673{
527 MSize ngpr = cc->ngpr, nfpr = cc->nfpr; 674 MSize ngpr = cc->ngpr, nfpr = cc->nfpr;
528 uint32_t i; 675 uint32_t i;
676 UNUSED(cts);
529 for (i = 0; i < 2; i++) { 677 for (i = 0; i < 2; i++) {
530 lua_assert(!(rcl[i] & CCALL_RCL_MEM)); 678 lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg");
531 if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ 679 if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */
532 if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */ 680 if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */
533 cc->gpr[ngpr++] = dp[i]; 681 cc->gpr[ngpr++] = dp[i];
@@ -548,7 +696,8 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
548 dp[0] = dp[1] = 0; 696 dp[0] = dp[1] = 0;
549 /* Convert to temp. struct. */ 697 /* Convert to temp. struct. */
550 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); 698 lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
551 if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */ 699 if (ccall_struct_reg(cc, cts, dp, rcl)) {
700 /* Register overflow? Pass on stack. */
552 MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; 701 MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
553 if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ 702 if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
554 cc->nsp = nsp + n; 703 cc->nsp = nsp + n;
@@ -621,6 +770,125 @@ noth: /* Not a homogeneous float/double aggregate. */
621 770
622#endif 771#endif
623 772
773/* -- ARM64 ABI struct classification ------------------------------------- */
774
775#if LJ_TARGET_ARM64
776
777/* Classify a struct based on its fields. */
778static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
779{
780 CTSize sz = ct->size;
781 unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
782 while (ct->sib) {
783 CType *sct;
784 ct = ctype_get(cts, ct->sib);
785 if (ctype_isfield(ct->info)) {
786 sct = ctype_rawchild(cts, ct);
787 if (ctype_isfp(sct->info)) {
788 r |= sct->size;
789 if (!isu) n++; else if (n == 0) n = 1;
790 } else if (ctype_iscomplex(sct->info)) {
791 r |= (sct->size >> 1);
792 if (!isu) n += 2; else if (n < 2) n = 2;
793 } else if (ctype_isstruct(sct->info)) {
794 goto substruct;
795 } else {
796 goto noth;
797 }
798 } else if (ctype_isbitfield(ct->info)) {
799 goto noth;
800 } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
801 sct = ctype_rawchild(cts, ct);
802 substruct:
803 if (sct->size > 0) {
804 unsigned int s = ccall_classify_struct(cts, sct);
805 if (s <= 1) goto noth;
806 r |= (s & 255);
807 if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
808 }
809 }
810 }
811 if ((r == 4 || r == 8) && n <= 4)
812 return r + (n << 8);
813noth: /* Not a homogeneous float/double aggregate. */
814 return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
815}
816
817#endif
818
819/* -- MIPS64 ABI struct classification ---------------------------- */
820
821#if LJ_TARGET_MIPS64
822
823#define FTYPE_FLOAT 1
824#define FTYPE_DOUBLE 2
825
826/* Classify FP fields (max. 2) and their types. */
827static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
828{
829 int n = 0, ft = 0;
830 if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
831 goto noth;
832 while (ct->sib) {
833 CType *sct;
834 ct = ctype_get(cts, ct->sib);
835 if (n == 2) {
836 goto noth;
837 } else if (ctype_isfield(ct->info)) {
838 sct = ctype_rawchild(cts, ct);
839 if (ctype_isfp(sct->info)) {
840 ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
841 n++;
842 } else {
843 goto noth;
844 }
845 } else if (ctype_isbitfield(ct->info) ||
846 ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
847 goto noth;
848 }
849 }
850 if (n <= 2)
851 return ft;
852noth: /* Not a homogeneous float/double aggregate. */
853 return 0; /* Struct is in GPRs. */
854}
855
856static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
857 int ft)
858{
859 if (LJ_ABI_SOFTFP ? ft :
860 ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
861 int i, ofs = 0;
862 for (i = 0; ft != 0; i++, ft >>= 2) {
863 if ((ft & 3) == FTYPE_FLOAT) {
864#if LJ_ABI_SOFTFP
865 /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
866 memcpy((uint8_t *)dp + ofs,
867 (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
868#else
869 *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
870#endif
871 ofs += 4;
872 } else {
873 ofs = (ofs + 7) & ~7; /* 64 bit alignment. */
874#if LJ_ABI_SOFTFP
875 *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
876#else
877 *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
878#endif
879 ofs += 8;
880 }
881 }
882 } else {
883#if !LJ_ABI_SOFTFP
884 if (ft) sp = (uint8_t *)&cc->fpr[0];
885#endif
886 memcpy(dp, sp, ctr->size);
887 }
888}
889
890#endif
891
624/* -- Common C call handling ---------------------------------------------- */ 892/* -- Common C call handling ---------------------------------------------- */
625 893
626/* Infer the destination CTypeID for a vararg argument. */ 894/* Infer the destination CTypeID for a vararg argument. */
@@ -726,7 +994,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
726 if (fid) { /* Get argument type from field. */ 994 if (fid) { /* Get argument type from field. */
727 CType *ctf = ctype_get(cts, fid); 995 CType *ctf = ctype_get(cts, fid);
728 fid = ctf->sib; 996 fid = ctf->sib;
729 lua_assert(ctype_isfield(ctf->info)); 997 lj_assertL(ctype_isfield(ctf->info), "field expected");
730 did = ctype_cid(ctf->info); 998 did = ctype_cid(ctf->info);
731 } else { 999 } else {
732 if (!(ct->info & CTF_VARARG)) 1000 if (!(ct->info & CTF_VARARG))
@@ -788,6 +1056,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
788 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : 1056 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
789 (int32_t)*(int16_t *)dp; 1057 (int32_t)*(int16_t *)dp;
790 } 1058 }
1059#if LJ_TARGET_ARM64 && LJ_BE
1060 if (isfp && d->size == sizeof(float))
1061 ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
1062#endif
1063#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
1064 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
1065#if LJ_TARGET_MIPS64
1066 || (isfp && nsp == 0)
1067#endif
1068 ) && d->size <= 4) {
1069 *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
1070 }
1071#endif
791#if LJ_TARGET_X64 && LJ_ABI_WIN 1072#if LJ_TARGET_X64 && LJ_ABI_WIN
792 if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ 1073 if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
793 if (nfpr == ngpr) 1074 if (nfpr == ngpr)
@@ -803,13 +1084,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
803 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ 1084 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
804 cc->fpr[nfpr-2].d[1] = 0; 1085 cc->fpr[nfpr-2].d[1] = 0;
805 } 1086 }
1087#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
1088 if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
1089 /* Split float HFA or complex float into separate registers. */
1090 CTSize i = (sz >> 2) - 1;
1091 do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
1092 }
806#else 1093#else
807 UNUSED(isfp); 1094 UNUSED(isfp);
808#endif 1095#endif
809 } 1096 }
810 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ 1097 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
811 1098
812#if LJ_TARGET_X64 || LJ_TARGET_PPC 1099#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
813 cc->nfpr = nfpr; /* Required for vararg functions. */ 1100 cc->nfpr = nfpr; /* Required for vararg functions. */
814#endif 1101#endif
815 cc->nsp = nsp; 1102 cc->nsp = nsp;
@@ -844,7 +1131,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
844 CCALL_HANDLE_COMPLEXRET2 1131 CCALL_HANDLE_COMPLEXRET2
845 return 1; /* One GC step. */ 1132 return 1; /* One GC step. */
846 } 1133 }
847 if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) 1134 if (LJ_BE && ctr->size < CTSIZE_PTR &&
1135 (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
848 sp += (CTSIZE_PTR - ctr->size); 1136 sp += (CTSIZE_PTR - ctr->size);
849#if CCALL_NUM_FPR 1137#if CCALL_NUM_FPR
850 if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) 1138 if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))
@@ -854,7 +1142,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
854 CCALL_HANDLE_RET 1142 CCALL_HANDLE_RET
855#endif 1143#endif
856 /* No reference types end up here, so there's no need for the CTypeID. */ 1144 /* No reference types end up here, so there's no need for the CTypeID. */
857 lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info))); 1145 lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)),
1146 "unexpected reference ctype");
858 return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); 1147 return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp);
859} 1148}
860 1149
@@ -878,7 +1167,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd)
878 lj_vm_ffi_call(&cc); 1167 lj_vm_ffi_call(&cc);
879 if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ 1168 if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */
880 TValue tv; 1169 TValue tv;
881 setlightudV(&tv, (void *)cc.func); 1170 tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000);
882 setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); 1171 setboolV(lj_tab_set(L, cts->miscmap, &tv), 1);
883 } 1172 }
884 ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ 1173 ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 385a5190..aae5777b 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -68,35 +68,56 @@ typedef union FPRArg {
68 float f[2]; 68 float f[2];
69} FPRArg; 69} FPRArg;
70 70
71#elif LJ_TARGET_PPC 71#elif LJ_TARGET_ARM64
72 72
73#define CCALL_NARG_GPR 8 73#define CCALL_NARG_GPR 8
74#define CCALL_NRET_GPR 2
74#define CCALL_NARG_FPR 8 75#define CCALL_NARG_FPR 8
76#define CCALL_NRET_FPR 4
77#define CCALL_SPS_FREE 0
78
79typedef intptr_t GPRArg;
80typedef union FPRArg {
81 double d;
82 struct { LJ_ENDIAN_LOHI(float f; , float g;) };
83 struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
84} FPRArg;
85
86#elif LJ_TARGET_PPC
87
88#define CCALL_NARG_GPR 8
89#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
75#define CCALL_NRET_GPR 4 /* For complex double. */ 90#define CCALL_NRET_GPR 4 /* For complex double. */
76#define CCALL_NRET_FPR 1 91#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
77#define CCALL_SPS_EXTRA 4 92#define CCALL_SPS_EXTRA 4
78#define CCALL_SPS_FREE 0 93#define CCALL_SPS_FREE 0
79 94
80typedef intptr_t GPRArg; 95typedef intptr_t GPRArg;
81typedef double FPRArg; 96typedef double FPRArg;
82 97
83#elif LJ_TARGET_PPCSPE 98#elif LJ_TARGET_MIPS32
84 99
85#define CCALL_NARG_GPR 8 100#define CCALL_NARG_GPR 4
86#define CCALL_NARG_FPR 0 101#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
87#define CCALL_NRET_GPR 4 /* For softfp complex double. */ 102#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2)
88#define CCALL_NRET_FPR 0 103#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
89#define CCALL_SPS_FREE 0 /* NYI */ 104#define CCALL_SPS_EXTRA 7
105#define CCALL_SPS_FREE 1
90 106
91typedef intptr_t GPRArg; 107typedef intptr_t GPRArg;
108typedef union FPRArg {
109 double d;
110 struct { LJ_ENDIAN_LOHI(float f; , float g;) };
111} FPRArg;
92 112
93#elif LJ_TARGET_MIPS 113#elif LJ_TARGET_MIPS64
94 114
95#define CCALL_NARG_GPR 4 115/* FP args are positional and overlay the GPR array. */
96#define CCALL_NARG_FPR 2 116#define CCALL_NARG_GPR 8
117#define CCALL_NARG_FPR 0
97#define CCALL_NRET_GPR 2 118#define CCALL_NRET_GPR 2
98#define CCALL_NRET_FPR 2 119#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
99#define CCALL_SPS_EXTRA 7 120#define CCALL_SPS_EXTRA 3
100#define CCALL_SPS_FREE 1 121#define CCALL_SPS_FREE 1
101 122
102typedef intptr_t GPRArg; 123typedef intptr_t GPRArg;
@@ -145,6 +166,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
145 uint8_t nfpr; /* Number of arguments in FPRs. */ 166 uint8_t nfpr; /* Number of arguments in FPRs. */
146#elif LJ_TARGET_X86 167#elif LJ_TARGET_X86
147 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ 168 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
169#elif LJ_TARGET_ARM64
170 void *retp; /* Aggregate return pointer in x8. */
148#elif LJ_TARGET_PPC 171#elif LJ_TARGET_PPC
149 uint8_t nfpr; /* Number of arguments in FPRs. */ 172 uint8_t nfpr; /* Number of arguments in FPRs. */
150#endif 173#endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 3514fc22..80d738c6 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -27,7 +27,7 @@
27 27
28#if LJ_OS_NOJIT 28#if LJ_OS_NOJIT
29 29
30/* Disabled callback support. */ 30/* Callbacks disabled. */
31#define CALLBACK_SLOT2OFS(slot) (0*(slot)) 31#define CALLBACK_SLOT2OFS(slot) (0*(slot))
32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) 32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs))
33#define CALLBACK_MAX_SLOT 0 33#define CALLBACK_MAX_SLOT 0
@@ -35,7 +35,7 @@
35#elif LJ_TARGET_X86ORX64 35#elif LJ_TARGET_X86ORX64
36 36
37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) 37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
38#define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5)) 38#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
39 39
40#define CALLBACK_SLOT2OFS(slot) \ 40#define CALLBACK_SLOT2OFS(slot) \
41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) 41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
54#elif LJ_TARGET_ARM 54#elif LJ_TARGET_ARM
55 55
56#define CALLBACK_MCODE_HEAD 32 56#define CALLBACK_MCODE_HEAD 32
57#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 57
58#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 58#elif LJ_TARGET_ARM64
59#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 59
60#define CALLBACK_MCODE_HEAD 32
60 61
61#elif LJ_TARGET_PPC 62#elif LJ_TARGET_PPC
62 63
63#define CALLBACK_MCODE_HEAD 24 64#define CALLBACK_MCODE_HEAD 24
64#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
65#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
66#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
67 65
68#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS32
69 67
70#define CALLBACK_MCODE_HEAD 24 68#define CALLBACK_MCODE_HEAD 20
71#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 69
72#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 70#elif LJ_TARGET_MIPS64
73#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 71
72#define CALLBACK_MCODE_HEAD 52
74 73
75#else 74#else
76 75
@@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
81 80
82#endif 81#endif
83 82
83#ifndef CALLBACK_SLOT2OFS
84#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
85#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
86#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
87#endif
88
84/* Convert callback slot number to callback function pointer. */ 89/* Convert callback slot number to callback function pointer. */
85static void *callback_slot2ptr(CTState *cts, MSize slot) 90static void *callback_slot2ptr(CTState *cts, MSize slot)
86{ 91{
@@ -102,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p)
102/* Initialize machine code for callback function pointers. */ 107/* Initialize machine code for callback function pointers. */
103#if LJ_OS_NOJIT 108#if LJ_OS_NOJIT
104/* Disabled callback support. */ 109/* Disabled callback support. */
105#define callback_mcode_init(g, p) UNUSED(p) 110#define callback_mcode_init(g, p) (p)
106#elif LJ_TARGET_X86ORX64 111#elif LJ_TARGET_X86ORX64
107static void callback_mcode_init(global_State *g, uint8_t *page) 112static void *callback_mcode_init(global_State *g, uint8_t *page)
108{ 113{
109 uint8_t *p = page; 114 uint8_t *p = page;
110 uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; 115 uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback;
@@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
119 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ 124 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
120 *p++ = XI_PUSH + RID_EBP; 125 *p++ = XI_PUSH + RID_EBP;
121 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); 126 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
127#if LJ_GC64
128 *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
129 *(uint64_t *)p = (uint64_t)(g); p += 8;
130#else
122 *p++ = XI_MOVri | RID_EBP; 131 *p++ = XI_MOVri | RID_EBP;
123 *(int32_t *)p = i32ptr(g); p += 4; 132 *(int32_t *)p = i32ptr(g); p += 4;
133#endif
124#if LJ_64 134#if LJ_64
125 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ 135 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
126 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; 136 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
@@ -133,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
133 *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); 143 *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
134 } 144 }
135 } 145 }
136 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 146 return p;
137} 147}
138#elif LJ_TARGET_ARM 148#elif LJ_TARGET_ARM
139static void callback_mcode_init(global_State *g, uint32_t *page) 149static void *callback_mcode_init(global_State *g, uint32_t *page)
140{ 150{
141 uint32_t *p = page; 151 uint32_t *p = page;
142 void *target = (void *)lj_vm_ffi_callback; 152 void *target = (void *)lj_vm_ffi_callback;
@@ -155,10 +165,30 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
155 *p = ARMI_B | ((page-p-2) & 0x00ffffffu); 165 *p = ARMI_B | ((page-p-2) & 0x00ffffffu);
156 p++; 166 p++;
157 } 167 }
158 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 168 return p;
169}
170#elif LJ_TARGET_ARM64
171static void *callback_mcode_init(global_State *g, uint32_t *page)
172{
173 uint32_t *p = page;
174 void *target = (void *)lj_vm_ffi_callback;
175 MSize slot;
176 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
177 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
178 *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
179 *p++ = A64I_LE(A64I_NOP);
180 ((void **)p)[0] = target;
181 ((void **)p)[1] = g;
182 p += 4;
183 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
184 *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
185 *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
186 p++;
187 }
188 return p;
159} 189}
160#elif LJ_TARGET_PPC 190#elif LJ_TARGET_PPC
161static void callback_mcode_init(global_State *g, uint32_t *page) 191static void *callback_mcode_init(global_State *g, uint32_t *page)
162{ 192{
163 uint32_t *p = page; 193 uint32_t *p = page;
164 void *target = (void *)lj_vm_ffi_callback; 194 void *target = (void *)lj_vm_ffi_callback;
@@ -174,30 +204,43 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
174 *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); 204 *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
175 p++; 205 p++;
176 } 206 }
177 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 207 return p;
178} 208}
179#elif LJ_TARGET_MIPS 209#elif LJ_TARGET_MIPS
180static void callback_mcode_init(global_State *g, uint32_t *page) 210static void *callback_mcode_init(global_State *g, uint32_t *page)
181{ 211{
182 uint32_t *p = page; 212 uint32_t *p = page;
183 void *target = (void *)lj_vm_ffi_callback; 213 uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
214 uintptr_t ug = (uintptr_t)(void *)g;
184 MSize slot; 215 MSize slot;
185 *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; 216#if LJ_TARGET_MIPS32
186 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); 217 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
187 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); 218 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
188 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); 219#else
220 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48);
221 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48);
222 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
223 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
224 *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
225 *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
226 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
227 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
228 *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
229 *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
230#endif
231 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
189 *p++ = MIPSI_JR | MIPSF_S(RID_R3); 232 *p++ = MIPSI_JR | MIPSF_S(RID_R3);
190 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); 233 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
191 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { 234 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
192 *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); 235 *p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
193 p++; 236 p++;
194 *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; 237 *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot;
195 } 238 }
196 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 239 return p;
197} 240}
198#else 241#else
199/* Missing support for this architecture. */ 242/* Missing support for this architecture. */
200#define callback_mcode_init(g, p) UNUSED(p) 243#define callback_mcode_init(g, p) (p)
201#endif 244#endif
202 245
203/* -- Machine code management --------------------------------------------- */ 246/* -- Machine code management --------------------------------------------- */
@@ -213,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
213#ifndef MAP_ANONYMOUS 256#ifndef MAP_ANONYMOUS
214#define MAP_ANONYMOUS MAP_ANON 257#define MAP_ANONYMOUS MAP_ANON
215#endif 258#endif
259#ifdef PROT_MPROTECT
260#define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC))
261#else
262#define CCPROT_CREATE 0
263#endif
216 264
217#endif 265#endif
218 266
@@ -220,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
220static void callback_mcode_new(CTState *cts) 268static void callback_mcode_new(CTState *cts)
221{ 269{
222 size_t sz = (size_t)CALLBACK_MCODE_SIZE; 270 size_t sz = (size_t)CALLBACK_MCODE_SIZE;
223 void *p; 271 void *p, *pe;
224 if (CALLBACK_MAX_SLOT == 0) 272 if (CALLBACK_MAX_SLOT == 0)
225 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 273 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
226#if LJ_TARGET_WINDOWS 274#if LJ_TARGET_WINDOWS
227 p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); 275 p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
228 if (!p) 276 if (!p)
229 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 277 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
230#elif LJ_TARGET_POSIX 278#elif LJ_TARGET_POSIX
231 p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, 279 p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS,
232 -1, 0); 280 -1, 0);
233 if (p == MAP_FAILED) 281 if (p == MAP_FAILED)
234 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 282 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
@@ -237,12 +285,15 @@ static void callback_mcode_new(CTState *cts)
237 p = lj_mem_new(cts->L, sz); 285 p = lj_mem_new(cts->L, sz);
238#endif 286#endif
239 cts->cb.mcode = p; 287 cts->cb.mcode = p;
240 callback_mcode_init(cts->g, p); 288 pe = callback_mcode_init(cts->g, p);
289 UNUSED(pe);
290 lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz,
291 "miscalculated CALLBACK_MAX_SLOT");
241 lj_mcode_sync(p, (char *)p + sz); 292 lj_mcode_sync(p, (char *)p + sz);
242#if LJ_TARGET_WINDOWS 293#if LJ_TARGET_WINDOWS
243 { 294 {
244 DWORD oprot; 295 DWORD oprot;
245 VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); 296 LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
246 } 297 }
247#elif LJ_TARGET_POSIX 298#elif LJ_TARGET_POSIX
248 mprotect(p, sz, (PROT_READ|PROT_EXEC)); 299 mprotect(p, sz, (PROT_READ|PROT_EXEC));
@@ -351,33 +402,78 @@ void lj_ccallback_mcode_free(CTState *cts)
351 goto done; \ 402 goto done; \
352 } CALLBACK_HANDLE_REGARG_FP2 403 } CALLBACK_HANDLE_REGARG_FP2
353 404
354#elif LJ_TARGET_PPC 405#elif LJ_TARGET_ARM64
355 406
356#define CALLBACK_HANDLE_REGARG \ 407#define CALLBACK_HANDLE_REGARG \
357 if (isfp) { \ 408 if (isfp) { \
358 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 409 if (nfpr + n <= CCALL_NARG_FPR) { \
359 sp = &cts->cb.fpr[nfpr++]; \ 410 sp = &cts->cb.fpr[nfpr]; \
360 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ 411 nfpr += n; \
361 goto done; \ 412 goto done; \
413 } else { \
414 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
362 } \ 415 } \
363 } else { /* Try to pass argument in GPRs. */ \ 416 } else { \
364 if (n > 1) { \ 417 if (!LJ_TARGET_OSX && n > 1) \
365 lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ 418 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
366 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
367 } \
368 if (ngpr + n <= maxgpr) { \ 419 if (ngpr + n <= maxgpr) { \
369 sp = &cts->cb.gpr[ngpr]; \ 420 sp = &cts->cb.gpr[ngpr]; \
370 ngpr += n; \ 421 ngpr += n; \
371 goto done; \ 422 goto done; \
423 } else { \
424 ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \
425 } \
426 }
427
428#elif LJ_TARGET_PPC
429
430#define CALLBACK_HANDLE_GPR \
431 if (n > 1) { \
432 lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
433 ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \
434 "bad GPR type"); \
435 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
436 } \
437 if (ngpr + n <= maxgpr) { \
438 sp = &cts->cb.gpr[ngpr]; \
439 ngpr += n; \
440 goto done; \
441 }
442
443#if LJ_ABI_SOFTFP
444#define CALLBACK_HANDLE_REGARG \
445 CALLBACK_HANDLE_GPR \
446 UNUSED(isfp);
447#else
448#define CALLBACK_HANDLE_REGARG \
449 if (isfp) { \
450 if (nfpr + 1 <= CCALL_NARG_FPR) { \
451 sp = &cts->cb.fpr[nfpr++]; \
452 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
453 goto done; \
372 } \ 454 } \
455 } else { /* Try to pass argument in GPRs. */ \
456 CALLBACK_HANDLE_GPR \
373 } 457 }
458#endif
374 459
460#if !LJ_ABI_SOFTFP
375#define CALLBACK_HANDLE_RET \ 461#define CALLBACK_HANDLE_RET \
376 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 462 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
377 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ 463 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
464#endif
378 465
379#elif LJ_TARGET_MIPS 466#elif LJ_TARGET_MIPS32
380 467
468#define CALLBACK_HANDLE_GPR \
469 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
470 if (ngpr + n <= maxgpr) { \
471 sp = &cts->cb.gpr[ngpr]; \
472 ngpr += n; \
473 goto done; \
474 }
475
476#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
381#define CALLBACK_HANDLE_REGARG \ 477#define CALLBACK_HANDLE_REGARG \
382 if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ 478 if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \
383 sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ 479 sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
@@ -385,13 +481,36 @@ void lj_ccallback_mcode_free(CTState *cts)
385 goto done; \ 481 goto done; \
386 } else { /* Try to pass argument in GPRs. */ \ 482 } else { /* Try to pass argument in GPRs. */ \
387 nfpr = CCALL_NARG_FPR; \ 483 nfpr = CCALL_NARG_FPR; \
388 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 484 CALLBACK_HANDLE_GPR \
389 if (ngpr + n <= maxgpr) { \ 485 }
390 sp = &cts->cb.gpr[ngpr]; \ 486#else /* MIPS32 soft-float */
391 ngpr += n; \ 487#define CALLBACK_HANDLE_REGARG \
392 goto done; \ 488 CALLBACK_HANDLE_GPR \
393 } \ 489 UNUSED(isfp);
490#endif
491
492#define CALLBACK_HANDLE_RET \
493 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
494 ((float *)dp)[1] = *(float *)dp;
495
496#elif LJ_TARGET_MIPS64
497
498#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */
499#define CALLBACK_HANDLE_REGARG \
500 if (ngpr + n <= maxgpr) { \
501 sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
502 ngpr += n; \
503 goto done; \
394 } 504 }
505#else /* MIPS64 soft-float */
506#define CALLBACK_HANDLE_REGARG \
507 if (ngpr + n <= maxgpr) { \
508 UNUSED(isfp); \
509 sp = (void*) &cts->cb.gpr[ngpr]; \
510 ngpr += n; \
511 goto done; \
512 }
513#endif
395 514
396#define CALLBACK_HANDLE_RET \ 515#define CALLBACK_HANDLE_RET \
397 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 516 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@@ -411,6 +530,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
411 int gcsteps = 0; 530 int gcsteps = 0;
412 CType *ct; 531 CType *ct;
413 GCfunc *fn; 532 GCfunc *fn;
533 int fntp;
414 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; 534 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
415#if CCALL_NARG_FPR 535#if CCALL_NARG_FPR
416 MSize nfpr = 0; 536 MSize nfpr = 0;
@@ -421,18 +541,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
421 541
422 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { 542 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
423 ct = ctype_get(cts, id); 543 ct = ctype_get(cts, id);
424 rid = ctype_cid(ct->info); 544 rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */
425 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); 545 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
546 fntp = LJ_TFUNC;
426 } else { /* Must set up frame first, before throwing the error. */ 547 } else { /* Must set up frame first, before throwing the error. */
427 ct = NULL; 548 ct = NULL;
428 rid = 0; 549 rid = 0;
429 fn = (GCfunc *)L; 550 fn = (GCfunc *)L;
551 fntp = LJ_TTHREAD;
552 }
553 /* Continuation returns from callback. */
554 if (LJ_FR2) {
555 (o++)->u64 = LJ_CONT_FFI_CALLBACK;
556 (o++)->u64 = rid;
557 } else {
558 o->u32.lo = LJ_CONT_FFI_CALLBACK;
559 o->u32.hi = rid;
560 o++;
430 } 561 }
431 o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */ 562 setframe_gc(o, obj2gco(fn), fntp);
432 o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */ 563 if (LJ_FR2) o++;
433 o++; 564 setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
434 setframe_gc(o, obj2gco(fn));
435 setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
436 L->top = L->base = ++o; 565 L->top = L->base = ++o;
437 if (!ct) 566 if (!ct)
438 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); 567 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -459,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
459 CTSize sz; 588 CTSize sz;
460 int isfp; 589 int isfp;
461 MSize n; 590 MSize n;
462 lua_assert(ctype_isfield(ctf->info)); 591 lj_assertCTS(ctype_isfield(ctf->info), "field expected");
463 cta = ctype_rawchild(cts, ctf); 592 cta = ctype_rawchild(cts, ctf);
464 isfp = ctype_isfp(cta->info); 593 isfp = ctype_isfp(cta->info);
465 sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); 594 sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
@@ -474,7 +603,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
474 nsp += n; 603 nsp += n;
475 604
476 done: 605 done:
477 if (LJ_BE && cta->size < CTSIZE_PTR) 606 if (LJ_BE && cta->size < CTSIZE_PTR
607#if LJ_TARGET_MIPS64
608 && !(isfp && nsp)
609#endif
610 )
478 sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); 611 sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
479 gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); 612 gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
480 } 613 }
@@ -483,9 +616,14 @@ static void callback_conv_args(CTState *cts, lua_State *L)
483 L->top = o; 616 L->top = o;
484#if LJ_TARGET_X86 617#if LJ_TARGET_X86
485 /* Store stack adjustment for returns from non-cdecl callbacks. */ 618 /* Store stack adjustment for returns from non-cdecl callbacks. */
486 if (ctype_cconv(ct->info) != CTCC_CDECL) 619 if (ctype_cconv(ct->info) != CTCC_CDECL) {
620#if LJ_FR2
621 (L->base-3)->u64 |= (nsp << (16+2));
622#else
487 (L->base-2)->u32.hi |= (nsp << (16+2)); 623 (L->base-2)->u32.hi |= (nsp << (16+2));
488#endif 624#endif
625 }
626#endif
489 while (gcsteps-- > 0) 627 while (gcsteps-- > 0)
490 lj_gc_check(L); 628 lj_gc_check(L);
491} 629}
@@ -493,7 +631,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
493/* Convert Lua object to callback result. */ 631/* Convert Lua object to callback result. */
494static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) 632static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
495{ 633{
634#if LJ_FR2
635 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
636#else
496 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); 637 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
638#endif
497#if LJ_TARGET_X86 639#if LJ_TARGET_X86
498 cts->cb.gpr[2] = 0; 640 cts->cb.gpr[2] = 0;
499#endif 641#endif
@@ -503,6 +645,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
503 if (ctype_isfp(ctr->info)) 645 if (ctype_isfp(ctr->info))
504 dp = (uint8_t *)&cts->cb.fpr[0]; 646 dp = (uint8_t *)&cts->cb.fpr[0];
505#endif 647#endif
648#if LJ_TARGET_ARM64 && LJ_BE
649 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
650 dp = (uint8_t *)&cts->cb.fpr[0].f[1];
651#endif
506 lj_cconv_ct_tv(cts, ctr, dp, o, 0); 652 lj_cconv_ct_tv(cts, ctr, dp, o, 0);
507#ifdef CALLBACK_HANDLE_RET 653#ifdef CALLBACK_HANDLE_RET
508 CALLBACK_HANDLE_RET 654 CALLBACK_HANDLE_RET
@@ -516,6 +662,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
516 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : 662 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
517 (int32_t)*(int16_t *)dp; 663 (int32_t)*(int16_t *)dp;
518 } 664 }
665#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
666 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
667 if (ctr->size <= 4 &&
668 (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
669 *(int64_t *)dp = (int64_t)*(int32_t *)dp;
670#endif
519#if LJ_TARGET_X86 671#if LJ_TARGET_X86
520 if (ctype_isfp(ctr->info)) 672 if (ctype_isfp(ctr->info))
521 cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; 673 cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
@@ -528,8 +680,8 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
528{ 680{
529 lua_State *L = cts->L; 681 lua_State *L = cts->L;
530 global_State *g = cts->g; 682 global_State *g = cts->g;
531 lua_assert(L != NULL); 683 lj_assertG(L != NULL, "uninitialized cts->L in callback");
532 if (gcref(g->jit_L)) { 684 if (tvref(g->jit_base)) {
533 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); 685 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
534 if (g->panic) g->panic(L); 686 if (g->panic) g->panic(L);
535 exit(EXIT_FAILURE); 687 exit(EXIT_FAILURE);
@@ -562,9 +714,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
562 } 714 }
563 callback_conv_result(cts, L, o); 715 callback_conv_result(cts, L, o);
564 /* Finally drop C frame and continuation frame. */ 716 /* Finally drop C frame and continuation frame. */
565 L->cframe = cframe_prev(L->cframe); 717 L->top -= 2+2*LJ_FR2;
566 L->top -= 2;
567 L->base = obase; 718 L->base = obase;
719 L->cframe = cframe_prev(L->cframe);
568 cts->cb.slot = 0; /* Blacklist C function that called the callback. */ 720 cts->cb.slot = 0; /* Blacklist C function that called the callback. */
569} 721}
570 722
@@ -613,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct)
613 CType *ctf = ctype_get(cts, fid); 765 CType *ctf = ctype_get(cts, fid);
614 if (!ctype_isattrib(ctf->info)) { 766 if (!ctype_isattrib(ctf->info)) {
615 CType *cta; 767 CType *cta;
616 lua_assert(ctype_isfield(ctf->info)); 768 lj_assertCTS(ctype_isfield(ctf->info), "field expected");
617 cta = ctype_rawchild(cts, ctf); 769 cta = ctype_rawchild(cts, ctf);
618 if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || 770 if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) ||
619 (ctype_isnum(cta->info) && cta->size <= 8)) || 771 (ctype_isnum(cta->info) && cta->size <= 8)) ||
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index 55a72657..613f66e2 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -8,6 +8,7 @@
8#if LJ_HASFFI 8#if LJ_HASFFI
9 9
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_tab.h" 12#include "lj_tab.h"
12#include "lj_ctype.h" 13#include "lj_ctype.h"
13#include "lj_cdata.h" 14#include "lj_cdata.h"
@@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
122 CTInfo dinfo = d->info, sinfo = s->info; 123 CTInfo dinfo = d->info, sinfo = s->info;
123 void *tmpptr; 124 void *tmpptr;
124 125
125 lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo)); 126 lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo),
126 lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo)); 127 "unresolved enum");
128 lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo),
129 "unstripped attribute");
127 130
128 if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) 131 if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT)
129 goto err_conv; 132 goto err_conv;
130 133
131 /* Some basic sanity checks. */ 134 /* Some basic sanity checks. */
132 lua_assert(!ctype_isnum(dinfo) || dsize > 0); 135 lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type");
133 lua_assert(!ctype_isnum(sinfo) || ssize > 0); 136 lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type");
134 lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4); 137 lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4,
135 lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4); 138 "bad size for bool type");
136 lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize); 139 lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4,
137 lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize); 140 "bad size for bool type");
141 lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize,
142 "bad size for integer type");
143 lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize,
144 "bad size for integer type");
138 145
139 switch (cconv_idx2(dinfo, sinfo)) { 146 switch (cconv_idx2(dinfo, sinfo)) {
140 /* Destination is a bool. */ 147 /* Destination is a bool. */
@@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
357 if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s) 364 if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s)
358 goto err_conv; /* Must be exact same type. */ 365 goto err_conv; /* Must be exact same type. */
359copyval: /* Copy value. */ 366copyval: /* Copy value. */
360 lua_assert(dsize == ssize); 367 lj_assertCTS(dsize == ssize, "value copy with different sizes");
361 memcpy(dp, sp, dsize); 368 memcpy(dp, sp, dsize);
362 break; 369 break;
363 370
@@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
389 lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, 396 lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s,
390 (uint8_t *)&o->n, sp, 0); 397 (uint8_t *)&o->n, sp, 0);
391 /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ 398 /* Numbers are NOT canonicalized here! Beware of uninitialized data. */
392 lua_assert(tvisnum(o)); 399 lj_assertCTS(tvisnum(o), "non-canonical NaN passed");
393 } 400 }
394 } else { 401 } else {
395 uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); 402 uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0);
@@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
406 CTSize sz; 413 CTSize sz;
407 copyval: /* Copy value. */ 414 copyval: /* Copy value. */
408 sz = s->size; 415 sz = s->size;
409 lua_assert(sz != CTSIZE_INVALID); 416 lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size");
410 /* Attributes are stripped, qualifiers are kept (but mostly ignored). */ 417 /* Attributes are stripped, qualifiers are kept (but mostly ignored). */
411 cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); 418 cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz);
412 setcdataV(cts->L, o, cd); 419 setcdataV(cts->L, o, cd);
@@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
421 CTInfo info = s->info; 428 CTInfo info = s->info;
422 CTSize pos, bsz; 429 CTSize pos, bsz;
423 uint32_t val; 430 uint32_t val;
424 lua_assert(ctype_isbitfield(info)); 431 lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
425 /* NYI: packed bitfields may cause misaligned reads. */ 432 /* NYI: packed bitfields may cause misaligned reads. */
426 switch (ctype_bitcsz(info)) { 433 switch (ctype_bitcsz(info)) {
427 case 4: val = *(uint32_t *)sp; break; 434 case 4: val = *(uint32_t *)sp; break;
428 case 2: val = *(uint16_t *)sp; break; 435 case 2: val = *(uint16_t *)sp; break;
429 case 1: val = *(uint8_t *)sp; break; 436 case 1: val = *(uint8_t *)sp; break;
430 default: lua_assert(0); val = 0; break; 437 default:
438 lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
439 val = 0;
440 break;
431 } 441 }
432 /* Check if a packed bitfield crosses a container boundary. */ 442 /* Check if a packed bitfield crosses a container boundary. */
433 pos = ctype_bitpos(info); 443 pos = ctype_bitpos(info);
434 bsz = ctype_bitbsz(info); 444 bsz = ctype_bitbsz(info);
435 lua_assert(pos < 8*ctype_bitcsz(info)); 445 lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
436 lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); 446 lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
437 if (pos + bsz > 8*ctype_bitcsz(info)) 447 if (pos + bsz > 8*ctype_bitcsz(info))
438 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); 448 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
439 if (!(info & CTF_BOOL)) { 449 if (!(info & CTF_BOOL)) {
@@ -448,8 +458,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
448 setintV(o, (int32_t)val); 458 setintV(o, (int32_t)val);
449 } 459 }
450 } else { 460 } else {
451 lua_assert(bsz == 1); 461 uint32_t b = (val >> pos) & 1;
452 setboolV(o, (val >> pos) & 1); 462 lj_assertCTS(bsz == 1, "bad bool bitfield size");
463 setboolV(o, b);
464 setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */
453 } 465 }
454 return 0; /* No GC step needed. */ 466 return 0; /* No GC step needed. */
455} 467}
@@ -551,7 +563,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
551 sid = cdataV(o)->ctypeid; 563 sid = cdataV(o)->ctypeid;
552 s = ctype_get(cts, sid); 564 s = ctype_get(cts, sid);
553 if (ctype_isref(s->info)) { /* Resolve reference for value. */ 565 if (ctype_isref(s->info)) { /* Resolve reference for value. */
554 lua_assert(s->size == CTSIZE_PTR); 566 lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
555 sp = *(void **)sp; 567 sp = *(void **)sp;
556 sid = ctype_cid(s->info); 568 sid = ctype_cid(s->info);
557 } 569 }
@@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
571 CType *cct = lj_ctype_getfield(cts, d, str, &ofs); 583 CType *cct = lj_ctype_getfield(cts, d, str, &ofs);
572 if (!cct || !ctype_isconstval(cct->info)) 584 if (!cct || !ctype_isconstval(cct->info))
573 goto err_conv; 585 goto err_conv;
574 lua_assert(d->size == 4); 586 lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */
575 sp = (uint8_t *)&cct->size; 587 sp = (uint8_t *)&cct->size;
576 sid = ctype_cid(cct->info); 588 sid = ctype_cid(cct->info);
577 } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */ 589 } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */
@@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
610 tmpptr = uddata(ud); 622 tmpptr = uddata(ud);
611 if (ud->udtype == UDTYPE_IO_FILE) 623 if (ud->udtype == UDTYPE_IO_FILE)
612 tmpptr = *(void **)tmpptr; 624 tmpptr = *(void **)tmpptr;
625 else if (ud->udtype == UDTYPE_BUFFER)
626 tmpptr = ((SBufExt *)tmpptr)->r;
613 } else if (tvislightud(o)) { 627 } else if (tvislightud(o)) {
614 tmpptr = lightudV(o); 628 tmpptr = lightudV(cts->g, o);
615 } else if (tvisfunc(o)) { 629 } else if (tvisfunc(o)) {
616 void *p = lj_ccallback_new(cts, d, funcV(o)); 630 void *p = lj_ccallback_new(cts, d, funcV(o));
617 if (p) { 631 if (p) {
@@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
635 CTInfo info = d->info; 649 CTInfo info = d->info;
636 CTSize pos, bsz; 650 CTSize pos, bsz;
637 uint32_t val, mask; 651 uint32_t val, mask;
638 lua_assert(ctype_isbitfield(info)); 652 lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
639 if ((info & CTF_BOOL)) { 653 if ((info & CTF_BOOL)) {
640 uint8_t tmpbool; 654 uint8_t tmpbool;
641 lua_assert(ctype_bitbsz(info) == 1); 655 lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size");
642 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); 656 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0);
643 val = tmpbool; 657 val = tmpbool;
644 } else { 658 } else {
@@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
647 } 661 }
648 pos = ctype_bitpos(info); 662 pos = ctype_bitpos(info);
649 bsz = ctype_bitbsz(info); 663 bsz = ctype_bitbsz(info);
650 lua_assert(pos < 8*ctype_bitcsz(info)); 664 lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
651 lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); 665 lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
652 /* Check if a packed bitfield crosses a container boundary. */ 666 /* Check if a packed bitfield crosses a container boundary. */
653 if (pos + bsz > 8*ctype_bitcsz(info)) 667 if (pos + bsz > 8*ctype_bitcsz(info))
654 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); 668 lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
@@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
659 case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; 673 case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break;
660 case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; 674 case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break;
661 case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; 675 case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break;
662 default: lua_assert(0); break; 676 default:
677 lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
678 break;
663 } 679 }
664} 680}
665 681
diff --git a/src/lj_cconv.h b/src/lj_cconv.h
index 214d6122..cd927328 100644
--- a/src/lj_cconv.h
+++ b/src/lj_cconv.h
@@ -27,13 +27,14 @@ enum {
27static LJ_AINLINE uint32_t cconv_idx(CTInfo info) 27static LJ_AINLINE uint32_t cconv_idx(CTInfo info)
28{ 28{
29 uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ 29 uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */
30 lua_assert(ctype_type(info) <= CT_MAYCONVERT); 30 lj_assertX(ctype_type(info) <= CT_MAYCONVERT,
31 "cannot convert ctype %08x", info);
31#if LJ_64 32#if LJ_64
32 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); 33 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u);
33#else 34#else
34 idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); 35 idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u);
35#endif 36#endif
36 lua_assert(idx < 8); 37 lj_assertX(idx < 8, "cannot convert ctype %08x", info);
37 return idx; 38 return idx;
38} 39}
39 40
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 52bb07b4..ffc31078 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -27,20 +26,20 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
27} 26}
28 27
29/* Allocate variable-sized or specially aligned C data object. */ 28/* Allocate variable-sized or specially aligned C data object. */
30GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) 29GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
31{ 30{
32 global_State *g; 31 global_State *g;
33 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + 32 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
34 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); 33 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
35 char *p = lj_mem_newt(cts->L, extra + sz, char); 34 char *p = lj_mem_newt(L, extra + sz, char);
36 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); 35 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
37 uintptr_t almask = (1u << align) - 1u; 36 uintptr_t almask = (1u << align) - 1u;
38 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); 37 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
39 lua_assert((char *)cd - p < 65536); 38 lj_assertL((char *)cd - p < 65536, "excessive cdata alignment");
40 cdatav(cd)->offset = (uint16_t)((char *)cd - p); 39 cdatav(cd)->offset = (uint16_t)((char *)cd - p);
41 cdatav(cd)->extra = extra; 40 cdatav(cd)->extra = extra;
42 cdatav(cd)->len = sz; 41 cdatav(cd)->len = sz;
43 g = cts->g; 42 g = G(L);
44 setgcrefr(cd->nextgc, g->gc.root); 43 setgcrefr(cd->nextgc, g->gc.root);
45 setgcref(g->gc.root, obj2gco(cd)); 44 setgcref(g->gc.root, obj2gco(cd));
46 newwhite(g, obj2gco(cd)); 45 newwhite(g, obj2gco(cd));
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
50 return cd; 49 return cd;
51} 50}
52 51
52/* Allocate arbitrary C data object. */
53GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info)
54{
55 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
56 return lj_cdata_new(cts, id, sz);
57 else
58 return lj_cdata_newv(cts->L, id, sz, ctype_align(info));
59}
60
53/* Free a C data object. */ 61/* Free a C data object. */
54void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) 62void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
55{ 63{
@@ -68,29 +76,30 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
68 } else if (LJ_LIKELY(!cdataisv(cd))) { 76 } else if (LJ_LIKELY(!cdataisv(cd))) {
69 CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); 77 CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid);
70 CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; 78 CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR;
71 lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || 79 lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) ||
72 ctype_isextern(ct->info)); 80 ctype_isextern(ct->info), "free of ctype without a size");
73 lj_mem_free(g, cd, sizeof(GCcdata) + sz); 81 lj_mem_free(g, cd, sizeof(GCcdata) + sz);
74 } else { 82 } else {
75 lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); 83 lj_mem_free(g, memcdatav(cd), sizecdatav(cd));
76 } 84 }
77} 85}
78 86
79TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) 87void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
80{ 88{
81 global_State *g = G(L); 89 GCtab *t = ctype_ctsG(G(L))->finalizer;
82 GCtab *t = ctype_ctsG(g)->finalizer;
83 if (gcref(t->metatable)) { 90 if (gcref(t->metatable)) {
84 /* Add cdata to finalizer table, if still enabled. */ 91 /* Add cdata to finalizer table, if still enabled. */
85 TValue *tv, tmp; 92 TValue *tv, tmp;
86 setcdataV(L, &tmp, cd); 93 setcdataV(L, &tmp, cd);
87 lj_gc_anybarriert(L, t); 94 lj_gc_anybarriert(L, t);
88 tv = lj_tab_set(L, t, &tmp); 95 tv = lj_tab_set(L, t, &tmp);
89 cd->marked |= LJ_GC_CDATA_FIN; 96 if (it == LJ_TNIL) {
90 return tv; 97 setnilV(tv);
91 } else { 98 cd->marked &= ~LJ_GC_CDATA_FIN;
92 /* Otherwise return dummy TValue. */ 99 } else {
93 return &g->tmptv; 100 setgcV(L, tv, obj, it);
101 cd->marked |= LJ_GC_CDATA_FIN;
102 }
94 } 103 }
95} 104}
96 105
@@ -106,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp,
106 115
107 /* Resolve reference for cdata object. */ 116 /* Resolve reference for cdata object. */
108 if (ctype_isref(ct->info)) { 117 if (ctype_isref(ct->info)) {
109 lua_assert(ct->size == CTSIZE_PTR); 118 lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized");
110 p = *(uint8_t **)p; 119 p = *(uint8_t **)p;
111 ct = ctype_child(cts, ct); 120 ct = ctype_child(cts, ct);
112 } 121 }
@@ -117,13 +126,19 @@ collect_attrib:
117 if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; 126 if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size;
118 ct = ctype_child(cts, ct); 127 ct = ctype_child(cts, ct);
119 } 128 }
120 lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ 129 /* Interning rejects refs to refs. */
130 lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref");
121 131
122 if (tvisint(key)) { 132 if (tvisint(key)) {
123 idx = (ptrdiff_t)intV(key); 133 idx = (ptrdiff_t)intV(key);
124 goto integer_key; 134 goto integer_key;
125 } else if (tvisnum(key)) { /* Numeric key. */ 135 } else if (tvisnum(key)) { /* Numeric key. */
126 idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key)); 136#ifdef _MSC_VER
137 /* Workaround for MSVC bug. */
138 volatile
139#endif
140 lua_Number n = numV(key);
141 idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
127 integer_key: 142 integer_key:
128 if (ctype_ispointer(ct->info)) { 143 if (ctype_ispointer(ct->info)) {
129 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ 144 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
@@ -198,7 +213,8 @@ collect_attrib:
198static void cdata_getconst(CTState *cts, TValue *o, CType *ct) 213static void cdata_getconst(CTState *cts, TValue *o, CType *ct)
199{ 214{
200 CType *ctt = ctype_child(cts, ct); 215 CType *ctt = ctype_child(cts, ct);
201 lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); 216 lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
217 "only 32 bit const supported"); /* NYI */
202 /* Constants are already zero-extended/sign-extended to 32 bits. */ 218 /* Constants are already zero-extended/sign-extended to 32 bits. */
203 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) 219 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
204 setnumV(o, (lua_Number)(uint32_t)ct->size); 220 setnumV(o, (lua_Number)(uint32_t)ct->size);
@@ -219,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp)
219 } 235 }
220 236
221 /* Get child type of pointer/array/field. */ 237 /* Get child type of pointer/array/field. */
222 lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info)); 238 lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info),
239 "pointer or field expected");
223 sid = ctype_cid(s->info); 240 sid = ctype_cid(s->info);
224 s = ctype_get(cts, sid); 241 s = ctype_get(cts, sid);
225 242
226 /* Resolve reference for field. */ 243 /* Resolve reference for field. */
227 if (ctype_isref(s->info)) { 244 if (ctype_isref(s->info)) {
228 lua_assert(s->size == CTSIZE_PTR); 245 lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
229 sp = *(uint8_t **)sp; 246 sp = *(uint8_t **)sp;
230 sid = ctype_cid(s->info); 247 sid = ctype_cid(s->info);
231 s = ctype_get(cts, sid); 248 s = ctype_get(cts, sid);
@@ -252,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
252 } 269 }
253 270
254 /* Get child type of pointer/array/field. */ 271 /* Get child type of pointer/array/field. */
255 lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info)); 272 lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info),
273 "pointer or field expected");
256 d = ctype_child(cts, d); 274 d = ctype_child(cts, d);
257 275
258 /* Resolve reference for field. */ 276 /* Resolve reference for field. */
259 if (ctype_isref(d->info)) { 277 if (ctype_isref(d->info)) {
260 lua_assert(d->size == CTSIZE_PTR); 278 lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized");
261 dp = *(uint8_t **)dp; 279 dp = *(uint8_t **)dp;
262 d = ctype_child(cts, d); 280 d = ctype_child(cts, d);
263 } 281 }
@@ -272,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
272 d = ctype_child(cts, d); 290 d = ctype_child(cts, d);
273 } 291 }
274 292
275 lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info)); 293 lj_assertCTS(ctype_hassize(d->info), "store to ctype without size");
294 lj_assertCTS(!ctype_isvoid(d->info), "store to void type");
276 295
277 if (((d->info|qual) & CTF_CONST)) { 296 if (((d->info|qual) & CTF_CONST)) {
278 err_const: 297 err_const:
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 4623c525..b93bec86 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz)
18 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ 18 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
19 return ((void *)(uintptr_t)*(uint32_t *)p); 19 return ((void *)(uintptr_t)*(uint32_t *)p);
20 } else { 20 } else {
21 lua_assert(sz == CTSIZE_PTR); 21 lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
22 return *(void **)p; 22 return *(void **)p;
23 } 23 }
24} 24}
@@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v)
29 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ 29 if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
30 *(uint32_t *)p = (uint32_t)(uintptr_t)v; 30 *(uint32_t *)p = (uint32_t)(uintptr_t)v;
31 } else { 31 } else {
32 lua_assert(sz == CTSIZE_PTR); 32 lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
33 *(void **)p = (void *)v; 33 *(void **)p = (void *)v;
34 } 34 }
35} 35}
@@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz)
40 GCcdata *cd; 40 GCcdata *cd;
41#ifdef LUA_USE_ASSERT 41#ifdef LUA_USE_ASSERT
42 CType *ct = ctype_raw(cts, id); 42 CType *ct = ctype_raw(cts, id);
43 lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz); 43 lj_assertCTS((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz,
44 "inconsistent size of fixed-size cdata alloc");
44#endif 45#endif
45 cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); 46 cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz);
46 cd->gct = ~LJ_TCDATA; 47 cd->gct = ~LJ_TCDATA;
@@ -58,11 +59,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
58} 59}
59 60
60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); 61LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
61LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, 62LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
62 CTSize align); 63 CTSize align);
64LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz,
65 CTInfo info);
63 66
64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); 67LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
65LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); 68LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
69 uint32_t it);
66 70
67LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, 71LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
68 uint8_t **pp, CTInfo *qual); 72 uint8_t **pp, CTInfo *qual);
diff --git a/src/lj_clib.c b/src/lj_clib.c
index 6a443ac5..d8636a48 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -16,6 +16,7 @@
16#include "lj_cconv.h" 16#include "lj_cconv.h"
17#include "lj_cdata.h" 17#include "lj_cdata.h"
18#include "lj_clib.h" 18#include "lj_clib.h"
19#include "lj_strfmt.h"
19 20
20/* -- OS-specific functions ----------------------------------------------- */ 21/* -- OS-specific functions ----------------------------------------------- */
21 22
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
61#endif 62#endif
62 ) { 63 ) {
63 if (!strchr(name, '.')) { 64 if (!strchr(name, '.')) {
64 name = lj_str_pushf(L, CLIB_SOEXT, name); 65 name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
65 L->top--; 66 L->top--;
66#if LJ_TARGET_CYGWIN 67#if LJ_TARGET_CYGWIN
67 } else { 68 } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
70 } 71 }
71 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && 72 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
72 name[2] == CLIB_SOPREFIX[2])) { 73 name[2] == CLIB_SOPREFIX[2])) {
73 name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); 74 name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
74 L->top--; 75 L->top--;
75 } 76 }
76 } 77 }
@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
158/* Default libraries. */ 159/* Default libraries. */
159enum { 160enum {
160 CLIB_HANDLE_EXE, 161 CLIB_HANDLE_EXE,
162#if !LJ_TARGET_UWP
161 CLIB_HANDLE_DLL, 163 CLIB_HANDLE_DLL,
162 CLIB_HANDLE_CRT, 164 CLIB_HANDLE_CRT,
163 CLIB_HANDLE_KERNEL32, 165 CLIB_HANDLE_KERNEL32,
164 CLIB_HANDLE_USER32, 166 CLIB_HANDLE_USER32,
165 CLIB_HANDLE_GDI32, 167 CLIB_HANDLE_GDI32,
168#endif
166 CLIB_HANDLE_MAX 169 CLIB_HANDLE_MAX
167}; 170};
168 171
@@ -172,11 +175,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
172 const char *name) 175 const char *name)
173{ 176{
174 DWORD err = GetLastError(); 177 DWORD err = GetLastError();
178#if LJ_TARGET_XBOXONE
179 wchar_t wbuf[128];
180 char buf[128*2];
181 if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
182 NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) ||
183 !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL))
184#else
175 char buf[128]; 185 char buf[128];
176 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, 186 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
177 NULL, err, 0, buf, sizeof(buf), NULL)) 187 NULL, err, 0, buf, sizeof(buf), NULL))
188#endif
178 buf[0] = '\0'; 189 buf[0] = '\0';
179 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); 190 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
180} 191}
181 192
182static int clib_needext(const char *s) 193static int clib_needext(const char *s)
@@ -191,7 +202,7 @@ static int clib_needext(const char *s)
191static const char *clib_extname(lua_State *L, const char *name) 202static const char *clib_extname(lua_State *L, const char *name)
192{ 203{
193 if (clib_needext(name)) { 204 if (clib_needext(name)) {
194 name = lj_str_pushf(L, "%s.dll", name); 205 name = lj_strfmt_pushf(L, "%s.dll", name);
195 L->top--; 206 L->top--;
196 } 207 }
197 return name; 208 return name;
@@ -200,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name)
200static void *clib_loadlib(lua_State *L, const char *name, int global) 211static void *clib_loadlib(lua_State *L, const char *name, int global)
201{ 212{
202 DWORD oldwerr = GetLastError(); 213 DWORD oldwerr = GetLastError();
203 void *h = (void *)LoadLibraryA(clib_extname(L, name)); 214 void *h = LJ_WIN_LOADLIBA(clib_extname(L, name));
204 if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); 215 if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
205 SetLastError(oldwerr); 216 SetLastError(oldwerr);
206 UNUSED(global); 217 UNUSED(global);
@@ -210,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global)
210static void clib_unloadlib(CLibrary *cl) 221static void clib_unloadlib(CLibrary *cl)
211{ 222{
212 if (cl->handle == CLIB_DEFHANDLE) { 223 if (cl->handle == CLIB_DEFHANDLE) {
224#if !LJ_TARGET_UWP
213 MSize i; 225 MSize i;
214 for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { 226 for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) {
215 void *h = clib_def_handle[i]; 227 void *h = clib_def_handle[i];
@@ -218,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl)
218 FreeLibrary((HINSTANCE)h); 230 FreeLibrary((HINSTANCE)h);
219 } 231 }
220 } 232 }
233#endif
221 } else if (cl->handle) { 234 } else if (cl->handle) {
222 FreeLibrary((HINSTANCE)cl->handle); 235 FreeLibrary((HINSTANCE)cl->handle);
223 } 236 }
224} 237}
225 238
239#if LJ_TARGET_UWP
240EXTERN_C IMAGE_DOS_HEADER __ImageBase;
241#endif
242
226static void *clib_getsym(CLibrary *cl, const char *name) 243static void *clib_getsym(CLibrary *cl, const char *name)
227{ 244{
228 void *p = NULL; 245 void *p = NULL;
@@ -231,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
231 for (i = 0; i < CLIB_HANDLE_MAX; i++) { 248 for (i = 0; i < CLIB_HANDLE_MAX; i++) {
232 HINSTANCE h = (HINSTANCE)clib_def_handle[i]; 249 HINSTANCE h = (HINSTANCE)clib_def_handle[i];
233 if (!(void *)h) { /* Resolve default library handles (once). */ 250 if (!(void *)h) { /* Resolve default library handles (once). */
251#if LJ_TARGET_UWP
252 h = (HINSTANCE)&__ImageBase;
253#else
234 switch (i) { 254 switch (i) {
235 case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; 255 case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break;
236 case CLIB_HANDLE_DLL: 256 case CLIB_HANDLE_DLL:
@@ -241,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name)
241 GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, 261 GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
242 (const char *)&_fmode, &h); 262 (const char *)&_fmode, &h);
243 break; 263 break;
244 case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break; 264 case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break;
245 case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break; 265 case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break;
246 case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break; 266 case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break;
247 } 267 }
248 if (!h) continue; 268 if (!h) continue;
269#endif
249 clib_def_handle[i] = (void *)h; 270 clib_def_handle[i] = (void *)h;
250 } 271 }
251 p = (void *)GetProcAddress(h, name); 272 p = (void *)GetProcAddress(h, name);
@@ -264,7 +285,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
264LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, 285LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
265 const char *name) 286 const char *name)
266{ 287{
267 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); 288 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
268} 289}
269 290
270static void *clib_loadlib(lua_State *L, const char *name, int global) 291static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -329,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
329 lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); 350 lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name));
330 if (ctype_isconstval(ct->info)) { 351 if (ctype_isconstval(ct->info)) {
331 CType *ctt = ctype_child(cts, ct); 352 CType *ctt = ctype_child(cts, ct);
332 lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); 353 lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
354 "only 32 bit const supported"); /* NYI */
333 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) 355 if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
334 setnumV(tv, (lua_Number)(uint32_t)ct->size); 356 setnumV(tv, (lua_Number)(uint32_t)ct->size);
335 else 357 else
@@ -341,14 +363,15 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
341#endif 363#endif
342 void *p = clib_getsym(cl, sym); 364 void *p = clib_getsym(cl, sym);
343 GCcdata *cd; 365 GCcdata *cd;
344 lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); 366 lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info),
367 "unexpected ctype %08x in clib", ct->info);
345#if LJ_TARGET_X86 && LJ_ABI_WIN 368#if LJ_TARGET_X86 && LJ_ABI_WIN
346 /* Retry with decorated name for fastcall/stdcall functions. */ 369 /* Retry with decorated name for fastcall/stdcall functions. */
347 if (!p && ctype_isfunc(ct->info)) { 370 if (!p && ctype_isfunc(ct->info)) {
348 CTInfo cconv = ctype_cconv(ct->info); 371 CTInfo cconv = ctype_cconv(ct->info);
349 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { 372 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
350 CTSize sz = clib_func_argsize(cts, ct); 373 CTSize sz = clib_func_argsize(cts, ct);
351 const char *symd = lj_str_pushf(L, 374 const char *symd = lj_strfmt_pushf(L,
352 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", 375 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
353 sym, sz); 376 sym, sz);
354 L->top--; 377 L->top--;
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index ef705cb0..efe80759 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,13 +9,14 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_ctype.h" 13#include "lj_ctype.h"
14#include "lj_cparse.h" 14#include "lj_cparse.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_vm.h" 16#include "lj_vm.h"
17#include "lj_char.h" 17#include "lj_char.h"
18#include "lj_strscan.h" 18#include "lj_strscan.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** Important note: this is NOT a validating C parser! This is a minimal 22** Important note: this is NOT a validating C parser! This is a minimal
@@ -27,6 +28,30 @@
27** If in doubt, please check the input against your favorite C compiler. 28** If in doubt, please check the input against your favorite C compiler.
28*/ 29*/
29 30
31#ifdef LUA_USE_ASSERT
32#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__))
33#else
34#define lj_assertCP(c, ...) ((void)cp)
35#endif
36
37/* -- Miscellaneous ------------------------------------------------------- */
38
39/* Match string against a C literal. */
40#define cp_str_is(str, k) \
41 ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1))
42
43/* Check string against a linear list of matches. */
44int lj_cparse_case(GCstr *str, const char *match)
45{
46 MSize len;
47 int n;
48 for (n = 0; (len = (MSize)*match++); n++, match += len) {
49 if (str->len == len && !memcmp(match, strdata(str), len))
50 return n;
51 }
52 return -1;
53}
54
30/* -- C lexer ------------------------------------------------------------- */ 55/* -- C lexer ------------------------------------------------------------- */
31 56
32/* C lexer token names. */ 57/* C lexer token names. */
@@ -42,13 +67,13 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em);
42 67
43static const char *cp_tok2str(CPState *cp, CPToken tok) 68static const char *cp_tok2str(CPState *cp, CPToken tok)
44{ 69{
45 lua_assert(tok < CTOK_FIRSTDECL); 70 lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok);
46 if (tok > CTOK_OFS) 71 if (tok > CTOK_OFS)
47 return ctoknames[tok-CTOK_OFS-1]; 72 return ctoknames[tok-CTOK_OFS-1];
48 else if (!lj_char_iscntrl(tok)) 73 else if (!lj_char_iscntrl(tok))
49 return lj_str_pushf(cp->L, "%c", tok); 74 return lj_strfmt_pushf(cp->L, "%c", tok);
50 else 75 else
51 return lj_str_pushf(cp->L, "char(%d)", tok); 76 return lj_strfmt_pushf(cp->L, "char(%d)", tok);
52} 77}
53 78
54/* End-of-line? */ 79/* End-of-line? */
@@ -85,24 +110,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
85 return cp_get(cp); 110 return cp_get(cp);
86} 111}
87 112
88/* Grow save buffer. */
89static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
90{
91 MSize newsize;
92 if (cp->sb.sz >= CPARSE_MAX_BUF/2)
93 cp_err(cp, LJ_ERR_XELEM);
94 newsize = cp->sb.sz * 2;
95 lj_str_resizebuf(cp->L, &cp->sb, newsize);
96 cp->sb.buf[cp->sb.n++] = (char)c;
97}
98
99/* Save character in buffer. */ 113/* Save character in buffer. */
100static LJ_AINLINE void cp_save(CPState *cp, CPChar c) 114static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
101{ 115{
102 if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) 116 lj_buf_putb(&cp->sb, c);
103 cp_save_grow(cp, c);
104 else
105 cp->sb.buf[cp->sb.n++] = (char)c;
106} 117}
107 118
108/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ 119/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +133,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
122 tokstr = NULL; 133 tokstr = NULL;
123 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || 134 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
124 tok >= CTOK_FIRSTDECL) { 135 tok >= CTOK_FIRSTDECL) {
125 if (cp->sb.n == 0) cp_save(cp, '$'); 136 if (cp->sb.w == cp->sb.b) cp_save(cp, '$');
126 cp_save(cp, '\0'); 137 cp_save(cp, '\0');
127 tokstr = cp->sb.buf; 138 tokstr = cp->sb.b;
128 } else { 139 } else {
129 tokstr = cp_tok2str(cp, tok); 140 tokstr = cp_tok2str(cp, tok);
130 } 141 }
131 L = cp->L; 142 L = cp->L;
132 va_start(argp, em); 143 va_start(argp, em);
133 msg = lj_str_pushvf(L, err2msg(em), argp); 144 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
134 va_end(argp); 145 va_end(argp);
135 if (tokstr) 146 if (tokstr)
136 msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); 147 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
137 if (cp->linenumber > 1) 148 if (cp->linenumber > 1)
138 msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); 149 msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
139 lj_err_callermsg(L, msg); 150 lj_err_callermsg(L, msg);
140} 151}
141 152
@@ -164,7 +175,8 @@ static CPToken cp_number(CPState *cp)
164 TValue o; 175 TValue o;
165 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 176 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
166 cp_save(cp, '\0'); 177 cp_save(cp, '\0');
167 fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); 178 fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1,
179 &o, STRSCAN_OPT_C);
168 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; 180 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
169 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; 181 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
170 else if (!(cp->mode & CPARSE_MODE_SKIP)) 182 else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +189,7 @@ static CPToken cp_number(CPState *cp)
177static CPToken cp_ident(CPState *cp) 189static CPToken cp_ident(CPState *cp)
178{ 190{
179 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 191 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
180 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 192 cp->str = lj_buf_str(cp->L, &cp->sb);
181 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); 193 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
182 if (ctype_type(cp->ct->info) == CT_KW) 194 if (ctype_type(cp->ct->info) == CT_KW)
183 return ctype_cid(cp->ct->info); 195 return ctype_cid(cp->ct->info);
@@ -263,11 +275,11 @@ static CPToken cp_string(CPState *cp)
263 } 275 }
264 cp_get(cp); 276 cp_get(cp);
265 if (delim == '"') { 277 if (delim == '"') {
266 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 278 cp->str = lj_buf_str(cp->L, &cp->sb);
267 return CTOK_STRING; 279 return CTOK_STRING;
268 } else { 280 } else {
269 if (cp->sb.n != 1) cp_err_token(cp, '\''); 281 if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
270 cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; 282 cp->val.i32 = (int32_t)(char)*cp->sb.b;
271 cp->val.id = CTID_INT32; 283 cp->val.id = CTID_INT32;
272 return CTOK_INTEGER; 284 return CTOK_INTEGER;
273 } 285 }
@@ -296,7 +308,7 @@ static void cp_comment_cpp(CPState *cp)
296/* Lexical scanner for C. Only a minimal subset is implemented. */ 308/* Lexical scanner for C. Only a minimal subset is implemented. */
297static CPToken cp_next_(CPState *cp) 309static CPToken cp_next_(CPState *cp)
298{ 310{
299 lj_str_resetbuf(&cp->sb); 311 lj_buf_reset(&cp->sb);
300 for (;;) { 312 for (;;) {
301 if (lj_char_isident(cp->c)) 313 if (lj_char_isident(cp->c))
302 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); 314 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -385,9 +397,8 @@ static void cp_init(CPState *cp)
385 cp->depth = 0; 397 cp->depth = 0;
386 cp->curpack = 0; 398 cp->curpack = 0;
387 cp->packstack[0] = 255; 399 cp->packstack[0] = 255;
388 lj_str_initbuf(&cp->sb); 400 lj_buf_init(cp->L, &cp->sb);
389 lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF); 401 lj_assertCP(cp->p != NULL, "uninitialized cp->p");
390 lua_assert(cp->p != NULL);
391 cp_get(cp); /* Read-ahead first char. */ 402 cp_get(cp); /* Read-ahead first char. */
392 cp->tok = 0; 403 cp->tok = 0;
393 cp->tmask = CPNS_DEFAULT; 404 cp->tmask = CPNS_DEFAULT;
@@ -398,7 +409,7 @@ static void cp_init(CPState *cp)
398static void cp_cleanup(CPState *cp) 409static void cp_cleanup(CPState *cp)
399{ 410{
400 global_State *g = G(cp->L); 411 global_State *g = G(cp->L);
401 lj_str_freebuf(g, &cp->sb); 412 lj_buf_free(g, &cp->sb);
402} 413}
403 414
404/* Check and consume optional token. */ 415/* Check and consume optional token. */
@@ -848,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
848 /* The cid is already part of info for copies of pointers/functions. */ 859 /* The cid is already part of info for copies of pointers/functions. */
849 idx = ct->next; 860 idx = ct->next;
850 if (ctype_istypedef(info)) { 861 if (ctype_istypedef(info)) {
851 lua_assert(id == 0); 862 lj_assertCP(id == 0, "typedef not at toplevel");
852 id = ctype_cid(info); 863 id = ctype_cid(info);
853 /* Always refetch info/size, since struct/enum may have been completed. */ 864 /* Always refetch info/size, since struct/enum may have been completed. */
854 cinfo = ctype_get(cp->cts, id)->info; 865 cinfo = ctype_get(cp->cts, id)->info;
855 csize = ctype_get(cp->cts, id)->size; 866 csize = ctype_get(cp->cts, id)->size;
856 lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo)); 867 lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo),
868 "typedef of bad type");
857 } else if (ctype_isfunc(info)) { /* Intern function. */ 869 } else if (ctype_isfunc(info)) { /* Intern function. */
858 CType *fct; 870 CType *fct;
859 CTypeID fid; 871 CTypeID fid;
@@ -886,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
886 /* Inherit csize/cinfo from original type. */ 898 /* Inherit csize/cinfo from original type. */
887 } else { 899 } else {
888 if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ 900 if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */
889 lua_assert(id == 0); 901 lj_assertCP(id == 0, "number not at toplevel");
890 if (!(info & CTF_BOOL)) { 902 if (!(info & CTF_BOOL)) {
891 CTSize msize = ctype_msizeP(decl->attr); 903 CTSize msize = ctype_msizeP(decl->attr);
892 CTSize vsize = ctype_vsizeP(decl->attr); 904 CTSize vsize = ctype_vsizeP(decl->attr);
@@ -941,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
941 info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); 953 info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN);
942 info |= (cinfo & CTF_QUAL); /* Inherit qual. */ 954 info |= (cinfo & CTF_QUAL); /* Inherit qual. */
943 } else { 955 } else {
944 lua_assert(ctype_isvoid(info)); 956 lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info);
945 } 957 }
946 csize = size; 958 csize = size;
947 cinfo = info+id; 959 cinfo = info+id;
@@ -953,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
953 965
954/* -- C declaration parser ------------------------------------------------ */ 966/* -- C declaration parser ------------------------------------------------ */
955 967
956#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
957
958/* Reset declaration state to declaration specifier. */ 968/* Reset declaration state to declaration specifier. */
959static void cp_decl_reset(CPDecl *decl) 969static void cp_decl_reset(CPDecl *decl)
960{ 970{
@@ -1031,7 +1041,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
1031 if (cp->tok == CTOK_STRING) { 1041 if (cp->tok == CTOK_STRING) {
1032 GCstr *str = cp->str; 1042 GCstr *str = cp->str;
1033 while (cp_next(cp) == CTOK_STRING) { 1043 while (cp_next(cp) == CTOK_STRING) {
1034 lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); 1044 lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
1035 cp->L->top--; 1045 cp->L->top--;
1036 str = strV(cp->L->top); 1046 str = strV(cp->L->top);
1037 } 1047 }
@@ -1083,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
1083 if (cp->tok == CTOK_IDENT) { 1093 if (cp->tok == CTOK_IDENT) {
1084 GCstr *attrstr = cp->str; 1094 GCstr *attrstr = cp->str;
1085 cp_next(cp); 1095 cp_next(cp);
1086 switch (attrstr->hash) { 1096 switch (lj_cparse_case(attrstr,
1087 case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ 1097 "\007aligned" "\013__aligned__"
1098 "\006packed" "\012__packed__"
1099 "\004mode" "\010__mode__"
1100 "\013vector_size" "\017__vector_size__"
1101#if LJ_TARGET_X86
1102 "\007regparm" "\013__regparm__"
1103 "\005cdecl" "\011__cdecl__"
1104 "\010thiscall" "\014__thiscall__"
1105 "\010fastcall" "\014__fastcall__"
1106 "\007stdcall" "\013__stdcall__"
1107 "\012sseregparm" "\016__sseregparm__"
1108#endif
1109 )) {
1110 case 0: case 1: /* aligned */
1088 cp_decl_align(cp, decl); 1111 cp_decl_align(cp, decl);
1089 break; 1112 break;
1090 case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ 1113 case 2: case 3: /* packed */
1091 decl->attr |= CTFP_PACKED; 1114 decl->attr |= CTFP_PACKED;
1092 break; 1115 break;
1093 case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ 1116 case 4: case 5: /* mode */
1094 cp_decl_mode(cp, decl); 1117 cp_decl_mode(cp, decl);
1095 break; 1118 break;
1096 case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ 1119 case 6: case 7: /* vector_size */
1097 { 1120 {
1098 CTSize vsize = cp_decl_sizeattr(cp); 1121 CTSize vsize = cp_decl_sizeattr(cp);
1099 if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); 1122 if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize));
1100 } 1123 }
1101 break; 1124 break;
1102#if LJ_TARGET_X86 1125#if LJ_TARGET_X86
1103 case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ 1126 case 8: case 9: /* regparm */
1104 CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); 1127 CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp));
1105 decl->fattr |= CTFP_CCONV; 1128 decl->fattr |= CTFP_CCONV;
1106 break; 1129 break;
1107 case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ 1130 case 10: case 11: /* cdecl */
1108 CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); 1131 CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL);
1109 decl->fattr |= CTFP_CCONV; 1132 decl->fattr |= CTFP_CCONV;
1110 break; 1133 break;
1111 case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ 1134 case 12: case 13: /* thiscall */
1112 CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); 1135 CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL);
1113 decl->fattr |= CTFP_CCONV; 1136 decl->fattr |= CTFP_CCONV;
1114 break; 1137 break;
1115 case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ 1138 case 14: case 15: /* fastcall */
1116 CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); 1139 CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL);
1117 decl->fattr |= CTFP_CCONV; 1140 decl->fattr |= CTFP_CCONV;
1118 break; 1141 break;
1119 case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ 1142 case 16: case 17: /* stdcall */
1120 CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); 1143 CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL);
1121 decl->fattr |= CTFP_CCONV; 1144 decl->fattr |= CTFP_CCONV;
1122 break; 1145 break;
1123 case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ 1146 case 18: case 19: /* sseregparm */
1124 decl->fattr |= CTF_SSEREGPARM; 1147 decl->fattr |= CTF_SSEREGPARM;
1125 decl->fattr |= CTFP_CCONV; 1148 decl->fattr |= CTFP_CCONV;
1126 break; 1149 break;
@@ -1152,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl)
1152 while (cp->tok == CTOK_IDENT) { 1175 while (cp->tok == CTOK_IDENT) {
1153 GCstr *attrstr = cp->str; 1176 GCstr *attrstr = cp->str;
1154 cp_next(cp); 1177 cp_next(cp);
1155 switch (attrstr->hash) { 1178 if (cp_str_is(attrstr, "align")) {
1156 case H_(bc2395fa,98f267f8): /* align */
1157 cp_decl_align(cp, decl); 1179 cp_decl_align(cp, decl);
1158 break; 1180 } else { /* Ignore all other attributes. */
1159 default: /* Ignore all other attributes. */
1160 if (cp_opt(cp, '(')) { 1181 if (cp_opt(cp, '(')) {
1161 while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); 1182 while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp);
1162 cp_check(cp, ')'); 1183 cp_check(cp, ')');
1163 } 1184 }
1164 break;
1165 } 1185 }
1166 } 1186 }
1167 cp_check(cp, ')'); 1187 cp_check(cp, ')');
@@ -1572,7 +1592,7 @@ end_decl:
1572 cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); 1592 cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC);
1573 sz = sizeof(int); 1593 sz = sizeof(int);
1574 } 1594 }
1575 lua_assert(sz != 0); 1595 lj_assertCP(sz != 0, "basic ctype with zero size");
1576 info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ 1596 info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */
1577 info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */ 1597 info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */
1578 cp_push(decl, info, sz); 1598 cp_push(decl, info, sz);
@@ -1741,17 +1761,16 @@ static CTypeID cp_decl_abstract(CPState *cp)
1741static void cp_pragma(CPState *cp, BCLine pragmaline) 1761static void cp_pragma(CPState *cp, BCLine pragmaline)
1742{ 1762{
1743 cp_next(cp); 1763 cp_next(cp);
1744 if (cp->tok == CTOK_IDENT && 1764 if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) {
1745 cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */
1746 cp_next(cp); 1765 cp_next(cp);
1747 cp_check(cp, '('); 1766 cp_check(cp, '(');
1748 if (cp->tok == CTOK_IDENT) { 1767 if (cp->tok == CTOK_IDENT) {
1749 if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ 1768 if (cp_str_is(cp->str, "push")) {
1750 if (cp->curpack < CPARSE_MAX_PACKSTACK) { 1769 if (cp->curpack < CPARSE_MAX_PACKSTACK) {
1751 cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; 1770 cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
1752 cp->curpack++; 1771 cp->curpack++;
1753 } 1772 }
1754 } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ 1773 } else if (cp_str_is(cp->str, "pop")) {
1755 if (cp->curpack > 0) cp->curpack--; 1774 if (cp->curpack > 0) cp->curpack--;
1756 } else { 1775 } else {
1757 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); 1776 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
@@ -1773,6 +1792,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
1773 } 1792 }
1774} 1793}
1775 1794
1795/* Handle line number. */
1796static void cp_line(CPState *cp, BCLine hashline)
1797{
1798 BCLine newline = cp->val.u32;
1799 /* TODO: Handle file name and include it in error messages. */
1800 while (cp->tok != CTOK_EOF && cp->linenumber == hashline)
1801 cp_next(cp);
1802 cp->linenumber = newline;
1803}
1804
1776/* Parse multiple C declarations of types or extern identifiers. */ 1805/* Parse multiple C declarations of types or extern identifiers. */
1777static void cp_decl_multi(CPState *cp) 1806static void cp_decl_multi(CPState *cp)
1778{ 1807{
@@ -1785,12 +1814,21 @@ static void cp_decl_multi(CPState *cp)
1785 continue; 1814 continue;
1786 } 1815 }
1787 if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ 1816 if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */
1788 BCLine pragmaline = cp->linenumber; 1817 BCLine hashline = cp->linenumber;
1789 if (!(cp_next(cp) == CTOK_IDENT && 1818 CPToken tok = cp_next(cp);
1790 cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */ 1819 if (tok == CTOK_INTEGER) {
1820 cp_line(cp, hashline);
1821 continue;
1822 } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) {
1823 if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok);
1824 cp_line(cp, hashline);
1825 continue;
1826 } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) {
1827 cp_pragma(cp, hashline);
1828 continue;
1829 } else {
1791 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); 1830 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
1792 cp_pragma(cp, pragmaline); 1831 }
1793 continue;
1794 } 1832 }
1795 scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); 1833 scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC);
1796 if ((cp->tok == ';' || cp->tok == CTOK_EOF) && 1834 if ((cp->tok == ';' || cp->tok == CTOK_EOF) &&
@@ -1814,7 +1852,7 @@ static void cp_decl_multi(CPState *cp)
1814 /* Treat both static and extern function declarations as extern. */ 1852 /* Treat both static and extern function declarations as extern. */
1815 ct = ctype_get(cp->cts, ctypeid); 1853 ct = ctype_get(cp->cts, ctypeid);
1816 /* We always get new anonymous functions (typedefs are copied). */ 1854 /* We always get new anonymous functions (typedefs are copied). */
1817 lua_assert(gcref(ct->name) == NULL); 1855 lj_assertCP(gcref(ct->name) == NULL, "unexpected named function");
1818 id = ctypeid; /* Just name it. */ 1856 id = ctypeid; /* Just name it. */
1819 } else if ((scl & CDF_STATIC)) { /* Accept static constants. */ 1857 } else if ((scl & CDF_STATIC)) { /* Accept static constants. */
1820 id = cp_decl_constinit(cp, &ct, ctypeid); 1858 id = cp_decl_constinit(cp, &ct, ctypeid);
@@ -1856,8 +1894,6 @@ static void cp_decl_single(CPState *cp)
1856 if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); 1894 if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF);
1857} 1895}
1858 1896
1859#undef H_
1860
1861/* ------------------------------------------------------------------------ */ 1897/* ------------------------------------------------------------------------ */
1862 1898
1863/* Protected callback for C parser. */ 1899/* Protected callback for C parser. */
@@ -1873,7 +1909,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud)
1873 cp_decl_single(cp); 1909 cp_decl_single(cp);
1874 if (cp->param && cp->param != cp->L->top) 1910 if (cp->param && cp->param != cp->L->top)
1875 cp_err(cp, LJ_ERR_FFI_NUMPARAM); 1911 cp_err(cp, LJ_ERR_FFI_NUMPARAM);
1876 lua_assert(cp->depth == 0); 1912 lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth");
1877 return NULL; 1913 return NULL;
1878} 1914}
1879 1915
diff --git a/src/lj_cparse.h b/src/lj_cparse.h
index 81aa3839..fd88a9f4 100644
--- a/src/lj_cparse.h
+++ b/src/lj_cparse.h
@@ -60,6 +60,8 @@ typedef struct CPState {
60 60
61LJ_FUNC int lj_cparse(CPState *cp); 61LJ_FUNC int lj_cparse(CPState *cp);
62 62
63LJ_FUNC int lj_cparse_case(GCstr *str, const char *match);
64
63#endif 65#endif
64 66
65#endif 67#endif
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 70305069..95850611 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -11,13 +11,13 @@
11#if LJ_HASJIT && LJ_HASFFI 11#if LJ_HASJIT && LJ_HASFFI
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h" 14#include "lj_tab.h"
16#include "lj_frame.h" 15#include "lj_frame.h"
17#include "lj_ctype.h" 16#include "lj_ctype.h"
18#include "lj_cdata.h" 17#include "lj_cdata.h"
19#include "lj_cparse.h" 18#include "lj_cparse.h"
20#include "lj_cconv.h" 19#include "lj_cconv.h"
20#include "lj_carith.h"
21#include "lj_clib.h" 21#include "lj_clib.h"
22#include "lj_ccall.h" 22#include "lj_ccall.h"
23#include "lj_ff.h" 23#include "lj_ff.h"
@@ -31,6 +31,7 @@
31#include "lj_snap.h" 31#include "lj_snap.h"
32#include "lj_crecord.h" 32#include "lj_crecord.h"
33#include "lj_dispatch.h" 33#include "lj_dispatch.h"
34#include "lj_strfmt.h"
34 35
35/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
36#define IR(ref) (&J->cur.ir[(ref)]) 37#define IR(ref) (&J->cur.ir[(ref)])
@@ -60,7 +61,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o)
60static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) 61static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr)
61{ 62{
62 CTypeID id; 63 CTypeID id;
63 lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID); 64 lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID,
65 "expected CTypeID cdata");
64 id = *(CTypeID *)cdataptr(cd); 66 id = *(CTypeID *)cdataptr(cd);
65 tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); 67 tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT);
66 emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); 68 emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id));
@@ -211,7 +213,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp,
211 ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); 213 ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0);
212 ml[i].trofs = trofs; 214 ml[i].trofs = trofs;
213 i++; 215 i++;
214 rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; 216 rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1;
215 if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ 217 if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */
216 rwin = 0; 218 rwin = 0;
217 for ( ; j < i; j++) { 219 for ( ; j < i; j++) {
@@ -236,13 +238,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen,
236 if (len > CREC_COPY_MAXLEN) goto fallback; 238 if (len > CREC_COPY_MAXLEN) goto fallback;
237 if (ct) { 239 if (ct) {
238 CTState *cts = ctype_ctsG(J2G(J)); 240 CTState *cts = ctype_ctsG(J2G(J));
239 lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info)); 241 lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info),
242 "copy of non-aggregate");
240 if (ctype_isarray(ct->info)) { 243 if (ctype_isarray(ct->info)) {
241 CType *cct = ctype_rawchild(cts, ct); 244 CType *cct = ctype_rawchild(cts, ct);
242 tp = crec_ct2irt(cts, cct); 245 tp = crec_ct2irt(cts, cct);
243 if (tp == IRT_CDATA) goto rawcopy; 246 if (tp == IRT_CDATA) goto rawcopy;
244 step = lj_ir_type_size[tp]; 247 step = lj_ir_type_size[tp];
245 lua_assert((len & (step-1)) == 0); 248 lj_assertJ((len & (step-1)) == 0, "copy of fractional size");
246 } else if ((ct->info & CTF_UNION)) { 249 } else if ((ct->info & CTF_UNION)) {
247 step = (1u << ctype_align(ct->info)); 250 step = (1u << ctype_align(ct->info));
248 goto rawcopy; 251 goto rawcopy;
@@ -441,7 +444,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
441 /* fallthrough */ 444 /* fallthrough */
442 case CCX(I, F): 445 case CCX(I, F):
443 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 446 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
444 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 447 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
445 goto xstore; 448 goto xstore;
446 case CCX(I, P): 449 case CCX(I, P):
447 case CCX(I, A): 450 case CCX(I, A):
@@ -521,7 +524,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
521 if (st == IRT_CDATA) goto err_nyi; 524 if (st == IRT_CDATA) goto err_nyi;
522 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 525 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
523 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 526 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
524 st, IRCONV_TRUNC|IRCONV_ANY); 527 st, IRCONV_ANY);
525 goto xstore; 528 goto xstore;
526 529
527 /* Destination is an array. */ 530 /* Destination is an array. */
@@ -613,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
613 sp = lj_ir_kptr(J, NULL); 616 sp = lj_ir_kptr(J, NULL);
614 } else if (tref_isudata(sp)) { 617 } else if (tref_isudata(sp)) {
615 GCudata *ud = udataV(sval); 618 GCudata *ud = udataV(sval);
616 if (ud->udtype == UDTYPE_IO_FILE) { 619 if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) {
617 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); 620 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE);
618 emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); 621 emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype));
619 sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE); 622 sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp,
623 ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE :
624 IRFL_SBUF_R);
620 } else { 625 } else {
621 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); 626 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata)));
622 } 627 }
@@ -628,7 +633,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
628 /* Specialize to the name of the enum constant. */ 633 /* Specialize to the name of the enum constant. */
629 emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); 634 emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str));
630 if (cct && ctype_isconstval(cct->info)) { 635 if (cct && ctype_isconstval(cct->info)) {
631 lua_assert(ctype_child(cts, cct)->size == 4); 636 lj_assertJ(ctype_child(cts, cct)->size == 4,
637 "only 32 bit const supported"); /* NYI */
632 svisnz = (void *)(intptr_t)(ofs != 0); 638 svisnz = (void *)(intptr_t)(ofs != 0);
633 sp = lj_ir_kint(J, (int32_t)ofs); 639 sp = lj_ir_kint(J, (int32_t)ofs);
634 sid = ctype_cid(cct->info); 640 sid = ctype_cid(cct->info);
@@ -640,12 +646,22 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
640 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); 646 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
641 sid = CTID_A_CCHAR; 647 sid = CTID_A_CCHAR;
642 } 648 }
643 } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ 649 } else if (tref_islightud(sp)) {
650#if LJ_64
651 lj_trace_err(J, LJ_TRERR_NYICONV);
652#endif
653 } else { /* NYI: tref_istab(sp). */
644 IRType t; 654 IRType t;
645 sid = argv2cdata(J, sp, sval)->ctypeid; 655 sid = argv2cdata(J, sp, sval)->ctypeid;
646 s = ctype_raw(cts, sid); 656 s = ctype_raw(cts, sid);
647 svisnz = cdataptr(cdataV(sval)); 657 svisnz = cdataptr(cdataV(sval));
648 t = crec_ct2irt(cts, s); 658 if (ctype_isfunc(s->info)) {
659 sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
660 s = ctype_get(cts, sid);
661 t = IRT_PTR;
662 } else {
663 t = crec_ct2irt(cts, s);
664 }
649 if (ctype_isptr(s->info)) { 665 if (ctype_isptr(s->info)) {
650 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); 666 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
651 if (ctype_isref(s->info)) { 667 if (ctype_isref(s->info)) {
@@ -700,6 +716,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
700 return tr; 716 return tr;
701} 717}
702 718
719/* Tailcall to function. */
720static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
721{
722 TRef kfunc = lj_ir_kfunc(J, funcV(tv));
723#if LJ_FR2
724 J->base[-2] = kfunc;
725 J->base[-1] = TREF_FRAME;
726#else
727 J->base[-1] = kfunc | TREF_FRAME;
728#endif
729 rd->nres = -1; /* Pending tailcall. */
730}
731
703/* Record ctype __index/__newindex metamethods. */ 732/* Record ctype __index/__newindex metamethods. */
704static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, 733static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
705 RecordFFData *rd) 734 RecordFFData *rd)
@@ -709,8 +738,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
709 if (!tv) 738 if (!tv)
710 lj_trace_err(J, LJ_TRERR_BADTYPE); 739 lj_trace_err(J, LJ_TRERR_BADTYPE);
711 if (tvisfunc(tv)) { 740 if (tvisfunc(tv)) {
712 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 741 crec_tailcall(J, rd, tv);
713 rd->nres = -1; /* Pending tailcall. */
714 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { 742 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
715 /* Specialize to result of __index lookup. */ 743 /* Specialize to result of __index lookup. */
716 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); 744 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
@@ -727,6 +755,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
727 } 755 }
728} 756}
729 757
758/* Record bitfield load/store. */
759static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info)
760{
761 IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0);
762 TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0);
763 CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz;
764 lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */
765 if (rd->data == 0) { /* __index metamethod. */
766 if ((info & CTF_BOOL)) {
767 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos))));
768 /* Assume not equal to zero. Fixup and emit pending guard later. */
769 lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0));
770 J->postproc = LJ_POST_FIXGUARD;
771 tr = TREF_TRUE;
772 } else if (!(info & CTF_UNSIGNED)) {
773 tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos));
774 tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift));
775 } else {
776 lj_assertJ(bsz < 32, "unexpected full bitfield index");
777 tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos));
778 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1)));
779 /* We can omit the U32 to NUM conversion, since bsz < 32. */
780 }
781 J->base[0] = tr;
782 } else { /* __newindex metamethod. */
783 CTState *cts = ctype_ctsG(J2G(J));
784 CType *ct = ctype_get(cts,
785 (info & CTF_BOOL) ? CTID_BOOL :
786 (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32);
787 int32_t mask = (int32_t)(((1u << bsz)-1) << pos);
788 TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]);
789 sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos));
790 /* Use of the target type avoids forwarding conversions. */
791 sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask));
792 tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask));
793 tr = emitir(IRT(IR_BOR, t), tr, sp);
794 emitir(IRT(IR_XSTORE, t), ptr, tr);
795 rd->nres = 0;
796 J->needsnap = 1;
797 }
798}
799
730void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) 800void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
731{ 801{
732 TRef idx, ptr = J->base[0]; 802 TRef idx, ptr = J->base[0];
@@ -801,6 +871,7 @@ again:
801 CType *fct; 871 CType *fct;
802 fct = lj_ctype_getfield(cts, ct, name, &fofs); 872 fct = lj_ctype_getfield(cts, ct, name, &fofs);
803 if (fct) { 873 if (fct) {
874 ofs += (ptrdiff_t)fofs;
804 /* Always specialize to the field name. */ 875 /* Always specialize to the field name. */
805 emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); 876 emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name));
806 if (ctype_isconstval(fct->info)) { 877 if (ctype_isconstval(fct->info)) {
@@ -812,12 +883,14 @@ again:
812 J->base[0] = lj_ir_kint(J, (int32_t)fct->size); 883 J->base[0] = lj_ir_kint(J, (int32_t)fct->size);
813 return; /* Interpreter will throw for newindex. */ 884 return; /* Interpreter will throw for newindex. */
814 } else if (ctype_isbitfield(fct->info)) { 885 } else if (ctype_isbitfield(fct->info)) {
815 lj_trace_err(J, LJ_TRERR_NYICONV); 886 if (ofs)
887 ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
888 crec_index_bf(J, rd, ptr, fct->info);
889 return;
816 } else { 890 } else {
817 lua_assert(ctype_isfield(fct->info)); 891 lj_assertJ(ctype_isfield(fct->info), "field expected");
818 sid = ctype_cid(fct->info); 892 sid = ctype_cid(fct->info);
819 } 893 }
820 ofs += (ptrdiff_t)fofs;
821 } 894 }
822 } else if (ctype_iscomplex(ct->info)) { 895 } else if (ctype_iscomplex(ct->info)) {
823 if (name->len == 2 && 896 if (name->len == 2 &&
@@ -867,21 +940,17 @@ again:
867} 940}
868 941
869/* Record setting a finalizer. */ 942/* Record setting a finalizer. */
870static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) 943static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
871{ 944{
872 TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); 945 if (tvisgcv(fin)) {
873 TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); 946 if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
874 if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } 947 } else if (tvisnil(fin)) {
875 if (tvisfunc(fin)) { 948 trfin = lj_ir_kptr(J, NULL);
876 emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
877 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
878 } else if (tviscdata(fin)) {
879 emitir(IRT(IR_XSTORE, IRT_P32), trlo,
880 lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
881 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
882 } else { 949 } else {
883 lj_trace_err(J, LJ_TRERR_BADTYPE); 950 lj_trace_err(J, LJ_TRERR_BADTYPE);
884 } 951 }
952 lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
953 trfin, lj_ir_kint(J, (int32_t)itype(fin)));
885 J->needsnap = 1; 954 J->needsnap = 1;
886} 955}
887 956
@@ -892,10 +961,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
892 CTSize sz; 961 CTSize sz;
893 CTInfo info = lj_ctype_info(cts, id, &sz); 962 CTInfo info = lj_ctype_info(cts, id, &sz);
894 CType *d = ctype_raw(cts, id); 963 CType *d = ctype_raw(cts, id);
895 TRef trid; 964 TRef trcd, trid = lj_ir_kint(J, id);
896 if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 965 cTValue *fin;
897 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
898 trid = lj_ir_kint(J, id);
899 /* Use special instruction to box pointer or 32/64 bit integer. */ 966 /* Use special instruction to box pointer or 32/64 bit integer. */
900 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { 967 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
901 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : 968 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -903,11 +970,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
903 sz == 4 ? lj_ir_kint(J, 0) : 970 sz == 4 ? lj_ir_kint(J, 0) :
904 (lj_needsplit(J), lj_ir_kint64(J, 0)); 971 (lj_needsplit(J), lj_ir_kint64(J, 0));
905 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); 972 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
973 return;
906 } else { 974 } else {
907 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 975 TRef trsz = TREF_NIL;
908 cTValue *fin; 976 if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */
909 J->base[0] = trcd; 977 CTSize sz0, sz1;
910 if (J->base[1] && !J->base[2] && 978 if (!J->base[1] || J->base[2])
979 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */
980 trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
981 J->base[1], &rd->argv[1]);
982 sz0 = lj_ctype_vlsize(cts, d, 0);
983 sz1 = lj_ctype_vlsize(cts, d, 1);
984 trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
985 trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
986 J->base[1] = 0; /* Simplify logic below. */
987 } else if (ctype_align(info) > CT_MEMALIGN) {
988 trsz = lj_ir_kint(J, sz);
989 }
990 trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
991 if (sz > 128 || (info & CTF_VLA)) {
992 TRef dp;
993 CTSize align;
994 special: /* Only handle bulk zero-fill for large/VLA/VLS types. */
995 if (J->base[1])
996 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */
997 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
998 if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
999 align = ctype_align(info);
1000 if (align < CT_MEMALIGN) align = CT_MEMALIGN;
1001 crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
1002 } else if (J->base[1] && !J->base[2] &&
911 !lj_cconv_multi_init(cts, d, &rd->argv[1])) { 1003 !lj_cconv_multi_init(cts, d, &rd->argv[1])) {
912 goto single_init; 1004 goto single_init;
913 } else if (ctype_isarray(d->info)) { 1005 } else if (ctype_isarray(d->info)) {
@@ -918,8 +1010,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
918 TValue *sval = &tv; 1010 TValue *sval = &tv;
919 MSize i; 1011 MSize i;
920 tv.u64 = 0; 1012 tv.u64 = 0;
921 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 1013 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
922 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 1014 esize * CREC_FILL_MAXUNROLL < sz)
1015 goto special;
923 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 1016 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
924 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, 1017 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
925 lj_ir_kintp(J, ofs + sizeof(GCcdata))); 1018 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -933,8 +1026,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
933 crec_ct_tv(J, dc, dp, sp, sval); 1026 crec_ct_tv(J, dc, dp, sp, sval);
934 } 1027 }
935 } else if (ctype_isstruct(d->info)) { 1028 } else if (ctype_isstruct(d->info)) {
936 CTypeID fid = d->sib; 1029 CTypeID fid;
937 MSize i = 1; 1030 MSize i = 1;
1031 if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */
1032 fid = d->sib;
1033 while (fid) {
1034 CType *df = ctype_get(cts, fid);
1035 fid = df->sib;
1036 if (ctype_isfield(df->info)) {
1037 CType *dc;
1038 if (!gcref(df->name)) continue; /* Ignore unnamed fields. */
1039 dc = ctype_rawchild(cts, df); /* Field type. */
1040 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) ||
1041 ctype_isenum(dc->info)))
1042 goto special;
1043 } else if (!ctype_isconstval(df->info)) {
1044 goto special;
1045 }
1046 }
1047 }
1048 fid = d->sib;
938 while (fid) { 1049 while (fid) {
939 CType *df = ctype_get(cts, fid); 1050 CType *df = ctype_get(cts, fid);
940 fid = df->sib; 1051 fid = df->sib;
@@ -981,11 +1092,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
981 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); 1092 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
982 } 1093 }
983 } 1094 }
984 /* Handle __gc metamethod. */
985 fin = lj_ctype_meta(cts, id, MM_gc);
986 if (fin)
987 crec_finalizer(J, trcd, fin);
988 } 1095 }
1096 J->base[0] = trcd;
1097 /* Handle __gc metamethod. */
1098 fin = lj_ctype_meta(cts, id, MM_gc);
1099 if (fin)
1100 crec_finalizer(J, trcd, 0, fin);
989} 1101}
990 1102
991/* Record argument conversions. */ 1103/* Record argument conversions. */
@@ -1026,7 +1138,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
1026 if (fid) { /* Get argument type from field. */ 1138 if (fid) { /* Get argument type from field. */
1027 CType *ctf = ctype_get(cts, fid); 1139 CType *ctf = ctype_get(cts, fid);
1028 fid = ctf->sib; 1140 fid = ctf->sib;
1029 lua_assert(ctype_isfield(ctf->info)); 1141 lj_assertJ(ctype_isfield(ctf->info), "field expected");
1030 did = ctype_cid(ctf->info); 1142 did = ctype_cid(ctf->info);
1031 } else { 1143 } else {
1032 if (!(ct->info & CTF_VARARG)) 1144 if (!(ct->info & CTF_VARARG))
@@ -1045,7 +1157,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
1045 else 1157 else
1046 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); 1158 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
1047 } 1159 }
1048 } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { 1160 } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) {
1049 lj_needsplit(J); 1161 lj_needsplit(J);
1050 } 1162 }
1051#if LJ_TARGET_X86 1163#if LJ_TARGET_X86
@@ -1091,20 +1203,20 @@ static void crec_snap_caller(jit_State *J)
1091 lua_State *L = J->L; 1203 lua_State *L = J->L;
1092 TValue *base = L->base, *top = L->top; 1204 TValue *base = L->base, *top = L->top;
1093 const BCIns *pc = J->pc; 1205 const BCIns *pc = J->pc;
1094 TRef ftr = J->base[-1]; 1206 TRef ftr = J->base[-1-LJ_FR2];
1095 ptrdiff_t delta; 1207 ptrdiff_t delta;
1096 if (!frame_islua(base-1) || J->framedepth <= 0) 1208 if (!frame_islua(base-1) || J->framedepth <= 0)
1097 lj_trace_err(J, LJ_TRERR_NYICALL); 1209 lj_trace_err(J, LJ_TRERR_NYICALL);
1098 J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]); 1210 J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
1099 L->top = base; L->base = base - delta; 1211 L->top = base; L->base = base - delta;
1100 J->base[-1] = TREF_FALSE; 1212 J->base[-1-LJ_FR2] = TREF_FALSE;
1101 J->base -= delta; J->baseslot -= (BCReg)delta; 1213 J->base -= delta; J->baseslot -= (BCReg)delta;
1102 J->maxslot = (BCReg)delta; J->framedepth--; 1214 J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
1103 lj_snap_add(J); 1215 lj_snap_add(J);
1104 L->base = base; L->top = top; 1216 L->base = base; L->top = top;
1105 J->framedepth++; J->maxslot = 1; 1217 J->framedepth++; J->maxslot = 1;
1106 J->base += delta; J->baseslot += (BCReg)delta; 1218 J->base += delta; J->baseslot += (BCReg)delta;
1107 J->base[-1] = ftr; J->pc = pc; 1219 J->base[-1-LJ_FR2] = ftr; J->pc = pc;
1108} 1220}
1109 1221
1110/* Record function call. */ 1222/* Record function call. */
@@ -1124,8 +1236,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
1124 TRef tr; 1236 TRef tr;
1125 TValue tv; 1237 TValue tv;
1126 /* Check for blacklisted C functions that might call a callback. */ 1238 /* Check for blacklisted C functions that might call a callback. */
1127 setlightudV(&tv, 1239 tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000);
1128 cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4));
1129 if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) 1240 if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv)))
1130 lj_trace_err(J, LJ_TRERR_BLACKL); 1241 lj_trace_err(J, LJ_TRERR_BLACKL);
1131 if (ctype_isvoid(ctr->info)) { 1242 if (ctype_isvoid(ctr->info)) {
@@ -1196,8 +1307,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
1196 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); 1307 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
1197 if (tv) { 1308 if (tv) {
1198 if (tvisfunc(tv)) { 1309 if (tvisfunc(tv)) {
1199 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1310 crec_tailcall(J, rd, tv);
1200 rd->nres = -1; /* Pending tailcall. */
1201 return; 1311 return;
1202 } 1312 }
1203 } else if (mm == MM_new) { 1313 } else if (mm == MM_new) {
@@ -1238,7 +1348,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1238 for (i = 0; i < 2; i++) { 1348 for (i = 0; i < 2; i++) {
1239 IRType st = tref_type(sp[i]); 1349 IRType st = tref_type(sp[i]);
1240 if (st == IRT_NUM || st == IRT_FLOAT) 1350 if (st == IRT_NUM || st == IRT_FLOAT)
1241 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1351 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1242 else if (!(st == IRT_I64 || st == IRT_U64)) 1352 else if (!(st == IRT_I64 || st == IRT_U64))
1243 sp[i] = emitconv(sp[i], dt, IRT_INT, 1353 sp[i] = emitconv(sp[i], dt, IRT_INT,
1244 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1354 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1307,15 +1417,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1307 CTypeID id; 1417 CTypeID id;
1308#if LJ_64 1418#if LJ_64
1309 if (t == IRT_NUM || t == IRT_FLOAT) 1419 if (t == IRT_NUM || t == IRT_FLOAT)
1310 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1420 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1311 else if (!(t == IRT_I64 || t == IRT_U64)) 1421 else if (!(t == IRT_I64 || t == IRT_U64))
1312 tr = emitconv(tr, IRT_INTP, IRT_INT, 1422 tr = emitconv(tr, IRT_INTP, IRT_INT,
1313 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1423 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1314#else 1424#else
1315 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1425 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1316 tr = emitconv(tr, IRT_INTP, t, 1426 tr = emitconv(tr, IRT_INTP, t,
1317 (t == IRT_NUM || t == IRT_FLOAT) ? 1427 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1318 IRCONV_TRUNC|IRCONV_ANY : 0);
1319 } 1428 }
1320#endif 1429#endif
1321 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1430 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1347,8 +1456,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
1347 } 1456 }
1348 if (tv) { 1457 if (tv) {
1349 if (tvisfunc(tv)) { 1458 if (tvisfunc(tv)) {
1350 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1459 crec_tailcall(J, rd, tv);
1351 rd->nres = -1; /* Pending tailcall. */
1352 return 0; 1460 return 0;
1353 } /* NYI: non-function metamethods. */ 1461 } /* NYI: non-function metamethods. */
1354 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ 1462 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */
@@ -1460,8 +1568,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
1460 !irt_isguard(J->guardemit)) { 1568 !irt_isguard(J->guardemit)) {
1461 const BCIns *pc = frame_contpc(J->L->base-1) - 1; 1569 const BCIns *pc = frame_contpc(J->L->base-1) - 1;
1462 if (bc_op(*pc) <= BC_ISNEP) { 1570 if (bc_op(*pc) <= BC_ISNEP) {
1463 setframe_pc(&J2G(J)->tmptv, pc); 1571 J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
1464 J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
1465 J->postproc = LJ_POST_FIXCOMP; 1572 J->postproc = LJ_POST_FIXCOMP;
1466 } 1573 }
1467 } 1574 }
@@ -1650,7 +1757,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
1650void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) 1757void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
1651{ 1758{
1652 argv2cdata(J, J->base[0], &rd->argv[0]); 1759 argv2cdata(J, J->base[0], &rd->argv[0]);
1653 crec_finalizer(J, J->base[0], &rd->argv[1]); 1760 if (!J->base[1])
1761 lj_trace_err(J, LJ_TRERR_BADTYPE);
1762 crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
1763}
1764
1765/* -- 64 bit bit.* library functions -------------------------------------- */
1766
1767/* Determine bit operation type from argument type. */
1768static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
1769{
1770 if (tviscdata(tv)) {
1771 CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
1772 if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
1773 if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
1774 CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
1775 return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
1776 return CTID_INT64; /* Otherwise use int64_t. */
1777 }
1778 return 0; /* Use regular 32 bit ops. */
1779}
1780
1781void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
1782{
1783 CTState *cts = ctype_ctsG(J2G(J));
1784 TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1785 J->base[0], &rd->argv[0]);
1786 if (!tref_isinteger(tr))
1787 tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
1788 J->base[0] = tr;
1789}
1790
1791int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
1792{
1793 CTState *cts = ctype_ctsG(J2G(J));
1794 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1795 if (id) {
1796 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1797 tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
1798 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1799 return 1;
1800 }
1801 return 0;
1802}
1803
1804int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
1805{
1806 CTState *cts = ctype_ctsG(J2G(J));
1807 CTypeID id = 0;
1808 MSize i;
1809 for (i = 0; J->base[i] != 0; i++) {
1810 CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
1811 if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
1812 }
1813 if (id) {
1814 CType *ct = ctype_get(cts, id);
1815 uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
1816 TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
1817 for (i = 1; J->base[i] != 0; i++) {
1818 TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
1819 tr = emitir(ot, tr, tr2);
1820 }
1821 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1822 return 1;
1823 }
1824 return 0;
1825}
1826
1827int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1828{
1829 CTState *cts = ctype_ctsG(J2G(J));
1830 CTypeID id;
1831 TRef tsh = 0;
1832 if (J->base[0] && tref_iscdata(J->base[1])) {
1833 tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1834 J->base[1], &rd->argv[1]);
1835 if (!tref_isinteger(tsh))
1836 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1837 J->base[1] = tsh;
1838 }
1839 id = crec_bit64_type(cts, &rd->argv[0]);
1840 if (id) {
1841 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1842 uint32_t op = rd->data;
1843 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1844 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1845 !tref_isk(tsh))
1846 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
1847#ifdef LJ_TARGET_UNIFYROT
1848 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1849 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1850 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
1851 }
1852#endif
1853 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1854 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1855 return 1;
1856 }
1857 return 0;
1858}
1859
1860TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
1861{
1862 CTState *cts = ctype_ctsG(J2G(J));
1863 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1864 TRef tr, trsf = J->base[1];
1865 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
1866 int32_t n;
1867 if (trsf) {
1868 CTypeID id2 = 0;
1869 n = (int32_t)lj_carith_check64(J->L, 2, &id2);
1870 if (id2)
1871 trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
1872 else
1873 trsf = lj_opt_narrow_tobit(J, trsf);
1874 emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */
1875 } else {
1876 n = id ? 16 : 8;
1877 }
1878 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
1879 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
1880 if (id) {
1881 tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1882 if (n < 16)
1883 tr = emitir(IRT(IR_BAND, IRT_U64), tr,
1884 lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
1885 } else {
1886 tr = lj_opt_narrow_tobit(J, J->base[0]);
1887 if (n < 8)
1888 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
1889 tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */
1890 lj_needsplit(J);
1891 }
1892 return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
1654} 1893}
1655 1894
1656/* -- Miscellaneous library functions ------------------------------------- */ 1895/* -- Miscellaneous library functions ------------------------------------- */
@@ -1674,6 +1913,30 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
1674 } 1913 }
1675} 1914}
1676 1915
1916TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o)
1917{
1918 CTypeID id = argv2cdata(J, tr, o)->ctypeid;
1919 if (!(id == CTID_INT64 || id == CTID_UINT64))
1920 lj_trace_err(J, LJ_TRERR_BADTYPE);
1921 lj_needsplit(J);
1922 return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr,
1923 IRFL_CDATA_INT64);
1924}
1925
1926#if LJ_HASBUFFER
1927TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o)
1928{
1929 CTState *cts = ctype_ctsG(J2G(J));
1930 if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE);
1931 return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o);
1932}
1933
1934TRef lj_crecord_topuint8(jit_State *J, TRef tr)
1935{
1936 return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr);
1937}
1938#endif
1939
1677#undef IR 1940#undef IR
1678#undef emitir 1941#undef emitir
1679#undef emitconv 1942#undef emitconv
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index ed6a6254..e1a2d9c0 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,7 +25,19 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); 25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); 26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); 27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
28
29LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
30LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
31LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
32LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
33LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
34
28LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); 35LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
36LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o);
37#if LJ_HASBUFFER
38LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o);
39LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr);
40#endif
29#endif 41#endif
30 42
31#endif 43#endif
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index d2845f6f..6741437c 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -11,8 +11,10 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_str.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_strfmt.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_ccallback.h" 16#include "lj_ccallback.h"
17#include "lj_buf.h"
16 18
17/* -- C type definitions -------------------------------------------------- */ 19/* -- C type definitions -------------------------------------------------- */
18 20
@@ -37,6 +39,8 @@
37 _("uint64_t", UINT64) \ 39 _("uint64_t", UINT64) \
38 _("intptr_t", INT_PSZ) \ 40 _("intptr_t", INT_PSZ) \
39 _("uintptr_t", UINT_PSZ) \ 41 _("uintptr_t", UINT_PSZ) \
42 /* From POSIX. */ \
43 _("ssize_t", INT_PSZ) \
40 /* End of typedef list. */ 44 /* End of typedef list. */
41 45
42/* Keywords (only the ones we actually care for). */ 46/* Keywords (only the ones we actually care for). */
@@ -149,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp)
149{ 153{
150 CTypeID id = cts->top; 154 CTypeID id = cts->top;
151 CType *ct; 155 CType *ct;
152 lua_assert(cts->L); 156 lj_assertCTS(cts->L, "uninitialized cts->L");
153 if (LJ_UNLIKELY(id >= cts->sizetab)) { 157 if (LJ_UNLIKELY(id >= cts->sizetab)) {
154 if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); 158 if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV);
155#ifdef LUAJIT_CTYPE_CHECK_ANCHOR 159#ifdef LUAJIT_CTYPE_CHECK_ANCHOR
@@ -178,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size)
178{ 182{
179 uint32_t h = ct_hashtype(info, size); 183 uint32_t h = ct_hashtype(info, size);
180 CTypeID id = cts->hash[h]; 184 CTypeID id = cts->hash[h];
181 lua_assert(cts->L); 185 lj_assertCTS(cts->L, "uninitialized cts->L");
182 while (id) { 186 while (id) {
183 CType *ct = ctype_get(cts, id); 187 CType *ct = ctype_get(cts, id);
184 if (ct->info == info && ct->size == size) 188 if (ct->info == info && ct->size == size)
@@ -294,9 +298,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem)
294 } 298 }
295 ct = ctype_raw(cts, arrid); 299 ct = ctype_raw(cts, arrid);
296 } 300 }
297 lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */ 301 lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected");
298 ct = ctype_rawchild(cts, ct); /* Get array element. */ 302 ct = ctype_rawchild(cts, ct); /* Get array element. */
299 lua_assert(ctype_hassize(ct->info)); 303 lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size");
300 /* Calculate actual size of VLA and check for overflow. */ 304 /* Calculate actual size of VLA and check for overflow. */
301 xsz += (uint64_t)ct->size * nelem; 305 xsz += (uint64_t)ct->size * nelem;
302 return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID; 306 return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID;
@@ -319,7 +323,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp)
319 } else { 323 } else {
320 if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); 324 if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN);
321 qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); 325 qual |= (info & ~(CTF_ALIGN|CTMASK_CID));
322 lua_assert(ctype_hassize(info) || ctype_isfunc(info)); 326 lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info),
327 "ctype without size");
323 *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; 328 *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size;
324 break; 329 break;
325 } 330 }
@@ -524,7 +529,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id)
524 ctype_appc(ctr, ')'); 529 ctype_appc(ctr, ')');
525 break; 530 break;
526 default: 531 default:
527 lua_assert(0); 532 lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info);
528 break; 533 break;
529 } 534 }
530 ct = ctype_get(ctr->cts, ctype_cid(info)); 535 ct = ctype_get(ctr->cts, ctype_cid(info));
@@ -568,19 +573,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
568/* Convert complex to string with 'i' or 'I' suffix. */ 573/* Convert complex to string with 'i' or 'I' suffix. */
569GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) 574GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
570{ 575{
571 char buf[2*LJ_STR_NUMBUF+2+1]; 576 SBuf *sb = lj_buf_tmp_(L);
572 TValue re, im; 577 TValue re, im;
573 size_t len;
574 if (size == 2*sizeof(double)) { 578 if (size == 2*sizeof(double)) {
575 re.n = *(double *)sp; im.n = ((double *)sp)[1]; 579 re.n = *(double *)sp; im.n = ((double *)sp)[1];
576 } else { 580 } else {
577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; 581 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
578 } 582 }
579 len = lj_str_bufnum(buf, &re); 583 lj_strfmt_putfnum(sb, STRFMT_G14, re.n);
580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; 584 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+');
581 len += lj_str_bufnum(buf+len, &im); 585 lj_strfmt_putfnum(sb, STRFMT_G14, im.n);
582 buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; 586 lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i');
583 return lj_str_new(L, buf, len+1); 587 return lj_buf_str(L, sb);
584} 588}
585 589
586/* -- C type state -------------------------------------------------------- */ 590/* -- C type state -------------------------------------------------------- */
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 9f251d88..700250df 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -260,10 +260,16 @@ typedef struct CTState {
260 260
261#define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ 261#define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */
262 262
263#ifdef LUA_USE_ASSERT
264#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__))
265#else
266#define lj_assertCTS(c, ...) ((void)cts)
267#endif
268
263/* -- Predefined types ---------------------------------------------------- */ 269/* -- Predefined types ---------------------------------------------------- */
264 270
265/* Target-dependent types. */ 271/* Target-dependent types. */
266#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE 272#if LJ_TARGET_PPC
267#define CTTYDEFP(_) \ 273#define CTTYDEFP(_) \
268 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) 274 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2))
269#else 275#else
@@ -292,6 +298,7 @@ typedef struct CTState {
292 _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ 298 _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \
293 _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ 299 _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \
294 _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ 300 _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \
301 _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \
295 _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ 302 _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \
296 _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ 303 _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \
297 CTTYDEFP(_) \ 304 CTTYDEFP(_) \
@@ -383,6 +390,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
383 return cts; 390 return cts;
384} 391}
385 392
393/* Load FFI library on-demand. */
394#define ctype_loadffi(L) \
395 do { \
396 if (!ctype_ctsG(G(L))) { \
397 ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \
398 luaopen_ffi(L); \
399 L->top = (TValue *)(mref(L->stack, char) + oldtop); \
400 } \
401 } while (0)
402
386/* Save and restore state of C type table. */ 403/* Save and restore state of C type table. */
387#define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts) 404#define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts)
388#define LJ_CTYPE_RESTORE(cts) \ 405#define LJ_CTYPE_RESTORE(cts) \
@@ -392,7 +409,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
392/* Check C type ID for validity when assertions are enabled. */ 409/* Check C type ID for validity when assertions are enabled. */
393static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) 410static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id)
394{ 411{
395 lua_assert(id > 0 && id < cts->top); UNUSED(cts); 412 UNUSED(cts);
413 lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id);
396 return id; 414 return id;
397} 415}
398 416
@@ -408,8 +426,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id)
408/* Get child C type. */ 426/* Get child C type. */
409static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) 427static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct)
410{ 428{
411 lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || 429 lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) ||
412 ctype_isbitfield(ct->info))); /* These don't have children. */ 430 ctype_isbitfield(ct->info)),
431 "ctype %08x has no children", ct->info);
413 return ctype_get(cts, ctype_cid(ct->info)); 432 return ctype_get(cts, ctype_cid(ct->info));
414} 433}
415 434
diff --git a/src/lj_debug.c b/src/lj_debug.c
index e34442b7..3dffad90 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,12 +9,12 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_debug.h" 11#include "lj_debug.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_state.h" 14#include "lj_state.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_bc.h" 16#include "lj_bc.h"
17#include "lj_vm.h" 17#include "lj_strfmt.h"
18#if LJ_HASJIT 18#if LJ_HASJIT
19#include "lj_jit.h" 19#include "lj_jit.h"
20#endif 20#endif
@@ -24,11 +24,11 @@
24/* Get frame corresponding to a level. */ 24/* Get frame corresponding to a level. */
25cTValue *lj_debug_frame(lua_State *L, int level, int *size) 25cTValue *lj_debug_frame(lua_State *L, int level, int *size)
26{ 26{
27 cTValue *frame, *nextframe, *bot = tvref(L->stack); 27 cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
28 /* Traverse frames backwards. */ 28 /* Traverse frames backwards. */
29 for (nextframe = frame = L->base-1; frame > bot; ) { 29 for (nextframe = frame = L->base-1; frame > bot; ) {
30 if (frame_gc(frame) == obj2gco(L)) 30 if (frame_gc(frame) == obj2gco(L))
31 level++; /* Skip dummy frames. See lj_meta_call(). */ 31 level++; /* Skip dummy frames. See lj_err_optype_call(). */
32 if (level-- == 0) { 32 if (level-- == 0) {
33 *size = (int)(nextframe - frame); 33 *size = (int)(nextframe - frame);
34 return frame; /* Level found. */ 34 return frame; /* Level found. */
@@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
55 const BCIns *ins; 55 const BCIns *ins;
56 GCproto *pt; 56 GCproto *pt;
57 BCPos pos; 57 BCPos pos;
58 lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); 58 lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD,
59 "function or frame expected");
59 if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ 60 if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */
60 return NO_BCPOS; 61 return NO_BCPOS;
61 } else if (nextframe == NULL) { /* Lua function on top. */ 62 } else if (nextframe == NULL) { /* Lua function on top. */
@@ -87,8 +88,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
87 if (frame_islua(f)) { 88 if (frame_islua(f)) {
88 f = frame_prevl(f); 89 f = frame_prevl(f);
89 } else { 90 } else {
90 if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) && 91 if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
91 (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK))
92 cf = cframe_raw(cframe_prev(cf)); 92 cf = cframe_raw(cframe_prev(cf));
93 f = frame_prevd(f); 93 f = frame_prevd(f);
94 } 94 }
@@ -102,7 +102,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
102#if LJ_HASJIT 102#if LJ_HASJIT
103 if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ 103 if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */
104 GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); 104 GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
105 lua_assert(bc_isret(bc_op(ins[-1]))); 105 lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected");
106 pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); 106 pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
107 } 107 }
108#endif 108#endif
@@ -135,7 +135,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
135 BCPos pc = debug_framepc(L, fn, nextframe); 135 BCPos pc = debug_framepc(L, fn, nextframe);
136 if (pc != NO_BCPOS) { 136 if (pc != NO_BCPOS) {
137 GCproto *pt = funcproto(fn); 137 GCproto *pt = funcproto(fn);
138 lua_assert(pc <= pt->sizebc); 138 lj_assertL(pc <= pt->sizebc, "PC out of range");
139 return lj_debug_line(pt, pc); 139 return lj_debug_line(pt, pc);
140 } 140 }
141 return -1; 141 return -1;
@@ -143,38 +143,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
143 143
144/* -- Variable names ------------------------------------------------------ */ 144/* -- Variable names ------------------------------------------------------ */
145 145
146/* Read ULEB128 value. */
147static uint32_t debug_read_uleb128(const uint8_t **pp)
148{
149 const uint8_t *p = *pp;
150 uint32_t v = *p++;
151 if (LJ_UNLIKELY(v >= 0x80)) {
152 int sh = 0;
153 v &= 0x7f;
154 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
155 }
156 *pp = p;
157 return v;
158}
159
160/* Get name of a local variable from slot number and PC. */ 146/* Get name of a local variable from slot number and PC. */
161static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) 147static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
162{ 148{
163 const uint8_t *p = proto_varinfo(pt); 149 const char *p = (const char *)proto_varinfo(pt);
164 if (p) { 150 if (p) {
165 BCPos lastpc = 0; 151 BCPos lastpc = 0;
166 for (;;) { 152 for (;;) {
167 const char *name = (const char *)p; 153 const char *name = p;
168 uint32_t vn = *p++; 154 uint32_t vn = *(const uint8_t *)p;
169 BCPos startpc, endpc; 155 BCPos startpc, endpc;
170 if (vn < VARNAME__MAX) { 156 if (vn < VARNAME__MAX) {
171 if (vn == VARNAME_END) break; /* End of varinfo. */ 157 if (vn == VARNAME_END) break; /* End of varinfo. */
172 } else { 158 } else {
173 while (*p++) ; /* Skip over variable name string. */ 159 do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */
174 } 160 }
175 lastpc = startpc = lastpc + debug_read_uleb128(&p); 161 p++;
162 lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
176 if (startpc > pc) break; 163 if (startpc > pc) break;
177 endpc = startpc + debug_read_uleb128(&p); 164 endpc = startpc + lj_buf_ruleb128(&p);
178 if (pc < endpc && slot-- == 0) { 165 if (pc < endpc && slot-- == 0) {
179 if (vn < VARNAME__MAX) { 166 if (vn < VARNAME__MAX) {
180#define VARNAMESTR(name, str) str "\0" 167#define VARNAMESTR(name, str) str "\0"
@@ -199,7 +186,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
199 TValue *nextframe = size ? frame + size : NULL; 186 TValue *nextframe = size ? frame + size : NULL;
200 GCfunc *fn = frame_func(frame); 187 GCfunc *fn = frame_func(frame);
201 BCPos pc = debug_framepc(L, fn, nextframe); 188 BCPos pc = debug_framepc(L, fn, nextframe);
202 if (!nextframe) nextframe = L->top; 189 if (!nextframe) nextframe = L->top+LJ_FR2;
203 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ 190 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */
204 if (pc != NO_BCPOS) { 191 if (pc != NO_BCPOS) {
205 GCproto *pt = funcproto(fn); 192 GCproto *pt = funcproto(fn);
@@ -209,7 +196,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
209 nextframe = frame; 196 nextframe = frame;
210 frame = frame_prevd(frame); 197 frame = frame_prevd(frame);
211 } 198 }
212 if (frame + slot1 < nextframe) { 199 if (frame + slot1+LJ_FR2 < nextframe) {
213 *name = "(*vararg)"; 200 *name = "(*vararg)";
214 return frame+slot1; 201 return frame+slot1;
215 } 202 }
@@ -220,7 +207,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
220 if (pc != NO_BCPOS && 207 if (pc != NO_BCPOS &&
221 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) 208 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
222 ; 209 ;
223 else if (slot1 > 0 && frame + slot1 < nextframe) 210 else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
224 *name = "(*temporary)"; 211 *name = "(*temporary)";
225 return frame+slot1; 212 return frame+slot1;
226} 213}
@@ -229,7 +216,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
229const char *lj_debug_uvname(GCproto *pt, uint32_t idx) 216const char *lj_debug_uvname(GCproto *pt, uint32_t idx)
230{ 217{
231 const uint8_t *p = proto_uvinfo(pt); 218 const uint8_t *p = proto_uvinfo(pt);
232 lua_assert(idx < pt->sizeuv); 219 lj_assertX(idx < pt->sizeuv, "bad upvalue index");
233 if (!p) return ""; 220 if (!p) return "";
234 if (idx) while (*p++ || --idx) ; 221 if (idx) while (*p++ || --idx) ;
235 return (const char *)p; 222 return (const char *)p;
@@ -286,7 +273,7 @@ restart:
286 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); 273 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
287 if (ip > proto_bc(pt)) { 274 if (ip > proto_bc(pt)) {
288 BCIns insp = ip[-1]; 275 BCIns insp = ip[-1];
289 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && 276 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
290 bc_d(insp) == bc_b(ins)) 277 bc_d(insp) == bc_b(ins))
291 return "method"; 278 return "method";
292 } 279 }
@@ -303,12 +290,12 @@ restart:
303} 290}
304 291
305/* Deduce function name from caller of a frame. */ 292/* Deduce function name from caller of a frame. */
306const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) 293const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
307{ 294{
308 TValue *pframe; 295 cTValue *pframe;
309 GCfunc *fn; 296 GCfunc *fn;
310 BCPos pc; 297 BCPos pc;
311 if (frame <= tvref(L->stack)) 298 if (frame <= tvref(L->stack)+LJ_FR2)
312 return NULL; 299 return NULL;
313 if (frame_isvarg(frame)) 300 if (frame_isvarg(frame))
314 frame = frame_prevd(frame); 301 frame = frame_prevd(frame);
@@ -334,7 +321,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
334/* -- Source code locations ----------------------------------------------- */ 321/* -- Source code locations ----------------------------------------------- */
335 322
336/* Generate shortened source name. */ 323/* Generate shortened source name. */
337void lj_debug_shortname(char *out, GCstr *str) 324void lj_debug_shortname(char *out, GCstr *str, BCLine line)
338{ 325{
339 const char *src = strdata(str); 326 const char *src = strdata(str);
340 if (*src == '=') { 327 if (*src == '=') {
@@ -348,11 +335,11 @@ void lj_debug_shortname(char *out, GCstr *str)
348 *out++ = '.'; *out++ = '.'; *out++ = '.'; 335 *out++ = '.'; *out++ = '.'; *out++ = '.';
349 } 336 }
350 strcpy(out, src); 337 strcpy(out, src);
351 } else { /* Output [string "string"]. */ 338 } else { /* Output [string "string"] or [builtin:name]. */
352 size_t len; /* Length, up to first control char. */ 339 size_t len; /* Length, up to first control char. */
353 for (len = 0; len < LUA_IDSIZE-12; len++) 340 for (len = 0; len < LUA_IDSIZE-12; len++)
354 if (((const unsigned char *)src)[len] < ' ') break; 341 if (((const unsigned char *)src)[len] < ' ') break;
355 strcpy(out, "[string \""); out += 9; 342 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
356 if (src[len] != '\0') { /* Must truncate? */ 343 if (src[len] != '\0') { /* Must truncate? */
357 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 344 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
358 strncpy(out, src, len); out += len; 345 strncpy(out, src, len); out += len;
@@ -360,7 +347,7 @@ void lj_debug_shortname(char *out, GCstr *str)
360 } else { 347 } else {
361 strcpy(out, src); out += len; 348 strcpy(out, src); out += len;
362 } 349 }
363 strcpy(out, "\"]"); 350 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
364 } 351 }
365} 352}
366 353
@@ -373,14 +360,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
373 if (isluafunc(fn)) { 360 if (isluafunc(fn)) {
374 BCLine line = debug_frameline(L, fn, nextframe); 361 BCLine line = debug_frameline(L, fn, nextframe);
375 if (line >= 0) { 362 if (line >= 0) {
363 GCproto *pt = funcproto(fn);
376 char buf[LUA_IDSIZE]; 364 char buf[LUA_IDSIZE];
377 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 365 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
378 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 366 lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
379 return; 367 return;
380 } 368 }
381 } 369 }
382 } 370 }
383 lj_str_pushf(L, "%s", msg); 371 lj_strfmt_pushf(L, "%s", msg);
384} 372}
385 373
386/* Push location string for a bytecode position to Lua stack. */ 374/* Push location string for a bytecode position to Lua stack. */
@@ -390,20 +378,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
390 const char *s = strdata(name); 378 const char *s = strdata(name);
391 MSize i, len = name->len; 379 MSize i, len = name->len;
392 BCLine line = lj_debug_line(pt, pc); 380 BCLine line = lj_debug_line(pt, pc);
393 if (*s == '@') { 381 if (pt->firstline == ~(BCLine)0) {
382 lj_strfmt_pushf(L, "builtin:%s", s);
383 } else if (*s == '@') {
394 s++; len--; 384 s++; len--;
395 for (i = len; i > 0; i--) 385 for (i = len; i > 0; i--)
396 if (s[i] == '/' || s[i] == '\\') { 386 if (s[i] == '/' || s[i] == '\\') {
397 s += i+1; 387 s += i+1;
398 break; 388 break;
399 } 389 }
400 lj_str_pushf(L, "%s:%d", s, line); 390 lj_strfmt_pushf(L, "%s:%d", s, line);
401 } else if (len > 40) { 391 } else if (len > 40) {
402 lj_str_pushf(L, "%p:%d", pt, line); 392 lj_strfmt_pushf(L, "%p:%d", pt, line);
403 } else if (*s == '=') { 393 } else if (*s == '=') {
404 lj_str_pushf(L, "%s:%d", s+1, line); 394 lj_strfmt_pushf(L, "%s:%d", s+1, line);
405 } else { 395 } else {
406 lj_str_pushf(L, "\"%s\":%d", s, line); 396 lj_strfmt_pushf(L, "\"%s\":%d", s, line);
407 } 397 }
408} 398}
409 399
@@ -451,13 +441,14 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
451 } else { 441 } else {
452 uint32_t offset = (uint32_t)ar->i_ci & 0xffff; 442 uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
453 uint32_t size = (uint32_t)ar->i_ci >> 16; 443 uint32_t size = (uint32_t)ar->i_ci >> 16;
454 lua_assert(offset != 0); 444 lj_assertL(offset != 0, "bad frame offset");
455 frame = tvref(L->stack) + offset; 445 frame = tvref(L->stack) + offset;
456 if (size) nextframe = frame + size; 446 if (size) nextframe = frame + size;
457 lua_assert(frame <= tvref(L->maxstack) && 447 lj_assertL(frame <= tvref(L->maxstack) &&
458 (!nextframe || nextframe <= tvref(L->maxstack))); 448 (!nextframe || nextframe <= tvref(L->maxstack)),
449 "broken frame chain");
459 fn = frame_func(frame); 450 fn = frame_func(frame);
460 lua_assert(fn->c.gct == ~LJ_TFUNC); 451 lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function");
461 } 452 }
462 for (; *what; what++) { 453 for (; *what; what++) {
463 if (*what == 'S') { 454 if (*what == 'S') {
@@ -466,7 +457,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
466 BCLine firstline = pt->firstline; 457 BCLine firstline = pt->firstline;
467 GCstr *name = proto_chunkname(pt); 458 GCstr *name = proto_chunkname(pt);
468 ar->source = strdata(name); 459 ar->source = strdata(name);
469 lj_debug_shortname(ar->short_src, name); 460 lj_debug_shortname(ar->short_src, name, pt->firstline);
470 ar->linedefined = (int)firstline; 461 ar->linedefined = (int)firstline;
471 ar->lastlinedefined = (int)(firstline + pt->numline); 462 ar->lastlinedefined = (int)(firstline + pt->numline);
472 ar->what = (firstline || !pt->numline) ? "Lua" : "main"; 463 ar->what = (firstline || !pt->numline) ? "Lua" : "main";
@@ -556,6 +547,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
556 } 547 }
557} 548}
558 549
550#if LJ_HASPROFILE
551/* Put the chunkname into a buffer. */
552static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
553{
554 GCstr *name = proto_chunkname(pt);
555 const char *p = strdata(name);
556 if (pt->firstline == ~(BCLine)0) {
557 lj_buf_putmem(sb, "[builtin:", 9);
558 lj_buf_putstr(sb, name);
559 lj_buf_putb(sb, ']');
560 return 0;
561 }
562 if (*p == '=' || *p == '@') {
563 MSize len = name->len-1;
564 p++;
565 if (pathstrip) {
566 int i;
567 for (i = len-1; i >= 0; i--)
568 if (p[i] == '/' || p[i] == '\\') {
569 len -= i+1;
570 p = p+i+1;
571 break;
572 }
573 }
574 lj_buf_putmem(sb, p, len);
575 } else {
576 lj_buf_putmem(sb, "[string]", 8);
577 }
578 return 1;
579}
580
581/* Put a compact stack dump into a buffer. */
582void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
583{
584 int level = 0, dir = 1, pathstrip = 1;
585 MSize lastlen = 0;
586 if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */
587 while (level != depth) { /* Loop through all frame. */
588 int size;
589 cTValue *frame = lj_debug_frame(L, level, &size);
590 if (frame) {
591 cTValue *nextframe = size ? frame+size : NULL;
592 GCfunc *fn = frame_func(frame);
593 const uint8_t *p = (const uint8_t *)fmt;
594 int c;
595 while ((c = *p++)) {
596 switch (c) {
597 case 'p': /* Preserve full path. */
598 pathstrip = 0;
599 break;
600 case 'F': case 'f': { /* Dump function name. */
601 const char *name;
602 const char *what = lj_debug_funcname(L, frame, &name);
603 if (what) {
604 if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */
605 GCproto *pt = funcproto(fn);
606 if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */
607 debug_putchunkname(sb, pt, pathstrip);
608 lj_buf_putb(sb, ':');
609 }
610 }
611 lj_buf_putmem(sb, name, (MSize)strlen(name));
612 break;
613 } /* else: can't derive a name, dump module:line. */
614 }
615 /* fallthrough */
616 case 'l': /* Dump module:line. */
617 if (isluafunc(fn)) {
618 GCproto *pt = funcproto(fn);
619 if (debug_putchunkname(sb, pt, pathstrip)) {
620 /* Regular Lua function. */
621 BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
622 pt->firstline;
623 lj_buf_putb(sb, ':');
624 lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
625 }
626 } else if (isffunc(fn)) { /* Dump numbered builtins. */
627 lj_buf_putmem(sb, "[builtin#", 9);
628 lj_strfmt_putint(sb, fn->c.ffid);
629 lj_buf_putb(sb, ']');
630 } else { /* Dump C function address. */
631 lj_buf_putb(sb, '@');
632 lj_strfmt_putptr(sb, fn->c.f);
633 }
634 break;
635 case 'Z': /* Zap trailing separator. */
636 lastlen = sbuflen(sb);
637 break;
638 default:
639 lj_buf_putb(sb, c);
640 break;
641 }
642 }
643 } else if (dir == 1) {
644 break;
645 } else {
646 level -= size; /* Reverse frame order: quickly skip missing level. */
647 }
648 level += dir;
649 }
650 if (lastlen)
651 sb->w = sb->b + lastlen; /* Zap trailing separator. */
652}
653#endif
654
559/* Number of frames for the leading and trailing part of a traceback. */ 655/* Number of frames for the leading and trailing part of a traceback. */
560#define TRACEBACK_LEVELS1 12 656#define TRACEBACK_LEVELS1 12
561#define TRACEBACK_LEVELS2 10 657#define TRACEBACK_LEVELS2 10
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 7559e3f2..a6e21701 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -33,14 +33,18 @@ LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp,
33 GCobj **op); 33 GCobj **op);
34LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, 34LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
35 BCReg slot, const char **name); 35 BCReg slot, const char **name);
36LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 36LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
37 const char **name); 37 const char **name);
38LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 38LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
39LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 39LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
40 cTValue *frame, cTValue *nextframe); 40 cTValue *frame, cTValue *nextframe);
41LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 41LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
42LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, 42LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
43 int ext); 43 int ext);
44#if LJ_HASPROFILE
45LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
46 int depth);
47#endif
44 48
45/* Fixed internal variable names. */ 49/* Fixed internal variable names. */
46#define VARNAMEDEF(_) \ 50#define VARNAMEDEF(_) \
diff --git a/src/lj_def.h b/src/lj_def.h
index 85997745..3a28026c 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
46#include <stdlib.h> 46#include <stdlib.h>
47 47
48/* Various VM limits. */ 48/* Various VM limits. */
49#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ 49#define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */
50#define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */
51/* Max. total memory allocation. */
52#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
50#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ 53#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
51#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ 54#define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */
52#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ 55#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */
56#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */
53 57
54#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ 58#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
55#define LJ_MAX_HBITS 26 /* Max. hash bits. */ 59#define LJ_MAX_HBITS 26 /* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
57#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ 61#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
58#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ 62#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
59 63
60#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ 64#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */
61#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ 65#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
62#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ 66#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
63#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ 67#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
65#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ 69#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
66 70
67#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ 71#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
68#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ 72#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
69 73
70#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ 74#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
71 75
@@ -76,7 +80,6 @@ typedef unsigned int uintptr_t;
76#define LJ_MIN_SBUF 32 /* Min. string buffer length. */ 80#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
77#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ 81#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
78#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ 82#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
79#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
80 83
81/* JIT compiler limits. */ 84/* JIT compiler limits. */
82#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ 85#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
@@ -91,6 +94,9 @@ typedef unsigned int uintptr_t;
91#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) 94#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
92#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) 95#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
93#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) 96#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
97#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
98#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
99#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p))
94 100
95#define checki8(x) ((x) == (int32_t)(int8_t)(x)) 101#define checki8(x) ((x) == (int32_t)(int8_t)(x))
96#define checku8(x) ((x) == (int32_t)(uint8_t)(x)) 102#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
@@ -98,7 +104,10 @@ typedef unsigned int uintptr_t;
98#define checku16(x) ((x) == (int32_t)(uint16_t)(x)) 104#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
99#define checki32(x) ((x) == (int32_t)(x)) 105#define checki32(x) ((x) == (int32_t)(x))
100#define checku32(x) ((x) == (uint32_t)(x)) 106#define checku32(x) ((x) == (uint32_t)(x))
107#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0)
101#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) 108#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
109#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0)
110#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1)
102 111
103/* Every half-decent C compiler transforms this into a rotate instruction. */ 112/* Every half-decent C compiler transforms this into a rotate instruction. */
104#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) 113#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
@@ -111,7 +120,7 @@ typedef uintptr_t BloomFilter;
111#define bloomset(b, x) ((b) |= bloombit((x))) 120#define bloomset(b, x) ((b) |= bloombit((x)))
112#define bloomtest(b, x) ((b) & bloombit((x))) 121#define bloomtest(b, x) ((b) & bloombit((x)))
113 122
114#if defined(__GNUC__) || defined(__psp2__) 123#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__)
115 124
116#define LJ_NORET __attribute__((noreturn)) 125#define LJ_NORET __attribute__((noreturn))
117#define LJ_ALIGN(n) __attribute__((aligned(n))) 126#define LJ_ALIGN(n) __attribute__((aligned(n)))
@@ -173,7 +182,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x)
173{ 182{
174 return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); 183 return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32));
175} 184}
176#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) 185#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__
177static LJ_AINLINE uint32_t lj_bswap(uint32_t x) 186static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
178{ 187{
179 return (uint32_t)__builtin_bswap32((int32_t)x); 188 return (uint32_t)__builtin_bswap32((int32_t)x);
@@ -329,14 +338,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
329#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET 338#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET
330#define LJ_ASMF_NORET LJ_ASMF LJ_NORET 339#define LJ_ASMF_NORET LJ_ASMF LJ_NORET
331 340
332/* Runtime assertions. */ 341/* Internal assertions. */
333#ifdef lua_assert 342#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
334#define check_exp(c, e) (lua_assert(c), (e)) 343#define lj_assert_check(g, c, ...) \
335#define api_check(l, e) lua_assert(e) 344 ((c) ? (void)0 : \
345 (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0))
346#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
336#else 347#else
337#define lua_assert(c) ((void)0) 348#define lj_checkapi(c, ...) ((void)L)
349#endif
350
351#ifdef LUA_USE_ASSERT
352#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__)
353#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__)
354#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
355#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__)
356#define check_exp(c, e) (lj_assertX((c), #c), (e))
357#else
358#define lj_assertG_(g, c, ...) ((void)0)
359#define lj_assertG(c, ...) ((void)g)
360#define lj_assertL(c, ...) ((void)L)
361#define lj_assertX(c, ...) ((void)0)
338#define check_exp(c, e) (e) 362#define check_exp(c, e) (e)
339#define api_check luai_apicheck
340#endif 363#endif
341 364
342/* Static assertions. */ 365/* Static assertions. */
@@ -350,4 +373,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
350 extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) 373 extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1])
351#endif 374#endif
352 375
376/* PRNG state. Need this here, details in lj_prng.h. */
377typedef struct PRNGState {
378 uint64_t u[4];
379} PRNGState;
380
353#endif 381#endif
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 0c7225e6..bf8d8812 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_func.h" 12#include "lj_func.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
@@ -17,6 +18,7 @@
17#include "lj_frame.h" 18#include "lj_frame.h"
18#include "lj_bc.h" 19#include "lj_bc.h"
19#include "lj_ff.h" 20#include "lj_ff.h"
21#include "lj_strfmt.h"
20#if LJ_HASJIT 22#if LJ_HASJIT
21#include "lj_jit.h" 23#include "lj_jit.h"
22#endif 24#endif
@@ -25,6 +27,9 @@
25#endif 27#endif
26#include "lj_trace.h" 28#include "lj_trace.h"
27#include "lj_dispatch.h" 29#include "lj_dispatch.h"
30#if LJ_HASPROFILE
31#include "lj_profile.h"
32#endif
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "luajit.h" 34#include "luajit.h"
30 35
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
37#include <math.h> 42#include <math.h>
38LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, 43LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
39 lua_State *co); 44 lua_State *co);
45#if !LJ_HASJIT
46#define lj_dispatch_stitch lj_dispatch_ins
47#endif
48#if !LJ_HASPROFILE
49#define lj_dispatch_profile lj_dispatch_ins
50#endif
40 51
41#define GOTFUNC(name) (ASMFunction)name, 52#define GOTFUNC(name) (ASMFunction)name,
42static const ASMFunction dispatch_got[] = { 53static const ASMFunction dispatch_got[] = {
@@ -64,7 +75,7 @@ void lj_dispatch_init(GG_State *GG)
64 for (i = 0; i < GG_NUM_ASMFF; i++) 75 for (i = 0; i < GG_NUM_ASMFF; i++)
65 GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); 76 GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
66#if LJ_TARGET_MIPS 77#if LJ_TARGET_MIPS
67 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); 78 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
68#endif 79#endif
69} 80}
70 81
@@ -82,11 +93,12 @@ void lj_dispatch_init_hotcount(global_State *g)
82#endif 93#endif
83 94
84/* Internal dispatch mode bits. */ 95/* Internal dispatch mode bits. */
85#define DISPMODE_JIT 0x01 /* JIT compiler on. */ 96#define DISPMODE_CALL 0x01 /* Override call dispatch. */
86#define DISPMODE_REC 0x02 /* Recording active. */ 97#define DISPMODE_RET 0x02 /* Override return dispatch. */
87#define DISPMODE_INS 0x04 /* Override instruction dispatch. */ 98#define DISPMODE_INS 0x04 /* Override instruction dispatch. */
88#define DISPMODE_CALL 0x08 /* Override call dispatch. */ 99#define DISPMODE_JIT 0x10 /* JIT compiler on. */
89#define DISPMODE_RET 0x10 /* Override return dispatch. */ 100#define DISPMODE_REC 0x20 /* Recording active. */
101#define DISPMODE_PROF 0x40 /* Profiling active. */
90 102
91/* Update dispatch table depending on various flags. */ 103/* Update dispatch table depending on various flags. */
92void lj_dispatch_update(global_State *g) 104void lj_dispatch_update(global_State *g)
@@ -98,6 +110,9 @@ void lj_dispatch_update(global_State *g)
98 mode |= G2J(g)->state != LJ_TRACE_IDLE ? 110 mode |= G2J(g)->state != LJ_TRACE_IDLE ?
99 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; 111 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
100#endif 112#endif
113#if LJ_HASPROFILE
114 mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
115#endif
101 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; 116 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
102 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; 117 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
103 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; 118 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
@@ -126,9 +141,9 @@ void lj_dispatch_update(global_State *g)
126 disp[GG_LEN_DDISP+BC_LOOP] = f_loop; 141 disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
127 142
128 /* Set dynamic instruction dispatch. */ 143 /* Set dynamic instruction dispatch. */
129 if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { 144 if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
130 /* Need to update the whole table. */ 145 /* Need to update the whole table. */
131 if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ 146 if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */
132 /* Copy static dispatch table to dynamic dispatch table. */ 147 /* Copy static dispatch table to dynamic dispatch table. */
133 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); 148 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
134 /* Overwrite with dynamic return dispatch. */ 149 /* Overwrite with dynamic return dispatch. */
@@ -140,12 +155,13 @@ void lj_dispatch_update(global_State *g)
140 } 155 }
141 } else { 156 } else {
142 /* The recording dispatch also checks for hooks. */ 157 /* The recording dispatch also checks for hooks. */
143 ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; 158 ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
159 (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
144 uint32_t i; 160 uint32_t i;
145 for (i = 0; i < GG_LEN_SDISP; i++) 161 for (i = 0; i < GG_LEN_SDISP; i++)
146 disp[i] = f; 162 disp[i] = f;
147 } 163 }
148 } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { 164 } else if (!(mode & DISPMODE_INS)) {
149 /* Otherwise set dynamic counting ins. */ 165 /* Otherwise set dynamic counting ins. */
150 disp[BC_FORL] = f_forl; 166 disp[BC_FORL] = f_forl;
151 disp[BC_ITERL] = f_iterl; 167 disp[BC_ITERL] = f_iterl;
@@ -236,22 +252,15 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
236 } else { 252 } else {
237 if (!(mode & LUAJIT_MODE_ON)) 253 if (!(mode & LUAJIT_MODE_ON))
238 G2J(g)->flags &= ~(uint32_t)JIT_F_ON; 254 G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
239#if LJ_TARGET_X86ORX64
240 else if ((G2J(g)->flags & JIT_F_SSE2))
241 G2J(g)->flags |= (uint32_t)JIT_F_ON;
242 else
243 return 0; /* Don't turn on JIT compiler without SSE2 support. */
244#else
245 else 255 else
246 G2J(g)->flags |= (uint32_t)JIT_F_ON; 256 G2J(g)->flags |= (uint32_t)JIT_F_ON;
247#endif
248 lj_dispatch_update(g); 257 lj_dispatch_update(g);
249 } 258 }
250 break; 259 break;
251 case LUAJIT_MODE_FUNC: 260 case LUAJIT_MODE_FUNC:
252 case LUAJIT_MODE_ALLFUNC: 261 case LUAJIT_MODE_ALLFUNC:
253 case LUAJIT_MODE_ALLSUBFUNC: { 262 case LUAJIT_MODE_ALLSUBFUNC: {
254 cTValue *tv = idx == 0 ? frame_prev(L->base-1) : 263 cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 :
255 idx > 0 ? L->base + (idx-1) : L->top + idx; 264 idx > 0 ? L->base + (idx-1) : L->top + idx;
256 GCproto *pt; 265 GCproto *pt;
257 if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) 266 if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn))
@@ -286,7 +295,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
286 if (idx != 0) { 295 if (idx != 0) {
287 cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx; 296 cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx;
288 if (tvislightud(tv)) 297 if (tvislightud(tv))
289 g->wrapf = (lua_CFunction)lightudV(tv); 298 g->wrapf = (lua_CFunction)lightudV(g, tv);
290 else 299 else
291 return 0; /* Failed. */ 300 return 0; /* Failed. */
292 } else { 301 } else {
@@ -352,10 +361,19 @@ static void callhook(lua_State *L, int event, BCLine line)
352 /* Top frame, nextframe = NULL. */ 361 /* Top frame, nextframe = NULL. */
353 ar.i_ci = (int)((L->base-1) - tvref(L->stack)); 362 ar.i_ci = (int)((L->base-1) - tvref(L->stack));
354 lj_state_checkstack(L, 1+LUA_MINSTACK); 363 lj_state_checkstack(L, 1+LUA_MINSTACK);
364#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
365 lj_profile_hook_enter(g);
366#else
355 hook_enter(g); 367 hook_enter(g);
368#endif
356 hookf(L, &ar); 369 hookf(L, &ar);
357 lua_assert(hook_active(g)); 370 lj_assertG(hook_active(g), "active hook flag removed");
371 setgcref(g->cur_L, obj2gco(L));
372#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
373 lj_profile_hook_leave(g);
374#else
358 hook_leave(g); 375 hook_leave(g);
376#endif
359 } 377 }
360} 378}
361 379
@@ -368,7 +386,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
368 if (bc_op(ins) == BC_UCLO) 386 if (bc_op(ins) == BC_UCLO)
369 ins = pc[bc_j(ins)]; 387 ins = pc[bc_j(ins)];
370 switch (bc_op(ins)) { 388 switch (bc_op(ins)) {
371 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; 389 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
372 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; 390 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
373 case BC_TSETM: return bc_a(ins) + nres-1; 391 case BC_TSETM: return bc_a(ins) + nres-1;
374 default: return pt->framesize; 392 default: return pt->framesize;
@@ -397,7 +415,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc)
397#endif 415#endif
398 J->L = L; 416 J->L = L;
399 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ 417 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
400 lua_assert(L->top - L->base == delta); 418 lj_assertG(L->top - L->base == delta,
419 "unbalanced stack after tracing of instruction");
401 } 420 }
402 } 421 }
403#endif 422#endif
@@ -457,7 +476,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
457#endif 476#endif
458 pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); 477 pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1);
459 lj_trace_hot(J, pc); 478 lj_trace_hot(J, pc);
460 lua_assert(L->top - L->base == delta); 479 lj_assertG(L->top - L->base == delta,
480 "unbalanced stack after hot call");
461 goto out; 481 goto out;
462 } else if (J->state != LJ_TRACE_IDLE && 482 } else if (J->state != LJ_TRACE_IDLE &&
463 !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { 483 !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
@@ -466,7 +486,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
466#endif 486#endif
467 /* Record the FUNC* bytecodes, too. */ 487 /* Record the FUNC* bytecodes, too. */
468 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ 488 lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
469 lua_assert(L->top - L->base == delta); 489 lj_assertG(L->top - L->base == delta,
490 "unbalanced stack after hot instruction");
470 } 491 }
471#endif 492#endif
472 if ((g->hookmask & LUA_MASKCALL)) { 493 if ((g->hookmask & LUA_MASKCALL)) {
@@ -492,3 +513,41 @@ out:
492 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ 513 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
493} 514}
494 515
516#if LJ_HASJIT
517/* Stitch a new trace. */
518void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
519{
520 ERRNO_SAVE
521 lua_State *L = J->L;
522 void *cf = cframe_raw(L->cframe);
523 const BCIns *oldpc = cframe_pc(cf);
524 setcframe_pc(cf, pc);
525 /* Before dispatch, have to bias PC by 1. */
526 L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
527 lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
528 setcframe_pc(cf, oldpc);
529 ERRNO_RESTORE
530}
531#endif
532
533#if LJ_HASPROFILE
534/* Profile dispatch. */
535void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
536{
537 ERRNO_SAVE
538 GCfunc *fn = curr_func(L);
539 GCproto *pt = funcproto(fn);
540 void *cf = cframe_raw(L->cframe);
541 const BCIns *oldpc = cframe_pc(cf);
542 global_State *g;
543 setcframe_pc(cf, pc);
544 L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
545 lj_profile_interpreter(L);
546 setcframe_pc(cf, oldpc);
547 g = G(L);
548 setgcref(g->cur_L, obj2gco(L));
549 setvmstate(g, INTERP);
550 ERRNO_RESTORE
551}
552#endif
553
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index a6eaf6a9..2331bd42 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -14,8 +14,24 @@
14 14
15#if LJ_TARGET_MIPS 15#if LJ_TARGET_MIPS
16/* Need our own global offset table for the dreaded MIPS calling conventions. */ 16/* Need our own global offset table for the dreaded MIPS calling conventions. */
17
18#ifndef _LJ_VM_H
19LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b);
20#endif
21
22#if LJ_SOFTFP
23#ifndef _LJ_IRCALL_H
24extern double __adddf3(double a, double b);
25extern double __subdf3(double a, double b);
26extern double __muldf3(double a, double b);
27extern double __divdf3(double a, double b);
28#endif
29#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3)
30#else
31#define SFGOTDEF(_)
32#endif
17#if LJ_HASJIT 33#if LJ_HASJIT
18#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) 34#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
19#else 35#else
20#define JITGOTDEF(_) 36#define JITGOTDEF(_)
21#endif 37#endif
@@ -28,16 +44,19 @@
28#define GOTDEF(_) \ 44#define GOTDEF(_) \
29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 45 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 46 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
31 _(pow) _(fmod) _(ldexp) \ 47 _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) _(lj_err_run) \ 48 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
49 _(lj_dispatch_profile) _(lj_err_throw) \
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 50 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 51 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 52 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 53 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 54 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 55 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 56 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 57 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
58 _(lj_buf_putstr_upper) _(lj_buf_tostr) \
59 JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_)
41 60
42enum { 61enum {
43#define GOTENUM(name) LJ_GOT_##name, 62#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +79,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 79#define HOTCOUNT_CALL 1
61 80
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 81/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 82#define GG_NUM_ASMFF 57
64 83
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 84#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 85#define GG_LEN_SDISP BC_FUNCF
@@ -96,6 +115,7 @@ typedef struct GG_State {
96#define J2G(J) (&J2GG(J)->g) 115#define J2G(J) (&J2GG(J)->g)
97#define G2J(gl) (&G2GG(gl)->J) 116#define G2J(gl) (&G2GG(gl)->J)
98#define L2J(L) (&L2GG(L)->J) 117#define L2J(L) (&L2GG(L)->J)
118#define GG_G2J (GG_OFS(J) - GG_OFS(g))
99#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) 119#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g))
100#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) 120#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch))
101#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) 121#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch))
@@ -117,7 +137,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
117/* Instruction dispatch callback for hooks or when recording. */ 137/* Instruction dispatch callback for hooks or when recording. */
118LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); 138LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
119LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); 139LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
120LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); 140#if LJ_HASJIT
141LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
142#endif
143#if LJ_HASPROFILE
144LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
145#endif
121 146
122#if LJ_HASFFI && !defined(_BUILDVM_H) 147#if LJ_HASFFI && !defined(_BUILDVM_H)
123/* Save/restore errno and GetLastError() around hooks, exits and recording. */ 148/* Save/restore errno and GetLastError() around hooks, exits and recording. */
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index 224e1981..615e4c3a 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm)
81 81
82static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) 82static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
83{ 83{
84 lua_assert(ofs >= -255 && ofs <= 255); 84 lj_assertA(ofs >= -255 && ofs <= 255,
85 "load/store offset %d out of range", ofs);
85 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; 86 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
86 *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | 87 *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) |
87 ((ofs & 0xf0) << 4) | (ofs & 0x0f); 88 ((ofs & 0xf0) << 4) | (ofs & 0x0f);
@@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
89 90
90static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) 91static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
91{ 92{
92 lua_assert(ofs >= -4095 && ofs <= 4095); 93 lj_assertA(ofs >= -4095 && ofs <= 4095,
94 "load/store offset %d out of range", ofs);
93 /* Combine LDR/STR pairs to LDRD/STRD. */ 95 /* Combine LDR/STR pairs to LDRD/STRD. */
94 if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && 96 if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) &&
95 (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && 97 (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn &&
@@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
106#if !LJ_SOFTFP 108#if !LJ_SOFTFP
107static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) 109static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
108{ 110{
109 lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); 111 lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0,
112 "load/store offset %d out of range", ofs);
110 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; 113 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
111 *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); 114 *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2);
112} 115}
@@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
124 while (work) { 127 while (work) {
125 Reg r = rset_picktop(work); 128 Reg r = rset_picktop(work);
126 IRRef ref = regcost_ref(as->cost[r]); 129 IRRef ref = regcost_ref(as->cost[r]);
127 lua_assert(r != d); 130 lj_assertA(r != d, "dest reg not free");
128 if (emit_canremat(ref)) { 131 if (emit_canremat(ref)) {
129 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 132 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
130 uint32_t k = emit_isk12(ARMI_ADD, delta); 133 uint32_t k = emit_isk12(ARMI_ADD, delta);
@@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
142} 145}
143 146
144/* Try to find a two step delta relative to another constant. */ 147/* Try to find a two step delta relative to another constant. */
145static int emit_kdelta2(ASMState *as, Reg d, int32_t i) 148static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
146{ 149{
147 RegSet work = ~as->freeset & RSET_GPR; 150 RegSet work = ~as->freeset & RSET_GPR;
148 while (work) { 151 while (work) {
149 Reg r = rset_picktop(work); 152 Reg r = rset_picktop(work);
150 IRRef ref = regcost_ref(as->cost[r]); 153 IRRef ref = regcost_ref(as->cost[r]);
151 lua_assert(r != d); 154 lj_assertA(r != rd, "dest reg %d not free", rd);
152 if (emit_canremat(ref)) { 155 if (emit_canremat(ref)) {
153 int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; 156 int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
154 if (other) { 157 if (other) {
@@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
159 k2 = emit_isk12(0, delta & (255 << sh)); 162 k2 = emit_isk12(0, delta & (255 << sh));
160 k = emit_isk12(0, delta & ~(255 << sh)); 163 k = emit_isk12(0, delta & ~(255 << sh));
161 if (k) { 164 if (k) {
162 emit_dn(as, ARMI_ADD^k2^inv, d, d); 165 emit_dn(as, ARMI_ADD^k2^inv, rd, rd);
163 emit_dn(as, ARMI_ADD^k^inv, d, r); 166 emit_dn(as, ARMI_ADD^k^inv, rd, r);
164 return 1; 167 return 1;
165 } 168 }
166 } 169 }
@@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
171} 174}
172 175
173/* Load a 32 bit constant into a GPR. */ 176/* Load a 32 bit constant into a GPR. */
174static void emit_loadi(ASMState *as, Reg r, int32_t i) 177static void emit_loadi(ASMState *as, Reg rd, int32_t i)
175{ 178{
176 uint32_t k = emit_isk12(ARMI_MOV, i); 179 uint32_t k = emit_isk12(ARMI_MOV, i);
177 lua_assert(rset_test(as->freeset, r) || r == RID_TMP); 180 lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
181 "dest reg %d not free", rd);
178 if (k) { 182 if (k) {
179 /* Standard K12 constant. */ 183 /* Standard K12 constant. */
180 emit_d(as, ARMI_MOV^k, r); 184 emit_d(as, ARMI_MOV^k, rd);
181 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { 185 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
182 /* 16 bit loword constant for ARMv6T2. */ 186 /* 16 bit loword constant for ARMv6T2. */
183 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); 187 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
184 } else if (emit_kdelta1(as, r, i)) { 188 } else if (emit_kdelta1(as, rd, i)) {
185 /* One step delta relative to another constant. */ 189 /* One step delta relative to another constant. */
186 } else if ((as->flags & JIT_F_ARMV6T2)) { 190 } else if ((as->flags & JIT_F_ARMV6T2)) {
187 /* 32 bit hiword/loword constant for ARMv6T2. */ 191 /* 32 bit hiword/loword constant for ARMv6T2. */
188 emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); 192 emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
189 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); 193 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
190 } else if (emit_kdelta2(as, r, i)) { 194 } else if (emit_kdelta2(as, rd, i)) {
191 /* Two step delta relative to another constant. */ 195 /* Two step delta relative to another constant. */
192 } else { 196 } else {
193 /* Otherwise construct the constant with up to 4 instructions. */ 197 /* Otherwise construct the constant with up to 4 instructions. */
@@ -197,17 +201,17 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
197 int32_t m = i & (255 << sh); 201 int32_t m = i & (255 << sh);
198 i &= ~(255 << sh); 202 i &= ~(255 << sh);
199 if (i == 0) { 203 if (i == 0) {
200 emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); 204 emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);
201 break; 205 break;
202 } 206 }
203 emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); 207 emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
204 } 208 }
205 } 209 }
206} 210}
207 211
208#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 212#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr)))
209 213
210static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 214static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
211 215
212/* Get/set from constant pointer. */ 216/* Get/set from constant pointer. */
213static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) 217static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
@@ -219,8 +223,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
219 223
220#if !LJ_SOFTFP 224#if !LJ_SOFTFP
221/* Load a number constant into an FPR. */ 225/* Load a number constant into an FPR. */
222static void emit_loadn(ASMState *as, Reg r, cTValue *tv) 226static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
223{ 227{
228 cTValue *tv = ir_knum(ir);
224 int32_t i; 229 int32_t i;
225 if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { 230 if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
226 uint32_t hi = tv->u32.hi; 231 uint32_t hi = tv->u32.hi;
@@ -260,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
260{ 265{
261 MCode *p = as->mcp; 266 MCode *p = as->mcp;
262 ptrdiff_t delta = (target - p) - 1; 267 ptrdiff_t delta = (target - p) - 1;
263 lua_assert(((delta + 0x00800000) >> 24) == 0); 268 lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range");
264 *--p = ai | ((uint32_t)delta & 0x00ffffffu); 269 *--p = ai | ((uint32_t)delta & 0x00ffffffu);
265 as->mcp = p; 270 as->mcp = p;
266} 271}
@@ -288,7 +293,7 @@ static void emit_call(ASMState *as, void *target)
288static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) 293static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
289{ 294{
290#if LJ_SOFTFP 295#if LJ_SOFTFP
291 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 296 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
292#else 297#else
293 if (dst >= RID_MAX_GPR) { 298 if (dst >= RID_MAX_GPR) {
294 emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, 299 emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
@@ -308,30 +313,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
308 emit_dm(as, ARMI_MOV, dst, src); 313 emit_dm(as, ARMI_MOV, dst, src);
309} 314}
310 315
311/* Generic load of register from stack slot. */ 316/* Generic load of register with base and (small) offset address. */
312static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 317static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
313{ 318{
314#if LJ_SOFTFP 319#if LJ_SOFTFP
315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 320 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
316#else 321#else
317 if (r >= RID_MAX_GPR) 322 if (r >= RID_MAX_GPR)
318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); 323 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
319 else 324 else
320#endif 325#endif
321 emit_lso(as, ARMI_LDR, r, RID_SP, ofs); 326 emit_lso(as, ARMI_LDR, r, base, ofs);
322} 327}
323 328
324/* Generic store of register to stack slot. */ 329/* Generic store of register with base and (small) offset address. */
325static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 330static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
326{ 331{
327#if LJ_SOFTFP 332#if LJ_SOFTFP
328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 333 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
329#else 334#else
330 if (r >= RID_MAX_GPR) 335 if (r >= RID_MAX_GPR)
331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); 336 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
332 else 337 else
333#endif 338#endif
334 emit_lso(as, ARMI_STR, r, RID_SP, ofs); 339 emit_lso(as, ARMI_STR, r, base, ofs);
335} 340}
336 341
337/* Emit an arithmetic/logic operation with a constant operand. */ 342/* Emit an arithmetic/logic operation with a constant operand. */
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
new file mode 100644
index 00000000..00086e8a
--- /dev/null
+++ b/src/lj_emit_arm64.h
@@ -0,0 +1,424 @@
1/*
2** ARM64 instruction emitter.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4**
5** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6** Sponsored by Cisco Systems, Inc.
7*/
8
9/* -- Constant encoding --------------------------------------------------- */
10
11static uint64_t get_k64val(ASMState *as, IRRef ref)
12{
13 IRIns *ir = IR(ref);
14 if (ir->o == IR_KINT64) {
15 return ir_kint64(ir)->u64;
16 } else if (ir->o == IR_KGC) {
17 return (uint64_t)ir_kgc(ir);
18 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
19 return (uint64_t)ir_kptr(ir);
20 } else {
21 lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
22 "bad 64 bit const IR op %d", ir->o);
23 return ir->i; /* Sign-extended. */
24 }
25}
26
27/* Encode constant in K12 format for data processing instructions. */
28static uint32_t emit_isk12(int64_t n)
29{
30 uint64_t k = (n < 0) ? -n : n;
31 uint32_t m = (n < 0) ? 0x40000000 : 0;
32 if (k < 0x1000) {
33 return A64I_K12|m|A64F_U12(k);
34 } else if ((k & 0xfff000) == k) {
35 return A64I_K12|m|0x400000|A64F_U12(k>>12);
36 }
37 return 0;
38}
39
40#define emit_clz64(n) __builtin_clzll(n)
41#define emit_ctz64(n) __builtin_ctzll(n)
42
43/* Encode constant in K13 format for logical data processing instructions. */
44static uint32_t emit_isk13(uint64_t n, int is64)
45{
46 int inv = 0, w = 128, lz, tz;
47 if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */
48 if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */
49 do { /* Find the repeat width. */
50 if (is64 && (uint32_t)(n^(n>>32))) break;
51 n = (uint32_t)n;
52 if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */
53 w = 32; if ((n^(n>>16)) & 0xffff) break;
54 n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
55 n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
56 n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
57 n = n & 0x3; w = 2;
58 } while (0);
59 lz = emit_clz64(n);
60 tz = emit_ctz64(n);
61 if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */
62 if (inv)
63 return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
64 else
65 return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
66}
67
68static uint32_t emit_isfpk64(uint64_t n)
69{
70 uint64_t etop9 = ((n >> 54) & 0x1ff);
71 if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) {
72 return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
73 }
74 return ~0u;
75}
76
77/* -- Emit basic instructions --------------------------------------------- */
78
79static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
80{
81 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);
82}
83
84static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
85{
86 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
87}
88
89static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
90{
91 *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
92}
93
94static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
95{
96 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
97}
98
99static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
100{
101 *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
102}
103
104static void emit_d(ASMState *as, A64Ins ai, Reg rd)
105{
106 *--as->mcp = ai | A64F_D(rd);
107}
108
109static void emit_n(ASMState *as, A64Ins ai, Reg rn)
110{
111 *--as->mcp = ai | A64F_N(rn);
112}
113
114static int emit_checkofs(A64Ins ai, int64_t ofs)
115{
116 int scale = (ai >> 30) & 3;
117 if (ofs < 0 || (ofs & ((1<<scale)-1))) {
118 return (ofs >= -256 && ofs <= 255) ? -1 : 0;
119 } else {
120 return (ofs < (4096<<scale)) ? 1 : 0;
121 }
122}
123
124static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
125{
126 int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
127 lj_assertA(ot, "load/store offset %d out of range", ofs);
128 /* Combine LDR/STR pairs to LDP/STP. */
129 if ((sc == 2 || sc == 3) &&
130 (!(ai & 0x400000) || rd != rn) &&
131 as->mcp != as->mcloop) {
132 uint32_t prev = *as->mcp & ~A64F_D(31);
133 int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
134 A64Ins aip;
135 if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
136 prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
137 aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
138 } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
139 prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
140 aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
141 ofsm = ofs;
142 } else {
143 goto nopair;
144 }
145 if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
146 *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
147 (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
148 return;
149 }
150 }
151nopair:
152 if (ot == 1)
153 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
154 else
155 *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
156}
157
158/* -- Emit loads/stores --------------------------------------------------- */
159
160/* Prefer rematerialization of BASE/L from global_State over spills. */
161#define emit_canremat(ref) ((ref) <= ASMREF_L)
162
163/* Try to find an N-step delta relative to other consts with N < lim. */
164static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
165{
166 RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
167 if (lim <= 1) return 0; /* Can't beat that. */
168 while (work) {
169 Reg r = rset_picktop(work);
170 IRRef ref = regcost_ref(as->cost[r]);
171 lj_assertA(r != rd, "dest reg %d not free", rd);
172 if (ref < REF_TRUE) {
173 uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
174 get_k64val(as, ref);
175 int64_t delta = (int64_t)(k - kx);
176 if (delta == 0) {
177 emit_dm(as, A64I_MOVx, rd, r);
178 return 1;
179 } else {
180 uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
181 if (k12) {
182 emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
183 return 1;
184 }
185 /* Do other ops or multi-step deltas pay off? Probably not.
186 ** E.g. XOR rarely helps with pointer consts.
187 */
188 }
189 }
190 rset_clear(work, r);
191 }
192 return 0; /* Failed. */
193}
194
195static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
196{
197 int i, zeros = 0, ones = 0, neg;
198 if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
199 /* Count homogeneous 16 bit fragments. */
200 for (i = 0; i < 4; i++) {
201 uint64_t frag = (u64 >> i*16) & 0xffff;
202 zeros += (frag == 0);
203 ones += (frag == 0xffff);
204 }
205 neg = ones > zeros; /* Use MOVN if it pays off. */
206 if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */
207 uint32_t k13 = emit_isk13(u64, is64);
208 if (k13) {
209 emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
210 return;
211 }
212 }
213 if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
214 int shift = 0, lshift = 0;
215 uint64_t n64 = neg ? ~u64 : u64;
216 if (n64 != 0) {
217 /* Find first/last fragment to be filled. */
218 shift = (63-emit_clz64(n64)) & ~15;
219 lshift = emit_ctz64(n64) & ~15;
220 }
221 /* MOVK requires the original value (u64). */
222 while (shift > lshift) {
223 uint32_t u16 = (u64 >> shift) & 0xffff;
224 /* Skip fragments that are correctly filled by MOVN/MOVZ. */
225 if (u16 != (neg ? 0xffff : 0))
226 emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
227 shift -= 16;
228 }
229 /* But MOVN needs an inverted value (n64). */
230 emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
231 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
232 }
233}
234
235/* Load a 32 bit constant into a GPR. */
236#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)
237
238/* Load a 64 bit constant into a GPR. */
239#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)
240
241#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))
242
243#define glofs(as, k) \
244 ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
245#define mcpofs(as, k) \
246 ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
247#define checkmcpofs(as, k) \
248 (A64F_S_OK(mcpofs(as, k)>>2, 19))
249
250static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
251
252/* Get/set from constant pointer. */
253static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
254{
255 /* First, check if ip + offset is in range. */
256 if ((ai & 0x00400000) && checkmcpofs(as, p)) {
257 emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
258 } else {
259 Reg base = RID_GL; /* Next, try GL + offset. */
260 int64_t ofs = glofs(as, p);
261 if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */
262 int64_t i64 = i64ptr(p);
263 base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
264 ofs = i64 & 0x7fffull;
265 }
266 emit_lso(as, ai, r, base, ofs);
267 }
268}
269
270/* Load 64 bit IR constant into register. */
271static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
272{
273 const uint64_t *k = &ir_k64(ir)->u64;
274 int64_t ofs;
275 if (r >= RID_MAX_GPR) {
276 uint32_t fpk = emit_isfpk64(*k);
277 if (fpk != ~0u) {
278 emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
279 return;
280 }
281 }
282 ofs = glofs(as, k);
283 if (emit_checkofs(A64I_LDRx, ofs)) {
284 emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
285 (r & 31), RID_GL, ofs);
286 } else {
287 if (r >= RID_MAX_GPR) {
288 emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
289 r = RID_TMP;
290 }
291 if (checkmcpofs(as, k))
292 emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r);
293 else
294 emit_loadu64(as, r, *k);
295 }
296}
297
298/* Get/set global_State fields. */
299#define emit_getgl(as, r, field) \
300 emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
301#define emit_setgl(as, r, field) \
302 emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)
303
304/* Trace number is determined from pc of exit instruction. */
305#define emit_setvmstate(as, i) UNUSED(i)
306
307/* -- Emit control-flow instructions -------------------------------------- */
308
309/* Label for internal jumps. */
310typedef MCode *MCLabel;
311
312/* Return label pointing to current PC. */
313#define emit_label(as) ((as)->mcp)
314
315static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
316{
317 MCode *p = --as->mcp;
318 ptrdiff_t delta = target - p;
319 lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
320 *p = A64I_BCC | A64F_S19(delta) | cond;
321}
322
323static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
324{
325 MCode *p = --as->mcp;
326 ptrdiff_t delta = target - p;
327 lj_assertA(A64F_S_OK(delta, 26), "branch target out of range");
328 *p = ai | A64F_S26(delta);
329}
330
331static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
332{
333 MCode *p = --as->mcp;
334 ptrdiff_t delta = target - p;
335 lj_assertA(bit < 63, "bit number out of range");
336 lj_assertA(A64F_S_OK(delta, 14), "branch target out of range");
337 if (bit > 31) ai |= A64I_X;
338 *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
339}
340
341static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
342{
343 MCode *p = --as->mcp;
344 ptrdiff_t delta = target - p;
345 lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
346 *p = ai | A64F_S19(delta) | r;
347}
348
349#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))
350
351static void emit_call(ASMState *as, void *target)
352{
353 MCode *p = --as->mcp;
354 ptrdiff_t delta = (char *)target - (char *)p;
355 if (A64F_S_OK(delta>>2, 26)) {
356 *p = A64I_BL | A64F_S26(delta>>2);
357 } else { /* Target out of range: need indirect call. But don't use R0-R7. */
358 Reg r = ra_allock(as, i64ptr(target),
359 RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
360 *p = A64I_BLR | A64F_N(r);
361 }
362}
363
364/* -- Emit generic operations --------------------------------------------- */
365
366/* Generic move between two regs. */
367static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
368{
369 if (dst >= RID_MAX_GPR) {
370 emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
371 (dst & 31), (src & 31));
372 return;
373 }
374 if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */
375 MCode ins = *as->mcp, swp = (src^dst);
376 if ((ins & 0xbf800000) == 0xb9000000) {
377 if (!((ins ^ (dst << 5)) & 0x000003e0))
378 *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */
379 if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
380 *as->mcp = ins ^ swp; /* Swap D in store. */
381 }
382 }
383 emit_dm(as, A64I_MOVx, dst, src);
384}
385
386/* Generic load of register with base and (small) offset address. */
387static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
388{
389 if (r >= RID_MAX_GPR)
390 emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
391 else
392 emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
393}
394
395/* Generic store of register with base and (small) offset address. */
396static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
397{
398 if (r >= RID_MAX_GPR)
399 emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
400 else
401 emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
402}
403
404/* Emit an arithmetic operation with a constant operand. */
405static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
406 int32_t i, RegSet allow)
407{
408 uint32_t k = emit_isk12(i);
409 if (k)
410 emit_dn(as, ai^k, dest, src);
411 else
412 emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
413}
414
415/* Add offset to pointer. */
416static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
417{
418 if (ofs)
419 emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
420 ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
421}
422
423#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
424
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index ff5a3fe2..c13615dd 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -3,6 +3,32 @@
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/ 4*/
5 5
6#if LJ_64
7static intptr_t get_k64val(ASMState *as, IRRef ref)
8{
9 IRIns *ir = IR(ref);
10 if (ir->o == IR_KINT64) {
11 return (intptr_t)ir_kint64(ir)->u64;
12 } else if (ir->o == IR_KGC) {
13 return (intptr_t)ir_kgc(ir);
14 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
15 return (intptr_t)ir_kptr(ir);
16 } else if (LJ_SOFTFP && ir->o == IR_KNUM) {
17 return (intptr_t)ir_knum(ir)->u64;
18 } else {
19 lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
20 "bad 64 bit const IR op %d", ir->o);
21 return ir->i; /* Sign-extended. */
22 }
23}
24#endif
25
26#if LJ_64
27#define get_kval(as, ref) get_k64val(as, ref)
28#else
29#define get_kval(as, ref) (IR((ref))->i)
30#endif
31
6/* -- Emit basic instructions --------------------------------------------- */ 32/* -- Emit basic instructions --------------------------------------------- */
7 33
8static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) 34static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt)
@@ -35,7 +61,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
35 61
36static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) 62static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
37{ 63{
38 if ((as->flags & JIT_F_MIPS32R2)) { 64 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
39 emit_dta(as, MIPSI_ROTR, dest, src, shift); 65 emit_dta(as, MIPSI_ROTR, dest, src, shift);
40 } else { 66 } else {
41 emit_dst(as, MIPSI_OR, dest, dest, tmp); 67 emit_dst(as, MIPSI_OR, dest, dest, tmp);
@@ -44,23 +70,32 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
44 } 70 }
45} 71}
46 72
73#if LJ_64 || LJ_HASBUFFER
74static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
75 uint32_t lsb)
76{
77 *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb);
78}
79#endif
80
47/* -- Emit loads/stores --------------------------------------------------- */ 81/* -- Emit loads/stores --------------------------------------------------- */
48 82
49/* Prefer rematerialization of BASE/L from global_State over spills. */ 83/* Prefer rematerialization of BASE/L from global_State over spills. */
50#define emit_canremat(ref) ((ref) <= REF_BASE) 84#define emit_canremat(ref) ((ref) <= REF_BASE)
51 85
52/* Try to find a one step delta relative to another constant. */ 86/* Try to find a one step delta relative to another constant. */
53static int emit_kdelta1(ASMState *as, Reg t, int32_t i) 87static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i)
54{ 88{
55 RegSet work = ~as->freeset & RSET_GPR; 89 RegSet work = ~as->freeset & RSET_GPR;
56 while (work) { 90 while (work) {
57 Reg r = rset_picktop(work); 91 Reg r = rset_picktop(work);
58 IRRef ref = regcost_ref(as->cost[r]); 92 IRRef ref = regcost_ref(as->cost[r]);
59 lua_assert(r != t); 93 lj_assertA(r != rd, "dest reg %d not free", rd);
60 if (ref < ASMREF_L) { 94 if (ref < ASMREF_L) {
61 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 95 intptr_t delta = (intptr_t)((uintptr_t)i -
96 (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref)));
62 if (checki16(delta)) { 97 if (checki16(delta)) {
63 emit_tsi(as, MIPSI_ADDIU, t, r, delta); 98 emit_tsi(as, MIPSI_AADDIU, rd, r, delta);
64 return 1; 99 return 1;
65 } 100 }
66 } 101 }
@@ -76,8 +111,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
76 emit_ti(as, MIPSI_LI, r, i); 111 emit_ti(as, MIPSI_LI, r, i);
77 } else { 112 } else {
78 if ((i & 0xffff)) { 113 if ((i & 0xffff)) {
79 int32_t jgl = i32ptr(J2G(as->J)); 114 intptr_t jgl = (intptr_t)(void *)J2G(as->J);
80 if ((uint32_t)(i-jgl) < 65536) { 115 if ((uintptr_t)(i-jgl) < 65536) {
81 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); 116 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768);
82 return; 117 return;
83 } else if (emit_kdelta1(as, r, i)) { 118 } else if (emit_kdelta1(as, r, i)) {
@@ -92,16 +127,49 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
92 } 127 }
93} 128}
94 129
130#if LJ_64
131/* Load a 64 bit constant into a GPR. */
132static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
133{
134 if (checki32((int64_t)u64)) {
135 emit_loadi(as, r, (int32_t)u64);
136 } else {
137 uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J);
138 if (delta < 65536) {
139 emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768));
140 } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
141 return;
142 } else {
143 /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */
144 if ((u64 & 0xffff)) {
145 emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
146 }
147 if (((u64 >> 16) & 0xffff)) {
148 emit_dta(as, MIPSI_DSLL, r, r, 16);
149 emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff);
150 emit_dta(as, MIPSI_DSLL, r, r, 16);
151 } else {
152 emit_dta(as, MIPSI_DSLL32, r, r, 0);
153 }
154 emit_loadi(as, r, (int32_t)(u64 >> 32));
155 }
156 /* TODO: There are probably more optimization opportunities. */
157 }
158}
159
160#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
161#else
95#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 162#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
163#endif
96 164
97static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 165static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
98static void ra_allockreg(ASMState *as, int32_t k, Reg r); 166static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
99 167
100/* Get/set from constant pointer. */ 168/* Get/set from constant pointer. */
101static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) 169static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
102{ 170{
103 int32_t jgl = i32ptr(J2G(as->J)); 171 intptr_t jgl = (intptr_t)(J2G(as->J));
104 int32_t i = i32ptr(p); 172 intptr_t i = (intptr_t)(p);
105 Reg base; 173 Reg base;
106 if ((uint32_t)(i-jgl) < 65536) { 174 if ((uint32_t)(i-jgl) < 65536) {
107 i = i-jgl-32768; 175 i = i-jgl-32768;
@@ -112,8 +180,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
112 emit_tsi(as, mi, r, base, i); 180 emit_tsi(as, mi, r, base, i);
113} 181}
114 182
115#define emit_loadn(as, r, tv) \ 183#if LJ_64
116 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) 184static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
185{
186 const uint64_t *k = &ir_k64(ir)->u64;
187 Reg r64 = r;
188 if (rset_test(RSET_FPR, r)) {
189 r64 = RID_TMP;
190 emit_tg(as, MIPSI_DMTC1, r64, r);
191 }
192 if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536)
193 emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0);
194 else
195 emit_loadu64(as, r64, *k);
196}
197#else
198#define emit_loadk64(as, r, ir) \
199 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
200#endif
117 201
118/* Get/set global_State fields. */ 202/* Get/set global_State fields. */
119static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) 203static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@@ -122,9 +206,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
122} 206}
123 207
124#define emit_getgl(as, r, field) \ 208#define emit_getgl(as, r, field) \
125 emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) 209 emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field))
126#define emit_setgl(as, r, field) \ 210#define emit_setgl(as, r, field) \
127 emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) 211 emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field))
128 212
129/* Trace number is determined from per-trace exit stubs. */ 213/* Trace number is determined from per-trace exit stubs. */
130#define emit_setvmstate(as, i) UNUSED(i) 214#define emit_setvmstate(as, i) UNUSED(i)
@@ -141,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target)
141{ 225{
142 MCode *p = as->mcp; 226 MCode *p = as->mcp;
143 ptrdiff_t delta = target - p; 227 ptrdiff_t delta = target - p;
144 lua_assert(((delta + 0x8000) >> 16) == 0); 228 lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
145 *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); 229 *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu);
146 as->mcp = p; 230 as->mcp = p;
147} 231}
@@ -152,16 +236,31 @@ static void emit_jmp(ASMState *as, MCode *target)
152 emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); 236 emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target));
153} 237}
154 238
155static void emit_call(ASMState *as, void *target) 239static void emit_call(ASMState *as, void *target, int needcfa)
156{ 240{
157 MCode *p = as->mcp; 241 MCode *p = as->mcp;
158 *--p = MIPSI_NOP; 242#if LJ_TARGET_MIPSR6
159 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) 243 ptrdiff_t delta = (char *)target - (char *)p;
244 if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */
245 *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu);
246 as->mcp = p;
247 return;
248 }
249#endif
250 *--p = MIPSI_NOP; /* Delay slot. */
251 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
252#if !LJ_TARGET_MIPSR6
253 *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
254 (((uintptr_t)target >>2) & 0x03ffffffu);
255#else
160 *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); 256 *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
161 else /* Target out of range: need indirect call. */ 257#endif
258 } else { /* Target out of range: need indirect call. */
162 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); 259 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
260 needcfa = 1;
261 }
163 as->mcp = p; 262 as->mcp = p;
164 ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); 263 if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
165} 264}
166 265
167/* -- Emit generic operations --------------------------------------------- */ 266/* -- Emit generic operations --------------------------------------------- */
@@ -178,32 +277,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); 277 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
179} 278}
180 279
181/* Generic load of register from stack slot. */ 280/* Generic load of register with base and (small) offset address. */
182static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 281static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
183{ 282{
184 if (r < RID_MAX_GPR) 283 if (r < RID_MAX_GPR)
185 emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); 284 emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs);
186 else 285 else
187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, 286 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
188 (r & 31), RID_SP, ofs); 287 (r & 31), base, ofs);
189} 288}
190 289
191/* Generic store of register to stack slot. */ 290/* Generic store of register with base and (small) offset address. */
192static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 291static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
193{ 292{
194 if (r < RID_MAX_GPR) 293 if (r < RID_MAX_GPR)
195 emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); 294 emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs);
196 else 295 else
197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, 296 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
198 (r&31), RID_SP, ofs); 297 (r&31), base, ofs);
199} 298}
200 299
201/* Add offset to pointer. */ 300/* Add offset to pointer. */
202static void emit_addptr(ASMState *as, Reg r, int32_t ofs) 301static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
203{ 302{
204 if (ofs) { 303 if (ofs) {
205 lua_assert(checki16(ofs)); 304 lj_assertA(checki16(ofs), "offset %d out of range", ofs);
206 emit_tsi(as, MIPSI_ADDIU, r, r, ofs); 305 emit_tsi(as, MIPSI_AADDIU, r, r, ofs);
207 } 306 }
208} 307}
209 308
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index 6d8e97c3..649a6d17 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs,
41 41
42static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) 42static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n)
43{ 43{
44 lua_assert(n >= 0 && n < 32); 44 lj_assertA(n >= 0 && n < 32, "shift out or range");
45 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); 45 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n);
46} 46}
47 47
48static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) 48static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
49{ 49{
50 lua_assert(n >= 0 && n < 32); 50 lj_assertA(n >= 0 && n < 32, "shift out or range");
51 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); 51 emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31);
52} 52}
53 53
@@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
57#define emit_canremat(ref) ((ref) <= REF_BASE) 57#define emit_canremat(ref) ((ref) <= REF_BASE)
58 58
59/* Try to find a one step delta relative to another constant. */ 59/* Try to find a one step delta relative to another constant. */
60static int emit_kdelta1(ASMState *as, Reg t, int32_t i) 60static int emit_kdelta1(ASMState *as, Reg rd, int32_t i)
61{ 61{
62 RegSet work = ~as->freeset & RSET_GPR; 62 RegSet work = ~as->freeset & RSET_GPR;
63 while (work) { 63 while (work) {
64 Reg r = rset_picktop(work); 64 Reg r = rset_picktop(work);
65 IRRef ref = regcost_ref(as->cost[r]); 65 IRRef ref = regcost_ref(as->cost[r]);
66 lua_assert(r != t); 66 lj_assertA(r != rd, "dest reg %d not free", rd);
67 if (ref < ASMREF_L) { 67 if (ref < ASMREF_L) {
68 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 68 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
69 if (checki16(delta)) { 69 if (checki16(delta)) {
70 emit_tai(as, PPCI_ADDI, t, r, delta); 70 emit_tai(as, PPCI_ADDI, rd, r, delta);
71 return 1; 71 return 1;
72 } 72 }
73 } 73 }
@@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
98 98
99#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 99#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
100 100
101static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 101static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
102 102
103/* Get/set from constant pointer. */ 103/* Get/set from constant pointer. */
104static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) 104static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
@@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
115 emit_tai(as, pi, r, base, i); 115 emit_tai(as, pi, r, base, i);
116} 116}
117 117
118#define emit_loadn(as, r, tv) \ 118#define emit_loadk64(as, r, ir) \
119 emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) 119 emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
120 120
121/* Get/set global_State fields. */ 121/* Get/set global_State fields. */
122static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) 122static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)
@@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target)
144{ 144{
145 MCode *p = --as->mcp; 145 MCode *p = --as->mcp;
146 ptrdiff_t delta = (char *)target - (char *)p; 146 ptrdiff_t delta = (char *)target - (char *)p;
147 lua_assert(((delta + 0x8000) >> 16) == 0); 147 lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
148 pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); 148 pi ^= (delta & 0x8000) * (PPCF_Y/0x8000);
149 *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); 149 *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu);
150} 150}
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
186 emit_fb(as, PPCI_FMR, dst, src); 186 emit_fb(as, PPCI_FMR, dst, src);
187} 187}
188 188
189/* Generic load of register from stack slot. */ 189/* Generic load of register with base and (small) offset address. */
190static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 190static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
191{ 191{
192 if (r < RID_MAX_GPR) 192 if (r < RID_MAX_GPR)
193 emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); 193 emit_tai(as, PPCI_LWZ, r, base, ofs);
194 else 194 else
195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); 195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
196} 196}
197 197
198/* Generic store of register to stack slot. */ 198/* Generic store of register with base and (small) offset address. */
199static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 199static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
200{ 200{
201 if (r < RID_MAX_GPR) 201 if (r < RID_MAX_GPR)
202 emit_tai(as, PPCI_STW, r, RID_SP, ofs); 202 emit_tai(as, PPCI_STW, r, base, ofs);
203 else 203 else
204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); 204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
205} 205}
206 206
207/* Emit a compare (for equality) with a constant operand. */ 207/* Emit a compare (for equality) with a constant operand. */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 079ef68e..85202768 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -13,10 +13,17 @@
13 if (rex != 0x40) *--(p) = rex; } 13 if (rex != 0x40) *--(p) = rex; }
14#define FORCE_REX 0x200 14#define FORCE_REX 0x200
15#define REX_64 (FORCE_REX|0x080000) 15#define REX_64 (FORCE_REX|0x080000)
16#define VEX_64 0x800000
16#else 17#else
17#define REXRB(p, rr, rb) ((void)0) 18#define REXRB(p, rr, rb) ((void)0)
18#define FORCE_REX 0 19#define FORCE_REX 0
19#define REX_64 0 20#define REX_64 0
21#define VEX_64 0
22#endif
23#if LJ_GC64
24#define REX_GC64 REX_64
25#else
26#define REX_GC64 0
20#endif 27#endif
21 28
22#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) 29#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -31,7 +38,14 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
31 MCode *p, int delta) 38 MCode *p, int delta)
32{ 39{
33 int n = (int8_t)xo; 40 int n = (int8_t)xo;
34#if defined(__GNUC__) 41 if (n == -60) { /* VEX-encoded instruction */
42#if LJ_64
43 xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
44#endif
45 *(uint32_t *)(p+delta-5) = (uint32_t)xo;
46 return p+delta-5;
47 }
48#if defined(__GNUC__) || defined(__clang__)
35 if (__builtin_constant_p(xo) && n == -2) 49 if (__builtin_constant_p(xo) && n == -2)
36 p[delta-2] = (MCode)(xo >> 24); 50 p[delta-2] = (MCode)(xo >> 24);
37 else if (__builtin_constant_p(xo) && n == -3) 51 else if (__builtin_constant_p(xo) && n == -3)
@@ -78,33 +92,24 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
78/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */ 92/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
79static int32_t ptr2addr(const void *p) 93static int32_t ptr2addr(const void *p)
80{ 94{
81 lua_assert((uintptr_t)p < (uintptr_t)0x80000000); 95 lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range");
82 return i32ptr(p); 96 return i32ptr(p);
83} 97}
84#else 98#else
85#define ptr2addr(p) (i32ptr((p))) 99#define ptr2addr(p) (i32ptr((p)))
86#endif 100#endif
87 101
88/* op r, [addr] */
89static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
90{
91 MCode *p = as->mcp;
92 *(int32_t *)(p-4) = ptr2addr(addr);
93#if LJ_64
94 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
95 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
96#else
97 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
98#endif
99}
100
101/* op r, [base+ofs] */ 102/* op r, [base+ofs] */
102static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) 103static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
103{ 104{
104 MCode *p = as->mcp; 105 MCode *p = as->mcp;
105 x86Mode mode; 106 x86Mode mode;
106 if (ra_hasreg(rb)) { 107 if (ra_hasreg(rb)) {
107 if (ofs == 0 && (rb&7) != RID_EBP) { 108 if (LJ_GC64 && rb == RID_RIP) {
109 mode = XM_OFS0;
110 p -= 4;
111 *(int32_t *)p = ofs;
112 } else if (ofs == 0 && (rb&7) != RID_EBP) {
108 mode = XM_OFS0; 113 mode = XM_OFS0;
109 } else if (checki8(ofs)) { 114 } else if (checki8(ofs)) {
110 *--p = (MCode)ofs; 115 *--p = (MCode)ofs;
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
202 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); 207 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
203 rb = RID_ESP; 208 rb = RID_ESP;
204#endif 209#endif
210 } else if (LJ_GC64 && rb == RID_RIP) {
211 lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index");
212 mode = XM_OFS0;
213 p -= 4;
214 *(int32_t *)p = as->mrm.ofs;
205 } else { 215 } else {
206 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { 216 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
207 mode = XM_OFS0; 217 mode = XM_OFS0;
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 251
242/* -- Emit loads/stores --------------------------------------------------- */ 252/* -- Emit loads/stores --------------------------------------------------- */
243 253
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 254/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 255static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 256{
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
259/* Get/set global_State fields. */ 265/* Get/set global_State fields. */
260#define emit_opgl(as, xo, r, field) \ 266#define emit_opgl(as, xo, r, field) \
261 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) 267 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
262#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) 268#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
263#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) 269#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
264 270
265#define emit_setvmstate(as, i) \ 271#define emit_setvmstate(as, i) \
266 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) 272 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
285 } 291 }
286} 292}
287 293
294#if LJ_GC64
295#define dispofs(as, k) \
296 ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
297#define mcpofs(as, k) \
298 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
299#define mctopofs(as, k) \
300 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
301/* mov r, addr */
302#define emit_loada(as, r, addr) \
303 emit_loadu64(as, (r), (uintptr_t)(addr))
304#else
288/* mov r, addr */ 305/* mov r, addr */
289#define emit_loada(as, r, addr) \ 306#define emit_loada(as, r, addr) \
290 emit_loadi(as, (r), ptr2addr((addr))) 307 emit_loadi(as, (r), ptr2addr((addr)))
308#endif
291 309
292#if LJ_64 310#if LJ_64
293/* mov r, imm64 or shorter 32 bit extended load. */ 311/* mov r, imm64 or shorter 32 bit extended load. */
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
299 MCode *p = as->mcp; 317 MCode *p = as->mcp;
300 *(int32_t *)(p-4) = (int32_t)u64; 318 *(int32_t *)(p-4) = (int32_t)u64;
301 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); 319 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
320#if LJ_GC64
321 } else if (checki32(dispofs(as, u64))) {
322 emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
323 } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
324 /* Since as->realign assumes the code size doesn't change, check
325 ** RIP-relative addressing reachability for both as->mcp and as->mctop.
326 */
327 emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
328#endif
302 } else { /* Full-size 64 bit load. */ 329 } else { /* Full-size 64 bit load. */
303 MCode *p = as->mcp; 330 MCode *p = as->mcp;
304 *(uint64_t *)(p-8) = u64; 331 *(uint64_t *)(p-8) = u64;
@@ -310,13 +337,90 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
310} 337}
311#endif 338#endif
312 339
313/* movsd r, [&tv->n] / xorps r, r */ 340/* op r, [addr] */
314static void emit_loadn(ASMState *as, Reg r, cTValue *tv) 341static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
315{ 342{
316 if (tvispzero(tv)) /* Use xor only for +0. */ 343#if LJ_GC64
317 emit_rr(as, XO_XORPS, r, r); 344 if (checki32(dispofs(as, addr))) {
318 else 345 emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
319 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 346 } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
347 emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
348 } else if (!checki32((intptr_t)addr)) {
349 Reg ra = (rr & 15);
350 if (xo != XO_MOV) {
351 /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
352 uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
353 uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
354 ra = RID_DISPATCH;
355 if (checku32(dispaddr)) {
356 emit_loadi(as, ra, (int32_t)dispaddr);
357 } else { /* Full-size 64 bit load. */
358 MCode *p = as->mcp;
359 *(uint64_t *)(p-8) = dispaddr;
360 p[-9] = (MCode)(XI_MOVri+(ra&7));
361 p[-10] = 0x48 + ((ra>>3)&1);
362 p -= 10;
363 as->mcp = p;
364 }
365 if (xo == XO_GROUP3b) emit_i8(as, i8);
366 }
367 emit_rmro(as, xo, rr, ra, 0);
368 emit_loadu64(as, ra, (uintptr_t)addr);
369 } else
370#endif
371 {
372 MCode *p = as->mcp;
373 *(int32_t *)(p-4) = ptr2addr(addr);
374#if LJ_64
375 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
376 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
377#else
378 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
379#endif
380 }
381}
382
383/* Load 64 bit IR constant into register. */
384static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
385{
386 Reg r64;
387 x86Op xo;
388 const uint64_t *k = &ir_k64(ir)->u64;
389 if (rset_test(RSET_FPR, r)) {
390 r64 = r;
391 xo = XO_MOVSD;
392 } else {
393 r64 = r | REX_64;
394 xo = XO_MOV;
395 }
396 if (*k == 0) {
397 emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
398#if LJ_GC64
399 } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
400 (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
401 emit_rma(as, xo, r64, k);
402 } else {
403 if (ir->i) {
404 lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
405 "bad interned 64 bit constant");
406 } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
407 emit_loadu64(as, r, *k);
408 return;
409 } else {
410 /* If all else fails, add the FP constant at the MCode area bottom. */
411 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
412 *(uint64_t *)as->mcbot = *k;
413 ir->i = (int32_t)(as->mctop - as->mcbot);
414 as->mcbot += 8;
415 as->mclim = as->mcbot + MCLIM_REDZONE;
416 lj_mcode_commitbot(as->J, as->mcbot);
417 }
418 emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
419#else
420 } else {
421 emit_rma(as, xo, r64, k);
422#endif
423 }
320} 424}
321 425
322/* -- Emit control-flow instructions -------------------------------------- */ 426/* -- Emit control-flow instructions -------------------------------------- */
@@ -330,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target)
330{ 434{
331 MCode *p = as->mcp; 435 MCode *p = as->mcp;
332 ptrdiff_t delta = target - p; 436 ptrdiff_t delta = target - p;
333 lua_assert(delta == (int8_t)delta); 437 lj_assertA(delta == (int8_t)delta, "short jump target out of range");
334 p[-1] = (MCode)(int8_t)delta; 438 p[-1] = (MCode)(int8_t)delta;
335 p[-2] = XI_JMPs; 439 p[-2] = XI_JMPs;
336 as->mcp = p - 2; 440 as->mcp = p - 2;
@@ -342,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target)
342{ 446{
343 MCode *p = as->mcp; 447 MCode *p = as->mcp;
344 ptrdiff_t delta = target - p; 448 ptrdiff_t delta = target - p;
345 lua_assert(delta == (int8_t)delta); 449 lj_assertA(delta == (int8_t)delta, "short jump target out of range");
346 p[-1] = (MCode)(int8_t)delta; 450 p[-1] = (MCode)(int8_t)delta;
347 p[-2] = (MCode)(XI_JCCs+(cc&15)); 451 p[-2] = (MCode)(XI_JCCs+(cc&15));
348 as->mcp = p - 2; 452 as->mcp = p - 2;
@@ -368,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source)
368#define emit_label(as) ((as)->mcp) 472#define emit_label(as) ((as)->mcp)
369 473
370/* Compute relative 32 bit offset for jump and call instructions. */ 474/* Compute relative 32 bit offset for jump and call instructions. */
371static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) 475static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
372{ 476{
373 ptrdiff_t delta = target - p; 477 ptrdiff_t delta = target - p;
374 lua_assert(delta == (int32_t)delta); 478 UNUSED(J);
479 lj_assertJ(delta == (int32_t)delta, "jump target out of range");
375 return (int32_t)delta; 480 return (int32_t)delta;
376} 481}
377 482
@@ -379,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
379static void emit_jcc(ASMState *as, int cc, MCode *target) 484static void emit_jcc(ASMState *as, int cc, MCode *target)
380{ 485{
381 MCode *p = as->mcp; 486 MCode *p = as->mcp;
382 *(int32_t *)(p-4) = jmprel(p, target); 487 *(int32_t *)(p-4) = jmprel(as->J, p, target);
383 p[-5] = (MCode)(XI_JCCn+(cc&15)); 488 p[-5] = (MCode)(XI_JCCn+(cc&15));
384 p[-6] = 0x0f; 489 p[-6] = 0x0f;
385 as->mcp = p - 6; 490 as->mcp = p - 6;
@@ -389,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target)
389static void emit_jmp(ASMState *as, MCode *target) 494static void emit_jmp(ASMState *as, MCode *target)
390{ 495{
391 MCode *p = as->mcp; 496 MCode *p = as->mcp;
392 *(int32_t *)(p-4) = jmprel(p, target); 497 *(int32_t *)(p-4) = jmprel(as->J, p, target);
393 p[-5] = XI_JMP; 498 p[-5] = XI_JMP;
394 as->mcp = p - 5; 499 as->mcp = p - 5;
395} 500}
@@ -406,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target)
406 return; 511 return;
407 } 512 }
408#endif 513#endif
409 *(int32_t *)(p-4) = jmprel(p, target); 514 *(int32_t *)(p-4) = jmprel(as->J, p, target);
410 p[-5] = XI_CALL; 515 p[-5] = XI_CALL;
411 as->mcp = p - 5; 516 as->mcp = p - 5;
412} 517}
@@ -418,8 +523,10 @@ static void emit_call_(ASMState *as, MCode *target)
418/* Use 64 bit operations to handle 64 bit IR types. */ 523/* Use 64 bit operations to handle 64 bit IR types. */
419#if LJ_64 524#if LJ_64
420#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) 525#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
526#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
421#else 527#else
422#define REX_64IR(ir, r) (r) 528#define REX_64IR(ir, r) (r)
529#define VEX_64IR(ir, r) (r)
423#endif 530#endif
424 531
425/* Generic move between two regs. */ 532/* Generic move between two regs. */
@@ -429,35 +536,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
429 if (dst < RID_MAX_GPR) 536 if (dst < RID_MAX_GPR)
430 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 537 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
431 else 538 else
432 emit_rr(as, XMM_MOVRR(as), dst, src); 539 emit_rr(as, XO_MOVAPS, dst, src);
433} 540}
434 541
435/* Generic load of register from stack slot. */ 542/* Generic load of register with base and (small) offset address. */
436static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 543static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
437{ 544{
438 if (r < RID_MAX_GPR) 545 if (r < RID_MAX_GPR)
439 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 546 emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
440 else 547 else
441 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 548 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
442} 549}
443 550
444/* Generic store of register to stack slot. */ 551/* Generic store of register with base and (small) offset address. */
445static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 552static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
446{ 553{
447 if (r < RID_MAX_GPR) 554 if (r < RID_MAX_GPR)
448 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); 555 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
449 else 556 else
450 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); 557 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
451} 558}
452 559
453/* Add offset to pointer. */ 560/* Add offset to pointer. */
454static void emit_addptr(ASMState *as, Reg r, int32_t ofs) 561static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
455{ 562{
456 if (ofs) { 563 if (ofs) {
457 if ((as->flags & JIT_F_LEA_AGU)) 564 emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
458 emit_rmro(as, XO_LEA, r, r, ofs);
459 else
460 emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
461 } 565 }
462} 566}
463 567
diff --git a/src/lj_err.c b/src/lj_err.c
index 76cd05ce..fda4a59c 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -16,6 +16,7 @@
16#include "lj_ff.h" 16#include "lj_ff.h"
17#include "lj_trace.h" 17#include "lj_trace.h"
18#include "lj_vm.h" 18#include "lj_vm.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** LuaJIT can either use internal or external frame unwinding: 22** LuaJIT can either use internal or external frame unwinding:
@@ -28,12 +29,18 @@
28** Pros and Cons: 29** Pros and Cons:
29** 30**
30** - EXT requires unwind tables for *all* functions on the C stack between 31** - EXT requires unwind tables for *all* functions on the C stack between
31** the pcall/catch and the error/throw. This is the default on x64, 32** the pcall/catch and the error/throw. C modules used by Lua code can
32** but needs to be manually enabled on x86/PPC for non-C++ code. 33** throw errors, so these need to have unwind tables, too. Transitively
34** this applies to all system libraries used by C modules -- at least
35** when they have callbacks which may throw an error.
33** 36**
34** - INT is faster when actually throwing errors (but this happens rarely). 37** - INT is faster when actually throwing errors, but this happens rarely.
35** Setting up error handlers is zero-cost in any case. 38** Setting up error handlers is zero-cost in any case.
36** 39**
40** - INT needs to save *all* callee-saved registers when entering the
41** interpreter. EXT only needs to save those actually used inside the
42** interpreter. JIT-compiled code may need to save some more.
43**
37** - EXT provides full interoperability with C++ exceptions. You can throw 44** - EXT provides full interoperability with C++ exceptions. You can throw
38** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. 45** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
39** C++ destructors are called as needed. C++ exceptions caught by pcall 46** C++ destructors are called as needed. C++ exceptions caught by pcall
@@ -45,27 +52,38 @@
45** the wrapper function feature. Lua errors thrown through C++ frames 52** the wrapper function feature. Lua errors thrown through C++ frames
46** cannot be caught by C++ code and C++ destructors are not run. 53** cannot be caught by C++ code and C++ destructors are not run.
47** 54**
48** EXT is the default on x64 systems, INT is the default on all other systems. 55** - EXT can handle errors from internal helper functions that are called
56** from JIT-compiled code (except for Windows/x86 and 32 bit ARM).
57** INT has no choice but to call the panic handler, if this happens.
58** Note: this is mainly relevant for out-of-memory errors.
59**
60** EXT is the default on all systems where the toolchain produces unwind
61** tables by default (*). This is hard-coded and/or detected in src/Makefile.
62** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
63**
64** INT is the default on all other systems.
65**
66** EXT can be manually enabled for toolchains that are able to produce
67** conforming unwind tables:
68** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
69** As explained above, *all* C code used directly or indirectly by LuaJIT
70** must be compiled with -funwind-tables (or -fexceptions). C++ code must
71** *not* be compiled with -fno-exceptions.
72**
73** If you're unsure whether error handling inside the VM works correctly,
74** try running this and check whether it prints "OK":
49** 75**
50** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack 76** luajit -e "print(select(2, load('OK')):match('OK'))"
51** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
52** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
53** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
54** and all C libraries that have callbacks which may be used to call back
55** into Lua. C++ code must *not* be compiled with -fno-exceptions.
56** 77**
57** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH. 78** (*) Originally, toolchains only generated unwind tables for C++ code. For
58** EXT is mandatory on WIN64 since the calling convention has an abundance 79** interoperability reasons, this can be manually enabled for plain C code,
59** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). 80** too (with -funwind-tables). With the introduction of the x64 architecture,
60** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). 81** the corresponding POSIX and Windows ABIs mandated unwind tables for all
82** code. Over the following years most desktop and server platforms have
83** enabled unwind tables by default on all architectures. OTOH mobile and
84** embedded platforms do not consistently mandate unwind tables.
61*/ 85*/
62 86
63#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
64#define LJ_UNWIND_EXT 1
65#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
66#define LJ_UNWIND_EXT 1
67#endif
68
69/* -- Error messages ------------------------------------------------------ */ 87/* -- Error messages ------------------------------------------------------ */
70 88
71/* Error message strings. */ 89/* Error message strings. */
@@ -98,14 +116,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
98 TValue *top = restorestack(L, -nres); 116 TValue *top = restorestack(L, -nres);
99 if (frame < top) { /* Frame reached? */ 117 if (frame < top) { /* Frame reached? */
100 if (errcode) { 118 if (errcode) {
101 L->cframe = cframe_prev(cf);
102 L->base = frame+1; 119 L->base = frame+1;
120 L->cframe = cframe_prev(cf);
103 unwindstack(L, top); 121 unwindstack(L, top);
104 } 122 }
105 return cf; 123 return cf;
106 } 124 }
107 } 125 }
108 if (frame <= tvref(L->stack)) 126 if (frame <= tvref(L->stack)+LJ_FR2)
109 break; 127 break;
110 switch (frame_typep(frame)) { 128 switch (frame_typep(frame)) {
111 case FRAME_LUA: /* Lua frame. */ 129 case FRAME_LUA: /* Lua frame. */
@@ -113,14 +131,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
113 frame = frame_prevl(frame); 131 frame = frame_prevl(frame);
114 break; 132 break;
115 case FRAME_C: /* C frame. */ 133 case FRAME_C: /* C frame. */
116#if LJ_HASFFI
117 unwind_c: 134 unwind_c:
118#endif
119#if LJ_UNWIND_EXT 135#if LJ_UNWIND_EXT
120 if (errcode) { 136 if (errcode) {
121 L->cframe = cframe_prev(cf);
122 L->base = frame_prevd(frame) + 1; 137 L->base = frame_prevd(frame) + 1;
123 unwindstack(L, frame); 138 L->cframe = cframe_prev(cf);
139 unwindstack(L, frame - LJ_FR2);
124 } else if (cf != stopcf) { 140 } else if (cf != stopcf) {
125 cf = cframe_prev(cf); 141 cf = cframe_prev(cf);
126 frame = frame_prevd(frame); 142 frame = frame_prevd(frame);
@@ -143,16 +159,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
143 return cf; 159 return cf;
144 } 160 }
145 if (errcode) { 161 if (errcode) {
146 L->cframe = cframe_prev(cf);
147 L->base = frame_prevd(frame) + 1; 162 L->base = frame_prevd(frame) + 1;
148 unwindstack(L, frame); 163 L->cframe = cframe_prev(cf);
164 unwindstack(L, frame - LJ_FR2);
149 } 165 }
150 return cf; 166 return cf;
151 case FRAME_CONT: /* Continuation frame. */ 167 case FRAME_CONT: /* Continuation frame. */
152#if LJ_HASFFI 168 if (frame_iscont_fficb(frame))
153 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
154 goto unwind_c; 169 goto unwind_c;
155#endif
156 /* fallthrough */ 170 /* fallthrough */
157 case FRAME_VARG: /* Vararg frame. */ 171 case FRAME_VARG: /* Vararg frame. */
158 frame = frame_prevd(frame); 172 frame = frame_prevd(frame);
@@ -166,8 +180,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
166 } 180 }
167 if (frame_typep(frame) == FRAME_PCALL) 181 if (frame_typep(frame) == FRAME_PCALL)
168 hook_leave(G(L)); 182 hook_leave(G(L));
169 L->cframe = cf;
170 L->base = frame_prevd(frame) + 1; 183 L->base = frame_prevd(frame) + 1;
184 L->cframe = cf;
171 unwindstack(L, L->base); 185 unwindstack(L, L->base);
172 } 186 }
173 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); 187 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -175,8 +189,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
175 } 189 }
176 /* No C frame. */ 190 /* No C frame. */
177 if (errcode) { 191 if (errcode) {
192 L->base = tvref(L->stack)+1+LJ_FR2;
178 L->cframe = NULL; 193 L->cframe = NULL;
179 L->base = tvref(L->stack)+1;
180 unwindstack(L, L->base); 194 unwindstack(L, L->base);
181 if (G(L)->panic) 195 if (G(L)->panic)
182 G(L)->panic(L); 196 G(L)->panic(L);
@@ -187,33 +201,206 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
187 201
188/* -- External frame unwinding -------------------------------------------- */ 202/* -- External frame unwinding -------------------------------------------- */
189 203
190#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN 204#if LJ_ABI_WIN
191 205
192/* 206/*
193** We have to use our own definitions instead of the mandatory (!) unwind.h, 207** Someone in Redmond owes me several days of my life. A lot of this is
194** since various OS, distros and compilers mess up the header installation. 208** undocumented or just plain wrong on MSDN. Some of it can be gathered
209** from 3rd party docs or must be found by trial-and-error. They really
210** don't want you to write your own language-specific exception handler
211** or to interact gracefully with MSVC. :-(
212**
213** Apparently MSVC doesn't call C++ destructors for foreign exceptions
214** unless you compile your C++ code with /EHa. Unfortunately this means
215** catch (...) also catches things like access violations. The use of
216** _set_se_translator doesn't really help, because it requires /EHa, too.
195*/ 217*/
196 218
197typedef struct _Unwind_Exception 219#define WIN32_LEAN_AND_MEAN
220#include <windows.h>
221
222#if LJ_TARGET_X86
223typedef void *UndocumentedDispatcherContext; /* Unused on x86. */
224#else
225/* Taken from: http://www.nynaeve.net/?p=99 */
226typedef struct UndocumentedDispatcherContext {
227 ULONG64 ControlPc;
228 ULONG64 ImageBase;
229 PRUNTIME_FUNCTION FunctionEntry;
230 ULONG64 EstablisherFrame;
231 ULONG64 TargetIp;
232 PCONTEXT ContextRecord;
233 void (*LanguageHandler)(void);
234 PVOID HandlerData;
235 PUNWIND_HISTORY_TABLE HistoryTable;
236 ULONG ScopeIndex;
237 ULONG Fill0;
238} UndocumentedDispatcherContext;
239#endif
240
241/* Another wild guess. */
242extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
243
244#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
245/* Workaround for broken MinGW64 declaration. */
246VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
247#define RtlUnwindEx RtlUnwindEx_FIXED
248#endif
249
250#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
251#define LJ_GCC_EXCODE ((DWORD)0x20474343)
252
253#define LJ_EXCODE ((DWORD)0xe24c4a00)
254#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c))
255#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
256#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
257
258/* Windows exception handler for interpreter frame. */
259LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
260 void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
198{ 261{
199 uint64_t exclass; 262#if LJ_TARGET_X86
200 void (*excleanup)(int, struct _Unwind_Exception *); 263 void *cf = (char *)f - CFRAME_OFS_SEH;
201 uintptr_t p1, p2; 264#else
202} __attribute__((__aligned__)) _Unwind_Exception; 265 void *cf = f;
266#endif
267 lua_State *L = cframe_L(cf);
268 int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
269 LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
270 if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
271 /* Unwind internal frames. */
272 err_unwind(L, cf, errcode);
273 } else {
274 void *cf2 = err_unwind(L, cf, 0);
275 if (cf2) { /* We catch it, so start unwinding the upper frames. */
276 if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
277 rec->ExceptionCode == LJ_GCC_EXCODE) {
278#if !LJ_TARGET_CYGWIN
279 __DestructExceptionObject(rec, 1);
280#endif
281 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
282 } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
283 /* Don't catch access violations etc. */
284 return 1; /* ExceptionContinueSearch */
285 }
286#if LJ_TARGET_X86
287 UNUSED(ctx);
288 UNUSED(dispatch);
289 /* Call all handlers for all lower C frames (including ourselves) again
290 ** with EH_UNWINDING set. Then call the specified function, passing cf
291 ** and errcode.
292 */
293 lj_vm_rtlunwind(cf, (void *)rec,
294 (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
295 (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
296 /* lj_vm_rtlunwind does not return. */
297#else
298 /* Unwind the stack and call all handlers for all lower C frames
299 ** (including ourselves) again with EH_UNWINDING set. Then set
300 ** stack pointer = cf, result = errcode and jump to the specified target.
301 */
302 RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
303 lj_vm_unwind_ff_eh :
304 lj_vm_unwind_c_eh),
305 rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
306 /* RtlUnwindEx should never return. */
307#endif
308 }
309 }
310 return 1; /* ExceptionContinueSearch */
311}
312
313#if LJ_UNWIND_JIT
314
315#if LJ_TARGET_X64
316#define CONTEXT_REG_PC Rip
317#elif LJ_TARGET_ARM64
318#define CONTEXT_REG_PC Pc
319#else
320#error "NYI: Windows arch-specific unwinder for JIT-compiled code"
321#endif
322
323/* Windows unwinder for JIT-compiled code. */
324static void err_unwind_win_jit(global_State *g, int errcode)
325{
326 CONTEXT ctx;
327 UNWIND_HISTORY_TABLE hist;
328
329 memset(&hist, 0, sizeof(hist));
330 RtlCaptureContext(&ctx);
331 while (1) {
332 uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC;
333 void *hdata;
334 PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
335 if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */
336 ExitNo exitno;
337 uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
338 if (stub) { /* Jump to side exit to unwind the trace. */
339 ctx.CONTEXT_REG_PC = stub;
340 G2J(g)->exitcode = errcode;
341 RtlRestoreContext(&ctx, NULL); /* Does not return. */
342 }
343 break;
344 }
345 RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func,
346 &ctx, &hdata, &frame, NULL);
347 if (!addr) break;
348 }
349 /* Unwinding failed, if we end up here. */
350}
351#endif
352
353/* Raise Windows exception. */
354static void err_raise_ext(global_State *g, int errcode)
355{
356#if LJ_UNWIND_JIT
357 if (tvref(g->jit_base)) {
358 err_unwind_win_jit(g, errcode);
359 return; /* Unwinding failed. */
360 }
361#elif LJ_HASJIT
362 /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */
363 setmref(g->jit_base, NULL);
364#endif
365 UNUSED(g);
366 RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
367}
368
369#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
370
371/*
372** We have to use our own definitions instead of the mandatory (!) unwind.h,
373** since various OS, distros and compilers mess up the header installation.
374*/
203 375
204typedef struct _Unwind_Context _Unwind_Context; 376typedef struct _Unwind_Context _Unwind_Context;
205 377
206#define _URC_OK 0 378#define _URC_OK 0
379#define _URC_FATAL_PHASE2_ERROR 2
207#define _URC_FATAL_PHASE1_ERROR 3 380#define _URC_FATAL_PHASE1_ERROR 3
208#define _URC_HANDLER_FOUND 6 381#define _URC_HANDLER_FOUND 6
209#define _URC_INSTALL_CONTEXT 7 382#define _URC_INSTALL_CONTEXT 7
210#define _URC_CONTINUE_UNWIND 8 383#define _URC_CONTINUE_UNWIND 8
211#define _URC_FAILURE 9 384#define _URC_FAILURE 9
212 385
386#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
387#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
388#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
389#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
390
213#if !LJ_TARGET_ARM 391#if !LJ_TARGET_ARM
214 392
393typedef struct _Unwind_Exception
394{
395 uint64_t exclass;
396 void (*excleanup)(int, struct _Unwind_Exception *);
397 uintptr_t p1, p2;
398} __attribute__((__aligned__)) _Unwind_Exception;
399#define UNWIND_EXCEPTION_TYPE _Unwind_Exception
400
215extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); 401extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
216extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); 402extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
403extern uintptr_t _Unwind_GetIP(_Unwind_Context *);
217extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); 404extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
218extern void _Unwind_DeleteException(_Unwind_Exception *); 405extern void _Unwind_DeleteException(_Unwind_Exception *);
219extern int _Unwind_RaiseException(_Unwind_Exception *); 406extern int _Unwind_RaiseException(_Unwind_Exception *);
@@ -223,11 +410,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *);
223#define _UA_HANDLER_FRAME 4 410#define _UA_HANDLER_FRAME 4
224#define _UA_FORCE_UNWIND 8 411#define _UA_FORCE_UNWIND 8
225 412
226#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
227#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
228#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
229#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
230
231/* DWARF2 personality handler referenced from interpreter .eh_frame. */ 413/* DWARF2 personality handler referenced from interpreter .eh_frame. */
232LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, 414LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
233 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) 415 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
@@ -236,7 +418,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
236 lua_State *L; 418 lua_State *L;
237 if (version != 1) 419 if (version != 1)
238 return _URC_FATAL_PHASE1_ERROR; 420 return _URC_FATAL_PHASE1_ERROR;
239 UNUSED(uexclass);
240 cf = (void *)_Unwind_GetCFA(ctx); 421 cf = (void *)_Unwind_GetCFA(ctx);
241 L = cframe_L(cf); 422 L = cframe_L(cf);
242 if ((actions & _UA_SEARCH_PHASE)) { 423 if ((actions & _UA_SEARCH_PHASE)) {
@@ -284,27 +465,159 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
284 ** it on non-x64 because the interpreter restores all callee-saved regs. 465 ** it on non-x64 because the interpreter restores all callee-saved regs.
285 */ 466 */
286 lj_err_throw(L, errcode); 467 lj_err_throw(L, errcode);
468#if LJ_TARGET_X64
469#error "Broken build system -- only use the provided Makefiles!"
470#endif
287#endif 471#endif
288 } 472 }
289 return _URC_CONTINUE_UNWIND; 473 return _URC_CONTINUE_UNWIND;
290} 474}
291 475
292#if LJ_UNWIND_EXT 476#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
293static __thread _Unwind_Exception static_uex; 477struct dwarf_eh_bases { void *tbase, *dbase, *func; };
478extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases);
294 479
295/* Raise DWARF2 exception. */ 480/* Verify that external error handling actually has a chance to work. */
296static void err_raise_ext(int errcode) 481void lj_err_verify(void)
297{ 482{
298 static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); 483 struct dwarf_eh_bases ehb;
299 static_uex.excleanup = NULL; 484 lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables");
300 _Unwind_RaiseException(&static_uex); 485 /* Check disabled, because of broken Fedora/ARM64. See #722.
486 lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables");
487 */
301} 488}
302#endif 489#endif
303 490
491#if LJ_UNWIND_JIT
492/* DWARF2 personality handler for JIT-compiled code. */
493static int err_unwind_jit(int version, int actions,
494 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
495{
496 /* NYI: FFI C++ exception interoperability. */
497 if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass))
498 return _URC_FATAL_PHASE1_ERROR;
499 if ((actions & _UA_SEARCH_PHASE)) {
500 return _URC_HANDLER_FOUND;
501 }
502 if ((actions & _UA_CLEANUP_PHASE)) {
503 global_State *g = *(global_State **)(uex+1);
504 ExitNo exitno;
505 uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */
506 uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
507 lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame");
508 if (stub) { /* Jump to side exit to unwind the trace. */
509 G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass);
510#ifdef LJ_TARGET_MIPS
511 _Unwind_SetGR(ctx, 4, stub);
512 _Unwind_SetGR(ctx, 5, exitno);
513 _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub);
514#else
515 _Unwind_SetIP(ctx, stub);
516#endif
517 return _URC_INSTALL_CONTEXT;
518 }
519 return _URC_FATAL_PHASE2_ERROR;
520 }
521 return _URC_FATAL_PHASE1_ERROR;
522}
523
524/* DWARF2 template frame info for JIT-compiled code.
525**
526** After copying the template to the start of the mcode segment,
527** the frame handler function and the code size is patched.
528** The frame handler always installs a new context to jump to the exit,
529** so don't bother to add any unwind opcodes.
530*/
531static const uint8_t err_frame_jit_template[] = {
532#if LJ_BE
533 0,0,0,
534#endif
535 LJ_64 ? 0x1c : 0x14, /* CIE length. */
536#if LJ_LE
537 0,0,0,
538#endif
539 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */
540 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */
541#if LJ_64
542 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
543 0,0,0,0,0, /* Alignment. */
544#else
545 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
546 0, /* Alignment. */
547#endif
548#if LJ_BE
549 0,0,0,
550#endif
551 LJ_64 ? 0x14 : 0x10, /* FDE length. */
552 0,0,0,
553 LJ_64 ? 0x24 : 0x1c, /* CIE offset. */
554 0,0,0,
555 LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */
556#if LJ_LE
557 0,0,0,
558#endif
559 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */
560#if LJ_64
561 0,0,0,0, /* Alignment. */
562#endif
563 0,0,0,0 /* Final FDE. */
564};
565
566#define ERR_FRAME_JIT_OFS_HANDLER 0x12
567#define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18)
568#define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 0x2c : 0x24)
569#if LJ_TARGET_OSX
570#define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE
304#else 571#else
572#define ERR_FRAME_JIT_OFS_REGISTER 0
573#endif
574
575extern void __register_frame(const void *);
576extern void __deregister_frame(const void *);
305 577
306extern void _Unwind_DeleteException(void *); 578uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
307extern int __gnu_unwind_frame (void *, _Unwind_Context *); 579{
580 void **handler;
581 memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
582 handler = (void *)err_unwind_jit;
583 memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
584 *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
585 (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
586 __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
587#ifdef LUA_USE_ASSERT
588 {
589 struct dwarf_eh_bases ehb;
590 lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb),
591 "bad JIT unwind table registration");
592 }
593#endif
594 return info + sizeof(err_frame_jit_template);
595}
596
597void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info)
598{
599 UNUSED(base); UNUSED(sz);
600 __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
601}
602#endif
603
604#else /* LJ_TARGET_ARM */
605
606#define _US_VIRTUAL_UNWIND_FRAME 0
607#define _US_UNWIND_FRAME_STARTING 1
608#define _US_ACTION_MASK 3
609#define _US_FORCE_UNWIND 8
610
611typedef struct _Unwind_Control_Block _Unwind_Control_Block;
612#define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block
613
614struct _Unwind_Control_Block {
615 uint64_t exclass;
616 uint32_t misc[20];
617};
618
619extern int _Unwind_RaiseException(_Unwind_Control_Block *);
620extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *);
308extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); 621extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *);
309extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); 622extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *);
310 623
@@ -320,126 +633,98 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v)
320 _Unwind_VRS_Set(ctx, 0, r, 0, &v); 633 _Unwind_VRS_Set(ctx, 0, r, 0, &v);
321} 634}
322 635
323#define _US_VIRTUAL_UNWIND_FRAME 0 636extern void lj_vm_unwind_ext(void);
324#define _US_UNWIND_FRAME_STARTING 1
325#define _US_ACTION_MASK 3
326#define _US_FORCE_UNWIND 8
327 637
328/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ 638/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */
329LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) 639LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
640 _Unwind_Context *ctx)
330{ 641{
331 void *cf = (void *)_Unwind_GetGR(ctx, 13); 642 void *cf = (void *)_Unwind_GetGR(ctx, 13);
332 lua_State *L = cframe_L(cf); 643 lua_State *L = cframe_L(cf);
333 if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { 644 int errcode;
334 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); 645
646 switch ((state & _US_ACTION_MASK)) {
647 case _US_VIRTUAL_UNWIND_FRAME:
648 if ((state & _US_FORCE_UNWIND)) break;
335 return _URC_HANDLER_FOUND; 649 return _URC_HANDLER_FOUND;
336 } 650 case _US_UNWIND_FRAME_STARTING:
337 if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { 651 if (LJ_UEXCLASS_CHECK(ucb->exclass)) {
338 _Unwind_DeleteException(ucb); 652 errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass);
339 _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); 653 } else {
340 _Unwind_SetGR(ctx, 0, (uint32_t)L); 654 errcode = LUA_ERRRUN;
341 _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); 655 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
656 }
657 cf = err_unwind(L, cf, errcode);
658 if ((state & _US_FORCE_UNWIND) || cf == NULL) break;
659 _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext);
660 _Unwind_SetGR(ctx, 0, (uint32_t)ucb);
661 _Unwind_SetGR(ctx, 1, (uint32_t)errcode);
662 _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ?
663 (uint32_t)lj_vm_unwind_ff_eh :
664 (uint32_t)lj_vm_unwind_c_eh);
342 return _URC_INSTALL_CONTEXT; 665 return _URC_INSTALL_CONTEXT;
666 default:
667 return _URC_FAILURE;
343 } 668 }
344 if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) 669 if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
345 return _URC_FAILURE; 670 return _URC_FAILURE;
671#ifdef LUA_USE_ASSERT
672 /* We should never get here unless this is a forced unwind aka backtrace. */
673 if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) {
674 _Unwind_SetGR(ctx, 0, 0xff33aa88);
675 }
676#endif
346 return _URC_CONTINUE_UNWIND; 677 return _URC_CONTINUE_UNWIND;
347} 678}
348 679
349#endif 680#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
681typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *);
682extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
350 683
351#elif LJ_TARGET_X64 && LJ_ABI_WIN 684static int err_verify_bt(_Unwind_Context *ctx, int *got)
685{
686 if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; }
687 else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); }
688 return _URC_OK;
689}
690
691/* Verify that external error handling actually has a chance to work. */
692void lj_err_verify(void)
693{
694 int got = 0;
695 _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got);
696 lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables");
697}
698#endif
352 699
353/* 700/*
354** Someone in Redmond owes me several days of my life. A lot of this is 701** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM.
355** undocumented or just plain wrong on MSDN. Some of it can be gathered
356** from 3rd party docs or must be found by trial-and-error. They really
357** don't want you to write your own language-specific exception handler
358** or to interact gracefully with MSVC. :-(
359** 702**
360** Apparently MSVC doesn't call C++ destructors for foreign exceptions 703** The quirky ARM unwind API doesn't have __register_frame().
361** unless you compile your C++ code with /EHa. Unfortunately this means 704** A potential workaround might involve _Unwind_Backtrace.
362** catch (...) also catches things like access violations. The use of 705** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway,
363** _set_se_translator doesn't really help, because it requires /EHa, too. 706** since they are built without unwind tables by default.
364*/ 707*/
365 708
366#define WIN32_LEAN_AND_MEAN 709#endif /* LJ_TARGET_ARM */
367#include <windows.h>
368
369/* Taken from: http://www.nynaeve.net/?p=99 */
370typedef struct UndocumentedDispatcherContext {
371 ULONG64 ControlPc;
372 ULONG64 ImageBase;
373 PRUNTIME_FUNCTION FunctionEntry;
374 ULONG64 EstablisherFrame;
375 ULONG64 TargetIp;
376 PCONTEXT ContextRecord;
377 void (*LanguageHandler)(void);
378 PVOID HandlerData;
379 PUNWIND_HISTORY_TABLE HistoryTable;
380 ULONG ScopeIndex;
381 ULONG Fill0;
382} UndocumentedDispatcherContext;
383
384/* Another wild guess. */
385extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
386
387#ifdef MINGW_SDK_INIT
388/* Workaround for broken MinGW64 declaration. */
389VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
390#define RtlUnwindEx RtlUnwindEx_FIXED
391#endif
392 710
393#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
394#define LJ_GCC_EXCODE ((DWORD)0x20474343)
395 711
396#define LJ_EXCODE ((DWORD)0xe24c4a00) 712#if LJ_UNWIND_EXT
397#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) 713static __thread struct {
398#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) 714 UNWIND_EXCEPTION_TYPE ex;
399#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) 715 global_State *g;
716} static_uex;
400 717
401/* Win64 exception handler for interpreter frame. */ 718/* Raise external exception. */
402LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, 719static void err_raise_ext(global_State *g, int errcode)
403 void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
404{ 720{
405 lua_State *L = cframe_L(cf); 721 memset(&static_uex, 0, sizeof(static_uex));
406 int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? 722 static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode);
407 LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; 723 static_uex.g = g;
408 if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ 724 _Unwind_RaiseException(&static_uex.ex);
409 /* Unwind internal frames. */
410 err_unwind(L, cf, errcode);
411 } else {
412 void *cf2 = err_unwind(L, cf, 0);
413 if (cf2) { /* We catch it, so start unwinding the upper frames. */
414 if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
415 rec->ExceptionCode == LJ_GCC_EXCODE) {
416#if LJ_TARGET_WINDOWS
417 __DestructExceptionObject(rec, 1);
418#endif
419 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
420 } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
421 /* Don't catch access violations etc. */
422 return ExceptionContinueSearch;
423 }
424 /* Unwind the stack and call all handlers for all lower C frames
425 ** (including ourselves) again with EH_UNWINDING set. Then set
426 ** rsp = cf, rax = errcode and jump to the specified target.
427 */
428 RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
429 lj_vm_unwind_ff_eh :
430 lj_vm_unwind_c_eh),
431 rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
432 /* RtlUnwindEx should never return. */
433 }
434 }
435 return ExceptionContinueSearch;
436} 725}
437 726
438/* Raise Windows exception. */ 727#endif
439static void err_raise_ext(int errcode)
440{
441 RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
442}
443 728
444#endif 729#endif
445 730
@@ -450,22 +735,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
450{ 735{
451 global_State *g = G(L); 736 global_State *g = G(L);
452 lj_trace_abort(g); 737 lj_trace_abort(g);
453 setgcrefnull(g->jit_L); 738 L->status = LUA_OK;
454 L->status = 0;
455#if LJ_UNWIND_EXT 739#if LJ_UNWIND_EXT
456 err_raise_ext(errcode); 740 err_raise_ext(g, errcode);
457 /* 741 /*
458 ** A return from this function signals a corrupt C stack that cannot be 742 ** A return from this function signals a corrupt C stack that cannot be
459 ** unwound. We have no choice but to call the panic function and exit. 743 ** unwound. We have no choice but to call the panic function and exit.
460 ** 744 **
461 ** Usually this is caused by a C function without unwind information. 745 ** Usually this is caused by a C function without unwind information.
462 ** This should never happen on x64, but may happen if you've manually 746 ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
463 ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every* 747 ** and forgot to recompile *every* non-C++ file with -funwind-tables.
464 ** non-C++ file with -funwind-tables.
465 */ 748 */
466 if (G(L)->panic) 749 if (G(L)->panic)
467 G(L)->panic(L); 750 G(L)->panic(L);
468#else 751#else
752#if LJ_HASJIT
753 setmref(g->jit_base, NULL);
754#endif
469 { 755 {
470 void *cf = err_unwind(L, NULL, errcode); 756 void *cf = err_unwind(L, NULL, errcode);
471 if (cframe_unwind_ff(cf)) 757 if (cframe_unwind_ff(cf))
@@ -495,7 +781,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
495/* Find error function for runtime errors. Requires an extra stack traversal. */ 781/* Find error function for runtime errors. Requires an extra stack traversal. */
496static ptrdiff_t finderrfunc(lua_State *L) 782static ptrdiff_t finderrfunc(lua_State *L)
497{ 783{
498 cTValue *frame = L->base-1, *bot = tvref(L->stack); 784 cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
499 void *cf = L->cframe; 785 void *cf = L->cframe;
500 while (frame > bot && cf) { 786 while (frame > bot && cf) {
501 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ 787 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
@@ -519,10 +805,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
519 frame = frame_prevd(frame); 805 frame = frame_prevd(frame);
520 break; 806 break;
521 case FRAME_CONT: 807 case FRAME_CONT:
522#if LJ_HASFFI 808 if (frame_iscont_fficb(frame))
523 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
524 cf = cframe_prev(cf); 809 cf = cframe_prev(cf);
525#endif
526 frame = frame_prevd(frame); 810 frame = frame_prevd(frame);
527 break; 811 break;
528 case FRAME_CP: 812 case FRAME_CP:
@@ -534,11 +818,11 @@ static ptrdiff_t finderrfunc(lua_State *L)
534 break; 818 break;
535 case FRAME_PCALL: 819 case FRAME_PCALL:
536 case FRAME_PCALLH: 820 case FRAME_PCALLH:
537 if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ 821 if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
538 return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ 822 return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */
539 return 0; 823 return 0;
540 default: 824 default:
541 lua_assert(0); 825 lj_assertL(0, "bad frame type");
542 return 0; 826 return 0;
543 } 827 }
544 } 828 }
@@ -548,7 +832,7 @@ static ptrdiff_t finderrfunc(lua_State *L)
548/* Runtime error. */ 832/* Runtime error. */
549LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) 833LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
550{ 834{
551 ptrdiff_t ef = finderrfunc(L); 835 ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
552 if (ef) { 836 if (ef) {
553 TValue *errfunc = restorestack(L, ef); 837 TValue *errfunc = restorestack(L, ef);
554 TValue *top = L->top; 838 TValue *top = L->top;
@@ -558,14 +842,25 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
558 lj_err_throw(L, LUA_ERRERR); 842 lj_err_throw(L, LUA_ERRERR);
559 } 843 }
560 L->status = LUA_ERRERR; 844 L->status = LUA_ERRERR;
561 copyTV(L, top, top-1); 845 copyTV(L, top+LJ_FR2, top-1);
562 copyTV(L, top-1, errfunc); 846 copyTV(L, top-1, errfunc);
847 if (LJ_FR2) setnilV(top++);
563 L->top = top+1; 848 L->top = top+1;
564 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ 849 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
565 } 850 }
566 lj_err_throw(L, LUA_ERRRUN); 851 lj_err_throw(L, LUA_ERRRUN);
567} 852}
568 853
854#if LJ_HASJIT
855LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
856{
857 if (errcode == LUA_ERRRUN)
858 lj_err_run(L);
859 else
860 lj_err_throw(L, errcode);
861}
862#endif
863
569/* Formatted runtime error message. */ 864/* Formatted runtime error message. */
570LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) 865LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
571{ 866{
@@ -573,7 +868,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
573 va_list argp; 868 va_list argp;
574 va_start(argp, em); 869 va_start(argp, em);
575 if (curr_funcisL(L)) L->top = curr_topL(L); 870 if (curr_funcisL(L)) L->top = curr_topL(L);
576 msg = lj_str_pushvf(L, err2msg(em), argp); 871 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
577 va_end(argp); 872 va_end(argp);
578 lj_debug_addloc(L, msg, L->base-1, NULL); 873 lj_debug_addloc(L, msg, L->base-1, NULL);
579 lj_err_run(L); 874 lj_err_run(L);
@@ -591,11 +886,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
591{ 886{
592 char buff[LUA_IDSIZE]; 887 char buff[LUA_IDSIZE];
593 const char *msg; 888 const char *msg;
594 lj_debug_shortname(buff, src); 889 lj_debug_shortname(buff, src, line);
595 msg = lj_str_pushvf(L, err2msg(em), argp); 890 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
596 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 891 msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
597 if (tok) 892 if (tok)
598 lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); 893 lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
599 lj_err_throw(L, LUA_ERRSYNTAX); 894 lj_err_throw(L, LUA_ERRSYNTAX);
600} 895}
601 896
@@ -634,8 +929,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
634 const BCIns *pc = cframe_Lpc(L); 929 const BCIns *pc = cframe_Lpc(L);
635 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { 930 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
636 const char *tname = lj_typename(o); 931 const char *tname = lj_typename(o);
932 setframe_gc(o, obj2gco(L), LJ_TTHREAD);
933 if (LJ_FR2) o++;
637 setframe_pc(o, pc); 934 setframe_pc(o, pc);
638 setframe_gc(o, obj2gco(L));
639 L->top = L->base = o+1; 935 L->top = L->base = o+1;
640 err_msgv(L, LJ_ERR_BADCALL, tname); 936 err_msgv(L, LJ_ERR_BADCALL, tname);
641 } 937 }
@@ -645,28 +941,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
645/* Error in context of caller. */ 941/* Error in context of caller. */
646LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) 942LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
647{ 943{
648 TValue *frame = L->base-1; 944 TValue *frame = NULL, *pframe = NULL;
649 TValue *pframe = NULL; 945 if (!(LJ_HASJIT && tvref(G(L)->jit_base))) {
650 if (frame_islua(frame)) { 946 frame = L->base-1;
651 pframe = frame_prevl(frame); 947 if (frame_islua(frame)) {
652 } else if (frame_iscont(frame)) { 948 pframe = frame_prevl(frame);
949 } else if (frame_iscont(frame)) {
950 if (frame_iscont_fficb(frame)) {
951 pframe = frame;
952 frame = NULL;
953 } else {
954 pframe = frame_prevd(frame);
653#if LJ_HASFFI 955#if LJ_HASFFI
654 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) { 956 /* Remove frame for FFI metamethods. */
655 pframe = frame; 957 if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
656 frame = NULL; 958 frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
657 } else 959 L->base = pframe+1;
960 L->top = frame;
961 setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
962 }
658#endif 963#endif
659 {
660 pframe = frame_prevd(frame);
661#if LJ_HASFFI
662 /* Remove frame for FFI metamethods. */
663 if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
664 frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
665 L->base = pframe+1;
666 L->top = frame;
667 setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
668 } 964 }
669#endif
670 } 965 }
671 } 966 }
672 lj_debug_addloc(L, msg, pframe, frame); 967 lj_debug_addloc(L, msg, pframe, frame);
@@ -679,7 +974,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
679 const char *msg; 974 const char *msg;
680 va_list argp; 975 va_list argp;
681 va_start(argp, em); 976 va_start(argp, em);
682 msg = lj_str_pushvf(L, err2msg(em), argp); 977 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
683 va_end(argp); 978 va_end(argp);
684 lj_err_callermsg(L, msg); 979 lj_err_callermsg(L, msg);
685} 980}
@@ -699,9 +994,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
699 if (narg < 0 && narg > LUA_REGISTRYINDEX) 994 if (narg < 0 && narg > LUA_REGISTRYINDEX)
700 narg = (int)(L->top - L->base) + narg + 1; 995 narg = (int)(L->top - L->base) + narg + 1;
701 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ 996 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
702 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); 997 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
703 else 998 else
704 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); 999 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
705 lj_err_callermsg(L, msg); 1000 lj_err_callermsg(L, msg);
706} 1001}
707 1002
@@ -711,7 +1006,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
711 const char *msg; 1006 const char *msg;
712 va_list argp; 1007 va_list argp;
713 va_start(argp, em); 1008 va_start(argp, em);
714 msg = lj_str_pushvf(L, err2msg(em), argp); 1009 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
715 va_end(argp); 1010 va_end(argp);
716 err_argmsg(L, narg, msg); 1011 err_argmsg(L, narg, msg);
717} 1012}
@@ -741,7 +1036,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
741 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; 1036 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
742 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; 1037 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
743 } 1038 }
744 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); 1039 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
745 err_argmsg(L, narg, msg); 1040 err_argmsg(L, narg, msg);
746} 1041}
747 1042
@@ -791,7 +1086,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
791 const char *msg; 1086 const char *msg;
792 va_list argp; 1087 va_list argp;
793 va_start(argp, fmt); 1088 va_start(argp, fmt);
794 msg = lj_str_pushvf(L, fmt, argp); 1089 msg = lj_strfmt_pushvf(L, fmt, argp);
795 va_end(argp); 1090 va_end(argp);
796 lj_err_callermsg(L, msg); 1091 lj_err_callermsg(L, msg);
797 return 0; /* unreachable */ 1092 return 0; /* unreachable */
diff --git a/src/lj_err.h b/src/lj_err.h
index 770553fc..2e8a251f 100644
--- a/src/lj_err.h
+++ b/src/lj_err.h
@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg;
23LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); 23LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
24LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); 24LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
25LJ_FUNC_NORET void lj_err_mem(lua_State *L); 25LJ_FUNC_NORET void lj_err_mem(lua_State *L);
26LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L); 26LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
27#if LJ_HASJIT
28LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
29#endif
27LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); 30LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
28LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, 31LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
29 BCLine line, ErrMsg em, va_list argp); 32 BCLine line, ErrMsg em, va_list argp);
@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
38LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); 41LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
39LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); 42LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
40 43
44#if LJ_UNWIND_JIT && !LJ_ABI_WIN
45LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info);
46LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info);
47#else
48#define lj_err_register_mcode(base, sz, info) (info)
49#define lj_err_deregister_mcode(base, sz, info) UNUSED(base)
50#endif
51
52#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT)
53LJ_FUNC void lj_err_verify(void);
54#else
55#define lj_err_verify() ((void)0)
56#endif
57
41#endif 58#endif
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 49aef429..89e67496 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable")
67ERRDEF(UNPACK, "too many results to unpack") 67ERRDEF(UNPACK, "too many results to unpack")
68ERRDEF(RDRSTR, "reader function must return a string") 68ERRDEF(RDRSTR, "reader function must return a string")
69ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) 69ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print"))
70ERRDEF(NUMRNG, "number out of range")
70ERRDEF(IDXRNG, "index out of range") 71ERRDEF(IDXRNG, "index out of range")
71ERRDEF(BASERNG, "base out of range") 72ERRDEF(BASERNG, "base out of range")
72ERRDEF(LVLRNG, "level out of range") 73ERRDEF(LVLRNG, "level out of range")
@@ -96,18 +97,12 @@ ERRDEF(STRPATX, "pattern too complex")
96ERRDEF(STRCAPI, "invalid capture index") 97ERRDEF(STRCAPI, "invalid capture index")
97ERRDEF(STRCAPN, "too many captures") 98ERRDEF(STRCAPN, "too many captures")
98ERRDEF(STRCAPU, "unfinished capture") 99ERRDEF(STRCAPU, "unfinished capture")
99ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) 100ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
100ERRDEF(STRFMTR, "invalid format (repeated flags)")
101ERRDEF(STRFMTW, "invalid format (width or precision too long)")
102ERRDEF(STRGSRV, "invalid replacement value (a %s)") 101ERRDEF(STRGSRV, "invalid replacement value (a %s)")
103ERRDEF(BADMODN, "name conflict for module " LUA_QS) 102ERRDEF(BADMODN, "name conflict for module " LUA_QS)
104#if LJ_HASJIT 103#if LJ_HASJIT
105ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") 104ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?")
106#if LJ_TARGET_X86ORX64
107ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2")
108#else
109ERRDEF(NOJIT, "JIT compiler disabled") 105ERRDEF(NOJIT, "JIT compiler disabled")
110#endif
111#elif defined(LJ_ARCH_NOJIT) 106#elif defined(LJ_ARCH_NOJIT)
112ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") 107ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
113#else 108#else
@@ -118,7 +113,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
118/* Lexer/parser errors. */ 113/* Lexer/parser errors. */
119ERRDEF(XMODE, "attempt to load chunk with wrong mode") 114ERRDEF(XMODE, "attempt to load chunk with wrong mode")
120ERRDEF(XNEAR, "%s near " LUA_QS) 115ERRDEF(XNEAR, "%s near " LUA_QS)
121ERRDEF(XELEM, "lexical element too long")
122ERRDEF(XLINES, "chunk has too many lines") 116ERRDEF(XLINES, "chunk has too many lines")
123ERRDEF(XLEVELS, "chunk has too many syntax levels") 117ERRDEF(XLEVELS, "chunk has too many syntax levels")
124ERRDEF(XNUMBER, "malformed number") 118ERRDEF(XNUMBER, "malformed number")
@@ -186,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields")
186ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") 180ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
187#endif 181#endif
188 182
183#if LJ_HASBUFFER
184/* String buffer errors. */
185ERRDEF(BUFFER_SELF, "cannot put buffer into itself")
186ERRDEF(BUFFER_BADOPT, "bad options table")
187ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS)
188ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x")
189ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d")
190ERRDEF(BUFFER_DEPTH, "too deep to serialize")
191ERRDEF(BUFFER_DUPKEY, "duplicate table key")
192ERRDEF(BUFFER_EOB, "unexpected end of buffer")
193ERRDEF(BUFFER_LEFTOV, "left-over data in buffer")
194#endif
195
189#undef ERRDEF 196#undef ERRDEF
190 197
191/* Detecting unused error messages: 198/* Detecting unused error messages:
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 9405e694..92902b70 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_buf.h"
14#include "lj_str.h" 15#include "lj_str.h"
15#include "lj_tab.h" 16#include "lj_tab.h"
16#include "lj_frame.h" 17#include "lj_frame.h"
@@ -27,6 +28,8 @@
27#include "lj_dispatch.h" 28#include "lj_dispatch.h"
28#include "lj_vm.h" 29#include "lj_vm.h"
29#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
32#include "lj_serialize.h"
30 33
31/* Some local macros to save typing. Undef'd at the end. */ 34/* Some local macros to save typing. Undef'd at the end. */
32#define IR(ref) (&J->cur.ir[(ref)]) 35#define IR(ref) (&J->cur.ir[(ref)])
@@ -79,10 +82,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
79 GCstr *s; 82 GCstr *s;
80 if (!tvisnumber(o)) 83 if (!tvisnumber(o))
81 lj_trace_err(J, LJ_TRERR_BADTYPE); 84 lj_trace_err(J, LJ_TRERR_BADTYPE);
82 if (tvisint(o)) 85 s = lj_strfmt_number(J->L, o);
83 s = lj_str_fromint(J->L, intV(o));
84 else
85 s = lj_str_fromnum(J->L, &o->n);
86 setstrV(J->L, o, s); 86 setstrV(J->L, o, s);
87 return s; 87 return s;
88 } 88 }
@@ -98,27 +98,94 @@ static ptrdiff_t results_wanted(jit_State *J)
98 return -1; 98 return -1;
99} 99}
100 100
101/* Throw error for unsupported variant of fast function. */ 101/* Trace stitching: add continuation below frame to start a new trace. */
102LJ_NORET static void recff_nyiu(jit_State *J) 102static void recff_stitch(jit_State *J)
103{ 103{
104 setfuncV(J->L, &J->errinfo, J->fn); 104 ASMFunction cont = lj_cont_stitch;
105 lj_trace_err_info(J, LJ_TRERR_NYIFFU); 105 lua_State *L = J->L;
106 TValue *base = L->base;
107 BCReg nslot = J->maxslot + 1 + LJ_FR2;
108 TValue *nframe = base + 1 + LJ_FR2;
109 const BCIns *pc = frame_pc(base-1);
110 TValue *pframe = frame_prevl(base-1);
111
112 /* Check for this now. Throwing in lj_record_stop messes up the stack. */
113 if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap])
114 lj_trace_err(J, LJ_TRERR_SNAPOV);
115
116 /* Move func + args up in Lua stack and insert continuation. */
117 memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
118 setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
119 setcont(base-LJ_FR2, cont);
120 setframe_pc(base, pc);
121 setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
122 L->base += 2 + LJ_FR2;
123 L->top += 2 + LJ_FR2;
124
125 /* Ditto for the IR. */
126 memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
127#if LJ_FR2
128 J->base[2] = TREF_FRAME;
129 J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
130 J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
131#else
132 J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
133#endif
134 J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
135 J->base += 2 + LJ_FR2;
136 J->baseslot += 2 + LJ_FR2;
137 J->framedepth++;
138
139 lj_record_stop(J, LJ_TRLINK_STITCH, 0);
140
141 /* Undo Lua stack changes. */
142 memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
143 setframe_pc(base-1, pc);
144 L->base -= 2 + LJ_FR2;
145 L->top -= 2 + LJ_FR2;
106} 146}
107 147
108/* Fallback handler for all fast functions that are not recorded (yet). */ 148/* Fallback handler for fast functions that are not recorded (yet). */
109static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) 149static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
110{ 150{
111 setfuncV(J->L, &J->errinfo, J->fn); 151 if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
112 lj_trace_err_info(J, LJ_TRERR_NYIFF); 152 lj_trace_err_info(J, LJ_TRERR_TRACEUV);
113 UNUSED(rd); 153 } else {
154 /* Can only stitch from Lua call. */
155 if (J->framedepth && frame_islua(J->L->base-1)) {
156 BCOp op = bc_op(*frame_pc(J->L->base-1));
157 /* Stitched trace cannot start with *M op with variable # of args. */
158 if (!(op == BC_CALLM || op == BC_CALLMT ||
159 op == BC_RETM || op == BC_TSETM)) {
160 switch (J->fn->c.ffid) {
161 case FF_error:
162 case FF_debug_sethook:
163 case FF_jit_flush:
164 break; /* Don't stitch across special builtins. */
165 default:
166 recff_stitch(J); /* Use trace stitching. */
167 rd->nres = -1;
168 return;
169 }
170 }
171 }
172 /* Otherwise stop trace and return to interpreter. */
173 lj_record_stop(J, LJ_TRLINK_RETURN, 0);
174 rd->nres = -1;
175 }
114} 176}
115 177
116/* C functions can have arbitrary side-effects and are not recorded (yet). */ 178/* Fallback handler for unsupported variants of fast functions. */
117static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) 179#define recff_nyiu recff_nyi
180
181/* Must stop the trace for classic C functions with arbitrary side-effects. */
182#define recff_c recff_nyi
183
184/* Emit BUFHDR for the global temporary buffer. */
185static TRef recff_bufhdr(jit_State *J)
118{ 186{
119 setfuncV(J->L, &J->errinfo, J->fn); 187 return emitir(IRT(IR_BUFHDR, IRT_PGC),
120 lj_trace_err_info(J, LJ_TRERR_NYICF); 188 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
121 UNUSED(rd);
122} 189}
123 190
124/* -- Base library fast functions ----------------------------------------- */ 191/* -- Base library fast functions ----------------------------------------- */
@@ -135,7 +202,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
135 uint32_t t; 202 uint32_t t;
136 if (tvisnumber(&rd->argv[0])) 203 if (tvisnumber(&rd->argv[0]))
137 t = ~LJ_TNUMX; 204 t = ~LJ_TNUMX;
138 else if (LJ_64 && tvislightud(&rd->argv[0])) 205 else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
139 t = ~LJ_TLIGHTUD; 206 t = ~LJ_TLIGHTUD;
140 else 207 else
141 t = ~itype(&rd->argv[0]); 208 t = ~itype(&rd->argv[0]);
@@ -167,7 +234,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd)
167 ix.tab = tr; 234 ix.tab = tr;
168 copyTV(J->L, &ix.tabv, &rd->argv[0]); 235 copyTV(J->L, &ix.tabv, &rd->argv[0]);
169 lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ 236 lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */
170 fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); 237 fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META);
171 mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; 238 mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
172 emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); 239 emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
173 if (!tref_isnil(mt)) 240 if (!tref_isnil(mt))
@@ -220,7 +287,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd)
220 if (tref_isstr(tr)) 287 if (tref_isstr(tr))
221 J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); 288 J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
222 else if (tref_istab(tr)) 289 else if (tref_istab(tr))
223 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); 290 J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL);
224 /* else: Interpreter will throw. */ 291 /* else: Interpreter will throw. */
225 UNUSED(rd); 292 UNUSED(rd);
226} 293}
@@ -233,7 +300,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
233 if (strV(tv)->len == 1) { 300 if (strV(tv)->len == 1) {
234 emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); 301 emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv)));
235 } else { 302 } else {
236 TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); 303 TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
237 TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); 304 TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
238 emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); 305 emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
239 } 306 }
@@ -263,7 +330,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
263 J->base[i] = J->base[start+i]; 330 J->base[i] = J->base[start+i];
264 } /* else: Interpreter will throw. */ 331 } /* else: Interpreter will throw. */
265 } else { 332 } else {
266 recff_nyiu(J); 333 recff_nyiu(J, rd);
334 return;
267 } 335 }
268 } /* else: Interpreter will throw. */ 336 } /* else: Interpreter will throw. */
269} 337}
@@ -274,14 +342,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
274 TRef base = J->base[1]; 342 TRef base = J->base[1];
275 if (tr && !tref_isnil(base)) { 343 if (tr && !tref_isnil(base)) {
276 base = lj_opt_narrow_toint(J, base); 344 base = lj_opt_narrow_toint(J, base);
277 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) 345 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
278 recff_nyiu(J); 346 recff_nyiu(J, rd);
347 return;
348 }
279 } 349 }
280 if (tref_isnumber_str(tr)) { 350 if (tref_isnumber_str(tr)) {
281 if (tref_isstr(tr)) { 351 if (tref_isstr(tr)) {
282 TValue tmp; 352 TValue tmp;
283 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) 353 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
284 recff_nyiu(J); /* Would need an inverted STRTO for this case. */ 354 recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
355 return;
356 }
285 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); 357 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
286 } 358 }
287#if LJ_HASFFI 359#if LJ_HASFFI
@@ -313,10 +385,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
313 int errcode; 385 int errcode;
314 TValue argv0; 386 TValue argv0;
315 /* Temporarily insert metamethod below object. */ 387 /* Temporarily insert metamethod below object. */
316 J->base[1] = J->base[0]; 388 J->base[1+LJ_FR2] = J->base[0];
317 J->base[0] = ix.mobj; 389 J->base[0] = ix.mobj;
318 copyTV(J->L, &argv0, &rd->argv[0]); 390 copyTV(J->L, &argv0, &rd->argv[0]);
319 copyTV(J->L, &rd->argv[1], &rd->argv[0]); 391 copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
320 copyTV(J->L, &rd->argv[0], &ix.mobjv); 392 copyTV(J->L, &rd->argv[0], &ix.mobjv);
321 /* Need to protect lj_record_tailcall because it may throw. */ 393 /* Need to protect lj_record_tailcall because it may throw. */
322 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); 394 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@@ -336,13 +408,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
336 if (tref_isstr(tr)) { 408 if (tref_isstr(tr)) {
337 /* Ignore __tostring in the string base metatable. */ 409 /* Ignore __tostring in the string base metatable. */
338 /* Pass on result in J->base[0]. */ 410 /* Pass on result in J->base[0]. */
339 } else if (!recff_metacall(J, rd, MM_tostring)) { 411 } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
340 if (tref_isnumber(tr)) { 412 if (tref_isnumber(tr)) {
341 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 413 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
414 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
342 } else if (tref_ispri(tr)) { 415 } else if (tref_ispri(tr)) {
343 J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); 416 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
344 } else { 417 } else {
345 recff_nyiu(J); 418 recff_nyiu(J, rd);
419 return;
346 } 420 }
347 } 421 }
348} 422}
@@ -364,15 +438,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
364 } /* else: Interpreter will throw. */ 438 } /* else: Interpreter will throw. */
365} 439}
366 440
367static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) 441static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
368{ 442{
369 TRef tr = J->base[0]; 443 TRef tr = J->base[0];
370 if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && 444 if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) &&
371 recff_metacall(J, rd, MM_ipairs))) { 445 recff_metacall(J, rd, MM_pairs + rd->data))) {
372 if (tref_istab(tr)) { 446 if (tref_istab(tr)) {
373 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); 447 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
374 J->base[1] = tr; 448 J->base[1] = tr;
375 J->base[2] = lj_ir_kint(J, 0); 449 J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
376 rd->nres = 3; 450 rd->nres = 3;
377 } /* else: Interpreter will throw. */ 451 } /* else: Interpreter will throw. */
378 } 452 }
@@ -381,8 +455,13 @@ static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
381static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) 455static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
382{ 456{
383 if (J->maxslot >= 1) { 457 if (J->maxslot >= 1) {
458#if LJ_FR2
459 /* Shift function arguments up. */
460 memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
461#endif
384 lj_record_call(J, 0, J->maxslot - 1); 462 lj_record_call(J, 0, J->maxslot - 1);
385 rd->nres = -1; /* Pending call. */ 463 rd->nres = -1; /* Pending call. */
464 J->needsnap = 1; /* Start catching on-trace errors. */
386 } /* else: Interpreter will throw. */ 465 } /* else: Interpreter will throw. */
387} 466}
388 467
@@ -406,6 +485,10 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
406 copyTV(J->L, &argv1, &rd->argv[1]); 485 copyTV(J->L, &argv1, &rd->argv[1]);
407 copyTV(J->L, &rd->argv[0], &argv1); 486 copyTV(J->L, &rd->argv[0], &argv1);
408 copyTV(J->L, &rd->argv[1], &argv0); 487 copyTV(J->L, &rd->argv[1], &argv0);
488#if LJ_FR2
489 /* Shift function arguments up. */
490 memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
491#endif
409 /* Need to protect lj_record_call because it may throw. */ 492 /* Need to protect lj_record_call because it may throw. */
410 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); 493 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
411 /* Always undo Lua stack swap to avoid confusing the interpreter. */ 494 /* Always undo Lua stack swap to avoid confusing the interpreter. */
@@ -414,15 +497,28 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
414 if (errcode) 497 if (errcode)
415 lj_err_throw(J->L, errcode); /* Propagate errors. */ 498 lj_err_throw(J->L, errcode); /* Propagate errors. */
416 rd->nres = -1; /* Pending call. */ 499 rd->nres = -1; /* Pending call. */
500 J->needsnap = 1; /* Start catching on-trace errors. */
417 } /* else: Interpreter will throw. */ 501 } /* else: Interpreter will throw. */
418} 502}
419 503
504static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
505{
506 TRef tr = J->base[0];
507 /* Only support getfenv(0) for now. */
508 if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
509 TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
510 J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
511 return;
512 }
513 recff_nyiu(J, rd);
514}
515
420/* -- Math library fast functions ----------------------------------------- */ 516/* -- Math library fast functions ----------------------------------------- */
421 517
422static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) 518static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
423{ 519{
424 TRef tr = lj_ir_tonum(J, J->base[0]); 520 TRef tr = lj_ir_tonum(J, J->base[0]);
425 J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); 521 J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS));
426 UNUSED(rd); 522 UNUSED(rd);
427} 523}
428 524
@@ -475,7 +571,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd)
475{ 571{
476 TRef tr = lj_ir_tonum(J, J->base[0]); 572 TRef tr = lj_ir_tonum(J, J->base[0]);
477 TRef tr2 = lj_ir_tonum(J, J->base[1]); 573 TRef tr2 = lj_ir_tonum(J, J->base[1]);
478 J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2); 574 J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2);
479 UNUSED(rd); 575 UNUSED(rd);
480} 576}
481 577
@@ -492,51 +588,12 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd)
492 UNUSED(rd); 588 UNUSED(rd);
493} 589}
494 590
495/* Record math.asin, math.acos, math.atan. */ 591static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd)
496static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd)
497{
498 TRef y = lj_ir_tonum(J, J->base[0]);
499 TRef x = lj_ir_knum_one(J);
500 uint32_t ffid = rd->data;
501 if (ffid != FF_math_atan) {
502 TRef tmp = emitir(IRTN(IR_MUL), y, y);
503 tmp = emitir(IRTN(IR_SUB), x, tmp);
504 tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT);
505 if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; }
506 }
507 J->base[0] = emitir(IRTN(IR_ATAN2), y, x);
508}
509
510static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd)
511{ 592{
512 TRef tr = lj_ir_tonum(J, J->base[0]); 593 TRef tr = lj_ir_tonum(J, J->base[0]);
513 J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); 594 J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data);
514} 595}
515 596
516static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
517{
518 TRef tr = J->base[0];
519 if (tref_isinteger(tr)) {
520 J->base[0] = tr;
521 J->base[1] = lj_ir_kint(J, 0);
522 } else {
523 TRef trt;
524 tr = lj_ir_tonum(J, tr);
525 trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC);
526 J->base[0] = trt;
527 J->base[1] = emitir(IRTN(IR_SUB), tr, trt);
528 }
529 rd->nres = 2;
530}
531
532static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
533{
534 TRef tr = lj_ir_tonum(J, J->base[0]);
535 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
536 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
537 UNUSED(rd);
538}
539
540static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 597static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
541{ 598{
542 J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], 599 J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
@@ -567,7 +624,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
567 GCudata *ud = udataV(&J->fn->c.upvalue[0]); 624 GCudata *ud = udataV(&J->fn->c.upvalue[0]);
568 TRef tr, one; 625 TRef tr, one;
569 lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ 626 lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */
570 tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); 627 tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud)));
571 one = lj_ir_knum_one(J); 628 one = lj_ir_knum_one(J);
572 tr = emitir(IRTN(IR_SUB), tr, one); 629 tr = emitir(IRTN(IR_SUB), tr, one);
573 if (J->base[0]) { 630 if (J->base[0]) {
@@ -591,48 +648,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
591 648
592/* -- Bit library fast functions ------------------------------------------ */ 649/* -- Bit library fast functions ------------------------------------------ */
593 650
594/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 651/* Record bit.tobit. */
652static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
653{
654 TRef tr = J->base[0];
655#if LJ_HASFFI
656 if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
657#endif
658 J->base[0] = lj_opt_narrow_tobit(J, tr);
659 UNUSED(rd);
660}
661
662/* Record unary bit.bnot, bit.bswap. */
595static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 663static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
596{ 664{
597 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 665#if LJ_HASFFI
598 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 666 if (recff_bit64_unary(J, rd))
667 return;
668#endif
669 J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
599} 670}
600 671
601/* Record N-ary bit.band, bit.bor, bit.bxor. */ 672/* Record N-ary bit.band, bit.bor, bit.bxor. */
602static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 673static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
603{ 674{
604 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 675#if LJ_HASFFI
605 uint32_t op = rd->data; 676 if (recff_bit64_nary(J, rd))
606 BCReg i; 677 return;
607 for (i = 1; J->base[i] != 0; i++) 678#endif
608 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); 679 {
609 J->base[0] = tr; 680 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
681 uint32_t ot = IRTI(rd->data);
682 BCReg i;
683 for (i = 1; J->base[i] != 0; i++)
684 tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
685 J->base[0] = tr;
686 }
610} 687}
611 688
612/* Record bit shifts. */ 689/* Record bit shifts. */
613static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 690static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
614{ 691{
615 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 692#if LJ_HASFFI
616 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); 693 if (recff_bit64_shift(J, rd))
617 IROp op = (IROp)rd->data; 694 return;
618 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 695#endif
619 !tref_isk(tsh)) 696 {
620 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 697 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
698 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
699 IROp op = (IROp)rd->data;
700 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
701 !tref_isk(tsh))
702 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
621#ifdef LJ_TARGET_UNIFYROT 703#ifdef LJ_TARGET_UNIFYROT
622 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 704 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
623 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 705 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
624 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 706 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
707 }
708#endif
709 J->base[0] = emitir(IRTI(op), tr, tsh);
625 } 710 }
711}
712
713static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
714{
715#if LJ_HASFFI
716 TRef hdr = recff_bufhdr(J);
717 TRef tr = recff_bit64_tohex(J, rd, hdr);
718 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
719#else
720 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
626#endif 721#endif
627 J->base[0] = emitir(IRTI(op), tr, tsh);
628} 722}
629 723
630/* -- String library fast functions --------------------------------------- */ 724/* -- String library fast functions --------------------------------------- */
631 725
632static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) 726/* Specialize to relative starting position for string. */
727static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
728 TRef trlen, TRef tr0)
633{ 729{
634 J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); 730 int32_t start = *st;
635 UNUSED(rd); 731 if (start < 0) {
732 emitir(IRTGI(IR_LT), tr, tr0);
733 tr = emitir(IRTI(IR_ADD), trlen, tr);
734 start = start + (int32_t)s->len;
735 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
736 if (start < 0) {
737 tr = tr0;
738 start = 0;
739 }
740 } else if (start == 0) {
741 emitir(IRTGI(IR_EQ), tr, tr0);
742 tr = tr0;
743 } else {
744 tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
745 emitir(IRTGI(IR_GE), tr, tr0);
746 start--;
747 }
748 *st = start;
749 return tr;
636} 750}
637 751
638/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ 752/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -679,39 +793,21 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
679 } else if ((MSize)end <= str->len) { 793 } else if ((MSize)end <= str->len) {
680 emitir(IRTGI(IR_ULE), trend, trlen); 794 emitir(IRTGI(IR_ULE), trend, trlen);
681 } else { 795 } else {
682 emitir(IRTGI(IR_GT), trend, trlen); 796 emitir(IRTGI(IR_UGT), trend, trlen);
683 end = (int32_t)str->len; 797 end = (int32_t)str->len;
684 trend = trlen; 798 trend = trlen;
685 } 799 }
686 if (start < 0) { 800 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
687 emitir(IRTGI(IR_LT), trstart, tr0);
688 trstart = emitir(IRTI(IR_ADD), trlen, trstart);
689 start = start+(int32_t)str->len;
690 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
691 if (start < 0) {
692 trstart = tr0;
693 start = 0;
694 }
695 } else {
696 if (start == 0) {
697 emitir(IRTGI(IR_EQ), trstart, tr0);
698 trstart = tr0;
699 } else {
700 trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
701 emitir(IRTGI(IR_GE), trstart, tr0);
702 start--;
703 }
704 }
705 if (rd->data) { /* Return string.sub result. */ 801 if (rd->data) { /* Return string.sub result. */
706 if (end - start >= 0) { 802 if (end - start >= 0) {
707 /* Also handle empty range here, to avoid extra traces. */ 803 /* Also handle empty range here, to avoid extra traces. */
708 TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); 804 TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
709 emitir(IRTGI(IR_GE), trslen, tr0); 805 emitir(IRTGI(IR_GE), trslen, tr0);
710 trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); 806 trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
711 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); 807 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
712 } else { /* Range underflow: return empty string. */ 808 } else { /* Range underflow: return empty string. */
713 emitir(IRTGI(IR_LT), trend, trstart); 809 emitir(IRTGI(IR_LT), trend, trstart);
714 J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); 810 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
715 } 811 }
716 } else { /* Return string.byte result(s). */ 812 } else { /* Return string.byte result(s). */
717 ptrdiff_t i, len = end - start; 813 ptrdiff_t i, len = end - start;
@@ -723,7 +819,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
723 rd->nres = len; 819 rd->nres = len;
724 for (i = 0; i < len; i++) { 820 for (i = 0; i < len; i++) {
725 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); 821 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i));
726 tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); 822 tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp);
727 J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); 823 J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
728 } 824 }
729 } else { /* Empty range or range underflow: return no results. */ 825 } else { /* Empty range or range underflow: return no results. */
@@ -733,48 +829,535 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
733 } 829 }
734} 830}
735 831
736/* -- Table library fast functions ---------------------------------------- */ 832static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
737
738static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
739{ 833{
740 if (tref_istab(J->base[0])) 834 TRef k255 = lj_ir_kint(J, 255);
741 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); 835 BCReg i;
742 /* else: Interpreter will throw. */ 836 for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */
837 TRef tr = lj_opt_narrow_toint(J, J->base[i]);
838 emitir(IRTGI(IR_ULE), tr, k255);
839 J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
840 }
841 if (i > 1) { /* Concatenate the strings, if there's more than one. */
842 TRef hdr = recff_bufhdr(J), tr = hdr;
843 for (i = 0; J->base[i] != 0; i++)
844 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
845 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
846 } else if (i == 0) {
847 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
848 }
743 UNUSED(rd); 849 UNUSED(rd);
744} 850}
745 851
746static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) 852static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
747{ 853{
748 TRef tab = J->base[0]; 854 TRef str = lj_ir_tostr(J, J->base[0]);
749 rd->nres = 0; 855 TRef rep = lj_opt_narrow_toint(J, J->base[1]);
750 if (tref_istab(tab)) { 856 TRef hdr, tr, str2 = 0;
751 if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ 857 if (!tref_isnil(J->base[2])) {
752 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); 858 TRef sep = lj_ir_tostr(J, J->base[2]);
753 GCtab *t = tabV(&rd->argv[0]); 859 int32_t vrep = argv2int(J, &rd->argv[1]);
754 MSize len = lj_tab_len(t); 860 emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
755 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); 861 if (vrep > 1) {
756 if (len) { 862 TRef hdr2 = recff_bufhdr(J);
757 RecordIndex ix; 863 TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep);
758 ix.tab = tab; 864 tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str);
759 ix.key = trlen; 865 str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2);
760 settabV(J->L, &ix.tabv, t); 866 }
761 setintV(&ix.keyv, len); 867 }
762 ix.idxchain = 0; 868 tr = hdr = recff_bufhdr(J);
763 if (results_wanted(J) != 0) { /* Specialize load only if needed. */ 869 if (str2) {
764 ix.val = 0; 870 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str);
765 J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ 871 str = str2;
766 rd->nres = 1; 872 rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
767 /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ 873 }
874 tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
875 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
876}
877
878static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
879{
880 TRef str = lj_ir_tostr(J, J->base[0]);
881 TRef hdr = recff_bufhdr(J);
882 TRef tr = lj_ir_call(J, rd->data, hdr, str);
883 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
884}
885
886static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
887{
888 TRef trstr = lj_ir_tostr(J, J->base[0]);
889 TRef trpat = lj_ir_tostr(J, J->base[1]);
890 TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
891 TRef tr0 = lj_ir_kint(J, 0);
892 TRef trstart;
893 GCstr *str = argv2str(J, &rd->argv[0]);
894 GCstr *pat = argv2str(J, &rd->argv[1]);
895 int32_t start;
896 J->needsnap = 1;
897 if (tref_isnil(J->base[2])) {
898 trstart = lj_ir_kint(J, 1);
899 start = 1;
900 } else {
901 trstart = lj_opt_narrow_toint(J, J->base[2]);
902 start = argv2int(J, &rd->argv[2]);
903 }
904 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
905 if ((MSize)start <= str->len) {
906 emitir(IRTGI(IR_ULE), trstart, trlen);
907 } else {
908 emitir(IRTGI(IR_UGT), trstart, trlen);
909#if LJ_52
910 J->base[0] = TREF_NIL;
911 return;
912#else
913 trstart = trlen;
914 start = str->len;
915#endif
916 }
917 /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
918 if ((J->base[2] && tref_istruecond(J->base[3])) ||
919 (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
920 !lj_str_haspattern(pat))) { /* Search for fixed string. */
921 TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
922 TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0);
923 TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
924 TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
925 TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
926 TRef trp0 = lj_ir_kkptr(J, NULL);
927 if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
928 str->len-(MSize)start, pat->len)) {
929 TRef pos;
930 emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
931 /* Recompute offset. trsptr may not point into trstr after folding. */
932 pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart);
933 J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
934 J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
935 rd->nres = 2;
936 } else {
937 emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
938 J->base[0] = TREF_NIL;
939 }
940 } else { /* Search for pattern. */
941 recff_nyiu(J, rd);
942 return;
943 }
944}
945
/* Record a formatted append; shared by recff_string_format() and
** recff_buffer_method_putf().
**   hdr   -- TRef of the buffer chain to append to.
**   sbufx -- slot index of the format string in J->base; the format
**            arguments follow it. Non-zero also selects the IR_USE ending
**            instead of producing a BUFSTR result in J->base[0].
** The trace is guarded on the format string seen during recording, which is
** then parsed right here, emitting one BUFPUT or lj_strfmt_* call per item.
** Items that are not handled call recff_nyiu() and return early.
*/
946static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx)
947{
948 ptrdiff_t arg = sbufx;
949 TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]);
950 GCstr *fmt = argv2str(J, &rd->argv[arg]);
951 FormatState fs;
952 SFormat sf;
953 /* Specialize to the format string. */
954 emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
955 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
956 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
957 TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg]; /* Literals take no argument. */
958 TRef trsf = lj_ir_kint(J, (int32_t)sf); /* Format spec as an IR constant. */
959 IRCallID id;
960 switch (STRFMT_TYPE(sf)) {
961 case STRFMT_LIT:
962 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
963 lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
964 break;
965 case STRFMT_INT:
966 id = IRCALL_lj_strfmt_putfnum_int; /* Only used on the handle_num path. */
967 handle_int:
968 if (!tref_isinteger(tra)) {
969#if LJ_HASFFI
970 if (tref_iscdata(tra)) {
971 tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]);
972 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
973 break;
768 } 974 }
769 ix.val = TREF_NIL; 975#endif
770 lj_record_idx(J, &ix); /* Remove value. */ 976 goto handle_num;
977 }
978 if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
979 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
980 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
981 } else {
982#if LJ_HASFFI
/* Sign-extending CONV from int to U64, then the same call as for cdata. */
983 tra = emitir(IRT(IR_CONV, IRT_U64), tra,
984 (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
985 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
986 lj_needsplit(J);
987#else
988 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
989 return;
990#endif
771 } 991 }
772 } else { /* Complex case: remove in the middle. */ 992 break;
773 recff_nyiu(J); 993 case STRFMT_UINT:
994 id = IRCALL_lj_strfmt_putfnum_uint;
995 goto handle_int;
996 case STRFMT_NUM:
997 id = IRCALL_lj_strfmt_putfnum;
998 handle_num:
/* Reached for STRFMT_NUM and, via goto, for non-integer int/uint arguments. */
999 tra = lj_ir_tonum(J, tra);
1000 tr = lj_ir_call(J, id, tr, trsf, tra);
1001 if (LJ_SOFTFP32) lj_needsplit(J);
1002 break;
1003 case STRFMT_STR:
1004 if (!tref_isstr(tra)) {
1005 recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
1006 /* NYI: also buffers. */
1007 return;
1008 }
1009 if (sf == STRFMT_STR) /* Shortcut for plain %s. */
1010 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra);
1011 else if ((sf & STRFMT_T_QUOTED))
1012 tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
1013 else
1014 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
1015 break;
1016 case STRFMT_CHAR:
1017 tra = lj_opt_narrow_toint(J, tra);
1018 if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
1019 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
1020 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
1021 else
1022 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
1023 break;
1024 case STRFMT_PTR: /* NYI */
1025 case STRFMT_ERR:
1026 default:
1027 recff_nyiu(J, rd);
1028 return;
774 } 1029 }
1030 }
1031 if (sbufx) { /* Non-zero sbufx: end the chain with IR_USE, no result set here. */
1032 emitir(IRT(IR_USE, IRT_NIL), tr, 0);
1033 } else { /* sbufx == 0: the result is the BUFSTR of the chain. */
1034 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
1035 }
1036}
1037
/* Fast-function recorder (presumably for string.format(), going by the name).
** Passes the header returned by recff_bufhdr() to recff_format() with
** sbufx = 0, i.e. the format string is read from J->base[0] and the BUFSTR
** result is stored to J->base[0].
*/
1038static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
1039{
1040 recff_format(J, rd, recff_bufhdr(J), 0);
1041}
1042
1043/* -- Buffer library fast functions --------------------------------------- */
1044
1045#if LJ_HASBUFFER
1046
/* Emit an FLOAD (typed IRT_PGC) of field IRFL_SBUF_L from the buffer
** userdata ud and return the resulting TRef.
*/
1047static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud)
1048{
1049 return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L);
1050}
1051
/* Emit FREF + FSTORE (typed IRT_PGC) to write val to field IRFL_SBUF_L of
** the buffer userdata ud.
*/
1052static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val)
1053{
1054 TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L);
1055 emitir(IRT(IR_FSTORE, IRT_PGC), fref, val);
1056}
1057
/* Emit an FLOAD (typed IRT_PTR) of field fl from the buffer userdata ud and
** return the resulting TRef. Callers in this file pass IRFL_SBUF_R/W/E/B.
*/
1058static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl)
1059{
1060 return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl);
1061}
1062
/* Emit FREF + FSTORE (typed IRT_PTR) to write val to field fl of the buffer
** userdata ud.
*/
1063static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val)
1064{
1065 TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl);
1066 emitir(IRT(IR_FSTORE, IRT_PTR), fref, val);
1067}
1068
/* Emit IR for the pointer difference trw - trr (typed IRT_INTP) and return
** it. With LJ_64 the difference is additionally converted to an int with a
** CONV from IRT_INTP to IRT_INT.
*/
1069static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw)
1070{
1071 TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr);
1072 if (LJ_64)
1073 len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
1074 return len;
1075}
1076
1077/* Emit typecheck for string buffer. */
/* If the value recorded for slot arg is not a buffer (tvisbuf), a
** LJ_TRERR_BADTYPE trace error is raised. Otherwise a guard is emitted that
** the IRFL_UDATA_UDTYPE field equals UDTYPE_BUFFER.
** Returns the TRef of the userdata in J->base[arg].
*/
1078static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, int arg)
1079{
1080 TRef trtype, ud = J->base[arg];
1081 if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE);
1082 trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
1083 emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER));
1084 return ud;
1085}
1086
1087/* Emit BUFHDR for write to extended string buffer. */
/* The buffer pointer is computed as ud + sizeof(GCudata) (presumably the
** SBufExt payload directly follows the userdata header -- confirm) and is
** passed to BUFHDR with mode IRBUFHDR_WRITE. Returns the BUFHDR TRef.
*/
1088static TRef recff_sbufx_write(jit_State *J, TRef ud)
1089{
1090 TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));
1091 return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
1092}
1093
1094/* Check for integer in range for the buffer API. */
/* Returns an int-typed TRef for argument slot arg, guarded with ULE against
** LJ_MAX_BUF (as an unsigned compare this presumably also rejects negative
** values -- confirm).
**   integer: used as-is.
**   number:  converted with IRCONV_INT_NUM|IRCONV_ANY before the guard.
**   cdata:   (LJ_HASFFI only) loaded with lj_crecord_loadiu64(), guarded as
**            U64, then converted from I64 to int.
** Any other type raises a LJ_TRERR_BADTYPE trace error.
*/
1095static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, int arg)
1096{
1097 TRef tr = J->base[arg];
1098 TRef trlim = lj_ir_kint(J, LJ_MAX_BUF);
1099 if (tref_isinteger(tr)) {
1100 emitir(IRTGI(IR_ULE), tr, trlim);
1101 } else if (tref_isnum(tr)) {
1102 tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
1103 emitir(IRTGI(IR_ULE), tr, trlim);
1104#if LJ_HASFFI
1105 } else if (tref_iscdata(tr)) {
1106 tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]);
1107 emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF));
1108 tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE);
1109#else
1110 UNUSED(rd);
1111#endif
1112 } else {
1113 lj_trace_err(J, LJ_TRERR_BADTYPE);
1114 }
1115 return tr;
1116}
1117
/* Recorder for the buffer reset method. The trace is specialized on whether
** the buffer seen while recording has its COW flag set (sbufiscow); a guard
** on SBUF_FLAG_COW in the L field enforces the same state at run time.
**   COW set:   clear the flag bit in L (BXOR) and store zero to the
**              W, E, B, REF and R fields.
**   otherwise: set both W and R to the current B.
*/
1118static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd)
1119{
1120 TRef ud = recff_sbufx_check(J, rd, 0);
1121 SBufExt *sbx = bufV(&rd->argv[0]);
1122 int iscow = (int)sbufiscow(sbx); /* State at recording time. */
1123 TRef trl = recff_sbufx_get_L(J, ud);
1124 TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));
1125 TRef zero = lj_ir_kint(J, 0);
1126 emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero); /* Guard the recorded COW state. */
1127 if (iscow) {
1128 trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
1129 LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
1130 lj_ir_kint(J, SBUF_FLAG_COW));
1131 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);
1132 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
1133 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
1134 recff_sbufx_set_L(J, ud, trl);
1135 emitir(IRT(IR_FSTORE, IRT_PGC),
1136 emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);
1137 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
1138 } else {
1139 TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
1140 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
1141 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb);
1142 }
1143}
1144
/* Recorder for the buffer skip method: advances the R field by
** min(W - R, n), where n is argument 1 after the range guard emitted by
** recff_sbufx_checkint().
*/
1145static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd)
1146{
1147 TRef ud = recff_sbufx_check(J, rd, 0);
1148 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1149 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1150 TRef len = recff_sbufx_len(J, trr, trw);
1151 TRef trn = recff_sbufx_checkint(J, rd, 1);
1152 len = emitir(IRTI(IR_MIN), len, trn); /* Never skip past W. */
1153 trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
1154 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
1155}
1156
/* Recorder for the buffer set method; emits a call to lj_bufx_set().
**   string: passes the data pointer (STRREF at offset 0) and the length
**           loaded from IRFL_STR_LEN.
**   cdata:  (LJ_HASFFI only) passes the pointer from lj_crecord_topcvoid()
**           and the range-checked length from argument 2.
** In both cases the original object tr is the last call argument.
** For other argument types nothing beyond the BUFHDR is emitted here.
*/
1157static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
1158{
1159 TRef ud = recff_sbufx_check(J, rd, 0);
1160 TRef trbuf = recff_sbufx_write(J, ud);
1161 TRef tr = J->base[1];
1162 if (tref_isstr(tr)) {
1163 TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
1164 TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
1165 lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
1166#if LJ_HASFFI
1167 } else if (tref_iscdata(tr)) {
1168 TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]);
1169 TRef len = recff_sbufx_checkint(J, rd, 2);
1170 lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
1171#endif
1172 } /* else: Interpreter will throw. */
1173}
1174
/* Recorder for the buffer put method. Appends every argument (slots 1..n,
** up to the first zero TRef) to the write chain started by BUFHDR:
**   string: BUFPUT of the string itself.
**   number: BUFPUT of a TOSTR conversion (IRTOSTR_NUM or IRTOSTR_INT).
**   udata:  must be a buffer (recff_sbufx_check); its R..W range is
**           appended with lj_buf_putmem(), guarded by ud != ud2.
**   other:  recff_nyiu().
** Returns without emitting IR_USE if there are no arguments.
*/
1175static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd)
1176{
1177 TRef ud = recff_sbufx_check(J, rd, 0);
1178 TRef trbuf = recff_sbufx_write(J, ud);
1179 TRef tr;
1180 ptrdiff_t arg;
1181 if (!J->base[1]) return;
1182 for (arg = 1; (tr = J->base[arg]); arg++) {
1183 if (tref_isstr(tr)) {
1184 trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr);
1185 } else if (tref_isnumber(tr)) {
1186 trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf,
1187 emitir(IRT(IR_TOSTR, IRT_STR), tr,
1188 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT));
1189 } else if (tref_isudata(tr)) {
1190 TRef ud2 = recff_sbufx_check(J, rd, arg);
1191 TRef trr = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_R);
1192 TRef trw = recff_sbufx_get_ptr(J, ud2, IRFL_SBUF_W);
1193 TRef len = recff_sbufx_len(J, trr, trw);
1194 emitir(IRTG(IR_NE, IRT_PGC), ud, ud2); /* Guard: source is not the destination buffer. */
1195 trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len);
1196 } else {
/* NOTE(review): no return after recff_nyiu() here, unlike recff_format();
** if recff_nyiu() can return, the loop continues -- confirm this is intended.
*/
1197 recff_nyiu(J, rd);
1198 }
1199 }
1200 emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
1201}
1202
/* Recorder for the buffer putf method: checks the buffer in slot 0, starts
** a write chain and hands it to recff_format() with sbufx = 1, so the
** format string is taken from J->base[1] and the chain ends with IR_USE.
*/
1203static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd)
1204{
1205 TRef ud = recff_sbufx_check(J, rd, 0);
1206 TRef trbuf = recff_sbufx_write(J, ud);
1207 recff_format(J, rd, trbuf, 1);
1208}
1209
/* Recorder for the buffer get method. Every argument yields one result
** string created with XSNEW starting at the current read pointer:
**   nil: length W - R; the read pointer then becomes W.
**   n:   length min(W - R, n) after the recff_sbufx_checkint() guard; the
**        read pointer advances by that length.
** A call without arguments is handled like a single nil argument.
** The result for argument slot arg+1 is stored to J->base[arg], the R field
** is stored back after every argument and rd->nres is the argument count.
*/
1210static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd)
1211{
1212 TRef ud = recff_sbufx_check(J, rd, 0);
1213 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1214 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1215 TRef tr;
1216 ptrdiff_t arg;
1217 if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; } /* Fake a single nil argument. */
1218 for (arg = 0; (tr = J->base[arg+1]); arg++) {
1219 TRef len = recff_sbufx_len(J, trr, trw);
1220 if (tref_isnil(tr)) {
1221 J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
1222 trr = trw;
1223 } else {
1224 TRef trn = recff_sbufx_checkint(J, rd, arg+1);
1225 TRef tru;
1226 len = emitir(IRTI(IR_MIN), len, trn);
1227 tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
1228 J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
1229 trr = tru; /* Doing the ADD before the SNEW generates better code. */
1230 }
1231 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
1232 }
1233 rd->nres = arg;
1234}
1235
/* Recorder for the buffer __tostring metamethod: the result in J->base[0]
** is an XSNEW string starting at R with length W - R. No buffer field is
** written here.
*/
1236static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd)
1237{
1238 TRef ud = recff_sbufx_check(J, rd, 0);
1239 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1240 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1241 J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw));
1242}
1243
/* Recorder for the buffer __len metamethod: the result in J->base[0] is the
** length W - R computed by recff_sbufx_len().
*/
1244static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd)
1245{
1246 TRef ud = recff_sbufx_check(J, rd, 0);
1247 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1248 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1249 J->base[0] = recff_sbufx_len(J, trr, trw);
1250}
1251
1252#if LJ_HASFFI
/* Recorder for the buffer putcdata method (LJ_HASFFI only). Argument 1 is
** converted to a pointer with lj_crecord_topcvoid(), argument 2 is the
** range-checked length; both are passed to lj_buf_putmem() on a write chain
** that is ended with IR_USE.
*/
1253static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd)
1254{
1255 TRef ud = recff_sbufx_check(J, rd, 0);
1256 TRef trbuf = recff_sbufx_write(J, ud);
1257 TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]);
1258 TRef len = recff_sbufx_checkint(J, rd, 2);
1259 trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len);
1260 emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
1261}
1262
/* Recorder for the buffer reserve method (LJ_HASFFI only). Emits a call to
** lj_bufx_more() with the range-checked size from argument 1 and returns
** two results: J->base[0] is the W field converted by lj_crecord_topuint8(),
** J->base[1] is the result of the call. The statement order makes the W
** load come after the call in the emitted IR.
*/
1263static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd)
1264{
1265 TRef ud = recff_sbufx_check(J, rd, 0);
1266 TRef trbuf = recff_sbufx_write(J, ud);
1267 TRef trsz = recff_sbufx_checkint(J, rd, 1);
1268 J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz);
1269 J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W));
1270 rd->nres = 2;
1271}
1272
/* Recorder for the buffer commit method (LJ_HASFFI only). The range-checked
** length from argument 1 is guarded with ULE against the remaining space
** E - W, then W is advanced by that length and stored back.
*/
1273static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd)
1274{
1275 TRef ud = recff_sbufx_check(J, rd, 0);
1276 TRef len = recff_sbufx_checkint(J, rd, 1);
1277 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1278 TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E);
1279 TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw);
1280 if (LJ_64)
1281 left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
1282 emitir(IRTGI(IR_ULE), len, left); /* Guard: len fits into the space left. */
1283 trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len);
1284 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw);
1285}
1286
/* Recorder for the buffer ref method (LJ_HASFFI only). Returns two results:
** J->base[0] is the R field converted by lj_crecord_topuint8(), J->base[1]
** is the length W - R.
*/
1287static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd)
1288{
1289 TRef ud = recff_sbufx_check(J, rd, 0);
1290 TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
1291 TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
1292 J->base[0] = lj_crecord_topuint8(J, trr);
1293 J->base[1] = recff_sbufx_len(J, trr, trw);
1294 rd->nres = 2;
1295}
1296#endif
1297
/* Recorder for the buffer encode method. The value in slot 1 is passed by
** reference (TMPREF with IRTMPREF_IN1) to lj_serialize_put() together with
** the write chain. Without LJ_DUALNUM an integer TRef is converted to a
** number first.
*/
1298static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd)
1299{
1300 TRef ud = recff_sbufx_check(J, rd, 0);
1301 TRef trbuf = recff_sbufx_write(J, ud);
1302 TRef tmp, tr = J->base[1];
1303 if (!LJ_DUALNUM && tref_isinteger(tr))
1304 tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
1305 tmp = emitir(IRT(IR_TMPREF, IRT_PGC), tr, IRTMPREF_IN1);
1306 lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp);
1307 /* No IR_USE needed, since the call is a store. */
1308}
1309
/* Recorder for the buffer decode method. Emits a call to lj_serialize_get()
** with an output TMPREF (IRTMPREF_OUT1); the call result is later stored to
** the R field. The type of the decoded value is peeked at recording time
** from the actual buffer (lj_serialize_peektype) and used for the
** lj_record_vload() of the temporary, whose result goes to J->base[0].
*/
1310static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd)
1311{
1312 TRef ud = recff_sbufx_check(J, rd, 0);
1313 TRef trbuf = recff_sbufx_write(J, ud);
1314 TRef trr, tmp;
1315 IRType t;
1316 tmp = emitir(IRT(IR_TMPREF, IRT_PGC), REF_NIL, IRTMPREF_OUT1);
1317 trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp);
1318 /* No IR_USE needed, since the call is a store. */
1319 t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0]));
1320 J->base[0] = lj_record_vload(J, tmp, t);
1321 /* The sbx->r store must be after the VLOAD type check, in case it fails. */
1322 recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
1323}
1324
/* Recorder for the non-method encode function: the value in slot 0 is
** passed by reference (TMPREF with IRTMPREF_IN1) to lj_serialize_encode()
** and the call result becomes J->base[0]. Without LJ_DUALNUM an integer
** TRef is converted to a number first.
*/
1325static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd)
1326{
1327 TRef tmp, tr = J->base[0];
1328 if (!LJ_DUALNUM && tref_isinteger(tr))
1329 tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
1330 tmp = emitir(IRT(IR_TMPREF, IRT_PGC), tr, IRTMPREF_IN1);
1331 J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp);
1332 /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */
1333 emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0);
1334 UNUSED(rd);
1335}
1336
/* Recorder for the non-method decode function; only handles a string in
** slot 0. Emits a call to lj_serialize_decode() with an output TMPREF and
** the string, followed by IR_USE of the call result. To learn the result
** type at recording time, a zeroed local SBufExt is pointed at the string
** data with lj_bufx_set_cow() and passed to lj_serialize_peektype(); that
** type is used for the lj_record_vload() stored to J->base[0].
*/
1337static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd)
1338{
1339 if (tvisstr(&rd->argv[0])) {
1340 GCstr *str = strV(&rd->argv[0]);
1341 SBufExt sbx;
1342 TRef tr, tmp;
1343 IRType t;
1344 tmp = emitir(IRT(IR_TMPREF, IRT_PGC), REF_NIL, IRTMPREF_OUT1);
1345 tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]);
1346 /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM.
1347 ** That's why IRCALL_lj_serialize_decode needs a fake INT result.
1348 */
1349 emitir(IRT(IR_USE, IRT_NIL), tr, 0);
1350 memset(&sbx, 0, sizeof(SBufExt));
1351 lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len);
1352 t = (IRType)lj_serialize_peektype(&sbx);
1353 J->base[0] = lj_record_vload(J, tmp, t);
775 } /* else: Interpreter will throw. */ 1354 } /* else: Interpreter will throw. */
776} 1355}
777 1356
1357#endif
1358
1359/* -- Table library fast functions ---------------------------------------- */
1360
778static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) 1361static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
779{ 1362{
780 RecordIndex ix; 1363 RecordIndex ix;
@@ -783,7 +1366,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
783 rd->nres = 0; 1366 rd->nres = 0;
784 if (tref_istab(ix.tab) && ix.val) { 1367 if (tref_istab(ix.tab) && ix.val) {
785 if (!J->base[2]) { /* Simple push: t[#t+1] = v */ 1368 if (!J->base[2]) { /* Simple push: t[#t+1] = v */
786 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); 1369 TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL);
787 GCtab *t = tabV(&rd->argv[0]); 1370 GCtab *t = tabV(&rd->argv[0]);
788 ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); 1371 ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1));
789 settabV(J->L, &ix.tabv, t); 1372 settabV(J->L, &ix.tabv, t);
@@ -791,11 +1374,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
791 ix.idxchain = 0; 1374 ix.idxchain = 0;
792 lj_record_idx(J, &ix); /* Set new value. */ 1375 lj_record_idx(J, &ix); /* Set new value. */
793 } else { /* Complex case: insert in the middle. */ 1376 } else { /* Complex case: insert in the middle. */
794 recff_nyiu(J); 1377 recff_nyiu(J, rd);
1378 return;
795 } 1379 }
796 } /* else: Interpreter will throw. */ 1380 } /* else: Interpreter will throw. */
797} 1381}
798 1382
/* Recorder for table.concat(); only handles a table in slot 0.
**   sep: slot 1 coerced with lj_ir_tostr(), or a null string constant if
**        slot 1 tests as nil.
**   i:   slot 2 narrowed to int, default constant 1.
**   e:   slot 3 narrowed to int, default ALEN of the table.
** Emits a call to lj_buf_puttab() on a fresh buffer header, guards that the
** call result is not NULL and returns the BUFSTR in J->base[0].
*/
1383static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
1384{
1385 TRef tab = J->base[0];
1386 if (tref_istab(tab)) {
1387 TRef sep = !tref_isnil(J->base[1]) ?
1388 lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
1389 TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
1390 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
1391 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
1392 lj_opt_narrow_toint(J, J->base[3]) :
1393 emitir(IRTI(IR_ALEN), tab, TREF_NIL);
1394 TRef hdr = recff_bufhdr(J);
1395 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
1396 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
1397 J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
1398 } /* else: Interpreter will throw. */
1399 UNUSED(rd);
1400}
1401
/* Recorder for table.new(): narrows slots 0 and 1 to ints and emits a call
** to lj_tab_new_ah() with them; the call result becomes J->base[0].
*/
1402static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
1403{
1404 TRef tra = lj_opt_narrow_toint(J, J->base[0]);
1405 TRef trh = lj_opt_narrow_toint(J, J->base[1]);
1406 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
1407 UNUSED(rd);
1408}
1409
/* Recorder for table.clear(); only handles a table in slot 0. Emits a call
** to lj_tab_clear(), returns no results (rd->nres = 0) and sets
** J->needsnap to request a snapshot.
*/
1410static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
1411{
1412 TRef tr = J->base[0];
1413 if (tref_istab(tr)) {
1414 rd->nres = 0;
1415 lj_ir_call(J, IRCALL_lj_tab_clear, tr);
1416 J->needsnap = 1;
1417 } /* else: Interpreter will throw. */
1418}
1419
799/* -- I/O library fast functions ------------------------------------------ */ 1420/* -- I/O library fast functions ------------------------------------------ */
800 1421
801/* Get FILE* for I/O function. Any I/O error aborts recording, so there's 1422/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -805,8 +1426,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
805{ 1426{
806 TRef tr, ud, fp; 1427 TRef tr, ud, fp;
807 if (id) { /* io.func() */ 1428 if (id) { /* io.func() */
808 tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); 1429 ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
809 ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
810 } else { /* fp:method() */ 1430 } else { /* fp:method() */
811 ud = J->base[0]; 1431 ud = J->base[0];
812 if (!tref_isudata(ud)) 1432 if (!tref_isudata(ud))
@@ -828,10 +1448,13 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
828 ptrdiff_t i = rd->data == 0 ? 1 : 0; 1448 ptrdiff_t i = rd->data == 0 ? 1 : 0;
829 for (; J->base[i]; i++) { 1449 for (; J->base[i]; i++) {
830 TRef str = lj_ir_tostr(J, J->base[i]); 1450 TRef str = lj_ir_tostr(J, J->base[i]);
831 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); 1451 TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero);
832 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); 1452 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
833 if (tref_isk(len) && IR(tref_ref(len))->i == 1) { 1453 if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
834 TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); 1454 IRIns *irs = IR(tref_ref(str));
1455 TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
1456 irs->op1 :
1457 emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
835 tr = lj_ir_call(J, IRCALL_fputc, tr, fp); 1458 tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
836 if (results_wanted(J) != 0) /* Check result only if not ignored. */ 1459 if (results_wanted(J) != 0) /* Check result only if not ignored. */
837 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); 1460 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
@@ -853,6 +1476,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd)
853 J->base[0] = TREF_TRUE; 1476 J->base[0] = TREF_TRUE;
854} 1477}
855 1478
1479/* -- Debug library fast functions ---------------------------------------- */
1480
/* Recorder for debug.getmetatable().
**   table/userdata: load the metatable field and guard that it is NULL or
**     non-NULL exactly as seen at recording time; the result is the loaded
**     metatable or nil.
**   other types: the base metatable is looked up at recording time with
**     basemt_obj() and returned as a constant table (or nil); no guard is
**     emitted in this function for that case.
*/
1481static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd)
1482{
1483 GCtab *mt;
1484 TRef mtref;
1485 TRef tr = J->base[0];
1486 if (tref_istab(tr)) {
1487 mt = tabref(tabV(&rd->argv[0])->metatable);
1488 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META);
1489 } else if (tref_isudata(tr)) {
1490 mt = tabref(udataV(&rd->argv[0])->metatable);
1491 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META);
1492 } else {
1493 mt = tabref(basemt_obj(J2G(J), &rd->argv[0]));
1494 J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL;
1495 return;
1496 }
1497 emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
1498 J->base[0] = mt ? mtref : TREF_NIL;
1499}
1500
856/* -- Record calls to fast functions -------------------------------------- */ 1501/* -- Record calls to fast functions -------------------------------------- */
857 1502
858#include "lj_recdef.h" 1503#include "lj_recdef.h"
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 2cb260d1..f67a2332 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -11,7 +11,16 @@
11 11
12/* -- Lua stack frame ----------------------------------------------------- */ 12/* -- Lua stack frame ----------------------------------------------------- */
13 13
14/* Frame type markers in callee function slot (callee base-1). */ 14/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned):
15**
16** PC 00 Lua frame
17** delta 001 C frame
18** delta 010 Continuation frame
19** delta 011 Lua vararg frame
20** delta 101 cpcall() frame
21** delta 110 ff pcall() frame
22** delta 111 ff pcall() frame with active hook
23*/
15enum { 24enum {
16 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, 25 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
17 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH 26 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
21#define FRAME_TYPEP (FRAME_TYPE|FRAME_P) 30#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
22 31
23/* Macros to access and modify Lua frames. */ 32/* Macros to access and modify Lua frames. */
33#if LJ_FR2
34/* Two-slot frame info, required for 64 bit PC/GCRef:
35**
36** base-2 base-1 | base base+1 ...
37** [func PC/delta/ft] | [slots ...]
38** ^-- frame | ^-- base ^-- top
39**
40** Continuation frames:
41**
42** base-4 base-3 base-2 base-1 | base base+1 ...
43** [cont PC ] [func PC/delta/ft] | [slots ...]
44** ^-- frame | ^-- base ^-- top
45*/
46#define frame_gc(f) (gcval((f)-1))
47#define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz)
48#define frame_pc(f) ((const BCIns *)frame_ftsz(f))
49#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp)))
50#define setframe_ftsz(f, sz) ((f)->ftsz = (sz))
51#define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc))
52#else
53/* One-slot frame info, sufficient for 32 bit PC/GCRef:
54**
55** base-1 | base base+1 ...
56** lo hi |
57** [func | PC/delta/ft] | [slots ...]
58** ^-- frame | ^-- base ^-- top
59**
60** Continuation frames:
61**
62** base-2 base-1 | base base+1 ...
63** lo hi lo hi |
64** [cont | PC] [func | PC/delta/ft] | [slots ...]
65** ^-- frame | ^-- base ^-- top
66*/
24#define frame_gc(f) (gcref((f)->fr.func)) 67#define frame_gc(f) (gcref((f)->fr.func))
25#define frame_func(f) (&frame_gc(f)->fn) 68#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz)
26#define frame_ftsz(f) ((f)->fr.tp.ftsz) 69#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
70#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp))
71#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz))
72#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
73#endif
27 74
28#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) 75#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
29#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) 76#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
@@ -33,33 +80,53 @@ enum {
33#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) 80#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
34#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) 81#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
35 82
36#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) 83#define frame_func(f) (&frame_gc(f)->fn)
84#define frame_delta(f) (frame_ftsz(f) >> 3)
85#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
86
87enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
88
89#if LJ_FR2
90#define frame_contpc(f) (frame_pc((f)-2))
91#define frame_contv(f) (((f)-3)->u64)
92#else
37#define frame_contpc(f) (frame_pc((f)-1)) 93#define frame_contpc(f) (frame_pc((f)-1))
38#if LJ_64 94#define frame_contv(f) (((f)-1)->u32.lo)
95#endif
96#if LJ_FR2
97#define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64)
98#elif LJ_64
39#define frame_contf(f) \ 99#define frame_contf(f) \
40 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ 100 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
41 (intptr_t)(int32_t)((f)-1)->u32.lo)) 101 (intptr_t)(int32_t)((f)-1)->u32.lo))
42#else 102#else
43#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) 103#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
44#endif 104#endif
45#define frame_delta(f) (frame_ftsz(f) >> 3) 105#define frame_iscont_fficb(f) \
46#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) 106 (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
47 107
48#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) 108#define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
49#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) 109#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
50#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) 110#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
51/* Note: this macro does not skip over FRAME_VARG. */ 111/* Note: this macro does not skip over FRAME_VARG. */
52 112
53#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
54#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz))
55#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
56
57/* -- C stack frame ------------------------------------------------------- */ 113/* -- C stack frame ------------------------------------------------------- */
58 114
59/* Macros to access and modify the C stack frame chain. */ 115/* Macros to access and modify the C stack frame chain. */
60 116
61/* These definitions must match with the arch-specific *.dasc files. */ 117/* These definitions must match with the arch-specific *.dasc files. */
62#if LJ_TARGET_X86 118#if LJ_TARGET_X86
119#if LJ_ABI_WIN
120#define CFRAME_OFS_ERRF (19*4)
121#define CFRAME_OFS_NRES (18*4)
122#define CFRAME_OFS_PREV (17*4)
123#define CFRAME_OFS_L (16*4)
124#define CFRAME_OFS_SEH (9*4)
125#define CFRAME_OFS_PC (6*4)
126#define CFRAME_OFS_MULTRES (5*4)
127#define CFRAME_SIZE (16*4)
128#define CFRAME_SHIFT_MULTRES 0
129#else
63#define CFRAME_OFS_ERRF (15*4) 130#define CFRAME_OFS_ERRF (15*4)
64#define CFRAME_OFS_NRES (14*4) 131#define CFRAME_OFS_NRES (14*4)
65#define CFRAME_OFS_PREV (13*4) 132#define CFRAME_OFS_PREV (13*4)
@@ -68,24 +135,41 @@ enum {
68#define CFRAME_OFS_MULTRES (5*4) 135#define CFRAME_OFS_MULTRES (5*4)
69#define CFRAME_SIZE (12*4) 136#define CFRAME_SIZE (12*4)
70#define CFRAME_SHIFT_MULTRES 0 137#define CFRAME_SHIFT_MULTRES 0
138#endif
71#elif LJ_TARGET_X64 139#elif LJ_TARGET_X64
72#if LJ_ABI_WIN 140#if LJ_ABI_WIN
73#define CFRAME_OFS_PREV (13*8) 141#define CFRAME_OFS_PREV (13*8)
142#if LJ_GC64
143#define CFRAME_OFS_PC (12*8)
144#define CFRAME_OFS_L (11*8)
145#define CFRAME_OFS_ERRF (21*4)
146#define CFRAME_OFS_NRES (20*4)
147#define CFRAME_OFS_MULTRES (8*4)
148#else
74#define CFRAME_OFS_PC (25*4) 149#define CFRAME_OFS_PC (25*4)
75#define CFRAME_OFS_L (24*4) 150#define CFRAME_OFS_L (24*4)
76#define CFRAME_OFS_ERRF (23*4) 151#define CFRAME_OFS_ERRF (23*4)
77#define CFRAME_OFS_NRES (22*4) 152#define CFRAME_OFS_NRES (22*4)
78#define CFRAME_OFS_MULTRES (21*4) 153#define CFRAME_OFS_MULTRES (21*4)
154#endif
79#define CFRAME_SIZE (10*8) 155#define CFRAME_SIZE (10*8)
80#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) 156#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
81#define CFRAME_SHIFT_MULTRES 0 157#define CFRAME_SHIFT_MULTRES 0
82#else 158#else
83#define CFRAME_OFS_PREV (4*8) 159#define CFRAME_OFS_PREV (4*8)
160#if LJ_GC64
161#define CFRAME_OFS_PC (3*8)
162#define CFRAME_OFS_L (2*8)
163#define CFRAME_OFS_ERRF (3*4)
164#define CFRAME_OFS_NRES (2*4)
165#define CFRAME_OFS_MULTRES (0*4)
166#else
84#define CFRAME_OFS_PC (7*4) 167#define CFRAME_OFS_PC (7*4)
85#define CFRAME_OFS_L (6*4) 168#define CFRAME_OFS_L (6*4)
86#define CFRAME_OFS_ERRF (5*4) 169#define CFRAME_OFS_ERRF (5*4)
87#define CFRAME_OFS_NRES (4*4) 170#define CFRAME_OFS_NRES (4*4)
88#define CFRAME_OFS_MULTRES (1*4) 171#define CFRAME_OFS_MULTRES (1*4)
172#endif
89#if LJ_NO_UNWIND 173#if LJ_NO_UNWIND
90#define CFRAME_SIZE (12*8) 174#define CFRAME_SIZE (12*8)
91#else 175#else
@@ -107,6 +191,15 @@ enum {
107#define CFRAME_SIZE 64 191#define CFRAME_SIZE 64
108#endif 192#endif
109#define CFRAME_SHIFT_MULTRES 3 193#define CFRAME_SHIFT_MULTRES 3
194#elif LJ_TARGET_ARM64
195#define CFRAME_OFS_ERRF 196
196#define CFRAME_OFS_NRES 200
197#define CFRAME_OFS_PREV 160
198#define CFRAME_OFS_L 176
199#define CFRAME_OFS_PC 168
200#define CFRAME_OFS_MULTRES 192
201#define CFRAME_SIZE 208
202#define CFRAME_SHIFT_MULTRES 3
110#elif LJ_TARGET_PPC 203#elif LJ_TARGET_PPC
111#if LJ_TARGET_XBOX360 204#if LJ_TARGET_XBOX360
112#define CFRAME_OFS_ERRF 424 205#define CFRAME_OFS_ERRF 424
@@ -117,7 +210,7 @@ enum {
117#define CFRAME_OFS_MULTRES 408 210#define CFRAME_OFS_MULTRES 408
118#define CFRAME_SIZE 384 211#define CFRAME_SIZE 384
119#define CFRAME_SHIFT_MULTRES 3 212#define CFRAME_SHIFT_MULTRES 3
120#elif LJ_ARCH_PPC64 213#elif LJ_ARCH_PPC32ON64
121#define CFRAME_OFS_ERRF 472 214#define CFRAME_OFS_ERRF 472
122#define CFRAME_OFS_NRES 468 215#define CFRAME_OFS_NRES 468
123#define CFRAME_OFS_PREV 448 216#define CFRAME_OFS_PREV 448
@@ -133,26 +226,43 @@ enum {
133#define CFRAME_OFS_L 36 226#define CFRAME_OFS_L 36
134#define CFRAME_OFS_PC 32 227#define CFRAME_OFS_PC 32
135#define CFRAME_OFS_MULTRES 28 228#define CFRAME_OFS_MULTRES 28
136#define CFRAME_SIZE 272 229#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
137#define CFRAME_SHIFT_MULTRES 3 230#define CFRAME_SHIFT_MULTRES 3
138#endif 231#endif
139#elif LJ_TARGET_PPCSPE 232#elif LJ_TARGET_MIPS32
140#define CFRAME_OFS_ERRF 28 233#if LJ_ARCH_HASFPU
141#define CFRAME_OFS_NRES 24
142#define CFRAME_OFS_PREV 20
143#define CFRAME_OFS_L 16
144#define CFRAME_OFS_PC 12
145#define CFRAME_OFS_MULTRES 8
146#define CFRAME_SIZE 184
147#define CFRAME_SHIFT_MULTRES 3
148#elif LJ_TARGET_MIPS
149#define CFRAME_OFS_ERRF 124 234#define CFRAME_OFS_ERRF 124
150#define CFRAME_OFS_NRES 120 235#define CFRAME_OFS_NRES 120
151#define CFRAME_OFS_PREV 116 236#define CFRAME_OFS_PREV 116
152#define CFRAME_OFS_L 112 237#define CFRAME_OFS_L 112
238#define CFRAME_SIZE 112
239#else
240#define CFRAME_OFS_ERRF 76
241#define CFRAME_OFS_NRES 72
242#define CFRAME_OFS_PREV 68
243#define CFRAME_OFS_L 64
244#define CFRAME_SIZE 64
245#endif
153#define CFRAME_OFS_PC 20 246#define CFRAME_OFS_PC 20
154#define CFRAME_OFS_MULTRES 16 247#define CFRAME_OFS_MULTRES 16
155#define CFRAME_SIZE 112 248#define CFRAME_SHIFT_MULTRES 3
249#elif LJ_TARGET_MIPS64
250#if LJ_ARCH_HASFPU
251#define CFRAME_OFS_ERRF 188
252#define CFRAME_OFS_NRES 184
253#define CFRAME_OFS_PREV 176
254#define CFRAME_OFS_L 168
255#define CFRAME_OFS_PC 160
256#define CFRAME_SIZE 192
257#else
258#define CFRAME_OFS_ERRF 124
259#define CFRAME_OFS_NRES 120
260#define CFRAME_OFS_PREV 112
261#define CFRAME_OFS_L 104
262#define CFRAME_OFS_PC 96
263#define CFRAME_SIZE 128
264#endif
265#define CFRAME_OFS_MULTRES 0
156#define CFRAME_SHIFT_MULTRES 3 266#define CFRAME_SHIFT_MULTRES 3
157#else 267#else
158#error "Missing CFRAME_* definitions for this architecture" 268#error "Missing CFRAME_* definitions for this architecture"
diff --git a/src/lj_func.c b/src/lj_func.c
index fce9b7b3..cf8ca08f 100644
--- a/src/lj_func.c
+++ b/src/lj_func.c
@@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt)
24 24
25/* -- Upvalues ------------------------------------------------------------ */ 25/* -- Upvalues ------------------------------------------------------------ */
26 26
27static void unlinkuv(GCupval *uv) 27static void unlinkuv(global_State *g, GCupval *uv)
28{ 28{
29 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); 29 UNUSED(g);
30 lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
31 "broken upvalue chain");
30 setgcrefr(uvnext(uv)->prev, uv->prev); 32 setgcrefr(uvnext(uv)->prev, uv->prev);
31 setgcrefr(uvprev(uv)->next, uv->next); 33 setgcrefr(uvprev(uv)->next, uv->next);
32} 34}
@@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
40 GCupval *uv; 42 GCupval *uv;
41 /* Search the sorted list of open upvalues. */ 43 /* Search the sorted list of open upvalues. */
42 while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { 44 while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) {
43 lua_assert(!p->closed && uvval(p) != &p->tv); 45 lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain");
44 if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ 46 if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */
45 if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ 47 if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */
46 flipwhite(obj2gco(p)); 48 flipwhite(obj2gco(p));
@@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
61 setgcrefr(uv->next, g->uvhead.next); 63 setgcrefr(uv->next, g->uvhead.next);
62 setgcref(uvnext(uv)->prev, obj2gco(uv)); 64 setgcref(uvnext(uv)->prev, obj2gco(uv));
63 setgcref(g->uvhead.next, obj2gco(uv)); 65 setgcref(g->uvhead.next, obj2gco(uv));
64 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); 66 lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
67 "broken upvalue chain");
65 return uv; 68 return uv;
66} 69}
67 70
@@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
84 while (gcref(L->openupval) != NULL && 87 while (gcref(L->openupval) != NULL &&
85 uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { 88 uvval((uv = gco2uv(gcref(L->openupval)))) >= level) {
86 GCobj *o = obj2gco(uv); 89 GCobj *o = obj2gco(uv);
87 lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv); 90 lj_assertG(!isblack(o), "bad black upvalue");
91 lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain");
88 setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ 92 setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */
89 if (isdead(g, o)) { 93 if (isdead(g, o)) {
90 lj_func_freeuv(g, uv); 94 lj_func_freeuv(g, uv);
91 } else { 95 } else {
92 unlinkuv(uv); 96 unlinkuv(g, uv);
93 lj_gc_closeuv(g, uv); 97 lj_gc_closeuv(g, uv);
94 } 98 }
95 } 99 }
@@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
98void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) 102void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv)
99{ 103{
100 if (!uv->closed) 104 if (!uv->closed)
101 unlinkuv(uv); 105 unlinkuv(g, uv);
102 lj_mem_freet(g, uv); 106 lj_mem_freet(g, uv);
103} 107}
104 108
diff --git a/src/lj_gc.c b/src/lj_gc.c
index ef3db6a5..646a27b2 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -24,6 +25,7 @@
24#include "lj_cdata.h" 25#include "lj_cdata.h"
25#endif 26#endif
26#include "lj_trace.h" 27#include "lj_trace.h"
28#include "lj_dispatch.h"
27#include "lj_vm.h" 29#include "lj_vm.h"
28 30
29#define GCSTEPSIZE 1024u 31#define GCSTEPSIZE 1024u
@@ -40,7 +42,8 @@
40 42
41/* Mark a TValue (if needed). */ 43/* Mark a TValue (if needed). */
42#define gc_marktv(g, tv) \ 44#define gc_marktv(g, tv) \
43 { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ 45 { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \
46 "TValue and GC type mismatch"); \
44 if (tviswhite(tv)) gc_mark(g, gcV(tv)); } 47 if (tviswhite(tv)) gc_mark(g, gcV(tv)); }
45 48
46/* Mark a GCobj (if needed). */ 49/* Mark a GCobj (if needed). */
@@ -54,21 +57,30 @@
54static void gc_mark(global_State *g, GCobj *o) 57static void gc_mark(global_State *g, GCobj *o)
55{ 58{
56 int gct = o->gch.gct; 59 int gct = o->gch.gct;
57 lua_assert(iswhite(o) && !isdead(g, o)); 60 lj_assertG(iswhite(o), "mark of non-white object");
61 lj_assertG(!isdead(g, o), "mark of dead object");
58 white2gray(o); 62 white2gray(o);
59 if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { 63 if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) {
60 GCtab *mt = tabref(gco2ud(o)->metatable); 64 GCtab *mt = tabref(gco2ud(o)->metatable);
61 gray2black(o); /* Userdata are never gray. */ 65 gray2black(o); /* Userdata are never gray. */
62 if (mt) gc_markobj(g, mt); 66 if (mt) gc_markobj(g, mt);
63 gc_markobj(g, tabref(gco2ud(o)->env)); 67 gc_markobj(g, tabref(gco2ud(o)->env));
68 if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) {
69 SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
70 if (sbufiscow(sbx) && gcref(sbx->cowref))
71 gc_markobj(g, gcref(sbx->cowref));
72 if (gcref(sbx->dict))
73 gc_markobj(g, gcref(sbx->dict));
74 }
64 } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { 75 } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
65 GCupval *uv = gco2uv(o); 76 GCupval *uv = gco2uv(o);
66 gc_marktv(g, uvval(uv)); 77 gc_marktv(g, uvval(uv));
67 if (uv->closed) 78 if (uv->closed)
68 gray2black(o); /* Closed upvalues are never gray. */ 79 gray2black(o); /* Closed upvalues are never gray. */
69 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { 80 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
70 lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || 81 lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
71 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO); 82 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE,
83 "bad GC type %d", gct);
72 setgcrefr(o->gch.gclist, g->gc.gray); 84 setgcrefr(o->gch.gclist, g->gc.gray);
73 setgcref(g->gc.gray, o); 85 setgcref(g->gc.gray, o);
74 } 86 }
@@ -101,7 +113,8 @@ static void gc_mark_uv(global_State *g)
101{ 113{
102 GCupval *uv; 114 GCupval *uv;
103 for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { 115 for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) {
104 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); 116 lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
117 "broken upvalue chain");
105 if (isgray(obj2gco(uv))) 118 if (isgray(obj2gco(uv)))
106 gc_marktv(g, uvval(uv)); 119 gc_marktv(g, uvval(uv));
107 } 120 }
@@ -196,7 +209,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t)
196 for (i = 0; i <= hmask; i++) { 209 for (i = 0; i <= hmask; i++) {
197 Node *n = &node[i]; 210 Node *n = &node[i];
198 if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ 211 if (!tvisnil(&n->val)) { /* Mark non-empty slot. */
199 lua_assert(!tvisnil(&n->key)); 212 lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot");
200 if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); 213 if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key);
201 if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); 214 if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val);
202 } 215 }
@@ -211,7 +224,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
211 gc_markobj(g, tabref(fn->c.env)); 224 gc_markobj(g, tabref(fn->c.env));
212 if (isluafunc(fn)) { 225 if (isluafunc(fn)) {
213 uint32_t i; 226 uint32_t i;
214 lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv); 227 lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv,
228 "function upvalues out of range");
215 gc_markobj(g, funcproto(fn)); 229 gc_markobj(g, funcproto(fn));
216 for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ 230 for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */
217 gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); 231 gc_markobj(g, &gcref(fn->l.uvptr[i])->uv);
@@ -227,7 +241,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
227static void gc_marktrace(global_State *g, TraceNo traceno) 241static void gc_marktrace(global_State *g, TraceNo traceno)
228{ 242{
229 GCobj *o = obj2gco(traceref(G2J(g), traceno)); 243 GCobj *o = obj2gco(traceref(G2J(g), traceno));
230 lua_assert(traceno != G2J(g)->cur.traceno); 244 lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped");
231 if (iswhite(o)) { 245 if (iswhite(o)) {
232 white2gray(o); 246 white2gray(o);
233 setgcrefr(o->gch.gclist, g->gc.gray); 247 setgcrefr(o->gch.gclist, g->gc.gray);
@@ -244,6 +258,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
244 IRIns *ir = &T->ir[ref]; 258 IRIns *ir = &T->ir[ref];
245 if (ir->o == IR_KGC) 259 if (ir->o == IR_KGC)
246 gc_markobj(g, ir_kgc(ir)); 260 gc_markobj(g, ir_kgc(ir));
261 if (irt_is64(ir->t) && ir->o != IR_KNULL)
262 ref++;
247 } 263 }
248 if (T->link) gc_marktrace(g, T->link); 264 if (T->link) gc_marktrace(g, T->link);
249 if (T->nextroot) gc_marktrace(g, T->nextroot); 265 if (T->nextroot) gc_marktrace(g, T->nextroot);
@@ -274,12 +290,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
274{ 290{
275 TValue *frame, *top = th->top-1, *bot = tvref(th->stack); 291 TValue *frame, *top = th->top-1, *bot = tvref(th->stack);
276 /* Note: extra vararg frame not skipped, marks function twice (harmless). */ 292 /* Note: extra vararg frame not skipped, marks function twice (harmless). */
277 for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) { 293 for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) {
278 GCfunc *fn = frame_func(frame); 294 GCfunc *fn = frame_func(frame);
279 TValue *ftop = frame; 295 TValue *ftop = frame;
280 if (isluafunc(fn)) ftop += funcproto(fn)->framesize; 296 if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
281 if (ftop > top) top = ftop; 297 if (ftop > top) top = ftop;
282 gc_markobj(g, fn); /* Need to mark hidden function (or L). */ 298 if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */
283 } 299 }
284 top++; /* Correct bias of -1 (frame == base-1). */ 300 top++; /* Correct bias of -1 (frame == base-1). */
285 if (top > tvref(th->maxstack)) top = tvref(th->maxstack); 301 if (top > tvref(th->maxstack)) top = tvref(th->maxstack);
@@ -290,7 +306,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
290static void gc_traverse_thread(global_State *g, lua_State *th) 306static void gc_traverse_thread(global_State *g, lua_State *th)
291{ 307{
292 TValue *o, *top = th->top; 308 TValue *o, *top = th->top;
293 for (o = tvref(th->stack)+1; o < top; o++) 309 for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++)
294 gc_marktv(g, o); 310 gc_marktv(g, o);
295 if (g->gc.state == GCSatomic) { 311 if (g->gc.state == GCSatomic) {
296 top = tvref(th->stack) + th->stacksize; 312 top = tvref(th->stack) + th->stacksize;
@@ -306,7 +322,7 @@ static size_t propagatemark(global_State *g)
306{ 322{
307 GCobj *o = gcref(g->gc.gray); 323 GCobj *o = gcref(g->gc.gray);
308 int gct = o->gch.gct; 324 int gct = o->gch.gct;
309 lua_assert(isgray(o)); 325 lj_assertG(isgray(o), "propagation of non-gray object");
310 gray2black(o); 326 gray2black(o);
311 setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ 327 setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */
312 if (LJ_LIKELY(gct == ~LJ_TTAB)) { 328 if (LJ_LIKELY(gct == ~LJ_TTAB)) {
@@ -338,7 +354,7 @@ static size_t propagatemark(global_State *g)
338 return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + 354 return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
339 T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); 355 T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry);
340#else 356#else
341 lua_assert(0); 357 lj_assertG(0, "bad GC type %d", gct);
342 return 0; 358 return 0;
343#endif 359#endif
344 } 360 }
@@ -355,15 +371,6 @@ static size_t gc_propagate_gray(global_State *g)
355 371
356/* -- Sweep phase --------------------------------------------------------- */ 372/* -- Sweep phase --------------------------------------------------------- */
357 373
358/* Try to shrink some common data structures. */
359static void gc_shrink(global_State *g, lua_State *L)
360{
361 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
362 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
363 if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
364 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
365}
366
367/* Type of GC free functions. */ 374/* Type of GC free functions. */
368typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); 375typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
369 376
@@ -389,7 +396,7 @@ static const GCFreeFunc gc_freefunc[] = {
389}; 396};
390 397
391/* Full sweep of a GC list. */ 398/* Full sweep of a GC list. */
392#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) 399#define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0)
393 400
394/* Partial sweep of a GC list. */ 401/* Partial sweep of a GC list. */
395static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) 402static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
@@ -401,11 +408,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
401 if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ 408 if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */
402 gc_fullsweep(g, &gco2th(o)->openupval); 409 gc_fullsweep(g, &gco2th(o)->openupval);
403 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ 410 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
404 lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); 411 lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
412 "sweep of undead object");
405 makewhite(g, o); /* Value is alive, change to the current white. */ 413 makewhite(g, o); /* Value is alive, change to the current white. */
406 p = &o->gch.nextgc; 414 p = &o->gch.nextgc;
407 } else { /* Otherwise value is dead, free it. */ 415 } else { /* Otherwise value is dead, free it. */
408 lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); 416 lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
417 "sweep of unlive object");
409 setgcrefr(*p, o->gch.nextgc); 418 setgcrefr(*p, o->gch.nextgc);
410 if (o == gcref(g->gc.root)) 419 if (o == gcref(g->gc.root))
411 setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ 420 setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */
@@ -415,6 +424,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
415 return p; 424 return p;
416} 425}
417 426
427/* Sweep one string interning table chain. Preserves hashalg bit. */
428static void gc_sweepstr(global_State *g, GCRef *chain)
429{
430 /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
431 int ow = otherwhite(g);
432 uintptr_t u = gcrefu(*chain);
433 GCRef q;
434 GCRef *p = &q;
435 GCobj *o;
436 setgcrefp(q, (u & ~(uintptr_t)1));
437 while ((o = gcref(*p)) != NULL) {
438 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
439 lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
440 "sweep of undead string");
441 makewhite(g, o); /* String is alive, change to the current white. */
442 p = &o->gch.nextgc;
443 } else { /* Otherwise string is dead, free it. */
444 lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
445 "sweep of unlive string");
446 setgcrefr(*p, o->gch.nextgc);
447 lj_str_free(g, gco2str(o));
448 }
449 }
450 setgcrefp(*chain, (gcrefu(q) | (u & 1)));
451}
452
418/* Check whether we can clear a key or a value slot from a table. */ 453/* Check whether we can clear a key or a value slot from a table. */
419static int gc_mayclear(cTValue *o, int val) 454static int gc_mayclear(cTValue *o, int val)
420{ 455{
@@ -432,11 +467,12 @@ static int gc_mayclear(cTValue *o, int val)
432} 467}
433 468
434/* Clear collected entries from weak tables. */ 469/* Clear collected entries from weak tables. */
435static void gc_clearweak(GCobj *o) 470static void gc_clearweak(global_State *g, GCobj *o)
436{ 471{
472 UNUSED(g);
437 while (o) { 473 while (o) {
438 GCtab *t = gco2tab(o); 474 GCtab *t = gco2tab(o);
439 lua_assert((t->marked & LJ_GC_WEAK)); 475 lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table");
440 if ((t->marked & LJ_GC_WEAKVAL)) { 476 if ((t->marked & LJ_GC_WEAKVAL)) {
441 MSize i, asize = t->asize; 477 MSize i, asize = t->asize;
442 for (i = 0; i < asize; i++) { 478 for (i = 0; i < asize; i++) {
@@ -467,18 +503,21 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
467{ 503{
468 /* Save and restore lots of state around the __gc callback. */ 504 /* Save and restore lots of state around the __gc callback. */
469 uint8_t oldh = hook_save(g); 505 uint8_t oldh = hook_save(g);
470 MSize oldt = g->gc.threshold; 506 GCSize oldt = g->gc.threshold;
471 int errcode; 507 int errcode;
472 TValue *top; 508 TValue *top;
473 lj_trace_abort(g); 509 lj_trace_abort(g);
474 top = L->top;
475 L->top = top+2;
476 hook_entergc(g); /* Disable hooks and new traces during __gc. */ 510 hook_entergc(g); /* Disable hooks and new traces during __gc. */
511 if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
477 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ 512 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
478 copyTV(L, top, mo); 513 top = L->top;
479 setgcV(L, top+1, o, ~o->gch.gct); 514 copyTV(L, top++, mo);
480 errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */ 515 if (LJ_FR2) setnilV(top++);
516 setgcV(L, top, o, ~o->gch.gct);
517 L->top = top+1;
518 errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
481 hook_restore(g, oldh); 519 hook_restore(g, oldh);
520 if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
482 g->gc.threshold = oldt; /* Restore GC threshold. */ 521 g->gc.threshold = oldt; /* Restore GC threshold. */
483 if (errcode) 522 if (errcode)
484 lj_err_throw(L, errcode); /* Propagate errors. */ 523 lj_err_throw(L, errcode); /* Propagate errors. */
@@ -490,7 +529,7 @@ static void gc_finalize(lua_State *L)
490 global_State *g = G(L); 529 global_State *g = G(L);
491 GCobj *o = gcnext(gcref(g->gc.mmudata)); 530 GCobj *o = gcnext(gcref(g->gc.mmudata));
492 cTValue *mo; 531 cTValue *mo;
493 lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ 532 lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace");
494 /* Unchain from list of userdata to be finalized. */ 533 /* Unchain from list of userdata to be finalized. */
495 if (o == gcref(g->gc.mmudata)) 534 if (o == gcref(g->gc.mmudata))
496 setgcrefnull(g->gc.mmudata); 535 setgcrefnull(g->gc.mmudata);
@@ -565,9 +604,9 @@ void lj_gc_freeall(global_State *g)
565 /* Free everything, except super-fixed objects (the main thread). */ 604 /* Free everything, except super-fixed objects (the main thread). */
566 g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; 605 g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED;
567 gc_fullsweep(g, &g->gc.root); 606 gc_fullsweep(g, &g->gc.root);
568 strmask = g->strmask; 607 strmask = g->str.mask;
569 for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ 608 for (i = 0; i <= strmask; i++) /* Free all string hash chains. */
570 gc_fullsweep(g, &g->strhash[i]); 609 gc_sweepstr(g, &g->str.tab[i]);
571} 610}
572 611
573/* -- Collector ----------------------------------------------------------- */ 612/* -- Collector ----------------------------------------------------------- */
@@ -582,7 +621,7 @@ static void atomic(global_State *g, lua_State *L)
582 621
583 setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ 622 setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */
584 setgcrefnull(g->gc.weak); 623 setgcrefnull(g->gc.weak);
585 lua_assert(!iswhite(obj2gco(mainthread(g)))); 624 lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white");
586 gc_markobj(g, L); /* Mark running thread. */ 625 gc_markobj(g, L); /* Mark running thread. */
587 gc_traverse_curtrace(g); /* Traverse current trace. */ 626 gc_traverse_curtrace(g); /* Traverse current trace. */
588 gc_mark_gcroot(g); /* Mark GC roots (again). */ 627 gc_mark_gcroot(g); /* Mark GC roots (again). */
@@ -597,13 +636,15 @@ static void atomic(global_State *g, lua_State *L)
597 udsize += gc_propagate_gray(g); /* And propagate the marks. */ 636 udsize += gc_propagate_gray(g); /* And propagate the marks. */
598 637
599 /* All marking done, clear weak tables. */ 638 /* All marking done, clear weak tables. */
600 gc_clearweak(gcref(g->gc.weak)); 639 gc_clearweak(g, gcref(g->gc.weak));
640
641 lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
601 642
602 /* Prepare for sweep phase. */ 643 /* Prepare for sweep phase. */
603 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ 644 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
604 g->strempty.marked = g->gc.currentwhite; 645 g->strempty.marked = g->gc.currentwhite;
605 setmref(g->gc.sweep, &g->gc.root); 646 setmref(g->gc.sweep, &g->gc.root);
606 g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ 647 g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */
607} 648}
608 649
609/* GC state machine. Returns a cost estimate for each step performed. */ 650/* GC state machine. Returns a cost estimate for each step performed. */
@@ -620,28 +661,29 @@ static size_t gc_onestep(lua_State *L)
620 g->gc.state = GCSatomic; /* End of mark phase. */ 661 g->gc.state = GCSatomic; /* End of mark phase. */
621 return 0; 662 return 0;
622 case GCSatomic: 663 case GCSatomic:
623 if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ 664 if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */
624 return LJ_MAX_MEM; 665 return LJ_MAX_MEM;
625 atomic(g, L); 666 atomic(g, L);
626 g->gc.state = GCSsweepstring; /* Start of sweep phase. */ 667 g->gc.state = GCSsweepstring; /* Start of sweep phase. */
627 g->gc.sweepstr = 0; 668 g->gc.sweepstr = 0;
628 return 0; 669 return 0;
629 case GCSsweepstring: { 670 case GCSsweepstring: {
630 MSize old = g->gc.total; 671 GCSize old = g->gc.total;
631 gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ 672 gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */
632 if (g->gc.sweepstr > g->strmask) 673 if (g->gc.sweepstr > g->str.mask)
633 g->gc.state = GCSsweep; /* All string hash chains sweeped. */ 674 g->gc.state = GCSsweep; /* All string hash chains sweeped. */
634 lua_assert(old >= g->gc.total); 675 lj_assertG(old >= g->gc.total, "sweep increased memory");
635 g->gc.estimate -= old - g->gc.total; 676 g->gc.estimate -= old - g->gc.total;
636 return GCSWEEPCOST; 677 return GCSWEEPCOST;
637 } 678 }
638 case GCSsweep: { 679 case GCSsweep: {
639 MSize old = g->gc.total; 680 GCSize old = g->gc.total;
640 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); 681 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
641 lua_assert(old >= g->gc.total); 682 lj_assertG(old >= g->gc.total, "sweep increased memory");
642 g->gc.estimate -= old - g->gc.total; 683 g->gc.estimate -= old - g->gc.total;
643 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { 684 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
644 gc_shrink(g, L); 685 if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1)
686 lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */
645 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ 687 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
646 g->gc.state = GCSfinalize; 688 g->gc.state = GCSfinalize;
647#if LJ_HASFFI 689#if LJ_HASFFI
@@ -656,7 +698,7 @@ static size_t gc_onestep(lua_State *L)
656 } 698 }
657 case GCSfinalize: 699 case GCSfinalize:
658 if (gcref(g->gc.mmudata) != NULL) { 700 if (gcref(g->gc.mmudata) != NULL) {
659 if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ 701 if (tvref(g->jit_base)) /* Don't call finalizers on trace. */
660 return LJ_MAX_MEM; 702 return LJ_MAX_MEM;
661 gc_finalize(L); /* Finalize one userdata object. */ 703 gc_finalize(L); /* Finalize one userdata object. */
662 if (g->gc.estimate > GCFINALIZECOST) 704 if (g->gc.estimate > GCFINALIZECOST)
@@ -670,7 +712,7 @@ static size_t gc_onestep(lua_State *L)
670 g->gc.debt = 0; 712 g->gc.debt = 0;
671 return 0; 713 return 0;
672 default: 714 default:
673 lua_assert(0); 715 lj_assertG(0, "bad GC state");
674 return 0; 716 return 0;
675 } 717 }
676} 718}
@@ -679,7 +721,7 @@ static size_t gc_onestep(lua_State *L)
679int LJ_FASTCALL lj_gc_step(lua_State *L) 721int LJ_FASTCALL lj_gc_step(lua_State *L)
680{ 722{
681 global_State *g = G(L); 723 global_State *g = G(L);
682 MSize lim; 724 GCSize lim;
683 int32_t ostate = g->vmstate; 725 int32_t ostate = g->vmstate;
684 setvmstate(g, GC); 726 setvmstate(g, GC);
685 lim = (GCSTEPSIZE/100) * g->gc.stepmul; 727 lim = (GCSTEPSIZE/100) * g->gc.stepmul;
@@ -688,13 +730,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L)
688 if (g->gc.total > g->gc.threshold) 730 if (g->gc.total > g->gc.threshold)
689 g->gc.debt += g->gc.total - g->gc.threshold; 731 g->gc.debt += g->gc.total - g->gc.threshold;
690 do { 732 do {
691 lim -= (MSize)gc_onestep(L); 733 lim -= (GCSize)gc_onestep(L);
692 if (g->gc.state == GCSpause) { 734 if (g->gc.state == GCSpause) {
693 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; 735 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
694 g->vmstate = ostate; 736 g->vmstate = ostate;
695 return 1; /* Finished a GC cycle. */ 737 return 1; /* Finished a GC cycle. */
696 } 738 }
697 } while ((int32_t)lim > 0); 739 } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0));
698 if (g->gc.debt < GCSTEPSIZE) { 740 if (g->gc.debt < GCSTEPSIZE) {
699 g->gc.threshold = g->gc.total + GCSTEPSIZE; 741 g->gc.threshold = g->gc.total + GCSTEPSIZE;
700 g->vmstate = ostate; 742 g->vmstate = ostate;
@@ -718,8 +760,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
718/* Perform multiple GC steps. Called from JIT-compiled code. */ 760/* Perform multiple GC steps. Called from JIT-compiled code. */
719int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) 761int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
720{ 762{
721 lua_State *L = gco2th(gcref(g->jit_L)); 763 lua_State *L = gco2th(gcref(g->cur_L));
722 L->base = mref(G(L)->jit_base, TValue); 764 L->base = tvref(G(L)->jit_base);
723 L->top = curr_topL(L); 765 L->top = curr_topL(L);
724 while (steps-- > 0 && lj_gc_step(L) == 0) 766 while (steps-- > 0 && lj_gc_step(L) == 0)
725 ; 767 ;
@@ -744,7 +786,8 @@ void lj_gc_fullgc(lua_State *L)
744 } 786 }
745 while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) 787 while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep)
746 gc_onestep(L); /* Finish sweep. */ 788 gc_onestep(L); /* Finish sweep. */
747 lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause); 789 lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause,
790 "bad GC state");
748 /* Now perform a full GC. */ 791 /* Now perform a full GC. */
749 g->gc.state = GCSpause; 792 g->gc.state = GCSpause;
750 do { gc_onestep(L); } while (g->gc.state != GCSpause); 793 do { gc_onestep(L); } while (g->gc.state != GCSpause);
@@ -757,9 +800,11 @@ void lj_gc_fullgc(lua_State *L)
757/* Move the GC propagation frontier forward. */ 800/* Move the GC propagation frontier forward. */
758void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) 801void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
759{ 802{
760 lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); 803 lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o),
761 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); 804 "bad object states for forward barrier");
762 lua_assert(o->gch.gct != ~LJ_TTAB); 805 lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
806 "bad GC state");
807 lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table");
763 /* Preserve invariant during propagation. Otherwise it doesn't matter. */ 808 /* Preserve invariant during propagation. Otherwise it doesn't matter. */
764 if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) 809 if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic)
765 gc_mark(g, v); /* Move frontier forward. */ 810 gc_mark(g, v); /* Move frontier forward. */
@@ -796,7 +841,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv)
796 lj_gc_barrierf(g, o, gcV(&uv->tv)); 841 lj_gc_barrierf(g, o, gcV(&uv->tv));
797 } else { 842 } else {
798 makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ 843 makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */
799 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); 844 lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
845 "bad GC state");
800 } 846 }
801 } 847 }
802} 848}
@@ -813,27 +859,29 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
813/* -- Allocator ----------------------------------------------------------- */ 859/* -- Allocator ----------------------------------------------------------- */
814 860
815/* Call pluggable memory allocator to allocate or resize a fragment. */ 861/* Call pluggable memory allocator to allocate or resize a fragment. */
816void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) 862void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
817{ 863{
818 global_State *g = G(L); 864 global_State *g = G(L);
819 lua_assert((osz == 0) == (p == NULL)); 865 lj_assertG((osz == 0) == (p == NULL), "realloc API violation");
820 p = g->allocf(g->allocd, p, osz, nsz); 866 p = g->allocf(g->allocd, p, osz, nsz);
821 if (p == NULL && nsz > 0) 867 if (p == NULL && nsz > 0)
822 lj_err_mem(L); 868 lj_err_mem(L);
823 lua_assert((nsz == 0) == (p == NULL)); 869 lj_assertG((nsz == 0) == (p == NULL), "allocf API violation");
824 lua_assert(checkptr32(p)); 870 lj_assertG(checkptrGC(p),
871 "allocated memory address %p outside required range", p);
825 g->gc.total = (g->gc.total - osz) + nsz; 872 g->gc.total = (g->gc.total - osz) + nsz;
826 return p; 873 return p;
827} 874}
828 875
829/* Allocate new GC object and link it to the root set. */ 876/* Allocate new GC object and link it to the root set. */
830void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size) 877void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
831{ 878{
832 global_State *g = G(L); 879 global_State *g = G(L);
833 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); 880 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
834 if (o == NULL) 881 if (o == NULL)
835 lj_err_mem(L); 882 lj_err_mem(L);
836 lua_assert(checkptr32(o)); 883 lj_assertG(checkptrGC(o),
884 "allocated memory address %p outside required range", o);
837 g->gc.total += size; 885 g->gc.total += size;
838 setgcrefr(o->gch.nextgc, g->gc.root); 886 setgcrefr(o->gch.nextgc, g->gc.root);
839 setgcref(g->gc.root, o); 887 setgcref(g->gc.root, o);
diff --git a/src/lj_gc.h b/src/lj_gc.h
index 3f71e9b7..af8c476b 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno);
81static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) 81static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
82{ 82{
83 GCobj *o = obj2gco(t); 83 GCobj *o = obj2gco(t);
84 lua_assert(isblack(o) && !isdead(g, o)); 84 lj_assertG(isblack(o) && !isdead(g, o),
85 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); 85 "bad object states for backward barrier");
86 lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
87 "bad GC state");
86 black2gray(o); 88 black2gray(o);
87 setgcrefr(t->gclist, g->gc.grayagain); 89 setgcrefr(t->gclist, g->gc.grayagain);
88 setgcref(g->gc.grayagain, o); 90 setgcref(g->gc.grayagain, o);
@@ -107,8 +109,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
107 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } 109 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
108 110
109/* Allocator. */ 111/* Allocator. */
110LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); 112LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz);
111LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size); 113LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size);
112LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, 114LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
113 MSize *szp, MSize lim, MSize esz); 115 MSize *szp, MSize lim, MSize esz);
114 116
@@ -116,13 +118,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
116 118
117static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) 119static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
118{ 120{
119 g->gc.total -= (MSize)osize; 121 g->gc.total -= (GCSize)osize;
120 g->allocf(g->allocd, p, osize, 0); 122 g->allocf(g->allocd, p, osize, 0);
121} 123}
122 124
123#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) 125#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t))))
124#define lj_mem_reallocvec(L, p, on, n, t) \ 126#define lj_mem_reallocvec(L, p, on, n, t) \
125 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) 127 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t))))
126#define lj_mem_growvec(L, p, n, m, t) \ 128#define lj_mem_growvec(L, p, n, m, t) \
127 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) 129 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
128#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) 130#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index 78936288..0e2777b8 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_frame.h" 16#include "lj_frame.h"
17#include "lj_buf.h"
18#include "lj_strfmt.h"
17#include "lj_jit.h" 19#include "lj_jit.h"
18#include "lj_dispatch.h" 20#include "lj_dispatch.h"
19 21
@@ -294,6 +296,9 @@ enum {
294#elif LJ_TARGET_ARM 296#elif LJ_TARGET_ARM
295 DW_REG_SP = 13, 297 DW_REG_SP = 13,
296 DW_REG_RA = 14, 298 DW_REG_RA = 14,
299#elif LJ_TARGET_ARM64
300 DW_REG_SP = 31,
301 DW_REG_RA = 30,
297#elif LJ_TARGET_PPC 302#elif LJ_TARGET_PPC
298 DW_REG_SP = 1, 303 DW_REG_SP = 1,
299 DW_REG_RA = 65, 304 DW_REG_RA = 65,
@@ -358,7 +363,7 @@ static const ELFheader elfhdr_template = {
358 .eosabi = 12, 363 .eosabi = 12,
359#elif defined(__DragonFly__) 364#elif defined(__DragonFly__)
360 .eosabi = 0, 365 .eosabi = 0,
361#elif (defined(__sun__) && defined(__svr4__)) 366#elif LJ_TARGET_SOLARIS
362 .eosabi = 6, 367 .eosabi = 6,
363#else 368#else
364 .eosabi = 0, 369 .eosabi = 0,
@@ -372,6 +377,8 @@ static const ELFheader elfhdr_template = {
372 .machine = 62, 377 .machine = 62,
373#elif LJ_TARGET_ARM 378#elif LJ_TARGET_ARM
374 .machine = 40, 379 .machine = 40,
380#elif LJ_TARGET_ARM64
381 .machine = 183,
375#elif LJ_TARGET_PPC 382#elif LJ_TARGET_PPC
376 .machine = 20, 383 .machine = 20,
377#elif LJ_TARGET_MIPS 384#elif LJ_TARGET_MIPS
@@ -428,16 +435,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
428 *ctx->p++ = '0' + n; 435 *ctx->p++ = '0' + n;
429} 436}
430 437
431/* Add a ULEB128 value. */
432static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
433{
434 uint8_t *p = ctx->p;
435 for (; v >= 0x80; v >>= 7)
436 *p++ = (uint8_t)((v & 0x7f) | 0x80);
437 *p++ = (uint8_t)v;
438 ctx->p = p;
439}
440
441/* Add a SLEB128 value. */ 438/* Add a SLEB128 value. */
442static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) 439static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
443{ 440{
@@ -454,7 +451,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
454#define DU16(x) (*(uint16_t *)p = (x), p += 2) 451#define DU16(x) (*(uint16_t *)p = (x), p += 2)
455#define DU32(x) (*(uint32_t *)p = (x), p += 4) 452#define DU32(x) (*(uint32_t *)p = (x), p += 4)
456#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) 453#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
457#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) 454#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
458#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) 455#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
459#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) 456#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
460#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop 457#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
@@ -564,13 +561,20 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
564 DB(DW_CFA_offset|DW_REG_15); DUV(4); 561 DB(DW_CFA_offset|DW_REG_15); DUV(4);
565 DB(DW_CFA_offset|DW_REG_14); DUV(5); 562 DB(DW_CFA_offset|DW_REG_14); DUV(5);
566 /* Extra registers saved for JIT-compiled code. */ 563 /* Extra registers saved for JIT-compiled code. */
567 DB(DW_CFA_offset|DW_REG_13); DUV(9); 564 DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9);
568 DB(DW_CFA_offset|DW_REG_12); DUV(10); 565 DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10);
569#elif LJ_TARGET_ARM 566#elif LJ_TARGET_ARM
570 { 567 {
571 int i; 568 int i;
572 for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } 569 for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); }
573 } 570 }
571#elif LJ_TARGET_ARM64
572 {
573 int i;
574 DB(DW_CFA_offset|31); DUV(2);
575 for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); }
576 for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); }
577 }
574#elif LJ_TARGET_PPC 578#elif LJ_TARGET_PPC
575 { 579 {
576 int i; 580 int i;
@@ -720,13 +724,27 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
720 SECTALIGN(ctx->p, sizeof(uintptr_t)); 724 SECTALIGN(ctx->p, sizeof(uintptr_t));
721 gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); 725 gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe);
722 ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); 726 ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
723 lua_assert(ctx->objsize < sizeof(GDBJITobj)); 727 lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow");
724} 728}
725 729
726#undef SECTALIGN 730#undef SECTALIGN
727 731
728/* -- Interface to GDB JIT API -------------------------------------------- */ 732/* -- Interface to GDB JIT API -------------------------------------------- */
729 733
734static int gdbjit_lock;
735
736static void gdbjit_lock_acquire()
737{
738 while (__sync_lock_test_and_set(&gdbjit_lock, 1)) {
739 /* Just spin; futexes or pthreads aren't worth the portability cost. */
740 }
741}
742
743static void gdbjit_lock_release()
744{
745 __sync_lock_release(&gdbjit_lock);
746}
747
730/* Add new entry to GDB JIT symbol chain. */ 748/* Add new entry to GDB JIT symbol chain. */
731static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) 749static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
732{ 750{
@@ -738,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
738 ctx->T->gdbjit_entry = (void *)eo; 756 ctx->T->gdbjit_entry = (void *)eo;
739 /* Link new entry to chain and register it. */ 757 /* Link new entry to chain and register it. */
740 eo->entry.prev_entry = NULL; 758 eo->entry.prev_entry = NULL;
759 gdbjit_lock_acquire();
741 eo->entry.next_entry = __jit_debug_descriptor.first_entry; 760 eo->entry.next_entry = __jit_debug_descriptor.first_entry;
742 if (eo->entry.next_entry) 761 if (eo->entry.next_entry)
743 eo->entry.next_entry->prev_entry = &eo->entry; 762 eo->entry.next_entry->prev_entry = &eo->entry;
@@ -747,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
747 __jit_debug_descriptor.relevant_entry = &eo->entry; 766 __jit_debug_descriptor.relevant_entry = &eo->entry;
748 __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; 767 __jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
749 __jit_debug_register_code(); 768 __jit_debug_register_code();
769 gdbjit_lock_release();
750} 770}
751 771
752/* Add debug info for newly compiled trace and notify GDB. */ 772/* Add debug info for newly compiled trace and notify GDB. */
@@ -762,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T)
762 ctx.spadjp = CFRAME_SIZE_JIT + 782 ctx.spadjp = CFRAME_SIZE_JIT +
763 (MSize)(parent ? traceref(J, parent)->spadjust : 0); 783 (MSize)(parent ? traceref(J, parent)->spadjust : 0);
764 ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; 784 ctx.spadj = CFRAME_SIZE_JIT + T->spadjust;
765 lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); 785 lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
786 "start PC out of range");
766 ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); 787 ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
767 ctx.filename = proto_chunknamestr(pt); 788 ctx.filename = proto_chunknamestr(pt);
768 if (*ctx.filename == '@' || *ctx.filename == '=') 789 if (*ctx.filename == '@' || *ctx.filename == '=')
@@ -778,6 +799,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
778{ 799{
779 GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; 800 GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
780 if (eo) { 801 if (eo) {
802 gdbjit_lock_acquire();
781 if (eo->entry.prev_entry) 803 if (eo->entry.prev_entry)
782 eo->entry.prev_entry->next_entry = eo->entry.next_entry; 804 eo->entry.prev_entry->next_entry = eo->entry.next_entry;
783 else 805 else
@@ -787,6 +809,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
787 __jit_debug_descriptor.relevant_entry = &eo->entry; 809 __jit_debug_descriptor.relevant_entry = &eo->entry;
788 __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; 810 __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
789 __jit_debug_register_code(); 811 __jit_debug_register_code();
812 gdbjit_lock_release();
790 lj_mem_free(J2G(J), eo, eo->sz); 813 lj_mem_free(J2G(J), eo, eo->sz);
791 } 814 }
792} 815}
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 62c2cedd..71bf8855 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_ir.h" 21#include "lj_ir.h"
@@ -29,14 +30,16 @@
29#endif 30#endif
30#include "lj_vm.h" 31#include "lj_vm.h"
31#include "lj_strscan.h" 32#include "lj_strscan.h"
32#include "lj_lib.h" 33#include "lj_serialize.h"
34#include "lj_strfmt.h"
35#include "lj_prng.h"
33 36
34/* Some local macros to save typing. Undef'd at the end. */ 37/* Some local macros to save typing. Undef'd at the end. */
35#define IR(ref) (&J->cur.ir[(ref)]) 38#define IR(ref) (&J->cur.ir[(ref)])
36#define fins (&J->fold.ins) 39#define fins (&J->fold.ins)
37 40
38/* Pass IR on to next optimization in chain (FOLD). */ 41/* Pass IR on to next optimization in chain (FOLD). */
39#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) 42#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
40 43
41/* -- IR tables ----------------------------------------------------------- */ 44/* -- IR tables ----------------------------------------------------------- */
42 45
@@ -88,8 +91,9 @@ static void lj_ir_growbot(jit_State *J)
88{ 91{
89 IRIns *baseir = J->irbuf + J->irbotlim; 92 IRIns *baseir = J->irbuf + J->irbotlim;
90 MSize szins = J->irtoplim - J->irbotlim; 93 MSize szins = J->irtoplim - J->irbotlim;
91 lua_assert(szins != 0); 94 lj_assertJ(szins != 0, "zero IR size");
92 lua_assert(J->cur.nk == J->irbotlim); 95 lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim,
96 "unexpected IR growth");
93 if (J->cur.nins + (szins >> 1) < J->irtoplim) { 97 if (J->cur.nins + (szins >> 1) < J->irtoplim) {
94 /* More than half of the buffer is free on top: shift up by a quarter. */ 98 /* More than half of the buffer is free on top: shift up by a quarter. */
95 MSize ofs = szins >> 2; 99 MSize ofs = szins >> 2;
@@ -143,6 +147,17 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
143 return emitir(CCI_OPTYPE(ci), tr, id); 147 return emitir(CCI_OPTYPE(ci), tr, id);
144} 148}
145 149
150/* Load field of type t from GG_State + offset. Must be 32 bit aligned. */
151TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
152{
153 lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset");
154 ofs >>= 2;
155 lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff,
156 "GG_State field offset breaks 10 bit FOLD key limit");
157 lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
158 return lj_opt_fold(J);
159}
160
146/* -- Interning of constants ---------------------------------------------- */ 161/* -- Interning of constants ---------------------------------------------- */
147 162
148/* 163/*
@@ -163,6 +178,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
163 return ref; 178 return ref;
164} 179}
165 180
181/* Get ref of next 64 bit IR constant and optionally grow IR.
182** Note: this may invalidate all IRIns *!
183*/
184static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
185{
186 IRRef ref = J->cur.nk - 2;
187 lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state");
188 if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
189 J->cur.nk = ref;
190 return ref;
191}
192
193#if LJ_GC64
194#define ir_nextkgc ir_nextk64
195#else
196#define ir_nextkgc ir_nextk
197#endif
198
166/* Intern int32_t constant. */ 199/* Intern int32_t constant. */
167TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) 200TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
168{ 201{
@@ -182,79 +215,21 @@ found:
182 return TREF(ref, IRT_INT); 215 return TREF(ref, IRT_INT);
183} 216}
184 217
185/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the 218/* Intern 64 bit constant, given by its 64 bit pattern. */
186** 64 bit constant. The constants themselves are stored in a chained array 219TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
187** and shared across traces.
188**
189** Rationale for choosing this data structure:
190** - The address of the constants is embedded in the generated machine code
191** and must never move. A resizable array or hash table wouldn't work.
192** - Most apps need very few non-32 bit integer constants (less than a dozen).
193** - Linear search is hard to beat in terms of speed and low complexity.
194*/
195typedef struct K64Array {
196 MRef next; /* Pointer to next list. */
197 MSize numk; /* Number of used elements in this array. */
198 TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
199} K64Array;
200
201/* Free all chained arrays. */
202void lj_ir_k64_freeall(jit_State *J)
203{
204 K64Array *k;
205 for (k = mref(J->k64, K64Array); k; ) {
206 K64Array *next = mref(k->next, K64Array);
207 lj_mem_free(J2G(J), k, sizeof(K64Array));
208 k = next;
209 }
210}
211
212/* Find 64 bit constant in chained array or add it. */
213cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
214{
215 K64Array *k, *kp = NULL;
216 TValue *ntv;
217 MSize idx;
218 /* Search for the constant in the whole chain of arrays. */
219 for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
220 kp = k; /* Remember previous element in list. */
221 for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
222 TValue *tv = &k->k[idx];
223 if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
224 return tv;
225 }
226 }
227 /* Constant was not found, need to add it. */
228 if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
229 K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
230 setmref(kn->next, NULL);
231 kn->numk = 0;
232 if (kp)
233 setmref(kp->next, kn); /* Chain to the end of the list. */
234 else
235 setmref(J->k64, kn); /* Link first array. */
236 kp = kn;
237 }
238 ntv = &kp->k[kp->numk++]; /* Add to current array. */
239 ntv->u64 = u64;
240 return ntv;
241}
242
243/* Intern 64 bit constant, given by its address. */
244TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
245{ 220{
246 IRIns *ir, *cir = J->cur.ir; 221 IRIns *ir, *cir = J->cur.ir;
247 IRRef ref; 222 IRRef ref;
248 IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; 223 IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
249 for (ref = J->chain[op]; ref; ref = cir[ref].prev) 224 for (ref = J->chain[op]; ref; ref = cir[ref].prev)
250 if (ir_k64(&cir[ref]) == tv) 225 if (ir_k64(&cir[ref])->u64 == u64)
251 goto found; 226 goto found;
252 ref = ir_nextk(J); 227 ref = ir_nextk64(J);
253 ir = IR(ref); 228 ir = IR(ref);
254 lua_assert(checkptr32(tv)); 229 ir[1].tv.u64 = u64;
255 setmref(ir->ptr, tv);
256 ir->t.irt = t; 230 ir->t.irt = t;
257 ir->o = op; 231 ir->o = op;
232 ir->op12 = 0;
258 ir->prev = J->chain[op]; 233 ir->prev = J->chain[op];
259 J->chain[op] = (IRRef1)ref; 234 J->chain[op] = (IRRef1)ref;
260found: 235found:
@@ -264,13 +239,13 @@ found:
264/* Intern FP constant, given by its 64 bit pattern. */ 239/* Intern FP constant, given by its 64 bit pattern. */
265TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) 240TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
266{ 241{
267 return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); 242 return lj_ir_k64(J, IR_KNUM, u64);
268} 243}
269 244
270/* Intern 64 bit integer constant. */ 245/* Intern 64 bit integer constant. */
271TRef lj_ir_kint64(jit_State *J, uint64_t u64) 246TRef lj_ir_kint64(jit_State *J, uint64_t u64)
272{ 247{
273 return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); 248 return lj_ir_k64(J, IR_KINT64, u64);
274} 249}
275 250
276/* Check whether a number is int and return it. -0 is NOT considered an int. */ 251/* Check whether a number is int and return it. -0 is NOT considered an int. */
@@ -305,14 +280,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
305{ 280{
306 IRIns *ir, *cir = J->cur.ir; 281 IRIns *ir, *cir = J->cur.ir;
307 IRRef ref; 282 IRRef ref;
308 lua_assert(!isdead(J2G(J), o)); 283 lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object");
309 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) 284 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
310 if (ir_kgc(&cir[ref]) == o) 285 if (ir_kgc(&cir[ref]) == o)
311 goto found; 286 goto found;
312 ref = ir_nextk(J); 287 ref = ir_nextkgc(J);
313 ir = IR(ref); 288 ir = IR(ref);
314 /* NOBARRIER: Current trace is a GC root. */ 289 /* NOBARRIER: Current trace is a GC root. */
315 setgcref(ir->gcr, o); 290 ir->op12 = 0;
291 setgcref(ir[LJ_GC64].gcr, o);
316 ir->t.irt = (uint8_t)t; 292 ir->t.irt = (uint8_t)t;
317 ir->o = IR_KGC; 293 ir->o = IR_KGC;
318 ir->prev = J->chain[IR_KGC]; 294 ir->prev = J->chain[IR_KGC];
@@ -321,24 +297,44 @@ found:
321 return TREF(ref, t); 297 return TREF(ref, t);
322} 298}
323 299
324/* Intern 32 bit pointer constant. */ 300/* Allocate GCtrace constant placeholder (no interning). */
301TRef lj_ir_ktrace(jit_State *J)
302{
303 IRRef ref = ir_nextkgc(J);
304 IRIns *ir = IR(ref);
305 lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping");
306 ir->t.irt = IRT_P64;
307 ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */
308 ir->op12 = 0;
309 ir->prev = 0;
310 return TREF(ref, IRT_P64);
311}
312
313/* Intern pointer constant. */
325TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) 314TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
326{ 315{
327 IRIns *ir, *cir = J->cur.ir; 316 IRIns *ir, *cir = J->cur.ir;
328 IRRef ref; 317 IRRef ref;
329 lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); 318#if LJ_64 && !LJ_GC64
319 lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer");
320#endif
330 for (ref = J->chain[op]; ref; ref = cir[ref].prev) 321 for (ref = J->chain[op]; ref; ref = cir[ref].prev)
331 if (mref(cir[ref].ptr, void) == ptr) 322 if (ir_kptr(&cir[ref]) == ptr)
332 goto found; 323 goto found;
324#if LJ_GC64
325 ref = ir_nextk64(J);
326#else
333 ref = ir_nextk(J); 327 ref = ir_nextk(J);
328#endif
334 ir = IR(ref); 329 ir = IR(ref);
335 setmref(ir->ptr, ptr); 330 ir->op12 = 0;
336 ir->t.irt = IRT_P32; 331 setmref(ir[LJ_GC64].ptr, ptr);
332 ir->t.irt = IRT_PGC;
337 ir->o = op; 333 ir->o = op;
338 ir->prev = J->chain[op]; 334 ir->prev = J->chain[op];
339 J->chain[op] = (IRRef1)ref; 335 J->chain[op] = (IRRef1)ref;
340found: 336found:
341 return TREF(ref, IRT_P32); 337 return TREF(ref, IRT_PGC);
342} 338}
343 339
344/* Intern typed NULL constant. */ 340/* Intern typed NULL constant. */
@@ -367,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot)
367 IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); 363 IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot);
368 IRRef ref; 364 IRRef ref;
369 /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ 365 /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */
370 lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); 366 lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot,
367 "out-of-range key/slot");
371 for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) 368 for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev)
372 if (cir[ref].op12 == op12) 369 if (cir[ref].op12 == op12)
373 goto found; 370 goto found;
@@ -388,14 +385,15 @@ found:
388void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) 385void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
389{ 386{
390 UNUSED(L); 387 UNUSED(L);
391 lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ 388 lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. */
392 switch (ir->o) { 389 switch (ir->o) {
393 case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break; 390 case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
394 case IR_KINT: setintV(tv, ir->i); break; 391 case IR_KINT: setintV(tv, ir->i); break;
395 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; 392 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
396 case IR_KPTR: case IR_KKPTR: case IR_KNULL: 393 case IR_KPTR: case IR_KKPTR:
397 setlightudV(tv, mref(ir->ptr, void)); 394 setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir));
398 break; 395 break;
396 case IR_KNULL: setintV(tv, 0); break;
399 case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; 397 case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
400#if LJ_HASFFI 398#if LJ_HASFFI
401 case IR_KINT64: { 399 case IR_KINT64: {
@@ -405,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
405 break; 403 break;
406 } 404 }
407#endif 405#endif
408 default: lua_assert(0); break; 406 default: lj_assertL(0, "bad IR constant op %d", ir->o); break;
409 } 407 }
410} 408}
411 409
@@ -443,7 +441,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
443 if (!tref_isstr(tr)) { 441 if (!tref_isstr(tr)) {
444 if (!tref_isnumber(tr)) 442 if (!tref_isnumber(tr))
445 lj_trace_err(J, LJ_TRERR_BADTYPE); 443 lj_trace_err(J, LJ_TRERR_BADTYPE);
446 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 444 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
445 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
447 } 446 }
448 return tr; 447 return tr;
449} 448}
@@ -464,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op)
464 case IR_UGE: return !(a < b); 463 case IR_UGE: return !(a < b);
465 case IR_ULE: return !(a > b); 464 case IR_ULE: return !(a > b);
466 case IR_UGT: return !(a <= b); 465 case IR_UGT: return !(a <= b);
467 default: lua_assert(0); return 0; 466 default: lj_assertX(0, "bad IR op %d", op); return 0;
468 } 467 }
469} 468}
470 469
@@ -477,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op)
477 case IR_GE: return (res >= 0); 476 case IR_GE: return (res >= 0);
478 case IR_LE: return (res <= 0); 477 case IR_LE: return (res <= 0);
479 case IR_GT: return (res > 0); 478 case IR_GT: return (res > 0);
480 default: lua_assert(0); return 0; 479 default: lj_assertX(0, "bad IR op %d", op); return 0;
481 } 480 }
482} 481}
483 482
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 35c8e289..b3faaea8 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -40,6 +40,7 @@
40 _(USE, S , ref, ___) \ 40 _(USE, S , ref, ___) \
41 _(PHI, S , ref, ref) \ 41 _(PHI, S , ref, ref) \
42 _(RENAME, S , ref, lit) \ 42 _(RENAME, S , ref, lit) \
43 _(PROF, S , ___, ___) \
43 \ 44 \
44 /* Constants. */ \ 45 /* Constants. */ \
45 _(KPRI, N , ___, ___) \ 46 _(KPRI, N , ___, ___) \
@@ -74,7 +75,6 @@
74 _(NEG, N , ref, ref) \ 75 _(NEG, N , ref, ref) \
75 \ 76 \
76 _(ABS, N , ref, ref) \ 77 _(ABS, N , ref, ref) \
77 _(ATAN2, N , ref, ref) \
78 _(LDEXP, N , ref, ref) \ 78 _(LDEXP, N , ref, ref) \
79 _(MIN, C , ref, ref) \ 79 _(MIN, C , ref, ref) \
80 _(MAX, C , ref, ref) \ 80 _(MAX, C , ref, ref) \
@@ -95,7 +95,9 @@
95 _(UREFO, LW, ref, lit) \ 95 _(UREFO, LW, ref, lit) \
96 _(UREFC, LW, ref, lit) \ 96 _(UREFC, LW, ref, lit) \
97 _(FREF, R , ref, lit) \ 97 _(FREF, R , ref, lit) \
98 _(TMPREF, S , ref, lit) \
98 _(STRREF, N , ref, ref) \ 99 _(STRREF, N , ref, ref) \
100 _(LREF, L , ___, ___) \
99 \ 101 \
100 /* Loads and Stores. These must be in the same order. */ \ 102 /* Loads and Stores. These must be in the same order. */ \
101 _(ALOAD, L , ref, ___) \ 103 _(ALOAD, L , ref, ___) \
@@ -105,6 +107,7 @@
105 _(XLOAD, L , ref, lit) \ 107 _(XLOAD, L , ref, lit) \
106 _(SLOAD, L , lit, lit) \ 108 _(SLOAD, L , lit, lit) \
107 _(VLOAD, L , ref, ___) \ 109 _(VLOAD, L , ref, ___) \
110 _(ALEN, L , ref, ref) \
108 \ 111 \
109 _(ASTORE, S , ref, ref) \ 112 _(ASTORE, S , ref, ref) \
110 _(HSTORE, S , ref, ref) \ 113 _(HSTORE, S , ref, ref) \
@@ -120,6 +123,11 @@
120 _(CNEW, AW, ref, ref) \ 123 _(CNEW, AW, ref, ref) \
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 124 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 125 \
126 /* Buffer operations. */ \
127 _(BUFHDR, L , ref, lit) \
128 _(BUFPUT, LW, ref, ref) \
129 _(BUFSTR, AW, ref, ref) \
130 \
123 /* Barriers. */ \ 131 /* Barriers. */ \
124 _(TBAR, S , ref, ___) \ 132 _(TBAR, S , ref, ___) \
125 _(OBAR, S , ref, ref) \ 133 _(OBAR, S , ref, ref) \
@@ -128,12 +136,13 @@
128 /* Type conversions. */ \ 136 /* Type conversions. */ \
129 _(CONV, N , ref, lit) \ 137 _(CONV, N , ref, lit) \
130 _(TOBIT, N , ref, ref) \ 138 _(TOBIT, N , ref, ref) \
131 _(TOSTR, N , ref, ___) \ 139 _(TOSTR, N , ref, lit) \
132 _(STRTO, N , ref, ___) \ 140 _(STRTO, N , ref, ___) \
133 \ 141 \
134 /* Calls. */ \ 142 /* Calls. */ \
135 _(CALLN, N , ref, lit) \ 143 _(CALLN, NW, ref, lit) \
136 _(CALLL, L , ref, lit) \ 144 _(CALLA, AW, ref, lit) \
145 _(CALLL, LW, ref, lit) \
137 _(CALLS, S , ref, lit) \ 146 _(CALLS, S , ref, lit) \
138 _(CALLXS, S , ref, ref) \ 147 _(CALLXS, S , ref, ref) \
139 _(CARG, N , ref, ref) \ 148 _(CARG, N , ref, ref) \
@@ -170,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
170/* FPMATH sub-functions. ORDER FPM. */ 179/* FPMATH sub-functions. ORDER FPM. */
171#define IRFPMDEF(_) \ 180#define IRFPMDEF(_) \
172 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ 181 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
173 _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ 182 _(SQRT) _(LOG) _(LOG2) \
174 _(SIN) _(COS) _(TAN) \
175 _(OTHER) 183 _(OTHER)
176 184
177typedef enum { 185typedef enum {
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM)
186 _(STR_LEN, offsetof(GCstr, len)) \ 194 _(STR_LEN, offsetof(GCstr, len)) \
187 _(FUNC_ENV, offsetof(GCfunc, l.env)) \ 195 _(FUNC_ENV, offsetof(GCfunc, l.env)) \
188 _(FUNC_PC, offsetof(GCfunc, l.pc)) \ 196 _(FUNC_PC, offsetof(GCfunc, l.pc)) \
197 _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
198 _(THREAD_ENV, offsetof(lua_State, env)) \
189 _(TAB_META, offsetof(GCtab, metatable)) \ 199 _(TAB_META, offsetof(GCtab, metatable)) \
190 _(TAB_ARRAY, offsetof(GCtab, array)) \ 200 _(TAB_ARRAY, offsetof(GCtab, array)) \
191 _(TAB_NODE, offsetof(GCtab, node)) \ 201 _(TAB_NODE, offsetof(GCtab, node)) \
@@ -195,9 +205,15 @@ IRFPMDEF(FPMENUM)
195 _(UDATA_META, offsetof(GCudata, metatable)) \ 205 _(UDATA_META, offsetof(GCudata, metatable)) \
196 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ 206 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
197 _(UDATA_FILE, sizeof(GCudata)) \ 207 _(UDATA_FILE, sizeof(GCudata)) \
208 _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \
209 _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \
210 _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \
211 _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \
212 _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \
213 _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \
198 _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ 214 _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \
199 _(CDATA_PTR, sizeof(GCcdata)) \ 215 _(CDATA_PTR, sizeof(GCcdata)) \
200 _(CDATA_INT, sizeof(GCcdata)) \ 216 _(CDATA_INT, sizeof(GCcdata)) \
201 _(CDATA_INT64, sizeof(GCcdata)) \ 217 _(CDATA_INT64, sizeof(GCcdata)) \
202 _(CDATA_INT64_4, sizeof(GCcdata) + 4) 218 _(CDATA_INT64_4, sizeof(GCcdata) + 4)
203 219
@@ -208,18 +224,28 @@ IRFLDEF(FLENUM)
208 IRFL__MAX 224 IRFL__MAX
209} IRFieldID; 225} IRFieldID;
210 226
227/* TMPREF mode bits, stored in op2. */
228#define IRTMPREF_IN1 0x01 /* First input value. */
229#define IRTMPREF_OUT1 0x02 /* First output value. */
230#define IRTMPREF_OUT2 0x04 /* Second output value. */
231
211/* SLOAD mode bits, stored in op2. */ 232/* SLOAD mode bits, stored in op2. */
212#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ 233#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
213#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ 234#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
214#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ 235#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
215#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ 236#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
216#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ 237#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
217#define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ 238#define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */
218 239
219/* XLOAD mode, stored in op2. */ 240/* XLOAD mode bits, stored in op2. */
220#define IRXLOAD_READONLY 1 /* Load from read-only data. */ 241#define IRXLOAD_READONLY 0x01 /* Load from read-only data. */
221#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ 242#define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */
222#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ 243#define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. */
244
245/* BUFHDR mode, stored in op2. */
246#define IRBUFHDR_RESET 0 /* Reset buffer. */
247#define IRBUFHDR_APPEND 1 /* Append to buffer. */
248#define IRBUFHDR_WRITE 2 /* Write to string buffer. */
223 249
224/* CONV mode, stored in op2. */ 250/* CONV mode, stored in op2. */
225#define IRCONV_SRCMASK 0x001f /* Source IRType. */ 251#define IRCONV_SRCMASK 0x001f /* Source IRType. */
@@ -227,7 +253,6 @@ IRFLDEF(FLENUM)
227#define IRCONV_DSH 5 253#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 254#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 255#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 256#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 257#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 258#define IRCONV_CONVMASK 0xf000
@@ -237,6 +262,12 @@ IRFLDEF(FLENUM)
237#define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ 262#define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */
238#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ 263#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
239#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ 264#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
265#define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */
266
267/* TOSTR mode, stored in op2. */
268#define IRTOSTR_INT 0 /* Convert integer to string. */
269#define IRTOSTR_NUM 1 /* Convert number to string. */
270#define IRTOSTR_CHAR 2 /* Convert char value to string. */
240 271
241/* -- IR operands --------------------------------------------------------- */ 272/* -- IR operands --------------------------------------------------------- */
242 273
@@ -276,7 +307,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
276 307
277/* -- IR instruction types ------------------------------------------------ */ 308/* -- IR instruction types ------------------------------------------------ */
278 309
279/* Map of itypes to non-negative numbers. ORDER LJ_T. 310#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4)
311
312/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T.
280** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for 313** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for
281** IRT_P32 and IRT_P64, which never escape the IR. 314** IRT_P32 and IRT_P64, which never escape the IR.
282** The various integers are only used in the IR and can only escape to 315** The various integers are only used in the IR and can only escape to
@@ -284,12 +317,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
284** contiguous and next to IRT_NUM (see the typerange macros below). 317** contiguous and next to IRT_NUM (see the typerange macros below).
285*/ 318*/
286#define IRTDEF(_) \ 319#define IRTDEF(_) \
287 _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ 320 _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \
288 _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ 321 _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \
289 _(TAB, 4) _(UDATA, 4) \ 322 _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \
323 _(UDATA, IRTSIZE_PGC) \
290 _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ 324 _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \
291 _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ 325 _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \
292 _(SOFTFP, 4) /* There is room for 9 more types. */ 326 _(SOFTFP, 4) /* There is room for 8 more types. */
293 327
294/* IR result type and flags (8 bit). */ 328/* IR result type and flags (8 bit). */
295typedef enum { 329typedef enum {
@@ -300,6 +334,8 @@ IRTDEF(IRTENUM)
300 334
301 /* Native pointer type and the corresponding integer type. */ 335 /* Native pointer type and the corresponding integer type. */
302 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, 336 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
337 IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32,
338 IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT,
303 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, 339 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
304 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, 340 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
305 341
@@ -353,7 +389,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
353#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) 389#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
354#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) 390#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
355 391
356#if LJ_64 392#if LJ_GC64
393/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */
394#define IRT_IS64 \
395 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
396 (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
397 (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\
398 (1u<<IRT_NIL))
399#elif LJ_64
357#define IRT_IS64 \ 400#define IRT_IS64 \
358 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) 401 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
359#else 402#else
@@ -374,7 +417,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
374 return IRT_INT; 417 return IRT_INT;
375 else if (tvisnum(tv)) 418 else if (tvisnum(tv))
376 return IRT_NUM; 419 return IRT_NUM;
377#if LJ_64 420#if LJ_64 && !LJ_GC64
378 else if (tvislightud(tv)) 421 else if (tvislightud(tv))
379 return IRT_LIGHTUD; 422 return IRT_LIGHTUD;
380#endif 423#endif
@@ -384,11 +427,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
384 427
385static LJ_AINLINE uint32_t irt_toitype_(IRType t) 428static LJ_AINLINE uint32_t irt_toitype_(IRType t)
386{ 429{
387 lua_assert(!LJ_64 || t != IRT_LIGHTUD); 430 lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD,
431 "no plain type tag for lightuserdata");
388 if (LJ_DUALNUM && t > IRT_NUM) { 432 if (LJ_DUALNUM && t > IRT_NUM) {
389 return LJ_TISNUM; 433 return LJ_TISNUM;
390 } else { 434 } else {
391 lua_assert(t <= IRT_NUM); 435 lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t);
392 return ~(uint32_t)t; 436 return ~(uint32_t)t;
393 } 437 }
394} 438}
@@ -464,6 +508,7 @@ typedef uint32_t TRef;
464#define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) 508#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
465#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) 509#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
466#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) 510#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
511#define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD))
467#define tref_isstr(tr) (tref_istype((tr), IRT_STR)) 512#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
468#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) 513#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
469#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) 514#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA))
@@ -496,7 +541,9 @@ typedef uint32_t TRef;
496** +-------+-------+---+---+---+---+ 541** +-------+-------+---+---+---+---+
497** | op1 | op2 | t | o | r | s | 542** | op1 | op2 | t | o | r | s |
498** +-------+-------+---+---+---+---+ 543** +-------+-------+---+---+---+---+
499** | op12/i/gco | ot | prev | (alternative fields in union) 544** | op12/i/gco32 | ot | prev | (alternative fields in union)
545** +-------+-------+---+---+---+---+
546** | TValue/gco64 | (2nd IR slot for 64 bit constants)
500** +---------------+-------+-------+ 547** +---------------+-------+-------+
501** 32 16 16 548** 32 16 16
502** 549**
@@ -524,21 +571,27 @@ typedef union IRIns {
524 ) 571 )
525 }; 572 };
526 int32_t i; /* 32 bit signed integer literal (overlaps op12). */ 573 int32_t i; /* 32 bit signed integer literal (overlaps op12). */
527 GCRef gcr; /* GCobj constant (overlaps op12). */ 574 GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */
528 MRef ptr; /* Pointer constant (overlaps op12). */ 575 MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */
576 TValue tv; /* TValue constant (overlaps entire slot). */
529} IRIns; 577} IRIns;
530 578
531#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) 579#define ir_isk64(ir) \
580 ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
581 (LJ_GC64 && \
582 ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)))
583
584#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
532#define ir_kstr(ir) (gco2str(ir_kgc((ir)))) 585#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
533#define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) 586#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
534#define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) 587#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
535#define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) 588#define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
536#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) 589#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
537#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) 590#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
538#define ir_k64(ir) \ 591#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv)
539 check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
540#define ir_kptr(ir) \ 592#define ir_kptr(ir) \
541 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) 593 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
594 mref((ir)[LJ_GC64].ptr, void))
542 595
543/* A store or any other op with a non-weak guard has a side-effect. */ 596/* A store or any other op with a non-weak guard has a side-effect. */
544static LJ_AINLINE int ir_sideeff(IRIns *ir) 597static LJ_AINLINE int ir_sideeff(IRIns *ir)
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 13bc40dd..c837b18d 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -16,22 +16,26 @@ typedef struct CCallInfo {
16 uint32_t flags; /* Number of arguments and flags. */ 16 uint32_t flags; /* Number of arguments and flags. */
17} CCallInfo; 17} CCallInfo;
18 18
19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ 19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */
20#define CCI_NARGS_MAX 32 /* Max. # of args. */ 20#define CCI_NARGS_MAX 32 /* Max. # of args. */
21 21
22#define CCI_OTSHIFT 16 22#define CCI_OTSHIFT 16
23#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ 23#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
24#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE)
24#define CCI_OPSHIFT 24 25#define CCI_OPSHIFT 24
25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ 26#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
26 27
27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) 28#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
29#define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT)
28#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) 30#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
29#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) 31#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
30#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) 32#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
33#define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL)
31#define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL) 34#define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL)
32#define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL) 35#define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL)
33 36
34/* C call info flags. */ 37/* C call info flags. */
38#define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */
35#define CCI_L 0x0100 /* Implicit L arg. */ 39#define CCI_L 0x0100 /* Implicit L arg. */
36#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ 40#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
37#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ 41#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
@@ -45,6 +49,17 @@ typedef struct CCallInfo {
45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ 49#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ 50#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
47 51
52/* Extra args for SOFTFP, SPLIT 64 bit. */
53#define CCI_XARGS_SHIFT 14
54#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
55#define CCI_XA (1u << CCI_XARGS_SHIFT)
56
57#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
58#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
59#else
60#define CCI_XNARGS(ci) CCI_NARGS((ci))
61#endif
62
48/* Helpers for conditional function definitions. */ 63/* Helpers for conditional function definitions. */
49#define IRCALLCOND_ANY(x) x 64#define IRCALLCOND_ANY(x) x
50 65
@@ -66,6 +81,18 @@ typedef struct CCallInfo {
66#define IRCALLCOND_SOFTFP_FFI(x) NULL 81#define IRCALLCOND_SOFTFP_FFI(x) NULL
67#endif 82#endif
68 83
84#if LJ_SOFTFP && LJ_TARGET_MIPS
85#define IRCALLCOND_SOFTFP_MIPS(x) x
86#else
87#define IRCALLCOND_SOFTFP_MIPS(x) NULL
88#endif
89
90#if LJ_SOFTFP && LJ_TARGET_MIPS64
91#define IRCALLCOND_SOFTFP_MIPS64(x) x
92#else
93#define IRCALLCOND_SOFTFP_MIPS64(x) NULL
94#endif
95
69#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) 96#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
70 97
71#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) 98#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
@@ -86,93 +113,157 @@ typedef struct CCallInfo {
86#define IRCALLCOND_FFI32(x) NULL 113#define IRCALLCOND_FFI32(x) NULL
87#endif 114#endif
88 115
116#if LJ_HASBUFFER
117#define IRCALLCOND_BUFFER(x) x
118#else
119#define IRCALLCOND_BUFFER(x) NULL
120#endif
121
122#if LJ_HASBUFFER && LJ_HASFFI
123#define IRCALLCOND_BUFFFI(x) x
124#else
125#define IRCALLCOND_BUFFFI(x) NULL
126#endif
127
89#if LJ_SOFTFP 128#if LJ_SOFTFP
90#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ 129#define XA_FP CCI_XA
130#define XA2_FP (CCI_XA+CCI_XA)
91#else 131#else
92#define ARG1_FP 1 132#define XA_FP 0
133#define XA2_FP 0
134#endif
135
136#if LJ_SOFTFP32
137#define XA_FP32 CCI_XA
138#define XA2_FP32 (CCI_XA+CCI_XA)
139#else
140#define XA_FP32 0
141#define XA2_FP32 0
93#endif 142#endif
94 143
95#if LJ_32 144#if LJ_32
96#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ 145#define XA_64 CCI_XA
146#define XA2_64 (CCI_XA+CCI_XA)
97#else 147#else
98#define ARG2_64 2 148#define XA_64 0
149#define XA2_64 0
99#endif 150#endif
100 151
101/* Function definitions for CALL* instructions. */ 152/* Function definitions for CALL* instructions. */
102#define IRCALLDEF(_) \ 153#define IRCALLDEF(_) \
103 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ 154 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
104 _(ANY, lj_str_new, 3, S, STR, CCI_L) \ 155 _(ANY, lj_str_find, 4, N, PGC, 0) \
156 _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \
105 _(ANY, lj_strscan_num, 2, FN, INT, 0) \ 157 _(ANY, lj_strscan_num, 2, FN, INT, 0) \
106 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ 158 _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \
107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 159 _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \
108 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 160 _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \
109 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 161 _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \
110 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ 162 _(ANY, lj_strfmt_putnum, 2, FL, PGC, CCI_T) \
163 _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \
164 _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \
165 _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \
166 _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \
167 _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \
168 _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \
169 _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \
170 _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \
171 _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \
172 _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \
173 _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \
174 _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \
175 _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \
176 _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \
177 _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \
178 _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \
179 _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \
180 _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \
181 _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \
182 _(BUFFER, lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \
183 _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \
184 _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \
185 _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \
186 _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \
187 _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \
188 _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
189 _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \
111 _(ANY, lj_tab_len, 1, FL, INT, 0) \ 190 _(ANY, lj_tab_len, 1, FL, INT, 0) \
191 _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
112 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ 192 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
113 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ 193 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
114 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ 194 _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \
115 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ 195 _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \
116 _(ANY, lj_vm_modi, 2, FN, INT, 0) \ 196 _(ANY, lj_vm_modi, 2, FN, INT, 0) \
117 _(ANY, sinh, ARG1_FP, N, NUM, 0) \ 197 _(ANY, log10, 1, N, NUM, XA_FP) \
118 _(ANY, cosh, ARG1_FP, N, NUM, 0) \ 198 _(ANY, exp, 1, N, NUM, XA_FP) \
119 _(ANY, tanh, ARG1_FP, N, NUM, 0) \ 199 _(ANY, sin, 1, N, NUM, XA_FP) \
120 _(ANY, fputc, 2, S, INT, 0) \ 200 _(ANY, cos, 1, N, NUM, XA_FP) \
121 _(ANY, fwrite, 4, S, INT, 0) \ 201 _(ANY, tan, 1, N, NUM, XA_FP) \
122 _(ANY, fflush, 1, S, INT, 0) \ 202 _(ANY, asin, 1, N, NUM, XA_FP) \
203 _(ANY, acos, 1, N, NUM, XA_FP) \
204 _(ANY, atan, 1, N, NUM, XA_FP) \
205 _(ANY, sinh, 1, N, NUM, XA_FP) \
206 _(ANY, cosh, 1, N, NUM, XA_FP) \
207 _(ANY, tanh, 1, N, NUM, XA_FP) \
208 _(ANY, fputc, 2, S, INT, 0) \
209 _(ANY, fwrite, 4, S, INT, 0) \
210 _(ANY, fflush, 1, S, INT, 0) \
123 /* ORDER FPM */ \ 211 /* ORDER FPM */ \
124 _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ 212 _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \
125 _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ 213 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
126 _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ 214 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
127 _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ 215 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
128 _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ 216 _(ANY, log, 1, N, NUM, XA_FP) \
129 _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ 217 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
130 _(FPMATH, log, ARG1_FP, N, NUM, 0) \ 218 _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
131 _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ 219 _(ANY, pow, 2, N, NUM, XA2_FP) \
132 _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ 220 _(ANY, atan2, 2, N, NUM, XA2_FP) \
133 _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ 221 _(ANY, ldexp, 2, N, NUM, XA_FP) \
134 _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ 222 _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
135 _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ 223 _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \
136 _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ 224 _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \
137 _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ 225 _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \
138 _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ 226 _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \
139 _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ 227 _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \
140 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
141 _(SOFTFP, softfp_add, 4, N, NUM, 0) \
142 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
143 _(SOFTFP, softfp_mul, 4, N, NUM, 0) \
144 _(SOFTFP, softfp_div, 4, N, NUM, 0) \
145 _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \
146 _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ 228 _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \
147 _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ 229 _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \
230 _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \
231 _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \
232 _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
148 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ 233 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
149 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ 234 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
150 _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ 235 _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
151 _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ 236 _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
152 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ 237 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
153 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 238 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
154 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 239 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
155 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ 240 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
156 _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ 241 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
157 _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ 242 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
158 _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ 243 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
159 _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ 244 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
160 _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ 245 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
161 _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ 246 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
162 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ 247 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
163 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ 248 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
164 _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 249 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
165 _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 250 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
166 _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 251 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
167 _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 252 _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
168 _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 253 _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
169 _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 254 _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
170 _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ 255 _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \
171 _(FFI, strlen, 1, L, INTP, 0) \ 256 _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \
172 _(FFI, memcpy, 3, S, PTR, 0) \ 257 _(FFI, strlen, 1, L, INTP, 0) \
173 _(FFI, memset, 3, S, PTR, 0) \ 258 _(FFI, memcpy, 3, S, PTR, 0) \
174 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ 259 _(FFI, memset, 3, S, PTR, 0) \
175 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) 260 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
261 _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
262 _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
263 _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
264 _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
265 _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
266 _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
176 \ 267 \
177 /* End of list. */ 268 /* End of list. */
178 269
@@ -220,6 +311,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
220#define fp64_f2l __aeabi_f2lz 311#define fp64_f2l __aeabi_f2lz
221#define fp64_f2ul __aeabi_f2ulz 312#define fp64_f2ul __aeabi_f2ulz
222#endif 313#endif
314#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
315#define softfp_add __adddf3
316#define softfp_sub __subdf3
317#define softfp_mul __muldf3
318#define softfp_div __divdf3
319#define softfp_cmp __ledf2
320#define softfp_i2d __floatsidf
321#define softfp_d2i __fixdfsi
322#define softfp_ui2d __floatunsidf
323#define softfp_f2d __extendsfdf2
324#define softfp_d2ui __fixunsdfsi
325#define softfp_d2f __truncdfsf2
326#define softfp_i2f __floatsisf
327#define softfp_ui2f __floatunsisf
328#define softfp_f2i __fixsfsi
329#define softfp_f2ui __fixunssfsi
223#else 330#else
224#error "Missing soft-float definitions for target architecture" 331#error "Missing soft-float definitions for target architecture"
225#endif 332#endif
@@ -240,10 +347,14 @@ extern float softfp_ui2f(uint32_t a);
240extern int32_t softfp_f2i(float a); 347extern int32_t softfp_f2i(float a);
241extern uint32_t softfp_f2ui(float a); 348extern uint32_t softfp_f2ui(float a);
242#endif 349#endif
350#if LJ_TARGET_MIPS
351extern double lj_vm_sfmin(double a, double b);
352extern double lj_vm_sfmax(double a, double b);
353#endif
243#endif 354#endif
244 355
245#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) 356#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)
246#ifdef __GNUC__ 357#if defined(__GNUC__) || defined(__clang__)
247#define fp64_l2d __floatdidf 358#define fp64_l2d __floatdidf
248#define fp64_ul2d __floatundidf 359#define fp64_ul2d __floatundidf
249#define fp64_l2f __floatdisf 360#define fp64_l2f __floatdisf
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 06465842..0541090d 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -36,11 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
36 return ref; 36 return ref;
37} 37}
38 38
39LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
40
39/* Interning of constants. */ 41/* Interning of constants. */
40LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); 42LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
41LJ_FUNC void lj_ir_k64_freeall(jit_State *J); 43LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
42LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
43LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
44LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); 44LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
45LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); 45LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
46LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); 46LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
@@ -48,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
48LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); 48LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr);
49LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); 49LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
50LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); 50LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
51LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
51 52
52#if LJ_64 53#if LJ_64
53#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) 54#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k))
@@ -74,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
74#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) 75#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
75 76
76/* Special 128 bit SIMD constants. */ 77/* Special 128 bit SIMD constants. */
77#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) 78#define lj_ir_ksimd(J, idx) \
78#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) 79 lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J))
79 80
80/* Access to constants. */ 81/* Access to constants. */
81LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); 82LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
@@ -119,10 +120,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
119LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); 120LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
120LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); 121LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
121LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); 122LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
122LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); 123LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
123LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); 124LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
124LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); 125LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
125LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); 126LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
127LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim);
126LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); 128LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
127 129
128/* Dead-store elimination. */ 130/* Dead-store elimination. */
@@ -149,7 +151,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
149/* Optimization passes. */ 151/* Optimization passes. */
150LJ_FUNC void lj_opt_dce(jit_State *J); 152LJ_FUNC void lj_opt_dce(jit_State *J);
151LJ_FUNC int lj_opt_loop(jit_State *J); 153LJ_FUNC int lj_opt_loop(jit_State *J);
152#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 154#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
153LJ_FUNC void lj_opt_split(jit_State *J); 155LJ_FUNC void lj_opt_split(jit_State *J);
154#else 156#else
155#define lj_opt_split(J) UNUSED(J) 157#define lj_opt_split(J) UNUSED(J)
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 9ccbb594..34ddf907 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -9,71 +9,85 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_ir.h" 10#include "lj_ir.h"
11 11
12/* JIT engine flags. */ 12/* -- JIT engine flags ---------------------------------------------------- */
13
14/* General JIT engine flags. 4 bits. */
13#define JIT_F_ON 0x00000001 15#define JIT_F_ON 0x00000001
14 16
15/* CPU-specific JIT engine flags. */ 17/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
18#define JIT_F_CPU 0x00000010
19
16#if LJ_TARGET_X86ORX64 20#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 21
18#define JIT_F_SSE2 0x00000020 22#define JIT_F_SSE3 (JIT_F_CPU << 0)
19#define JIT_F_SSE3 0x00000040 23#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
20#define JIT_F_SSE4_1 0x00000080 24#define JIT_F_BMI2 (JIT_F_CPU << 2)
21#define JIT_F_P4 0x00000100 25
22#define JIT_F_PREFER_IMUL 0x00000200 26
23#define JIT_F_SPLIT_XMM 0x00000400 27#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
24#define JIT_F_LEA_AGU 0x00000800 28
25
26/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM"
29#elif LJ_TARGET_ARM 29#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 30
31#define JIT_F_ARMV6T2_ 0x00000020 31#define JIT_F_ARMV6_ (JIT_F_CPU << 0)
32#define JIT_F_ARMV7 0x00000040 32#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1)
33#define JIT_F_VFPV2 0x00000080 33#define JIT_F_ARMV7 (JIT_F_CPU << 2)
34#define JIT_F_VFPV3 0x00000100 34#define JIT_F_ARMV8 (JIT_F_CPU << 3)
35 35#define JIT_F_VFPV2 (JIT_F_CPU << 4)
36#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) 36#define JIT_F_VFPV3 (JIT_F_CPU << 5)
37#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) 37
38#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
39#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
38#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) 40#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3)
39 41
40/* Names for the CPU-specific flags. Must match the order above. */ 42#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"
41#define JIT_F_CPU_FIRST JIT_F_ARMV6_ 43
42#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
43#elif LJ_TARGET_PPC 44#elif LJ_TARGET_PPC
44#define JIT_F_SQRT 0x00000010
45#define JIT_F_ROUND 0x00000020
46 45
47/* Names for the CPU-specific flags. Must match the order above. */ 46#define JIT_F_SQRT (JIT_F_CPU << 0)
48#define JIT_F_CPU_FIRST JIT_F_SQRT 47#define JIT_F_ROUND (JIT_F_CPU << 1)
48
49#define JIT_F_CPUSTRING "\4SQRT\5ROUND" 49#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
50
50#elif LJ_TARGET_MIPS 51#elif LJ_TARGET_MIPS
51#define JIT_F_MIPS32R2 0x00000010
52 52
53/* Names for the CPU-specific flags. Must match the order above. */ 53#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0)
54#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 54
55#if LJ_TARGET_MIPS32
56#if LJ_TARGET_MIPSR6
57#define JIT_F_CPUSTRING "\010MIPS32R6"
58#else
55#define JIT_F_CPUSTRING "\010MIPS32R2" 59#define JIT_F_CPUSTRING "\010MIPS32R2"
60#endif
61#else
62#if LJ_TARGET_MIPSR6
63#define JIT_F_CPUSTRING "\010MIPS64R6"
56#else 64#else
57#define JIT_F_CPU_FIRST 0 65#define JIT_F_CPUSTRING "\010MIPS64R2"
66#endif
67#endif
68
69#else
70
58#define JIT_F_CPUSTRING "" 71#define JIT_F_CPUSTRING ""
72
59#endif 73#endif
60 74
61/* Optimization flags. */ 75/* Optimization flags. 12 bits. */
76#define JIT_F_OPT 0x00010000
62#define JIT_F_OPT_MASK 0x0fff0000 77#define JIT_F_OPT_MASK 0x0fff0000
63 78
64#define JIT_F_OPT_FOLD 0x00010000 79#define JIT_F_OPT_FOLD (JIT_F_OPT << 0)
65#define JIT_F_OPT_CSE 0x00020000 80#define JIT_F_OPT_CSE (JIT_F_OPT << 1)
66#define JIT_F_OPT_DCE 0x00040000 81#define JIT_F_OPT_DCE (JIT_F_OPT << 2)
67#define JIT_F_OPT_FWD 0x00080000 82#define JIT_F_OPT_FWD (JIT_F_OPT << 3)
68#define JIT_F_OPT_DSE 0x00100000 83#define JIT_F_OPT_DSE (JIT_F_OPT << 4)
69#define JIT_F_OPT_NARROW 0x00200000 84#define JIT_F_OPT_NARROW (JIT_F_OPT << 5)
70#define JIT_F_OPT_LOOP 0x00400000 85#define JIT_F_OPT_LOOP (JIT_F_OPT << 6)
71#define JIT_F_OPT_ABC 0x00800000 86#define JIT_F_OPT_ABC (JIT_F_OPT << 7)
72#define JIT_F_OPT_SINK 0x01000000 87#define JIT_F_OPT_SINK (JIT_F_OPT << 8)
73#define JIT_F_OPT_FUSE 0x02000000 88#define JIT_F_OPT_FUSE (JIT_F_OPT << 9)
74 89
75/* Optimizations names for -O. Must match the order above. */ 90/* Optimizations names for -O. Must match the order above. */
76#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
77#define JIT_F_OPTSTRING \ 91#define JIT_F_OPTSTRING \
78 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" 92 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
79 93
@@ -85,6 +99,8 @@
85 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) 99 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
86#define JIT_F_OPT_DEFAULT JIT_F_OPT_3 100#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
87 101
102/* -- JIT engine parameters ----------------------------------------------- */
103
88#if LJ_TARGET_WINDOWS || LJ_64 104#if LJ_TARGET_WINDOWS || LJ_64
89/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ 105/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
90#define JIT_P_sizemcode_DEFAULT 64 106#define JIT_P_sizemcode_DEFAULT 64
@@ -100,6 +116,7 @@
100 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 116 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
101 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 117 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
102 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 118 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
119 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
103 \ 120 \
104 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 121 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
105 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 122 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -126,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM)
126#define JIT_PARAMSTR(len, name, value) #len #name 143#define JIT_PARAMSTR(len, name, value) #len #name
127#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) 144#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
128 145
146/* -- JIT engine data structures ------------------------------------------ */
147
129/* Trace compiler state. */ 148/* Trace compiler state. */
130typedef enum { 149typedef enum {
131 LJ_TRACE_IDLE, /* Trace compiler idle. */ 150 LJ_TRACE_IDLE, /* Trace compiler idle. */
@@ -165,6 +184,7 @@ typedef struct MCLink {
165typedef struct SnapShot { 184typedef struct SnapShot {
166 uint32_t mapofs; /* Offset into snapshot map. */ 185 uint32_t mapofs; /* Offset into snapshot map. */
167 IRRef1 ref; /* First IR ref for this snapshot. */ 186 IRRef1 ref; /* First IR ref for this snapshot. */
187 uint16_t mcofs; /* Offset into machine code in MCode units. */
168 uint8_t nslots; /* Number of valid slots. */ 188 uint8_t nslots; /* Number of valid slots. */
169 uint8_t topslot; /* Maximum frame extent. */ 189 uint8_t topslot; /* Maximum frame extent. */
170 uint8_t nent; /* Number of compressed entries. */ 190 uint8_t nent; /* Number of compressed entries. */
@@ -186,14 +206,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
186#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) 206#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
187#define SNAP_TR(slot, tr) \ 207#define SNAP_TR(slot, tr) \
188 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) 208 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
209#if !LJ_FR2
189#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) 210#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
211#endif
190#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) 212#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
191#define snap_ref(sn) ((sn) & 0xffff) 213#define snap_ref(sn) ((sn) & 0xffff)
192#define snap_slot(sn) ((BCReg)((sn) >> 24)) 214#define snap_slot(sn) ((BCReg)((sn) >> 24))
193#define snap_isframe(sn) ((sn) & SNAP_FRAME) 215#define snap_isframe(sn) ((sn) & SNAP_FRAME)
194#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
195#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) 216#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
196 217
218static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
219{
220#if LJ_FR2
221 uint64_t pcbase;
222 memcpy(&pcbase, sn, sizeof(uint64_t));
223 return (const BCIns *)(pcbase >> 8);
224#else
225 return (const BCIns *)(uintptr_t)*sn;
226#endif
227}
228
197/* Snapshot and exit numbers. */ 229/* Snapshot and exit numbers. */
198typedef uint32_t SnapNo; 230typedef uint32_t SnapNo;
199typedef uint32_t ExitNo; 231typedef uint32_t ExitNo;
@@ -211,7 +243,8 @@ typedef enum {
211 LJ_TRLINK_UPREC, /* Up-recursion. */ 243 LJ_TRLINK_UPREC, /* Up-recursion. */
212 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 244 LJ_TRLINK_DOWNREC, /* Down-recursion. */
213 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 245 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
214 LJ_TRLINK_RETURN /* Return to interpreter. */ 246 LJ_TRLINK_RETURN, /* Return to interpreter. */
247 LJ_TRLINK_STITCH /* Trace stitching. */
215} TraceLink; 248} TraceLink;
216 249
217/* Trace object. */ 250/* Trace object. */
@@ -219,6 +252,9 @@ typedef struct GCtrace {
219 GCHeader; 252 GCHeader;
220 uint16_t nsnap; /* Number of snapshots. */ 253 uint16_t nsnap; /* Number of snapshots. */
221 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ 254 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
255#if LJ_GC64
256 uint32_t unused_gc64;
257#endif
222 GCRef gclist; 258 GCRef gclist;
223 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ 259 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
224 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ 260 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
@@ -294,6 +330,16 @@ typedef struct ScEvEntry {
294 uint8_t dir; /* Direction. 1: +, 0: -. */ 330 uint8_t dir; /* Direction. 1: +, 0: -. */
295} ScEvEntry; 331} ScEvEntry;
296 332
333/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
334typedef struct RBCHashEntry {
335 MRef pc; /* Bytecode PC. */
336 GCRef pt; /* Prototype. */
337 IRRef ref; /* IR reference. */
338} RBCHashEntry;
339
340/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
341#define RBCHASH_SLOTS 8
342
297/* 128 bit SIMD constants. */ 343/* 128 bit SIMD constants. */
298enum { 344enum {
299 LJ_KSIMD_ABS, 345 LJ_KSIMD_ABS,
@@ -301,12 +347,51 @@ enum {
301 LJ_KSIMD__MAX 347 LJ_KSIMD__MAX
302}; 348};
303 349
350enum {
351#if LJ_TARGET_X86ORX64
352 LJ_K64_TOBIT, /* 2^52 + 2^51 */
353 LJ_K64_2P64, /* 2^64 */
354 LJ_K64_M2P64, /* -2^64 */
355#if LJ_32
356 LJ_K64_M2P64_31, /* -2^64 or -2^31 */
357#else
358 LJ_K64_M2P64_31 = LJ_K64_M2P64,
359#endif
360#endif
361#if LJ_TARGET_MIPS
362 LJ_K64_2P31, /* 2^31 */
363#if LJ_64
364 LJ_K64_2P63, /* 2^63 */
365 LJ_K64_M2P64, /* -2^64 */
366#endif
367#endif
368 LJ_K64__MAX,
369};
370
371enum {
372#if LJ_TARGET_X86ORX64
373 LJ_K32_M2P64_31, /* -2^64 or -2^31 */
374#endif
375#if LJ_TARGET_PPC
376 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
377 LJ_K32_2P52, /* 2^52 */
378#endif
379#if LJ_TARGET_PPC || LJ_TARGET_MIPS
380 LJ_K32_2P31, /* 2^31 */
381#endif
382#if LJ_TARGET_MIPS64
383 LJ_K32_2P63, /* 2^63 */
384 LJ_K32_M2P64, /* -2^64 */
385#endif
386 LJ_K32__MAX
387};
388
304/* Get 16 byte aligned pointer to SIMD constant. */ 389/* Get 16 byte aligned pointer to SIMD constant. */
305#define LJ_KSIMD(J, n) \ 390#define LJ_KSIMD(J, n) \
306 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) 391 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
307 392
308/* Set/reset flag to activate the SPLIT pass for the current trace. */ 393/* Set/reset flag to activate the SPLIT pass for the current trace. */
309#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 394#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
310#define lj_needsplit(J) (J->needsplit = 1) 395#define lj_needsplit(J) (J->needsplit = 1)
311#define lj_resetsplit(J) (J->needsplit = 0) 396#define lj_resetsplit(J) (J->needsplit = 0)
312#else 397#else
@@ -317,13 +402,14 @@ enum {
317/* Fold state is used to fold instructions on-the-fly. */ 402/* Fold state is used to fold instructions on-the-fly. */
318typedef struct FoldState { 403typedef struct FoldState {
319 IRIns ins; /* Currently emitted instruction. */ 404 IRIns ins; /* Currently emitted instruction. */
320 IRIns left; /* Instruction referenced by left operand. */ 405 IRIns left[2]; /* Instruction referenced by left operand. */
321 IRIns right; /* Instruction referenced by right operand. */ 406 IRIns right[2]; /* Instruction referenced by right operand. */
322} FoldState; 407} FoldState;
323 408
324/* JIT compiler state. */ 409/* JIT compiler state. */
325typedef struct jit_State { 410typedef struct jit_State {
326 GCtrace cur; /* Current trace. */ 411 GCtrace cur; /* Current trace. */
412 GCtrace *curfinal; /* Final address of current trace (set during asm). */
327 413
328 lua_State *L; /* Current Lua state. */ 414 lua_State *L; /* Current Lua state. */
329 const BCIns *pc; /* Current PC. */ 415 const BCIns *pc; /* Current PC. */
@@ -353,8 +439,9 @@ typedef struct jit_State {
353 int32_t framedepth; /* Current frame depth. */ 439 int32_t framedepth; /* Current frame depth. */
354 int32_t retdepth; /* Return frame depth (count of RETF). */ 440 int32_t retdepth; /* Return frame depth (count of RETF). */
355 441
356 MRef k64; /* Pointer to chained array of 64 bit constants. */ 442 uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
357 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ 443 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
444 TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */
358 445
359 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ 446 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
360 IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ 447 IRRef irtoplim; /* Upper limit of instuction buffer (biased). */
@@ -367,13 +454,15 @@ typedef struct jit_State {
367 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ 454 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
368 455
369 PostProc postproc; /* Required post-processing after execution. */ 456 PostProc postproc; /* Required post-processing after execution. */
370#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 457#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
371 int needsplit; /* Need SPLIT pass. */ 458 uint8_t needsplit; /* Need SPLIT pass. */
372#endif 459#endif
460 uint8_t retryrec; /* Retry recording. */
373 461
374 GCRef *trace; /* Array of traces. */ 462 GCRef *trace; /* Array of traces. */
375 TraceNo freetrace; /* Start of scan for next free trace. */ 463 TraceNo freetrace; /* Start of scan for next free trace. */
376 MSize sizetrace; /* Size of trace array. */ 464 MSize sizetrace; /* Size of trace array. */
465 IRRef1 ktrace; /* Reference to KGC with GCtrace. */
377 466
378 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ 467 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
379 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ 468 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
@@ -384,7 +473,10 @@ typedef struct jit_State {
384 473
385 HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ 474 HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
386 uint32_t penaltyslot; /* Round-robin index into penalty slots. */ 475 uint32_t penaltyslot; /* Round-robin index into penalty slots. */
387 uint32_t prngstate; /* PRNG state. */ 476
477#ifdef LUAJIT_ENABLE_TABLE_BUMP
478 RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
479#endif
388 480
389 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ 481 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
390 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ 482 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
@@ -394,6 +486,7 @@ typedef struct jit_State {
394 const BCIns *startpc; /* Bytecode PC of starting instruction. */ 486 const BCIns *startpc; /* Bytecode PC of starting instruction. */
395 TraceNo parent; /* Parent of current side trace (0 for root traces). */ 487 TraceNo parent; /* Parent of current side trace (0 for root traces). */
396 ExitNo exitno; /* Exit number in parent of current side trace. */ 488 ExitNo exitno; /* Exit number in parent of current side trace. */
489 int exitcode; /* Exit code from unwound trace. */
397 490
398 BCIns *patchpc; /* PC for pending re-patch. */ 491 BCIns *patchpc; /* PC for pending re-patch. */
399 BCIns patchins; /* Instruction for pending re-patch. */ 492 BCIns patchins; /* Instruction for pending re-patch. */
@@ -406,14 +499,18 @@ typedef struct jit_State {
406 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 499 size_t szallmcarea; /* Total size of all allocated mcode areas. */
407 500
408 TValue errinfo; /* Additional info element for trace errors. */ 501 TValue errinfo; /* Additional info element for trace errors. */
502
503#if LJ_HASPROFILE
504 GCproto *prev_pt; /* Previous prototype. */
505 BCLine prev_line; /* Previous line. */
506 int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
507#endif
409} jit_State; 508} jit_State;
410 509
411/* Trivial PRNG e.g. used for penalty randomization. */ 510#ifdef LUA_USE_ASSERT
412static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) 511#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__)
413{ 512#else
414 /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ 513#define lj_assertJ(c, ...) ((void)J)
415 J->prngstate = J->prngstate * 1103515245 + 12345; 514#endif
416 return J->prngstate >> (32-bits);
417}
418 515
419#endif 516#endif
diff --git a/src/lj_lex.c b/src/lj_lex.c
index 5160cefd..cc6fa533 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#if LJ_HASFFI 17#if LJ_HASFFI
17#include "lj_tab.h" 18#include "lj_tab.h"
@@ -24,6 +25,7 @@
24#include "lj_parse.h" 25#include "lj_parse.h"
25#include "lj_char.h" 26#include "lj_char.h"
26#include "lj_strscan.h" 27#include "lj_strscan.h"
28#include "lj_strfmt.h"
27 29
28/* Lua lexer token names. */ 30/* Lua lexer token names. */
29static const char *const tokennames[] = { 31static const char *const tokennames[] = {
@@ -37,54 +39,54 @@ TKDEF(TKSTR1, TKSTR2)
37 39
38/* -- Buffer handling ----------------------------------------------------- */ 40/* -- Buffer handling ----------------------------------------------------- */
39 41
40#define char2int(c) ((int)(uint8_t)(c)) 42#define LEX_EOF (-1)
41#define next(ls) \ 43#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
42 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
43#define save_and_next(ls) (save(ls, ls->current), next(ls))
44#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
45#define END_OF_STREAM (-1)
46 44
47static int fillbuf(LexState *ls) 45/* Get more input from reader. */
46static LJ_NOINLINE LexChar lex_more(LexState *ls)
48{ 47{
49 size_t sz; 48 size_t sz;
50 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); 49 const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
51 if (buf == NULL || sz == 0) return END_OF_STREAM; 50 if (p == NULL || sz == 0) return LEX_EOF;
52 if (sz >= LJ_MAX_MEM) { 51 if (sz >= LJ_MAX_BUF) {
53 if (sz != ~(size_t)0) lj_err_mem(ls->L); 52 if (sz != ~(size_t)0) lj_err_mem(ls->L);
53 sz = ~(uintptr_t)0 - (uintptr_t)p;
54 if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1;
54 ls->endmark = 1; 55 ls->endmark = 1;
55 } 56 }
56 ls->n = (MSize)sz - 1; 57 ls->pe = p + sz;
57 ls->p = buf; 58 ls->p = p + 1;
58 return char2int(*(ls->p++)); 59 return (LexChar)(uint8_t)p[0];
59} 60}
60 61
61static LJ_NOINLINE void save_grow(LexState *ls, int c) 62/* Get next character. */
63static LJ_AINLINE LexChar lex_next(LexState *ls)
62{ 64{
63 MSize newsize; 65 return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
64 if (ls->sb.sz >= LJ_MAX_STR/2)
65 lj_lex_error(ls, 0, LJ_ERR_XELEM);
66 newsize = ls->sb.sz * 2;
67 lj_str_resizebuf(ls->L, &ls->sb, newsize);
68 ls->sb.buf[ls->sb.n++] = (char)c;
69} 66}
70 67
71static LJ_AINLINE void save(LexState *ls, int c) 68/* Save character. */
69static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
72{ 70{
73 if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) 71 lj_buf_putb(&ls->sb, c);
74 save_grow(ls, c); 72}
75 else 73
76 ls->sb.buf[ls->sb.n++] = (char)c; 74/* Save previous character and get next character. */
75static LJ_AINLINE LexChar lex_savenext(LexState *ls)
76{
77 lex_save(ls, ls->c);
78 return lex_next(ls);
77} 79}
78 80
79static void inclinenumber(LexState *ls) 81/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
82static void lex_newline(LexState *ls)
80{ 83{
81 int old = ls->current; 84 LexChar old = ls->c;
82 lua_assert(currIsNewline(ls)); 85 lj_assertLS(lex_iseol(ls), "bad usage");
83 next(ls); /* skip `\n' or `\r' */ 86 lex_next(ls); /* Skip "\n" or "\r". */
84 if (currIsNewline(ls) && ls->current != old) 87 if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
85 next(ls); /* skip `\n\r' or `\r\n' */
86 if (++ls->linenumber >= LJ_MAX_LINE) 88 if (++ls->linenumber >= LJ_MAX_LINE)
87 lj_lex_error(ls, ls->token, LJ_ERR_XLINES); 89 lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
88} 90}
89 91
90/* -- Scanner for terminals ----------------------------------------------- */ 92/* -- Scanner for terminals ----------------------------------------------- */
@@ -93,19 +95,17 @@ static void inclinenumber(LexState *ls)
93static void lex_number(LexState *ls, TValue *tv) 95static void lex_number(LexState *ls, TValue *tv)
94{ 96{
95 StrScanFmt fmt; 97 StrScanFmt fmt;
96 int c, xp = 'e'; 98 LexChar c, xp = 'e';
97 lua_assert(lj_char_isdigit(ls->current)); 99 lj_assertLS(lj_char_isdigit(ls->c), "bad usage");
98 if ((c = ls->current) == '0') { 100 if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
99 save_and_next(ls); 101 xp = 'p';
100 if ((ls->current | 0x20) == 'x') xp = 'p'; 102 while (lj_char_isident(ls->c) || ls->c == '.' ||
101 } 103 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
102 while (lj_char_isident(ls->current) || ls->current == '.' || 104 c = ls->c;
103 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { 105 lex_savenext(ls);
104 c = ls->current;
105 save_and_next(ls);
106 } 106 }
107 save(ls, '\0'); 107 lex_save(ls, '\0');
108 fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, 108 fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv,
109 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | 109 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
110 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); 110 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
111 if (LJ_DUALNUM && fmt == STRSCAN_INT) { 111 if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -116,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv)
116 } else if (fmt != STRSCAN_ERROR) { 116 } else if (fmt != STRSCAN_ERROR) {
117 lua_State *L = ls->L; 117 lua_State *L = ls->L;
118 GCcdata *cd; 118 GCcdata *cd;
119 lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); 119 lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG,
120 if (!ctype_ctsG(G(L))) { 120 "unexpected number format %d", fmt);
121 ptrdiff_t oldtop = savestack(L, L->top); 121 ctype_loadffi(L);
122 luaopen_ffi(L); /* Load FFI library on-demand. */
123 L->top = restorestack(L, oldtop);
124 }
125 if (fmt == STRSCAN_IMAG) { 122 if (fmt == STRSCAN_IMAG) {
126 cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); 123 cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
127 ((double *)cdataptr(cd))[0] = 0; 124 ((double *)cdataptr(cd))[0] = 0;
@@ -133,65 +130,66 @@ static void lex_number(LexState *ls, TValue *tv)
133 lj_parse_keepcdata(ls, tv, cd); 130 lj_parse_keepcdata(ls, tv, cd);
134#endif 131#endif
135 } else { 132 } else {
136 lua_assert(fmt == STRSCAN_ERROR); 133 lj_assertLS(fmt == STRSCAN_ERROR,
134 "unexpected number format %d", fmt);
137 lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); 135 lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
138 } 136 }
139} 137}
140 138
141static int skip_sep(LexState *ls) 139/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
140static int lex_skipeq(LexState *ls)
142{ 141{
143 int count = 0; 142 int count = 0;
144 int s = ls->current; 143 LexChar s = ls->c;
145 lua_assert(s == '[' || s == ']'); 144 lj_assertLS(s == '[' || s == ']', "bad usage");
146 save_and_next(ls); 145 while (lex_savenext(ls) == '=' && count < 0x20000000)
147 while (ls->current == '=' && count < 0x20000000) {
148 save_and_next(ls);
149 count++; 146 count++;
150 } 147 return (ls->c == s) ? count : (-count) - 1;
151 return (ls->current == s) ? count : (-count) - 1;
152} 148}
153 149
154static void read_long_string(LexState *ls, TValue *tv, int sep) 150/* Parse a long string or long comment (tv set to NULL). */
151static void lex_longstring(LexState *ls, TValue *tv, int sep)
155{ 152{
156 save_and_next(ls); /* skip 2nd `[' */ 153 lex_savenext(ls); /* Skip second '['. */
157 if (currIsNewline(ls)) /* string starts with a newline? */ 154 if (lex_iseol(ls)) /* Skip initial newline. */
158 inclinenumber(ls); /* skip it */ 155 lex_newline(ls);
159 for (;;) { 156 for (;;) {
160 switch (ls->current) { 157 switch (ls->c) {
161 case END_OF_STREAM: 158 case LEX_EOF:
162 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); 159 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
163 break; 160 break;
164 case ']': 161 case ']':
165 if (skip_sep(ls) == sep) { 162 if (lex_skipeq(ls) == sep) {
166 save_and_next(ls); /* skip 2nd `]' */ 163 lex_savenext(ls); /* Skip second ']'. */
167 goto endloop; 164 goto endloop;
168 } 165 }
169 break; 166 break;
170 case '\n': 167 case '\n':
171 case '\r': 168 case '\r':
172 save(ls, '\n'); 169 lex_save(ls, '\n');
173 inclinenumber(ls); 170 lex_newline(ls);
174 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ 171 if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
175 break; 172 break;
176 default: 173 default:
177 if (tv) save_and_next(ls); 174 lex_savenext(ls);
178 else next(ls);
179 break; 175 break;
180 } 176 }
181 } endloop: 177 } endloop:
182 if (tv) { 178 if (tv) {
183 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), 179 GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep),
184 ls->sb.n - 2*(2 + (MSize)sep)); 180 sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
185 setstrV(ls->L, tv, str); 181 setstrV(ls->L, tv, str);
186 } 182 }
187} 183}
188 184
189static void read_string(LexState *ls, int delim, TValue *tv) 185/* Parse a string. */
186static void lex_string(LexState *ls, TValue *tv)
190{ 187{
191 save_and_next(ls); 188 LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
192 while (ls->current != delim) { 189 lex_savenext(ls);
193 switch (ls->current) { 190 while (ls->c != delim) {
194 case END_OF_STREAM: 191 switch (ls->c) {
192 case LEX_EOF:
195 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); 193 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
196 continue; 194 continue;
197 case '\n': 195 case '\n':
@@ -199,7 +197,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
199 lj_lex_error(ls, TK_string, LJ_ERR_XSTR); 197 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
200 continue; 198 continue;
201 case '\\': { 199 case '\\': {
202 int c = next(ls); /* Skip the '\\'. */ 200 LexChar c = lex_next(ls); /* Skip the '\\'. */
203 switch (c) { 201 switch (c) {
204 case 'a': c = '\a'; break; 202 case 'a': c = '\a'; break;
205 case 'b': c = '\b'; break; 203 case 'b': c = '\b'; break;
@@ -209,111 +207,139 @@ static void read_string(LexState *ls, int delim, TValue *tv)
209 case 't': c = '\t'; break; 207 case 't': c = '\t'; break;
210 case 'v': c = '\v'; break; 208 case 'v': c = '\v'; break;
211 case 'x': /* Hexadecimal escape '\xXX'. */ 209 case 'x': /* Hexadecimal escape '\xXX'. */
212 c = (next(ls) & 15u) << 4; 210 c = (lex_next(ls) & 15u) << 4;
213 if (!lj_char_isdigit(ls->current)) { 211 if (!lj_char_isdigit(ls->c)) {
214 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 212 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
215 c += 9 << 4; 213 c += 9 << 4;
216 } 214 }
217 c += (next(ls) & 15u); 215 c += (lex_next(ls) & 15u);
218 if (!lj_char_isdigit(ls->current)) { 216 if (!lj_char_isdigit(ls->c)) {
219 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 217 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
220 c += 9; 218 c += 9;
221 } 219 }
222 break; 220 break;
221 case 'u': /* Unicode escape '\u{XX...}'. */
222 if (lex_next(ls) != '{') goto err_xesc;
223 lex_next(ls);
224 c = 0;
225 do {
226 c = (c << 4) | (ls->c & 15u);
227 if (!lj_char_isdigit(ls->c)) {
228 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
229 c += 9;
230 }
231 if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */
232 } while (lex_next(ls) != '}');
233 if (c < 0x800) {
234 if (c < 0x80) break;
235 lex_save(ls, 0xc0 | (c >> 6));
236 } else {
237 if (c >= 0x10000) {
238 lex_save(ls, 0xf0 | (c >> 18));
239 lex_save(ls, 0x80 | ((c >> 12) & 0x3f));
240 } else {
241 if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */
242 lex_save(ls, 0xe0 | (c >> 12));
243 }
244 lex_save(ls, 0x80 | ((c >> 6) & 0x3f));
245 }
246 c = 0x80 | (c & 0x3f);
247 break;
223 case 'z': /* Skip whitespace. */ 248 case 'z': /* Skip whitespace. */
224 next(ls); 249 lex_next(ls);
225 while (lj_char_isspace(ls->current)) 250 while (lj_char_isspace(ls->c))
226 if (currIsNewline(ls)) inclinenumber(ls); else next(ls); 251 if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
227 continue; 252 continue;
228 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; 253 case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
229 case '\\': case '\"': case '\'': break; 254 case '\\': case '\"': case '\'': break;
230 case END_OF_STREAM: continue; 255 case LEX_EOF: continue;
231 default: 256 default:
232 if (!lj_char_isdigit(c)) 257 if (!lj_char_isdigit(c))
233 goto err_xesc; 258 goto err_xesc;
234 c -= '0'; /* Decimal escape '\ddd'. */ 259 c -= '0'; /* Decimal escape '\ddd'. */
235 if (lj_char_isdigit(next(ls))) { 260 if (lj_char_isdigit(lex_next(ls))) {
236 c = c*10 + (ls->current - '0'); 261 c = c*10 + (ls->c - '0');
237 if (lj_char_isdigit(next(ls))) { 262 if (lj_char_isdigit(lex_next(ls))) {
238 c = c*10 + (ls->current - '0'); 263 c = c*10 + (ls->c - '0');
239 if (c > 255) { 264 if (c > 255) {
240 err_xesc: 265 err_xesc:
241 lj_lex_error(ls, TK_string, LJ_ERR_XESC); 266 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
242 } 267 }
243 next(ls); 268 lex_next(ls);
244 } 269 }
245 } 270 }
246 save(ls, c); 271 lex_save(ls, c);
247 continue; 272 continue;
248 } 273 }
249 save(ls, c); 274 lex_save(ls, c);
250 next(ls); 275 lex_next(ls);
251 continue; 276 continue;
252 } 277 }
253 default: 278 default:
254 save_and_next(ls); 279 lex_savenext(ls);
255 break; 280 break;
256 } 281 }
257 } 282 }
258 save_and_next(ls); /* skip delimiter */ 283 lex_savenext(ls); /* Skip trailing delimiter. */
259 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); 284 setstrV(ls->L, tv,
285 lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2));
260} 286}
261 287
262/* -- Main lexical scanner ------------------------------------------------ */ 288/* -- Main lexical scanner ------------------------------------------------ */
263 289
264static int llex(LexState *ls, TValue *tv) 290/* Get next lexical token. */
291static LexToken lex_scan(LexState *ls, TValue *tv)
265{ 292{
266 lj_str_resetbuf(&ls->sb); 293 lj_buf_reset(&ls->sb);
267 for (;;) { 294 for (;;) {
268 if (lj_char_isident(ls->current)) { 295 if (lj_char_isident(ls->c)) {
269 GCstr *s; 296 GCstr *s;
270 if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ 297 if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
271 lex_number(ls, tv); 298 lex_number(ls, tv);
272 return TK_number; 299 return TK_number;
273 } 300 }
274 /* Identifier or reserved word. */ 301 /* Identifier or reserved word. */
275 do { 302 do {
276 save_and_next(ls); 303 lex_savenext(ls);
277 } while (lj_char_isident(ls->current)); 304 } while (lj_char_isident(ls->c));
278 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); 305 s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb));
279 setstrV(ls->L, tv, s); 306 setstrV(ls->L, tv, s);
280 if (s->reserved > 0) /* Reserved word? */ 307 if (s->reserved > 0) /* Reserved word? */
281 return TK_OFS + s->reserved; 308 return TK_OFS + s->reserved;
282 return TK_name; 309 return TK_name;
283 } 310 }
284 switch (ls->current) { 311 switch (ls->c) {
285 case '\n': 312 case '\n':
286 case '\r': 313 case '\r':
287 inclinenumber(ls); 314 lex_newline(ls);
288 continue; 315 continue;
289 case ' ': 316 case ' ':
290 case '\t': 317 case '\t':
291 case '\v': 318 case '\v':
292 case '\f': 319 case '\f':
293 next(ls); 320 lex_next(ls);
294 continue; 321 continue;
295 case '-': 322 case '-':
296 next(ls); 323 lex_next(ls);
297 if (ls->current != '-') return '-'; 324 if (ls->c != '-') return '-';
298 /* else is a comment */ 325 lex_next(ls);
299 next(ls); 326 if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
300 if (ls->current == '[') { 327 int sep = lex_skipeq(ls);
301 int sep = skip_sep(ls); 328 lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
302 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
303 if (sep >= 0) { 329 if (sep >= 0) {
304 read_long_string(ls, NULL, sep); /* long comment */ 330 lex_longstring(ls, NULL, sep);
305 lj_str_resetbuf(&ls->sb); 331 lj_buf_reset(&ls->sb);
306 continue; 332 continue;
307 } 333 }
308 } 334 }
309 /* else short comment */ 335 /* Short comment "--.*\n". */
310 while (!currIsNewline(ls) && ls->current != END_OF_STREAM) 336 while (!lex_iseol(ls) && ls->c != LEX_EOF)
311 next(ls); 337 lex_next(ls);
312 continue; 338 continue;
313 case '[': { 339 case '[': {
314 int sep = skip_sep(ls); 340 int sep = lex_skipeq(ls);
315 if (sep >= 0) { 341 if (sep >= 0) {
316 read_long_string(ls, tv, sep); 342 lex_longstring(ls, tv, sep);
317 return TK_string; 343 return TK_string;
318 } else if (sep == -1) { 344 } else if (sep == -1) {
319 return '['; 345 return '[';
@@ -323,44 +349,43 @@ static int llex(LexState *ls, TValue *tv)
323 } 349 }
324 } 350 }
325 case '=': 351 case '=':
326 next(ls); 352 lex_next(ls);
327 if (ls->current != '=') return '='; else { next(ls); return TK_eq; } 353 if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
328 case '<': 354 case '<':
329 next(ls); 355 lex_next(ls);
330 if (ls->current != '=') return '<'; else { next(ls); return TK_le; } 356 if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
331 case '>': 357 case '>':
332 next(ls); 358 lex_next(ls);
333 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } 359 if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
334 case '~': 360 case '~':
335 next(ls); 361 lex_next(ls);
336 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } 362 if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
337 case ':': 363 case ':':
338 next(ls); 364 lex_next(ls);
339 if (ls->current != ':') return ':'; else { next(ls); return TK_label; } 365 if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
340 case '"': 366 case '"':
341 case '\'': 367 case '\'':
342 read_string(ls, ls->current, tv); 368 lex_string(ls, tv);
343 return TK_string; 369 return TK_string;
344 case '.': 370 case '.':
345 save_and_next(ls); 371 if (lex_savenext(ls) == '.') {
346 if (ls->current == '.') { 372 lex_next(ls);
347 next(ls); 373 if (ls->c == '.') {
348 if (ls->current == '.') { 374 lex_next(ls);
349 next(ls);
350 return TK_dots; /* ... */ 375 return TK_dots; /* ... */
351 } 376 }
352 return TK_concat; /* .. */ 377 return TK_concat; /* .. */
353 } else if (!lj_char_isdigit(ls->current)) { 378 } else if (!lj_char_isdigit(ls->c)) {
354 return '.'; 379 return '.';
355 } else { 380 } else {
356 lex_number(ls, tv); 381 lex_number(ls, tv);
357 return TK_number; 382 return TK_number;
358 } 383 }
359 case END_OF_STREAM: 384 case LEX_EOF:
360 return TK_eof; 385 return TK_eof;
361 default: { 386 default: {
362 int c = ls->current; 387 LexChar c = ls->c;
363 next(ls); 388 lex_next(ls);
364 return c; /* Single-char tokens (+ - / ...). */ 389 return c; /* Single-char tokens (+ - / ...). */
365 } 390 }
366 } 391 }
@@ -375,36 +400,33 @@ int lj_lex_setup(lua_State *L, LexState *ls)
375 int header = 0; 400 int header = 0;
376 ls->L = L; 401 ls->L = L;
377 ls->fs = NULL; 402 ls->fs = NULL;
378 ls->n = 0; 403 ls->pe = ls->p = NULL;
379 ls->p = NULL;
380 ls->vstack = NULL; 404 ls->vstack = NULL;
381 ls->sizevstack = 0; 405 ls->sizevstack = 0;
382 ls->vtop = 0; 406 ls->vtop = 0;
383 ls->bcstack = NULL; 407 ls->bcstack = NULL;
384 ls->sizebcstack = 0; 408 ls->sizebcstack = 0;
385 ls->token = 0; 409 ls->tok = 0;
386 ls->lookahead = TK_eof; /* No look-ahead token. */ 410 ls->lookahead = TK_eof; /* No look-ahead token. */
387 ls->linenumber = 1; 411 ls->linenumber = 1;
388 ls->lastline = 1; 412 ls->lastline = 1;
389 ls->endmark = 0; 413 ls->endmark = 0;
390 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); 414 lex_next(ls); /* Read-ahead first char. */
391 next(ls); /* Read-ahead first char. */ 415 if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
392 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && 416 (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
393 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
394 ls->n -= 2;
395 ls->p += 2; 417 ls->p += 2;
396 next(ls); 418 lex_next(ls);
397 header = 1; 419 header = 1;
398 } 420 }
399 if (ls->current == '#') { /* Skip POSIX #! header line. */ 421 if (ls->c == '#') { /* Skip POSIX #! header line. */
400 do { 422 do {
401 next(ls); 423 lex_next(ls);
402 if (ls->current == END_OF_STREAM) return 0; 424 if (ls->c == LEX_EOF) return 0;
403 } while (!currIsNewline(ls)); 425 } while (!lex_iseol(ls));
404 inclinenumber(ls); 426 lex_newline(ls);
405 header = 1; 427 header = 1;
406 } 428 }
407 if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ 429 if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
408 if (header) { 430 if (header) {
409 /* 431 /*
410 ** Loading bytecode with an extra header is disabled for security 432 ** Loading bytecode with an extra header is disabled for security
@@ -426,55 +448,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
426 global_State *g = G(L); 448 global_State *g = G(L);
427 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); 449 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
428 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); 450 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
429 lj_str_freebuf(g, &ls->sb); 451 lj_buf_free(g, &ls->sb);
430} 452}
431 453
454/* Return next lexical token. */
432void lj_lex_next(LexState *ls) 455void lj_lex_next(LexState *ls)
433{ 456{
434 ls->lastline = ls->linenumber; 457 ls->lastline = ls->linenumber;
435 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ 458 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
436 ls->token = llex(ls, &ls->tokenval); /* Get next token. */ 459 ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
437 } else { /* Otherwise return lookahead token. */ 460 } else { /* Otherwise return lookahead token. */
438 ls->token = ls->lookahead; 461 ls->tok = ls->lookahead;
439 ls->lookahead = TK_eof; 462 ls->lookahead = TK_eof;
440 ls->tokenval = ls->lookaheadval; 463 ls->tokval = ls->lookaheadval;
441 } 464 }
442} 465}
443 466
467/* Look ahead for the next token. */
444LexToken lj_lex_lookahead(LexState *ls) 468LexToken lj_lex_lookahead(LexState *ls)
445{ 469{
446 lua_assert(ls->lookahead == TK_eof); 470 lj_assertLS(ls->lookahead == TK_eof, "double lookahead");
447 ls->lookahead = llex(ls, &ls->lookaheadval); 471 ls->lookahead = lex_scan(ls, &ls->lookaheadval);
448 return ls->lookahead; 472 return ls->lookahead;
449} 473}
450 474
451const char *lj_lex_token2str(LexState *ls, LexToken token) 475/* Convert token to string. */
476const char *lj_lex_token2str(LexState *ls, LexToken tok)
452{ 477{
453 if (token > TK_OFS) 478 if (tok > TK_OFS)
454 return tokennames[token-TK_OFS-1]; 479 return tokennames[tok-TK_OFS-1];
455 else if (!lj_char_iscntrl(token)) 480 else if (!lj_char_iscntrl(tok))
456 return lj_str_pushf(ls->L, "%c", token); 481 return lj_strfmt_pushf(ls->L, "%c", tok);
457 else 482 else
458 return lj_str_pushf(ls->L, "char(%d)", token); 483 return lj_strfmt_pushf(ls->L, "char(%d)", tok);
459} 484}
460 485
461void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) 486/* Lexer error. */
487void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
462{ 488{
463 const char *tok; 489 const char *tokstr;
464 va_list argp; 490 va_list argp;
465 if (token == 0) { 491 if (tok == 0) {
466 tok = NULL; 492 tokstr = NULL;
467 } else if (token == TK_name || token == TK_string || token == TK_number) { 493 } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
468 save(ls, '\0'); 494 lex_save(ls, '\0');
469 tok = ls->sb.buf; 495 tokstr = ls->sb.b;
470 } else { 496 } else {
471 tok = lj_lex_token2str(ls, token); 497 tokstr = lj_lex_token2str(ls, tok);
472 } 498 }
473 va_start(argp, em); 499 va_start(argp, em);
474 lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); 500 lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
475 va_end(argp); 501 va_end(argp);
476} 502}
477 503
504/* Initialize strings for reserved words. */
478void lj_lex_init(lua_State *L) 505void lj_lex_init(lua_State *L)
479{ 506{
480 uint32_t i; 507 uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index 40e797da..d2230b6a 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
30 TK_RESERVED = TK_while - TK_OFS 30 TK_RESERVED = TK_while - TK_OFS
31}; 31};
32 32
33typedef int LexToken; 33typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
34typedef int LexToken; /* Lexical token. */
34 35
35/* Combined bytecode ins/line. Only used during bytecode generation. */ 36/* Combined bytecode ins/line. Only used during bytecode generation. */
36typedef struct BCInsLine { 37typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
51typedef struct LexState { 52typedef struct LexState {
52 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ 53 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
53 struct lua_State *L; /* Lua state. */ 54 struct lua_State *L; /* Lua state. */
54 TValue tokenval; /* Current token value. */ 55 TValue tokval; /* Current token value. */
55 TValue lookaheadval; /* Lookahead token value. */ 56 TValue lookaheadval; /* Lookahead token value. */
56 int current; /* Current character (charint). */
57 LexToken token; /* Current token. */
58 LexToken lookahead; /* Lookahead token. */
59 MSize n; /* Bytes left in input buffer. */
60 const char *p; /* Current position in input buffer. */ 57 const char *p; /* Current position in input buffer. */
58 const char *pe; /* End of input buffer. */
59 LexChar c; /* Current character. */
60 LexToken tok; /* Current token. */
61 LexToken lookahead; /* Lookahead token. */
61 SBuf sb; /* String buffer for tokens. */ 62 SBuf sb; /* String buffer for tokens. */
62 lua_Reader rfunc; /* Reader callback. */ 63 lua_Reader rfunc; /* Reader callback. */
63 void *rdata; /* Reader callback data. */ 64 void *rdata; /* Reader callback data. */
@@ -79,8 +80,14 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
79LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); 80LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
80LJ_FUNC void lj_lex_next(LexState *ls); 81LJ_FUNC void lj_lex_next(LexState *ls);
81LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); 82LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
82LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); 83LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
83LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); 84LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
84LJ_FUNC void lj_lex_init(lua_State *L); 85LJ_FUNC void lj_lex_init(lua_State *L);
85 86
87#ifdef LUA_USE_ASSERT
88#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__))
89#else
90#define lj_assertLS(c, ...) ((void)ls)
91#endif
92
86#endif 93#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 811d11fc..438056d8 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -16,8 +16,14 @@
16#include "lj_func.h" 16#include "lj_func.h"
17#include "lj_bc.h" 17#include "lj_bc.h"
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#if LJ_HASFFI
20#include "lj_ctype.h"
21#endif
19#include "lj_vm.h" 22#include "lj_vm.h"
20#include "lj_strscan.h" 23#include "lj_strscan.h"
24#include "lj_strfmt.h"
25#include "lj_lex.h"
26#include "lj_bcdump.h"
21#include "lj_lib.h" 27#include "lj_lib.h"
22 28
23/* -- Library initialization ---------------------------------------------- */ 29/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +49,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 49 return tabV(L->top-1);
44} 50}
45 51
52static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
53{
54 int len = *p++;
55 GCstr *name = lj_str_new(L, (const char *)p, len);
56 LexState ls;
57 GCproto *pt;
58 GCfunc *fn;
59 memset(&ls, 0, sizeof(ls));
60 ls.L = L;
61 ls.p = (const char *)(p+len);
62 ls.pe = (const char *)~(uintptr_t)0;
63 ls.c = -1;
64 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
65 ls.chunkname = name;
66 pt = lj_bcread_proto(&ls);
67 pt->firstline = ~(BCLine)0;
68 fn = lj_func_newL_empty(L, pt, tabref(L->env));
69 /* NOBARRIER: See below for common barrier. */
70 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
71 return (const uint8_t *)ls.p;
72}
73
46void lj_lib_register(lua_State *L, const char *libname, 74void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 75 const uint8_t *p, const lua_CFunction *cf)
48{ 76{
@@ -87,6 +115,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 115 ofn = fn;
88 } else { 116 } else {
89 switch (tag | len) { 117 switch (tag | len) {
118 case LIBINIT_LUA:
119 p = lib_read_lfunc(L, p, tab);
120 break;
90 case LIBINIT_SET: 121 case LIBINIT_SET:
91 L->top -= 2; 122 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 123 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -120,6 +151,37 @@ void lj_lib_register(lua_State *L, const char *libname,
120 } 151 }
121} 152}
122 153
154/* Push internal function on the stack. */
155GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n)
156{
157 GCfunc *fn;
158 lua_pushcclosure(L, f, n);
159 fn = funcV(L->top-1);
160 fn->c.ffid = (uint8_t)id;
161 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
162 return fn;
163}
164
165void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env)
166{
167 luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);
168 lua_pushcfunction(L, f);
169 /* NOBARRIER: The function is new (marked white). */
170 setgcref(funcV(L->top-1)->c.env, obj2gco(env));
171 lua_setfield(L, -2, name);
172 L->top--;
173}
174
175int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name)
176{
177 GCfunc *fn = lj_lib_pushcf(L, cf, id);
178 GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */
179 setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn);
180 lj_gc_anybarriert(L, t);
181 setfuncV(L, L->top++, fn);
182 return 1;
183}
184
123/* -- Type checks --------------------------------------------------------- */ 185/* -- Type checks --------------------------------------------------------- */
124 186
125TValue *lj_lib_checkany(lua_State *L, int narg) 187TValue *lj_lib_checkany(lua_State *L, int narg)
@@ -137,7 +199,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
137 if (LJ_LIKELY(tvisstr(o))) { 199 if (LJ_LIKELY(tvisstr(o))) {
138 return strV(o); 200 return strV(o);
139 } else if (tvisnumber(o)) { 201 } else if (tvisnumber(o)) {
140 GCstr *s = lj_str_fromnumber(L, o); 202 GCstr *s = lj_strfmt_number(L, o);
141 setstrV(L, o, s); 203 setstrV(L, o, s);
142 return s; 204 return s;
143 } 205 }
@@ -196,20 +258,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
196 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; 258 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
197} 259}
198 260
199int32_t lj_lib_checkbit(lua_State *L, int narg)
200{
201 TValue *o = L->base + narg-1;
202 if (!(o < L->top && lj_strscan_numberobj(o)))
203 lj_err_argt(L, narg, LUA_TNUMBER);
204 if (LJ_LIKELY(tvisint(o))) {
205 return intV(o);
206 } else {
207 int32_t i = lj_num2bit(numV(o));
208 if (LJ_DUALNUM) setintV(o, i);
209 return i;
210 }
211}
212
213GCfunc *lj_lib_checkfunc(lua_State *L, int narg) 261GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
214{ 262{
215 TValue *o = L->base + narg-1; 263 TValue *o = L->base + narg-1;
@@ -256,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
256 return def; 304 return def;
257} 305}
258 306
307/* -- Strict type checks -------------------------------------------------- */
308
309/* The following type checks do not coerce between strings and numbers.
310** And they handle plain int64_t/uint64_t FFI numbers, too.
311*/
312
313#if LJ_HASBUFFER
314GCstr *lj_lib_checkstrx(lua_State *L, int narg)
315{
316 TValue *o = L->base + narg-1;
317 if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING);
318 return strV(o);
319}
320
321int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
322{
323 TValue *o = L->base + narg-1;
324 lj_assertL(b >= 0, "expected range must be non-negative");
325 if (o < L->top) {
326 if (LJ_LIKELY(tvisint(o))) {
327 int32_t i = intV(o);
328 if (i >= a && i <= b) return i;
329 } else if (LJ_LIKELY(tvisnum(o))) {
330 /* For performance reasons, this doesn't check for integerness or
331 ** integer overflow. Overflow detection still works, since all FPUs
332 ** return either MININT or MAXINT, which is then out of range.
333 */
334 int32_t i = (int32_t)numV(o);
335 if (i >= a && i <= b) return i;
336#if LJ_HASFFI
337 } else if (tviscdata(o)) {
338 GCcdata *cd = cdataV(o);
339 if (cd->ctypeid == CTID_INT64) {
340 int64_t i = *(int64_t *)cdataptr(cd);
341 if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i;
342 } else if (cd->ctypeid == CTID_UINT64) {
343 uint64_t i = *(uint64_t *)cdataptr(cd);
344 if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i;
345 } else {
346 goto badtype;
347 }
348#endif
349 } else {
350 goto badtype;
351 }
352 lj_err_arg(L, narg, LJ_ERR_NUMRNG);
353 }
354badtype:
355 lj_err_argt(L, narg, LUA_TNUMBER);
356 return 0; /* unreachable */
357}
358#endif
359
diff --git a/src/lj_lib.h b/src/lj_lib.h
index f149af9d..f59e9ea2 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -41,15 +41,28 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); 41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); 42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); 43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
44LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
45LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); 44LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
46LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); 45LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
47LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); 46LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
48LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); 47LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
49 48
49#if LJ_HASBUFFER
50LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);
51LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg,
52 int32_t a, int32_t b);
53#endif
54
50/* Avoid including lj_frame.h. */ 55/* Avoid including lj_frame.h. */
56#if LJ_GC64
57#define lj_lib_upvalue(L, n) \
58 (&gcval(L->base-2)->fn.c.upvalue[(n)-1])
59#elif LJ_FR2
60#define lj_lib_upvalue(L, n) \
61 (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1])
62#else
51#define lj_lib_upvalue(L, n) \ 63#define lj_lib_upvalue(L, n) \
52 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) 64 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
65#endif
53 66
54#if LJ_TARGET_WINDOWS 67#if LJ_TARGET_WINDOWS
55#define lj_lib_checkfpu(L) \ 68#define lj_lib_checkfpu(L) \
@@ -60,23 +73,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
60#define lj_lib_checkfpu(L) UNUSED(L) 73#define lj_lib_checkfpu(L) UNUSED(L)
61#endif 74#endif
62 75
63/* Push internal function on the stack. */ 76LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n);
64static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
65 int id, int n)
66{
67 GCfunc *fn;
68 lua_pushcclosure(L, f, n);
69 fn = funcV(L->top-1);
70 fn->c.ffid = (uint8_t)id;
71 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
72}
73
74#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) 77#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0))
75 78
76/* Library function declarations. Scanned by buildvm. */ 79/* Library function declarations. Scanned by buildvm. */
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 80#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 81#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 82#define LJLIB_ASM_(name)
83#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 84#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 85#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 86#define LJLIB_REC(handler)
@@ -88,6 +92,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
88 92
89LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, 93LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
90 const uint8_t *init, const lua_CFunction *cf); 94 const uint8_t *init, const lua_CFunction *cf);
95LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f,
96 GCtab *env);
97LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
98 const char *name);
91 99
92/* Library init data tags. */ 100/* Library init data tags. */
93#define LIBINIT_LENMASK 0x3f 101#define LIBINIT_LENMASK 0x3f
@@ -96,7 +104,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 104#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 105#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 106#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 107#define LIBINIT_MAXSTR 0x38
108#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 109#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 110#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 111#define LIBINIT_COPY 0xfc
@@ -104,9 +113,4 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
104#define LIBINIT_FFID 0xfe 113#define LIBINIT_FFID 0xfe
105#define LIBINIT_END 0xff 114#define LIBINIT_END 0xff
106 115
107/* Exported library functions. */
108
109typedef struct RandomState RandomState;
110LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
111
112#endif 116#endif
diff --git a/src/lj_load.c b/src/lj_load.c
index 6df19150..af0c2b1f 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,7 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_gc.h" 16#include "lj_gc.h"
17#include "lj_err.h" 17#include "lj_err.h"
18#include "lj_str.h" 18#include "lj_buf.h"
19#include "lj_func.h" 19#include "lj_func.h"
20#include "lj_frame.h" 20#include "lj_frame.h"
21#include "lj_vm.h" 21#include "lj_vm.h"
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
54 ls.rdata = data; 54 ls.rdata = data;
55 ls.chunkarg = chunkname ? chunkname : "?"; 55 ls.chunkarg = chunkname ? chunkname : "?";
56 ls.mode = mode; 56 ls.mode = mode;
57 lj_str_initbuf(&ls.sb); 57 lj_buf_init(L, &ls.sb);
58 status = lj_vm_cpcall(L, NULL, &ls, cpparser); 58 status = lj_vm_cpcall(L, NULL, &ls, cpparser);
59 lj_lex_cleanup(L, &ls); 59 lj_lex_cleanup(L, &ls);
60 lj_gc_check(L); 60 lj_gc_check(L);
@@ -159,7 +159,7 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s)
159LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) 159LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
160{ 160{
161 cTValue *o = L->top-1; 161 cTValue *o = L->top-1;
162 api_check(L, L->top > L->base); 162 lj_checkapi(L->top > L->base, "top slot empty");
163 if (tvisfunc(o) && isluafunc(funcV(o))) 163 if (tvisfunc(o) && isluafunc(funcV(o)))
164 return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); 164 return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0);
165 else 165 else
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 417fcb6f..b3efbc55 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -14,6 +14,7 @@
14#include "lj_mcode.h" 14#include "lj_mcode.h"
15#include "lj_trace.h" 15#include "lj_trace.h"
16#include "lj_dispatch.h" 16#include "lj_dispatch.h"
17#include "lj_prng.h"
17#endif 18#endif
18#if LJ_HASJIT || LJ_HASFFI 19#if LJ_HASJIT || LJ_HASFFI
19#include "lj_vm.h" 20#include "lj_vm.h"
@@ -44,7 +45,7 @@ void lj_mcode_sync(void *start, void *end)
44 sys_icache_invalidate(start, (char *)end-(char *)start); 45 sys_icache_invalidate(start, (char *)end-(char *)start);
45#elif LJ_TARGET_PPC 46#elif LJ_TARGET_PPC
46 lj_vm_cachesync(start, end); 47 lj_vm_cachesync(start, end);
47#elif defined(__GNUC__) 48#elif defined(__GNUC__) || defined(__clang__)
48 __clear_cache(start, end); 49 __clear_cache(start, end);
49#else 50#else
50#error "Missing builtin to flush instruction cache" 51#error "Missing builtin to flush instruction cache"
@@ -66,8 +67,8 @@ void lj_mcode_sync(void *start, void *end)
66 67
67static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) 68static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
68{ 69{
69 void *p = VirtualAlloc((void *)hint, sz, 70 void *p = LJ_WIN_VALLOC((void *)hint, sz,
70 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); 71 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
71 if (!p && !hint) 72 if (!p && !hint)
72 lj_trace_err(J, LJ_TRERR_MCODEAL); 73 lj_trace_err(J, LJ_TRERR_MCODEAL);
73 return p; 74 return p;
@@ -82,7 +83,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
82static int mcode_setprot(void *p, size_t sz, DWORD prot) 83static int mcode_setprot(void *p, size_t sz, DWORD prot)
83{ 84{
84 DWORD oprot; 85 DWORD oprot;
85 return !VirtualProtect(p, sz, prot, &oprot); 86 return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
86} 87}
87 88
88#elif LJ_TARGET_POSIX 89#elif LJ_TARGET_POSIX
@@ -96,10 +97,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
96#define MCPROT_RW (PROT_READ|PROT_WRITE) 97#define MCPROT_RW (PROT_READ|PROT_WRITE)
97#define MCPROT_RX (PROT_READ|PROT_EXEC) 98#define MCPROT_RX (PROT_READ|PROT_EXEC)
98#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) 99#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC)
100#ifdef PROT_MPROTECT
101#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX))
102#else
103#define MCPROT_CREATE 0
104#endif
99 105
100static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) 106static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
101{ 107{
102 void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 108 void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
103 if (p == MAP_FAILED) { 109 if (p == MAP_FAILED) {
104 if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); 110 if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
105 p = NULL; 111 p = NULL;
@@ -118,52 +124,34 @@ static int mcode_setprot(void *p, size_t sz, int prot)
118 return mprotect(p, sz, prot); 124 return mprotect(p, sz, prot);
119} 125}
120 126
121#elif LJ_64
122
123#error "Missing OS support for explicit placement of executable memory"
124
125#else 127#else
126 128
127/* Fallback allocator. This will fail if memory is not executable by default. */ 129#error "Missing OS support for explicit placement of executable memory"
128#define LUAJIT_UNPROTECT_MCODE
129#define MCPROT_RW 0
130#define MCPROT_RX 0
131#define MCPROT_RWX 0
132
133static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
134{
135 UNUSED(hint); UNUSED(prot);
136 return lj_mem_new(J->L, sz);
137}
138
139static void mcode_free(jit_State *J, void *p, size_t sz)
140{
141 lj_mem_free(J2G(J), p, sz);
142}
143 130
144#endif 131#endif
145 132
146/* -- MCode area protection ----------------------------------------------- */ 133/* -- MCode area protection ----------------------------------------------- */
147 134
148/* Define this ONLY if page protection twiddling becomes a bottleneck. */ 135#if LUAJIT_SECURITY_MCODE == 0
149#ifdef LUAJIT_UNPROTECT_MCODE
150 136
151/* It's generally considered to be a potential security risk to have 137/* Define this ONLY if page protection twiddling becomes a bottleneck.
138**
139** It's generally considered to be a potential security risk to have
152** pages with simultaneous write *and* execute access in a process. 140** pages with simultaneous write *and* execute access in a process.
153** 141**
154** Do not even think about using this mode for server processes or 142** Do not even think about using this mode for server processes or
155** apps handling untrusted external data (such as a browser). 143** apps handling untrusted external data.
156** 144**
157** The security risk is not in LuaJIT itself -- but if an adversary finds 145** The security risk is not in LuaJIT itself -- but if an adversary finds
158** any *other* flaw in your C application logic, then any RWX memory page 146** any *other* flaw in your C application logic, then any RWX memory pages
159** simplifies writing an exploit considerably. 147** simplify writing an exploit considerably.
160*/ 148*/
161#define MCPROT_GEN MCPROT_RWX 149#define MCPROT_GEN MCPROT_RWX
162#define MCPROT_RUN MCPROT_RWX 150#define MCPROT_RUN MCPROT_RWX
163 151
164static void mcode_protect(jit_State *J, int prot) 152static void mcode_protect(jit_State *J, int prot)
165{ 153{
166 UNUSED(J); UNUSED(prot); 154 UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot);
167} 155}
168 156
169#else 157#else
@@ -221,8 +209,8 @@ static void *mcode_alloc(jit_State *J, size_t sz)
221 */ 209 */
222#if LJ_TARGET_MIPS 210#if LJ_TARGET_MIPS
223 /* Use the middle of the 256MB-aligned region. */ 211 /* Use the middle of the 256MB-aligned region. */
224 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + 212 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
225 0x08000000u; 213 ~(uintptr_t)0x0fffffffu) + 0x08000000u;
226#else 214#else
227 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; 215 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
228#endif 216#endif
@@ -242,7 +230,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
242 } 230 }
243 /* Next try probing 64K-aligned pseudo-random addresses. */ 231 /* Next try probing 64K-aligned pseudo-random addresses. */
244 do { 232 do {
245 hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; 233 hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
246 } while (!(hint + sz < range+range)); 234 } while (!(hint + sz < range+range));
247 hint = target + hint - range; 235 hint = target + hint - range;
248 } 236 }
@@ -255,7 +243,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
255/* All memory addresses are reachable by relative jumps. */ 243/* All memory addresses are reachable by relative jumps. */
256static void *mcode_alloc(jit_State *J, size_t sz) 244static void *mcode_alloc(jit_State *J, size_t sz)
257{ 245{
258#ifdef __OpenBSD__ 246#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
259 /* Allow better executable memory allocation for OpenBSD W^X mode. */ 247 /* Allow better executable memory allocation for OpenBSD W^X mode. */
260 void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); 248 void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
261 if (p && mcode_setprot(p, sz, MCPROT_GEN)) { 249 if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
@@ -286,6 +274,7 @@ static void mcode_allocarea(jit_State *J)
286 ((MCLink *)J->mcarea)->next = oldarea; 274 ((MCLink *)J->mcarea)->next = oldarea;
287 ((MCLink *)J->mcarea)->size = sz; 275 ((MCLink *)J->mcarea)->size = sz;
288 J->szallmcarea += sz; 276 J->szallmcarea += sz;
277 J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot);
289} 278}
290 279
291/* Free all MCode areas. */ 280/* Free all MCode areas. */
@@ -296,7 +285,9 @@ void lj_mcode_free(jit_State *J)
296 J->szallmcarea = 0; 285 J->szallmcarea = 0;
297 while (mc) { 286 while (mc) {
298 MCode *next = ((MCLink *)mc)->next; 287 MCode *next = ((MCLink *)mc)->next;
299 mcode_free(J, mc, ((MCLink *)mc)->size); 288 size_t sz = ((MCLink *)mc)->size;
289 lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
290 mcode_free(J, mc, sz);
300 mc = next; 291 mc = next;
301 } 292 }
302} 293}
@@ -331,35 +322,36 @@ void lj_mcode_abort(jit_State *J)
331/* Set/reset protection to allow patching of MCode areas. */ 322/* Set/reset protection to allow patching of MCode areas. */
332MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) 323MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
333{ 324{
334#ifdef LUAJIT_UNPROTECT_MCODE
335 UNUSED(J); UNUSED(ptr); UNUSED(finish);
336 return NULL;
337#else
338 if (finish) { 325 if (finish) {
326#if LUAJIT_SECURITY_MCODE
339 if (J->mcarea == ptr) 327 if (J->mcarea == ptr)
340 mcode_protect(J, MCPROT_RUN); 328 mcode_protect(J, MCPROT_RUN);
341 else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) 329 else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
342 mcode_protfail(J); 330 mcode_protfail(J);
331#endif
343 return NULL; 332 return NULL;
344 } else { 333 } else {
345 MCode *mc = J->mcarea; 334 MCode *mc = J->mcarea;
346 /* Try current area first to use the protection cache. */ 335 /* Try current area first to use the protection cache. */
347 if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { 336 if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
337#if LUAJIT_SECURITY_MCODE
348 mcode_protect(J, MCPROT_GEN); 338 mcode_protect(J, MCPROT_GEN);
339#endif
349 return mc; 340 return mc;
350 } 341 }
351 /* Otherwise search through the list of MCode areas. */ 342 /* Otherwise search through the list of MCode areas. */
352 for (;;) { 343 for (;;) {
353 mc = ((MCLink *)mc)->next; 344 mc = ((MCLink *)mc)->next;
354 lua_assert(mc != NULL); 345 lj_assertJ(mc != NULL, "broken MCode area chain");
355 if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { 346 if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
347#if LUAJIT_SECURITY_MCODE
356 if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) 348 if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
357 mcode_protfail(J); 349 mcode_protfail(J);
350#endif
358 return mc; 351 return mc;
359 } 352 }
360 } 353 }
361 } 354 }
362#endif
363} 355}
364 356
365/* Limit of MCode reservation reached. */ 357/* Limit of MCode reservation reached. */
diff --git a/src/lj_meta.c b/src/lj_meta.c
index e30a4148..660dfec0 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_meta.h" 18#include "lj_meta.h"
@@ -19,6 +20,8 @@
19#include "lj_bc.h" 20#include "lj_bc.h"
20#include "lj_vm.h" 21#include "lj_vm.h"
21#include "lj_strscan.h" 22#include "lj_strscan.h"
23#include "lj_strfmt.h"
24#include "lj_lib.h"
22 25
23/* -- Metamethod handling ------------------------------------------------- */ 26/* -- Metamethod handling ------------------------------------------------- */
24 27
@@ -44,7 +47,7 @@ void lj_meta_init(lua_State *L)
44cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) 47cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name)
45{ 48{
46 cTValue *mo = lj_tab_getstr(mt, name); 49 cTValue *mo = lj_tab_getstr(mt, name);
47 lua_assert(mm <= MM_FAST); 50 lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm);
48 if (!mo || tvisnil(mo)) { /* No metamethod? */ 51 if (!mo || tvisnil(mo)) { /* No metamethod? */
49 mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */ 52 mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */
50 return NULL; 53 return NULL;
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
77 TValue *base = L->base; 80 TValue *base = L->base;
78 TValue *top = L->top; 81 TValue *top = L->top;
79 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ 82 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */
80 copyTV(L, base-1, tv); /* Replace frame with new object. */ 83 copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */
81 top->u32.lo = LJ_CONT_TAILCALL; 84 if (LJ_FR2)
82 setframe_pc(top, pc); 85 (top++)->u64 = LJ_CONT_TAILCALL;
83 setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */ 86 else
84 setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT); 87 top->u32.lo = LJ_CONT_TAILCALL;
85 L->base = L->top = top+2; 88 setframe_pc(top++, pc);
89 setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */
90 if (LJ_FR2) top++;
91 setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
92 L->base = L->top = top+1;
86 /* 93 /*
87 ** before: [old_mo|PC] [... ...] 94 ** before: [old_mo|PC] [... ...]
88 ** ^base ^top 95 ** ^base ^top
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
113 */ 120 */
114 TValue *top = L->top; 121 TValue *top = L->top;
115 if (curr_funcisL(L)) top = curr_topL(L); 122 if (curr_funcisL(L)) top = curr_topL(L);
116 setcont(top, cont); /* Assembler VM stores PC in upper word. */ 123 setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */
117 copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ 124 if (LJ_FR2) setnilV(top++);
118 copyTV(L, top+2, a); 125 copyTV(L, top++, mo); /* Store metamethod and two arguments. */
119 copyTV(L, top+3, b); 126 if (LJ_FR2) setnilV(top++);
120 return top+2; /* Return new base. */ 127 copyTV(L, top, a);
128 copyTV(L, top+1, b);
129 return top; /* Return new base. */
121} 130}
122 131
123/* -- C helpers for some instructions, called from assembler VM ----------- */ 132/* -- C helpers for some instructions, called from assembler VM ----------- */
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
225 } 234 }
226} 235}
227 236
228/* In-place coercion of a number to a string. */
229static LJ_AINLINE int tostring(lua_State *L, TValue *o)
230{
231 if (tvisstr(o)) {
232 return 1;
233 } else if (tvisnumber(o)) {
234 setstrV(L, o, lj_str_fromnumber(L, o));
235 return 1;
236 } else {
237 return 0;
238 }
239}
240
241/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ 237/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
242TValue *lj_meta_cat(lua_State *L, TValue *top, int left) 238TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
243{ 239{
244 int fromc = 0; 240 int fromc = 0;
245 if (left < 0) { left = -left; fromc = 1; } 241 if (left < 0) { left = -left; fromc = 1; }
246 do { 242 do {
247 int n = 1; 243 if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) ||
248 if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { 244 !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) {
249 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); 245 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
250 if (tvisnil(mo)) { 246 if (tvisnil(mo)) {
251 mo = lj_meta_lookup(L, top, MM_concat); 247 mo = lj_meta_lookup(L, top, MM_concat);
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
266 ** after mm: [...][CAT stack ...] <--push-- [result] 262 ** after mm: [...][CAT stack ...] <--push-- [result]
267 ** next step: [...][CAT stack .............] 263 ** next step: [...][CAT stack .............]
268 */ 264 */
269 copyTV(L, top+2, top); /* Careful with the order of stack copies! */ 265 copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */
270 copyTV(L, top+1, top-1); 266 copyTV(L, top+2*LJ_FR2+1, top-1);
271 copyTV(L, top, mo); 267 copyTV(L, top+LJ_FR2, mo);
272 setcont(top-1, lj_cont_cat); 268 setcont(top-1, lj_cont_cat);
269 if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; }
273 return top+1; /* Trigger metamethod call. */ 270 return top+1; /* Trigger metamethod call. */
274 } else if (strV(top)->len == 0) { /* Shortcut. */
275 (void)tostring(L, top-1);
276 } else { 271 } else {
277 /* Pick as many strings as possible from the top and concatenate them: 272 /* Pick as many strings as possible from the top and concatenate them:
278 ** 273 **
@@ -281,27 +276,33 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
281 ** concat: [...][CAT stack ...] [result] 276 ** concat: [...][CAT stack ...] [result]
282 ** next step: [...][CAT stack ............] 277 ** next step: [...][CAT stack ............]
283 */ 278 */
284 MSize tlen = strV(top)->len; 279 TValue *e, *o = top;
285 char *buffer; 280 uint64_t tlen = tvisstr(o) ? strV(o)->len :
286 int i; 281 tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
287 for (n = 1; n <= left && tostring(L, top-n); n++) { 282 SBuf *sb;
288 MSize len = strV(top-n)->len; 283 do {
289 if (len >= LJ_MAX_STR - tlen) 284 o--; tlen += tvisstr(o) ? strV(o)->len :
290 lj_err_msg(L, LJ_ERR_STROV); 285 tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
291 tlen += len; 286 } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
292 } 287 if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
293 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); 288 sb = lj_buf_tmp_(L);
294 n--; 289 lj_buf_more(sb, (MSize)tlen);
295 tlen = 0; 290 for (e = top, top = o; o <= e; o++) {
296 for (i = n; i >= 0; i--) { 291 if (tvisstr(o)) {
297 MSize len = strV(top-i)->len; 292 GCstr *s = strV(o);
298 memcpy(buffer + tlen, strVdata(top-i), len); 293 MSize len = s->len;
299 tlen += len; 294 lj_buf_putmem(sb, strdata(s), len);
295 } else if (tvisbuf(o)) {
296 SBufExt *sbx = bufV(o);
297 lj_buf_putmem(sb, sbx->r, sbufxlen(sbx));
298 } else if (tvisint(o)) {
299 lj_strfmt_putint(sb, intV(o));
300 } else {
301 lj_strfmt_putfnum(sb, STRFMT_G14, numV(o));
302 }
300 } 303 }
301 setstrV(L, top-n, lj_str_new(L, buffer, tlen)); 304 setstrV(L, top, lj_buf_str(L, sb));
302 } 305 }
303 left -= n;
304 top -= n;
305 } while (left >= 1); 306 } while (left >= 1);
306 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { 307 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
307 if (!fromc) L->top = curr_topL(L); 308 if (!fromc) L->top = curr_topL(L);
@@ -338,12 +339,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
338 return (TValue *)(intptr_t)ne; 339 return (TValue *)(intptr_t)ne;
339 } 340 }
340 top = curr_top(L); 341 top = curr_top(L);
341 setcont(top, ne ? lj_cont_condf : lj_cont_condt); 342 setcont(top++, ne ? lj_cont_condf : lj_cont_condt);
342 copyTV(L, top+1, mo); 343 if (LJ_FR2) setnilV(top++);
344 copyTV(L, top++, mo);
345 if (LJ_FR2) setnilV(top++);
343 it = ~(uint32_t)o1->gch.gct; 346 it = ~(uint32_t)o1->gch.gct;
344 setgcV(L, top+2, o1, it); 347 setgcV(L, top, o1, it);
345 setgcV(L, top+3, o2, it); 348 setgcV(L, top+1, o2, it);
346 return top+2; /* Trigger metamethod call. */ 349 return top; /* Trigger metamethod call. */
347 } 350 }
348 return (TValue *)(intptr_t)ne; 351 return (TValue *)(intptr_t)ne;
349} 352}
@@ -365,8 +368,8 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
365 } else if (op == BC_ISEQN) { 368 } else if (op == BC_ISEQN) {
366 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; 369 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
367 } else { 370 } else {
368 lua_assert(op == BC_ISEQP); 371 lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op);
369 setitype(&tv, ~bc_d(ins)); 372 setpriV(&tv, ~bc_d(ins));
370 o2 = &tv; 373 o2 = &tv;
371 } 374 }
372 mo = lj_meta_lookup(L, o1mm, MM_eq); 375 mo = lj_meta_lookup(L, o1mm, MM_eq);
@@ -423,6 +426,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 426 }
424} 427}
425 428
429/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
430void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
431{
432 L->top = curr_topL(L);
433 ra++; tp--;
434 lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE");
435 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
436 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
437 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
438 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
439}
440
426/* Helper for calls. __call metamethod. */ 441/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 442void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 443{
@@ -430,7 +445,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
430 TValue *p; 445 TValue *p;
431 if (!tvisfunc(mo)) 446 if (!tvisfunc(mo))
432 lj_err_optype_call(L, func); 447 lj_err_optype_call(L, func);
433 for (p = top; p > func; p--) copyTV(L, p, p-1); 448 for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1);
449 if (LJ_FR2) copyTV(L, func+2, func);
434 copyTV(L, func, mo); 450 copyTV(L, func, mo);
435} 451}
436 452
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 0e3f217a..400a1d74 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.c b/src/lj_obj.c
index ba8e299d..a2c3dc5b 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
20}; 20};
21 21
22/* Compare two objects without calling metamethods. */ 22/* Compare two objects without calling metamethods. */
23int lj_obj_equal(cTValue *o1, cTValue *o2) 23int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
24{ 24{
25 if (itype(o1) == itype(o2)) { 25 if (itype(o1) == itype(o2)) {
26 if (tvispri(o1)) 26 if (tvispri(o1))
@@ -33,3 +33,19 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
33 return numberVnum(o1) == numberVnum(o2); 33 return numberVnum(o1) == numberVnum(o2);
34} 34}
35 35
36/* Return pointer to object or its object data. */
37const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o)
38{
39 UNUSED(g);
40 if (tvisudata(o))
41 return uddata(udataV(o));
42 else if (tvislightud(o))
43 return lightudV(g, o);
44 else if (LJ_HASFFI && tviscdata(o))
45 return cdataptr(cdataV(o));
46 else if (tvisgcv(o))
47 return gcV(o);
48 else
49 return NULL;
50}
51
diff --git a/src/lj_obj.h b/src/lj_obj.h
index e6e55308..5547a79b 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -13,44 +13,81 @@
13#include "lj_def.h" 13#include "lj_def.h"
14#include "lj_arch.h" 14#include "lj_arch.h"
15 15
16/* -- Memory references (32 bit address space) ---------------------------- */ 16/* -- Memory references --------------------------------------------------- */
17 17
18/* Memory size. */ 18/* Memory and GC object sizes. */
19typedef uint32_t MSize; 19typedef uint32_t MSize;
20#if LJ_GC64
21typedef uint64_t GCSize;
22#else
23typedef uint32_t GCSize;
24#endif
20 25
21/* Memory reference */ 26/* Memory reference */
22typedef struct MRef { 27typedef struct MRef {
28#if LJ_GC64
29 uint64_t ptr64; /* True 64 bit pointer. */
30#else
23 uint32_t ptr32; /* Pseudo 32 bit pointer. */ 31 uint32_t ptr32; /* Pseudo 32 bit pointer. */
32#endif
24} MRef; 33} MRef;
25 34
35#if LJ_GC64
36#define mref(r, t) ((t *)(void *)(r).ptr64)
37#define mrefu(r) ((r).ptr64)
38
39#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p))
40#define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u))
41#define setmrefr(r, v) ((r).ptr64 = (v).ptr64)
42#else
26#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) 43#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
44#define mrefu(r) ((r).ptr32)
27 45
28#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) 46#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
47#define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u))
29#define setmrefr(r, v) ((r).ptr32 = (v).ptr32) 48#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
49#endif
30 50
31/* -- GC object references (32 bit address space) ------------------------- */ 51/* -- GC object references ------------------------------------------------ */
32 52
33/* GCobj reference */ 53/* GCobj reference */
34typedef struct GCRef { 54typedef struct GCRef {
55#if LJ_GC64
56 uint64_t gcptr64; /* True 64 bit pointer. */
57#else
35 uint32_t gcptr32; /* Pseudo 32 bit pointer. */ 58 uint32_t gcptr32; /* Pseudo 32 bit pointer. */
59#endif
36} GCRef; 60} GCRef;
37 61
38/* Common GC header for all collectable objects. */ 62/* Common GC header for all collectable objects. */
39#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct 63#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
40/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ 64/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
41 65
66#if LJ_GC64
67#define gcref(r) ((GCobj *)(r).gcptr64)
68#define gcrefp(r, t) ((t *)(void *)(r).gcptr64)
69#define gcrefu(r) ((r).gcptr64)
70#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64)
71
72#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch)
73#define setgcreft(r, gc, it) \
74 (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47)
75#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p))
76#define setgcrefnull(r) ((r).gcptr64 = 0)
77#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64)
78#else
42#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) 79#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
43#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) 80#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
44#define gcrefu(r) ((r).gcptr32) 81#define gcrefu(r) ((r).gcptr32)
45#define gcrefi(r) ((int32_t)(r).gcptr32)
46#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) 82#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
47#define gcnext(gc) (gcref((gc)->gch.nextgc))
48 83
49#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) 84#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
50#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
51#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) 85#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
52#define setgcrefnull(r) ((r).gcptr32 = 0) 86#define setgcrefnull(r) ((r).gcptr32 = 0)
53#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) 87#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
88#endif
89
90#define gcnext(gc) (gcref((gc)->gch.nextgc))
54 91
55/* IMPORTANT NOTE: 92/* IMPORTANT NOTE:
56** 93**
@@ -119,11 +156,10 @@ typedef int32_t BCLine; /* Bytecode line number. */
119/* Internal assembler functions. Never call these directly from C. */ 156/* Internal assembler functions. Never call these directly from C. */
120typedef void (*ASMFunction)(void); 157typedef void (*ASMFunction)(void);
121 158
122/* Resizable string buffer. Need this here, details in lj_str.h. */ 159/* Resizable string buffer. Need this here, details in lj_buf.h. */
160#define SBufHeader char *w, *e, *b; MRef L
123typedef struct SBuf { 161typedef struct SBuf {
124 char *buf; /* String buffer base. */ 162 SBufHeader;
125 MSize n; /* String buffer length. */
126 MSize sz; /* String buffer size. */
127} SBuf; 163} SBuf;
128 164
129/* -- Tags and values ----------------------------------------------------- */ 165/* -- Tags and values ----------------------------------------------------- */
@@ -131,13 +167,23 @@ typedef struct SBuf {
131/* Frame link. */ 167/* Frame link. */
132typedef union { 168typedef union {
133 int32_t ftsz; /* Frame type and size of previous frame. */ 169 int32_t ftsz; /* Frame type and size of previous frame. */
134 MRef pcr; /* Overlaps PC for Lua frames. */ 170 MRef pcr; /* Or PC for Lua frames. */
135} FrameLink; 171} FrameLink;
136 172
137/* Tagged value. */ 173/* Tagged value. */
138typedef LJ_ALIGN(8) union TValue { 174typedef LJ_ALIGN(8) union TValue {
139 uint64_t u64; /* 64 bit pattern overlaps number. */ 175 uint64_t u64; /* 64 bit pattern overlaps number. */
140 lua_Number n; /* Number object overlaps split tag/value object. */ 176 lua_Number n; /* Number object overlaps split tag/value object. */
177#if LJ_GC64
178 GCRef gcr; /* GCobj reference with tag. */
179 int64_t it64;
180 struct {
181 LJ_ENDIAN_LOHI(
182 int32_t i; /* Integer value. */
183 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
184 )
185 };
186#else
141 struct { 187 struct {
142 LJ_ENDIAN_LOHI( 188 LJ_ENDIAN_LOHI(
143 union { 189 union {
@@ -147,12 +193,17 @@ typedef LJ_ALIGN(8) union TValue {
147 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ 193 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
148 ) 194 )
149 }; 195 };
196#endif
197#if LJ_FR2
198 int64_t ftsz; /* Frame type and size of previous frame, or PC. */
199#else
150 struct { 200 struct {
151 LJ_ENDIAN_LOHI( 201 LJ_ENDIAN_LOHI(
152 GCRef func; /* Function for next frame (or dummy L). */ 202 GCRef func; /* Function for next frame (or dummy L). */
153 , FrameLink tp; /* Link to previous frame. */ 203 , FrameLink tp; /* Link to previous frame. */
154 ) 204 )
155 } fr; 205 } fr;
206#endif
156 struct { 207 struct {
157 LJ_ENDIAN_LOHI( 208 LJ_ENDIAN_LOHI(
158 uint32_t lo; /* Lower 32 bits of number. */ 209 uint32_t lo; /* Lower 32 bits of number. */
@@ -172,6 +223,8 @@ typedef const TValue cTValue;
172 223
173/* Internal object tags. 224/* Internal object tags.
174** 225**
226** Format for 32 bit GC references (!LJ_GC64):
227**
175** Internal tags overlap the MSW of a number object (must be a double). 228** Internal tags overlap the MSW of a number object (must be a double).
176** Interpreted as a double these are special NaNs. The FPU only generates 229** Interpreted as a double these are special NaNs. The FPU only generates
177** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available 230** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
@@ -181,11 +234,24 @@ typedef const TValue cTValue;
181** ---MSW---.---LSW--- 234** ---MSW---.---LSW---
182** primitive types | itype | | 235** primitive types | itype | |
183** lightuserdata | itype | void * | (32 bit platforms) 236** lightuserdata | itype | void * | (32 bit platforms)
184** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers) 237** lightuserdata |ffff|seg| ofs | (64 bit platforms)
185** GC objects | itype | GCRef | 238** GC objects | itype | GCRef |
186** int (LJ_DUALNUM)| itype | int | 239** int (LJ_DUALNUM)| itype | int |
187** number -------double------ 240** number -------double------
188** 241**
242** Format for 64 bit GC references (LJ_GC64):
243**
244** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next
245** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer,
246** a zero-extended 32 bit integer or all bits set to 1 for primitive types.
247**
248** ------MSW------.------LSW------
249** primitive types |1..1|itype|1..................1|
250** GC objects |1..1|itype|-------GCRef--------|
251** lightuserdata |1..1|itype|seg|------ofs-------|
252** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
253** number ------------double-------------
254**
189** ORDER LJ_T 255** ORDER LJ_T
190** Primitive types nil/false/true must be first, lightuserdata next. 256** Primitive types nil/false/true must be first, lightuserdata next.
191** GC objects are at the end, table/userdata must be lowest. 257** GC objects are at the end, table/userdata must be lowest.
@@ -208,7 +274,7 @@ typedef const TValue cTValue;
208#define LJ_TNUMX (~13u) 274#define LJ_TNUMX (~13u)
209 275
210/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ 276/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */
211#if LJ_64 277#if LJ_64 && !LJ_GC64
212#define LJ_TISNUM 0xfffeffffu 278#define LJ_TISNUM 0xfffeffffu
213#else 279#else
214#define LJ_TISNUM LJ_TNUMX 280#define LJ_TISNUM LJ_TNUMX
@@ -218,14 +284,28 @@ typedef const TValue cTValue;
218#define LJ_TISGCV (LJ_TSTR+1) 284#define LJ_TISGCV (LJ_TSTR+1)
219#define LJ_TISTABUD LJ_TTAB 285#define LJ_TISTABUD LJ_TTAB
220 286
287#if LJ_GC64
288#define LJ_GCVMASK (((uint64_t)1 << 47) - 1)
289#endif
290
291#if LJ_64
292/* To stay within 47 bits, lightuserdata is segmented. */
293#define LJ_LIGHTUD_BITS_SEG 8
294#define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG)
295#endif
296
221/* -- String object ------------------------------------------------------- */ 297/* -- String object ------------------------------------------------------- */
222 298
299typedef uint32_t StrHash; /* String hash value. */
300typedef uint32_t StrID; /* String ID. */
301
223/* String object header. String payload follows. */ 302/* String object header. String payload follows. */
224typedef struct GCstr { 303typedef struct GCstr {
225 GCHeader; 304 GCHeader;
226 uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ 305 uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
227 uint8_t unused; 306 uint8_t hashalg; /* Hash algorithm. */
228 MSize hash; /* Hash of string. */ 307 StrID sid; /* Interned string ID. */
308 StrHash hash; /* Hash of string. */
229 MSize len; /* Size of string. */ 309 MSize len; /* Size of string. */
230} GCstr; 310} GCstr;
231 311
@@ -233,7 +313,6 @@ typedef struct GCstr {
233#define strdata(s) ((const char *)((s)+1)) 313#define strdata(s) ((const char *)((s)+1))
234#define strdatawr(s) ((char *)((s)+1)) 314#define strdatawr(s) ((char *)((s)+1))
235#define strVdata(o) strdata(strV(o)) 315#define strVdata(o) strdata(strV(o))
236#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1)
237 316
238/* -- Userdata object ----------------------------------------------------- */ 317/* -- Userdata object ----------------------------------------------------- */
239 318
@@ -253,6 +332,7 @@ enum {
253 UDTYPE_USERDATA, /* Regular userdata. */ 332 UDTYPE_USERDATA, /* Regular userdata. */
254 UDTYPE_IO_FILE, /* I/O library FILE. */ 333 UDTYPE_IO_FILE, /* I/O library FILE. */
255 UDTYPE_FFI_CLIB, /* FFI C library namespace. */ 334 UDTYPE_FFI_CLIB, /* FFI C library namespace. */
335 UDTYPE_BUFFER, /* String buffer. */
256 UDTYPE__MAX 336 UDTYPE__MAX
257}; 337};
258 338
@@ -291,6 +371,9 @@ typedef struct GCproto {
291 uint8_t numparams; /* Number of parameters. */ 371 uint8_t numparams; /* Number of parameters. */
292 uint8_t framesize; /* Fixed frame size. */ 372 uint8_t framesize; /* Fixed frame size. */
293 MSize sizebc; /* Number of bytecode instructions. */ 373 MSize sizebc; /* Number of bytecode instructions. */
374#if LJ_GC64
375 uint32_t unused_gc64;
376#endif
294 GCRef gclist; 377 GCRef gclist;
295 MRef k; /* Split constant array (points to the middle). */ 378 MRef k; /* Split constant array (points to the middle). */
296 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ 379 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */
@@ -402,7 +485,9 @@ typedef struct Node {
402 TValue val; /* Value object. Must be first field. */ 485 TValue val; /* Value object. Must be first field. */
403 TValue key; /* Key object. */ 486 TValue key; /* Key object. */
404 MRef next; /* Hash chain. */ 487 MRef next; /* Hash chain. */
488#if !LJ_GC64
405 MRef freetop; /* Top of free elements (stored in t->node[0]). */ 489 MRef freetop; /* Top of free elements (stored in t->node[0]). */
490#endif
406} Node; 491} Node;
407 492
408LJ_STATIC_ASSERT(offsetof(Node, val) == 0); 493LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
@@ -417,12 +502,22 @@ typedef struct GCtab {
417 MRef node; /* Hash part. */ 502 MRef node; /* Hash part. */
418 uint32_t asize; /* Size of array part (keys [0, asize-1]). */ 503 uint32_t asize; /* Size of array part (keys [0, asize-1]). */
419 uint32_t hmask; /* Hash part mask (size of hash part - 1). */ 504 uint32_t hmask; /* Hash part mask (size of hash part - 1). */
505#if LJ_GC64
506 MRef freetop; /* Top of free elements. */
507#endif
420} GCtab; 508} GCtab;
421 509
422#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) 510#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
423#define tabref(r) (&gcref((r))->tab) 511#define tabref(r) (&gcref((r))->tab)
424#define noderef(r) (mref((r), Node)) 512#define noderef(r) (mref((r), Node))
425#define nextnode(n) (mref((n)->next, Node)) 513#define nextnode(n) (mref((n)->next, Node))
514#if LJ_GC64
515#define getfreetop(t, n) (noderef((t)->freetop))
516#define setfreetop(t, n, v) (setmref((t)->freetop, (v)))
517#else
518#define getfreetop(t, n) (noderef((n)->freetop))
519#define setfreetop(t, n, v) (setmref((n)->freetop, (v)))
520#endif
426 521
427/* -- State objects ------------------------------------------------------- */ 522/* -- State objects ------------------------------------------------------- */
428 523
@@ -488,13 +583,18 @@ typedef enum {
488#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) 583#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
489#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) 584#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
490 585
586/* Garbage collector state. */
491typedef struct GCState { 587typedef struct GCState {
492 MSize total; /* Memory currently allocated. */ 588 GCSize total; /* Memory currently allocated. */
493 MSize threshold; /* Memory threshold. */ 589 GCSize threshold; /* Memory threshold. */
494 uint8_t currentwhite; /* Current white color. */ 590 uint8_t currentwhite; /* Current white color. */
495 uint8_t state; /* GC state. */ 591 uint8_t state; /* GC state. */
496 uint8_t nocdatafin; /* No cdata finalizer called. */ 592 uint8_t nocdatafin; /* No cdata finalizer called. */
497 uint8_t unused2; 593#if LJ_64
594 uint8_t lightudnum; /* Number of lightuserdata segments - 1. */
595#else
596 uint8_t unused1;
597#endif
498 MSize sweepstr; /* Sweep position in string table. */ 598 MSize sweepstr; /* Sweep position in string table. */
499 GCRef root; /* List of all collectable objects. */ 599 GCRef root; /* List of all collectable objects. */
500 MRef sweep; /* Sweep position in root list. */ 600 MRef sweep; /* Sweep position in root list. */
@@ -502,42 +602,57 @@ typedef struct GCState {
502 GCRef grayagain; /* List of objects for atomic traversal. */ 602 GCRef grayagain; /* List of objects for atomic traversal. */
503 GCRef weak; /* List of weak tables (to be cleared). */ 603 GCRef weak; /* List of weak tables (to be cleared). */
504 GCRef mmudata; /* List of userdata (to be finalized). */ 604 GCRef mmudata; /* List of userdata (to be finalized). */
605 GCSize debt; /* Debt (how much GC is behind schedule). */
606 GCSize estimate; /* Estimate of memory actually in use. */
505 MSize stepmul; /* Incremental GC step granularity. */ 607 MSize stepmul; /* Incremental GC step granularity. */
506 MSize debt; /* Debt (how much GC is behind schedule). */
507 MSize estimate; /* Estimate of memory actually in use. */
508 MSize pause; /* Pause between successive GC cycles. */ 608 MSize pause; /* Pause between successive GC cycles. */
609#if LJ_64
610 MRef lightudseg; /* Upper bits of lightuserdata segments. */
611#endif
509} GCState; 612} GCState;
510 613
614/* String interning state. */
615typedef struct StrInternState {
616 GCRef *tab; /* String hash table anchors. */
617 MSize mask; /* String hash mask (size of hash table - 1). */
618 MSize num; /* Number of strings in hash table. */
619 StrID id; /* Next string ID. */
620 uint8_t idreseed; /* String ID reseed counter. */
621 uint8_t second; /* String interning table uses secondary hashing. */
622 uint8_t unused1;
623 uint8_t unused2;
624 LJ_ALIGN(8) uint64_t seed; /* Random string seed. */
625} StrInternState;
626
511/* Global state, shared by all threads of a Lua universe. */ 627/* Global state, shared by all threads of a Lua universe. */
512typedef struct global_State { 628typedef struct global_State {
513 GCRef *strhash; /* String hash table (hash chain anchors). */
514 MSize strmask; /* String hash mask (size of hash table - 1). */
515 MSize strnum; /* Number of strings in hash table. */
516 lua_Alloc allocf; /* Memory allocator. */ 629 lua_Alloc allocf; /* Memory allocator. */
517 void *allocd; /* Memory allocator data. */ 630 void *allocd; /* Memory allocator data. */
518 GCState gc; /* Garbage collector. */ 631 GCState gc; /* Garbage collector. */
519 SBuf tmpbuf; /* Temporary buffer for string concatenation. */
520 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
521 GCstr strempty; /* Empty string. */ 632 GCstr strempty; /* Empty string. */
522 uint8_t stremptyz; /* Zero terminator of empty string. */ 633 uint8_t stremptyz; /* Zero terminator of empty string. */
523 uint8_t hookmask; /* Hook mask. */ 634 uint8_t hookmask; /* Hook mask. */
524 uint8_t dispatchmode; /* Dispatch mode. */ 635 uint8_t dispatchmode; /* Dispatch mode. */
525 uint8_t vmevmask; /* VM event mask. */ 636 uint8_t vmevmask; /* VM event mask. */
637 StrInternState str; /* String interning. */
638 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
526 GCRef mainthref; /* Link to main thread. */ 639 GCRef mainthref; /* Link to main thread. */
527 TValue registrytv; /* Anchor for registry. */ 640 SBuf tmpbuf; /* Temporary string buffer. */
528 TValue tmptv, tmptv2; /* Temporary TValues. */ 641 TValue tmptv, tmptv2; /* Temporary TValues. */
642 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
643 TValue registrytv; /* Anchor for registry. */
529 GCupval uvhead; /* Head of double-linked list of all open upvalues. */ 644 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
530 int32_t hookcount; /* Instruction hook countdown. */ 645 int32_t hookcount; /* Instruction hook countdown. */
531 int32_t hookcstart; /* Start count for instruction hook counter. */ 646 int32_t hookcstart; /* Start count for instruction hook counter. */
532 lua_Hook hookf; /* Hook function. */ 647 lua_Hook hookf; /* Hook function. */
533 lua_CFunction wrapf; /* Wrapper for C function calls. */ 648 lua_CFunction wrapf; /* Wrapper for C function calls. */
534 lua_CFunction panic; /* Called as a last resort for errors. */ 649 lua_CFunction panic; /* Called as a last resort for errors. */
535 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
536 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ 650 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
537 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ 651 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
538 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 652 GCRef cur_L; /* Currently executing lua_State. */
539 MRef jit_base; /* Current JIT code L->base. */ 653 MRef jit_base; /* Current JIT code L->base or NULL. */
540 MRef ctype_state; /* Pointer to C type state. */ 654 MRef ctype_state; /* Pointer to C type state. */
655 PRNGState prng; /* Global PRNG state. */
541 GCRef gcroot[GCROOT_MAX]; /* GC roots. */ 656 GCRef gcroot[GCROOT_MAX]; /* GC roots. */
542} global_State; 657} global_State;
543 658
@@ -553,9 +668,11 @@ typedef struct global_State {
553#define HOOK_ACTIVE_SHIFT 4 668#define HOOK_ACTIVE_SHIFT 4
554#define HOOK_VMEVENT 0x20 669#define HOOK_VMEVENT 0x20
555#define HOOK_GC 0x40 670#define HOOK_GC 0x40
671#define HOOK_PROFILE 0x80
556#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) 672#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
557#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) 673#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
558#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) 674#define hook_entergc(g) \
675 ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE)
559#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) 676#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
560#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) 677#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
561#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) 678#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
@@ -583,12 +700,23 @@ struct lua_State {
583#define registry(L) (&G(L)->registrytv) 700#define registry(L) (&G(L)->registrytv)
584 701
585/* Macros to access the currently executing (Lua) function. */ 702/* Macros to access the currently executing (Lua) function. */
703#if LJ_GC64
704#define curr_func(L) (&gcval(L->base-2)->fn)
705#elif LJ_FR2
706#define curr_func(L) (&gcref((L->base-2)->gcr)->fn)
707#else
586#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) 708#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
709#endif
587#define curr_funcisL(L) (isluafunc(curr_func(L))) 710#define curr_funcisL(L) (isluafunc(curr_func(L)))
588#define curr_proto(L) (funcproto(curr_func(L))) 711#define curr_proto(L) (funcproto(curr_func(L)))
589#define curr_topL(L) (L->base + curr_proto(L)->framesize) 712#define curr_topL(L) (L->base + curr_proto(L)->framesize)
590#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) 713#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top)
591 714
715#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
716LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line,
717 const char *func, const char *fmt, ...);
718#endif
719
592/* -- GC object definition and conversions -------------------------------- */ 720/* -- GC object definition and conversions -------------------------------- */
593 721
594/* GC header for generic access to common fields of GC objects. */ 722/* GC header for generic access to common fields of GC objects. */
@@ -642,17 +770,18 @@ typedef union GCobj {
642 770
643/* -- TValue getters/setters ---------------------------------------------- */ 771/* -- TValue getters/setters ---------------------------------------------- */
644 772
645#ifdef LUA_USE_ASSERT
646#include "lj_gc.h"
647#endif
648
649/* Macros to test types. */ 773/* Macros to test types. */
774#if LJ_GC64
775#define itype(o) ((uint32_t)((o)->it64 >> 47))
776#define tvisnil(o) ((o)->it64 == -1)
777#else
650#define itype(o) ((o)->it) 778#define itype(o) ((o)->it)
651#define tvisnil(o) (itype(o) == LJ_TNIL) 779#define tvisnil(o) (itype(o) == LJ_TNIL)
780#endif
652#define tvisfalse(o) (itype(o) == LJ_TFALSE) 781#define tvisfalse(o) (itype(o) == LJ_TFALSE)
653#define tvistrue(o) (itype(o) == LJ_TTRUE) 782#define tvistrue(o) (itype(o) == LJ_TTRUE)
654#define tvisbool(o) (tvisfalse(o) || tvistrue(o)) 783#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
655#if LJ_64 784#if LJ_64 && !LJ_GC64
656#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) 785#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2)
657#else 786#else
658#define tvislightud(o) (itype(o) == LJ_TLIGHTUD) 787#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
@@ -686,7 +815,7 @@ typedef union GCobj {
686#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) 815#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
687 816
688/* Macros to convert type ids. */ 817/* Macros to convert type ids. */
689#if LJ_64 818#if LJ_64 && !LJ_GC64
690#define itypemap(o) \ 819#define itypemap(o) \
691 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) 820 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
692#else 821#else
@@ -694,13 +823,30 @@ typedef union GCobj {
694#endif 823#endif
695 824
696/* Macros to get tagged values. */ 825/* Macros to get tagged values. */
826#if LJ_GC64
827#define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK))
828#else
697#define gcval(o) (gcref((o)->gcr)) 829#define gcval(o) (gcref((o)->gcr))
698#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) 830#endif
831#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
699#if LJ_64 832#if LJ_64
700#define lightudV(o) \ 833#define lightudseg(u) \
701 check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) 834 (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1))
835#define lightudlo(u) \
836 ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1))
837#define lightudup(p) \
838 ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32)))
839static LJ_AINLINE void *lightudV(global_State *g, cTValue *o)
840{
841 uint64_t u = o->u64;
842 uint64_t seg = lightudseg(u);
843 uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
844 lj_assertG(tvislightud(o), "lightuserdata expected");
845 lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg);
846 return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u));
847}
702#else 848#else
703#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) 849#define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
704#endif 850#endif
705#define gcV(o) check_exp(tvisgcv(o), gcval(o)) 851#define gcV(o) check_exp(tvisgcv(o), gcval(o))
706#define strV(o) check_exp(tvisstr(o), &gcval(o)->str) 852#define strV(o) check_exp(tvisstr(o), &gcval(o)->str)
@@ -714,40 +860,70 @@ typedef union GCobj {
714#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) 860#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i)
715 861
716/* Macros to set tagged values. */ 862/* Macros to set tagged values. */
863#if LJ_GC64
864#define setitype(o, i) ((o)->it = ((i) << 15))
865#define setnilV(o) ((o)->it64 = -1)
866#define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47))
867#define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47))
868#else
717#define setitype(o, i) ((o)->it = (i)) 869#define setitype(o, i) ((o)->it = (i))
718#define setnilV(o) ((o)->it = LJ_TNIL) 870#define setnilV(o) ((o)->it = LJ_TNIL)
719#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) 871#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x))
872#define setpriV(o, i) (setitype((o), (i)))
873#endif
720 874
721static LJ_AINLINE void setlightudV(TValue *o, void *p) 875static LJ_AINLINE void setrawlightudV(TValue *o, void *p)
722{ 876{
723#if LJ_64 877#if LJ_GC64
878 o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
879#elif LJ_64
724 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); 880 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48);
725#else 881#else
726 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); 882 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD);
727#endif 883#endif
728} 884}
729 885
730#if LJ_64 886#if LJ_FR2 || LJ_32
731#define checklightudptr(L, p) \ 887#define contptr(f) ((void *)(f))
732 (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) 888#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
889#else
890#define contptr(f) \
891 ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
733#define setcont(o, f) \ 892#define setcont(o, f) \
734 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) 893 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
735#else
736#define checklightudptr(L, p) (p)
737#define setcont(o, f) setlightudV((o), (void *)(f))
738#endif 894#endif
739 895
740#define tvchecklive(L, o) \ 896static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg)
741 UNUSED(L), lua_assert(!tvisgcv(o) || \ 897{
742 ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) 898 UNUSED(L); UNUSED(o); UNUSED(msg);
899#if LUA_USE_ASSERT
900 if (tvisgcv(o)) {
901 lj_assertL(~itype(o) == gcval(o)->gch.gct,
902 "mismatch of TValue type %d vs GC type %d",
903 ~itype(o), gcval(o)->gch.gct);
904 /* Copy of isdead check from lj_gc.h to avoid circular include. */
905 lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg);
906 }
907#endif
908}
909
910static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
911{
912#if LJ_GC64
913 setgcreft(o->gcr, v, itype);
914#else
915 setgcref(o->gcr, v); setitype(o, itype);
916#endif
917}
743 918
744static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype) 919static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
745{ 920{
746 setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o); 921 setgcVraw(o, v, it);
922 checklivetv(L, o, "store to dead GC object");
747} 923}
748 924
749#define define_setV(name, type, tag) \ 925#define define_setV(name, type, tag) \
750static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \ 926static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \
751{ \ 927{ \
752 setgcV(L, o, obj2gco(v), tag); \ 928 setgcV(L, o, obj2gco(v), tag); \
753} 929}
@@ -790,13 +966,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i)
790/* Copy tagged values. */ 966/* Copy tagged values. */
791static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) 967static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
792{ 968{
793 *o1 = *o2; tvchecklive(L, o1); 969 *o1 = *o2;
970 checklivetv(L, o1, "copy of dead GC object");
794} 971}
795 972
796/* -- Number to integer conversion ---------------------------------------- */ 973/* -- Number to integer conversion ---------------------------------------- */
797 974
798#if LJ_SOFTFP 975#if LJ_SOFTFP
799LJ_ASMF int32_t lj_vm_tobit(double x); 976LJ_ASMF int32_t lj_vm_tobit(double x);
977#if LJ_TARGET_MIPS64
978LJ_ASMF int32_t lj_vm_tointg(double x);
979#endif
800#endif 980#endif
801 981
802static LJ_AINLINE int32_t lj_num2bit(lua_Number n) 982static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
@@ -810,11 +990,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 990#endif
811} 991}
812 992
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 993#define lj_num2int(n) ((int32_t)(n))
817#endif
818 994
819/* 995/*
820** This must match the JIT backend behavior. In particular for archs 996** This must match the JIT backend behavior. In particular for archs
@@ -859,6 +1035,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
859#define lj_typename(o) (lj_obj_itypename[itypemap(o)]) 1035#define lj_typename(o) (lj_obj_itypename[itypemap(o)])
860 1036
861/* Compare two objects without calling metamethods. */ 1037/* Compare two objects without calling metamethods. */
862LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); 1038LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
1039LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o);
863 1040
864#endif 1041#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index e5929442..41e0d1ca 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -133,8 +136,8 @@
133/* Some local macros to save typing. Undef'd at the end. */ 136/* Some local macros to save typing. Undef'd at the end. */
134#define IR(ref) (&J->cur.ir[(ref)]) 137#define IR(ref) (&J->cur.ir[(ref)])
135#define fins (&J->fold.ins) 138#define fins (&J->fold.ins)
136#define fleft (&J->fold.left) 139#define fleft (J->fold.left)
137#define fright (&J->fold.right) 140#define fright (J->fold.right)
138#define knumleft (ir_knum(fleft)->n) 141#define knumleft (ir_knum(fleft)->n)
139#define knumright (ir_knum(fright)->n) 142#define knumright (ir_knum(fright)->n)
140 143
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM)
169LJFOLD(SUB KNUM KNUM) 173LJFOLD(SUB KNUM KNUM)
170LJFOLD(MUL KNUM KNUM) 174LJFOLD(MUL KNUM KNUM)
171LJFOLD(DIV KNUM KNUM) 175LJFOLD(DIV KNUM KNUM)
172LJFOLD(NEG KNUM KNUM)
173LJFOLD(ABS KNUM KNUM)
174LJFOLD(ATAN2 KNUM KNUM)
175LJFOLD(LDEXP KNUM KNUM) 176LJFOLD(LDEXP KNUM KNUM)
176LJFOLD(MIN KNUM KNUM) 177LJFOLD(MIN KNUM KNUM)
177LJFOLD(MAX KNUM KNUM) 178LJFOLD(MAX KNUM KNUM)
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith)
183 return lj_ir_knum(J, y); 184 return lj_ir_knum(J, y);
184} 185}
185 186
187LJFOLD(NEG KNUM FLOAD)
188LJFOLD(ABS KNUM FLOAD)
189LJFOLDF(kfold_numabsneg)
190{
191 lua_Number a = knumleft;
192 lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
193 return lj_ir_knum(J, y);
194}
195
186LJFOLD(LDEXP KNUM KINT) 196LJFOLD(LDEXP KNUM KINT)
187LJFOLDF(kfold_ldexp) 197LJFOLDF(kfold_ldexp)
188{ 198{
@@ -202,11 +212,36 @@ LJFOLDF(kfold_fpmath)
202 return lj_ir_knum(J, y); 212 return lj_ir_knum(J, y);
203} 213}
204 214
215LJFOLD(CALLN KNUM any)
216LJFOLDF(kfold_fpcall1)
217{
218 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
219 if (CCI_TYPE(ci) == IRT_NUM) {
220 double y = ((double (*)(double))ci->func)(knumleft);
221 return lj_ir_knum(J, y);
222 }
223 return NEXTFOLD;
224}
225
226LJFOLD(CALLN CARG IRCALL_atan2)
227LJFOLDF(kfold_fpcall2)
228{
229 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
230 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
231 double a = ir_knum(IR(fleft->op1))->n;
232 double b = ir_knum(IR(fleft->op2))->n;
233 double y = ((double (*)(double, double))ci->func)(a, b);
234 return lj_ir_knum(J, y);
235 }
236 return NEXTFOLD;
237}
238
205LJFOLD(POW KNUM KINT) 239LJFOLD(POW KNUM KINT)
240LJFOLD(POW KNUM KNUM)
206LJFOLDF(kfold_numpow) 241LJFOLDF(kfold_numpow)
207{ 242{
208 lua_Number a = knumleft; 243 lua_Number a = knumleft;
209 lua_Number b = (lua_Number)fright->i; 244 lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
210 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); 245 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
211 return lj_ir_knum(J, y); 246 return lj_ir_knum(J, y);
212} 247}
@@ -247,7 +282,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
247 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; 282 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
248 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; 283 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
249 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; 284 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
250 default: lua_assert(0); break; 285 default: lj_assertX(0, "bad IR op %d", op); break;
251 } 286 }
252 return k1; 287 return k1;
253} 288}
@@ -319,7 +354,7 @@ LJFOLDF(kfold_intcomp)
319 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); 354 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
320 case IR_ABC: 355 case IR_ABC:
321 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); 356 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
322 default: lua_assert(0); return FAILFOLD; 357 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
323 } 358 }
324} 359}
325 360
@@ -333,21 +368,29 @@ LJFOLDF(kfold_intcomp0)
333 368
334/* -- Constant folding for 64 bit integers -------------------------------- */ 369/* -- Constant folding for 64 bit integers -------------------------------- */
335 370
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 371static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
372 IROp op)
337{ 373{
374 UNUSED(J);
375#if LJ_HASFFI
338 switch (op) { 376 switch (op) {
339#if LJ_64 || LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 377 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 378 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 379 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 380 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 381 case IR_BOR: k1 |= k2; break;
347 case IR_BXOR: k1 ^= k2; break; 382 case IR_BXOR: k1 ^= k2; break;
348#endif 383 case IR_BSHL: k1 <<= (k2 & 63); break;
349 default: UNUSED(k2); lua_assert(0); break; 384 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
385 case IR_BSAR: k1 >>= (k2 & 63); break;
386 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
387 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
388 default: lj_assertJ(0, "bad IR op %d", op); break;
350 } 389 }
390#else
391 UNUSED(k2); UNUSED(op);
392 lj_assertJ(0, "FFI IR op without FFI");
393#endif
351 return k1; 394 return k1;
352} 395}
353 396
@@ -359,7 +402,7 @@ LJFOLD(BOR KINT64 KINT64)
359LJFOLD(BXOR KINT64 KINT64) 402LJFOLD(BXOR KINT64 KINT64)
360LJFOLDF(kfold_int64arith) 403LJFOLDF(kfold_int64arith)
361{ 404{
362 return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, 405 return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
363 ir_k64(fright)->u64, (IROp)fins->o)); 406 ir_k64(fright)->u64, (IROp)fins->o));
364} 407}
365 408
@@ -381,7 +424,7 @@ LJFOLDF(kfold_int64arith2)
381 } 424 }
382 return INT64FOLD(k1); 425 return INT64FOLD(k1);
383#else 426#else
384 UNUSED(J); lua_assert(0); return FAILFOLD; 427 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
385#endif 428#endif
386} 429}
387 430
@@ -392,22 +435,12 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 435LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 436LJFOLDF(kfold_int64shift)
394{ 437{
395#if LJ_HASFFI || LJ_64 438#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 439 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 440 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 441 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 442#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 443 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
411#endif 444#endif
412} 445}
413 446
@@ -417,7 +450,7 @@ LJFOLDF(kfold_bnot64)
417#if LJ_HASFFI 450#if LJ_HASFFI
418 return INT64FOLD(~ir_k64(fleft)->u64); 451 return INT64FOLD(~ir_k64(fleft)->u64);
419#else 452#else
420 UNUSED(J); lua_assert(0); return FAILFOLD; 453 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
421#endif 454#endif
422} 455}
423 456
@@ -427,7 +460,7 @@ LJFOLDF(kfold_bswap64)
427#if LJ_HASFFI 460#if LJ_HASFFI
428 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); 461 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
429#else 462#else
430 UNUSED(J); lua_assert(0); return FAILFOLD; 463 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
431#endif 464#endif
432} 465}
433 466
@@ -452,10 +485,10 @@ LJFOLDF(kfold_int64comp)
452 case IR_UGE: return CONDFOLD(a >= b); 485 case IR_UGE: return CONDFOLD(a >= b);
453 case IR_ULE: return CONDFOLD(a <= b); 486 case IR_ULE: return CONDFOLD(a <= b);
454 case IR_UGT: return CONDFOLD(a > b); 487 case IR_UGT: return CONDFOLD(a > b);
455 default: lua_assert(0); return FAILFOLD; 488 default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
456 } 489 }
457#else 490#else
458 UNUSED(J); lua_assert(0); return FAILFOLD; 491 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
459#endif 492#endif
460} 493}
461 494
@@ -467,7 +500,7 @@ LJFOLDF(kfold_int64comp0)
467 return DROPFOLD; 500 return DROPFOLD;
468 return NEXTFOLD; 501 return NEXTFOLD;
469#else 502#else
470 UNUSED(J); lua_assert(0); return FAILFOLD; 503 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
471#endif 504#endif
472} 505}
473 506
@@ -481,6 +514,7 @@ LJFOLDF(kfold_snew_kptr)
481} 514}
482 515
483LJFOLD(SNEW any KINT) 516LJFOLD(SNEW any KINT)
517LJFOLD(XSNEW any KINT)
484LJFOLDF(kfold_snew_empty) 518LJFOLDF(kfold_snew_empty)
485{ 519{
486 if (fright->i == 0) 520 if (fright->i == 0)
@@ -492,7 +526,7 @@ LJFOLD(STRREF KGC KINT)
492LJFOLDF(kfold_strref) 526LJFOLDF(kfold_strref)
493{ 527{
494 GCstr *str = ir_kstr(fleft); 528 GCstr *str = ir_kstr(fleft);
495 lua_assert((MSize)fright->i <= str->len); 529 lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
496 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); 530 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
497} 531}
498 532
@@ -510,7 +544,7 @@ LJFOLDF(kfold_strref_snew)
510 PHIBARRIER(ir); 544 PHIBARRIER(ir);
511 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ 545 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
512 fins->op1 = str; 546 fins->op1 = str;
513 fins->ot = IRT(IR_STRREF, IRT_P32); 547 fins->ot = IRT(IR_STRREF, IRT_PGC);
514 return RETRYFOLD; 548 return RETRYFOLD;
515 } 549 }
516 } 550 }
@@ -528,6 +562,209 @@ LJFOLDF(kfold_strcmp)
528 return NEXTFOLD; 562 return NEXTFOLD;
529} 563}
530 564
565/* -- Constant folding and forwarding for buffers ------------------------- */
566
567/*
568** Buffer ops perform stores, but their effect is limited to the buffer
569** itself. Also, buffer ops are chained: a use of an op implies a use of
570** all other ops up the chain. Conversely, if an op is unused, all ops
571** up the chain can go unsed. This largely eliminates the need to treat
572** them as stores.
573**
574** Alas, treating them as normal (IRM_N) ops doesn't work, because they
575** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
576** or if FOLD is disabled.
577**
578** The compromise is to declare them as loads, emit them like stores and
579** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
580** fragments left over from CSE are eliminated by DCE.
581**
582** The string buffer methods emit a USE instead of a BUFSTR to keep the
583** chain alive.
584*/
585
586LJFOLD(BUFHDR any any)
587LJFOLDF(bufhdr_merge)
588{
589 return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
590}
591
592LJFOLD(BUFPUT any BUFSTR)
593LJFOLDF(bufput_bufstr)
594{
595 if ((J->flags & JIT_F_OPT_FWD)) {
596 IRRef hdr = fright->op2;
597 /* New buffer, no other buffer op inbetween and same buffer? */
598 if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
599 fleft->prev == hdr &&
600 fleft->op1 == IR(hdr)->op1) {
601 IRRef ref = fins->op1;
602 IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */
603 IR(ref)->op1 = fright->op1;
604 return ref;
605 }
606 /* Replay puts to global temporary buffer. */
607 if (IR(hdr)->op2 == IRBUFHDR_RESET) {
608 IRIns *ir = IR(fright->op1);
609 /* For now only handle single string.reverse .lower .upper .rep. */
610 if (ir->o == IR_CALLL &&
611 ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
612 ir->op2 <= IRCALL_lj_buf_putstr_rep) {
613 IRIns *carg1 = IR(ir->op1);
614 if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
615 IRIns *carg2 = IR(carg1->op1);
616 if (carg2->op1 == hdr) {
617 return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
618 }
619 } else if (carg1->op1 == hdr) {
620 return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
621 }
622 }
623 }
624 }
625 return EMITFOLD; /* Always emit, CSE later. */
626}
627
628LJFOLD(BUFPUT any any)
629LJFOLDF(bufput_kgc)
630{
631 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
632 GCstr *s2 = ir_kstr(fright);
633 if (s2->len == 0) { /* Empty string? */
634 return LEFTFOLD;
635 } else {
636 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
637 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
638 GCstr *s1 = ir_kstr(IR(fleft->op2));
639 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
640 /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
641 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
642 return fins->op1;
643 }
644 }
645 }
646 return EMITFOLD; /* Always emit, CSE later. */
647}
648
649LJFOLD(BUFSTR any any)
650LJFOLDF(bufstr_kfold_cse)
651{
652 lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
653 fleft->o == IR_CALLL,
654 "bad buffer constructor IR op %d", fleft->o);
655 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
656 if (fleft->o == IR_BUFHDR) { /* No put operations? */
657 if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */
658 return lj_ir_kstr(J, &J2G(J)->strempty);
659 fins->op1 = fleft->op1;
660 fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
661 return CSEFOLD;
662 } else if (fleft->o == IR_BUFPUT) {
663 IRIns *irb = IR(fleft->op1);
664 if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
665 return fleft->op2; /* Shortcut for a single put operation. */
666 }
667 }
668 /* Try to CSE the whole chain. */
669 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
670 IRRef ref = J->chain[IR_BUFSTR];
671 while (ref) {
672 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
673 while (ira->o == irb->o && ira->op2 == irb->op2) {
674 lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
675 ira->o == IR_CALLL || ira->o == IR_CARG,
676 "bad buffer constructor IR op %d", ira->o);
677 if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
678 return ref; /* CSE succeeded. */
679 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
680 break;
681 ira = IR(ira->op1);
682 irb = IR(irb->op1);
683 }
684 ref = irs->prev;
685 }
686 }
687 return EMITFOLD; /* No CSE possible. */
688}
689
690LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
691LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
692LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
693LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
694LJFOLDF(bufput_kfold_op)
695{
696 if (irref_isk(fleft->op2)) {
697 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
698 SBuf *sb = lj_buf_tmp_(J->L);
699 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
700 ir_kstr(IR(fleft->op2)));
701 fins->o = IR_BUFPUT;
702 fins->op1 = fleft->op1;
703 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
704 return RETRYFOLD;
705 }
706 return EMITFOLD; /* Always emit, CSE later. */
707}
708
709LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
710LJFOLDF(bufput_kfold_rep)
711{
712 if (irref_isk(fleft->op2)) {
713 IRIns *irc = IR(fleft->op1);
714 if (irref_isk(irc->op2)) {
715 SBuf *sb = lj_buf_tmp_(J->L);
716 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
717 fins->o = IR_BUFPUT;
718 fins->op1 = irc->op1;
719 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
720 return RETRYFOLD;
721 }
722 }
723 return EMITFOLD; /* Always emit, CSE later. */
724}
725
726LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
727LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
728LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
729LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
730LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
731LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
732LJFOLDF(bufput_kfold_fmt)
733{
734 IRIns *irc = IR(fleft->op1);
735 lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
736 if (irref_isk(fleft->op2)) {
737 SFormat sf = (SFormat)IR(irc->op2)->i;
738 IRIns *ira = IR(fleft->op2);
739 SBuf *sb = lj_buf_tmp_(J->L);
740 switch (fins->op2) {
741 case IRCALL_lj_strfmt_putfxint:
742 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
743 break;
744 case IRCALL_lj_strfmt_putfstr:
745 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
746 break;
747 case IRCALL_lj_strfmt_putfchar:
748 sb = lj_strfmt_putfchar(sb, sf, ira->i);
749 break;
750 case IRCALL_lj_strfmt_putfnum_int:
751 case IRCALL_lj_strfmt_putfnum_uint:
752 case IRCALL_lj_strfmt_putfnum:
753 default: {
754 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
755 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
756 ir_knum(ira)->n);
757 break;
758 }
759 }
760 fins->o = IR_BUFPUT;
761 fins->op1 = irc->op1;
762 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
763 return RETRYFOLD;
764 }
765 return EMITFOLD; /* Always emit, CSE later. */
766}
767
531/* -- Constant folding of pointer arithmetic ------------------------------ */ 768/* -- Constant folding of pointer arithmetic ------------------------------ */
532 769
533LJFOLD(ADD KGC KINT) 770LJFOLD(ADD KGC KINT)
@@ -648,27 +885,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
648LJFOLDF(kfold_conv_knum_int_num) 885LJFOLDF(kfold_conv_knum_int_num)
649{ 886{
650 lua_Number n = knumleft; 887 lua_Number n = knumleft;
651 if (!(fins->op2 & IRCONV_TRUNC)) { 888 int32_t k = lj_num2int(n);
652 int32_t k = lj_num2int(n); 889 if (irt_isguard(fins->t) && n != (lua_Number)k) {
653 if (irt_isguard(fins->t) && n != (lua_Number)k) { 890 /* We're about to create a guard which always fails, like CONV +1.5.
654 /* We're about to create a guard which always fails, like CONV +1.5. 891 ** Some pathological loops cause this during LICM, e.g.:
655 ** Some pathological loops cause this during LICM, e.g.: 892 ** local x,k,t = 0,1.5,{1,[1.5]=2}
656 ** local x,k,t = 0,1.5,{1,[1.5]=2} 893 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
657 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 894 ** assert(x == 300)
658 ** assert(x == 300) 895 */
659 */ 896 return FAILFOLD;
660 return FAILFOLD;
661 }
662 return INTFOLD(k);
663 } else {
664 return INTFOLD((int32_t)n);
665 } 897 }
898 return INTFOLD(k);
666} 899}
667 900
668LJFOLD(CONV KNUM IRCONV_U32_NUM) 901LJFOLD(CONV KNUM IRCONV_U32_NUM)
669LJFOLDF(kfold_conv_knum_u32_num) 902LJFOLDF(kfold_conv_knum_u32_num)
670{ 903{
671 lua_assert((fins->op2 & IRCONV_TRUNC));
672#ifdef _MSC_VER 904#ifdef _MSC_VER
673 { /* Workaround for MSVC bug. */ 905 { /* Workaround for MSVC bug. */
674 volatile uint32_t u = (uint32_t)knumleft; 906 volatile uint32_t u = (uint32_t)knumleft;
@@ -682,27 +914,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
682LJFOLD(CONV KNUM IRCONV_I64_NUM) 914LJFOLD(CONV KNUM IRCONV_I64_NUM)
683LJFOLDF(kfold_conv_knum_i64_num) 915LJFOLDF(kfold_conv_knum_i64_num)
684{ 916{
685 lua_assert((fins->op2 & IRCONV_TRUNC));
686 return INT64FOLD((uint64_t)(int64_t)knumleft); 917 return INT64FOLD((uint64_t)(int64_t)knumleft);
687} 918}
688 919
689LJFOLD(CONV KNUM IRCONV_U64_NUM) 920LJFOLD(CONV KNUM IRCONV_U64_NUM)
690LJFOLDF(kfold_conv_knum_u64_num) 921LJFOLDF(kfold_conv_knum_u64_num)
691{ 922{
692 lua_assert((fins->op2 & IRCONV_TRUNC));
693 return INT64FOLD(lj_num2u64(knumleft)); 923 return INT64FOLD(lj_num2u64(knumleft));
694} 924}
695 925
696LJFOLD(TOSTR KNUM) 926LJFOLD(TOSTR KNUM any)
697LJFOLDF(kfold_tostr_knum) 927LJFOLDF(kfold_tostr_knum)
698{ 928{
699 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 929 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
700} 930}
701 931
702LJFOLD(TOSTR KINT) 932LJFOLD(TOSTR KINT any)
703LJFOLDF(kfold_tostr_kint) 933LJFOLDF(kfold_tostr_kint)
704{ 934{
705 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 935 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
936 lj_strfmt_int(J->L, fleft->i) :
937 lj_strfmt_char(J->L, fleft->i));
706} 938}
707 939
708LJFOLD(STRTO KGC) 940LJFOLD(STRTO KGC)
@@ -750,13 +982,13 @@ LJFOLDF(shortcut_round)
750 return NEXTFOLD; 982 return NEXTFOLD;
751} 983}
752 984
753LJFOLD(ABS ABS KNUM) 985LJFOLD(ABS ABS FLOAD)
754LJFOLDF(shortcut_left) 986LJFOLDF(shortcut_left)
755{ 987{
756 return LEFTFOLD; /* f(g(x)) ==> g(x) */ 988 return LEFTFOLD; /* f(g(x)) ==> g(x) */
757} 989}
758 990
759LJFOLD(ABS NEG KNUM) 991LJFOLD(ABS NEG FLOAD)
760LJFOLDF(shortcut_dropleft) 992LJFOLDF(shortcut_dropleft)
761{ 993{
762 PHIBARRIER(fleft); 994 PHIBARRIER(fleft);
@@ -837,8 +1069,10 @@ LJFOLDF(simplify_nummuldiv_k)
837 if (n == 1.0) { /* x o 1 ==> x */ 1069 if (n == 1.0) { /* x o 1 ==> x */
838 return LEFTFOLD; 1070 return LEFTFOLD;
839 } else if (n == -1.0) { /* x o -1 ==> -x */ 1071 } else if (n == -1.0) { /* x o -1 ==> -x */
1072 IRRef op1 = fins->op1;
1073 fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
1074 fins->op1 = op1;
840 fins->o = IR_NEG; 1075 fins->o = IR_NEG;
841 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
842 return RETRYFOLD; 1076 return RETRYFOLD;
843 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ 1077 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
844 fins->o = IR_ADD; 1078 fins->o = IR_ADD;
@@ -879,7 +1113,7 @@ LJFOLDF(simplify_nummuldiv_negneg)
879} 1113}
880 1114
881LJFOLD(POW any KINT) 1115LJFOLD(POW any KINT)
882LJFOLDF(simplify_numpow_xk) 1116LJFOLDF(simplify_numpow_xkint)
883{ 1117{
884 int32_t k = fright->i; 1118 int32_t k = fright->i;
885 TRef ref = fins->op1; 1119 TRef ref = fins->op1;
@@ -908,13 +1142,22 @@ LJFOLDF(simplify_numpow_xk)
908 return ref; 1142 return ref;
909} 1143}
910 1144
1145LJFOLD(POW any KNUM)
1146LJFOLDF(simplify_numpow_xknum)
1147{
1148 if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */
1149 return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
1150 return NEXTFOLD;
1151}
1152
911LJFOLD(POW KNUM any) 1153LJFOLD(POW KNUM any)
912LJFOLDF(simplify_numpow_kx) 1154LJFOLDF(simplify_numpow_kx)
913{ 1155{
914 lua_Number n = knumleft; 1156 lua_Number n = knumleft;
915 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ 1157 if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */
916 fins->o = IR_CONV;
917#if LJ_TARGET_X86ORX64 1158#if LJ_TARGET_X86ORX64
1159 /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
1160 fins->o = IR_CONV;
918 fins->op1 = fins->op2; 1161 fins->op1 = fins->op2;
919 fins->op2 = IRCONV_NUM_INT; 1162 fins->op2 = IRCONV_NUM_INT;
920 fins->op2 = (IRRef1)lj_opt_fold(J); 1163 fins->op2 = (IRRef1)lj_opt_fold(J);
@@ -1008,10 +1251,10 @@ LJFOLDF(simplify_tobit_conv)
1008{ 1251{
1009 /* Fold even across PHI to avoid expensive num->int conversions in loop. */ 1252 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
1010 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { 1253 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
1011 lua_assert(irt_isnum(fleft->t)); 1254 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1012 return fleft->op1; 1255 return fleft->op1;
1013 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { 1256 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
1014 lua_assert(irt_isnum(fleft->t)); 1257 lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
1015 fins->o = IR_CONV; 1258 fins->o = IR_CONV;
1016 fins->op1 = fleft->op1; 1259 fins->op1 = fleft->op1;
1017 fins->op2 = (IRT_INT<<5)|IRT_U32; 1260 fins->op2 = (IRT_INT<<5)|IRT_U32;
@@ -1051,7 +1294,7 @@ LJFOLDF(simplify_conv_sext)
1051 /* Use scalar evolution analysis results to strength-reduce sign-extension. */ 1294 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
1052 if (ref == J->scev.idx) { 1295 if (ref == J->scev.idx) {
1053 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; 1296 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
1054 lua_assert(irt_isint(J->scev.t)); 1297 lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
1055 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { 1298 if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
1056 ok_reduce: 1299 ok_reduce:
1057#if LJ_TARGET_X64 1300#if LJ_TARGET_X64
@@ -1082,6 +1325,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
1082LJFOLD(CONV MUL IRCONV_U32_U64) 1325LJFOLD(CONV MUL IRCONV_U32_U64)
1083LJFOLDF(simplify_conv_narrow) 1326LJFOLDF(simplify_conv_narrow)
1084{ 1327{
1328#if LJ_64
1329 UNUSED(J);
1330 return NEXTFOLD;
1331#else
1085 IROp op = (IROp)fleft->o; 1332 IROp op = (IROp)fleft->o;
1086 IRType t = irt_type(fins->t); 1333 IRType t = irt_type(fins->t);
1087 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; 1334 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
@@ -1092,6 +1339,7 @@ LJFOLDF(simplify_conv_narrow)
1092 fins->op1 = op1; 1339 fins->op1 = op1;
1093 fins->op2 = op2; 1340 fins->op2 = op2;
1094 return RETRYFOLD; 1341 return RETRYFOLD;
1342#endif
1095} 1343}
1096 1344
1097/* Special CSE rule for CONV. */ 1345/* Special CSE rule for CONV. */
@@ -1127,7 +1375,8 @@ LJFOLDF(narrow_convert)
1127 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ 1375 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1128 if (J->chain[IR_LOOP]) 1376 if (J->chain[IR_LOOP])
1129 return NEXTFOLD; 1377 return NEXTFOLD;
1130 lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); 1378 lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
1379 "unexpected CONV TOBIT");
1131 return lj_opt_narrow_convert(J); 1380 return lj_opt_narrow_convert(J);
1132} 1381}
1133 1382
@@ -1205,7 +1454,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1205 ** But this is mainly intended for simple address arithmetic. 1454 ** But this is mainly intended for simple address arithmetic.
1206 ** Also it's easier for the backend to optimize the original multiplies. 1455 ** Also it's easier for the backend to optimize the original multiplies.
1207 */ 1456 */
1208 if (k == 1) { /* i * 1 ==> i */ 1457 if (k == 0) { /* i * 0 ==> 0 */
1458 return RIGHTFOLD;
1459 } else if (k == 1) { /* i * 1 ==> i */
1209 return LEFTFOLD; 1460 return LEFTFOLD;
1210 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1461 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1211 fins->o = IR_BSHL; 1462 fins->o = IR_BSHL;
@@ -1218,9 +1469,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1218LJFOLD(MUL any KINT) 1469LJFOLD(MUL any KINT)
1219LJFOLDF(simplify_intmul_k32) 1470LJFOLDF(simplify_intmul_k32)
1220{ 1471{
1221 if (fright->i == 0) /* i * 0 ==> 0 */ 1472 if (fright->i >= 0)
1222 return INTFOLD(0);
1223 else if (fright->i > 0)
1224 return simplify_intmul_k(J, fright->i); 1473 return simplify_intmul_k(J, fright->i);
1225 return NEXTFOLD; 1474 return NEXTFOLD;
1226} 1475}
@@ -1228,21 +1477,20 @@ LJFOLDF(simplify_intmul_k32)
1228LJFOLD(MUL any KINT64) 1477LJFOLD(MUL any KINT64)
1229LJFOLDF(simplify_intmul_k64) 1478LJFOLDF(simplify_intmul_k64)
1230{ 1479{
1231 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1480#if LJ_HASFFI
1232 return INT64FOLD(0); 1481 if (ir_kint64(fright)->u64 < 0x80000000u)
1233#if LJ_64
1234 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1235 else if (ir_kint64(fright)->u64 < 0x80000000u)
1236 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1482 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1237#endif
1238 return NEXTFOLD; 1483 return NEXTFOLD;
1484#else
1485 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1486#endif
1239} 1487}
1240 1488
1241LJFOLD(MOD any KINT) 1489LJFOLD(MOD any KINT)
1242LJFOLDF(simplify_intmod_k) 1490LJFOLDF(simplify_intmod_k)
1243{ 1491{
1244 int32_t k = fright->i; 1492 int32_t k = fright->i;
1245 lua_assert(k != 0); 1493 lj_assertJ(k != 0, "integer mod 0");
1246 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ 1494 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1247 fins->o = IR_BAND; 1495 fins->o = IR_BAND;
1248 fins->op2 = lj_ir_kint(J, k-1); 1496 fins->op2 = lj_ir_kint(J, k-1);
@@ -1491,6 +1739,15 @@ LJFOLDF(simplify_shiftk_andk)
1491 fins->op2 = (IRRef1)lj_ir_kint(J, k); 1739 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1492 fins->ot = IRTI(IR_BAND); 1740 fins->ot = IRTI(IR_BAND);
1493 return RETRYFOLD; 1741 return RETRYFOLD;
1742 } else if (irk->o == IR_KINT64) {
1743 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
1744 (IROp)fins->o);
1745 IROpT ot = fleft->ot;
1746 fins->op1 = fleft->op1;
1747 fins->op1 = (IRRef1)lj_opt_fold(J);
1748 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1749 fins->ot = ot;
1750 return RETRYFOLD;
1494 } 1751 }
1495 return NEXTFOLD; 1752 return NEXTFOLD;
1496} 1753}
@@ -1506,6 +1763,47 @@ LJFOLDF(simplify_andk_shiftk)
1506 return NEXTFOLD; 1763 return NEXTFOLD;
1507} 1764}
1508 1765
1766LJFOLD(BAND BOR KINT)
1767LJFOLD(BOR BAND KINT)
1768LJFOLDF(simplify_andor_k)
1769{
1770 IRIns *irk = IR(fleft->op2);
1771 PHIBARRIER(fleft);
1772 if (irk->o == IR_KINT) {
1773 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1774 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1775 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1776 if (k == (fins->o == IR_BAND ? 0 : -1)) {
1777 fins->op1 = fleft->op1;
1778 return RETRYFOLD;
1779 }
1780 }
1781 return NEXTFOLD;
1782}
1783
1784LJFOLD(BAND BOR KINT64)
1785LJFOLD(BOR BAND KINT64)
1786LJFOLDF(simplify_andor_k64)
1787{
1788#if LJ_HASFFI
1789 IRIns *irk = IR(fleft->op2);
1790 PHIBARRIER(fleft);
1791 if (irk->o == IR_KINT64) {
1792 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1793 (IROp)fins->o);
1794 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1795 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1796 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
1797 fins->op1 = fleft->op1;
1798 return RETRYFOLD;
1799 }
1800 }
1801 return NEXTFOLD;
1802#else
1803 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1804#endif
1805}
1806
1509/* -- Reassociation ------------------------------------------------------- */ 1807/* -- Reassociation ------------------------------------------------------- */
1510 1808
1511LJFOLD(ADD ADD KINT) 1809LJFOLD(ADD ADD KINT)
@@ -1535,11 +1833,11 @@ LJFOLD(BOR BOR KINT64)
1535LJFOLD(BXOR BXOR KINT64) 1833LJFOLD(BXOR BXOR KINT64)
1536LJFOLDF(reassoc_intarith_k64) 1834LJFOLDF(reassoc_intarith_k64)
1537{ 1835{
1538#if LJ_HASFFI || LJ_64 1836#if LJ_HASFFI
1539 IRIns *irk = IR(fleft->op2); 1837 IRIns *irk = IR(fleft->op2);
1540 if (irk->o == IR_KINT64) { 1838 if (irk->o == IR_KINT64) {
1541 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1839 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1542 ir_k64(fright)->u64, (IROp)fins->o); 1840 (IROp)fins->o);
1543 PHIBARRIER(fleft); 1841 PHIBARRIER(fleft);
1544 fins->op1 = fleft->op1; 1842 fins->op1 = fleft->op1;
1545 fins->op2 = (IRRef1)lj_ir_kint64(J, k); 1843 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
@@ -1547,12 +1845,10 @@ LJFOLDF(reassoc_intarith_k64)
1547 } 1845 }
1548 return NEXTFOLD; 1846 return NEXTFOLD;
1549#else 1847#else
1550 UNUSED(J); lua_assert(0); return FAILFOLD; 1848 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1551#endif 1849#endif
1552} 1850}
1553 1851
1554LJFOLD(MIN MIN any)
1555LJFOLD(MAX MAX any)
1556LJFOLD(BAND BAND any) 1852LJFOLD(BAND BAND any)
1557LJFOLD(BOR BOR any) 1853LJFOLD(BOR BOR any)
1558LJFOLDF(reassoc_dup) 1854LJFOLDF(reassoc_dup)
@@ -1562,6 +1858,15 @@ LJFOLDF(reassoc_dup)
1562 return NEXTFOLD; 1858 return NEXTFOLD;
1563} 1859}
1564 1860
1861LJFOLD(MIN MIN any)
1862LJFOLD(MAX MAX any)
1863LJFOLDF(reassoc_dup_minmax)
1864{
1865 if (fins->op2 == fleft->op2)
1866 return LEFTFOLD; /* (a o b) o b ==> a o b */
1867 return NEXTFOLD;
1868}
1869
1565LJFOLD(BXOR BXOR any) 1870LJFOLD(BXOR BXOR any)
1566LJFOLDF(reassoc_bxor) 1871LJFOLDF(reassoc_bxor)
1567{ 1872{
@@ -1600,23 +1905,12 @@ LJFOLDF(reassoc_shift)
1600 return NEXTFOLD; 1905 return NEXTFOLD;
1601} 1906}
1602 1907
1603LJFOLD(MIN MIN KNUM)
1604LJFOLD(MAX MAX KNUM)
1605LJFOLD(MIN MIN KINT) 1908LJFOLD(MIN MIN KINT)
1606LJFOLD(MAX MAX KINT) 1909LJFOLD(MAX MAX KINT)
1607LJFOLDF(reassoc_minmax_k) 1910LJFOLDF(reassoc_minmax_k)
1608{ 1911{
1609 IRIns *irk = IR(fleft->op2); 1912 IRIns *irk = IR(fleft->op2);
1610 if (irk->o == IR_KNUM) { 1913 if (irk->o == IR_KINT) {
1611 lua_Number a = ir_knum(irk)->n;
1612 lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
1613 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1614 return LEFTFOLD;
1615 PHIBARRIER(fleft);
1616 fins->op1 = fleft->op1;
1617 fins->op2 = (IRRef1)lj_ir_knum(J, y);
1618 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1619 } else if (irk->o == IR_KINT) {
1620 int32_t a = irk->i; 1914 int32_t a = irk->i;
1621 int32_t y = kfold_intop(a, fright->i, fins->o); 1915 int32_t y = kfold_intop(a, fright->i, fins->o);
1622 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ 1916 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
@@ -1629,24 +1923,6 @@ LJFOLDF(reassoc_minmax_k)
1629 return NEXTFOLD; 1923 return NEXTFOLD;
1630} 1924}
1631 1925
1632LJFOLD(MIN MAX any)
1633LJFOLD(MAX MIN any)
1634LJFOLDF(reassoc_minmax_left)
1635{
1636 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1637 return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
1638 return NEXTFOLD;
1639}
1640
1641LJFOLD(MIN any MAX)
1642LJFOLD(MAX any MIN)
1643LJFOLDF(reassoc_minmax_right)
1644{
1645 if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
1646 return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
1647 return NEXTFOLD;
1648}
1649
1650/* -- Array bounds check elimination -------------------------------------- */ 1926/* -- Array bounds check elimination -------------------------------------- */
1651 1927
1652/* Eliminate ABC across PHIs to handle t[i-1] forwarding case. 1928/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
@@ -1772,8 +2048,6 @@ LJFOLDF(comm_comp)
1772 2048
1773LJFOLD(BAND any any) 2049LJFOLD(BAND any any)
1774LJFOLD(BOR any any) 2050LJFOLD(BOR any any)
1775LJFOLD(MIN any any)
1776LJFOLD(MAX any any)
1777LJFOLDF(comm_dup) 2051LJFOLDF(comm_dup)
1778{ 2052{
1779 if (fins->op1 == fins->op2) /* x o x ==> x */ 2053 if (fins->op1 == fins->op2) /* x o x ==> x */
@@ -1781,6 +2055,15 @@ LJFOLDF(comm_dup)
1781 return fold_comm_swap(J); 2055 return fold_comm_swap(J);
1782} 2056}
1783 2057
2058LJFOLD(MIN any any)
2059LJFOLD(MAX any any)
2060LJFOLDF(comm_dup_minmax)
2061{
2062 if (fins->op1 == fins->op2) /* x o x ==> x */
2063 return LEFTFOLD;
2064 return NEXTFOLD;
2065}
2066
1784LJFOLD(BXOR any any) 2067LJFOLD(BXOR any any)
1785LJFOLDF(comm_bxor) 2068LJFOLDF(comm_bxor)
1786{ 2069{
@@ -1817,7 +2100,7 @@ LJFOLDF(merge_eqne_snew_kgc)
1817{ 2100{
1818 GCstr *kstr = ir_kstr(fright); 2101 GCstr *kstr = ir_kstr(fright);
1819 int32_t len = (int32_t)kstr->len; 2102 int32_t len = (int32_t)kstr->len;
1820 lua_assert(irt_isstr(fins->t)); 2103 lj_assertJ(irt_isstr(fins->t), "bad equality IR type");
1821 2104
1822#if LJ_TARGET_UNALIGNED 2105#if LJ_TARGET_UNALIGNED
1823#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ 2106#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
@@ -1881,7 +2164,7 @@ LJFOLD(HLOAD KKPTR)
1881LJFOLDF(kfold_hload_kkptr) 2164LJFOLDF(kfold_hload_kkptr)
1882{ 2165{
1883 UNUSED(J); 2166 UNUSED(J);
1884 lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); 2167 lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
1885 return TREF_NIL; 2168 return TREF_NIL;
1886} 2169}
1887 2170
@@ -1891,8 +2174,8 @@ LJFOLDX(lj_opt_fwd_hload)
1891LJFOLD(ULOAD any) 2174LJFOLD(ULOAD any)
1892LJFOLDX(lj_opt_fwd_uload) 2175LJFOLDX(lj_opt_fwd_uload)
1893 2176
1894LJFOLD(CALLL any IRCALL_lj_tab_len) 2177LJFOLD(ALEN any any)
1895LJFOLDX(lj_opt_fwd_tab_len) 2178LJFOLDX(lj_opt_fwd_alen)
1896 2179
1897/* Upvalue refs are really loads, but there are no corresponding stores. 2180/* Upvalue refs are really loads, but there are no corresponding stores.
1898** So CSE is ok for them, except for UREFO across a GC step (see below). 2181** So CSE is ok for them, except for UREFO across a GC step (see below).
@@ -1953,6 +2236,7 @@ LJFOLDF(fwd_href_tdup)
1953** an aliased table, as it may invalidate all of the pointers and fields. 2236** an aliased table, as it may invalidate all of the pointers and fields.
1954** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2237** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1955** FLOADs. And NEWREF itself is treated like a store (see below). 2238** FLOADs. And NEWREF itself is treated like a store (see below).
2239** LREF is constant (per trace) since coroutine switches are not inlined.
1956*/ 2240*/
1957LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2241LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1958LJFOLDF(fload_tab_tnew_asize) 2242LJFOLDF(fload_tab_tnew_asize)
@@ -2016,6 +2300,26 @@ LJFOLDF(fload_str_len_snew)
2016 return NEXTFOLD; 2300 return NEXTFOLD;
2017} 2301}
2018 2302
2303LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2304LJFOLDF(fload_str_len_tostr)
2305{
2306 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2307 return INTFOLD(1);
2308 return NEXTFOLD;
2309}
2310
2311LJFOLD(FLOAD any IRFL_SBUF_W)
2312LJFOLD(FLOAD any IRFL_SBUF_E)
2313LJFOLD(FLOAD any IRFL_SBUF_B)
2314LJFOLD(FLOAD any IRFL_SBUF_L)
2315LJFOLD(FLOAD any IRFL_SBUF_REF)
2316LJFOLD(FLOAD any IRFL_SBUF_R)
2317LJFOLDF(fload_sbuf)
2318{
2319 TRef tr = lj_opt_fwd_fload(J);
2320 return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
2321}
2322
2019/* The C type ID of cdata objects is immutable. */ 2323/* The C type ID of cdata objects is immutable. */
2020LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2324LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2021LJFOLDF(fload_cdata_typeid_kgc) 2325LJFOLDF(fload_cdata_typeid_kgc)
@@ -2062,6 +2366,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2062} 2366}
2063 2367
2064LJFOLD(FLOAD any IRFL_STR_LEN) 2368LJFOLD(FLOAD any IRFL_STR_LEN)
2369LJFOLD(FLOAD any IRFL_FUNC_ENV)
2370LJFOLD(FLOAD any IRFL_THREAD_ENV)
2065LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2371LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2066LJFOLD(FLOAD any IRFL_CDATA_PTR) 2372LJFOLD(FLOAD any IRFL_CDATA_PTR)
2067LJFOLD(FLOAD any IRFL_CDATA_INT) 2373LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2081,7 +2387,7 @@ LJFOLDF(fwd_sload)
2081 TRef tr = lj_opt_cse(J); 2387 TRef tr = lj_opt_cse(J);
2082 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; 2388 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
2083 } else { 2389 } else {
2084 lua_assert(J->slot[fins->op1] != 0); 2390 lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
2085 return J->slot[fins->op1]; 2391 return J->slot[fins->op1];
2086 } 2392 }
2087} 2393}
@@ -2127,6 +2433,17 @@ LJFOLDF(barrier_tnew_tdup)
2127 return DROPFOLD; 2433 return DROPFOLD;
2128} 2434}
2129 2435
2436/* -- Profiling ----------------------------------------------------------- */
2437
2438LJFOLD(PROF any any)
2439LJFOLDF(prof)
2440{
2441 IRRef ref = J->chain[IR_PROF];
2442 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2443 return ref;
2444 return EMITFOLD;
2445}
2446
2130/* -- Stores and allocations ---------------------------------------------- */ 2447/* -- Stores and allocations ---------------------------------------------- */
2131 2448
2132/* Stores and allocations cannot be folded or passed on to CSE in general. 2449/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2149,8 +2466,10 @@ LJFOLD(XSTORE any any)
2149LJFOLDX(lj_opt_dse_xstore) 2466LJFOLDX(lj_opt_dse_xstore)
2150 2467
2151LJFOLD(NEWREF any any) /* Treated like a store. */ 2468LJFOLD(NEWREF any any) /* Treated like a store. */
2152LJFOLD(CALLS any any) 2469LJFOLD(TMPREF any any)
2470LJFOLD(CALLA any any)
2153LJFOLD(CALLL any any) /* Safeguard fallback. */ 2471LJFOLD(CALLL any any) /* Safeguard fallback. */
2472LJFOLD(CALLS any any)
2154LJFOLD(CALLXS any any) 2473LJFOLD(CALLXS any any)
2155LJFOLD(XBAR) 2474LJFOLD(XBAR)
2156LJFOLD(RETF any any) /* Modifies BASE. */ 2475LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2183,8 +2502,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
2183 IRRef ref; 2502 IRRef ref;
2184 2503
2185 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { 2504 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
2186 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | 2505 lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
2187 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); 2506 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
2507 "bad JIT_F_OPT_DEFAULT");
2188 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ 2508 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2189 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) 2509 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
2190 return lj_opt_cse(J); 2510 return lj_opt_cse(J);
@@ -2209,10 +2529,14 @@ retry:
2209 if (fins->op1 >= J->cur.nk) { 2529 if (fins->op1 >= J->cur.nk) {
2210 key += (uint32_t)IR(fins->op1)->o << 10; 2530 key += (uint32_t)IR(fins->op1)->o << 10;
2211 *fleft = *IR(fins->op1); 2531 *fleft = *IR(fins->op1);
2532 if (fins->op1 < REF_TRUE)
2533 fleft[1] = IR(fins->op1)[1];
2212 } 2534 }
2213 if (fins->op2 >= J->cur.nk) { 2535 if (fins->op2 >= J->cur.nk) {
2214 key += (uint32_t)IR(fins->op2)->o; 2536 key += (uint32_t)IR(fins->op2)->o;
2215 *fright = *IR(fins->op2); 2537 *fright = *IR(fins->op2);
2538 if (fins->op2 < REF_TRUE)
2539 fright[1] = IR(fins->op2)[1];
2216 } else { 2540 } else {
2217 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ 2541 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2218 } 2542 }
@@ -2242,7 +2566,7 @@ retry:
2242 return lj_ir_kint(J, fins->i); 2566 return lj_ir_kint(J, fins->i);
2243 if (ref == FAILFOLD) 2567 if (ref == FAILFOLD)
2244 lj_trace_err(J, LJ_TRERR_GFAIL); 2568 lj_trace_err(J, LJ_TRERR_GFAIL);
2245 lua_assert(ref == DROPFOLD); 2569 lj_assertJ(ref == DROPFOLD, "bad fold result");
2246 return REF_DROP; 2570 return REF_DROP;
2247} 2571}
2248 2572
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 8fadce11..df5811a9 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_iropt.h" 17#include "lj_iropt.h"
@@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
225 /* Setup new snapshot. */ 225 /* Setup new snapshot. */
226 snap->mapofs = (uint32_t)nmapofs; 226 snap->mapofs = (uint32_t)nmapofs;
227 snap->ref = (IRRef1)J->cur.nins; 227 snap->ref = (IRRef1)J->cur.nins;
228 snap->mcofs = 0;
228 snap->nslots = nslots; 229 snap->nslots = nslots;
229 snap->topslot = osnap->topslot; 230 snap->topslot = osnap->topslot;
230 snap->count = 0; 231 snap->count = 0;
@@ -254,9 +255,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
254 J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); 255 J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap);
255} 256}
256 257
258typedef struct LoopState {
259 jit_State *J;
260 IRRef1 *subst;
261 MSize sizesubst;
262} LoopState;
263
257/* Unroll loop. */ 264/* Unroll loop. */
258static void loop_unroll(jit_State *J) 265static void loop_unroll(LoopState *lps)
259{ 266{
267 jit_State *J = lps->J;
260 IRRef1 phi[LJ_MAX_PHI]; 268 IRRef1 phi[LJ_MAX_PHI];
261 uint32_t nphi = 0; 269 uint32_t nphi = 0;
262 IRRef1 *subst; 270 IRRef1 *subst;
@@ -265,13 +273,13 @@ static void loop_unroll(jit_State *J)
265 SnapEntry *loopmap, *psentinel; 273 SnapEntry *loopmap, *psentinel;
266 IRRef ins, invar; 274 IRRef ins, invar;
267 275
268 /* Use temp buffer for substitution table. 276 /* Allocate substitution table.
269 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. 277 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
270 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
271 */ 278 */
272 invar = J->cur.nins; 279 invar = J->cur.nins;
273 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 280 lps->sizesubst = invar - REF_BIAS;
274 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; 281 lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1);
282 subst = lps->subst - REF_BIAS;
275 subst[REF_BASE] = REF_BASE; 283 subst[REF_BASE] = REF_BASE;
276 284
277 /* LOOP separates the pre-roll from the loop body. */ 285 /* LOOP separates the pre-roll from the loop body. */
@@ -292,7 +300,8 @@ static void loop_unroll(jit_State *J)
292 loopmap = &J->cur.snapmap[loopsnap->mapofs]; 300 loopmap = &J->cur.snapmap[loopsnap->mapofs];
293 /* The PC of snapshot #0 and the loop snapshot must match. */ 301 /* The PC of snapshot #0 and the loop snapshot must match. */
294 psentinel = &loopmap[loopsnap->nent]; 302 psentinel = &loopmap[loopsnap->nent];
295 lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); 303 lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent],
304 "mismatched PC for loop snapshot");
296 *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ 305 *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
297 306
298 /* Start substitution with snapshot #1 (#0 is empty for root traces). */ 307 /* Start substitution with snapshot #1 (#0 is empty for root traces). */
@@ -345,10 +354,12 @@ static void loop_unroll(jit_State *J)
345 irr = IR(ref); 354 irr = IR(ref);
346 goto phiconv; 355 goto phiconv;
347 } 356 }
348 } else if (ref != REF_DROP && irr->o == IR_CONV && 357 } else if (ref != REF_DROP && ref > invar &&
349 ref > invar && irr->op1 < invar) { 358 ((irr->o == IR_CONV && irr->op1 < invar) ||
350 /* May need an extra PHI for a CONV. */ 359 (irr->o == IR_ALEN && irr->op2 < invar &&
351 ref = irr->op1; 360 irr->op2 != REF_NIL))) {
361 /* May need an extra PHI for a CONV or ALEN hint. */
362 ref = irr->o == IR_CONV ? irr->op1 : irr->op2;
352 irr = IR(ref); 363 irr = IR(ref);
353 phiconv: 364 phiconv:
354 if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { 365 if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {
@@ -363,7 +374,7 @@ static void loop_unroll(jit_State *J)
363 } 374 }
364 if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ 375 if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
365 J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; 376 J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs;
366 lua_assert(J->cur.nsnapmap <= J->sizesnapmap); 377 lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index");
367 *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ 378 *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
368 379
369 loop_emit_phi(J, subst, phi, nphi, onsnap); 380 loop_emit_phi(J, subst, phi, nphi, onsnap);
@@ -396,7 +407,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
396static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) 407static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
397{ 408{
398 UNUSED(L); UNUSED(dummy); 409 UNUSED(L); UNUSED(dummy);
399 loop_unroll((jit_State *)ud); 410 loop_unroll((LoopState *)ud);
400 return NULL; 411 return NULL;
401} 412}
402 413
@@ -406,7 +417,13 @@ int lj_opt_loop(jit_State *J)
406 IRRef nins = J->cur.nins; 417 IRRef nins = J->cur.nins;
407 SnapNo nsnap = J->cur.nsnap; 418 SnapNo nsnap = J->cur.nsnap;
408 MSize nsnapmap = J->cur.nsnapmap; 419 MSize nsnapmap = J->cur.nsnapmap;
409 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 420 LoopState lps;
421 int errcode;
422 lps.J = J;
423 lps.subst = NULL;
424 lps.sizesubst = 0;
425 errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt);
426 lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1);
410 if (LJ_UNLIKELY(errcode)) { 427 if (LJ_UNLIKELY(errcode)) {
411 lua_State *L = J->L; 428 lua_State *L = J->L;
412 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ 429 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 60f6574f..81184f14 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -17,12 +17,14 @@
17#include "lj_ir.h" 17#include "lj_ir.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#include "lj_iropt.h" 19#include "lj_iropt.h"
20#include "lj_ircall.h"
21#include "lj_dispatch.h"
20 22
21/* Some local macros to save typing. Undef'd at the end. */ 23/* Some local macros to save typing. Undef'd at the end. */
22#define IR(ref) (&J->cur.ir[(ref)]) 24#define IR(ref) (&J->cur.ir[(ref)])
23#define fins (&J->fold.ins) 25#define fins (&J->fold.ins)
24#define fleft (&J->fold.left) 26#define fleft (J->fold.left)
25#define fright (&J->fold.right) 27#define fright (J->fold.right)
26 28
27/* 29/*
28** Caveat #1: return value is not always a TRef -- only use with tref_ref(). 30** Caveat #1: return value is not always a TRef -- only use with tref_ref().
@@ -55,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
55{ 57{
56 IRIns *taba = IR(ta), *tabb = IR(tb); 58 IRIns *taba = IR(ta), *tabb = IR(tb);
57 int newa, newb; 59 int newa, newb;
58 lua_assert(ta != tb); 60 lj_assertJ(ta != tb, "bad usage");
59 lua_assert(irt_istab(taba->t) && irt_istab(tabb->t)); 61 lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage");
60 /* Disambiguate new allocations. */ 62 /* Disambiguate new allocations. */
61 newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); 63 newa = (taba->o == IR_TNEW || taba->o == IR_TDUP);
62 newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); 64 newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP);
@@ -98,7 +100,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
98 /* Disambiguate array references based on index arithmetic. */ 100 /* Disambiguate array references based on index arithmetic. */
99 int32_t ofsa = 0, ofsb = 0; 101 int32_t ofsa = 0, ofsb = 0;
100 IRRef basea = ka, baseb = kb; 102 IRRef basea = ka, baseb = kb;
101 lua_assert(refb->o == IR_AREF); 103 lj_assertJ(refb->o == IR_AREF, "expected AREF");
102 /* Gather base and offset from t[base] or t[base+-ofs]. */ 104 /* Gather base and offset from t[base] or t[base+-ofs]. */
103 if (keya->o == IR_ADD && irref_isk(keya->op2)) { 105 if (keya->o == IR_ADD && irref_isk(keya->op2)) {
104 basea = keya->op1; 106 basea = keya->op1;
@@ -116,8 +118,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
116 return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ 118 return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
117 } else { 119 } else {
118 /* Disambiguate hash references based on the type of their keys. */ 120 /* Disambiguate hash references based on the type of their keys. */
119 lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && 121 lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
120 (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); 122 (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF),
123 "bad xREF IR op %d or %d", refa->o, refb->o);
121 if (!irt_sametype(keya->t, keyb->t)) 124 if (!irt_sametype(keya->t, keyb->t))
122 return ALIAS_NO; /* Different key types. */ 125 return ALIAS_NO; /* Different key types. */
123 } 126 }
@@ -191,7 +194,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
191 if (key->o == IR_KSLOT) key = IR(key->op1); 194 if (key->o == IR_KSLOT) key = IR(key->op1);
192 lj_ir_kvalue(J->L, &keyv, key); 195 lj_ir_kvalue(J->L, &keyv, key);
193 tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); 196 tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
194 lua_assert(itype2irt(tv) == irt_type(fins->t)); 197 lj_assertJ(itype2irt(tv) == irt_type(fins->t),
198 "mismatched type in constant table");
195 if (irt_isnum(fins->t)) 199 if (irt_isnum(fins->t))
196 return lj_ir_knum_u64(J, tv->u64); 200 return lj_ir_knum_u64(J, tv->u64);
197 else if (LJ_DUALNUM && irt_isint(fins->t)) 201 else if (LJ_DUALNUM && irt_isint(fins->t))
@@ -309,7 +313,21 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
309 return 1; /* No conflict. Can fold to niltv. */ 313 return 1; /* No conflict. Can fold to niltv. */
310} 314}
311 315
312/* Check whether there's no aliasing NEWREF for the left operand. */ 316/* Check whether there's no aliasing table.clear. */
317static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
318{
319 IRRef ref = J->chain[IR_CALLS];
320 while (ref > lim) {
321 IRIns *calls = IR(ref);
322 if (calls->op2 == IRCALL_lj_tab_clear &&
323 (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
324 return 0; /* Conflict. */
325 ref = calls->prev;
326 }
327 return 1; /* No conflict. Can safely FOLD/CSE. */
328}
329
330/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
313int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) 331int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
314{ 332{
315 IRRef ta = fins->op1; 333 IRRef ta = fins->op1;
@@ -320,7 +338,7 @@ int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
320 return 0; /* Conflict. */ 338 return 0; /* Conflict. */
321 ref = newref->prev; 339 ref = newref->prev;
322 } 340 }
323 return 1; /* No conflict. Can safely FOLD/CSE. */ 341 return fwd_aa_tab_clear(J, lim, ta);
324} 342}
325 343
326/* ASTORE/HSTORE elimination. */ 344/* ASTORE/HSTORE elimination. */
@@ -348,7 +366,7 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
348 IRIns *ir; 366 IRIns *ir;
349 /* Check for any intervening guards (includes conflicting loads). */ 367 /* Check for any intervening guards (includes conflicting loads). */
350 for (ir = IR(J->cur.nins-1); ir > store; ir--) 368 for (ir = IR(J->cur.nins-1); ir > store; ir--)
351 if (irt_isguard(ir->t) || ir->o == IR_CALLL) 369 if (irt_isguard(ir->t) || ir->o == IR_ALEN)
352 goto doemit; /* No elimination possible. */ 370 goto doemit; /* No elimination possible. */
353 /* Remove redundant store from chain and replace with NOP. */ 371 /* Remove redundant store from chain and replace with NOP. */
354 *refp = store->prev; 372 *refp = store->prev;
@@ -363,6 +381,67 @@ doemit:
363 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 381 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
364} 382}
365 383
384/* ALEN forwarding. */
385TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J)
386{
387 IRRef tab = fins->op1; /* Table reference. */
388 IRRef lim = tab; /* Search limit. */
389 IRRef ref;
390
391 /* Search for conflicting HSTORE with numeric key. */
392 ref = J->chain[IR_HSTORE];
393 while (ref > lim) {
394 IRIns *store = IR(ref);
395 IRIns *href = IR(store->op1);
396 IRIns *key = IR(href->op2);
397 if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
398 lim = ref; /* Conflicting store found, limits search for ALEN. */
399 break;
400 }
401 ref = store->prev;
402 }
403
404 /* Try to find a matching ALEN. */
405 ref = J->chain[IR_ALEN];
406 while (ref > lim) {
407 /* CSE for ALEN only depends on the table, not the hint. */
408 if (IR(ref)->op1 == tab) {
409 IRRef sref;
410
411 /* Search for aliasing table.clear. */
412 if (!fwd_aa_tab_clear(J, ref, tab))
413 break;
414
415 /* Search for hint-forwarding or conflicting store. */
416 sref = J->chain[IR_ASTORE];
417 while (sref > ref) {
418 IRIns *store = IR(sref);
419 IRIns *aref = IR(store->op1);
420 IRIns *fref = IR(aref->op1);
421 if (tab == fref->op1) { /* ASTORE to the same table. */
422 /* Detect t[#t+1] = x idiom for push. */
423 IRIns *idx = IR(aref->op2);
424 if (!irt_isnil(store->t) &&
425 idx->o == IR_ADD && idx->op1 == ref &&
426 IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) {
427 /* Note: this requires an extra PHI check in loop unroll. */
428 fins->op2 = aref->op2; /* Set ALEN hint. */
429 }
430 goto doemit; /* Conflicting store, possibly giving a hint. */
431 } else if (aa_table(J, tab, fref->op1) == ALIAS_NO) {
432 goto doemit; /* Conflicting store. */
433 }
434 sref = store->prev;
435 }
436
437 return ref; /* Plain ALEN forwarding. */
438 }
439 ref = IR(ref)->prev;
440 }
441doemit:
442 return EMITFOLD;
443}
444
366/* -- ULOAD forwarding ---------------------------------------------------- */ 445/* -- ULOAD forwarding ---------------------------------------------------- */
367 446
368/* The current alias analysis for upvalues is very simplistic. It only 447/* The current alias analysis for upvalues is very simplistic. It only
@@ -412,7 +491,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
412 491
413cselim: 492cselim:
414 /* Try to find a matching load. Below the conflicting store, if any. */ 493 /* Try to find a matching load. Below the conflicting store, if any. */
415
416 ref = J->chain[IR_ULOAD]; 494 ref = J->chain[IR_ULOAD];
417 while (ref > lim) { 495 while (ref > lim) {
418 IRIns *ir = IR(ref); 496 IRIns *ir = IR(ref);
@@ -542,8 +620,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
542 goto doemit; 620 goto doemit;
543 break; /* Otherwise continue searching. */ 621 break; /* Otherwise continue searching. */
544 case ALIAS_MUST: 622 case ALIAS_MUST:
545 if (store->op2 == val) /* Same value: drop the new store. */ 623 if (store->op2 == val &&
546 return DROPFOLD; 624 !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
625 return DROPFOLD; /* Same value: drop the new store. */
547 /* Different value: try to eliminate the redundant store. */ 626 /* Different value: try to eliminate the redundant store. */
548 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ 627 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
549 IRIns *ir; 628 IRIns *ir;
@@ -564,6 +643,29 @@ doemit:
564 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 643 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
565} 644}
566 645
646/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */
647int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
648{
649 IRRef ref;
650 if (J->chain[IR_BUFPUT] > lim)
651 return 0; /* Conflict. */
652 ref = J->chain[IR_CALLS];
653 while (ref > lim) {
654 IRIns *ir = IR(ref);
655 if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
656 return 0; /* Conflict. */
657 ref = ir->prev;
658 }
659 ref = J->chain[IR_CALLL];
660 while (ref > lim) {
661 IRIns *ir = IR(ref);
662 if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
663 return 0; /* Conflict. */
664 ref = ir->prev;
665 }
666 return 1; /* No conflict. Can safely FOLD/CSE. */
667}
668
567/* -- XLOAD forwarding and XSTORE elimination ----------------------------- */ 669/* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
568 670
569/* Find cdata allocation for a reference (if any). */ 671/* Find cdata allocation for a reference (if any). */
@@ -815,35 +917,6 @@ doemit:
815 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 917 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
816} 918}
817 919
818/* -- Forwarding of lj_tab_len -------------------------------------------- */
819
820/* This is rather simplistic right now, but better than nothing. */
821TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
822{
823 IRRef tab = fins->op1; /* Table reference. */
824 IRRef lim = tab; /* Search limit. */
825 IRRef ref;
826
827 /* Any ASTORE is a conflict and limits the search. */
828 if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
829
830 /* Search for conflicting HSTORE with numeric key. */
831 ref = J->chain[IR_HSTORE];
832 while (ref > lim) {
833 IRIns *store = IR(ref);
834 IRIns *href = IR(store->op1);
835 IRIns *key = IR(href->op2);
836 if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
837 lim = ref; /* Conflicting store found, limits search for TLEN. */
838 break;
839 }
840 ref = store->prev;
841 }
842
843 /* Try to find a matching load. Below the conflicting store, if any. */
844 return lj_opt_cselim(J, lim);
845}
846
847/* -- ASTORE/HSTORE previous type analysis -------------------------------- */ 920/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
848 921
849/* Check whether the previous value for a table store is non-nil. 922/* Check whether the previous value for a table store is non-nil.
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index d4732796..1a332bca 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
372 } else if (op == NARROW_CONV) { 372 } else if (op == NARROW_CONV) {
373 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ 373 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
374 } else if (op == NARROW_SEXT) { 374 } else if (op == NARROW_SEXT) {
375 lua_assert(sp >= nc->stack+1); 375 lj_assertJ(sp >= nc->stack+1, "stack underflow");
376 sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], 376 sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
377 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); 377 (IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
378 } else if (op == NARROW_INT) { 378 } else if (op == NARROW_INT) {
379 lua_assert(next < last); 379 lj_assertJ(next < last, "missing arg to NARROW_INT");
380 *sp++ = nc->t == IRT_I64 ? 380 *sp++ = nc->t == IRT_I64 ?
381 lj_ir_kint64(J, (int64_t)(int32_t)*next++) : 381 lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
382 lj_ir_kint(J, *next++); 382 lj_ir_kint(J, *next++);
383 } else { /* Regular IROpT. Pops two operands and pushes one result. */ 383 } else { /* Regular IROpT. Pops two operands and pushes one result. */
384 IRRef mode = nc->mode; 384 IRRef mode = nc->mode;
385 lua_assert(sp >= nc->stack+2); 385 lj_assertJ(sp >= nc->stack+2, "stack underflow");
386 sp--; 386 sp--;
387 /* Omit some overflow checks for array indexing. See comments above. */ 387 /* Omit some overflow checks for array indexing. See comments above. */
388 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { 388 if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
398 narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); 398 narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode);
399 } 399 }
400 } 400 }
401 lua_assert(sp == nc->stack+1); 401 lj_assertJ(sp == nc->stack+1, "stack misalignment");
402 return nc->stack[0]; 402 return nc->stack[0];
403} 403}
404 404
@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
452TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) 452TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
453{ 453{
454 IRIns *ir; 454 IRIns *ir;
455 lua_assert(tref_isnumber(tr)); 455 lj_assertJ(tref_isnumber(tr), "expected number type");
456 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ 456 if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
457 return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); 457 return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
458 /* Omit some overflow checks for array indexing. See comments above. */ 458 /* Omit some overflow checks for array indexing. See comments above. */
@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
499/* Narrow C array index (overflow undefined). */ 499/* Narrow C array index (overflow undefined). */
500TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) 500TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
501{ 501{
502 lua_assert(tref_isnumber(tr)); 502 lj_assertJ(tref_isnumber(tr), "expected number type");
503 if (tref_isnum(tr)) 503 if (tref_isnum(tr))
504 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); 504 return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
505 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ 505 /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
@@ -551,11 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
551{ 551{
552 rc = conv_str_tonum(J, rc, vc); 552 rc = conv_str_tonum(J, rc, vc);
553 if (tref_isinteger(rc)) { 553 if (tref_isinteger(rc)) {
554 if ((uint32_t)numberVint(vc) != 0x80000000u) 554 uint32_t k = (uint32_t)numberVint(vc);
555 return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); 555 if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) {
556 TRef zero = lj_ir_kint(J, 0);
557 if (!LJ_DUALNUM)
558 emitir(IRTGI(IR_NE), rc, zero);
559 return emitir(IRTGI(IR_SUBOV), zero, rc);
560 }
556 rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); 561 rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
557 } 562 }
558 return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); 563 return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
559} 564}
560 565
561/* Narrowing of modulo operator. */ 566/* Narrowing of modulo operator. */
@@ -588,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
588 /* Narrowing must be unconditional to preserve (-x)^i semantics. */ 593 /* Narrowing must be unconditional to preserve (-x)^i semantics. */
589 if (tvisint(vc) || numisint(numV(vc))) { 594 if (tvisint(vc) || numisint(numV(vc))) {
590 int checkrange = 0; 595 int checkrange = 0;
591 /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ 596 /* pow() is faster for bigger exponents. But do this only for (+k)^i. */
592 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { 597 if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
593 int32_t k = numberVint(vc); 598 int32_t k = numberVint(vc);
594 if (!(k >= -65536 && k <= 65536)) goto split_pow; 599 if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
595 checkrange = 1; 600 checkrange = 1;
596 } 601 }
597 if (!tref_isinteger(rc)) { 602 if (!tref_isinteger(rc)) {
@@ -602,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
602 TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); 607 TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
603 emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); 608 emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
604 } 609 }
605 return emitir(IRTN(IR_POW), rb, rc); 610 } else {
611force_pow_num:
612 rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */
606 } 613 }
607split_pow: 614 return emitir(IRTN(IR_POW), rb, rc);
608 /* FOLD covers most cases, but some are easier to do here. */
609 if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
610 return rb; /* 1 ^ x ==> 1 */
611 rc = lj_ir_tonum(J, rc);
612 if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
613 return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */
614 /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
615 rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
616 rc = emitir(IRTN(IR_MUL), rb, rc);
617 return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
618} 615}
619 616
620/* -- Predictive narrowing of induction variables ------------------------- */ 617/* -- Predictive narrowing of induction variables ------------------------- */
@@ -630,9 +627,10 @@ static int narrow_forl(jit_State *J, cTValue *o)
630/* Narrow the FORL index type by looking at the runtime values. */ 627/* Narrow the FORL index type by looking at the runtime values. */
631IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) 628IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
632{ 629{
633 lua_assert(tvisnumber(&tv[FORL_IDX]) && 630 lj_assertJ(tvisnumber(&tv[FORL_IDX]) &&
634 tvisnumber(&tv[FORL_STOP]) && 631 tvisnumber(&tv[FORL_STOP]) &&
635 tvisnumber(&tv[FORL_STEP])); 632 tvisnumber(&tv[FORL_STEP]),
633 "expected number types");
636 /* Narrow only if the runtime values of start/stop/step are all integers. */ 634 /* Narrow only if the runtime values of start/stop/step are all integers. */
637 if (narrow_forl(J, &tv[FORL_IDX]) && 635 if (narrow_forl(J, &tv[FORL_IDX]) &&
638 narrow_forl(J, &tv[FORL_STOP]) && 636 narrow_forl(J, &tv[FORL_STOP]) &&
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
index 9af35bea..5306a7db 100644
--- a/src/lj_opt_sink.c
+++ b/src/lj_opt_sink.c
@@ -78,8 +78,7 @@ static void sink_mark_ins(jit_State *J)
78 switch (ir->o) { 78 switch (ir->o) {
79 case IR_BASE: 79 case IR_BASE:
80 return; /* Finished. */ 80 return; /* Finished. */
81 case IR_CALLL: /* IRCALL_lj_tab_len */ 81 case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN:
82 case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:
83 irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ 82 irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
84 break; 83 break;
85 case IR_FLOAD: 84 case IR_FLOAD:
@@ -165,8 +164,8 @@ static void sink_remark_phi(jit_State *J)
165/* Sweep instructions and tag sunken allocations and stores. */ 164/* Sweep instructions and tag sunken allocations and stores. */
166static void sink_sweep_ins(jit_State *J) 165static void sink_sweep_ins(jit_State *J)
167{ 166{
168 IRIns *ir, *irfirst = IR(J->cur.nk); 167 IRIns *ir, *irbase = IR(REF_BASE);
169 for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { 168 for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
170 switch (ir->o) { 169 switch (ir->o) {
171 case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { 170 case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
172 IRIns *ira = sink_checkalloc(J, ir); 171 IRIns *ira = sink_checkalloc(J, ir);
@@ -216,6 +215,13 @@ static void sink_sweep_ins(jit_State *J)
216 break; 215 break;
217 } 216 }
218 } 217 }
218 for (ir = IR(J->cur.nk); ir < irbase; ir++) {
219 irt_clearmark(ir->t);
220 ir->prev = REGSP_INIT;
221 /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
222 if (irt_is64(ir->t) && ir->o != IR_KNULL)
223 ir++;
224 }
219} 225}
220 226
221/* Allocation sinking and store sinking. 227/* Allocation sinking and store sinking.
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 6a07cd1b..25c1c234 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -8,14 +8,15 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10 10
11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) 11#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_ircall.h" 17#include "lj_ircall.h"
18#include "lj_iropt.h" 18#include "lj_iropt.h"
19#include "lj_dispatch.h"
19#include "lj_vm.h" 20#include "lj_vm.h"
20 21
21/* SPLIT pass: 22/* SPLIT pass:
@@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 140 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 141 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
141} 142}
143#endif
142 144
143/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ 145/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
144static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, 146static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
155 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 157 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
156 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 158 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
157} 159}
158#endif
159 160
160/* Emit a CALLN with two split 64 bit arguments. */ 161/* Emit a CALLN with two split 64 bit arguments. */
161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, 162static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -192,9 +193,121 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
192 nref = ir->op1; 193 nref = ir->op1;
193 if (ofs == 0) return nref; 194 if (ofs == 0) return nref;
194 } 195 }
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); 196 return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
196} 197}
197 198
199#if LJ_HASFFI
200static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
201 IRIns *oir, IRIns *nir, IRIns *ir)
202{
203 IROp op = ir->o;
204 IRRef kref = nir->op2;
205 if (irref_isk(kref)) { /* Optimize constant shifts. */
206 int32_t k = (IR(kref)->i & 63);
207 IRRef lo = nir->op1, hi = hisubst[ir->op1];
208 if (op == IR_BROL || op == IR_BROR) {
209 if (op == IR_BROR) k = (-k & 63);
210 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
211 if (k == 0) {
212 passthrough:
213 J->cur.nins--;
214 ir->prev = lo;
215 return hi;
216 } else {
217 TRef k1, k2;
218 IRRef t1, t2, t3, t4;
219 J->cur.nins--;
220 k1 = lj_ir_kint(J, k);
221 k2 = lj_ir_kint(J, (-k & 31));
222 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
223 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
224 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
225 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
226 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
227 return split_emit(J, IRTI(IR_BOR), t2, t3);
228 }
229 } else if (k == 0) {
230 goto passthrough;
231 } else if (k < 32) {
232 if (op == IR_BSHL) {
233 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
234 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
235 return split_emit(J, IRTI(IR_BOR), t1, t2);
236 } else {
237 IRRef t1 = ir->prev, t2;
238 lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
239 nir->o = IR_BSHR;
240 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
241 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
242 return split_emit(J, IRTI(op), hi, kref);
243 }
244 } else {
245 if (op == IR_BSHL) {
246 if (k == 32)
247 J->cur.nins--;
248 else
249 lo = ir->prev;
250 ir->prev = lj_ir_kint(J, 0);
251 return lo;
252 } else {
253 lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
254 if (k == 32) {
255 J->cur.nins--;
256 ir->prev = hi;
257 } else {
258 nir->op1 = hi;
259 }
260 if (op == IR_BSHR)
261 return lj_ir_kint(J, 0);
262 else
263 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
264 }
265 }
266 }
267 return split_call_li(J, hisubst, oir, ir,
268 op - IR_BSHL + IRCALL_lj_carith_shl64);
269}
270
271static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
272 IRIns *nir, IRIns *ir)
273{
274 IROp op = ir->o;
275 IRRef hi, kref = nir->op2;
276 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
277 int32_t k = IR(kref)->i;
278 if (k == 0 || k == -1) {
279 if (op == IR_BAND) k = ~k;
280 if (k == 0) {
281 J->cur.nins--;
282 ir->prev = nir->op1;
283 } else if (op == IR_BXOR) {
284 nir->o = IR_BNOT;
285 nir->op2 = 0;
286 } else {
287 J->cur.nins--;
288 ir->prev = kref;
289 }
290 }
291 }
292 hi = hisubst[ir->op1];
293 kref = hisubst[ir->op2];
294 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
295 int32_t k = IR(kref)->i;
296 if (k == 0 || k == -1) {
297 if (op == IR_BAND) k = ~k;
298 if (k == 0) {
299 return hi;
300 } else if (op == IR_BXOR) {
301 return split_emit(J, IRTI(IR_BNOT), hi, 0);
302 } else {
303 return kref;
304 }
305 }
306 }
307 return split_emit(J, IRTI(op), hi, kref);
308}
309#endif
310
198/* Substitute references of a snapshot. */ 311/* Substitute references of a snapshot. */
199static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) 312static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
200{ 313{
@@ -214,7 +327,7 @@ static void split_ir(jit_State *J)
214 IRRef nins = J->cur.nins, nk = J->cur.nk; 327 IRRef nins = J->cur.nins, nk = J->cur.nk;
215 MSize irlen = nins - nk; 328 MSize irlen = nins - nk;
216 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); 329 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
217 IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); 330 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
218 IRRef1 *hisubst; 331 IRRef1 *hisubst;
219 IRRef ref, snref; 332 IRRef ref, snref;
220 SnapShot *snap; 333 SnapShot *snap;
@@ -241,6 +354,8 @@ static void split_ir(jit_State *J)
241 ir->prev = ref; /* Identity substitution for loword. */ 354 ir->prev = ref; /* Identity substitution for loword. */
242 hisubst[ref] = 0; 355 hisubst[ref] = 0;
243 } 356 }
357 if (irt_is64(ir->t) && ir->o != IR_KNULL)
358 ref++;
244 } 359 }
245 360
246 /* Process old IR instructions. */ 361 /* Process old IR instructions. */
@@ -288,32 +403,8 @@ static void split_ir(jit_State *J)
288 hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); 403 hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
289 break; 404 break;
290 case IR_FPMATH: 405 case IR_FPMATH:
291 /* Try to rejoin pow from EXP2, MUL and LOG2. */
292 if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
293 IRIns *irp = IR(nir->op1);
294 if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
295 IRIns *irm4 = IR(irp->op1);
296 IRIns *irm3 = IR(irm4->op1);
297 IRIns *irm12 = IR(irm3->op1);
298 IRIns *irl1 = IR(irm12->op1);
299 if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
300 irl1->op2 == IRCALL_lj_vm_log2) {
301 IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */
302 IRRef arg3 = irm3->op2, arg4 = irm4->op2;
303 J->cur.nins--;
304 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
305 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
306 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
307 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
308 break;
309 }
310 }
311 }
312 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); 406 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
313 break; 407 break;
314 case IR_ATAN2:
315 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
316 break;
317 case IR_LDEXP: 408 case IR_LDEXP:
318 hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); 409 hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
319 break; 410 break;
@@ -321,7 +412,8 @@ static void split_ir(jit_State *J)
321 nir->o = IR_CONV; /* Pass through loword. */ 412 nir->o = IR_CONV; /* Pass through loword. */
322 nir->op2 = (IRT_INT << 5) | IRT_INT; 413 nir->op2 = (IRT_INT << 5) | IRT_INT;
323 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), 414 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
324 hisubst[ir->op1], hisubst[ir->op2]); 415 hisubst[ir->op1],
416 lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
325 break; 417 break;
326 case IR_SLOAD: 418 case IR_SLOAD:
327 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ 419 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */
@@ -336,15 +428,24 @@ static void split_ir(jit_State *J)
336 case IR_STRTO: 428 case IR_STRTO:
337 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); 429 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
338 break; 430 break;
431 case IR_FLOAD:
432 lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
433 hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
434 nir->op2 += LJ_BE*4;
435 break;
339 case IR_XLOAD: { 436 case IR_XLOAD: {
340 IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ 437 IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
341 J->cur.nins--; 438 J->cur.nins--;
342 hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ 439 hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */
440#if LJ_BE
441 hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
442 inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
443#endif
343 nref = lj_ir_nextins(J); 444 nref = lj_ir_nextins(J);
344 nir = IR(nref); 445 nir = IR(nref);
345 *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ 446 *nir = inslo; /* Re-emit lo XLOAD. */
346 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
347#if LJ_LE 447#if LJ_LE
448 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
348 ir->prev = nref; 449 ir->prev = nref;
349#else 450#else
350 ir->prev = hi; hi = nref; 451 ir->prev = hi; hi = nref;
@@ -364,8 +465,9 @@ static void split_ir(jit_State *J)
364 break; 465 break;
365 } 466 }
366#endif 467#endif
367 lua_assert(st == IRT_INT || 468 lj_assertJ(st == IRT_INT ||
368 (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); 469 (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
470 "bad source type for CONV");
369 nir->o = IR_CALLN; 471 nir->o = IR_CALLN;
370#if LJ_32 && LJ_HASFFI 472#if LJ_32 && LJ_HASFFI
371 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : 473 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
@@ -395,7 +497,8 @@ static void split_ir(jit_State *J)
395 hi = nir->op2; 497 hi = nir->op2;
396 break; 498 break;
397 default: 499 default:
398 lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); 500 lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
501 "bad IR op %d", ir->o);
399 hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), 502 hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
400 hisubst[ir->op1], hisubst[ir->op2]); 503 hisubst[ir->op1], hisubst[ir->op2]);
401 break; 504 break;
@@ -438,8 +541,21 @@ static void split_ir(jit_State *J)
438 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 541 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
439 IRCALL_lj_carith_powu64); 542 IRCALL_lj_carith_powu64);
440 break; 543 break;
544 case IR_BNOT:
545 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
546 break;
547 case IR_BSWAP:
548 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
549 hi = nref;
550 break;
551 case IR_BAND: case IR_BOR: case IR_BXOR:
552 hi = split_bitop(J, hisubst, nir, ir);
553 break;
554 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
555 hi = split_bitshift(J, hisubst, oir, nir, ir);
556 break;
441 case IR_FLOAD: 557 case IR_FLOAD:
442 lua_assert(ir->op2 == IRFL_CDATA_INT64); 558 lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
443 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); 559 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
444#if LJ_BE 560#if LJ_BE
445 ir->prev = hi; hi = nref; 561 ir->prev = hi; hi = nref;
@@ -505,7 +621,7 @@ static void split_ir(jit_State *J)
505 hi = nir->op2; 621 hi = nir->op2;
506 break; 622 break;
507 default: 623 default:
508 lua_assert(ir->o <= IR_NE); /* Comparisons. */ 624 lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */
509 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); 625 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
510 break; 626 break;
511 } 627 }
@@ -529,7 +645,7 @@ static void split_ir(jit_State *J)
529 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); 645 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
530#endif 646#endif
531 ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); 647 ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
532 } else if (ir->o == IR_TOSTR) { 648 } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
533 if (hisubst[ir->op1]) { 649 if (hisubst[ir->op1]) {
534 if (irref_isk(ir->op1)) 650 if (irref_isk(ir->op1))
535 nir->op1 = ir->op1; 651 nir->op1 = ir->op1;
@@ -583,7 +699,7 @@ static void split_ir(jit_State *J)
583#if LJ_SOFTFP 699#if LJ_SOFTFP
584 if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { 700 if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
585 if (irt_isguard(ir->t)) { 701 if (irt_isguard(ir->t)) {
586 lua_assert(st == IRT_NUM && irt_isint(ir->t)); 702 lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
587 J->cur.nins--; 703 J->cur.nins--;
588 ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); 704 ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
589 } else { 705 } else {
@@ -714,7 +830,7 @@ void lj_opt_split(jit_State *J)
714 if (!J->needsplit) 830 if (!J->needsplit)
715 J->needsplit = split_needsplit(J); 831 J->needsplit = split_needsplit(J);
716#else 832#else
717 lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ 833 lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
718#endif 834#endif
719 if (J->needsplit) { 835 if (J->needsplit) {
720 int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); 836 int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 401b7d09..ea64677f 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_buf.h"
16#include "lj_str.h" 17#include "lj_str.h"
17#include "lj_tab.h" 18#include "lj_tab.h"
18#include "lj_func.h" 19#include "lj_func.h"
@@ -21,6 +22,7 @@
21#if LJ_HASFFI 22#if LJ_HASFFI
22#include "lj_ctype.h" 23#include "lj_ctype.h"
23#endif 24#endif
25#include "lj_strfmt.h"
24#include "lj_lex.h" 26#include "lj_lex.h"
25#include "lj_parse.h" 27#include "lj_parse.h"
26#include "lj_vm.h" 28#include "lj_vm.h"
@@ -161,16 +163,22 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD);
161LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); 163LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD);
162LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); 164LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
163 165
166#ifdef LUA_USE_ASSERT
167#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__))
168#else
169#define lj_assertFS(c, ...) ((void)fs)
170#endif
171
164/* -- Error handling ------------------------------------------------------ */ 172/* -- Error handling ------------------------------------------------------ */
165 173
166LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) 174LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
167{ 175{
168 lj_lex_error(ls, ls->token, em); 176 lj_lex_error(ls, ls->tok, em);
169} 177}
170 178
171LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) 179LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
172{ 180{
173 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); 181 lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
174} 182}
175 183
176LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) 184LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -198,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e)
198{ 206{
199 lua_State *L = fs->L; 207 lua_State *L = fs->L;
200 TValue *o; 208 TValue *o;
201 lua_assert(expr_isnumk(e)); 209 lj_assertFS(expr_isnumk(e), "bad usage");
202 o = lj_tab_set(L, fs->kt, &e->u.nval); 210 o = lj_tab_set(L, fs->kt, &e->u.nval);
203 if (tvhaskslot(o)) 211 if (tvhaskslot(o))
204 return tvkslot(o); 212 return tvkslot(o);
@@ -223,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype)
223/* Add a string constant. */ 231/* Add a string constant. */
224static BCReg const_str(FuncState *fs, ExpDesc *e) 232static BCReg const_str(FuncState *fs, ExpDesc *e)
225{ 233{
226 lua_assert(expr_isstrk(e) || e->k == VGLOBAL); 234 lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage");
227 return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); 235 return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR);
228} 236}
229 237
@@ -311,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest)
311{ 319{
312 BCIns *jmp = &fs->bcbase[pc].ins; 320 BCIns *jmp = &fs->bcbase[pc].ins;
313 BCPos offset = dest-(pc+1)+BCBIAS_J; 321 BCPos offset = dest-(pc+1)+BCBIAS_J;
314 lua_assert(dest != NO_JMP); 322 lj_assertFS(dest != NO_JMP, "uninitialized jump target");
315 if (offset > BCMAX_D) 323 if (offset > BCMAX_D)
316 err_syntax(fs->ls, LJ_ERR_XJUMP); 324 err_syntax(fs->ls, LJ_ERR_XJUMP);
317 setbc_d(jmp, offset); 325 setbc_d(jmp, offset);
@@ -360,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target)
360 if (target == fs->pc) { 368 if (target == fs->pc) {
361 jmp_tohere(fs, list); 369 jmp_tohere(fs, list);
362 } else { 370 } else {
363 lua_assert(target < fs->pc); 371 lj_assertFS(target < fs->pc, "bad jump target");
364 jmp_patchval(fs, list, target, NO_REG, target); 372 jmp_patchval(fs, list, target, NO_REG, target);
365 } 373 }
366} 374}
@@ -390,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg)
390{ 398{
391 if (reg >= fs->nactvar) { 399 if (reg >= fs->nactvar) {
392 fs->freereg--; 400 fs->freereg--;
393 lua_assert(reg == fs->freereg); 401 lj_assertFS(reg == fs->freereg, "bad regfree");
394 } 402 }
395} 403}
396 404
@@ -540,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
540 } else if (e->k <= VKTRUE) { 548 } else if (e->k <= VKTRUE) {
541 ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); 549 ins = BCINS_AD(BC_KPRI, reg, const_pri(e));
542 } else { 550 } else {
543 lua_assert(e->k == VVOID || e->k == VJMP); 551 lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k);
544 return; 552 return;
545 } 553 }
546 bcemit_INS(fs, ins); 554 bcemit_INS(fs, ins);
@@ -635,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
635 ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); 643 ins = BCINS_AD(BC_GSET, ra, const_str(fs, var));
636 } else { 644 } else {
637 BCReg ra, rc; 645 BCReg ra, rc;
638 lua_assert(var->k == VINDEXED); 646 lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k);
639 ra = expr_toanyreg(fs, e); 647 ra = expr_toanyreg(fs, e);
640 rc = var->u.s.aux; 648 rc = var->u.s.aux;
641 if ((int32_t)rc < 0) { 649 if ((int32_t)rc < 0) {
@@ -643,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
643 } else if (rc > BCMAX_C) { 651 } else if (rc > BCMAX_C) {
644 ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); 652 ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1));
645 } else { 653 } else {
654#ifdef LUA_USE_ASSERT
646 /* Free late alloced key reg to avoid assert on free of value reg. */ 655 /* Free late alloced key reg to avoid assert on free of value reg. */
647 /* This can only happen when called from expr_table(). */ 656 /* This can only happen when called from expr_table(). */
648 lua_assert(e->k != VNONRELOC || ra < fs->nactvar || 657 if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra)
649 rc < ra || (bcreg_free(fs, rc),1)); 658 bcreg_free(fs, rc);
659#endif
650 ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); 660 ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc);
651 } 661 }
652 } 662 }
@@ -660,16 +670,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
660 BCReg idx, func, obj = expr_toanyreg(fs, e); 670 BCReg idx, func, obj = expr_toanyreg(fs, e);
661 expr_free(fs, e); 671 expr_free(fs, e);
662 func = fs->freereg; 672 func = fs->freereg;
663 bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */ 673 bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */
664 lua_assert(expr_isstrk(key)); 674 lj_assertFS(expr_isstrk(key), "bad usage");
665 idx = const_str(fs, key); 675 idx = const_str(fs, key);
666 if (idx <= BCMAX_C) { 676 if (idx <= BCMAX_C) {
667 bcreg_reserve(fs, 2); 677 bcreg_reserve(fs, 2+LJ_FR2);
668 bcemit_ABC(fs, BC_TGETS, func, obj, idx); 678 bcemit_ABC(fs, BC_TGETS, func, obj, idx);
669 } else { 679 } else {
670 bcreg_reserve(fs, 3); 680 bcreg_reserve(fs, 3+LJ_FR2);
671 bcemit_AD(fs, BC_KSTR, func+2, idx); 681 bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx);
672 bcemit_ABC(fs, BC_TGETV, func, obj, func+2); 682 bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2);
673 fs->freereg--; 683 fs->freereg--;
674 } 684 }
675 e->u.s.info = func; 685 e->u.s.info = func;
@@ -801,7 +811,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
801 else 811 else
802 rc = expr_toanyreg(fs, e2); 812 rc = expr_toanyreg(fs, e2);
803 /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ 813 /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */
804 lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC); 814 lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC,
815 "bad expr type %d", e1->k);
805 expr_toval(fs, e1); 816 expr_toval(fs, e1);
806 /* Avoid two consts to satisfy bytecode constraints. */ 817 /* Avoid two consts to satisfy bytecode constraints. */
807 if (expr_isnumk(e1) && !expr_isnumk(e2) && 818 if (expr_isnumk(e1) && !expr_isnumk(e2) &&
@@ -889,19 +900,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
889 if (op <= OPR_POW) { 900 if (op <= OPR_POW) {
890 bcemit_arith(fs, op, e1, e2); 901 bcemit_arith(fs, op, e1, e2);
891 } else if (op == OPR_AND) { 902 } else if (op == OPR_AND) {
892 lua_assert(e1->t == NO_JMP); /* List must be closed. */ 903 lj_assertFS(e1->t == NO_JMP, "jump list not closed");
893 expr_discharge(fs, e2); 904 expr_discharge(fs, e2);
894 jmp_append(fs, &e2->f, e1->f); 905 jmp_append(fs, &e2->f, e1->f);
895 *e1 = *e2; 906 *e1 = *e2;
896 } else if (op == OPR_OR) { 907 } else if (op == OPR_OR) {
897 lua_assert(e1->f == NO_JMP); /* List must be closed. */ 908 lj_assertFS(e1->f == NO_JMP, "jump list not closed");
898 expr_discharge(fs, e2); 909 expr_discharge(fs, e2);
899 jmp_append(fs, &e2->t, e1->t); 910 jmp_append(fs, &e2->t, e1->t);
900 *e1 = *e2; 911 *e1 = *e2;
901 } else if (op == OPR_CONCAT) { 912 } else if (op == OPR_CONCAT) {
902 expr_toval(fs, e2); 913 expr_toval(fs, e2);
903 if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { 914 if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) {
904 lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); 915 lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1,
916 "bad CAT stack layout");
905 expr_free(fs, e1); 917 expr_free(fs, e1);
906 setbc_b(bcptr(fs, e2), e1->u.s.info); 918 setbc_b(bcptr(fs, e2), e1->u.s.info);
907 e1->u.s.info = e2->u.s.info; 919 e1->u.s.info = e2->u.s.info;
@@ -913,8 +925,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
913 } 925 }
914 e1->k = VRELOCABLE; 926 e1->k = VRELOCABLE;
915 } else { 927 } else {
916 lua_assert(op == OPR_NE || op == OPR_EQ || 928 lj_assertFS(op == OPR_NE || op == OPR_EQ ||
917 op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT); 929 op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT,
930 "bad binop %d", op);
918 bcemit_comp(fs, op, e1, e2); 931 bcemit_comp(fs, op, e1, e2);
919 } 932 }
920} 933}
@@ -943,10 +956,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
943 e->u.s.info = fs->freereg-1; 956 e->u.s.info = fs->freereg-1;
944 e->k = VNONRELOC; 957 e->k = VNONRELOC;
945 } else { 958 } else {
946 lua_assert(e->k == VNONRELOC); 959 lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
947 } 960 }
948 } else { 961 } else {
949 lua_assert(op == BC_UNM || op == BC_LEN); 962 lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op);
950 if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ 963 if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */
951#if LJ_HASFFI 964#if LJ_HASFFI
952 if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ 965 if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */
@@ -986,7 +999,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
986/* Check and consume optional token. */ 999/* Check and consume optional token. */
987static int lex_opt(LexState *ls, LexToken tok) 1000static int lex_opt(LexState *ls, LexToken tok)
988{ 1001{
989 if (ls->token == tok) { 1002 if (ls->tok == tok) {
990 lj_lex_next(ls); 1003 lj_lex_next(ls);
991 return 1; 1004 return 1;
992 } 1005 }
@@ -996,7 +1009,7 @@ static int lex_opt(LexState *ls, LexToken tok)
996/* Check and consume token. */ 1009/* Check and consume token. */
997static void lex_check(LexState *ls, LexToken tok) 1010static void lex_check(LexState *ls, LexToken tok)
998{ 1011{
999 if (ls->token != tok) 1012 if (ls->tok != tok)
1000 err_token(ls, tok); 1013 err_token(ls, tok);
1001 lj_lex_next(ls); 1014 lj_lex_next(ls);
1002} 1015}
@@ -1010,7 +1023,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1010 } else { 1023 } else {
1011 const char *swhat = lj_lex_token2str(ls, what); 1024 const char *swhat = lj_lex_token2str(ls, what);
1012 const char *swho = lj_lex_token2str(ls, who); 1025 const char *swho = lj_lex_token2str(ls, who);
1013 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); 1026 lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
1014 } 1027 }
1015 } 1028 }
1016} 1029}
@@ -1019,9 +1032,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1019static GCstr *lex_str(LexState *ls) 1032static GCstr *lex_str(LexState *ls)
1020{ 1033{
1021 GCstr *s; 1034 GCstr *s;
1022 if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) 1035 if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
1023 err_token(ls, TK_name); 1036 err_token(ls, TK_name);
1024 s = strV(&ls->tokenval); 1037 s = strV(&ls->tokval);
1025 lj_lex_next(ls); 1038 lj_lex_next(ls);
1026 return s; 1039 return s;
1027} 1040}
@@ -1041,8 +1054,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name)
1041 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); 1054 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
1042 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); 1055 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
1043 } 1056 }
1044 lua_assert((uintptr_t)name < VARNAME__MAX || 1057 lj_assertFS((uintptr_t)name < VARNAME__MAX ||
1045 lj_tab_getstr(fs->kt, name) != NULL); 1058 lj_tab_getstr(fs->kt, name) != NULL,
1059 "unanchored variable name");
1046 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ 1060 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
1047 setgcref(ls->vstack[vtop].name, obj2gco(name)); 1061 setgcref(ls->vstack[vtop].name, obj2gco(name));
1048 fs->varmap[fs->nactvar+n] = (uint16_t)vtop; 1062 fs->varmap[fs->nactvar+n] = (uint16_t)vtop;
@@ -1097,7 +1111,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e)
1097 return i; /* Already exists. */ 1111 return i; /* Already exists. */
1098 /* Otherwise create a new one. */ 1112 /* Otherwise create a new one. */
1099 checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); 1113 checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues");
1100 lua_assert(e->k == VLOCAL || e->k == VUPVAL); 1114 lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k);
1101 fs->uvmap[n] = (uint16_t)vidx; 1115 fs->uvmap[n] = (uint16_t)vidx;
1102 fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info); 1116 fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info);
1103 fs->nuv = n+1; 1117 fs->nuv = n+1;
@@ -1148,7 +1162,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc)
1148 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); 1162 lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
1149 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); 1163 lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
1150 } 1164 }
1151 lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL); 1165 lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL,
1166 "unanchored label name");
1152 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ 1167 /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
1153 setgcref(ls->vstack[vtop].name, obj2gco(name)); 1168 setgcref(ls->vstack[vtop].name, obj2gco(name));
1154 ls->vstack[vtop].startpc = pc; 1169 ls->vstack[vtop].startpc = pc;
@@ -1178,8 +1193,9 @@ static void gola_close(LexState *ls, VarInfo *vg)
1178 FuncState *fs = ls->fs; 1193 FuncState *fs = ls->fs;
1179 BCPos pc = vg->startpc; 1194 BCPos pc = vg->startpc;
1180 BCIns *ip = &fs->bcbase[pc].ins; 1195 BCIns *ip = &fs->bcbase[pc].ins;
1181 lua_assert(gola_isgoto(vg)); 1196 lj_assertFS(gola_isgoto(vg), "expected goto");
1182 lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO); 1197 lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO,
1198 "bad bytecode op %d", bc_op(*ip));
1183 setbc_a(ip, vg->slot); 1199 setbc_a(ip, vg->slot);
1184 if (bc_op(*ip) == BC_JMP) { 1200 if (bc_op(*ip) == BC_JMP) {
1185 BCPos next = jmp_next(fs, pc); 1201 BCPos next = jmp_next(fs, pc);
@@ -1198,9 +1214,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx)
1198 if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { 1214 if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) {
1199 if (vg->slot < vl->slot) { 1215 if (vg->slot < vl->slot) {
1200 GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); 1216 GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name);
1201 lua_assert((uintptr_t)name >= VARNAME__MAX); 1217 lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name");
1202 ls->linenumber = ls->fs->bcbase[vg->startpc].line; 1218 ls->linenumber = ls->fs->bcbase[vg->startpc].line;
1203 lua_assert(strref(vg->name) != NAME_BREAK); 1219 lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break");
1204 lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, 1220 lj_lex_error(ls, 0, LJ_ERR_XGSCOPE,
1205 strdata(strref(vg->name)), strdata(name)); 1221 strdata(strref(vg->name)), strdata(name));
1206 } 1222 }
@@ -1264,7 +1280,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags)
1264 bl->vstart = fs->ls->vtop; 1280 bl->vstart = fs->ls->vtop;
1265 bl->prev = fs->bl; 1281 bl->prev = fs->bl;
1266 fs->bl = bl; 1282 fs->bl = bl;
1267 lua_assert(fs->freereg == fs->nactvar); 1283 lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc");
1268} 1284}
1269 1285
1270/* End a scope. */ 1286/* End a scope. */
@@ -1275,7 +1291,7 @@ static void fscope_end(FuncState *fs)
1275 fs->bl = bl->prev; 1291 fs->bl = bl->prev;
1276 var_remove(ls, bl->nactvar); 1292 var_remove(ls, bl->nactvar);
1277 fs->freereg = fs->nactvar; 1293 fs->freereg = fs->nactvar;
1278 lua_assert(bl->nactvar == fs->nactvar); 1294 lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc");
1279 if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) 1295 if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL)
1280 bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); 1296 bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0);
1281 if ((bl->flags & FSCOPE_BREAK)) { 1297 if ((bl->flags & FSCOPE_BREAK)) {
@@ -1362,13 +1378,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
1362 Node *n = &node[i]; 1378 Node *n = &node[i];
1363 if (tvhaskslot(&n->val)) { 1379 if (tvhaskslot(&n->val)) {
1364 ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); 1380 ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val);
1365 lua_assert(!tvisint(&n->key)); 1381 lj_assertFS(!tvisint(&n->key), "unexpected integer key");
1366 if (tvisnum(&n->key)) { 1382 if (tvisnum(&n->key)) {
1367 TValue *tv = &((TValue *)kptr)[kidx]; 1383 TValue *tv = &((TValue *)kptr)[kidx];
1368 if (LJ_DUALNUM) { 1384 if (LJ_DUALNUM) {
1369 lua_Number nn = numV(&n->key); 1385 lua_Number nn = numV(&n->key);
1370 int32_t k = lj_num2int(nn); 1386 int32_t k = lj_num2int(nn);
1371 lua_assert(!tvismzero(&n->key)); 1387 lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
1372 if ((lua_Number)k == nn) 1388 if ((lua_Number)k == nn)
1373 setintV(tv, k); 1389 setintV(tv, k);
1374 else 1390 else
@@ -1416,98 +1432,66 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
1416 uint8_t *li = (uint8_t *)lineinfo; 1432 uint8_t *li = (uint8_t *)lineinfo;
1417 do { 1433 do {
1418 BCLine delta = base[i].line - first; 1434 BCLine delta = base[i].line - first;
1419 lua_assert(delta >= 0 && delta < 256); 1435 lj_assertFS(delta >= 0 && delta < 256, "bad line delta");
1420 li[i] = (uint8_t)delta; 1436 li[i] = (uint8_t)delta;
1421 } while (++i < n); 1437 } while (++i < n);
1422 } else if (LJ_LIKELY(numline < 65536)) { 1438 } else if (LJ_LIKELY(numline < 65536)) {
1423 uint16_t *li = (uint16_t *)lineinfo; 1439 uint16_t *li = (uint16_t *)lineinfo;
1424 do { 1440 do {
1425 BCLine delta = base[i].line - first; 1441 BCLine delta = base[i].line - first;
1426 lua_assert(delta >= 0 && delta < 65536); 1442 lj_assertFS(delta >= 0 && delta < 65536, "bad line delta");
1427 li[i] = (uint16_t)delta; 1443 li[i] = (uint16_t)delta;
1428 } while (++i < n); 1444 } while (++i < n);
1429 } else { 1445 } else {
1430 uint32_t *li = (uint32_t *)lineinfo; 1446 uint32_t *li = (uint32_t *)lineinfo;
1431 do { 1447 do {
1432 BCLine delta = base[i].line - first; 1448 BCLine delta = base[i].line - first;
1433 lua_assert(delta >= 0); 1449 lj_assertFS(delta >= 0, "bad line delta");
1434 li[i] = (uint32_t)delta; 1450 li[i] = (uint32_t)delta;
1435 } while (++i < n); 1451 } while (++i < n);
1436 } 1452 }
1437} 1453}
1438 1454
1439/* Resize buffer if needed. */
1440static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
1441{
1442 MSize sz = ls->sb.sz * 2;
1443 while (ls->sb.n + len > sz) sz = sz * 2;
1444 lj_str_resizebuf(ls->L, &ls->sb, sz);
1445}
1446
1447static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
1448{
1449 if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
1450 fs_buf_resize(ls, len);
1451}
1452
1453/* Add string to buffer. */
1454static void fs_buf_str(LexState *ls, const char *str, MSize len)
1455{
1456 char *p = ls->sb.buf + ls->sb.n;
1457 MSize i;
1458 ls->sb.n += len;
1459 for (i = 0; i < len; i++) p[i] = str[i];
1460}
1461
1462/* Add ULEB128 value to buffer. */
1463static void fs_buf_uleb128(LexState *ls, uint32_t v)
1464{
1465 MSize n = ls->sb.n;
1466 uint8_t *p = (uint8_t *)ls->sb.buf;
1467 for (; v >= 0x80; v >>= 7)
1468 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
1469 p[n++] = (uint8_t)v;
1470 ls->sb.n = n;
1471}
1472
1473/* Prepare variable info for prototype. */ 1455/* Prepare variable info for prototype. */
1474static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) 1456static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
1475{ 1457{
1476 VarInfo *vs =ls->vstack, *ve; 1458 VarInfo *vs =ls->vstack, *ve;
1477 MSize i, n; 1459 MSize i, n;
1478 BCPos lastpc; 1460 BCPos lastpc;
1479 lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ 1461 lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
1480 /* Store upvalue names. */ 1462 /* Store upvalue names. */
1481 for (i = 0, n = fs->nuv; i < n; i++) { 1463 for (i = 0, n = fs->nuv; i < n; i++) {
1482 GCstr *s = strref(vs[fs->uvmap[i]].name); 1464 GCstr *s = strref(vs[fs->uvmap[i]].name);
1483 MSize len = s->len+1; 1465 MSize len = s->len+1;
1484 fs_buf_need(ls, len); 1466 char *p = lj_buf_more(&ls->sb, len);
1485 fs_buf_str(ls, strdata(s), len); 1467 p = lj_buf_wmem(p, strdata(s), len);
1468 ls->sb.w = p;
1486 } 1469 }
1487 *ofsvar = ls->sb.n; 1470 *ofsvar = sbuflen(&ls->sb);
1488 lastpc = 0; 1471 lastpc = 0;
1489 /* Store local variable names and compressed ranges. */ 1472 /* Store local variable names and compressed ranges. */
1490 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { 1473 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
1491 if (!gola_isgotolabel(vs)) { 1474 if (!gola_isgotolabel(vs)) {
1492 GCstr *s = strref(vs->name); 1475 GCstr *s = strref(vs->name);
1493 BCPos startpc; 1476 BCPos startpc;
1477 char *p;
1494 if ((uintptr_t)s < VARNAME__MAX) { 1478 if ((uintptr_t)s < VARNAME__MAX) {
1495 fs_buf_need(ls, 1 + 2*5); 1479 p = lj_buf_more(&ls->sb, 1 + 2*5);
1496 ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; 1480 *p++ = (char)(uintptr_t)s;
1497 } else { 1481 } else {
1498 MSize len = s->len+1; 1482 MSize len = s->len+1;
1499 fs_buf_need(ls, len + 2*5); 1483 p = lj_buf_more(&ls->sb, len + 2*5);
1500 fs_buf_str(ls, strdata(s), len); 1484 p = lj_buf_wmem(p, strdata(s), len);
1501 } 1485 }
1502 startpc = vs->startpc; 1486 startpc = vs->startpc;
1503 fs_buf_uleb128(ls, startpc-lastpc); 1487 p = lj_strfmt_wuleb128(p, startpc-lastpc);
1504 fs_buf_uleb128(ls, vs->endpc-startpc); 1488 p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
1489 ls->sb.w = p;
1505 lastpc = startpc; 1490 lastpc = startpc;
1506 } 1491 }
1507 } 1492 }
1508 fs_buf_need(ls, 1); 1493 lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
1509 ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ 1494 return sbuflen(&ls->sb);
1510 return ls->sb.n;
1511} 1495}
1512 1496
1513/* Fixup variable info for prototype. */ 1497/* Fixup variable info for prototype. */
@@ -1515,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
1515{ 1499{
1516 setmref(pt->uvinfo, p); 1500 setmref(pt->uvinfo, p);
1517 setmref(pt->varinfo, (char *)p + ofsvar); 1501 setmref(pt->varinfo, (char *)p + ofsvar);
1518 memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ 1502 memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */
1519} 1503}
1520#else 1504#else
1521 1505
@@ -1552,7 +1536,7 @@ static void fs_fixup_ret(FuncState *fs)
1552 } 1536 }
1553 fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ 1537 fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */
1554 fscope_end(fs); 1538 fscope_end(fs);
1555 lua_assert(fs->bl == NULL); 1539 lj_assertFS(fs->bl == NULL, "bad scope nesting");
1556 /* May need to fixup returns encoded before first function was created. */ 1540 /* May need to fixup returns encoded before first function was created. */
1557 if (fs->flags & PROTO_FIXUP_RETURN) { 1541 if (fs->flags & PROTO_FIXUP_RETURN) {
1558 BCPos pc; 1542 BCPos pc;
@@ -1624,7 +1608,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1624 L->top--; /* Pop table of constants. */ 1608 L->top--; /* Pop table of constants. */
1625 ls->vtop = fs->vbase; /* Reset variable stack. */ 1609 ls->vtop = fs->vbase; /* Reset variable stack. */
1626 ls->fs = fs->prev; 1610 ls->fs = fs->prev;
1627 lua_assert(ls->fs != NULL || ls->token == TK_eof); 1611 lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state");
1628 return pt; 1612 return pt;
1629} 1613}
1630 1614
@@ -1718,15 +1702,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
1718} 1702}
1719 1703
1720/* Get value of constant expression. */ 1704/* Get value of constant expression. */
1721static void expr_kvalue(TValue *v, ExpDesc *e) 1705static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e)
1722{ 1706{
1707 UNUSED(fs);
1723 if (e->k <= VKTRUE) { 1708 if (e->k <= VKTRUE) {
1724 setitype(v, ~(uint32_t)e->k); 1709 setpriV(v, ~(uint32_t)e->k);
1725 } else if (e->k == VKSTR) { 1710 } else if (e->k == VKSTR) {
1726 setgcref(v->gcr, obj2gco(e->u.sval)); 1711 setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
1727 setitype(v, LJ_TSTR);
1728 } else { 1712 } else {
1729 lua_assert(tvisnumber(expr_numtv(e))); 1713 lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant");
1730 *v = *expr_numtv(e); 1714 *v = *expr_numtv(e);
1731 } 1715 }
1732} 1716}
@@ -1746,15 +1730,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
1746 bcreg_reserve(fs, 1); 1730 bcreg_reserve(fs, 1);
1747 freg++; 1731 freg++;
1748 lex_check(ls, '{'); 1732 lex_check(ls, '{');
1749 while (ls->token != '}') { 1733 while (ls->tok != '}') {
1750 ExpDesc key, val; 1734 ExpDesc key, val;
1751 vcall = 0; 1735 vcall = 0;
1752 if (ls->token == '[') { 1736 if (ls->tok == '[') {
1753 expr_bracket(ls, &key); /* Already calls expr_toval. */ 1737 expr_bracket(ls, &key); /* Already calls expr_toval. */
1754 if (!expr_isk(&key)) expr_index(fs, e, &key); 1738 if (!expr_isk(&key)) expr_index(fs, e, &key);
1755 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; 1739 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
1756 lex_check(ls, '='); 1740 lex_check(ls, '=');
1757 } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && 1741 } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
1758 lj_lex_lookahead(ls) == '=') { 1742 lj_lex_lookahead(ls) == '=') {
1759 expr_str(ls, &key); 1743 expr_str(ls, &key);
1760 lex_check(ls, '='); 1744 lex_check(ls, '=');
@@ -1776,11 +1760,11 @@ static void expr_table(LexState *ls, ExpDesc *e)
1776 fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); 1760 fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx);
1777 } 1761 }
1778 vcall = 0; 1762 vcall = 0;
1779 expr_kvalue(&k, &key); 1763 expr_kvalue(fs, &k, &key);
1780 v = lj_tab_set(fs->L, t, &k); 1764 v = lj_tab_set(fs->L, t, &k);
1781 lj_gc_anybarriert(fs->L, t); 1765 lj_gc_anybarriert(fs->L, t);
1782 if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ 1766 if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */
1783 expr_kvalue(v, &val); 1767 expr_kvalue(fs, v, &val);
1784 } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ 1768 } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */
1785 settabV(fs->L, v, t); /* Preserve key with table itself as value. */ 1769 settabV(fs->L, v, t); /* Preserve key with table itself as value. */
1786 fixt = 1; /* Fix this later, after all resizes. */ 1770 fixt = 1; /* Fix this later, after all resizes. */
@@ -1799,8 +1783,9 @@ static void expr_table(LexState *ls, ExpDesc *e)
1799 if (vcall) { 1783 if (vcall) {
1800 BCInsLine *ilp = &fs->bcbase[fs->pc-1]; 1784 BCInsLine *ilp = &fs->bcbase[fs->pc-1];
1801 ExpDesc en; 1785 ExpDesc en;
1802 lua_assert(bc_a(ilp->ins) == freg && 1786 lj_assertFS(bc_a(ilp->ins) == freg &&
1803 bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB)); 1787 bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB),
1788 "bad CALL code generation");
1804 expr_init(&en, VKNUM, 0); 1789 expr_init(&en, VKNUM, 0);
1805 en.u.nval.u32.lo = narr-1; 1790 en.u.nval.u32.lo = narr-1;
1806 en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */ 1791 en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */
@@ -1830,7 +1815,7 @@ static void expr_table(LexState *ls, ExpDesc *e)
1830 for (i = 0; i <= hmask; i++) { 1815 for (i = 0; i <= hmask; i++) {
1831 Node *n = &node[i]; 1816 Node *n = &node[i];
1832 if (tvistab(&n->val)) { 1817 if (tvistab(&n->val)) {
1833 lua_assert(tabV(&n->val) == t); 1818 lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table");
1834 setnilV(&n->val); /* Turn value into nil. */ 1819 setnilV(&n->val); /* Turn value into nil. */
1835 } 1820 }
1836 } 1821 }
@@ -1847,11 +1832,11 @@ static BCReg parse_params(LexState *ls, int needself)
1847 lex_check(ls, '('); 1832 lex_check(ls, '(');
1848 if (needself) 1833 if (needself)
1849 var_new_lit(ls, nparams++, "self"); 1834 var_new_lit(ls, nparams++, "self");
1850 if (ls->token != ')') { 1835 if (ls->tok != ')') {
1851 do { 1836 do {
1852 if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1837 if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1853 var_new(ls, nparams++, lex_str(ls)); 1838 var_new(ls, nparams++, lex_str(ls));
1854 } else if (ls->token == TK_dots) { 1839 } else if (ls->tok == TK_dots) {
1855 lj_lex_next(ls); 1840 lj_lex_next(ls);
1856 fs->flags |= PROTO_VARARG; 1841 fs->flags |= PROTO_VARARG;
1857 break; 1842 break;
@@ -1861,7 +1846,7 @@ static BCReg parse_params(LexState *ls, int needself)
1861 } while (lex_opt(ls, ',')); 1846 } while (lex_opt(ls, ','));
1862 } 1847 }
1863 var_add(ls, nparams); 1848 var_add(ls, nparams);
1864 lua_assert(fs->nactvar == nparams); 1849 lj_assertFS(fs->nactvar == nparams, "bad regalloc");
1865 bcreg_reserve(fs, nparams); 1850 bcreg_reserve(fs, nparams);
1866 lex_check(ls, ')'); 1851 lex_check(ls, ')');
1867 return nparams; 1852 return nparams;
@@ -1885,7 +1870,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1885 fs.bclim = pfs->bclim - pfs->pc; 1870 fs.bclim = pfs->bclim - pfs->pc;
1886 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ 1871 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
1887 parse_chunk(ls); 1872 parse_chunk(ls);
1888 if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); 1873 if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
1889 pt = fs_finish(ls, (ls->lastline = ls->linenumber)); 1874 pt = fs_finish(ls, (ls->lastline = ls->linenumber));
1890 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ 1875 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
1891 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); 1876 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1924,13 +1909,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
1924 BCIns ins; 1909 BCIns ins;
1925 BCReg base; 1910 BCReg base;
1926 BCLine line = ls->linenumber; 1911 BCLine line = ls->linenumber;
1927 if (ls->token == '(') { 1912 if (ls->tok == '(') {
1928#if !LJ_52 1913#if !LJ_52
1929 if (line != ls->lastline) 1914 if (line != ls->lastline)
1930 err_syntax(ls, LJ_ERR_XAMBIG); 1915 err_syntax(ls, LJ_ERR_XAMBIG);
1931#endif 1916#endif
1932 lj_lex_next(ls); 1917 lj_lex_next(ls);
1933 if (ls->token == ')') { /* f(). */ 1918 if (ls->tok == ')') { /* f(). */
1934 args.k = VVOID; 1919 args.k = VVOID;
1935 } else { 1920 } else {
1936 expr_list(ls, &args); 1921 expr_list(ls, &args);
@@ -1938,24 +1923,24 @@ static void parse_args(LexState *ls, ExpDesc *e)
1938 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ 1923 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
1939 } 1924 }
1940 lex_match(ls, ')', '(', line); 1925 lex_match(ls, ')', '(', line);
1941 } else if (ls->token == '{') { 1926 } else if (ls->tok == '{') {
1942 expr_table(ls, &args); 1927 expr_table(ls, &args);
1943 } else if (ls->token == TK_string) { 1928 } else if (ls->tok == TK_string) {
1944 expr_init(&args, VKSTR, 0); 1929 expr_init(&args, VKSTR, 0);
1945 args.u.sval = strV(&ls->tokenval); 1930 args.u.sval = strV(&ls->tokval);
1946 lj_lex_next(ls); 1931 lj_lex_next(ls);
1947 } else { 1932 } else {
1948 err_syntax(ls, LJ_ERR_XFUNARG); 1933 err_syntax(ls, LJ_ERR_XFUNARG);
1949 return; /* Silence compiler. */ 1934 return; /* Silence compiler. */
1950 } 1935 }
1951 lua_assert(e->k == VNONRELOC); 1936 lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
1952 base = e->u.s.info; /* Base register for call. */ 1937 base = e->u.s.info; /* Base register for call. */
1953 if (args.k == VCALL) { 1938 if (args.k == VCALL) {
1954 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); 1939 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
1955 } else { 1940 } else {
1956 if (args.k != VVOID) 1941 if (args.k != VVOID)
1957 expr_tonextreg(fs, &args); 1942 expr_tonextreg(fs, &args);
1958 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); 1943 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2);
1959 } 1944 }
1960 expr_init(e, VCALL, bcemit_INS(fs, ins)); 1945 expr_init(e, VCALL, bcemit_INS(fs, ins));
1961 e->u.s.aux = base; 1946 e->u.s.aux = base;
@@ -1968,33 +1953,34 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1968{ 1953{
1969 FuncState *fs = ls->fs; 1954 FuncState *fs = ls->fs;
1970 /* Parse prefix expression. */ 1955 /* Parse prefix expression. */
1971 if (ls->token == '(') { 1956 if (ls->tok == '(') {
1972 BCLine line = ls->linenumber; 1957 BCLine line = ls->linenumber;
1973 lj_lex_next(ls); 1958 lj_lex_next(ls);
1974 expr(ls, v); 1959 expr(ls, v);
1975 lex_match(ls, ')', '(', line); 1960 lex_match(ls, ')', '(', line);
1976 expr_discharge(ls->fs, v); 1961 expr_discharge(ls->fs, v);
1977 } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1962 } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1978 var_lookup(ls, v); 1963 var_lookup(ls, v);
1979 } else { 1964 } else {
1980 err_syntax(ls, LJ_ERR_XSYMBOL); 1965 err_syntax(ls, LJ_ERR_XSYMBOL);
1981 } 1966 }
1982 for (;;) { /* Parse multiple expression suffixes. */ 1967 for (;;) { /* Parse multiple expression suffixes. */
1983 if (ls->token == '.') { 1968 if (ls->tok == '.') {
1984 expr_field(ls, v); 1969 expr_field(ls, v);
1985 } else if (ls->token == '[') { 1970 } else if (ls->tok == '[') {
1986 ExpDesc key; 1971 ExpDesc key;
1987 expr_toanyreg(fs, v); 1972 expr_toanyreg(fs, v);
1988 expr_bracket(ls, &key); 1973 expr_bracket(ls, &key);
1989 expr_index(fs, v, &key); 1974 expr_index(fs, v, &key);
1990 } else if (ls->token == ':') { 1975 } else if (ls->tok == ':') {
1991 ExpDesc key; 1976 ExpDesc key;
1992 lj_lex_next(ls); 1977 lj_lex_next(ls);
1993 expr_str(ls, &key); 1978 expr_str(ls, &key);
1994 bcemit_method(fs, v, &key); 1979 bcemit_method(fs, v, &key);
1995 parse_args(ls, v); 1980 parse_args(ls, v);
1996 } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { 1981 } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
1997 expr_tonextreg(fs, v); 1982 expr_tonextreg(fs, v);
1983 if (LJ_FR2) bcreg_reserve(fs, 1);
1998 parse_args(ls, v); 1984 parse_args(ls, v);
1999 } else { 1985 } else {
2000 break; 1986 break;
@@ -2005,14 +1991,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
2005/* Parse simple expression. */ 1991/* Parse simple expression. */
2006static void expr_simple(LexState *ls, ExpDesc *v) 1992static void expr_simple(LexState *ls, ExpDesc *v)
2007{ 1993{
2008 switch (ls->token) { 1994 switch (ls->tok) {
2009 case TK_number: 1995 case TK_number:
2010 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); 1996 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
2011 copyTV(ls->L, &v->u.nval, &ls->tokenval); 1997 copyTV(ls->L, &v->u.nval, &ls->tokval);
2012 break; 1998 break;
2013 case TK_string: 1999 case TK_string:
2014 expr_init(v, VKSTR, 0); 2000 expr_init(v, VKSTR, 0);
2015 v->u.sval = strV(&ls->tokenval); 2001 v->u.sval = strV(&ls->tokval);
2016 break; 2002 break;
2017 case TK_nil: 2003 case TK_nil:
2018 expr_init(v, VKNIL, 0); 2004 expr_init(v, VKNIL, 0);
@@ -2100,11 +2086,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
2100static void expr_unop(LexState *ls, ExpDesc *v) 2086static void expr_unop(LexState *ls, ExpDesc *v)
2101{ 2087{
2102 BCOp op; 2088 BCOp op;
2103 if (ls->token == TK_not) { 2089 if (ls->tok == TK_not) {
2104 op = BC_NOT; 2090 op = BC_NOT;
2105 } else if (ls->token == '-') { 2091 } else if (ls->tok == '-') {
2106 op = BC_UNM; 2092 op = BC_UNM;
2107 } else if (ls->token == '#') { 2093 } else if (ls->tok == '#') {
2108 op = BC_LEN; 2094 op = BC_LEN;
2109 } else { 2095 } else {
2110 expr_simple(ls, v); 2096 expr_simple(ls, v);
@@ -2121,7 +2107,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
2121 BinOpr op; 2107 BinOpr op;
2122 synlevel_begin(ls); 2108 synlevel_begin(ls);
2123 expr_unop(ls, v); 2109 expr_unop(ls, v);
2124 op = token2binop(ls->token); 2110 op = token2binop(ls->tok);
2125 while (op != OPR_NOBINOPR && priority[op].left > limit) { 2111 while (op != OPR_NOBINOPR && priority[op].left > limit) {
2126 ExpDesc v2; 2112 ExpDesc v2;
2127 BinOpr nextop; 2113 BinOpr nextop;
@@ -2310,9 +2296,9 @@ static void parse_func(LexState *ls, BCLine line)
2310 lj_lex_next(ls); /* Skip 'function'. */ 2296 lj_lex_next(ls); /* Skip 'function'. */
2311 /* Parse function name. */ 2297 /* Parse function name. */
2312 var_lookup(ls, &v); 2298 var_lookup(ls, &v);
2313 while (ls->token == '.') /* Multiple dot-separated fields. */ 2299 while (ls->tok == '.') /* Multiple dot-separated fields. */
2314 expr_field(ls, &v); 2300 expr_field(ls, &v);
2315 if (ls->token == ':') { /* Optional colon to signify method call. */ 2301 if (ls->tok == ':') { /* Optional colon to signify method call. */
2316 needself = 1; 2302 needself = 1;
2317 expr_field(ls, &v); 2303 expr_field(ls, &v);
2318 } 2304 }
@@ -2325,9 +2311,9 @@ static void parse_func(LexState *ls, BCLine line)
2325/* -- Control transfer statements ----------------------------------------- */ 2311/* -- Control transfer statements ----------------------------------------- */
2326 2312
2327/* Check for end of block. */ 2313/* Check for end of block. */
2328static int endofblock(LexToken token) 2314static int parse_isend(LexToken tok)
2329{ 2315{
2330 switch (token) { 2316 switch (tok) {
2331 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: 2317 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2332 return 1; 2318 return 1;
2333 default: 2319 default:
@@ -2342,7 +2328,7 @@ static void parse_return(LexState *ls)
2342 FuncState *fs = ls->fs; 2328 FuncState *fs = ls->fs;
2343 lj_lex_next(ls); /* Skip 'return'. */ 2329 lj_lex_next(ls); /* Skip 'return'. */
2344 fs->flags |= PROTO_HAS_RETURN; 2330 fs->flags |= PROTO_HAS_RETURN;
2345 if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ 2331 if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
2346 ins = BCINS_AD(BC_RET0, 0, 1); 2332 ins = BCINS_AD(BC_RET0, 0, 1);
2347 } else { /* Return with one or more values. */ 2333 } else { /* Return with one or more values. */
2348 ExpDesc e; /* Receives the _last_ expression in the list. */ 2334 ExpDesc e; /* Receives the _last_ expression in the list. */
@@ -2408,18 +2394,18 @@ static void parse_label(LexState *ls)
2408 lex_check(ls, TK_label); 2394 lex_check(ls, TK_label);
2409 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ 2395 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
2410 for (;;) { 2396 for (;;) {
2411 if (ls->token == TK_label) { 2397 if (ls->tok == TK_label) {
2412 synlevel_begin(ls); 2398 synlevel_begin(ls);
2413 parse_label(ls); 2399 parse_label(ls);
2414 synlevel_end(ls); 2400 synlevel_end(ls);
2415 } else if (LJ_52 && ls->token == ';') { 2401 } else if (LJ_52 && ls->tok == ';') {
2416 lj_lex_next(ls); 2402 lj_lex_next(ls);
2417 } else { 2403 } else {
2418 break; 2404 break;
2419 } 2405 }
2420 } 2406 }
2421 /* Trailing label is considered to be outside of scope. */ 2407 /* Trailing label is considered to be outside of scope. */
2422 if (endofblock(ls->token) && ls->token != TK_until) 2408 if (parse_isend(ls->tok) && ls->tok != TK_until)
2423 ls->vstack[idx].slot = fs->bl->nactvar; 2409 ls->vstack[idx].slot = fs->bl->nactvar;
2424 gola_resolve(ls, fs->bl, idx); 2410 gola_resolve(ls, fs->bl, idx);
2425} 2411}
@@ -2575,7 +2561,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
2575 lex_check(ls, TK_in); 2561 lex_check(ls, TK_in);
2576 line = ls->linenumber; 2562 line = ls->linenumber;
2577 assign_adjust(ls, 3, expr_list(ls, &e), &e); 2563 assign_adjust(ls, 3, expr_list(ls, &e), &e);
2578 bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */ 2564 /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
2565 bcreg_bump(fs, 3+LJ_FR2);
2579 isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); 2566 isnext = (nvars <= 5 && predict_next(ls, fs, exprpc));
2580 var_add(ls, 3); /* Hidden control variables. */ 2567 var_add(ls, 3); /* Hidden control variables. */
2581 lex_check(ls, TK_do); 2568 lex_check(ls, TK_do);
@@ -2603,9 +2590,9 @@ static void parse_for(LexState *ls, BCLine line)
2603 fscope_begin(fs, &bl, FSCOPE_LOOP); 2590 fscope_begin(fs, &bl, FSCOPE_LOOP);
2604 lj_lex_next(ls); /* Skip 'for'. */ 2591 lj_lex_next(ls); /* Skip 'for'. */
2605 varname = lex_str(ls); /* Get first variable name. */ 2592 varname = lex_str(ls); /* Get first variable name. */
2606 if (ls->token == '=') 2593 if (ls->tok == '=')
2607 parse_for_num(ls, varname, line); 2594 parse_for_num(ls, varname, line);
2608 else if (ls->token == ',' || ls->token == TK_in) 2595 else if (ls->tok == ',' || ls->tok == TK_in)
2609 parse_for_iter(ls, varname); 2596 parse_for_iter(ls, varname);
2610 else 2597 else
2611 err_syntax(ls, LJ_ERR_XFOR); 2598 err_syntax(ls, LJ_ERR_XFOR);
@@ -2631,12 +2618,12 @@ static void parse_if(LexState *ls, BCLine line)
2631 BCPos flist; 2618 BCPos flist;
2632 BCPos escapelist = NO_JMP; 2619 BCPos escapelist = NO_JMP;
2633 flist = parse_then(ls); 2620 flist = parse_then(ls);
2634 while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ 2621 while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
2635 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2622 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2636 jmp_tohere(fs, flist); 2623 jmp_tohere(fs, flist);
2637 flist = parse_then(ls); 2624 flist = parse_then(ls);
2638 } 2625 }
2639 if (ls->token == TK_else) { /* Parse optional 'else' block. */ 2626 if (ls->tok == TK_else) { /* Parse optional 'else' block. */
2640 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2627 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2641 jmp_tohere(fs, flist); 2628 jmp_tohere(fs, flist);
2642 lj_lex_next(ls); /* Skip 'else'. */ 2629 lj_lex_next(ls); /* Skip 'else'. */
@@ -2654,7 +2641,7 @@ static void parse_if(LexState *ls, BCLine line)
2654static int parse_stmt(LexState *ls) 2641static int parse_stmt(LexState *ls)
2655{ 2642{
2656 BCLine line = ls->linenumber; 2643 BCLine line = ls->linenumber;
2657 switch (ls->token) { 2644 switch (ls->tok) {
2658 case TK_if: 2645 case TK_if:
2659 parse_if(ls, line); 2646 parse_if(ls, line);
2660 break; 2647 break;
@@ -2713,11 +2700,12 @@ static void parse_chunk(LexState *ls)
2713{ 2700{
2714 int islast = 0; 2701 int islast = 0;
2715 synlevel_begin(ls); 2702 synlevel_begin(ls);
2716 while (!islast && !endofblock(ls->token)) { 2703 while (!islast && !parse_isend(ls->tok)) {
2717 islast = parse_stmt(ls); 2704 islast = parse_stmt(ls);
2718 lex_opt(ls, ';'); 2705 lex_opt(ls, ';');
2719 lua_assert(ls->fs->framesize >= ls->fs->freereg && 2706 lj_assertLS(ls->fs->framesize >= ls->fs->freereg &&
2720 ls->fs->freereg >= ls->fs->nactvar); 2707 ls->fs->freereg >= ls->fs->nactvar,
2708 "bad regalloc");
2721 ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ 2709 ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */
2722 } 2710 }
2723 synlevel_end(ls); 2711 synlevel_end(ls);
@@ -2748,13 +2736,12 @@ GCproto *lj_parse(LexState *ls)
2748 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ 2736 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
2749 lj_lex_next(ls); /* Read-ahead first token. */ 2737 lj_lex_next(ls); /* Read-ahead first token. */
2750 parse_chunk(ls); 2738 parse_chunk(ls);
2751 if (ls->token != TK_eof) 2739 if (ls->tok != TK_eof)
2752 err_token(ls, TK_eof); 2740 err_token(ls, TK_eof);
2753 pt = fs_finish(ls, ls->linenumber); 2741 pt = fs_finish(ls, ls->linenumber);
2754 L->top--; /* Drop chunkname. */ 2742 L->top--; /* Drop chunkname. */
2755 lua_assert(fs.prev == NULL); 2743 lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting");
2756 lua_assert(ls->fs == NULL); 2744 lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues");
2757 lua_assert(pt->sizeuv == 0);
2758 return pt; 2745 return pt;
2759} 2746}
2760 2747
diff --git a/src/lj_prng.c b/src/lj_prng.c
new file mode 100644
index 00000000..bb32da8b
--- /dev/null
+++ b/src/lj_prng.c
@@ -0,0 +1,250 @@
1/*
2** Pseudo-random number generation.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_prng_c
7#define LUA_CORE
8
9/* To get the syscall prototype. */
10#if defined(__linux__) && !defined(_GNU_SOURCE)
11#define _GNU_SOURCE
12#endif
13
14#include "lj_def.h"
15#include "lj_arch.h"
16#include "lj_prng.h"
17
18/* -- PRNG step function -------------------------------------------------- */
19
20/* This implements a Tausworthe PRNG with period 2^223. Based on:
21** Tables of maximally-equidistributed combined LFSR generators,
22** Pierre L'Ecuyer, 1991, table 3, 1st entry.
23** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
24**
25** Important note: This PRNG is NOT suitable for cryptographic use!
26**
27** But it works fine for math.random(), which has an API that's not
28** suitable for cryptography, anyway.
29**
30** When used as a securely seeded global PRNG, it substantially raises
31** the difficulty for various attacks on the VM.
32*/
33
34/* Update generator i and compute a running xor of all states. */
35#define TW223_GEN(rs, z, r, i, k, q, s) \
36 z = rs->u[i]; \
37 z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
38 r ^= z; rs->u[i] = z;
39
40#define TW223_STEP(rs, z, r) \
41 TW223_GEN(rs, z, r, 0, 63, 31, 18) \
42 TW223_GEN(rs, z, r, 1, 58, 19, 28) \
43 TW223_GEN(rs, z, r, 2, 55, 24, 7) \
44 TW223_GEN(rs, z, r, 3, 47, 21, 8)
45
46/* PRNG step function with uint64_t result. */
47LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs)
48{
49 uint64_t z, r = 0;
50 TW223_STEP(rs, z, r)
51 return r;
52}
53
54/* PRNG step function with double in uint64_t result. */
55LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs)
56{
57 uint64_t z, r = 0;
58 TW223_STEP(rs, z, r)
59 /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */
60 return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
61}
62
63/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */
64static LJ_AINLINE void lj_prng_condition(PRNGState *rs)
65{
66 if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1);
67 if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6);
68 if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9);
69 if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17);
70}
71
72/* -- PRNG seeding from OS ------------------------------------------------ */
73
74#if LUAJIT_SECURITY_PRNG == 0
75
76/* Nothing to define. */
77
78#elif LJ_TARGET_XBOX360
79
80extern int XNetRandom(void *buf, unsigned int len);
81
82#elif LJ_TARGET_PS3
83
84extern int sys_get_random_number(void *buf, uint64_t len);
85
86#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA
87
88extern int sceRandomGetRandomNumber(void *buf, size_t len);
89
90#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE
91
92#define WIN32_LEAN_AND_MEAN
93#include <windows.h>
94
95#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE
96/* Must use BCryptGenRandom. */
97#include <bcrypt.h>
98#pragma comment(lib, "bcrypt.lib")
99#else
100/* If you wonder about this mess, then search online for RtlGenRandom. */
101typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len);
102static PRGR libfunc_rgr;
103#endif
104
105#elif LJ_TARGET_POSIX
106
107#if LJ_TARGET_LINUX
108/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */
109#include <sys/syscall.h>
110#else
111
112#if LJ_TARGET_OSX && !LJ_TARGET_IOS
113/*
114** In their infinite wisdom Apple decided to disallow getentropy() in the
115** iOS App Store. Even though the call is common to all BSD-ish OS, it's
116** recommended by Apple in their own security-related docs, and, to top
117** off the foolery, /dev/urandom is handled by the same kernel code,
118** yet accessing it is actually permitted (but less efficient).
119*/
120#include <Availability.h>
121#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200
122#define LJ_TARGET_HAS_GETENTROPY 1
123#endif
124#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN
125#define LJ_TARGET_HAS_GETENTROPY 1
126#endif
127
128#if LJ_TARGET_HAS_GETENTROPY
129extern int getentropy(void *buf, size_t len);
130#ifdef __ELF__
131 __attribute__((weak))
132#endif
133;
134#endif
135
136#endif
137
138/* For the /dev/urandom fallback. */
139#include <fcntl.h>
140#include <unistd.h>
141
142#endif
143
144#if LUAJIT_SECURITY_PRNG == 0
145
146/* If you really don't care about security, then define
147** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed
148** and provides NO SECURITY against various attacks on the VM.
149**
150** BTW: This is NOT the way to get predictable table iteration,
151** predictable trace generation, predictable bytecode generation, etc.
152*/
153int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
154{
155 lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */
156 return 1;
157}
158
159#else
160
161/* Securely seed PRNG from system entropy. Returns 0 on failure. */
/*
** Fills rs->u with sizeof(rs->u) bytes from exactly one OS entropy source,
** selected at compile time by the preprocessor ladder below. On success the
** state is conditioned and stepped once, and 1 is returned.
**
** POSIX targets additionally fall back to reading /dev/urandom when the
** primary source (getrandom syscall or getentropy) is unavailable or fails.
*/
162int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
163{
164#if LJ_TARGET_XBOX360
165
166 if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0)
167 goto ok;
168
169#elif LJ_TARGET_PS3
170
171 if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0)
172 goto ok;
173
174#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA
175
176 if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
177 goto ok;
178
179#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE
180
181 if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u),
182 BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0)
183 goto ok;
184
185#elif LJ_TARGET_WINDOWS
186
187 /* Keep the library loaded in case multiple VMs are started. */
 /* libfunc_rgr is a file-level static, so the lookup runs once per process.
 ** Note that both failure paths here return 0 directly.
 */
188 if (!libfunc_rgr) {
189 HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll");
190 if (!lib) return 0;
191 libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036");
192 if (!libfunc_rgr) return 0;
193 }
194 if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u)))
195 goto ok;
196
197#elif LJ_TARGET_POSIX
198
199#if LJ_TARGET_LINUX && defined(SYS_getrandom)
200
 /* Only a full-length result counts as success; anything else falls
 ** through to the /dev/urandom fallback below.
 */
201 if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u))
202 goto ok;
203
204#elif LJ_TARGET_HAS_GETENTROPY
205
206#ifdef __ELF__
 /* getentropy is declared weak on ELF targets, so check that the symbol
 ** actually resolved before calling it.
 */
207 if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0)
208 goto ok;
209#else
210 if (getentropy(rs->u, sizeof(rs->u)) == 0)
211 goto ok;
212#endif
213
214#endif
215
216 /* Fallback to /dev/urandom. This may fail if the device is not
217 ** existent or accessible in a chroot or container, or if the process
218 ** or the OS ran out of file descriptors.
219 */
220 {
221 int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC);
222 if (fd != -1) {
 /* Single read attempt: a short read is treated as failure, not retried. */
223 ssize_t n = read(fd, rs->u, sizeof(rs->u));
224 (void)close(fd);
225 if (n == (ssize_t)sizeof(rs->u))
226 goto ok;
227 }
228 }
229
230#else
231
232 /* Add an elif above for your OS with a secure PRNG seed.
233 ** Note that fiddling around with rand(), getpid(), time() or coercing
234 ** ASLR to yield a few bits of randomness is not helpful.
235 ** If you don't want any security, then don't pretend you have any
236 ** and simply define LUAJIT_SECURITY_PRNG=0 for the build.
237 */
238#error "Missing secure PRNG seed for this OS"
239
240#endif
241 return 0; /* Fail. */
242
243ok:
 /* Make sure the significant bits of each generator are non-zero, then
 ** advance the state once; the result of that first step is discarded.
 */
244 lj_prng_condition(rs);
245 (void)lj_prng_u64(rs);
246 return 1; /* Success. */
247}
248
249#endif
250
diff --git a/src/lj_prng.h b/src/lj_prng.h
new file mode 100644
index 00000000..216729be
--- /dev/null
+++ b/src/lj_prng.h
@@ -0,0 +1,24 @@
1/*
2** Pseudo-random number generation.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PRNG_H
7#define _LJ_PRNG_H
8
9#include "lj_def.h"
10
11LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs);
12LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs);
13LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs);
14
15/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */
16static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs)
17{
18 rs->u[0] = U64x(a0d27757,0a345b8c);
19 rs->u[1] = U64x(764a296c,5d4aa64f);
20 rs->u[2] = U64x(51220704,070adeaa);
21 rs->u[3] = U64x(2a2717b5,a7b7b927);
22}
23
24#endif
diff --git a/src/lj_profile.c b/src/lj_profile.c
new file mode 100644
index 00000000..fbcb9878
--- /dev/null
+++ b/src/lj_profile.c
@@ -0,0 +1,367 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_profile_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13#include "lj_buf.h"
14#include "lj_frame.h"
15#include "lj_debug.h"
16#include "lj_dispatch.h"
17#if LJ_HASJIT
18#include "lj_jit.h"
19#include "lj_trace.h"
20#endif
21#include "lj_profile.h"
22
23#include "luajit.h"
24
25#if LJ_PROFILE_SIGPROF
26
27#include <sys/time.h>
28#include <signal.h>
29#define profile_lock(ps) UNUSED(ps)
30#define profile_unlock(ps) UNUSED(ps)
31
32#elif LJ_PROFILE_PTHREAD
33
34#include <pthread.h>
35#include <time.h>
36#if LJ_TARGET_PS3
37#include <sys/timer.h>
38#endif
39#define profile_lock(ps) pthread_mutex_lock(&ps->lock)
40#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock)
41
42#elif LJ_PROFILE_WTHREAD
43
44#define WIN32_LEAN_AND_MEAN
45#if LJ_TARGET_XBOX360
46#include <xtl.h>
47#include <xbox.h>
48#else
49#include <windows.h>
50#endif
51typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
52#define profile_lock(ps) EnterCriticalSection(&ps->lock)
53#define profile_unlock(ps) LeaveCriticalSection(&ps->lock)
54
55#endif
56
57/* Profiler state. */
58typedef struct ProfileState {
59 global_State *g; /* VM state that started the profiler. */
60 luaJIT_profile_callback cb; /* Profiler callback. */
61 void *data; /* Profiler callback data. */
62 SBuf sb; /* String buffer for stack dumps. */
63 int interval; /* Sample interval in milliseconds. */
64 int samples; /* Number of samples for next callback. */
65 int vmstate; /* VM state when profile timer triggered. */
66#if LJ_PROFILE_SIGPROF
67 struct sigaction oldsa; /* Previous SIGPROF state. */
68#elif LJ_PROFILE_PTHREAD
69 pthread_mutex_t lock; /* g->hookmask update lock. */
70 pthread_t thread; /* Timer thread. */
71 int abort; /* Abort timer thread. */
72#elif LJ_PROFILE_WTHREAD
73#if LJ_TARGET_WINDOWS
74 HINSTANCE wmm; /* WinMM library handle. */
75 WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */
76 WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */
77#endif
78 CRITICAL_SECTION lock; /* g->hookmask update lock. */
79 HANDLE thread; /* Timer thread. */
80 int abort; /* Abort timer thread. */
81#endif
82} ProfileState;
83
84/* Sadly, we have to use a static profiler state.
85**
86** The SIGPROF variant needs a static pointer to the global state, anyway.
87** And it would be hard to extend for multiple threads. You can still use
88** multiple VMs in multiple threads, but only profile one at a time.
89*/
90static ProfileState profile_state;
91
92/* Default sample interval in milliseconds. */
93#define LJ_PROFILE_INTERVAL_DEFAULT 10
94
95/* -- Profiler/hook interaction ------------------------------------------- */
96
97#if !LJ_PROFILE_SIGPROF
98void LJ_FASTCALL lj_profile_hook_enter(global_State *g)
99{
100 ProfileState *ps = &profile_state;
101 if (ps->g) {
102 profile_lock(ps);
103 hook_enter(g);
104 profile_unlock(ps);
105 } else {
106 hook_enter(g);
107 }
108}
109
110void LJ_FASTCALL lj_profile_hook_leave(global_State *g)
111{
112 ProfileState *ps = &profile_state;
113 if (ps->g) {
114 profile_lock(ps);
115 hook_leave(g);
116 profile_unlock(ps);
117 } else {
118 hook_leave(g);
119 }
120}
121#endif
122
123/* -- Profile callbacks --------------------------------------------------- */
124
125/* Callback from profile hook (HOOK_PROFILE already cleared). */
126void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
127{
128 ProfileState *ps = &profile_state;
129 global_State *g = G(L);
130 uint8_t mask;
131 profile_lock(ps);
132 mask = (g->hookmask & ~HOOK_PROFILE);
133 if (!(mask & HOOK_VMEVENT)) {
134 int samples = ps->samples;
135 ps->samples = 0;
136 g->hookmask = HOOK_VMEVENT;
137 lj_dispatch_update(g);
138 profile_unlock(ps);
139 ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */
140 profile_lock(ps);
141 mask |= (g->hookmask & HOOK_PROFILE);
142 }
143 g->hookmask = mask;
144 lj_dispatch_update(g);
145 profile_unlock(ps);
146}
147
148/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
149static void profile_trigger(ProfileState *ps)
150{
151 global_State *g = ps->g;
152 uint8_t mask;
153 profile_lock(ps);
154 ps->samples++; /* Always increment number of samples. */
155 mask = g->hookmask;
156 if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */
157 int st = g->vmstate;
158 ps->vmstate = st >= 0 ? 'N' :
159 st == ~LJ_VMST_INTERP ? 'I' :
160 st == ~LJ_VMST_C ? 'C' :
161 st == ~LJ_VMST_GC ? 'G' : 'J';
162 g->hookmask = (mask | HOOK_PROFILE);
163 lj_dispatch_update(g);
164 }
165 profile_unlock(ps);
166}
167
168/* -- OS-specific profile timer handling ---------------------------------- */
169
170#if LJ_PROFILE_SIGPROF
171
172/* SIGPROF handler. */
/* Forwards each signal to profile_trigger() on the single static profiler
** state. The signal number itself is not used.
*/
173static void profile_signal(int sig)
174{
175 UNUSED(sig);
176 profile_trigger(&profile_state);
177}
178
179/* Start profiling timer. */
180static void profile_timer_start(ProfileState *ps)
181{
182 int interval = ps->interval;
183 struct itimerval tm;
184 struct sigaction sa;
185 tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
186 tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
187 setitimer(ITIMER_PROF, &tm, NULL);
188 sa.sa_flags = SA_RESTART;
189 sa.sa_handler = profile_signal;
190 sigemptyset(&sa.sa_mask);
191 sigaction(SIGPROF, &sa, &ps->oldsa);
192}
193
194/* Stop profiling timer. */
195static void profile_timer_stop(ProfileState *ps)
196{
197 struct itimerval tm;
198 tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
199 tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
200 setitimer(ITIMER_PROF, &tm, NULL);
201 sigaction(SIGPROF, &ps->oldsa, NULL);
202}
203
204#elif LJ_PROFILE_PTHREAD
205
206/* POSIX timer thread. */
207static void *profile_thread(ProfileState *ps)
208{
209 int interval = ps->interval;
210#if !LJ_TARGET_PS3
211 struct timespec ts;
212 ts.tv_sec = interval / 1000;
213 ts.tv_nsec = (interval % 1000) * 1000000;
214#endif
215 while (1) {
216#if LJ_TARGET_PS3
217 sys_timer_usleep(interval * 1000);
218#else
219 nanosleep(&ts, NULL);
220#endif
221 if (ps->abort) break;
222 profile_trigger(ps);
223 }
224 return NULL;
225}
226
227/* Start profiling timer thread. */
228static void profile_timer_start(ProfileState *ps)
229{
230 pthread_mutex_init(&ps->lock, 0);
231 ps->abort = 0;
232 pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
233}
234
235/* Stop profiling timer thread. */
/* Sets the abort flag, waits for the timer thread to exit and destroys
** the lock.
*/
236static void profile_timer_stop(ProfileState *ps)
237{
 /* profile_thread() checks this flag once after each sleep, so the join
 ** below can block until the current sleep has finished.
 */
238 ps->abort = 1;
239 pthread_join(ps->thread, NULL);
240 pthread_mutex_destroy(&ps->lock);
241}
242
243#elif LJ_PROFILE_WTHREAD
244
245/* Windows timer thread. */
246static DWORD WINAPI profile_thread(void *psx)
247{
248 ProfileState *ps = (ProfileState *)psx;
249 int interval = ps->interval;
250#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
251 ps->wmm_tbp(interval);
252#endif
253 while (1) {
254 Sleep(interval);
255 if (ps->abort) break;
256 profile_trigger(ps);
257 }
258#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
259 ps->wmm_tep(interval);
260#endif
261 return 0;
262}
263
264/* Start profiling timer thread. */
265static void profile_timer_start(ProfileState *ps)
266{
267#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
268 if (!ps->wmm) { /* Load WinMM library on-demand. */
269 ps->wmm = LJ_WIN_LOADLIBA("winmm.dll");
270 if (ps->wmm) {
271 ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
272 ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
273 if (!ps->wmm_tbp || !ps->wmm_tep) {
274 ps->wmm = NULL;
275 return;
276 }
277 }
278 }
279#endif
280 InitializeCriticalSection(&ps->lock);
281 ps->abort = 0;
282 ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
283}
284
285/* Stop profiling timer thread. */
286static void profile_timer_stop(ProfileState *ps)
287{
288 ps->abort = 1;
289 WaitForSingleObject(ps->thread, INFINITE);
290 DeleteCriticalSection(&ps->lock);
291}
292
293#endif
294
295/* -- Public profiling API ------------------------------------------------ */
296
297/* Start profiling. */
298LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
299 luaJIT_profile_callback cb, void *data)
300{
301 ProfileState *ps = &profile_state;
302 int interval = LJ_PROFILE_INTERVAL_DEFAULT;
303 while (*mode) {
304 int m = *mode++;
305 switch (m) {
306 case 'i':
307 interval = 0;
308 while (*mode >= '0' && *mode <= '9')
309 interval = interval * 10 + (*mode++ - '0');
310 if (interval <= 0) interval = 1;
311 break;
312#if LJ_HASJIT
313 case 'l': case 'f':
314 L2J(L)->prof_mode = m;
315 lj_trace_flushall(L);
316 break;
317#endif
318 default: /* Ignore unknown mode chars. */
319 break;
320 }
321 }
322 if (ps->g) {
323 luaJIT_profile_stop(L);
324 if (ps->g) return; /* Profiler in use by another VM. */
325 }
326 ps->g = G(L);
327 ps->interval = interval;
328 ps->cb = cb;
329 ps->data = data;
330 ps->samples = 0;
331 lj_buf_init(L, &ps->sb);
332 profile_timer_start(ps);
333}
334
335/* Stop profiling. */
336LUA_API void luaJIT_profile_stop(lua_State *L)
337{
338 ProfileState *ps = &profile_state;
339 global_State *g = ps->g;
340 if (G(L) == g) { /* Only stop profiler if started by this VM. */
341 profile_timer_stop(ps);
342 g->hookmask &= ~HOOK_PROFILE;
343 lj_dispatch_update(g);
344#if LJ_HASJIT
345 G2J(g)->prof_mode = 0;
346 lj_trace_flushall(L);
347#endif
348 lj_buf_free(g, &ps->sb);
349 ps->sb.w = ps->sb.e = NULL;
350 ps->g = NULL;
351 }
352}
353
354/* Return a compact stack dump. */
355LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
356 int depth, size_t *len)
357{
358 ProfileState *ps = &profile_state;
359 SBuf *sb = &ps->sb;
360 setsbufL(sb, L);
361 lj_buf_reset(sb);
362 lj_debug_dumpstack(L, sb, fmt, depth);
363 *len = (size_t)sbuflen(sb);
364 return sb->b;
365}
366
367#endif
diff --git a/src/lj_profile.h b/src/lj_profile.h
new file mode 100644
index 00000000..96706ee3
--- /dev/null
+++ b/src/lj_profile.h
@@ -0,0 +1,21 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PROFILE_H
7#define _LJ_PROFILE_H
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
14#if !LJ_PROFILE_SIGPROF
15LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g);
16LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g);
17#endif
18
19#endif
20
21#endif
diff --git a/src/lj_record.c b/src/lj_record.c
index 8e26afe3..ee62179b 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -20,6 +20,9 @@
20#endif 20#endif
21#include "lj_bc.h" 21#include "lj_bc.h"
22#include "lj_ff.h" 22#include "lj_ff.h"
23#if LJ_HASPROFILE
24#include "lj_debug.h"
25#endif
23#include "lj_ir.h" 26#include "lj_ir.h"
24#include "lj_jit.h" 27#include "lj_jit.h"
25#include "lj_ircall.h" 28#include "lj_ircall.h"
@@ -30,6 +33,7 @@
30#include "lj_snap.h" 33#include "lj_snap.h"
31#include "lj_dispatch.h" 34#include "lj_dispatch.h"
32#include "lj_vm.h" 35#include "lj_vm.h"
36#include "lj_prng.h"
33 37
34/* Some local macros to save typing. Undef'd at the end. */ 38/* Some local macros to save typing. Undef'd at the end. */
35#define IR(ref) (&J->cur.ir[(ref)]) 39#define IR(ref) (&J->cur.ir[(ref)])
@@ -47,31 +51,52 @@
47static void rec_check_ir(jit_State *J) 51static void rec_check_ir(jit_State *J)
48{ 52{
49 IRRef i, nins = J->cur.nins, nk = J->cur.nk; 53 IRRef i, nins = J->cur.nins, nk = J->cur.nk;
50 lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); 54 lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536,
51 for (i = nins-1; i >= nk; i--) { 55 "inconsistent IR layout");
56 for (i = nk; i < nins; i++) {
52 IRIns *ir = IR(i); 57 IRIns *ir = IR(i);
53 uint32_t mode = lj_ir_mode[ir->o]; 58 uint32_t mode = lj_ir_mode[ir->o];
54 IRRef op1 = ir->op1; 59 IRRef op1 = ir->op1;
55 IRRef op2 = ir->op2; 60 IRRef op2 = ir->op2;
61 const char *err = NULL;
56 switch (irm_op1(mode)) { 62 switch (irm_op1(mode)) {
57 case IRMnone: lua_assert(op1 == 0); break; 63 case IRMnone:
58 case IRMref: lua_assert(op1 >= nk); 64 if (op1 != 0) err = "IRMnone op1 used";
59 lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; 65 break;
66 case IRMref:
67 if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i))
68 err = "IRMref op1 out of range";
69 break;
60 case IRMlit: break; 70 case IRMlit: break;
61 case IRMcst: lua_assert(i < REF_BIAS); continue; 71 case IRMcst:
72 if (i >= REF_BIAS) { err = "constant in IR range"; break; }
73 if (irt_is64(ir->t) && ir->o != IR_KNULL)
74 i++;
75 continue;
62 } 76 }
63 switch (irm_op2(mode)) { 77 switch (irm_op2(mode)) {
64 case IRMnone: lua_assert(op2 == 0); break; 78 case IRMnone:
65 case IRMref: lua_assert(op2 >= nk); 79 if (op2) err = "IRMnone op2 used";
66 lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; 80 break;
81 case IRMref:
82 if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i))
83 err = "IRMref op2 out of range";
84 break;
67 case IRMlit: break; 85 case IRMlit: break;
68 case IRMcst: lua_assert(0); break; 86 case IRMcst: err = "IRMcst op2"; break;
69 } 87 }
70 if (ir->prev) { 88 if (!err && ir->prev) {
71 lua_assert(ir->prev >= nk); 89 if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i))
72 lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); 90 err = "chain out of range";
73 lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); 91 else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o)
92 err = "chain to different op";
74 } 93 }
94 lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s",
95 i-REF_BIAS,
96 ir->o,
97 irm_op1(mode) == IRMref ? op1-REF_BIAS : op1,
98 irm_op2(mode) == IRMref ? op2-REF_BIAS : op2,
99 err);
75 } 100 }
76} 101}
77 102
@@ -81,48 +106,76 @@ static void rec_check_slots(jit_State *J)
81 BCReg s, nslots = J->baseslot + J->maxslot; 106 BCReg s, nslots = J->baseslot + J->maxslot;
82 int32_t depth = 0; 107 int32_t depth = 0;
83 cTValue *base = J->L->base - J->baseslot; 108 cTValue *base = J->L->base - J->baseslot;
84 lua_assert(J->baseslot >= 1); 109 lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot");
85 lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); 110 lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME),
86 lua_assert(nslots <= LJ_MAX_JSLOTS); 111 "baseslot does not point to frame");
112 lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow");
87 for (s = 0; s < nslots; s++) { 113 for (s = 0; s < nslots; s++) {
88 TRef tr = J->slot[s]; 114 TRef tr = J->slot[s];
89 if (tr) { 115 if (tr) {
90 cTValue *tv = &base[s]; 116 cTValue *tv = &base[s];
91 IRRef ref = tref_ref(tr); 117 IRRef ref = tref_ref(tr);
92 IRIns *ir; 118 IRIns *ir = NULL; /* Silence compiler. */
93 lua_assert(ref >= J->cur.nk && ref < J->cur.nins); 119 if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
94 ir = IR(ref); 120 lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins,
95 lua_assert(irt_t(ir->t) == tref_t(tr)); 121 "slot %d ref %04d out of range", s, ref - REF_BIAS);
122 ir = IR(ref);
123 lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s);
124 }
96 if (s == 0) { 125 if (s == 0) {
97 lua_assert(tref_isfunc(tr)); 126 lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function");
127#if LJ_FR2
128 } else if (s == 1) {
129 lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1");
130#endif
98 } else if ((tr & TREF_FRAME)) { 131 } else if ((tr & TREF_FRAME)) {
99 GCfunc *fn = gco2func(frame_gc(tv)); 132 GCfunc *fn = gco2func(frame_gc(tv));
100 BCReg delta = (BCReg)(tv - frame_prev(tv)); 133 BCReg delta = (BCReg)(tv - frame_prev(tv));
101 lua_assert(tref_isfunc(tr)); 134#if LJ_FR2
102 if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); 135 lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
103 lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); 136 "frame slot %d PC mismatch", s);
137 tr = J->slot[s-1];
138 ir = IR(tref_ref(tr));
139#endif
140 lj_assertJ(tref_isfunc(tr),
141 "frame slot %d is not a function", s-LJ_FR2);
142 lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir),
143 "frame slot %d function mismatch", s-LJ_FR2);
144 lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
145 : (s == delta + LJ_FR2),
146 "frame slot %d broken chain", s-LJ_FR2);
104 depth++; 147 depth++;
105 } else if ((tr & TREF_CONT)) { 148 } else if ((tr & TREF_CONT)) {
106 lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); 149#if LJ_FR2
107 lua_assert((J->slot[s+1] & TREF_FRAME)); 150 lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
151 "cont slot %d continuation mismatch", s);
152#else
153 lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void),
154 "cont slot %d continuation mismatch", s);
155#endif
156 lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME),
157 "cont slot %d not followed by frame", s);
108 depth++; 158 depth++;
109 } else { 159 } else {
110 if (tvisnumber(tv)) 160 /* Number repr. may differ, but other types must be the same. */
111 lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ 161 lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) :
112 else 162 itype2irt(tv) == tref_type(tr),
113 lua_assert(itype2irt(tv) == tref_type(tr)); 163 "slot %d type mismatch: stack type %d vs IR type %d",
164 s, itypemap(tv), tref_type(tr));
114 if (tref_isk(tr)) { /* Compare constants. */ 165 if (tref_isk(tr)) { /* Compare constants. */
115 TValue tvk; 166 TValue tvk;
116 lj_ir_kvalue(J->L, &tvk, ir); 167 lj_ir_kvalue(J->L, &tvk, ir);
117 if (!(tvisnum(&tvk) && tvisnan(&tvk))) 168 lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ?
118 lua_assert(lj_obj_equal(tv, &tvk)); 169 (tvisnum(tv) && tvisnan(tv)) :
119 else 170 lj_obj_equal(tv, &tvk),
120 lua_assert(tvisnum(tv) && tvisnan(tv)); 171 "slot %d const mismatch: stack %016llx vs IR %016llx",
172 s, tv->u64, tvk.u64);
121 } 173 }
122 } 174 }
123 } 175 }
124 } 176 }
125 lua_assert(J->framedepth == depth); 177 lj_assertJ(J->framedepth == depth,
178 "frame depth mismatch %d vs %d", J->framedepth, depth);
126} 179}
127#endif 180#endif
128 181
@@ -156,10 +209,11 @@ static TRef sload(jit_State *J, int32_t slot)
156/* Get TRef for current function. */ 209/* Get TRef for current function. */
157static TRef getcurrf(jit_State *J) 210static TRef getcurrf(jit_State *J)
158{ 211{
159 if (J->base[-1]) 212 if (J->base[-1-LJ_FR2])
160 return J->base[-1]; 213 return J->base[-1-LJ_FR2];
161 lua_assert(J->baseslot == 1); 214 /* Non-base frame functions ought to be loaded already. */
162 return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); 215 lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot");
216 return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
163} 217}
164 218
165/* Compare for raw object equality. 219/* Compare for raw object equality.
@@ -205,6 +259,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o)
205 return 0; /* Can't represent lightuserdata (pointless). */ 259 return 0; /* Can't represent lightuserdata (pointless). */
206} 260}
207 261
262/* Emit a VLOAD with the correct type. */
263TRef lj_record_vload(jit_State *J, TRef ref, IRType t)
264{
265 TRef tr = emitir(IRTG(IR_VLOAD, t), ref, 0);
266 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
267 return tr;
268}
269
208/* -- Record loop ops ----------------------------------------------------- */ 270/* -- Record loop ops ----------------------------------------------------- */
209 271
210/* Loop event. */ 272/* Loop event. */
@@ -230,8 +292,12 @@ static void canonicalize_slots(jit_State *J)
230} 292}
231 293
232/* Stop recording. */ 294/* Stop recording. */
233static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) 295void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
234{ 296{
297#ifdef LUAJIT_ENABLE_TABLE_BUMP
298 if (J->retryrec)
299 lj_trace_err(J, LJ_TRERR_RETRY);
300#endif
235 lj_trace_end(J); 301 lj_trace_end(J);
236 J->cur.linktype = (uint8_t)linktype; 302 J->cur.linktype = (uint8_t)linktype;
237 J->cur.link = (uint16_t)lnk; 303 J->cur.link = (uint16_t)lnk;
@@ -399,7 +465,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
399 TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); 465 TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
400 TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); 466 TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
401 int tc, dir = rec_for_direction(&tv[FORL_STEP]); 467 int tc, dir = rec_for_direction(&tv[FORL_STEP]);
402 lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); 468 lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI,
469 "bad bytecode %d instead of FORI/JFORI", bc_op(*fori));
403 scev->t.irt = t; 470 scev->t.irt = t;
404 scev->dir = dir; 471 scev->dir = dir;
405 scev->stop = tref_ref(stop); 472 scev->stop = tref_ref(stop);
@@ -455,7 +522,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
455 IRT_NUM; 522 IRT_NUM;
456 for (i = FORL_IDX; i <= FORL_STEP; i++) { 523 for (i = FORL_IDX; i <= FORL_STEP; i++) {
457 if (!tr[i]) sload(J, ra+i); 524 if (!tr[i]) sload(J, ra+i);
458 lua_assert(tref_isnumber_str(tr[i])); 525 lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type");
459 if (tref_isstr(tr[i])) 526 if (tref_isstr(tr[i]))
460 tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); 527 tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
461 if (t == IRT_INT) { 528 if (t == IRT_INT) {
@@ -499,8 +566,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
499static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) 566static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
500{ 567{
501 BCReg ra = bc_a(iterins); 568 BCReg ra = bc_a(iterins);
502 lua_assert(J->base[ra] != 0); 569 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
503 if (!tref_isnil(J->base[ra])) { /* Looping back? */
504 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ 570 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
505 J->maxslot = ra-1+bc_b(J->pc[-1]); 571 J->maxslot = ra-1+bc_b(J->pc[-1]);
506 J->pc += bc_j(iterins)+1; 572 J->pc += bc_j(iterins)+1;
@@ -538,12 +604,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
538/* Handle the case when an interpreted loop op is hit. */ 604/* Handle the case when an interpreted loop op is hit. */
539static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) 605static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
540{ 606{
541 if (J->parent == 0) { 607 if (J->parent == 0 && J->exitno == 0) {
542 if (pc == J->startpc && J->framedepth + J->retdepth == 0) { 608 if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
543 /* Same loop? */ 609 /* Same loop? */
544 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ 610 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
545 lj_trace_err(J, LJ_TRERR_LLEAVE); 611 lj_trace_err(J, LJ_TRERR_LLEAVE);
546 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ 612 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
547 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ 613 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
548 /* It's usually better to abort here and wait until the inner loop 614 /* It's usually better to abort here and wait until the inner loop
549 ** is traced. But if the inner loop repeatedly didn't loop back, 615 ** is traced. But if the inner loop repeatedly didn't loop back,
@@ -568,18 +634,65 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
568/* Handle the case when an already compiled loop op is hit. */ 634/* Handle the case when an already compiled loop op is hit. */
569static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) 635static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
570{ 636{
571 if (J->parent == 0) { /* Root trace hit an inner loop. */ 637 if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
572 /* Better let the inner loop spawn a side trace back here. */ 638 /* Better let the inner loop spawn a side trace back here. */
573 lj_trace_err(J, LJ_TRERR_LINNER); 639 lj_trace_err(J, LJ_TRERR_LINNER);
574 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ 640 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
575 J->instunroll = 0; /* Cannot continue across a compiled loop op. */ 641 J->instunroll = 0; /* Cannot continue across a compiled loop op. */
576 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 642 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
577 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ 643 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
578 else 644 else
579 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ 645 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
580 } /* Side trace continues across a loop that's left or not entered. */ 646 } /* Side trace continues across a loop that's left or not entered. */
581} 647}
582 648
649/* -- Record profiler hook checks ----------------------------------------- */
650
651#if LJ_HASPROFILE
652
653/* Need to insert profiler hook check? */
654static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
655{
656 GCproto *ppt;
657 lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l',
658 "bad profiler mode %c", J->prof_mode);
659 if (!pt)
660 return 0;
661 ppt = J->prev_pt;
662 J->prev_pt = pt;
663 if (pt != ppt && ppt) {
664 J->prev_line = -1;
665 return 1;
666 }
667 if (J->prof_mode == 'l') {
668 BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));
669 BCLine pline = J->prev_line;
670 J->prev_line = line;
671 if (pline != line)
672 return 1;
673 }
674 return 0;
675}
676
677static void rec_profile_ins(jit_State *J, const BCIns *pc)
678{
679 if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {
680 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
681 lj_snap_add(J);
682 }
683}
684
685static void rec_profile_ret(jit_State *J)
686{
687 if (J->prof_mode == 'f') {
688 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
689 J->prev_pt = NULL;
690 lj_snap_add(J);
691 }
692}
693
694#endif
695
583/* -- Record calls and returns -------------------------------------------- */ 696/* -- Record calls and returns -------------------------------------------- */
584 697
585/* Specialize to the runtime value of the called function or its prototype. */ 698/* Specialize to the runtime value of the called function or its prototype. */
@@ -590,11 +703,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
590 GCproto *pt = funcproto(fn); 703 GCproto *pt = funcproto(fn);
591 /* Too many closures created? Probably not a monomorphic function. */ 704 /* Too many closures created? Probably not a monomorphic function. */
592 if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ 705 if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */
593 TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); 706 TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC);
594 emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); 707 emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt)));
595 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ 708 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
596 return tr; 709 return tr;
597 } 710 }
711 } else {
712 /* Don't specialize to non-monomorphic builtins. */
713 switch (fn->c.ffid) {
714 case FF_coroutine_wrap_aux:
715 case FF_string_gmatch_aux:
716 /* NYI: io_file_iter doesn't have an ffid, yet. */
717 { /* Specialize to the ffid. */
718 TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
719 emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
720 }
721 return tr;
722 default:
723 /* NYI: don't specialize to non-monomorphic C functions. */
724 break;
725 }
598 } 726 }
599 /* Otherwise specialize to the function (closure) value itself. */ 727 /* Otherwise specialize to the function (closure) value itself. */
600 kfunc = lj_ir_kfunc(J, fn); 728 kfunc = lj_ir_kfunc(J, fn);
@@ -607,21 +735,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
607{ 735{
608 RecordIndex ix; 736 RecordIndex ix;
609 TValue *functv = &J->L->base[func]; 737 TValue *functv = &J->L->base[func];
610 TRef *fbase = &J->base[func]; 738 TRef kfunc, *fbase = &J->base[func];
611 ptrdiff_t i; 739 ptrdiff_t i;
612 for (i = 0; i <= nargs; i++) 740 (void)getslot(J, func); /* Ensure func has a reference. */
613 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ 741 for (i = 1; i <= nargs; i++)
742 (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
614 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ 743 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
615 ix.tab = fbase[0]; 744 ix.tab = fbase[0];
616 copyTV(J->L, &ix.tabv, functv); 745 copyTV(J->L, &ix.tabv, functv);
617 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) 746 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
618 lj_trace_err(J, LJ_TRERR_NOMM); 747 lj_trace_err(J, LJ_TRERR_NOMM);
619 for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ 748 for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
620 fbase[i] = fbase[i-1]; 749 fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
750#if LJ_FR2
751 fbase[2] = fbase[0];
752#endif
621 fbase[0] = ix.mobj; /* Replace function. */ 753 fbase[0] = ix.mobj; /* Replace function. */
622 functv = &ix.mobjv; 754 functv = &ix.mobjv;
623 } 755 }
624 fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); 756 kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
757#if LJ_FR2
758 fbase[0] = kfunc;
759 fbase[1] = TREF_FRAME;
760#else
761 fbase[0] = kfunc | TREF_FRAME;
762#endif
625 J->maxslot = (BCReg)nargs; 763 J->maxslot = (BCReg)nargs;
626} 764}
627 765
@@ -631,8 +769,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
631 rec_call_setup(J, func, nargs); 769 rec_call_setup(J, func, nargs);
632 /* Bump frame. */ 770 /* Bump frame. */
633 J->framedepth++; 771 J->framedepth++;
634 J->base += func+1; 772 J->base += func+1+LJ_FR2;
635 J->baseslot += func+1; 773 J->baseslot += func+1+LJ_FR2;
636 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) 774 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
637 lj_trace_err(J, LJ_TRERR_STACKOV); 775 lj_trace_err(J, LJ_TRERR_STACKOV);
638} 776}
@@ -650,7 +788,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
650 func += cbase; 788 func += cbase;
651 } 789 }
652 /* Move func + args down. */ 790 /* Move func + args down. */
653 memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); 791 if (LJ_FR2 && J->baseslot == 2)
792 J->base[func+1] = TREF_FRAME;
793 memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
654 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ 794 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
655 /* Tailcalls can form a loop, so count towards the loop unroll limit. */ 795 /* Tailcalls can form a loop, so count towards the loop unroll limit. */
656 if (++J->tailcalled > J->loopunroll) 796 if (++J->tailcalled > J->loopunroll)
@@ -680,6 +820,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
680 return 0; 820 return 0;
681} 821}
682 822
823static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
824
683/* Record return. */ 825/* Record return. */
684void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) 826void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
685{ 827{
@@ -691,30 +833,32 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
691 BCReg cbase = (BCReg)frame_delta(frame); 833 BCReg cbase = (BCReg)frame_delta(frame);
692 if (--J->framedepth <= 0) 834 if (--J->framedepth <= 0)
693 lj_trace_err(J, LJ_TRERR_NYIRETL); 835 lj_trace_err(J, LJ_TRERR_NYIRETL);
694 lua_assert(J->baseslot > 1); 836 lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
695 gotresults++; 837 gotresults++;
696 rbase += cbase; 838 rbase += cbase;
697 J->baseslot -= (BCReg)cbase; 839 J->baseslot -= (BCReg)cbase;
698 J->base -= cbase; 840 J->base -= cbase;
699 J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ 841 J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */
700 frame = frame_prevd(frame); 842 frame = frame_prevd(frame);
843 J->needsnap = 1; /* Stop catching on-trace errors. */
701 } 844 }
702 /* Return to lower frame via interpreter for unhandled cases. */ 845 /* Return to lower frame via interpreter for unhandled cases. */
703 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && 846 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
704 (!frame_islua(frame) || 847 (!frame_islua(frame) ||
705 (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { 848 (J->parent == 0 && J->exitno == 0 &&
849 !bc_isret(bc_op(J->cur.startins))))) {
706 /* NYI: specialize to frame type and return directly, not via RET*. */ 850 /* NYI: specialize to frame type and return directly, not via RET*. */
707 for (i = 0; i < (ptrdiff_t)rbase; i++) 851 for (i = 0; i < (ptrdiff_t)rbase; i++)
708 J->base[i] = 0; /* Purge dead slots. */ 852 J->base[i] = 0; /* Purge dead slots. */
709 J->maxslot = rbase + (BCReg)gotresults; 853 J->maxslot = rbase + (BCReg)gotresults;
710 rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ 854 lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
711 return; 855 return;
712 } 856 }
713 if (frame_isvarg(frame)) { 857 if (frame_isvarg(frame)) {
714 BCReg cbase = (BCReg)frame_delta(frame); 858 BCReg cbase = (BCReg)frame_delta(frame);
715 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ 859 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
716 lj_trace_err(J, LJ_TRERR_NYIRETL); 860 lj_trace_err(J, LJ_TRERR_NYIRETL);
717 lua_assert(J->baseslot > 1); 861 lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
718 rbase += cbase; 862 rbase += cbase;
719 J->baseslot -= (BCReg)cbase; 863 J->baseslot -= (BCReg)cbase;
720 J->base -= cbase; 864 J->base -= cbase;
@@ -724,27 +868,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
724 BCIns callins = *(frame_pc(frame)-1); 868 BCIns callins = *(frame_pc(frame)-1);
725 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; 869 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
726 BCReg cbase = bc_a(callins); 870 BCReg cbase = bc_a(callins);
727 GCproto *pt = funcproto(frame_func(frame - (cbase+1))); 871 GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
728 if ((pt->flags & PROTO_NOJIT)) 872 if ((pt->flags & PROTO_NOJIT))
729 lj_trace_err(J, LJ_TRERR_CJITOFF); 873 lj_trace_err(J, LJ_TRERR_CJITOFF);
730 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { 874 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
731 if (check_downrec_unroll(J, pt)) { 875 if (check_downrec_unroll(J, pt)) {
732 J->maxslot = (BCReg)(rbase + gotresults); 876 J->maxslot = (BCReg)(rbase + gotresults);
733 lj_snap_purge(J); 877 lj_snap_purge(J);
734 rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ 878 lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
735 return; 879 return;
736 } 880 }
737 lj_snap_add(J); 881 lj_snap_add(J);
738 } 882 }
739 for (i = 0; i < nresults; i++) /* Adjust results. */ 883 for (i = 0; i < nresults; i++) /* Adjust results. */
740 J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; 884 J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
741 J->maxslot = cbase+(BCReg)nresults; 885 J->maxslot = cbase+(BCReg)nresults;
742 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ 886 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
743 J->framedepth--; 887 J->framedepth--;
744 lua_assert(J->baseslot > cbase+1); 888 lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return");
745 J->baseslot -= cbase+1; 889 J->baseslot -= cbase+1+LJ_FR2;
746 J->base -= cbase+1; 890 J->base -= cbase+1+LJ_FR2;
747 } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { 891 } else if (J->parent == 0 && J->exitno == 0 &&
892 !bc_isret(bc_op(J->cur.startins))) {
748 /* Return to lower frame would leave the loop in a root trace. */ 893 /* Return to lower frame would leave the loop in a root trace. */
749 lj_trace_err(J, LJ_TRERR_LLEAVE); 894 lj_trace_err(J, LJ_TRERR_LLEAVE);
750 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ 895 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */
@@ -752,13 +897,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
752 } else { /* Return to lower frame. Guard for the target we return to. */ 897 } else { /* Return to lower frame. Guard for the target we return to. */
753 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); 898 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
754 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); 899 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
755 emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); 900 emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
756 J->retdepth++; 901 J->retdepth++;
757 J->needsnap = 1; 902 J->needsnap = 1;
758 lua_assert(J->baseslot == 1); 903 lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
759 /* Shift result slots up and clear the slots of the new frame below. */ 904 /* Shift result slots up and clear the slots of the new frame below. */
760 memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); 905 memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
761 memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); 906 memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
762 } 907 }
763 } else if (frame_iscont(frame)) { /* Return to continuation frame. */ 908 } else if (frame_iscont(frame)) { /* Return to continuation frame. */
764 ASMFunction cont = frame_contf(frame); 909 ASMFunction cont = frame_contf(frame);
@@ -767,24 +912,52 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
767 lj_trace_err(J, LJ_TRERR_NYIRETL); 912 lj_trace_err(J, LJ_TRERR_NYIRETL);
768 J->baseslot -= (BCReg)cbase; 913 J->baseslot -= (BCReg)cbase;
769 J->base -= cbase; 914 J->base -= cbase;
770 J->maxslot = cbase-2; 915 J->maxslot = cbase-(2<<LJ_FR2);
771 if (cont == lj_cont_ra) { 916 if (cont == lj_cont_ra) {
772 /* Copy result to destination slot. */ 917 /* Copy result to destination slot. */
773 BCReg dst = bc_a(*(frame_contpc(frame)-1)); 918 BCReg dst = bc_a(*(frame_contpc(frame)-1));
774 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; 919 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
775 if (dst >= J->maxslot) J->maxslot = dst+1; 920 if (dst >= J->maxslot) {
921 J->maxslot = dst+1;
922 }
776 } else if (cont == lj_cont_nop) { 923 } else if (cont == lj_cont_nop) {
777 /* Nothing to do here. */ 924 /* Nothing to do here. */
778 } else if (cont == lj_cont_cat) { 925 } else if (cont == lj_cont_cat) {
779 lua_assert(0); 926 BCReg bslot = bc_b(*(frame_contpc(frame)-1));
927 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
928 if (bslot != J->maxslot) { /* Concatenate the remainder. */
929 TValue *b = J->L->base, save; /* Simulate lower frame and result. */
930 /* Can't handle MM_concat + CALLT + fast func side-effects. */
931 if (J->postproc != LJ_POST_NONE)
932 lj_trace_err(J, LJ_TRERR_NYIRETL);
933 J->base[J->maxslot] = tr;
934 copyTV(J->L, &save, b-(2<<LJ_FR2));
935 if (gotresults)
936 copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
937 else
938 setnilV(b-(2<<LJ_FR2));
939 J->L->base = b - cbase;
940 tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
941 b = J->L->base + cbase; /* Undo. */
942 J->L->base = b;
943 copyTV(J->L, b-(2<<LJ_FR2), &save);
944 }
945 if (tr) { /* Store final result. */
946 BCReg dst = bc_a(*(frame_contpc(frame)-1));
947 J->base[dst] = tr;
948 if (dst >= J->maxslot) {
949 J->maxslot = dst+1;
950 }
951 } /* Otherwise continue with another __concat call. */
780 } else { 952 } else {
781 /* Result type already specialized. */ 953 /* Result type already specialized. */
782 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); 954 lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt,
955 "bad continuation type");
783 } 956 }
784 } else { 957 } else {
785 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ 958 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
786 } 959 }
787 lua_assert(J->baseslot >= 1); 960 lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return");
788} 961}
789 962
790/* -- Metamethod handling ------------------------------------------------- */ 963/* -- Metamethod handling ------------------------------------------------- */
@@ -792,19 +965,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
792/* Prepare to record call to metamethod. */ 965/* Prepare to record call to metamethod. */
793static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) 966static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
794{ 967{
795 BCReg s, top = curr_proto(J->L)->framesize; 968 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
796 TRef trcont; 969#if LJ_FR2
797 setcont(&J->L->base[top], cont); 970 J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
798#if LJ_64 971 J->base[top+1] = TREF_CONT;
799 trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
800#else 972#else
801 trcont = lj_ir_kptr(J, (void *)cont); 973 J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
802#endif 974#endif
803 J->base[top] = trcont | TREF_CONT;
804 J->framedepth++; 975 J->framedepth++;
805 for (s = J->maxslot; s < top; s++) 976 for (s = J->maxslot; s < top; s++)
806 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ 977 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
807 return top+1; 978 return top+1+LJ_FR2;
808} 979}
809 980
810/* Record metamethod lookup. */ 981/* Record metamethod lookup. */
@@ -823,7 +994,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
823 cTValue *mo; 994 cTValue *mo;
824 if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { 995 if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
825 /* Specialize to the C library namespace object. */ 996 /* Specialize to the C library namespace object. */
826 emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); 997 emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
827 } else { 998 } else {
828 /* Specialize to the type of userdata. */ 999 /* Specialize to the type of userdata. */
829 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); 1000 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
@@ -852,7 +1023,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
852 } 1023 }
853 /* The cdata metatable is treated as immutable. */ 1024 /* The cdata metatable is treated as immutable. */
854 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; 1025 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
855 ix->mt = mix.tab = lj_ir_ktab(J, mt); 1026 ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
1027 GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
856 goto nocheck; 1028 goto nocheck;
857 } 1029 }
858 ix->mt = mt ? mix.tab : TREF_NIL; 1030 ix->mt = mt ? mix.tab : TREF_NIL;
@@ -879,12 +1051,12 @@ nocheck:
879static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) 1051static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
880{ 1052{
881 /* Set up metamethod call first to save ix->tab and ix->tabv. */ 1053 /* Set up metamethod call first to save ix->tab and ix->tabv. */
882 BCReg func = rec_mm_prep(J, lj_cont_ra); 1054 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
883 TRef *base = J->base + func; 1055 TRef *base = J->base + func;
884 TValue *basev = J->L->base + func; 1056 TValue *basev = J->L->base + func;
885 base[1] = ix->tab; base[2] = ix->key; 1057 base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
886 copyTV(J->L, basev+1, &ix->tabv); 1058 copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
887 copyTV(J->L, basev+2, &ix->keyv); 1059 copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
888 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ 1060 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
889 if (mm != MM_unm) { 1061 if (mm != MM_unm) {
890 ix->tab = ix->key; 1062 ix->tab = ix->key;
@@ -896,6 +1068,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
896 } 1068 }
897ok: 1069ok:
898 base[0] = ix->mobj; 1070 base[0] = ix->mobj;
1071#if LJ_FR2
1072 base[1] = 0;
1073#endif
899 copyTV(J->L, basev+0, &ix->mobjv); 1074 copyTV(J->L, basev+0, &ix->mobjv);
900 lj_record_call(J, func, 2); 1075 lj_record_call(J, func, 2);
901 return 0; /* No result yet. */ 1076 return 0; /* No result yet. */
@@ -912,6 +1087,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
912 TRef *base = J->base + func; 1087 TRef *base = J->base + func;
913 TValue *basev = J->L->base + func; 1088 TValue *basev = J->L->base + func;
914 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); 1089 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
1090 base += LJ_FR2;
1091 basev += LJ_FR2;
915 base[1] = tr; copyTV(J->L, basev+1, tv); 1092 base[1] = tr; copyTV(J->L, basev+1, tv);
916#if LJ_52 1093#if LJ_52
917 base[2] = tr; copyTV(J->L, basev+2, tv); 1094 base[2] = tr; copyTV(J->L, basev+2, tv);
@@ -921,7 +1098,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
921 lj_record_call(J, func, 2); 1098 lj_record_call(J, func, 2);
922 } else { 1099 } else {
923 if (LJ_52 && tref_istab(tr)) 1100 if (LJ_52 && tref_istab(tr))
924 return lj_ir_call(J, IRCALL_lj_tab_len, tr); 1101 return emitir(IRTI(IR_ALEN), tr, TREF_NIL);
925 lj_trace_err(J, LJ_TRERR_NOMM); 1102 lj_trace_err(J, LJ_TRERR_NOMM);
926 } 1103 }
927 return 0; /* No result yet. */ 1104 return 0; /* No result yet. */
@@ -931,10 +1108,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
931static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) 1108static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
932{ 1109{
933 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); 1110 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
934 TRef *base = J->base + func; 1111 TRef *base = J->base + func + LJ_FR2;
935 TValue *tv = J->L->base + func; 1112 TValue *tv = J->L->base + func + LJ_FR2;
936 base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; 1113 base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
937 copyTV(J->L, tv+0, &ix->mobjv); 1114 copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
938 copyTV(J->L, tv+1, &ix->valv); 1115 copyTV(J->L, tv+1, &ix->valv);
939 copyTV(J->L, tv+2, &ix->keyv); 1116 copyTV(J->L, tv+2, &ix->keyv);
940 lj_record_call(J, func, 2); 1117 lj_record_call(J, func, 2);
@@ -1030,7 +1207,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
1030 ix->tab = ix->val; 1207 ix->tab = ix->val;
1031 copyTV(J->L, &ix->tabv, &ix->valv); 1208 copyTV(J->L, &ix->tabv, &ix->valv);
1032 } else { 1209 } else {
1033 lua_assert(tref_iscdata(ix->key)); 1210 lj_assertJ(tref_iscdata(ix->key), "cdata expected");
1034 ix->tab = ix->key; 1211 ix->tab = ix->key;
1035 copyTV(J->L, &ix->tabv, &ix->keyv); 1212 copyTV(J->L, &ix->tabv, &ix->keyv);
1036 } 1213 }
@@ -1041,6 +1218,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
1041 1218
1042/* -- Indexed access ------------------------------------------------------ */ 1219/* -- Indexed access ------------------------------------------------------ */
1043 1220
1221#ifdef LUAJIT_ENABLE_TABLE_BUMP
1222/* Bump table allocations in bytecode when they grow during recording. */
1223static void rec_idx_bump(jit_State *J, RecordIndex *ix)
1224{
1225 RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))];
1226 if (tref_ref(ix->tab) == rbc->ref) {
1227 const BCIns *pc = mref(rbc->pc, const BCIns);
1228 GCtab *tb = tabV(&ix->tabv);
1229 uint32_t nhbits;
1230 IRIns *ir;
1231 if (!tvisnil(&ix->keyv))
1232 (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */
1233 nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0;
1234 ir = IR(tref_ref(ix->tab));
1235 if (ir->o == IR_TNEW) {
1236 uint32_t ah = bc_d(*pc);
1237 uint32_t asize = ah & 0x7ff, hbits = ah >> 11;
1238 if (nhbits > hbits) hbits = nhbits;
1239 if (tb->asize > asize) {
1240 asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff;
1241 }
1242 if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */
1243 /* Patch bytecode, but continue recording (for more patching). */
1244 setbc_d(pc, (asize | (hbits<<11)));
1245 /* Patching TNEW operands is only safe if the trace is aborted. */
1246 ir->op1 = asize; ir->op2 = hbits;
1247 J->retryrec = 1; /* Abort the trace at the end of recording. */
1248 }
1249 } else if (ir->o == IR_TDUP) {
1250 GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc)));
1251 /* Grow template table, but preserve keys with nil values. */
1252 if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) ||
1253 (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) {
1254 Node *node = noderef(tpl->node);
1255 uint32_t i, hmask = tpl->hmask, asize;
1256 TValue *array;
1257 for (i = 0; i <= hmask; i++) {
1258 if (!tvisnil(&node[i].key) && tvisnil(&node[i].val))
1259 settabV(J->L, &node[i].val, tpl);
1260 }
1261 if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) {
1262 TValue *o = lj_tab_set(J->L, tpl, &ix->keyv);
1263 if (tvisnil(o)) settabV(J->L, o, tpl);
1264 }
1265 lj_tab_resize(J->L, tpl, tb->asize, nhbits);
1266 node = noderef(tpl->node);
1267 hmask = tpl->hmask;
1268 for (i = 0; i <= hmask; i++) {
1269 /* This is safe, since template tables only hold immutable values. */
1270 if (tvistab(&node[i].val))
1271 setnilV(&node[i].val);
1272 }
1273 /* The shape of the table may have changed. Clean up array part, too. */
1274 asize = tpl->asize;
1275 array = tvref(tpl->array);
1276 for (i = 0; i < asize; i++) {
1277 if (tvistab(&array[i]))
1278 setnilV(&array[i]);
1279 }
1280 J->retryrec = 1; /* Abort the trace at the end of recording. */
1281 }
1282 }
1283 }
1284}
1285#endif
1286
1044/* Record bounds-check. */ 1287/* Record bounds-check. */
1045static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) 1288static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1046{ 1289{
@@ -1061,7 +1304,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1061 /* Got scalar evolution analysis results for this reference? */ 1304 /* Got scalar evolution analysis results for this reference? */
1062 if (ref == J->scev.idx) { 1305 if (ref == J->scev.idx) {
1063 int32_t stop; 1306 int32_t stop;
1064 lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); 1307 lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD,
1308 "only int SCEV supported");
1065 stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); 1309 stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
1066 /* Runtime value for stop of loop is within bounds? */ 1310 /* Runtime value for stop of loop is within bounds? */
1067 if ((uint64_t)stop + ofs < (uint64_t)asize) { 1311 if ((uint64_t)stop + ofs < (uint64_t)asize) {
@@ -1080,11 +1324,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1080} 1324}
1081 1325
1082/* Record indexed key lookup. */ 1326/* Record indexed key lookup. */
1083static TRef rec_idx_key(jit_State *J, RecordIndex *ix) 1327static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
1328 IRType1 *rbguard)
1084{ 1329{
1085 TRef key; 1330 TRef key;
1086 GCtab *t = tabV(&ix->tabv); 1331 GCtab *t = tabV(&ix->tabv);
1087 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ 1332 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
1333 *rbref = 0;
1334 rbguard->irt = 0;
1088 1335
1089 /* Integer keys are looked up in the array part first. */ 1336 /* Integer keys are looked up in the array part first. */
1090 key = ix->key; 1337 key = ix->key;
@@ -1098,8 +1345,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1098 if ((MSize)k < t->asize) { /* Currently an array key? */ 1345 if ((MSize)k < t->asize) { /* Currently an array key? */
1099 TRef arrayref; 1346 TRef arrayref;
1100 rec_idx_abc(J, asizeref, ikey, t->asize); 1347 rec_idx_abc(J, asizeref, ikey, t->asize);
1101 arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); 1348 arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY);
1102 return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); 1349 return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey);
1103 } else { /* Currently not in array (may be an array extension)? */ 1350 } else { /* Currently not in array (may be an array extension)? */
1104 emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ 1351 emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
1105 if (k == 0 && tref_isk(key)) 1352 if (k == 0 && tref_isk(key))
@@ -1134,16 +1381,18 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1134 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); 1381 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
1135 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && 1382 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
1136 hslot <= 65535*(MSize)sizeof(Node)) { 1383 hslot <= 65535*(MSize)sizeof(Node)) {
1137 TRef node, kslot; 1384 TRef node, kslot, hm;
1138 TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); 1385 *rbref = J->cur.nins; /* Mark possible rollback point. */
1386 *rbguard = J->guardemit;
1387 hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
1139 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); 1388 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
1140 node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); 1389 node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
1141 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); 1390 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
1142 return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); 1391 return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
1143 } 1392 }
1144 } 1393 }
1145 /* Fall back to a regular hash lookup. */ 1394 /* Fall back to a regular hash lookup. */
1146 return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); 1395 return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key);
1147} 1396}
1148 1397
1149/* Determine whether a key is NOT one of the fast metamethod names. */ 1398/* Determine whether a key is NOT one of the fast metamethod names. */
@@ -1168,20 +1417,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1168{ 1417{
1169 TRef xref; 1418 TRef xref;
1170 IROp xrefop, loadop; 1419 IROp xrefop, loadop;
1420 IRRef rbref;
1421 IRType1 rbguard;
1171 cTValue *oldv; 1422 cTValue *oldv;
1172 1423
1173 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ 1424 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
1174 /* Never call raw lj_record_idx() on non-table. */ 1425 /* Never call raw lj_record_idx() on non-table. */
1175 lua_assert(ix->idxchain != 0); 1426 lj_assertJ(ix->idxchain != 0, "bad usage");
1176 if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) 1427 if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
1177 lj_trace_err(J, LJ_TRERR_NOMM); 1428 lj_trace_err(J, LJ_TRERR_NOMM);
1178 handlemm: 1429 handlemm:
1179 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ 1430 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
1180 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); 1431 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
1181 TRef *base = J->base + func; 1432 TRef *base = J->base + func + LJ_FR2;
1182 TValue *tv = J->L->base + func; 1433 TValue *tv = J->L->base + func + LJ_FR2;
1183 base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; 1434 base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
1184 setfuncV(J->L, tv+0, funcV(&ix->mobjv)); 1435 setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
1185 copyTV(J->L, tv+1, &ix->tabv); 1436 copyTV(J->L, tv+1, &ix->tabv);
1186 copyTV(J->L, tv+2, &ix->keyv); 1437 copyTV(J->L, tv+2, &ix->keyv);
1187 if (ix->val) { 1438 if (ix->val) {
@@ -1194,6 +1445,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1194 return 0; /* No result yet. */ 1445 return 0; /* No result yet. */
1195 } 1446 }
1196 } 1447 }
1448#if LJ_HASBUFFER
1449 /* The index table of buffer objects is treated as immutable. */
1450 if (ix->mt == TREF_NIL && !ix->val &&
1451 tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER &&
1452 tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) {
1453 cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv));
1454 TRef tr = lj_record_constify(J, val);
1455 if (tr) return tr; /* Specialize to the value, i.e. a method. */
1456 }
1457#endif
1197 /* Otherwise retry lookup with metaobject. */ 1458 /* Otherwise retry lookup with metaobject. */
1198 ix->tab = ix->mobj; 1459 ix->tab = ix->mobj;
1199 copyTV(J->L, &ix->tabv, &ix->mobjv); 1460 copyTV(J->L, &ix->tabv, &ix->mobjv);
@@ -1213,7 +1474,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1213 } 1474 }
1214 1475
1215 /* Record the key lookup. */ 1476 /* Record the key lookup. */
1216 xref = rec_idx_key(J, ix); 1477 xref = rec_idx_key(J, ix, &rbref, &rbguard);
1217 xrefop = IR(tref_ref(xref))->o; 1478 xrefop = IR(tref_ref(xref))->o;
1218 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; 1479 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
1219 /* The lj_meta_tset() inconsistency is gone, but better play safe. */ 1480 /* The lj_meta_tset() inconsistency is gone, but better play safe. */
@@ -1223,11 +1484,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1223 IRType t = itype2irt(oldv); 1484 IRType t = itype2irt(oldv);
1224 TRef res; 1485 TRef res;
1225 if (oldv == niltvg(J2G(J))) { 1486 if (oldv == niltvg(J2G(J))) {
1226 emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1487 emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1227 res = TREF_NIL; 1488 res = TREF_NIL;
1228 } else { 1489 } else {
1229 res = emitir(IRTG(loadop, t), xref, 0); 1490 res = emitir(IRTG(loadop, t), xref, 0);
1230 } 1491 }
1492 if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */
1493 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1494 J->guardemit = rbguard;
1495 }
1231 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) 1496 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
1232 goto handlemm; 1497 goto handlemm;
1233 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ 1498 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
@@ -1235,6 +1500,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1235 } else { /* Indexed store. */ 1500 } else { /* Indexed store. */
1236 GCtab *mt = tabref(tabV(&ix->tabv)->metatable); 1501 GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
1237 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); 1502 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
1503 if (tref_ref(xref) < rbref) { /* HREFK forwarded? */
1504 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1505 J->guardemit = rbguard;
1506 }
1238 if (tvisnil(oldv)) { /* Previous value was nil? */ 1507 if (tvisnil(oldv)) { /* Previous value was nil? */
1239 /* Need to duplicate the hasmm check for the early guards. */ 1508 /* Need to duplicate the hasmm check for the early guards. */
1240 int hasmm = 0; 1509 int hasmm = 0;
@@ -1245,24 +1514,28 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1245 if (hasmm) 1514 if (hasmm)
1246 emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ 1515 emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
1247 else if (xrefop == IR_HREF) 1516 else if (xrefop == IR_HREF)
1248 emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), 1517 emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
1249 xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1518 xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1250 if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { 1519 if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
1251 lua_assert(hasmm); 1520 lj_assertJ(hasmm, "inconsistent metamethod handling");
1252 goto handlemm; 1521 goto handlemm;
1253 } 1522 }
1254 lua_assert(!hasmm); 1523 lj_assertJ(!hasmm, "inconsistent metamethod handling");
1255 if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ 1524 if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
1256 TRef key = ix->key; 1525 TRef key = ix->key;
1257 if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ 1526 if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
1258 key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); 1527 key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
1259 xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); 1528 xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
1260 keybarrier = 0; /* NEWREF already takes care of the key barrier. */ 1529 keybarrier = 0; /* NEWREF already takes care of the key barrier. */
1530#ifdef LUAJIT_ENABLE_TABLE_BUMP
1531 if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */
1532 rec_idx_bump(J, ix);
1533#endif
1261 } 1534 }
1262 } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { 1535 } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
1263 /* Cannot derive that the previous value was non-nil, must do checks. */ 1536 /* Cannot derive that the previous value was non-nil, must do checks. */
1264 if (xrefop == IR_HREF) /* Guard against store to niltv. */ 1537 if (xrefop == IR_HREF) /* Guard against store to niltv. */
1265 emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1538 emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1266 if (ix->idxchain) { /* Metamethod lookup required? */ 1539 if (ix->idxchain) { /* Metamethod lookup required? */
1267 /* A check for NULL metatable is cheaper (hoistable) than a load. */ 1540 /* A check for NULL metatable is cheaper (hoistable) than a load. */
1268 if (!mt) { 1541 if (!mt) {
@@ -1284,7 +1557,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1284 emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); 1557 emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
1285 /* Invalidate neg. metamethod cache for stores with certain string keys. */ 1558 /* Invalidate neg. metamethod cache for stores with certain string keys. */
1286 if (!nommstr(J, ix->key)) { 1559 if (!nommstr(J, ix->key)) {
1287 TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); 1560 TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM);
1288 emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); 1561 emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
1289 } 1562 }
1290 J->needsnap = 1; 1563 J->needsnap = 1;
@@ -1292,6 +1565,31 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1292 } 1565 }
1293} 1566}
1294 1567
1568static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
1569{
1570 RecordIndex ix;
1571 cTValue *basev = J->L->base;
1572 GCtab *t = tabV(&basev[ra-1]);
1573 settabV(J->L, &ix.tabv, t);
1574 ix.tab = getslot(J, ra-1);
1575 ix.idxchain = 0;
1576#ifdef LUAJIT_ENABLE_TABLE_BUMP
1577 if ((J->flags & JIT_F_OPT_SINK)) {
1578 if (t->asize < i+rn-ra)
1579 lj_tab_reasize(J->L, t, i+rn-ra);
1580 setnilV(&ix.keyv);
1581 rec_idx_bump(J, &ix);
1582 }
1583#endif
1584 for (; ra < rn; i++, ra++) {
1585 setintV(&ix.keyv, i);
1586 ix.key = lj_ir_kint(J, i);
1587 copyTV(J->L, &ix.valv, &basev[ra]);
1588 ix.val = getslot(J, ra);
1589 lj_record_idx(J, &ix);
1590 }
1591}
1592
1295/* -- Upvalue access ------------------------------------------------------ */ 1593/* -- Upvalue access ------------------------------------------------------ */
1296 1594
1297/* Check whether upvalue is immutable and ok to constify. */ 1595/* Check whether upvalue is immutable and ok to constify. */
@@ -1328,13 +1626,17 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
1328 int needbarrier = 0; 1626 int needbarrier = 0;
1329 if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ 1627 if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */
1330 TRef tr, kfunc; 1628 TRef tr, kfunc;
1331 lua_assert(val == 0); 1629 lj_assertJ(val == 0, "bad usage");
1332 if (!tref_isk(fn)) { /* Late specialization of current function. */ 1630 if (!tref_isk(fn)) { /* Late specialization of current function. */
1333 if (J->pt->flags >= PROTO_CLC_POLY) 1631 if (J->pt->flags >= PROTO_CLC_POLY)
1334 goto noconstify; 1632 goto noconstify;
1335 kfunc = lj_ir_kfunc(J, J->fn); 1633 kfunc = lj_ir_kfunc(J, J->fn);
1336 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); 1634 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
1337 J->base[-1] = TREF_FRAME | kfunc; 1635#if LJ_FR2
1636 J->base[-2] = kfunc;
1637#else
1638 J->base[-1] = kfunc | TREF_FRAME;
1639#endif
1338 fn = kfunc; 1640 fn = kfunc;
1339 } 1641 }
1340 tr = lj_record_constify(J, uvval(uvp)); 1642 tr = lj_record_constify(J, uvval(uvp));
@@ -1345,16 +1647,16 @@ noconstify:
1345 /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ 1647 /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
1346 uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); 1648 uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
1347 if (!uvp->closed) { 1649 if (!uvp->closed) {
1348 uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); 1650 uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
1349 /* In current stack? */ 1651 /* In current stack? */
1350 if (uvval(uvp) >= tvref(J->L->stack) && 1652 if (uvval(uvp) >= tvref(J->L->stack) &&
1351 uvval(uvp) < tvref(J->L->maxstack)) { 1653 uvval(uvp) < tvref(J->L->maxstack)) {
1352 int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); 1654 int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
1353 if (slot >= 0) { /* Aliases an SSA slot? */ 1655 if (slot >= 0) { /* Aliases an SSA slot? */
1354 emitir(IRTG(IR_EQ, IRT_P32), 1656 emitir(IRTG(IR_EQ, IRT_PGC),
1355 REF_BASE, 1657 REF_BASE,
1356 emitir(IRT(IR_ADD, IRT_P32), uref, 1658 emitir(IRT(IR_ADD, IRT_PGC), uref,
1357 lj_ir_kint(J, (slot - 1) * -8))); 1659 lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
1358 slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ 1660 slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
1359 if (val == 0) { 1661 if (val == 0) {
1360 return getslot(J, slot); 1662 return getslot(J, slot);
@@ -1365,12 +1667,12 @@ noconstify:
1365 } 1667 }
1366 } 1668 }
1367 } 1669 }
1368 emitir(IRTG(IR_UGT, IRT_P32), 1670 emitir(IRTG(IR_UGT, IRT_PGC),
1369 emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE), 1671 emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
1370 lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); 1672 lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
1371 } else { 1673 } else {
1372 needbarrier = 1; 1674 needbarrier = 1;
1373 uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); 1675 uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
1374 } 1676 }
1375 if (val == 0) { /* Upvalue load */ 1677 if (val == 0) { /* Upvalue load */
1376 IRType t = itype2irt(uvval(uvp)); 1678 IRType t = itype2irt(uvval(uvp));
@@ -1409,16 +1711,16 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
1409 if (count + J->tailcalled > J->param[JIT_P_recunroll]) { 1711 if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
1410 J->pc++; 1712 J->pc++;
1411 if (J->framedepth + J->retdepth == 0) 1713 if (J->framedepth + J->retdepth == 0)
1412 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ 1714 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
1413 else 1715 else
1414 rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ 1716 lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
1415 } 1717 }
1416 } else { 1718 } else {
1417 if (count > J->param[JIT_P_callunroll]) { 1719 if (count > J->param[JIT_P_callunroll]) {
1418 if (lnk) { /* Possible tail- or up-recursion. */ 1720 if (lnk) { /* Possible tail- or up-recursion. */
1419 lj_trace_flush(J, lnk); /* Flush trace that only returns. */ 1721 lj_trace_flush(J, lnk); /* Flush trace that only returns. */
1420 /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ 1722 /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
1421 hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); 1723 hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u);
1422 } 1724 }
1423 lj_trace_err(J, LJ_TRERR_CUNROLL); 1725 lj_trace_err(J, LJ_TRERR_CUNROLL);
1424 } 1726 }
@@ -1445,11 +1747,14 @@ static void rec_func_setup(jit_State *J)
1445static void rec_func_vararg(jit_State *J) 1747static void rec_func_vararg(jit_State *J)
1446{ 1748{
1447 GCproto *pt = J->pt; 1749 GCproto *pt = J->pt;
1448 BCReg s, fixargs, vframe = J->maxslot+1; 1750 BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
1449 lua_assert((pt->flags & PROTO_VARARG)); 1751 lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function");
1450 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) 1752 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
1451 lj_trace_err(J, LJ_TRERR_STACKOV); 1753 lj_trace_err(J, LJ_TRERR_STACKOV);
1452 J->base[vframe-1] = J->base[-1]; /* Copy function up. */ 1754 J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
1755#if LJ_FR2
1756 J->base[vframe-1] = TREF_FRAME;
1757#endif
1453 /* Copy fixarg slots up and set their original slots to nil. */ 1758 /* Copy fixarg slots up and set their original slots to nil. */
1454 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; 1759 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
1455 for (s = 0; s < fixargs; s++) { 1760 for (s = 0; s < fixargs; s++) {
@@ -1485,9 +1790,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
1485 } 1790 }
1486 J->instunroll = 0; /* Cannot continue across a compiled function. */ 1791 J->instunroll = 0; /* Cannot continue across a compiled function. */
1487 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 1792 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
1488 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ 1793 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
1489 else 1794 else
1490 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ 1795 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
1491} 1796}
1492 1797
1493/* -- Vararg handling ----------------------------------------------------- */ 1798/* -- Vararg handling ----------------------------------------------------- */
@@ -1511,8 +1816,10 @@ static int select_detect(jit_State *J)
1511static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) 1816static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1512{ 1817{
1513 int32_t numparams = J->pt->numparams; 1818 int32_t numparams = J->pt->numparams;
1514 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; 1819 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
1515 lua_assert(frame_isvarg(J->L->base-1)); 1820 lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame");
1821 if (LJ_FR2 && dst > J->maxslot)
1822 J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */
1516 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ 1823 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
1517 ptrdiff_t i; 1824 ptrdiff_t i;
1518 if (nvararg < 0) nvararg = 0; 1825 if (nvararg < 0) nvararg = 0;
@@ -1523,10 +1830,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1523 J->maxslot = dst + (BCReg)nresults; 1830 J->maxslot = dst + (BCReg)nresults;
1524 } 1831 }
1525 for (i = 0; i < nresults; i++) 1832 for (i = 0; i < nresults; i++)
1526 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; 1833 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
1527 } else { /* Unknown number of varargs passed to trace. */ 1834 } else { /* Unknown number of varargs passed to trace. */
1528 TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); 1835 TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
1529 int32_t frofs = 8*(1+numparams)+FRAME_VARG; 1836 int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
1530 if (nresults >= 0) { /* Known fixed number of results. */ 1837 if (nresults >= 0) { /* Known fixed number of results. */
1531 ptrdiff_t i; 1838 ptrdiff_t i;
1532 if (nvararg > 0) { 1839 if (nvararg > 0) {
@@ -1535,16 +1842,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1535 if (nvararg >= nresults) 1842 if (nvararg >= nresults)
1536 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); 1843 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
1537 else 1844 else
1538 emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); 1845 emitir(IRTGI(IR_EQ), fr,
1539 vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); 1846 lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
1540 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); 1847 vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1848 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
1541 for (i = 0; i < nload; i++) { 1849 for (i = 0; i < nload; i++) {
1542 IRType t = itype2irt(&J->L->base[i-1-nvararg]); 1850 IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
1543 TRef aref = emitir(IRT(IR_AREF, IRT_P32), 1851 TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
1544 vbase, lj_ir_kint(J, (int32_t)i)); 1852 vbase, lj_ir_kint(J, (int32_t)i));
1545 TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); 1853 J->base[dst+i] = lj_record_vload(J, aref, t);
1546 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
1547 J->base[dst+i] = tr;
1548 } 1854 }
1549 } else { 1855 } else {
1550 emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); 1856 emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
@@ -1586,15 +1892,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1586 } 1892 }
1587 if (idx != 0 && idx <= nvararg) { 1893 if (idx != 0 && idx <= nvararg) {
1588 IRType t; 1894 IRType t;
1589 TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); 1895 TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1590 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); 1896 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
1591 t = itype2irt(&J->L->base[idx-2-nvararg]); 1897 lj_ir_kint(J, frofs-(8<<LJ_FR2)));
1592 aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); 1898 t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
1593 tr = emitir(IRTG(IR_VLOAD, t), aref, 0); 1899 aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
1594 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ 1900 tr = lj_record_vload(J, aref, t);
1595 } 1901 }
1596 J->base[dst-2] = tr; 1902 J->base[dst-2-LJ_FR2] = tr;
1597 J->maxslot = dst-1; 1903 J->maxslot = dst-1-LJ_FR2;
1598 J->bcskip = 2; /* Skip CALLM + select. */ 1904 J->bcskip = 2; /* Skip CALLM + select. */
1599 } else { 1905 } else {
1600 nyivarg: 1906 nyivarg:
@@ -1612,8 +1918,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
1612{ 1918{
1613 uint32_t asize = ah & 0x7ff; 1919 uint32_t asize = ah & 0x7ff;
1614 uint32_t hbits = ah >> 11; 1920 uint32_t hbits = ah >> 11;
1921 TRef tr;
1615 if (asize == 0x7ff) asize = 0x801; 1922 if (asize == 0x7ff) asize = 0x801;
1616 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); 1923 tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
1924#ifdef LUAJIT_ENABLE_TABLE_BUMP
1925 J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr);
1926 setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc);
1927 setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
1928#endif
1929 return tr;
1930}
1931
1932/* -- Concatenation ------------------------------------------------------- */
1933
1934static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
1935{
1936 TRef *top = &J->base[topslot];
1937 TValue savetv[5];
1938 BCReg s;
1939 RecordIndex ix;
1940 lj_assertJ(baseslot < topslot, "bad CAT arg");
1941 for (s = baseslot; s <= topslot; s++)
1942 (void)getslot(J, s); /* Ensure all arguments have a reference. */
1943 if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
1944 TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
1945 /* First convert numbers to strings. */
1946 for (trp = top; trp >= base; trp--) {
1947 if (tref_isnumber(*trp))
1948 *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
1949 tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
1950 else if (!tref_isstr(*trp))
1951 break;
1952 }
1953 xbase = ++trp;
1954 tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
1955 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
1956 do {
1957 tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++);
1958 } while (trp <= top);
1959 tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
1960 J->maxslot = (BCReg)(xbase - J->base);
1961 if (xbase == base) return tr; /* Return simple concatenation result. */
1962 /* Pass partial result. */
1963 topslot = J->maxslot--;
1964 *xbase = tr;
1965 top = xbase;
1966 setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
1967 } else {
1968 J->maxslot = topslot-1;
1969 copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
1970 }
1971 copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
1972 ix.tab = top[-1];
1973 ix.key = top[0];
1974 memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */
1975 rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */
1976 memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */
1977 return 0; /* No result yet. */
1617} 1978}
1618 1979
1619/* -- Record bytecode ops ------------------------------------------------- */ 1980/* -- Record bytecode ops ------------------------------------------------- */
@@ -1634,7 +1995,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
1634 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); 1995 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
1635 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; 1996 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1636 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ 1997 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
1998#if LJ_FR2
1999 SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
2000 uint64_t pcbase;
2001 memcpy(&pcbase, flink, sizeof(uint64_t));
2002 pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
2003 memcpy(flink, &pcbase, sizeof(uint64_t));
2004#else
1637 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); 2005 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
2006#endif
1638 J->needsnap = 1; 2007 J->needsnap = 1;
1639 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); 2008 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
1640 lj_snap_shrink(J); /* Shrink last snapshot if possible. */ 2009 lj_snap_shrink(J); /* Shrink last snapshot if possible. */
@@ -1654,7 +2023,7 @@ void lj_record_ins(jit_State *J)
1654 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { 2023 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
1655 switch (J->postproc) { 2024 switch (J->postproc) {
1656 case LJ_POST_FIXCOMP: /* Fixup comparison. */ 2025 case LJ_POST_FIXCOMP: /* Fixup comparison. */
1657 pc = frame_pc(&J2G(J)->tmptv); 2026 pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64;
1658 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); 2027 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
1659 /* fallthrough */ 2028 /* fallthrough */
1660 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ 2029 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */
@@ -1692,7 +2061,7 @@ void lj_record_ins(jit_State *J)
1692 if (bc_op(*J->pc) >= BC__MAX) 2061 if (bc_op(*J->pc) >= BC__MAX)
1693 return; 2062 return;
1694 break; 2063 break;
1695 default: lua_assert(0); break; 2064 default: lj_assertJ(0, "bad post-processing mode"); break;
1696 } 2065 }
1697 J->postproc = LJ_POST_NONE; 2066 J->postproc = LJ_POST_NONE;
1698 } 2067 }
@@ -1700,7 +2069,7 @@ void lj_record_ins(jit_State *J)
1700 /* Need snapshot before recording next bytecode (e.g. after a store). */ 2069 /* Need snapshot before recording next bytecode (e.g. after a store). */
1701 if (J->needsnap) { 2070 if (J->needsnap) {
1702 J->needsnap = 0; 2071 J->needsnap = 0;
1703 lj_snap_purge(J); 2072 if (J->pt) lj_snap_purge(J);
1704 lj_snap_add(J); 2073 lj_snap_add(J);
1705 J->mergesnap = 1; 2074 J->mergesnap = 1;
1706 } 2075 }
@@ -1722,6 +2091,10 @@ void lj_record_ins(jit_State *J)
1722 rec_check_ir(J); 2091 rec_check_ir(J);
1723#endif 2092#endif
1724 2093
2094#if LJ_HASPROFILE
2095 rec_profile_ins(J, pc);
2096#endif
2097
1725 /* Keep a copy of the runtime values of var/num/str operands. */ 2098 /* Keep a copy of the runtime values of var/num/str operands. */
1726#define rav (&ix.valv) 2099#define rav (&ix.valv)
1727#define rbv (&ix.tabv) 2100#define rbv (&ix.tabv)
@@ -1748,7 +2121,7 @@ void lj_record_ins(jit_State *J)
1748 switch (bcmode_c(op)) { 2121 switch (bcmode_c(op)) {
1749 case BCMvar: 2122 case BCMvar:
1750 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; 2123 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
1751 case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; 2124 case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
1752 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); 2125 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
1753 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : 2126 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
1754 lj_ir_knumint(J, numV(tv)); } break; 2127 lj_ir_knumint(J, numV(tv)); } break;
@@ -1843,6 +2216,18 @@ void lj_record_ins(jit_State *J)
1843 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 2216 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1844 break; 2217 break;
1845 2218
2219 case BC_ISTYPE: case BC_ISNUM:
2220 /* These coercions need to correspond with lj_meta_istype(). */
2221 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
2222 ra = lj_opt_narrow_toint(J, ra);
2223 else if (rc == ~LJ_TNUMX+2)
2224 ra = lj_ir_tonum(J, ra);
2225 else if (rc == ~LJ_TSTR+1)
2226 ra = lj_ir_tostr(J, ra);
2227 /* else: type specialization suffices. */
2228 J->base[bc_a(ins)] = ra;
2229 break;
2230
1846 /* -- Unary ops --------------------------------------------------------- */ 2231 /* -- Unary ops --------------------------------------------------------- */
1847 2232
1848 case BC_NOT: 2233 case BC_NOT:
@@ -1854,7 +2239,7 @@ void lj_record_ins(jit_State *J)
1854 if (tref_isstr(rc)) 2239 if (tref_isstr(rc))
1855 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); 2240 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
1856 else if (!LJ_52 && tref_istab(rc)) 2241 else if (!LJ_52 && tref_istab(rc))
1857 rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); 2242 rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL);
1858 else 2243 else
1859 rc = rec_mm_len(J, rc, rcv); 2244 rc = rec_mm_len(J, rc, rcv);
1860 break; 2245 break;
@@ -1906,11 +2291,23 @@ void lj_record_ins(jit_State *J)
1906 rc = rec_mm_arith(J, &ix, MM_pow); 2291 rc = rec_mm_arith(J, &ix, MM_pow);
1907 break; 2292 break;
1908 2293
2294 /* -- Miscellaneous ops ------------------------------------------------- */
2295
2296 case BC_CAT:
2297 rc = rec_cat(J, rb, rc);
2298 break;
2299
1909 /* -- Constant and move ops --------------------------------------------- */ 2300 /* -- Constant and move ops --------------------------------------------- */
1910 2301
1911 case BC_MOV: 2302 case BC_MOV:
1912 /* Clear gap of method call to avoid resurrecting previous refs. */ 2303 /* Clear gap of method call to avoid resurrecting previous refs. */
1913 if (ra > J->maxslot) J->base[ra-1] = 0; 2304 if (ra > J->maxslot) {
2305#if LJ_FR2
2306 memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
2307#else
2308 J->base[ra-1] = 0;
2309#endif
2310 }
1914 break; 2311 break;
1915 case BC_KSTR: case BC_KNUM: case BC_KPRI: 2312 case BC_KSTR: case BC_KNUM: case BC_KPRI:
1916 break; 2313 break;
@@ -1918,6 +2315,8 @@ void lj_record_ins(jit_State *J)
1918 rc = lj_ir_kint(J, (int32_t)(int16_t)rc); 2315 rc = lj_ir_kint(J, (int32_t)(int16_t)rc);
1919 break; 2316 break;
1920 case BC_KNIL: 2317 case BC_KNIL:
2318 if (LJ_FR2 && ra > J->maxslot)
2319 J->base[ra-1] = 0;
1921 while (ra <= rc) 2320 while (ra <= rc)
1922 J->base[ra++] = TREF_NIL; 2321 J->base[ra++] = TREF_NIL;
1923 if (rc >= J->maxslot) J->maxslot = rc+1; 2322 if (rc >= J->maxslot) J->maxslot = rc+1;
@@ -1954,6 +2353,14 @@ void lj_record_ins(jit_State *J)
1954 ix.idxchain = LJ_MAX_IDXCHAIN; 2353 ix.idxchain = LJ_MAX_IDXCHAIN;
1955 rc = lj_record_idx(J, &ix); 2354 rc = lj_record_idx(J, &ix);
1956 break; 2355 break;
2356 case BC_TGETR: case BC_TSETR:
2357 ix.idxchain = 0;
2358 rc = lj_record_idx(J, &ix);
2359 break;
2360
2361 case BC_TSETM:
2362 rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
2363 break;
1957 2364
1958 case BC_TNEW: 2365 case BC_TNEW:
1959 rc = rec_tnew(J, rc); 2366 rc = rec_tnew(J, rc);
@@ -1961,33 +2368,38 @@ void lj_record_ins(jit_State *J)
1961 case BC_TDUP: 2368 case BC_TDUP:
1962 rc = emitir(IRTG(IR_TDUP, IRT_TAB), 2369 rc = emitir(IRTG(IR_TDUP, IRT_TAB),
1963 lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); 2370 lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0);
2371#ifdef LUAJIT_ENABLE_TABLE_BUMP
2372 J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc);
2373 setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc);
2374 setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
2375#endif
1964 break; 2376 break;
1965 2377
1966 /* -- Calls and vararg handling ----------------------------------------- */ 2378 /* -- Calls and vararg handling ----------------------------------------- */
1967 2379
1968 case BC_ITERC: 2380 case BC_ITERC:
1969 J->base[ra] = getslot(J, ra-3); 2381 J->base[ra] = getslot(J, ra-3);
1970 J->base[ra+1] = getslot(J, ra-2); 2382 J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
1971 J->base[ra+2] = getslot(J, ra-1); 2383 J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
1972 { /* Do the actual copy now because lj_record_call needs the values. */ 2384 { /* Do the actual copy now because lj_record_call needs the values. */
1973 TValue *b = &J->L->base[ra]; 2385 TValue *b = &J->L->base[ra];
1974 copyTV(J->L, b, b-3); 2386 copyTV(J->L, b, b-3);
1975 copyTV(J->L, b+1, b-2); 2387 copyTV(J->L, b+1+LJ_FR2, b-2);
1976 copyTV(J->L, b+2, b-1); 2388 copyTV(J->L, b+2+LJ_FR2, b-1);
1977 } 2389 }
1978 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2390 lj_record_call(J, ra, (ptrdiff_t)rc-1);
1979 break; 2391 break;
1980 2392
1981 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ 2393 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
1982 case BC_CALLM: 2394 case BC_CALLM:
1983 rc = (BCReg)(J->L->top - J->L->base) - ra; 2395 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
1984 /* fallthrough */ 2396 /* fallthrough */
1985 case BC_CALL: 2397 case BC_CALL:
1986 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2398 lj_record_call(J, ra, (ptrdiff_t)rc-1);
1987 break; 2399 break;
1988 2400
1989 case BC_CALLMT: 2401 case BC_CALLMT:
1990 rc = (BCReg)(J->L->top - J->L->base) - ra; 2402 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
1991 /* fallthrough */ 2403 /* fallthrough */
1992 case BC_CALLT: 2404 case BC_CALLT:
1993 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); 2405 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
@@ -2004,6 +2416,9 @@ void lj_record_ins(jit_State *J)
2004 rc = (BCReg)(J->L->top - J->L->base) - ra + 1; 2416 rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
2005 /* fallthrough */ 2417 /* fallthrough */
2006 case BC_RET: case BC_RET0: case BC_RET1: 2418 case BC_RET: case BC_RET0: case BC_RET1:
2419#if LJ_HASPROFILE
2420 rec_profile_ret(J);
2421#endif
2007 lj_record_ret(J, ra, (ptrdiff_t)rc-1); 2422 lj_record_ret(J, ra, (ptrdiff_t)rc-1);
2008 break; 2423 break;
2009 2424
@@ -2014,9 +2429,10 @@ void lj_record_ins(jit_State *J)
2014 J->loopref = J->cur.nins; 2429 J->loopref = J->cur.nins;
2015 break; 2430 break;
2016 case BC_JFORI: 2431 case BC_JFORI:
2017 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); 2432 lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL,
2433 "JFORI does not point to JFORL");
2018 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ 2434 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
2019 rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); 2435 lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
2020 /* Continue tracing if the loop is not entered. */ 2436 /* Continue tracing if the loop is not entered. */
2021 break; 2437 break;
2022 2438
@@ -2068,7 +2484,8 @@ void lj_record_ins(jit_State *J)
2068 rec_func_lua(J); 2484 rec_func_lua(J);
2069 break; 2485 break;
2070 case BC_JFUNCV: 2486 case BC_JFUNCV:
2071 lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */ 2487 /* Cannot happen. No hotcall counting for varag funcs. */
2488 lj_assertJ(0, "unsupported vararg hotcall");
2072 break; 2489 break;
2073 2490
2074 case BC_FUNCC: 2491 case BC_FUNCC:
@@ -2084,10 +2501,8 @@ void lj_record_ins(jit_State *J)
2084 /* fallthrough */ 2501 /* fallthrough */
2085 case BC_ITERN: 2502 case BC_ITERN:
2086 case BC_ISNEXT: 2503 case BC_ISNEXT:
2087 case BC_CAT:
2088 case BC_UCLO: 2504 case BC_UCLO:
2089 case BC_FNEW: 2505 case BC_FNEW:
2090 case BC_TSETM:
2091 setintV(&J->errinfo, (int32_t)op); 2506 setintV(&J->errinfo, (int32_t)op);
2092 lj_trace_err_info(J, LJ_TRERR_NYIBC); 2507 lj_trace_err_info(J, LJ_TRERR_NYIBC);
2093 break; 2508 break;
@@ -2096,15 +2511,21 @@ void lj_record_ins(jit_State *J)
2096 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ 2511 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
2097 if (bcmode_a(op) == BCMdst && rc) { 2512 if (bcmode_a(op) == BCMdst && rc) {
2098 J->base[ra] = rc; 2513 J->base[ra] = rc;
2099 if (ra >= J->maxslot) J->maxslot = ra+1; 2514 if (ra >= J->maxslot) {
2515#if LJ_FR2
2516 if (ra > J->maxslot) J->base[ra-1] = 0;
2517#endif
2518 J->maxslot = ra+1;
2519 }
2100 } 2520 }
2101 2521
2102#undef rav 2522#undef rav
2103#undef rbv 2523#undef rbv
2104#undef rcv 2524#undef rcv
2105 2525
2106 /* Limit the number of recorded IR instructions. */ 2526 /* Limit the number of recorded IR instructions and constants. */
2107 if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) 2527 if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] ||
2528 J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst])
2108 lj_trace_err(J, LJ_TRERR_TRACEOV); 2529 lj_trace_err(J, LJ_TRERR_TRACEOV);
2109} 2530}
2110 2531
@@ -2124,11 +2545,11 @@ static const BCIns *rec_setup_root(jit_State *J)
2124 J->bc_min = pc; 2545 J->bc_min = pc;
2125 break; 2546 break;
2126 case BC_ITERL: 2547 case BC_ITERL:
2127 lua_assert(bc_op(pc[-1]) == BC_ITERC); 2548 lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL");
2128 J->maxslot = ra + bc_b(pc[-1]) - 1; 2549 J->maxslot = ra + bc_b(pc[-1]) - 1;
2129 J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); 2550 J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
2130 pc += 1+bc_j(ins); 2551 pc += 1+bc_j(ins);
2131 lua_assert(bc_op(pc[-1]) == BC_JMP); 2552 lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1");
2132 J->bc_min = pc; 2553 J->bc_min = pc;
2133 break; 2554 break;
2134 case BC_LOOP: 2555 case BC_LOOP:
@@ -2153,8 +2574,14 @@ static const BCIns *rec_setup_root(jit_State *J)
2153 J->maxslot = J->pt->numparams; 2574 J->maxslot = J->pt->numparams;
2154 pc++; 2575 pc++;
2155 break; 2576 break;
2577 case BC_CALLM:
2578 case BC_CALL:
2579 case BC_ITERC:
2580 /* No bytecode range check for stitched traces. */
2581 pc++;
2582 break;
2156 default: 2583 default:
2157 lua_assert(0); 2584 lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins));
2158 break; 2585 break;
2159 } 2586 }
2160 return pc; 2587 return pc;
@@ -2168,11 +2595,14 @@ void lj_record_setup(jit_State *J)
2168 /* Initialize state related to current trace. */ 2595 /* Initialize state related to current trace. */
2169 memset(J->slot, 0, sizeof(J->slot)); 2596 memset(J->slot, 0, sizeof(J->slot));
2170 memset(J->chain, 0, sizeof(J->chain)); 2597 memset(J->chain, 0, sizeof(J->chain));
2598#ifdef LUAJIT_ENABLE_TABLE_BUMP
2599 memset(J->rbchash, 0, sizeof(J->rbchash));
2600#endif
2171 memset(J->bpropcache, 0, sizeof(J->bpropcache)); 2601 memset(J->bpropcache, 0, sizeof(J->bpropcache));
2172 J->scev.idx = REF_NIL; 2602 J->scev.idx = REF_NIL;
2173 setmref(J->scev.pc, NULL); 2603 setmref(J->scev.pc, NULL);
2174 2604
2175 J->baseslot = 1; /* Invoking function is at base[-1]. */ 2605 J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
2176 J->base = J->slot + J->baseslot; 2606 J->base = J->slot + J->baseslot;
2177 J->maxslot = 0; 2607 J->maxslot = 0;
2178 J->framedepth = 0; 2608 J->framedepth = 0;
@@ -2187,7 +2617,7 @@ void lj_record_setup(jit_State *J)
2187 J->bc_extent = ~(MSize)0; 2617 J->bc_extent = ~(MSize)0;
2188 2618
2189 /* Emit instructions for fixed references. Also triggers initial IR alloc. */ 2619 /* Emit instructions for fixed references. Also triggers initial IR alloc. */
2190 emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); 2620 emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
2191 for (i = 0; i <= 2; i++) { 2621 for (i = 0; i <= 2; i++) {
2192 IRIns *ir = IR(REF_NIL-i); 2622 IRIns *ir = IR(REF_NIL-i);
2193 ir->i = 0; 2623 ir->i = 0;
@@ -2221,7 +2651,7 @@ void lj_record_setup(jit_State *J)
2221 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || 2651 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
2222 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + 2652 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2223 J->param[JIT_P_tryside]) { 2653 J->param[JIT_P_tryside]) {
2224 rec_stop(J, LJ_TRLINK_INTERP, 0); 2654 lj_record_stop(J, LJ_TRLINK_INTERP, 0);
2225 } 2655 }
2226 } else { /* Root trace. */ 2656 } else { /* Root trace. */
2227 J->cur.root = 0; 2657 J->cur.root = 0;
@@ -2233,9 +2663,15 @@ void lj_record_setup(jit_State *J)
2233 lj_snap_add(J); 2663 lj_snap_add(J);
2234 if (bc_op(J->cur.startins) == BC_FORL) 2664 if (bc_op(J->cur.startins) == BC_FORL)
2235 rec_for_loop(J, J->pc-1, &J->scev, 1); 2665 rec_for_loop(J, J->pc-1, &J->scev, 1);
2666 else if (bc_op(J->cur.startins) == BC_ITERC)
2667 J->startpc = NULL;
2236 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) 2668 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2237 lj_trace_err(J, LJ_TRERR_STACKOV); 2669 lj_trace_err(J, LJ_TRERR_STACKOV);
2238 } 2670 }
2671#if LJ_HASPROFILE
2672 J->prev_pt = NULL;
2673 J->prev_line = -1;
2674#endif
2239#ifdef LUAJIT_ENABLE_CHECKHOOK 2675#ifdef LUAJIT_ENABLE_CHECKHOOK
2240 /* Regularly check for instruction/line hooks from compiled code and 2676 /* Regularly check for instruction/line hooks from compiled code and
2241 ** exit to the interpreter if the hooks are set. 2677 ** exit to the interpreter if the hooks are set.
diff --git a/src/lj_record.h b/src/lj_record.h
index befc8c01..03d84a71 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -28,7 +28,9 @@ typedef struct RecordIndex {
28 28
29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, 29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
30 cTValue *av, cTValue *bv); 30 cTValue *av, cTValue *bv);
31LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
31LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); 32LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
33LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, IRType t);
32 34
33LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); 35LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
34LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs); 36LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs);
diff --git a/src/lj_serialize.c b/src/lj_serialize.c
new file mode 100644
index 00000000..70ff4796
--- /dev/null
+++ b/src/lj_serialize.c
@@ -0,0 +1,475 @@
1/*
2** Object de/serialization.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_serialize_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASBUFFER
12#include "lj_err.h"
13#include "lj_buf.h"
14#include "lj_str.h"
15#include "lj_tab.h"
16#include "lj_udata.h"
17#if LJ_HASFFI
18#include "lj_ctype.h"
19#include "lj_cdata.h"
20#endif
21#if LJ_HASJIT
22#include "lj_ir.h"
23#endif
24#include "lj_serialize.h"
25
26/* Tags for internal serialization format. */
27enum {
28 SER_TAG_NIL, /* 0x00 */
29 SER_TAG_FALSE,
30 SER_TAG_TRUE,
31 SER_TAG_NULL,
32 SER_TAG_LIGHTUD32,
33 SER_TAG_LIGHTUD64,
34 SER_TAG_INT,
35 SER_TAG_NUM,
36 SER_TAG_TAB, /* 0x08 */
37 SER_TAG_0x0e = SER_TAG_TAB+6,
38 SER_TAG_DICT,
39 SER_TAG_INT64, /* 0x10 */
40 SER_TAG_UINT64,
41 SER_TAG_COMPLEX,
42 SER_TAG_0x13,
43 SER_TAG_0x14,
44 SER_TAG_0x15,
45 SER_TAG_0x16,
46 SER_TAG_0x17,
47 SER_TAG_0x18, /* 0x18 */
48 SER_TAG_0x19,
49 SER_TAG_0x1a,
50 SER_TAG_0x1b,
51 SER_TAG_0x1c,
52 SER_TAG_0x1d,
53 SER_TAG_0x1e,
54 SER_TAG_0x1f,
55 SER_TAG_STR, /* 0x20 + str->len */
56};
57LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);
58
59/* -- Helper functions ---------------------------------------------------- */
60
61static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz)
62{
63 if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) {
64 sbx->w = w;
65 w = lj_buf_more2((SBuf *)sbx, sz);
66 }
67 return w;
68}
69
70/* Write U124 to buffer. */
71static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
72{
73 if (v < 0x1fe0) {
74 v -= 0xe0;
75 *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v;
76 } else {
77 *w++ = (char)0xff;
78#if LJ_BE
79 v = lj_bswap(v);
80#endif
81 memcpy(w, &v, 4); w += 4;
82 }
83 return w;
84}
85
86static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
87{
88 if (LJ_LIKELY(v < 0xe0)) {
89 *w++ = (char)v;
90 return w;
91 } else {
92 return serialize_wu124_(w, v);
93 }
94}
95
96static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv)
97{
98 uint32_t v = *pv;
99 if (v != 0xff) {
100 if (r >= w) return NULL;
101 v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++;
102 } else {
103 if (r + 4 > w) return NULL;
104 v = lj_getu32(r); r += 4;
105#if LJ_BE
106 v = lj_bswap(v);
107#endif
108 }
109 *pv = v;
110 return r;
111}
112
113static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
114{
115 if (LJ_LIKELY(r < w)) {
116 uint32_t v = *(uint8_t *)r; r++;
117 *pv = v;
118 if (LJ_UNLIKELY(v >= 0xe0)) {
119 r = serialize_ru124_(r, w, pv);
120 }
121 return r;
122 }
123 return NULL;
124}
125
126/* Prepare string dictionary for use (once). */
127void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
128{
129 if (!dict->hmask) { /* No hash part means not prepared, yet. */
130 MSize i, len = lj_tab_len(dict);
131 if (!len) return;
132 lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
133 for (i = 1; i <= len && i < dict->asize; i++) {
134 cTValue *o = arrayslot(dict, i);
135 if (tvisstr(o)) {
136 if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */
137 lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
138 }
139 } else if (!tvisfalse(o)) {
140 lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
141 }
142 }
143 }
144}
145
146/* -- Internal serializer ------------------------------------------------- */
147
148/* Put serialized object into buffer. */
149static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
150{
151 if (LJ_LIKELY(tvisstr(o))) {
152 const GCstr *str = strV(o);
153 MSize len = str->len;
154 w = serialize_more(w, sbx, 5+len);
155 w = serialize_wu124(w, SER_TAG_STR + len);
156 w = lj_buf_wmem(w, strdata(str), len);
157 } else if (tvisint(o)) {
158 uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
159 w = serialize_more(w, sbx, 1+4);
160 *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
161 } else if (tvisnum(o)) {
162 uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
163 w = serialize_more(w, sbx, 1+sizeof(lua_Number));
164 *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
165 } else if (tvispri(o)) {
166 w = serialize_more(w, sbx, 1);
167 *w++ = (char)(SER_TAG_NIL + ~itype(o));
168 } else if (tvistab(o)) {
169 const GCtab *t = tabV(o);
170 uint32_t narray = 0, nhash = 0, one = 2;
171 if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
172 sbx->depth--;
173 if (t->asize > 0) { /* Determine max. length of array part. */
174 ptrdiff_t i;
175 TValue *array = tvref(t->array);
176 for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
177 if (!tvisnil(&array[i]))
178 break;
179 narray = (uint32_t)(i+1);
180 if (narray && tvisnil(&array[0])) one = 4;
181 }
182 if (t->hmask > 0) { /* Count number of used hash slots. */
183 uint32_t i, hmask = t->hmask;
184 Node *node = noderef(t->node);
185 for (i = 0; i <= hmask; i++)
186 nhash += !tvisnil(&node[i].val);
187 }
188 /* Write number of array slots and hash slots. */
189 w = serialize_more(w, sbx, 1+2*5);
190 *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
191 if (narray) w = serialize_wu124(w, narray);
192 if (nhash) w = serialize_wu124(w, nhash);
193 if (narray) { /* Write array entries. */
194 cTValue *oa = tvref(t->array) + (one >> 2);
195 cTValue *oe = tvref(t->array) + narray;
196 while (oa < oe) w = serialize_put(w, sbx, oa++);
197 }
198 if (nhash) { /* Write hash entries. */
199 const Node *node = noderef(t->node) + t->hmask;
200 GCtab *dict = tabref(sbx->dict);
201 if (LJ_UNLIKELY(dict)) {
202 for (;; node--)
203 if (!tvisnil(&node->val)) {
204 if (LJ_LIKELY(tvisstr(&node->key))) {
205 /* Inlined lj_tab_getstr is 30% faster. */
206 const GCstr *str = strV(&node->key);
207 Node *n = hashstr(dict, str);
208 do {
209 if (tvisstr(&n->key) && strV(&n->key) == str) {
210 uint32_t idx = n->val.u32.lo;
211 w = serialize_more(w, sbx, 1+5);
212 *w++ = SER_TAG_DICT;
213 w = serialize_wu124(w, idx);
214 break;
215 }
216 n = nextnode(n);
217 if (!n) {
218 MSize len = str->len;
219 w = serialize_more(w, sbx, 5+len);
220 w = serialize_wu124(w, SER_TAG_STR + len);
221 w = lj_buf_wmem(w, strdata(str), len);
222 break;
223 }
224 } while (1);
225 } else {
226 w = serialize_put(w, sbx, &node->key);
227 }
228 w = serialize_put(w, sbx, &node->val);
229 if (--nhash == 0) break;
230 }
231 } else {
232 for (;; node--)
233 if (!tvisnil(&node->val)) {
234 w = serialize_put(w, sbx, &node->key);
235 w = serialize_put(w, sbx, &node->val);
236 if (--nhash == 0) break;
237 }
238 }
239 }
240 sbx->depth++;
241#if LJ_HASFFI
242 } else if (tviscdata(o)) {
243 CTState *cts = ctype_cts(sbufL(sbx));
244 CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
245 uint8_t *sp = cdataptr(cdataV(o));
246 if (ctype_isinteger(s->info) && s->size == 8) {
247 w = serialize_more(w, sbx, 1+8);
248 *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
249#if LJ_BE
250 { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
251#else
252 memcpy(w, sp, 8);
253#endif
254 w += 8;
255 } else if (ctype_iscomplex(s->info) && s->size == 16) {
256 w = serialize_more(w, sbx, 1+16);
257 *w++ = SER_TAG_COMPLEX;
258#if LJ_BE
259 { /* Only swap the doubles. The re/im order stays the same. */
260 uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
261 u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
262 }
263#else
264 memcpy(w, sp, 16);
265#endif
266 w += 16;
267 } else {
268 goto badenc; /* NYI other cdata */
269 }
270#endif
271 } else if (tvislightud(o)) {
272 uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o);
273 w = serialize_more(w, sbx, 1+sizeof(ud));
274 if (ud == 0) {
275 *w++ = SER_TAG_NULL;
276 } else if (LJ_32 || checku32(ud)) {
277#if LJ_BE && LJ_64
278 ud = lj_bswap64(ud);
279#elif LJ_BE
280 ud = lj_bswap(ud);
281#endif
282 *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
283#if LJ_64
284 } else {
285#if LJ_BE
286 ud = lj_bswap64(ud);
287#endif
288 *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
289#endif
290 }
291 } else {
292 /* NYI userdata */
293#if LJ_HASFFI
294 badenc:
295#endif
296 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o));
297 }
298 return w;
299}
300
301/* Get serialized object from buffer. */
302static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
303{
304 char *w = sbx->w;
305 uint32_t tp;
306 r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
307 if (LJ_LIKELY(tp >= SER_TAG_STR)) {
308 uint32_t len = tp - SER_TAG_STR;
309 if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob;
310 setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len));
311 r += len;
312 } else if (tp == SER_TAG_INT) {
313 if (LJ_UNLIKELY(r + 4 > w)) goto eob;
314 setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)));
315 r += 4;
316 } else if (tp == SER_TAG_NUM) {
317 if (LJ_UNLIKELY(r + 8 > w)) goto eob;
318 memcpy(o, r, 8); r += 8;
319#if LJ_BE
320 o->u64 = lj_bswap64(o->u64);
321#endif
322 if (!tvisnum(o)) setnanV(o);
323 } else if (tp <= SER_TAG_TRUE) {
324 setpriV(o, ~tp);
325 } else if (tp == SER_TAG_DICT) {
326 GCtab *dict;
327 uint32_t idx;
328 r = serialize_ru124(r, w, &idx);
329 idx++;
330 dict = tabref(sbx->dict);
331 if (dict && idx < dict->asize && tvisstr(arrayslot(dict, idx)))
332 copyTV(sbufL(sbx), o, arrayslot(dict, idx));
333 else
334 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
335 } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) {
336 uint32_t narray = 0, nhash = 0;
337 GCtab *t;
338 if (tp >= SER_TAG_TAB+2) {
339 r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
340 }
341 if ((tp & 1)) {
342 r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
343 }
344 t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
345 settabV(sbufL(sbx), o, t);
346 if (narray) {
347 TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
348 TValue *oe = tvref(t->array) + narray;
349 while (oa < oe) r = serialize_get(r, sbx, oa++);
350 }
351 if (nhash) {
352 do {
353 TValue k, *v;
354 r = serialize_get(r, sbx, &k);
355 v = lj_tab_set(sbufL(sbx), t, &k);
356 if (LJ_UNLIKELY(!tvisnil(v)))
357 lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY);
358 r = serialize_get(r, sbx, v);
359 } while (--nhash);
360 }
361#if LJ_HASFFI
362 } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) {
363 uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8;
364 GCcdata *cd;
365 if (LJ_UNLIKELY(r + sz > w)) goto eob;
366 cd = lj_cdata_new_(sbufL(sbx),
367 tp == SER_TAG_INT64 ? CTID_INT64 :
368 tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
369 sz);
370 memcpy(cdataptr(cd), r, sz); r += sz;
371#if LJ_BE
372 *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
373 if (sz == 16)
374 ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
375#endif
376 setcdataV(sbufL(sbx), o, cd);
377#endif
378 } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
379 uintptr_t ud = 0;
380 if (tp == SER_TAG_LIGHTUD32) {
381 if (LJ_UNLIKELY(r + 4 > w)) goto eob;
382 ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
383 r += 4;
384 }
385#if LJ_64
386 else if (tp == SER_TAG_LIGHTUD64) {
387 if (LJ_UNLIKELY(r + 8 > w)) goto eob;
388 memcpy(&ud, r, 8); r += 8;
389#if LJ_BE
390 ud = lj_bswap64(ud);
391#endif
392 }
393 setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud));
394#else
395 setrawlightudV(o, (void *)ud);
396#endif
397 } else {
398 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
399 }
400 return r;
401eob:
402 lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB);
403 return NULL;
404}
405
406/* -- External serialization API ------------------------------------------ */
407
408/* Encode to buffer. */
409SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
410{
411 sbx->depth = LJ_SERIALIZE_DEPTH;
412 sbx->w = serialize_put(sbx->w, sbx, o);
413 return sbx;
414}
415
416/* Decode from buffer. */
417char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
418{
419 return serialize_get(sbx->r, sbx, o);
420}
421
422/* Stand-alone encoding, borrowing from global temporary buffer. */
423GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
424{
425 SBufExt sbx;
426 char *w;
427 memset(&sbx, 0, sizeof(SBufExt));
428 lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
429 sbx.depth = LJ_SERIALIZE_DEPTH;
430 w = serialize_put(sbx.w, &sbx, o);
431 return lj_str_new(L, sbx.b, (size_t)(w - sbx.b));
432}
433
434/* Stand-alone decoding, copy-on-write from string. */
435void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
436{
437 SBufExt sbx;
438 char *r;
439 memset(&sbx, 0, sizeof(SBufExt));
440 lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
441 /* No need to set sbx.cowref here. */
442 r = lj_serialize_get(&sbx, o);
443 if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);
444}
445
#if LJ_HASJIT
/*
** Peek into buffer to find the result IRType for specialization purposes.
** Only the leading type tag at sbx->r is read.
*/
LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
{
  uint32_t tp;
  if (!serialize_ru124(sbx->r, sbx->w, &tp))
    return IRT_NIL;  /* Will fail on actual decode. */
  /* This must match the handling of all tags in the decoder above. */
  if (tp == SER_TAG_NIL)
    return IRT_NIL;
  if (tp == SER_TAG_FALSE)
    return IRT_FALSE;
  if (tp == SER_TAG_TRUE)
    return IRT_TRUE;
  if (tp == SER_TAG_NULL || tp == SER_TAG_LIGHTUD32 ||
      tp == SER_TAG_LIGHTUD64)
    return IRT_LIGHTUD;
  if (tp == SER_TAG_INT)
    return LJ_DUALNUM ? IRT_INT : IRT_NUM;
  if (tp == SER_TAG_NUM)
    return IRT_NUM;
  if (tp >= SER_TAG_TAB && tp <= SER_TAG_TAB+5)
    return IRT_TAB;
  if (tp == SER_TAG_INT64 || tp == SER_TAG_UINT64 || tp == SER_TAG_COMPLEX)
    return IRT_CDATA;
  /* SER_TAG_DICT and every remaining tag value. */
  return IRT_STR;
}
#endif
474
475#endif
diff --git a/src/lj_serialize.h b/src/lj_serialize.h
new file mode 100644
index 00000000..9bd780ca
--- /dev/null
+++ b/src/lj_serialize.h
@@ -0,0 +1,27 @@
1/*
2** Object de/serialization.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_SERIALIZE_H
7#define _LJ_SERIALIZE_H
8
9#include "lj_obj.h"
10#include "lj_buf.h"
11
12#if LJ_HASBUFFER
13
/* Value stored in sbx->depth by lj_serialize_put() and lj_serialize_encode(). */
14#define LJ_SERIALIZE_DEPTH 100 /* Default depth. */
15
/* NOTE(review): presumably prepares a dictionary table for the encoder/decoder; confirm against lj_serialize.c. */
16LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict);
/* Encode o at the write position of sbx and advance sbx->w. Returns sbx. */
17LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
/* Decode one object at sbx->r into *o. Returns the position past it. */
18LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
/* Stand-alone encoding of o into a new string. */
19LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);
/* Stand-alone decoding of str into *o. Raises an error on leftover data. */
20LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str);
21#if LJ_HASJIT
/* Peek at the next type tag in sbx and return the matching IRType. */
22LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx);
23#endif
24
25#endif
26
27#endif
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 91880fcf..b2b8450c 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -68,20 +68,37 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
68 for (s = 0; s < nslots; s++) { 68 for (s = 0; s < nslots; s++) {
69 TRef tr = J->slot[s]; 69 TRef tr = J->slot[s];
70 IRRef ref = tref_ref(tr); 70 IRRef ref = tref_ref(tr);
71#if LJ_FR2
72 if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
73 if ((tr & TREF_FRAME))
74 map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
75 continue;
76 }
77 if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
78 cTValue *base = J->L->base - J->baseslot;
79 tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
80 ref = tref_ref(tr);
81 }
82#endif
71 if (ref) { 83 if (ref) {
72 SnapEntry sn = SNAP_TR(s, tr); 84 SnapEntry sn = SNAP_TR(s, tr);
73 IRIns *ir = &J->cur.ir[ref]; 85 IRIns *ir = &J->cur.ir[ref];
74 if (!(sn & (SNAP_CONT|SNAP_FRAME)) && 86 if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
75 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { 87 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
76 /* No need to snapshot unmodified non-inherited slots. */ 88 /*
77 if (!(ir->op2 & IRSLOAD_INHERIT)) 89 ** No need to snapshot unmodified non-inherited slots.
90 ** But always snapshot the function below a frame in LJ_FR2 mode.
91 */
92 if (!(ir->op2 & IRSLOAD_INHERIT) &&
93 (!LJ_FR2 || s == 0 || s+1 == nslots ||
94 !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
78 continue; 95 continue;
79 /* No need to restore readonly slots and unmodified non-parent slots. */ 96 /* No need to restore readonly slots and unmodified non-parent slots. */
80 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && 97 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
81 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) 98 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
82 sn |= SNAP_NORESTORE; 99 sn |= SNAP_NORESTORE;
83 } 100 }
84 if (LJ_SOFTFP && irt_isnum(ir->t)) 101 if (LJ_SOFTFP32 && irt_isnum(ir->t))
85 sn |= SNAP_SOFTFPNUM; 102 sn |= SNAP_SOFTFPNUM;
86 map[n++] = sn; 103 map[n++] = sn;
87 } 104 }
@@ -90,35 +107,54 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
90} 107}
91 108
92/* Add frame links at the end of the snapshot. */ 109/* Add frame links at the end of the snapshot. */
93static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) 110static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
94{ 111{
95 cTValue *frame = J->L->base - 1; 112 cTValue *frame = J->L->base - 1;
96 cTValue *lim = J->L->base - J->baseslot; 113 cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
97 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; 114 GCfunc *fn = frame_func(frame);
115 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
116#if LJ_FR2
117 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
118 lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
119 memcpy(map, &pcbase, sizeof(uint64_t));
120#else
98 MSize f = 0; 121 MSize f = 0;
99 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ 122 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
100 lua_assert(!J->pt || 123#endif
124 lj_assertJ(!J->pt ||
101 (J->pc >= proto_bc(J->pt) && 125 (J->pc >= proto_bc(J->pt) &&
102 J->pc < proto_bc(J->pt) + J->pt->sizebc)); 126 J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
103 while (frame > lim) { /* Backwards traversal of all frames above base. */ 127 while (frame > lim) { /* Backwards traversal of all frames above base. */
104 if (frame_islua(frame)) { 128 if (frame_islua(frame)) {
129#if !LJ_FR2
105 map[f++] = SNAP_MKPC(frame_pc(frame)); 130 map[f++] = SNAP_MKPC(frame_pc(frame));
131#endif
106 frame = frame_prevl(frame); 132 frame = frame_prevl(frame);
107 } else if (frame_iscont(frame)) { 133 } else if (frame_iscont(frame)) {
134#if !LJ_FR2
108 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 135 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
109 map[f++] = SNAP_MKPC(frame_contpc(frame)); 136 map[f++] = SNAP_MKPC(frame_contpc(frame));
137#endif
110 frame = frame_prevd(frame); 138 frame = frame_prevd(frame);
111 } else { 139 } else {
112 lua_assert(!frame_isc(frame)); 140 lj_assertJ(!frame_isc(frame), "broken frame chain");
141#if !LJ_FR2
113 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 142 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
143#endif
114 frame = frame_prevd(frame); 144 frame = frame_prevd(frame);
115 continue; 145 continue;
116 } 146 }
117 if (frame + funcproto(frame_func(frame))->framesize > ftop) 147 if (frame + funcproto(frame_func(frame))->framesize > ftop)
118 ftop = frame + funcproto(frame_func(frame))->framesize; 148 ftop = frame + funcproto(frame_func(frame))->framesize;
119 } 149 }
120 lua_assert(f == (MSize)(1 + J->framedepth)); 150 *topslot = (uint8_t)(ftop - lim);
121 return (BCReg)(ftop - lim); 151#if LJ_FR2
152 lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
153 return 2;
154#else
155 lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
156 return f;
157#endif
122} 158}
123 159
124/* Take a snapshot of the current stack. */ 160/* Take a snapshot of the current stack. */
@@ -128,16 +164,17 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
128 MSize nent; 164 MSize nent;
129 SnapEntry *p; 165 SnapEntry *p;
130 /* Conservative estimate. */ 166 /* Conservative estimate. */
131 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); 167 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
132 p = &J->cur.snapmap[nsnapmap]; 168 p = &J->cur.snapmap[nsnapmap];
133 nent = snapshot_slots(J, p, nslots); 169 nent = snapshot_slots(J, p, nslots);
134 snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); 170 snap->nent = (uint8_t)nent;
171 nent += snapshot_framelinks(J, p + nent, &snap->topslot);
135 snap->mapofs = (uint32_t)nsnapmap; 172 snap->mapofs = (uint32_t)nsnapmap;
136 snap->ref = (IRRef1)J->cur.nins; 173 snap->ref = (IRRef1)J->cur.nins;
137 snap->nent = (uint8_t)nent; 174 snap->mcofs = 0;
138 snap->nslots = (uint8_t)nslots; 175 snap->nslots = (uint8_t)nslots;
139 snap->count = 0; 176 snap->count = 0;
140 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent + 1 + J->framedepth); 177 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
141} 178}
142 179
143/* Add or merge a snapshot. */ 180/* Add or merge a snapshot. */
@@ -146,8 +183,8 @@ void lj_snap_add(jit_State *J)
146 MSize nsnap = J->cur.nsnap; 183 MSize nsnap = J->cur.nsnap;
147 MSize nsnapmap = J->cur.nsnapmap; 184 MSize nsnapmap = J->cur.nsnapmap;
148 /* Merge if no ins. inbetween or if requested and no guard inbetween. */ 185 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
149 if (J->mergesnap ? !irt_isguard(J->guardemit) : 186 if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
150 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { 187 (J->mergesnap && !irt_isguard(J->guardemit))) {
151 if (nsnap == 1) { /* But preserve snap #0 PC. */ 188 if (nsnap == 1) { /* But preserve snap #0 PC. */
152 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); 189 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
153 goto nomerge; 190 goto nomerge;
@@ -194,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
194#define DEF_SLOT(s) udf[(s)] *= 3 231#define DEF_SLOT(s) udf[(s)] *= 3
195 232
196 /* Scan through following bytecode and check for uses/defs. */ 233 /* Scan through following bytecode and check for uses/defs. */
197 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); 234 lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
235 "snapshot PC out of range");
198 for (;;) { 236 for (;;) {
199 BCIns ins = *pc++; 237 BCIns ins = *pc++;
200 BCOp op = bc_op(ins); 238 BCOp op = bc_op(ins);
@@ -205,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
205 switch (bcmode_c(op)) { 243 switch (bcmode_c(op)) {
206 case BCMvar: USE_SLOT(bc_c(ins)); break; 244 case BCMvar: USE_SLOT(bc_c(ins)); break;
207 case BCMrbase: 245 case BCMrbase:
208 lua_assert(op == BC_CAT); 246 lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
209 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); 247 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
210 for (; s < maxslot; s++) DEF_SLOT(s); 248 for (; s < maxslot; s++) DEF_SLOT(s);
211 break; 249 break;
@@ -245,7 +283,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
245 case BCMbase: 283 case BCMbase:
246 if (op >= BC_CALLM && op <= BC_ITERN) { 284 if (op >= BC_CALLM && op <= BC_ITERN) {
247 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? 285 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
248 maxslot : (bc_a(ins) + bc_c(ins)); 286 maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
287 if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
249 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); 288 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
250 for (; s < top; s++) USE_SLOT(s); 289 for (; s < top; s++) USE_SLOT(s);
251 for (; s < maxslot; s++) DEF_SLOT(s); 290 for (; s < maxslot; s++) DEF_SLOT(s);
@@ -263,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
263 break; 302 break;
264 default: break; 303 default: break;
265 } 304 }
266 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); 305 lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
306 "use/def analysis PC out of range");
267 } 307 }
268 308
269#undef USE_SLOT 309#undef USE_SLOT
@@ -293,8 +333,8 @@ void lj_snap_shrink(jit_State *J)
293 MSize n, m, nlim, nent = snap->nent; 333 MSize n, m, nlim, nent = snap->nent;
294 uint8_t udf[SNAP_USEDEF_SLOTS]; 334 uint8_t udf[SNAP_USEDEF_SLOTS];
295 BCReg maxslot = J->maxslot; 335 BCReg maxslot = J->maxslot;
296 BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
297 BCReg baseslot = J->baseslot; 336 BCReg baseslot = J->baseslot;
337 BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
298 maxslot += baseslot; 338 maxslot += baseslot;
299 minslot += baseslot; 339 minslot += baseslot;
300 snap->nslots = (uint8_t)maxslot; 340 snap->nslots = (uint8_t)maxslot;
@@ -336,25 +376,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
336} 376}
337 377
338/* Copy RegSP from parent snapshot to the parent links of the IR. */ 378/* Copy RegSP from parent snapshot to the parent links of the IR. */
339IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) 379IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
340{ 380{
341 SnapShot *snap = &T->snap[snapno]; 381 SnapShot *snap = &T->snap[snapno];
342 SnapEntry *map = &T->snapmap[snap->mapofs]; 382 SnapEntry *map = &T->snapmap[snap->mapofs];
343 BloomFilter rfilt = snap_renamefilter(T, snapno); 383 BloomFilter rfilt = snap_renamefilter(T, snapno);
344 MSize n = 0; 384 MSize n = 0;
345 IRRef ref = 0; 385 IRRef ref = 0;
386 UNUSED(J);
346 for ( ; ; ir++) { 387 for ( ; ; ir++) {
347 uint32_t rs; 388 uint32_t rs;
348 if (ir->o == IR_SLOAD) { 389 if (ir->o == IR_SLOAD) {
349 if (!(ir->op2 & IRSLOAD_PARENT)) break; 390 if (!(ir->op2 & IRSLOAD_PARENT)) break;
350 for ( ; ; n++) { 391 for ( ; ; n++) {
351 lua_assert(n < snap->nent); 392 lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
352 if (snap_slot(map[n]) == ir->op1) { 393 if (snap_slot(map[n]) == ir->op1) {
353 ref = snap_ref(map[n++]); 394 ref = snap_ref(map[n++]);
354 break; 395 break;
355 } 396 }
356 } 397 }
357 } else if (LJ_SOFTFP && ir->o == IR_HIOP) { 398 } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
358 ref++; 399 ref++;
359 } else if (ir->o == IR_PVAL) { 400 } else if (ir->o == IR_PVAL) {
360 ref = ir->op1 + REF_BIAS; 401 ref = ir->op1 + REF_BIAS;
@@ -365,7 +406,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
365 if (bloomtest(rfilt, ref)) 406 if (bloomtest(rfilt, ref))
366 rs = snap_renameref(T, snapno, ref, rs); 407 rs = snap_renameref(T, snapno, ref, rs);
367 ir->prev = (uint16_t)rs; 408 ir->prev = (uint16_t)rs;
368 lua_assert(regsp_used(rs)); 409 lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
369 } 410 }
370 return ir; 411 return ir;
371} 412}
@@ -380,10 +421,10 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
380 case IR_KPRI: return TREF_PRI(irt_type(ir->t)); 421 case IR_KPRI: return TREF_PRI(irt_type(ir->t));
381 case IR_KINT: return lj_ir_kint(J, ir->i); 422 case IR_KINT: return lj_ir_kint(J, ir->i);
382 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); 423 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
383 case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); 424 case IR_KNUM: case IR_KINT64:
384 case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); 425 return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
385 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ 426 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
386 default: lua_assert(0); return TREF_NIL; break; 427 default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
387 } 428 }
388} 429}
389 430
@@ -454,21 +495,25 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
454 goto setslot; 495 goto setslot;
455 bloomset(seen, ref); 496 bloomset(seen, ref);
456 if (irref_isk(ref)) { 497 if (irref_isk(ref)) {
457 tr = snap_replay_const(J, ir); 498 /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
499 if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
500 tr = 0;
501 else
502 tr = snap_replay_const(J, ir);
458 } else if (!regsp_used(ir->prev)) { 503 } else if (!regsp_used(ir->prev)) {
459 pass23 = 1; 504 pass23 = 1;
460 lua_assert(s != 0); 505 lj_assertJ(s != 0, "unused slot 0 in snapshot");
461 tr = s; 506 tr = s;
462 } else { 507 } else {
463 IRType t = irt_type(ir->t); 508 IRType t = irt_type(ir->t);
464 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; 509 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
465 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; 510 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
466 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); 511 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
467 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); 512 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
468 } 513 }
469 setslot: 514 setslot:
470 J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ 515 J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
471 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); 516 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
472 if ((sn & SNAP_FRAME)) 517 if ((sn & SNAP_FRAME))
473 J->baseslot = s+1; 518 J->baseslot = s+1;
474 } 519 }
@@ -483,8 +528,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
483 if (regsp_reg(ir->r) == RID_SUNK) { 528 if (regsp_reg(ir->r) == RID_SUNK) {
484 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; 529 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
485 pass23 = 1; 530 pass23 = 1;
486 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || 531 lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
487 ir->o == IR_CNEW || ir->o == IR_CNEWI); 532 ir->o == IR_CNEW || ir->o == IR_CNEWI,
533 "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
488 if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); 534 if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
489 if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); 535 if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
490 if (LJ_HASFFI && ir->o == IR_CNEWI) { 536 if (LJ_HASFFI && ir->o == IR_CNEWI) {
@@ -496,13 +542,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
496 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { 542 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
497 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) 543 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
498 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); 544 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
499 else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && 545 else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
500 irs+1 < irlast && (irs+1)->o == IR_HIOP) 546 irs+1 < irlast && (irs+1)->o == IR_HIOP)
501 snap_pref(J, T, map, nent, seen, (irs+1)->op2); 547 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
502 } 548 }
503 } 549 }
504 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { 550 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
505 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); 551 lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
552 "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
506 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); 553 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
507 } 554 }
508 } 555 }
@@ -552,20 +599,21 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
552 val = snap_pref(J, T, map, nent, seen, irs->op2); 599 val = snap_pref(J, T, map, nent, seen, irs->op2);
553 if (val == 0) { 600 if (val == 0) {
554 IRIns *irc = &T->ir[irs->op2]; 601 IRIns *irc = &T->ir[irs->op2];
555 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); 602 lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
603 "sunk store for parent IR %04d with bad op %d",
604 refp - REF_BIAS, irc->o);
556 val = snap_pref(J, T, map, nent, seen, irc->op1); 605 val = snap_pref(J, T, map, nent, seen, irc->op1);
557 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); 606 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
558 } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && 607 } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
559 irs+1 < irlast && (irs+1)->o == IR_HIOP) { 608 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
560 IRType t = IRT_I64; 609 IRType t = IRT_I64;
561 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) 610 if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
562 t = IRT_NUM; 611 t = IRT_NUM;
563 lj_needsplit(J); 612 lj_needsplit(J);
564 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { 613 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
565 uint64_t k = (uint32_t)T->ir[irs->op2].i + 614 uint64_t k = (uint32_t)T->ir[irs->op2].i +
566 ((uint64_t)T->ir[(irs+1)->op2].i << 32); 615 ((uint64_t)T->ir[(irs+1)->op2].i << 32);
567 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, 616 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
568 lj_ir_k64_find(J, k));
569 } else { 617 } else {
570 val = emitir_raw(IRT(IR_HIOP, t), val, 618 val = emitir_raw(IRT(IR_HIOP, t), val,
571 snap_pref(J, T, map, nent, seen, (irs+1)->op2)); 619 snap_pref(J, T, map, nent, seen, (irs+1)->op2));
@@ -603,7 +651,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
603 IRType1 t = ir->t; 651 IRType1 t = ir->t;
604 RegSP rs = ir->prev; 652 RegSP rs = ir->prev;
605 if (irref_isk(ref)) { /* Restore constant slot. */ 653 if (irref_isk(ref)) { /* Restore constant slot. */
606 lj_ir_kvalue(J->L, o, ir); 654 if (ir->o == IR_KPTR) {
655 o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
656 } else {
657 lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
658 "restore of const from IR %04d with bad op %d",
659 ref - REF_BIAS, ir->o);
660 lj_ir_kvalue(J->L, o, ir);
661 }
607 return; 662 return;
608 } 663 }
609 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) 664 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
@@ -612,22 +667,24 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
612 int32_t *sps = &ex->spill[regsp_spill(rs)]; 667 int32_t *sps = &ex->spill[regsp_spill(rs)];
613 if (irt_isinteger(t)) { 668 if (irt_isinteger(t)) {
614 setintV(o, *sps); 669 setintV(o, *sps);
615#if !LJ_SOFTFP 670#if !LJ_SOFTFP32
616 } else if (irt_isnum(t)) { 671 } else if (irt_isnum(t)) {
617 o->u64 = *(uint64_t *)sps; 672 o->u64 = *(uint64_t *)sps;
618#endif 673#endif
619 } else if (LJ_64 && irt_islightud(t)) { 674#if LJ_64 && !LJ_GC64
675 } else if (irt_islightud(t)) {
620 /* 64 bit lightuserdata which may escape already has the tag bits. */ 676 /* 64 bit lightuserdata which may escape already has the tag bits. */
621 o->u64 = *(uint64_t *)sps; 677 o->u64 = *(uint64_t *)sps;
678#endif
622 } else { 679 } else {
623 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ 680 lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
624 setgcrefi(o->gcr, *sps); 681 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
625 setitype(o, irt_toitype(t));
626 } 682 }
627 } else { /* Restore from register. */ 683 } else { /* Restore from register. */
628 Reg r = regsp_reg(rs); 684 Reg r = regsp_reg(rs);
629 if (ra_noreg(r)) { 685 if (ra_noreg(r)) {
630 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); 686 lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
687 "restore from IR %04d has no reg", ref - REF_BIAS);
631 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); 688 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
632 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); 689 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
633 return; 690 return;
@@ -636,21 +693,26 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
636#if !LJ_SOFTFP 693#if !LJ_SOFTFP
637 } else if (irt_isnum(t)) { 694 } else if (irt_isnum(t)) {
638 setnumV(o, ex->fpr[r-RID_MIN_FPR]); 695 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
696#elif LJ_64 /* && LJ_SOFTFP */
697 } else if (irt_isnum(t)) {
698 o->u64 = ex->gpr[r-RID_MIN_GPR];
639#endif 699#endif
640 } else if (LJ_64 && irt_islightud(t)) { 700#if LJ_64 && !LJ_GC64
641 /* 64 bit lightuserdata which may escape already has the tag bits. */ 701 } else if (irt_is64(t)) {
702 /* 64 bit values that already have the tag bits. */
642 o->u64 = ex->gpr[r-RID_MIN_GPR]; 703 o->u64 = ex->gpr[r-RID_MIN_GPR];
704#endif
705 } else if (irt_ispri(t)) {
706 setpriV(o, irt_toitype(t));
643 } else { 707 } else {
644 if (!irt_ispri(t)) 708 setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
645 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
646 setitype(o, irt_toitype(t));
647 } 709 }
648 } 710 }
649} 711}
650 712
651#if LJ_HASFFI 713#if LJ_HASFFI
652/* Restore raw data from the trace exit state. */ 714/* Restore raw data from the trace exit state. */
653static void snap_restoredata(GCtrace *T, ExitState *ex, 715static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
654 SnapNo snapno, BloomFilter rfilt, 716 SnapNo snapno, BloomFilter rfilt,
655 IRRef ref, void *dst, CTSize sz) 717 IRRef ref, void *dst, CTSize sz)
656{ 718{
@@ -658,9 +720,10 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
658 RegSP rs = ir->prev; 720 RegSP rs = ir->prev;
659 int32_t *src; 721 int32_t *src;
660 uint64_t tmp; 722 uint64_t tmp;
723 UNUSED(J);
661 if (irref_isk(ref)) { 724 if (irref_isk(ref)) {
662 if (ir->o == IR_KNUM || ir->o == IR_KINT64) { 725 if (ir_isk64(ir)) {
663 src = mref(ir->ptr, int32_t); 726 src = (int32_t *)&ir[1];
664 } else if (sz == 8) { 727 } else if (sz == 8) {
665 tmp = (uint64_t)(uint32_t)ir->i; 728 tmp = (uint64_t)(uint32_t)ir->i;
666 src = (int32_t *)&tmp; 729 src = (int32_t *)&tmp;
@@ -680,8 +743,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
680 Reg r = regsp_reg(rs); 743 Reg r = regsp_reg(rs);
681 if (ra_noreg(r)) { 744 if (ra_noreg(r)) {
682 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ 745 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
683 lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); 746 lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
684 snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); 747 "restore from IR %04d has no reg", ref - REF_BIAS);
748 snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
685 *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; 749 *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
686 return; 750 return;
687 } 751 }
@@ -697,11 +761,13 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
697#else 761#else
698 if (LJ_BE && sz == 4) src++; 762 if (LJ_BE && sz == 4) src++;
699#endif 763#endif
700 } 764 } else
701#endif 765#endif
766 if (LJ_64 && LJ_BE && sz == 4) src++;
702 } 767 }
703 } 768 }
704 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 769 lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
770 "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
705 if (sz == 4) *(int32_t *)dst = *src; 771 if (sz == 4) *(int32_t *)dst = *src;
706 else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; 772 else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
707 else if (sz == 1) *(int8_t *)dst = (int8_t)*src; 773 else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
@@ -714,24 +780,27 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
714 SnapNo snapno, BloomFilter rfilt, 780 SnapNo snapno, BloomFilter rfilt,
715 IRIns *ir, TValue *o) 781 IRIns *ir, TValue *o)
716{ 782{
717 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || 783 lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
718 ir->o == IR_CNEW || ir->o == IR_CNEWI); 784 ir->o == IR_CNEW || ir->o == IR_CNEWI,
785 "sunk allocation with bad op %d", ir->o);
719#if LJ_HASFFI 786#if LJ_HASFFI
720 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { 787 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
721 CTState *cts = ctype_cts(J->L); 788 CTState *cts = ctype_cts(J->L);
722 CTypeID id = (CTypeID)T->ir[ir->op1].i; 789 CTypeID id = (CTypeID)T->ir[ir->op1].i;
723 CTSize sz = lj_ctype_size(cts, id); 790 CTSize sz;
724 GCcdata *cd = lj_cdata_new(cts, id, sz); 791 CTInfo info = lj_ctype_info(cts, id, &sz);
792 GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
725 setcdataV(J->L, o, cd); 793 setcdataV(J->L, o, cd);
726 if (ir->o == IR_CNEWI) { 794 if (ir->o == IR_CNEWI) {
727 uint8_t *p = (uint8_t *)cdataptr(cd); 795 uint8_t *p = (uint8_t *)cdataptr(cd);
728 lua_assert(sz == 4 || sz == 8); 796 lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
729 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { 797 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
730 snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); 798 snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
799 LJ_LE ? p+4 : p, 4);
731 if (LJ_BE) p += 4; 800 if (LJ_BE) p += 4;
732 sz = 4; 801 sz = 4;
733 } 802 }
734 snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); 803 snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
735 } else { 804 } else {
736 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; 805 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
737 for (irs = ir+1; irs < irlast; irs++) 806 for (irs = ir+1; irs < irlast; irs++)
@@ -739,8 +808,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
739 IRIns *iro = &T->ir[T->ir[irs->op1].op2]; 808 IRIns *iro = &T->ir[T->ir[irs->op1].op2];
740 uint8_t *p = (uint8_t *)cd; 809 uint8_t *p = (uint8_t *)cd;
741 CTSize szs; 810 CTSize szs;
742 lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); 811 lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
743 lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); 812 lj_assertJ(T->ir[irs->op1].o == IR_ADD,
813 "sunk store with bad add op %d", T->ir[irs->op1].o);
814 lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
815 "sunk store with bad const offset op %d", iro->o);
744 if (irt_is64(irs->t)) szs = 8; 816 if (irt_is64(irs->t)) szs = 8;
745 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; 817 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
746 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; 818 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
@@ -749,14 +821,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
749 p += (int64_t)ir_k64(iro)->u64; 821 p += (int64_t)ir_k64(iro)->u64;
750 else 822 else
751 p += iro->i; 823 p += iro->i;
752 lua_assert(p >= (uint8_t *)cdataptr(cd) && 824 lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
753 p + szs <= (uint8_t *)cdataptr(cd) + sz); 825 p + szs <= (uint8_t *)cdataptr(cd) + sz,
826 "sunk store with offset out of range");
754 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { 827 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
755 lua_assert(szs == 4); 828 lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
756 snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); 829 snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
830 LJ_LE ? p+4 : p, 4);
757 if (LJ_BE) p += 4; 831 if (LJ_BE) p += 4;
758 } 832 }
759 snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); 833 snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
760 } 834 }
761 } 835 }
762 } else 836 } else
@@ -771,10 +845,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
771 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { 845 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
772 IRIns *irk = &T->ir[irs->op1]; 846 IRIns *irk = &T->ir[irs->op1];
773 TValue tmp, *val; 847 TValue tmp, *val;
774 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || 848 lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
775 irs->o == IR_FSTORE); 849 irs->o == IR_FSTORE,
850 "sunk store with bad op %d", irs->o);
776 if (irk->o == IR_FREF) { 851 if (irk->o == IR_FREF) {
777 lua_assert(irk->op2 == IRFL_TAB_META); 852 lj_assertJ(irk->op2 == IRFL_TAB_META,
853 "sunk store with bad field %d", irk->op2);
778 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); 854 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
779 /* NOBARRIER: The table is new (marked white). */ 855 /* NOBARRIER: The table is new (marked white). */
780 setgcref(t->metatable, obj2gco(tabV(&tmp))); 856 setgcref(t->metatable, obj2gco(tabV(&tmp)));
@@ -785,7 +861,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
785 val = lj_tab_set(J->L, t, &tmp); 861 val = lj_tab_set(J->L, t, &tmp);
786 /* NOBARRIER: The table is new (marked white). */ 862 /* NOBARRIER: The table is new (marked white). */
787 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); 863 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
788 if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { 864 if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
789 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); 865 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
790 val->u32.hi = tmp.u32.lo; 866 val->u32.hi = tmp.u32.lo;
791 } 867 }
@@ -803,11 +879,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
803 SnapShot *snap = &T->snap[snapno]; 879 SnapShot *snap = &T->snap[snapno];
804 MSize n, nent = snap->nent; 880 MSize n, nent = snap->nent;
805 SnapEntry *map = &T->snapmap[snap->mapofs]; 881 SnapEntry *map = &T->snapmap[snap->mapofs];
806 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; 882#if !LJ_FR2 || defined(LUA_USE_ASSERT)
807 int32_t ftsz0; 883 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
884#endif
885#if !LJ_FR2
886 ptrdiff_t ftsz0;
887#endif
808 TValue *frame; 888 TValue *frame;
809 BloomFilter rfilt = snap_renamefilter(T, snapno); 889 BloomFilter rfilt = snap_renamefilter(T, snapno);
810 const BCIns *pc = snap_pc(map[nent]); 890 const BCIns *pc = snap_pc(&map[nent]);
811 lua_State *L = J->L; 891 lua_State *L = J->L;
812 892
813 /* Set interpreter PC to the next PC to get correct error messages. */ 893 /* Set interpreter PC to the next PC to get correct error messages. */
@@ -820,8 +900,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
820 } 900 }
821 901
822 /* Fill stack slots with data from the registers and spill slots. */ 902 /* Fill stack slots with data from the registers and spill slots. */
823 frame = L->base-1; 903 frame = L->base-1-LJ_FR2;
904#if !LJ_FR2
824 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ 905 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
906#endif
825 for (n = 0; n < nent; n++) { 907 for (n = 0; n < nent; n++) {
826 SnapEntry sn = map[n]; 908 SnapEntry sn = map[n];
827 if (!(sn & SNAP_NORESTORE)) { 909 if (!(sn & SNAP_NORESTORE)) {
@@ -840,18 +922,23 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
840 continue; 922 continue;
841 } 923 }
842 snap_restoreval(J, T, ex, snapno, rfilt, ref, o); 924 snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
843 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { 925 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
844 TValue tmp; 926 TValue tmp;
845 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); 927 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
846 o->u32.hi = tmp.u32.lo; 928 o->u32.hi = tmp.u32.lo;
929#if !LJ_FR2
847 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { 930 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
848 /* Overwrite tag with frame link. */ 931 /* Overwrite tag with frame link. */
849 o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; 932 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
850 L->base = o+1; 933 L->base = o+1;
934#endif
851 } 935 }
852 } 936 }
853 } 937 }
854 lua_assert(map + nent == flinks); 938#if LJ_FR2
939 L->base += (map[nent+LJ_BE] & 0xff);
940#endif
941 lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");
855 942
856 /* Compute current stack top. */ 943 /* Compute current stack top. */
857 switch (bc_op(*pc)) { 944 switch (bc_op(*pc)) {
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 6a7b4e37..c73f75b3 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -13,7 +13,8 @@
13LJ_FUNC void lj_snap_add(jit_State *J); 13LJ_FUNC void lj_snap_add(jit_State *J);
14LJ_FUNC void lj_snap_purge(jit_State *J); 14LJ_FUNC void lj_snap_purge(jit_State *J);
15LJ_FUNC void lj_snap_shrink(jit_State *J); 15LJ_FUNC void lj_snap_shrink(jit_State *J);
16LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); 16LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno,
17 IRIns *ir);
17LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); 18LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T);
18LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); 19LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
19LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); 20LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
diff --git a/src/lj_state.c b/src/lj_state.c
index 421ec9f8..e87b945a 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -24,8 +25,10 @@
24#include "lj_trace.h" 25#include "lj_trace.h"
25#include "lj_dispatch.h" 26#include "lj_dispatch.h"
26#include "lj_vm.h" 27#include "lj_vm.h"
28#include "lj_prng.h"
27#include "lj_lex.h" 29#include "lj_lex.h"
28#include "lj_alloc.h" 30#include "lj_alloc.h"
31#include "luajit.h"
29 32
30/* -- Stack handling ------------------------------------------------------ */ 33/* -- Stack handling ------------------------------------------------------ */
31 34
@@ -47,6 +50,7 @@
47** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 50** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
48** slots above top, but then mobj is always a function. So we can get by 51** slots above top, but then mobj is always a function. So we can get by
49** with 5 extra slots. 52** with 5 extra slots.
53** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC.
50*/ 54*/
51 55
52/* Resize stack slots and adjust pointers in state. */ 56/* Resize stack slots and adjust pointers in state. */
@@ -57,9 +61,10 @@ static void resizestack(lua_State *L, MSize n)
57 MSize oldsize = L->stacksize; 61 MSize oldsize = L->stacksize;
58 MSize realsize = n + 1 + LJ_STACK_EXTRA; 62 MSize realsize = n + 1 + LJ_STACK_EXTRA;
59 GCobj *up; 63 GCobj *up;
60 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); 64 lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1,
65 "inconsistent stack size");
61 st = (TValue *)lj_mem_realloc(L, tvref(L->stack), 66 st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
62 (MSize)(L->stacksize*sizeof(TValue)), 67 (MSize)(oldsize*sizeof(TValue)),
63 (MSize)(realsize*sizeof(TValue))); 68 (MSize)(realsize*sizeof(TValue)));
64 setmref(L->stack, st); 69 setmref(L->stack, st);
65 delta = (char *)st - (char *)oldst; 70 delta = (char *)st - (char *)oldst;
@@ -67,12 +72,12 @@ static void resizestack(lua_State *L, MSize n)
67 while (oldsize < realsize) /* Clear new slots. */ 72 while (oldsize < realsize) /* Clear new slots. */
68 setnilV(st + oldsize++); 73 setnilV(st + oldsize++);
69 L->stacksize = realsize; 74 L->stacksize = realsize;
75 if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize)
76 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
70 L->base = (TValue *)((char *)L->base + delta); 77 L->base = (TValue *)((char *)L->base + delta);
71 L->top = (TValue *)((char *)L->top + delta); 78 L->top = (TValue *)((char *)L->top + delta);
72 for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) 79 for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
73 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); 80 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta));
74 if (obj2gco(L) == gcref(G(L)->jit_L))
75 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
76} 81}
77 82
78/* Relimit stack after error, in case the limit was overdrawn. */ 83/* Relimit stack after error, in case the limit was overdrawn. */
@@ -89,7 +94,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
89 return; /* Avoid stack shrinking while handling stack overflow. */ 94 return; /* Avoid stack shrinking while handling stack overflow. */
90 if (4*used < L->stacksize && 95 if (4*used < L->stacksize &&
91 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && 96 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
92 obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ 97 /* Don't shrink stack of live trace. */
98 (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
93 resizestack(L, L->stacksize >> 1); 99 resizestack(L, L->stacksize >> 1);
94} 100}
95 101
@@ -125,8 +131,9 @@ static void stack_init(lua_State *L1, lua_State *L)
125 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; 131 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
126 stend = st + L1->stacksize; 132 stend = st + L1->stacksize;
127 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); 133 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1);
128 L1->base = L1->top = st+1; 134 setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */
129 setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */ 135 if (LJ_FR2) setnilV(st++);
136 L1->base = L1->top = st;
130 while (st < stend) /* Clear new slots. */ 137 while (st < stend) /* Clear new slots. */
131 setnilV(st++); 138 setnilV(st++);
132} 139}
@@ -143,12 +150,13 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
143 /* NOBARRIER: State initialization, all objects are white. */ 150 /* NOBARRIER: State initialization, all objects are white. */
144 setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); 151 setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL)));
145 settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); 152 settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY));
146 lj_str_resize(L, LJ_MIN_STRTAB-1); 153 lj_str_init(L);
147 lj_meta_init(L); 154 lj_meta_init(L);
148 lj_lex_init(L); 155 lj_lex_init(L);
149 fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ 156 fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */
150 g->gc.threshold = 4*g->gc.total; 157 g->gc.threshold = 4*g->gc.total;
151 lj_trace_initstate(g); 158 lj_trace_initstate(g);
159 lj_err_verify();
152 return NULL; 160 return NULL;
153} 161}
154 162
@@ -157,16 +165,25 @@ static void close_state(lua_State *L)
157 global_State *g = G(L); 165 global_State *g = G(L);
158 lj_func_closeuv(L, tvref(L->stack)); 166 lj_func_closeuv(L, tvref(L->stack));
159 lj_gc_freeall(g); 167 lj_gc_freeall(g);
160 lua_assert(gcref(g->gc.root) == obj2gco(L)); 168 lj_assertG(gcref(g->gc.root) == obj2gco(L),
161 lua_assert(g->strnum == 0); 169 "main thread is not first GC object");
170 lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num);
162 lj_trace_freestate(g); 171 lj_trace_freestate(g);
163#if LJ_HASFFI 172#if LJ_HASFFI
164 lj_ctype_freestate(g); 173 lj_ctype_freestate(g);
165#endif 174#endif
166 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 175 lj_str_freetab(g);
167 lj_str_freebuf(g, &g->tmpbuf); 176 lj_buf_free(g, &g->tmpbuf);
168 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 177 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
169 lua_assert(g->gc.total == sizeof(GG_State)); 178#if LJ_64
179 if (mref(g->gc.lightudseg, uint32_t)) {
180 MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2;
181 lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t);
182 }
183#endif
184 lj_assertG(g->gc.total == sizeof(GG_State),
185 "memory leak of %lld bytes",
186 (long long)(g->gc.total - sizeof(GG_State)));
170#ifndef LUAJIT_USE_SYSMALLOC 187#ifndef LUAJIT_USE_SYSMALLOC
171 if (g->allocf == lj_alloc_f) 188 if (g->allocf == lj_alloc_f)
172 lj_alloc_destroy(g->allocd); 189 lj_alloc_destroy(g->allocd);
@@ -175,17 +192,34 @@ static void close_state(lua_State *L)
175 g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); 192 g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
176} 193}
177 194
178#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) 195#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
179lua_State *lj_state_newstate(lua_Alloc f, void *ud) 196lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd)
180#else 197#else
181LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) 198LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
182#endif 199#endif
183{ 200{
184 GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); 201 PRNGState prng;
185 lua_State *L = &GG->L; 202 GG_State *GG;
186 global_State *g = &GG->g; 203 lua_State *L;
187 if (GG == NULL || !checkptr32(GG)) return NULL; 204 global_State *g;
205 /* We need the PRNG for the memory allocator, so initialize this first. */
206 if (!lj_prng_seed_secure(&prng)) {
207 lj_assertX(0, "secure PRNG seeding failed");
208 /* Can only return NULL here, so this errors with "not enough memory". */
209 return NULL;
210 }
211#ifndef LUAJIT_USE_SYSMALLOC
212 if (allocf == LJ_ALLOCF_INTERNAL) {
213 allocd = lj_alloc_create(&prng);
214 if (!allocd) return NULL;
215 allocf = lj_alloc_f;
216 }
217#endif
218 GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State));
219 if (GG == NULL || !checkptrGC(GG)) return NULL;
188 memset(GG, 0, sizeof(GG_State)); 220 memset(GG, 0, sizeof(GG_State));
221 L = &GG->L;
222 g = &GG->g;
189 L->gct = ~LJ_TTHREAD; 223 L->gct = ~LJ_TTHREAD;
190 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ 224 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
191 L->dummy_ffid = FF_C; 225 L->dummy_ffid = FF_C;
@@ -193,17 +227,25 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
193 g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; 227 g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED;
194 g->strempty.marked = LJ_GC_WHITE0; 228 g->strempty.marked = LJ_GC_WHITE0;
195 g->strempty.gct = ~LJ_TSTR; 229 g->strempty.gct = ~LJ_TSTR;
196 g->allocf = f; 230 g->allocf = allocf;
197 g->allocd = ud; 231 g->allocd = allocd;
232 g->prng = prng;
233#ifndef LUAJIT_USE_SYSMALLOC
234 if (allocf == lj_alloc_f) {
235 lj_alloc_setprng(allocd, &g->prng);
236 }
237#endif
198 setgcref(g->mainthref, obj2gco(L)); 238 setgcref(g->mainthref, obj2gco(L));
199 setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); 239 setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
200 setgcref(g->uvhead.next, obj2gco(&g->uvhead)); 240 setgcref(g->uvhead.next, obj2gco(&g->uvhead));
201 g->strmask = ~(MSize)0; 241 g->str.mask = ~(MSize)0;
202 setnilV(registry(L)); 242 setnilV(registry(L));
203 setnilV(&g->nilnode.val); 243 setnilV(&g->nilnode.val);
204 setnilV(&g->nilnode.key); 244 setnilV(&g->nilnode.key);
245#if !LJ_GC64
205 setmref(g->nilnode.freetop, &g->nilnode); 246 setmref(g->nilnode.freetop, &g->nilnode);
206 lj_str_initbuf(&g->tmpbuf); 247#endif
248 lj_buf_init(NULL, &g->tmpbuf);
207 g->gc.state = GCSpause; 249 g->gc.state = GCSpause;
208 setgcref(g->gc.root, obj2gco(L)); 250 setgcref(g->gc.root, obj2gco(L));
209 setmref(g->gc.sweep, &g->gc.root); 251 setmref(g->gc.sweep, &g->gc.root);
@@ -217,7 +259,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
217 close_state(L); 259 close_state(L);
218 return NULL; 260 return NULL;
219 } 261 }
220 L->status = 0; 262 L->status = LUA_OK;
221 return L; 263 return L;
222} 264}
223 265
@@ -236,6 +278,10 @@ LUA_API void lua_close(lua_State *L)
236 global_State *g = G(L); 278 global_State *g = G(L);
237 int i; 279 int i;
238 L = mainthread(g); /* Only the main thread can be closed. */ 280 L = mainthread(g); /* Only the main thread can be closed. */
281#if LJ_HASPROFILE
282 luaJIT_profile_stop(L);
283#endif
284 setgcrefnull(g->cur_L);
239 lj_func_closeuv(L, tvref(L->stack)); 285 lj_func_closeuv(L, tvref(L->stack));
240 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ 286 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
241#if LJ_HASJIT 287#if LJ_HASJIT
@@ -245,10 +291,10 @@ LUA_API void lua_close(lua_State *L)
245#endif 291#endif
246 for (i = 0;;) { 292 for (i = 0;;) {
247 hook_enter(g); 293 hook_enter(g);
248 L->status = 0; 294 L->status = LUA_OK;
295 L->base = L->top = tvref(L->stack) + 1 + LJ_FR2;
249 L->cframe = NULL; 296 L->cframe = NULL;
250 L->base = L->top = tvref(L->stack) + 1; 297 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) {
251 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
252 if (++i >= 10) break; 298 if (++i >= 10) break;
253 lj_gc_separateudata(g, 1); /* Separate udata again. */ 299 lj_gc_separateudata(g, 1); /* Separate udata again. */
254 if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ 300 if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */
@@ -263,7 +309,7 @@ lua_State *lj_state_new(lua_State *L)
263 lua_State *L1 = lj_mem_newobj(L, lua_State); 309 lua_State *L1 = lj_mem_newobj(L, lua_State);
264 L1->gct = ~LJ_TTHREAD; 310 L1->gct = ~LJ_TTHREAD;
265 L1->dummy_ffid = FF_C; 311 L1->dummy_ffid = FF_C;
266 L1->status = 0; 312 L1->status = LUA_OK;
267 L1->stacksize = 0; 313 L1->stacksize = 0;
268 setmref(L1->stack, NULL); 314 setmref(L1->stack, NULL);
269 L1->cframe = NULL; 315 L1->cframe = NULL;
@@ -272,15 +318,17 @@ lua_State *lj_state_new(lua_State *L)
272 setmrefr(L1->glref, L->glref); 318 setmrefr(L1->glref, L->glref);
273 setgcrefr(L1->env, L->env); 319 setgcrefr(L1->env, L->env);
274 stack_init(L1, L); /* init stack */ 320 stack_init(L1, L); /* init stack */
275 lua_assert(iswhite(obj2gco(L1))); 321 lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white");
276 return L1; 322 return L1;
277} 323}
278 324
279void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) 325void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
280{ 326{
281 lua_assert(L != mainthread(g)); 327 lj_assertG(L != mainthread(g), "free of main thread");
328 if (obj2gco(L) == gcref(g->cur_L))
329 setgcrefnull(g->cur_L);
282 lj_func_closeuv(L, tvref(L->stack)); 330 lj_func_closeuv(L, tvref(L->stack));
283 lua_assert(gcref(L->openupval) == NULL); 331 lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
284 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 332 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
285 lj_mem_freet(g, L); 333 lj_mem_freet(g, L);
286} 334}
diff --git a/src/lj_state.h b/src/lj_state.h
index 9849cc2b..273b6b12 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -28,8 +28,10 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
28 28
29LJ_FUNC lua_State *lj_state_new(lua_State *L); 29LJ_FUNC lua_State *lj_state_new(lua_State *L);
30LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); 30LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L);
31#if LJ_64 31#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
32LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); 32LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud);
33#endif 33#endif
34 34
35#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4))
36
35#endif 37#endif
diff --git a/src/lj_str.c b/src/lj_str.c
index e8821ad2..c6f2ceec 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,13 +1,8 @@
1/* 1/*
2** String handling. 2** String handling.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 4*/
8 5
9#include <stdio.h>
10
11#define lj_str_c 6#define lj_str_c
12#define LUA_CORE 7#define LUA_CORE
13 8
@@ -15,10 +10,10 @@
15#include "lj_gc.h" 10#include "lj_gc.h"
16#include "lj_err.h" 11#include "lj_err.h"
17#include "lj_str.h" 12#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_char.h" 13#include "lj_char.h"
14#include "lj_prng.h"
20 15
21/* -- String interning ---------------------------------------------------- */ 16/* -- String helpers ------------------------------------------------------ */
22 17
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 18/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 19int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -43,297 +38,333 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
43 return (int32_t)(a->len - b->len); 38 return (int32_t)(a->len - b->len);
44} 39}
45 40
46/* Fast string data comparison. Caveat: unaligned access to 1st string! */ 41/* Find fixed string p inside string s. */
47static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) 42const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
48{ 43{
49 MSize i = 0; 44 if (plen <= slen) {
50 lua_assert(len > 0); 45 if (plen == 0) {
51 lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); 46 return s;
52 do { /* Note: innocuous access up to end of string + 3. */ 47 } else {
53 uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); 48 int c = *(const uint8_t *)p++;
54 if (v) { 49 plen--; slen -= plen;
55 i -= len; 50 while (slen) {
56#if LJ_LE 51 const char *q = (const char *)memchr(s, c, slen);
57 return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; 52 if (!q) break;
58#else 53 if (memcmp(q+1, p, plen) == 0) return q;
59 return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; 54 q++; slen -= (MSize)(q-s); s = q;
60#endif 55 }
61 } 56 }
62 i += 4; 57 }
63 } while (i < len); 58 return NULL;
64 return 0;
65} 59}
66 60
67/* Resize the string hash table (grow and shrink). */ 61/* Check whether a string has a pattern matching character. */
68void lj_str_resize(lua_State *L, MSize newmask) 62int lj_str_haspattern(GCstr *s)
69{ 63{
70 global_State *g = G(L); 64 const char *p = strdata(s), *q = p + s->len;
71 GCRef *newhash; 65 while (p < q) {
72 MSize i; 66 int c = *(const uint8_t *)p++;
73 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) 67 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
74 return; /* No resizing during GC traversal or if already too big. */ 68 return 1; /* Found a pattern matching char. */
75 newhash = lj_mem_newvec(L, newmask+1, GCRef);
76 memset(newhash, 0, (newmask+1)*sizeof(GCRef));
77 for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
78 GCobj *p = gcref(g->strhash[i]);
79 while (p) { /* Follow each hash chain and reinsert all strings. */
80 MSize h = gco2str(p)->hash & newmask;
81 GCobj *next = gcnext(p);
82 /* NOBARRIER: The string table is a GC root. */
83 setgcrefr(p->gch.nextgc, newhash[h]);
84 setgcref(newhash[h], p);
85 p = next;
86 }
87 } 69 }
88 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 70 return 0; /* No pattern matching chars found. */
89 g->strmask = newmask;
90 g->strhash = newhash;
91} 71}
92 72
93/* Intern a string and return string object. */ 73/* -- String hashing ------------------------------------------------------ */
94GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) 74
75/* Keyed sparse ARX string hash. Constant time. */
76static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
95{ 77{
96 global_State *g; 78 /* Constants taken from lookup3 hash by Bob Jenkins. */
97 GCstr *s; 79 StrHash a, b, h = len ^ (StrHash)seed;
98 GCobj *o;
99 MSize len = (MSize)lenx;
100 MSize a, b, h = len;
101 if (lenx >= LJ_MAX_STR)
102 lj_err_msg(L, LJ_ERR_STROV);
103 g = G(L);
104 /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
105 if (len >= 4) { /* Caveat: unaligned access! */ 80 if (len >= 4) { /* Caveat: unaligned access! */
106 a = lj_getu32(str); 81 a = lj_getu32(str);
107 h ^= lj_getu32(str+len-4); 82 h ^= lj_getu32(str+len-4);
108 b = lj_getu32(str+(len>>1)-2); 83 b = lj_getu32(str+(len>>1)-2);
109 h ^= b; h -= lj_rol(b, 14); 84 h ^= b; h -= lj_rol(b, 14);
110 b += lj_getu32(str+(len>>2)-1); 85 b += lj_getu32(str+(len>>2)-1);
111 } else if (len > 0) { 86 } else {
112 a = *(const uint8_t *)str; 87 a = *(const uint8_t *)str;
113 h ^= *(const uint8_t *)(str+len-1); 88 h ^= *(const uint8_t *)(str+len-1);
114 b = *(const uint8_t *)(str+(len>>1)); 89 b = *(const uint8_t *)(str+(len>>1));
115 h ^= b; h -= lj_rol(b, 14); 90 h ^= b; h -= lj_rol(b, 14);
116 } else {
117 return &g->strempty;
118 } 91 }
119 a ^= h; a -= lj_rol(h, 11); 92 a ^= h; a -= lj_rol(h, 11);
120 b ^= a; b -= lj_rol(a, 25); 93 b ^= a; b -= lj_rol(a, 25);
121 h ^= b; h -= lj_rol(b, 16); 94 h ^= b; h -= lj_rol(b, 16);
122 /* Check if the string has already been interned. */ 95 return h;
123 o = gcref(g->strhash[h & g->strmask]);
124 if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
125 while (o != NULL) {
126 GCstr *sx = gco2str(o);
127 if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
128 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
129 if (isdead(g, o)) flipwhite(o);
130 return sx; /* Return existing string. */
131 }
132 o = gcnext(o);
133 }
134 } else { /* Slow path: end of string is too close to a page boundary. */
135 while (o != NULL) {
136 GCstr *sx = gco2str(o);
137 if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
138 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
139 if (isdead(g, o)) flipwhite(o);
140 return sx; /* Return existing string. */
141 }
142 o = gcnext(o);
143 }
144 }
145 /* Nope, create a new string. */
146 s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
147 newwhite(g, s);
148 s->gct = ~LJ_TSTR;
149 s->len = len;
150 s->hash = h;
151 s->reserved = 0;
152 memcpy(strdatawr(s), str, len);
153 strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
154 /* Add it to string hash table. */
155 h &= g->strmask;
156 s->nextgc = g->strhash[h];
157 /* NOBARRIER: The string table is a GC root. */
158 setgcref(g->strhash[h], obj2gco(s));
159 if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
160 lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
161 return s; /* Return newly interned string. */
162} 96}
163 97
164void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) 98#if LUAJIT_SECURITY_STRHASH
99/* Keyed dense ARX string hash. Linear time. */
100static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
101 const char *str, MSize len)
165{ 102{
166 g->strnum--; 103 StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
167 lj_mem_free(g, s, sizestring(s)); 104 if (len > 12) {
105 StrHash a = (StrHash)seed;
106 const char *pe = str+len-12, *p = pe, *q = str;
107 do {
108 a += lj_getu32(p);
109 b += lj_getu32(p+4);
110 h += lj_getu32(p+8);
111 p = q; q += 12;
112 h ^= b; h -= lj_rol(b, 14);
113 a ^= h; a -= lj_rol(h, 11);
114 b ^= a; b -= lj_rol(a, 25);
115 } while (p < pe);
116 h ^= b; h -= lj_rol(b, 16);
117 a ^= h; a -= lj_rol(h, 4);
118 b ^= a; b -= lj_rol(a, 14);
119 }
120 return b;
168} 121}
122#endif
169 123
170/* -- Type conversions ---------------------------------------------------- */ 124/* -- String interning ---------------------------------------------------- */
171 125
172/* Print number to buffer. Canonicalizes non-finite values. */ 126#define LJ_STR_MAXCOLL 32
173size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
174{
175 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
176 lua_Number n = o->n;
177#if __BIONIC__
178 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
179#endif
180 return (size_t)lua_number2str(s, n);
181 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
182 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
183 } else if ((o->u32.hi & 0x80000000) == 0) {
184 s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
185 } else {
186 s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
187 }
188}
189 127
190/* Print integer to buffer. Returns pointer to start. */ 128/* Resize the string interning hash table (grow and shrink). */
191char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) 129void lj_str_resize(lua_State *L, MSize newmask)
192{ 130{
193 uint32_t u = (uint32_t)(k < 0 ? -k : k); 131 global_State *g = G(L);
194 p += 1+10; 132 GCRef *newtab, *oldtab = g->str.tab;
195 do { *--p = (char)('0' + u % 10); } while (u /= 10); 133 MSize i;
196 if (k < 0) *--p = '-';
197 return p;
198}
199 134
200/* Convert number to string. */ 135 /* No resizing during GC traversal or if already too big. */
201GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) 136 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
202{ 137 return;
203 char buf[LJ_STR_NUMBUF];
204 size_t len = lj_str_bufnum(buf, (TValue *)np);
205 return lj_str_new(L, buf, len);
206}
207 138
208/* Convert integer to string. */ 139 newtab = lj_mem_newvec(L, newmask+1, GCRef);
209GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) 140 memset(newtab, 0, (newmask+1)*sizeof(GCRef));
210{
211 char s[1+10];
212 char *p = lj_str_bufint(s, k);
213 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
214}
215 141
216GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) 142#if LUAJIT_SECURITY_STRHASH
217{ 143 /* Check which chains need secondary hashes. */
218 return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); 144 if (g->str.second) {
219} 145 int newsecond = 0;
146 /* Compute primary chain lengths. */
147 for (i = g->str.mask; i != ~(MSize)0; i--) {
148 GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
149 while (o) {
150 GCstr *s = gco2str(o);
151 MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) :
152 s->hash;
153 hash &= newmask;
154 setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1);
155 o = gcnext(o);
156 }
157 }
158 /* Mark secondary chains. */
159 for (i = newmask; i != ~(MSize)0; i--) {
160 int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL;
161 newsecond |= secondary;
162 setgcrefp(newtab[i], secondary);
163 }
164 g->str.second = newsecond;
165 }
166#endif
220 167
221/* -- String formatting --------------------------------------------------- */ 168 /* Reinsert all strings from the old table into the new table. */
169 for (i = g->str.mask; i != ~(MSize)0; i--) {
170 GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
171 while (o) {
172 GCobj *next = gcnext(o);
173 GCstr *s = gco2str(o);
174 MSize hash = s->hash;
175#if LUAJIT_SECURITY_STRHASH
176 uintptr_t u;
177 if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */
178 hash &= newmask;
179 u = gcrefu(newtab[hash]);
180 if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */
181 s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len);
182 s->hashalg = 1;
183 hash &= newmask;
184 u = gcrefu(newtab[hash]);
185 }
186 } else { /* String hashed with secondary hash. */
187 MSize shash = hash_sparse(g->str.seed, strdata(s), s->len);
188 u = gcrefu(newtab[shash & newmask]);
189 if (u & 1) {
190 hash &= newmask;
191 u = gcrefu(newtab[hash]);
192 } else { /* Revert string back to primary hash. */
193 s->hash = shash;
194 s->hashalg = 0;
195 hash = (shash & newmask);
196 }
197 }
198 /* NOBARRIER: The string table is a GC root. */
199 setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
200 setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1)));
201#else
202 hash &= newmask;
203 /* NOBARRIER: The string table is a GC root. */
204 setgcrefr(o->gch.nextgc, newtab[hash]);
205 setgcref(newtab[hash], o);
206#endif
207 o = next;
208 }
209 }
210
211 /* Free old table and replace with new table. */
212 lj_str_freetab(g);
213 g->str.tab = newtab;
214 g->str.mask = newmask;
215}
222 216
223static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) 217#if LUAJIT_SECURITY_STRHASH
218/* Rehash and rechain all strings in a chain. */
219static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc,
220 const char *str, MSize len)
224{ 221{
225 char *p; 222 global_State *g = G(L);
226 MSize i; 223 int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */
227 if (sb->n + len > sb->sz) { 224 GCRef *strtab = g->str.tab;
228 MSize sz = sb->sz * 2; 225 MSize strmask = g->str.mask;
229 while (sb->n + len > sz) sz = sz * 2; 226 GCobj *o = gcref(strtab[hashc & strmask]);
230 lj_str_resizebuf(L, sb, sz); 227 setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1));
228 g->str.second = 1;
229 while (o) {
230 uintptr_t u;
231 GCobj *next = gcnext(o);
232 GCstr *s = gco2str(o);
233 StrHash hash;
234 if (ow) { /* Must sweep while rechaining. */
235 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */
236 lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
237 "sweep of undead string");
238 makewhite(g, o);
239 } else { /* Free dead string. */
240 lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
241 "sweep of unlive string");
242 lj_str_free(g, s);
243 o = next;
244 continue;
245 }
246 }
247 hash = s->hash;
248 if (!s->hashalg) { /* Rehash with secondary hash. */
249 hash = hash_dense(g->str.seed, hash, strdata(s), s->len);
250 s->hash = hash;
251 s->hashalg = 1;
252 }
253 /* Rechain. */
254 hash &= strmask;
255 u = gcrefu(strtab[hash]);
256 setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
257 setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1)));
258 o = next;
231 } 259 }
232 p = sb->buf + sb->n; 260 /* Try to insert the pending string again. */
233 sb->n += len; 261 return lj_str_new(L, str, len);
234 for (i = 0; i < len; i++) p[i] = str[i];
235} 262}
263#endif
264
265/* Reseed String ID from PRNG after random interval < 2^bits. */
266#if LUAJIT_SECURITY_STRID == 1
267#define STRID_RESEED_INTERVAL 8
268#elif LUAJIT_SECURITY_STRID == 2
269#define STRID_RESEED_INTERVAL 4
270#elif LUAJIT_SECURITY_STRID >= 3
271#define STRID_RESEED_INTERVAL 0
272#endif
236 273
237static void addchar(lua_State *L, SBuf *sb, int c) 274/* Allocate a new string and add to string interning table. */
275static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
276 StrHash hash, int hashalg)
238{ 277{
239 if (sb->n + 1 > sb->sz) { 278 GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr);
240 MSize sz = sb->sz * 2; 279 global_State *g = G(L);
241 lj_str_resizebuf(L, sb, sz); 280 uintptr_t u;
281 newwhite(g, s);
282 s->gct = ~LJ_TSTR;
283 s->len = len;
284 s->hash = hash;
285#ifndef STRID_RESEED_INTERVAL
286 s->sid = g->str.id++;
287#elif STRID_RESEED_INTERVAL
288 if (!g->str.idreseed--) {
289 uint64_t r = lj_prng_u64(&g->prng);
290 g->str.id = (StrID)r;
291 g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL));
242 } 292 }
243 sb->buf[sb->n++] = (char)c; 293 s->sid = g->str.id++;
294#else
295 s->sid = (StrID)lj_prng_u64(&g->prng);
296#endif
297 s->reserved = 0;
298 s->hashalg = (uint8_t)hashalg;
299 /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */
300 *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0;
301 memcpy(strdatawr(s), str, len);
302 /* Add to string hash table. */
303 hash &= g->str.mask;
304 u = gcrefu(g->str.tab[hash]);
305 setgcrefp(s->nextgc, (u & ~(uintptr_t)1));
306 /* NOBARRIER: The string table is a GC root. */
307 setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1)));
308 if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */
309 lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */
310 return s; /* Return newly interned string. */
244} 311}
245 312
246/* Push formatted message as a string object to Lua stack. va_list variant. */ 313/* Intern a string and return string object. */
247const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) 314GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
248{ 315{
249 SBuf *sb = &G(L)->tmpbuf; 316 global_State *g = G(L);
250 lj_str_needbuf(L, sb, (MSize)strlen(fmt)); 317 if (lenx-1 < LJ_MAX_STR-1) {
251 lj_str_resetbuf(sb); 318 MSize len = (MSize)lenx;
252 for (;;) { 319 StrHash hash = hash_sparse(g->str.seed, str, len);
253 const char *e = strchr(fmt, '%'); 320 MSize coll = 0;
254 if (e == NULL) break; 321 int hashalg = 0;
255 addstr(L, sb, fmt, (MSize)(e-fmt)); 322 /* Check if the string has already been interned. */
256 /* This function only handles %s, %c, %d, %f and %p formats. */ 323 GCobj *o = gcref(g->str.tab[hash & g->str.mask]);
257 switch (e[1]) { 324#if LUAJIT_SECURITY_STRHASH
258 case 's': { 325 if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */
259 const char *s = va_arg(argp, char *); 326 hashalg = 1;
260 if (s == NULL) s = "(null)"; 327 hash = hash_dense(g->str.seed, hash, str, len);
261 addstr(L, sb, s, (MSize)strlen(s)); 328 o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1);
262 break; 329 }
263 }
264 case 'c':
265 addchar(L, sb, va_arg(argp, int));
266 break;
267 case 'd': {
268 char buf[LJ_STR_INTBUF];
269 char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
270 addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
271 break;
272 }
273 case 'f': {
274 char buf[LJ_STR_NUMBUF];
275 TValue tv;
276 MSize len;
277 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
278 len = (MSize)lj_str_bufnum(buf, &tv);
279 addstr(L, sb, buf, len);
280 break;
281 }
282 case 'p': {
283#define FMTP_CHARS (2*sizeof(ptrdiff_t))
284 char buf[2+FMTP_CHARS];
285 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
286 ptrdiff_t i, lasti = 2+FMTP_CHARS;
287 if (p == 0) {
288 addstr(L, sb, "NULL", 4);
289 break;
290 }
291#if LJ_64
292 /* Shorten output for 64 bit pointers. */
293 lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
294#endif 330#endif
295 buf[0] = '0'; 331 while (o != NULL) {
296 buf[1] = 'x'; 332 GCstr *sx = gco2str(o);
297 for (i = lasti-1; i >= 2; i--, p >>= 4) 333 if (sx->hash == hash && sx->len == len) {
298 buf[i] = "0123456789abcdef"[(p & 15)]; 334 if (memcmp(str, strdata(sx), len) == 0) {
299 addstr(L, sb, buf, (MSize)lasti); 335 if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
300 break; 336 return sx; /* Return existing string. */
337 }
338 coll++;
301 } 339 }
302 case '%': 340 coll++;
303 addchar(L, sb, '%'); 341 o = gcnext(o);
304 break; 342 }
305 default: 343#if LUAJIT_SECURITY_STRHASH
306 addchar(L, sb, '%'); 344 /* Rehash chain if there are too many collisions. */
307 addchar(L, sb, e[1]); 345 if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) {
308 break; 346 return lj_str_rehash_chain(L, hash, str, len);
309 } 347 }
310 fmt = e+2; 348#endif
349 /* Otherwise allocate a new string. */
350 return lj_str_alloc(L, str, len, hash, hashalg);
351 } else {
352 if (lenx)
353 lj_err_msg(L, LJ_ERR_STROV);
354 return &g->strempty;
311 } 355 }
312 addstr(L, sb, fmt, (MSize)strlen(fmt));
313 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
314 incr_top(L);
315 return strVdata(L->top - 1);
316} 356}
317 357
318/* Push formatted message as a string object to Lua stack. Vararg variant. */ 358void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
319const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
320{ 359{
321 const char *msg; 360 g->str.num--;
322 va_list argp; 361 lj_mem_free(g, s, lj_str_size(s->len));
323 va_start(argp, fmt);
324 msg = lj_str_pushvf(L, fmt, argp);
325 va_end(argp);
326 return msg;
327} 362}
328 363
329/* -- Buffer handling ----------------------------------------------------- */ 364void LJ_FASTCALL lj_str_init(lua_State *L)
330
331char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
332{ 365{
333 if (sz > sb->sz) { 366 global_State *g = G(L);
334 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; 367 g->str.seed = lj_prng_u64(&g->prng);
335 lj_str_resizebuf(L, sb, sz); 368 lj_str_resize(L, LJ_MIN_STRTAB-1);
336 }
337 return sb->buf;
338} 369}
339 370
diff --git a/src/lj_str.h b/src/lj_str.h
index a4b5ac4e..39fa4f06 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,41 +10,22 @@
10 10
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String helpers. */
14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC const char *lj_str_find(const char *s, const char *f,
16 MSize slen, MSize flen);
17LJ_FUNC int lj_str_haspattern(GCstr *s);
18
19/* String interning. */
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 20LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 21LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 22LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
23LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
24#define lj_str_freetab(g) \
25 (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef))
18 26
19#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) 27#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 28#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 29#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
22/* Type conversions. */
23LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
24LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
27LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
28
29#define LJ_STR_INTBUF (1+10)
30#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
31
32/* String formatting. */
33LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
34LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
35#if defined(__GNUC__)
36 __attribute__ ((format (printf, 2, 3)))
37#endif
38 ;
39
40/* Resizable string buffers. Struct definition in lj_obj.h. */
41LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
42
43#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
44#define lj_str_resetbuf(sb) ((sb)->n = 0)
45#define lj_str_resizebuf(L, sb, size) \
46 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
47 (sb)->sz = (size))
48#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
49 30
50#endif 31#endif
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..945954aa
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,606 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_strfmt_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_err.h"
13#include "lj_buf.h"
14#include "lj_str.h"
15#include "lj_meta.h"
16#include "lj_state.h"
17#include "lj_char.h"
18#include "lj_strfmt.h"
19#if LJ_HASFFI
20#include "lj_ctype.h"
21#endif
22#include "lj_lib.h"
23
24/* -- Format parser ------------------------------------------------------- */
25
26static const uint8_t strfmt_map[('x'-'A')+1] = {
27 STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0,
28 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
29 0,0,0,0,0,0,
30 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
31 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
32};
33
34SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
35{
36 const uint8_t *p = fs->p, *e = fs->e;
37 fs->str = (const char *)p;
38 for (; p < e; p++) {
39 if (*p == '%') { /* Escape char? */
40 if (p[1] == '%') { /* '%%'? */
41 fs->p = ++p+1;
42 goto retlit;
43 } else {
44 SFormat sf = 0;
45 uint32_t c;
46 if (p != (const uint8_t *)fs->str)
47 break;
48 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
49 /* Parse flags. */
50 if (*p == '-') sf |= STRFMT_F_LEFT;
51 else if (*p == '+') sf |= STRFMT_F_PLUS;
52 else if (*p == '0') sf |= STRFMT_F_ZERO;
53 else if (*p == ' ') sf |= STRFMT_F_SPACE;
54 else if (*p == '#') sf |= STRFMT_F_ALT;
55 else break;
56 }
57 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
58 uint32_t width = (uint32_t)*p++ - '0';
59 if ((uint32_t)*p - '0' < 10)
60 width = (uint32_t)*p++ - '0' + width*10;
61 sf |= (width << STRFMT_SH_WIDTH);
62 }
63 if (*p == '.') { /* Parse precision. */
64 uint32_t prec = 0;
65 p++;
66 if ((uint32_t)*p - '0' < 10) {
67 prec = (uint32_t)*p++ - '0';
68 if ((uint32_t)*p - '0' < 10)
69 prec = (uint32_t)*p++ - '0' + prec*10;
70 }
71 sf |= ((prec+1) << STRFMT_SH_PREC);
72 }
73 /* Parse conversion. */
74 c = (uint32_t)*p - 'A';
75 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
76 uint32_t sx = strfmt_map[c];
77 if (sx) {
78 fs->p = p+1;
79 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
80 }
81 }
82 /* Return error location. */
83 if (*p >= 32) p++;
84 fs->len = (MSize)(p - (const uint8_t *)fs->str);
85 fs->p = fs->e;
86 return STRFMT_ERR;
87 }
88 }
89 }
90 fs->p = p;
91retlit:
92 fs->len = (MSize)(p - (const uint8_t *)fs->str);
93 return fs->len ? STRFMT_LIT : STRFMT_EOF;
94}
95
96/* -- Raw conversions ----------------------------------------------------- */
97
98#define WINT_R(x, sh, sc) \
99 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
100
101/* Write integer to buffer. */
102char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
103{
104 uint32_t u = (uint32_t)k;
105 if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
106 if (u < 10000) {
107 if (u < 10) goto dig1;
108 if (u < 100) goto dig2;
109 if (u < 1000) goto dig3;
110 } else {
111 uint32_t v = u / 10000; u -= v * 10000;
112 if (v < 10000) {
113 if (v < 10) goto dig5;
114 if (v < 100) goto dig6;
115 if (v < 1000) goto dig7;
116 } else {
117 uint32_t w = v / 10000; v -= w * 10000;
118 if (w >= 10) WINT_R(w, 10, 10)
119 *p++ = (char)('0'+w);
120 }
121 WINT_R(v, 23, 1000)
122 dig7: WINT_R(v, 12, 100)
123 dig6: WINT_R(v, 10, 10)
124 dig5: *p++ = (char)('0'+v);
125 }
126 WINT_R(u, 23, 1000)
127 dig3: WINT_R(u, 12, 100)
128 dig2: WINT_R(u, 10, 10)
129 dig1: *p++ = (char)('0'+u);
130 return p;
131}
132#undef WINT_R
133
134/* Write pointer to buffer. */
135char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
136{
137 ptrdiff_t x = (ptrdiff_t)v;
138 MSize i, n = STRFMT_MAXBUF_PTR;
139 if (x == 0) {
140 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
141 return p;
142 }
143#if LJ_64
144 /* Shorten output for 64 bit pointers. */
145 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
146#endif
147 p[0] = '0';
148 p[1] = 'x';
149 for (i = n-1; i >= 2; i--, x >>= 4)
150 p[i] = "0123456789abcdef"[(x & 15)];
151 return p+n;
152}
153
154/* Write ULEB128 to buffer. */
155char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
156{
157 for (; v >= 0x80; v >>= 7)
158 *p++ = (char)((v & 0x7f) | 0x80);
159 *p++ = (char)v;
160 return p;
161}
162
163/* Return string or write number to tmp buffer and return pointer to start. */
164const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
165{
166 SBuf *sb;
167 if (tvisstr(o)) {
168 *lenp = strV(o)->len;
169 return strVdata(o);
170 } else if (tvisbuf(o)) {
171 SBufExt *sbx = bufV(o);
172 *lenp = sbufxlen(sbx);
173 return sbx->r;
174 } else if (tvisint(o)) {
175 sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
176 } else if (tvisnum(o)) {
177 sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
178 } else {
179 return NULL;
180 }
181 *lenp = sbuflen(sb);
182 return sb->b;
183}
184
185/* -- Unformatted conversions to buffer ----------------------------------- */
186
187/* Add integer to buffer. */
188SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
189{
190 sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k);
191 return sb;
192}
193
194#if LJ_HASJIT
195/* Add number to buffer. */
196SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
197{
198 return lj_strfmt_putfnum(sb, STRFMT_G14, o->n);
199}
200#endif
201
202SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
203{
204 sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v);
205 return sb;
206}
207
208/* Add quoted string to buffer. */
209static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len)
210{
211 lj_buf_putb(sb, '"');
212 while (len--) {
213 uint32_t c = (uint32_t)(uint8_t)*s++;
214 char *w = lj_buf_more(sb, 4);
215 if (c == '"' || c == '\\' || c == '\n') {
216 *w++ = '\\';
217 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
218 uint32_t d;
219 *w++ = '\\';
220 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
221 *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
222 goto tens;
223 } else if (c >= 10) {
224 tens:
225 d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d);
226 }
227 c += '0';
228 }
229 *w++ = (char)c;
230 sb->w = w;
231 }
232 lj_buf_putb(sb, '"');
233 return sb;
234}
235
236#if LJ_HASJIT
237SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
238{
239 return strfmt_putquotedlen(sb, strdata(str), str->len);
240}
241#endif
242
243/* -- Formatted conversions to buffer ------------------------------------- */
244
245/* Add formatted char to buffer. */
246SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
247{
248 MSize width = STRFMT_WIDTH(sf);
249 char *w = lj_buf_more(sb, width > 1 ? width : 1);
250 if ((sf & STRFMT_F_LEFT)) *w++ = (char)c;
251 while (width-- > 1) *w++ = ' ';
252 if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c;
253 sb->w = w;
254 return sb;
255}
256
257/* Add formatted string to buffer. */
258static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len)
259{
260 MSize width = STRFMT_WIDTH(sf);
261 char *w;
262 if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf);
263 w = lj_buf_more(sb, width > len ? width : len);
264 if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
265 while (width-- > len) *w++ = ' ';
266 if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
267 sb->w = w;
268 return sb;
269}
270
271#if LJ_HASJIT
272SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
273{
274 return strfmt_putfstrlen(sb, sf, strdata(str), str->len);
275}
276#endif
277
278/* Add formatted signed/unsigned integer to buffer. */
279SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
280{
281 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w;
282#ifdef LUA_USE_ASSERT
283 char *ws;
284#endif
285 MSize prefix = 0, len, prec, pprec, width, need;
286
287 /* Figure out signed prefixes. */
288 if (STRFMT_TYPE(sf) == STRFMT_INT) {
289 if ((int64_t)k < 0) {
290 k = (uint64_t)-(int64_t)k;
291 prefix = 256 + '-';
292 } else if ((sf & STRFMT_F_PLUS)) {
293 prefix = 256 + '+';
294 } else if ((sf & STRFMT_F_SPACE)) {
295 prefix = 256 + ' ';
296 }
297 }
298
299 /* Convert number and store to fixed-size buffer in reverse order. */
300 prec = STRFMT_PREC(sf);
301 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
302 if (k == 0) { /* Special-case zero argument. */
303 if (prec != 0 ||
304 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
305 *--q = '0';
306 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
307 uint32_t k2;
308 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
309 k2 = (uint32_t)k;
310 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
311 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
312 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
313 "0123456789abcdef";
314 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
315 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
316 } else { /* Octal. */
317 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
318 if ((sf & STRFMT_F_ALT)) *--q = '0';
319 }
320
321 /* Calculate sizes. */
322 len = (MSize)(buf + sizeof(buf) - q);
323 if ((int32_t)len >= (int32_t)prec) prec = len;
324 width = STRFMT_WIDTH(sf);
325 pprec = prec + (prefix >> 8);
326 need = width > pprec ? width : pprec;
327 w = lj_buf_more(sb, need);
328#ifdef LUA_USE_ASSERT
329 ws = w;
330#endif
331
332 /* Format number with leading/trailing whitespace and zeros. */
333 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
334 while (width-- > pprec) *w++ = ' ';
335 if (prefix) {
336 if ((char)prefix >= 'X') *w++ = '0';
337 *w++ = (char)prefix;
338 }
339 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
340 while (width-- > pprec) *w++ = '0';
341 while (prec-- > len) *w++ = '0';
342 while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */
343 if ((sf & STRFMT_F_LEFT))
344 while (width-- > pprec) *w++ = ' ';
345
346 lj_assertX(need == (MSize)(w - ws), "miscalculated format size");
347 sb->w = w;
348 return sb;
349}
350
351/* Add number formatted as signed integer to buffer. */
352SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
353{
354 int64_t k = (int64_t)n;
355 if (checki32(k) && sf == STRFMT_INT)
356 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
357 else
358 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
359}
360
361/* Add number formatted as unsigned integer to buffer. */
362SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
363{
364 int64_t k;
365 if (n >= 9223372036854775808.0)
366 k = (int64_t)(n - 18446744073709551616.0);
367 else
368 k = (int64_t)n;
369 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
370}
371
372/* Format stack arguments to buffer. */
373int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
374{
375 int narg = (int)(L->top - L->base);
376 GCstr *fmt = lj_lib_checkstr(L, arg);
377 FormatState fs;
378 SFormat sf;
379 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
380 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
381 if (sf == STRFMT_LIT) {
382 lj_buf_putmem(sb, fs.str, fs.len);
383 } else if (sf == STRFMT_ERR) {
384 lj_err_callerv(L, LJ_ERR_STRFMT,
385 strdata(lj_str_new(L, fs.str, fs.len)));
386 } else {
387 TValue *o = &L->base[arg++];
388 if (arg > narg)
389 lj_err_arg(L, arg, LJ_ERR_NOVAL);
390 switch (STRFMT_TYPE(sf)) {
391 case STRFMT_INT:
392 if (tvisint(o)) {
393 int32_t k = intV(o);
394 if (sf == STRFMT_INT)
395 lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
396 else
397 lj_strfmt_putfxint(sb, sf, k);
398 break;
399 }
400#if LJ_HASFFI
401 if (tviscdata(o)) {
402 GCcdata *cd = cdataV(o);
403 if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
404 lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
405 break;
406 }
407 }
408#endif
409 lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
410 break;
411 case STRFMT_UINT:
412 if (tvisint(o)) {
413 lj_strfmt_putfxint(sb, sf, intV(o));
414 break;
415 }
416#if LJ_HASFFI
417 if (tviscdata(o)) {
418 GCcdata *cd = cdataV(o);
419 if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
420 lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
421 break;
422 }
423 }
424#endif
425 lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
426 break;
427 case STRFMT_NUM:
428 lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
429 break;
430 case STRFMT_STR: {
431 MSize len;
432 const char *s;
433 cTValue *mo;
434 if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&
435 !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
436 /* Call __tostring metamethod once. */
437 copyTV(L, L->top++, mo);
438 copyTV(L, L->top++, o);
439 lua_call(L, 1, 1);
440 o = &L->base[arg-1]; /* Stack may have been reallocated. */
441 copyTV(L, o, --L->top); /* Replace inline for retry. */
442 if (retry < 2) { /* Global buffer may have been overwritten. */
443 retry = 1;
444 break;
445 }
446 }
447 if (LJ_LIKELY(tvisstr(o))) {
448 len = strV(o)->len;
449 s = strVdata(o);
450#if LJ_HASBUFFER
451 } else if (tvisbuf(o)) {
452 SBufExt *sbx = bufV(o);
453 if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
454 len = sbufxlen(sbx);
455 s = sbx->r;
456#endif
457 } else {
458 GCstr *str = lj_strfmt_obj(L, o);
459 len = str->len;
460 s = strdata(str);
461 }
462 if ((sf & STRFMT_T_QUOTED))
463 strfmt_putquotedlen(sb, s, len); /* No formatting. */
464 else
465 strfmt_putfstrlen(sb, sf, s, len);
466 break;
467 }
468 case STRFMT_CHAR:
469 lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
470 break;
471 case STRFMT_PTR: /* No formatting. */
472 lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
473 break;
474 default:
475 lj_assertL(0, "bad string format type");
476 break;
477 }
478 }
479 }
480 return retry;
481}
482
483/* -- Conversions to strings ---------------------------------------------- */
484
485/* Convert integer to string. */
486GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
487{
488 char buf[STRFMT_MAXBUF_INT];
489 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
490 return lj_str_new(L, buf, len);
491}
492
493/* Convert integer or number to string. */
494GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
495{
496 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
497}
498
499#if LJ_HASJIT
500/* Convert char value to string. */
501GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
502{
503 char buf[1];
504 buf[0] = c;
505 return lj_str_new(L, buf, 1);
506}
507#endif
508
509/* Raw conversion of object to string. */
510GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
511{
512 if (tvisstr(o)) {
513 return strV(o);
514 } else if (tvisnumber(o)) {
515 return lj_strfmt_number(L, o);
516 } else if (tvisnil(o)) {
517 return lj_str_newlit(L, "nil");
518 } else if (tvisfalse(o)) {
519 return lj_str_newlit(L, "false");
520 } else if (tvistrue(o)) {
521 return lj_str_newlit(L, "true");
522 } else {
523 char buf[8+2+2+16], *p = buf;
524 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
525 *p++ = ':'; *p++ = ' ';
526 if (tvisfunc(o) && isffunc(funcV(o))) {
527 p = lj_buf_wmem(p, "builtin#", 8);
528 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
529 } else {
530 p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o));
531 }
532 return lj_str_new(L, buf, (size_t)(p - buf));
533 }
534}
535
536/* -- Internal string formatting ------------------------------------------ */
537
538/*
539** These functions are only used for lua_pushfstring(), lua_pushvfstring()
540** and for internal string formatting (e.g. error messages). Caveat: unlike
541** string.format(), only a limited subset of formats and flags are supported!
542**
543** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
544** - %d %u %o %x with full formatting, 32 bit integers only.
545** - %f and other FP formats are really %.14g.
546** - %s %c %p without formatting.
547*/
548
549/* Push formatted message as a string object to Lua stack. va_list variant. */
550const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
551{
552 SBuf *sb = lj_buf_tmp_(L);
553 FormatState fs;
554 SFormat sf;
555 GCstr *str;
556 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
557 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
558 switch (STRFMT_TYPE(sf)) {
559 case STRFMT_LIT:
560 lj_buf_putmem(sb, fs.str, fs.len);
561 break;
562 case STRFMT_INT:
563 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
564 break;
565 case STRFMT_UINT:
566 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
567 break;
568 case STRFMT_NUM:
569 lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
570 break;
571 case STRFMT_STR: {
572 const char *s = va_arg(argp, char *);
573 if (s == NULL) s = "(null)";
574 lj_buf_putmem(sb, s, (MSize)strlen(s));
575 break;
576 }
577 case STRFMT_CHAR:
578 lj_buf_putb(sb, va_arg(argp, int));
579 break;
580 case STRFMT_PTR:
581 lj_strfmt_putptr(sb, va_arg(argp, void *));
582 break;
583 case STRFMT_ERR:
584 default:
585 lj_buf_putb(sb, '?');
586 lj_assertL(0, "bad string format near offset %d", fs.len);
587 break;
588 }
589 }
590 str = lj_buf_str(L, sb);
591 setstrV(L, L->top, str);
592 incr_top(L);
593 return strdata(str);
594}
595
596/* Push formatted message as a string object to Lua stack. Vararg variant. */
597const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
598{
599 const char *msg;
600 va_list argp;
601 va_start(argp, fmt);
602 msg = lj_strfmt_pushvf(L, fmt, argp);
603 va_end(argp);
604 return msg;
605}
606
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..cb2c7360
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,131 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STRFMT_H
7#define _LJ_STRFMT_H
8
9#include "lj_obj.h"
10
11typedef uint32_t SFormat; /* Format indicator. */
12
13/* Format parser state. */
/*
** p and e are set up by lj_strfmt_init(). str and len describe the literal
** text that a caller reads back when the parser reports STRFMT_LIT (see the
** STRFMT_LIT case in lj_strfmt_pushvf()).
*/
14typedef struct FormatState {
15 const uint8_t *p; /* Current format string pointer. */
16 const uint8_t *e; /* End of format string. */
17 const char *str; /* Returned literal string. */
18 MSize len; /* Size of literal string. */
19} FormatState;
20
21/* Format types (max. 16). */
/*
** The type is extracted from an SFormat with STRFMT_TYPE(), which masks the
** low 4 bits; that mask is what limits the number of types to 16.
** STRFMT_EOF is the first enumerator and therefore has the value 0.
*/
22typedef enum FormatType {
23 STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
24 STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
25} FormatType;
26
27/* Format subtypes (bits are reused). */
28#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
29#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
30#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
31#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
32#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
33#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
34#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
35
36/* Format flags. */
37#define STRFMT_F_LEFT 0x0100
38#define STRFMT_F_PLUS 0x0200
39#define STRFMT_F_ZERO 0x0400
40#define STRFMT_F_SPACE 0x0800
41#define STRFMT_F_ALT 0x1000
42#define STRFMT_F_UPPER 0x2000
43
44/* Format indicator fields. */
/*
** Layout of an SFormat value as used by the accessors below:
**   bits  0..3   format type (STRFMT_TYPE)
**   bits  4..5   subtype; for STRFMT_NUM this is the FP style (STRFMT_FP)
**   bits  8..13  STRFMT_F_* flags
**   bits 16..23  field width (STRFMT_WIDTH)
**   bits 24..31  precision + 1 (STRFMT_PREC)
*/
45#define STRFMT_SH_WIDTH 16
46#define STRFMT_SH_PREC 24
47
48#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
49#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
/*
** The precision is stored biased by one (compare STRFMT_G14, which stores
** 14+1). A stored value of 0 makes STRFMT_PREC() yield (unsigned)-1, which
** lj_strfmt_wfnum() treats as "use the default precision".
*/
50#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
51#define STRFMT_FP(sf) (((sf) >> 4) & 3)
52
53/* Formats for conversion characters. */
54#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
55#define STRFMT_C (STRFMT_CHAR)
56#define STRFMT_D (STRFMT_INT)
57#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
58#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
59#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
60#define STRFMT_I STRFMT_D
61#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
62#define STRFMT_P (STRFMT_PTR)
63#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
64#define STRFMT_S (STRFMT_STR)
65#define STRFMT_U (STRFMT_UINT)
66#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
67#define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC))
68
69/* Maximum buffer sizes for conversions. */
70#define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */
71#define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */
72#define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */
73#define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */
74
75/* Format parser. */
76LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
77
78static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
79{
80 fs->p = (const uint8_t *)p;
81 fs->e = (const uint8_t *)p + len;
82 /* Must be NUL-terminated. May have NULs inside, too. */
83 lj_assertX(*fs->e == 0, "format not NUL-terminated");
84}
85
86/* Raw conversions. */
87LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
88LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
89LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
90LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp);
91
92/* Unformatted conversions to buffer. */
93LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
94#if LJ_HASJIT
95LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
96#endif
97LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
98#if LJ_HASJIT
99LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
100#endif
101
102/* Formatted conversions to buffer. */
103LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
104LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
105LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
106LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
107LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
108#if LJ_HASJIT
109LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
110#endif
111LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry);
112
113/* Conversions to strings. */
114LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
115LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
116LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
117#if LJ_HASJIT
118LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
119#endif
120LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
121
122/* Internal string formatting. */
123LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
124 va_list argp);
125LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
126#if defined(__GNUC__) || defined(__clang__)
127 __attribute__ ((format (printf, 2, 3)))
128#endif
129 ;
130
131#endif
diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c
new file mode 100644
index 00000000..dfd56bd4
--- /dev/null
+++ b/src/lj_strfmt_num.c
@@ -0,0 +1,592 @@
1/*
2** String formatting for floating-point numbers.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4** Contributed by Peter Cawley.
5*/
6
7#include <stdio.h>
8
9#define lj_strfmt_num_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_buf.h"
14#include "lj_str.h"
15#include "lj_strfmt.h"
16
17/* -- Precomputed tables -------------------------------------------------- */
18
19/* Rescale factors to push the exponent of a number towards zero. */
/*
** The list has 32 entries and is indexed with (e >> 6), where e is the
** 11 bit biased exponent of the double (see lj_strfmt_wfnum()).
** Each P(x) entry expands to 1e+0x in rescale_n and to -x in rescale_e;
** each N(x) entry expands to 1e-0x in rescale_n and to +x in rescale_e.
** A zero in rescale_e means no rescaling is attempted for that range.
*/
20#define RESCALE_EXPONENTS(P, N) \
21 P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \
22 P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \
23 N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \
24 N(251), N(270), N(289)
25
26#define ONE_E_P(X) 1e+0 ## X
27#define ONE_E_N(X) 1e-0 ## X
28static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) };
29static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) };
30#undef ONE_E_N
31#undef ONE_E_P
33/*
34** For p in range -70 through 57, this table encodes pairs (m, e) such that
35** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds.
36*/
/*
** The pairs are stored flat as m, e, m, e, ... lj_strfmt_wfnum() addresses
** a pair as four_ulp_m_e + eidx*2, with eidx asserted to be in [0, 128).
** m is read back through a (uint8_t) cast, so values above 127 are written
** here as negative int8_t literals.
*/
37static const int8_t four_ulp_m_e[] = {
38 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19,
39 -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16,
40 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14,
41 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3,
42 -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7,
43 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103,
44 -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3,
45 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2,
46 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4,
47 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34,
48 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10,
49 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13,
50 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15,
51 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17
52};
/*
** min(2^32-1, 10^e-1) for e in range 0 through 10.
** Entry e is the largest uint32_t that has at most e decimal digits, so
** entry e plus one is 10^e for e in range 0 through 9.
** The table is only ever read, hence it is declared const.
*/
static const uint32_t ndigits_dec_threshold[] = {
  0, 9U, 99U, 999U, 9999U, 99999U, 999999U,
  9999999U, 99999999U, 999999999U, 0xffffffffU
};
60/* -- Helper functions ---------------------------------------------------- */
61
62/* Compute the number of digits in the decimal representation of x. */
63static MSize ndigits_dec(uint32_t x)
64{
65 MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */
66 return t + (x > ndigits_dec_threshold[t]);
67}
68
/*
** Emit the leading decimal digit of x to *p and remove it from x.
** The digit x / sc is computed by multiplying with a rounded-up reciprocal
** scaled by 2^sh and shifting the product back down.
*/
#define WUINT9_DIGIT(p, x, sh, sc) \
  do { \
    uint32_t digit_ = ((x) * (((1 << (sh)) + (sc) - 1) / (sc))) >> (sh); \
    (x) -= digit_ * (sc); \
    *(p)++ = (char)('0' + digit_); \
  } while (0)

/* Write 9-digit unsigned integer to buffer. */
static char *lj_strfmt_wuint9(char *p, uint32_t u)
{
  uint32_t top, mid, low;
  /* Split into one leading digit and two groups of four digits. */
  mid = u / 10000;
  low = u - mid * 10000;
  top = mid / 10000;
  mid -= top * 10000;
  *p++ = (char)('0' + top);
  /* Middle four digits. */
  WUINT9_DIGIT(p, mid, 23, 1000);
  WUINT9_DIGIT(p, mid, 12, 100);
  WUINT9_DIGIT(p, mid, 10, 10);
  *p++ = (char)('0' + mid);
  /* Last four digits. */
  WUINT9_DIGIT(p, low, 23, 1000);
  WUINT9_DIGIT(p, low, 12, 100);
  WUINT9_DIGIT(p, low, 10, 10);
  *p++ = (char)('0' + low);
  return p;
}
#undef WUINT9_DIGIT
91
92/* -- Extended precision arithmetic --------------------------------------- */
93
94/*
95** The "nd" format is a fixed-precision decimal representation for numbers. It
96** consists of up to 64 uint32_t values, with each uint32_t storing a value
97** in the range [0, 1e9). A number in "nd" format consists of three variables:
98**
99** uint32_t nd[64];
100** uint32_t ndlo;
101** uint32_t ndhi;
102**
103** The integral part of the number is stored in nd[0 ... ndhi], the value of
104** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of
105** the number is zero, ndlo is zero. Otherwise, the fractional part is stored
106** in nd[ndlo ... 63], the value of which is taken to be
107** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}.
108**
109** If the array part had 128 elements rather than 64, then every double would
110** have an exact representation in "nd" format. With 64 elements, all integral
111** doubles have an exact representation, and all non-integral doubles have
112** enough digits to make both %.99e and %.99f do the right thing.
113*/
114
/*
** ND_MUL2K_MAX_SHIFT is the number of bits nd_mul2k() shifts in per pass.
** ND_MUL2K_DIV1E9(val) divides the shifted 64 bit value by 10^9.
** The non-LJ_64 variant uses 10^9 == 2^9 * 1953125: it shifts right by 9,
** truncates to 32 bits and then divides by 1953125.
** NOTE(review): the truncation presumably relies on val >> 9 fitting into
** 32 bits, i.e. on elements below 10^9 shifted by at most 11 -- confirm.
*/
115#if LJ_64
116#define ND_MUL2K_MAX_SHIFT 29
117#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000))
118#else
119#define ND_MUL2K_MAX_SHIFT 11
120#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125)
121#endif
122
123/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */
/*
** nd is the element array, ndhi the index of its highest element in use.
** The multiplication is done in passes of ND_MUL2K_MAX_SHIFT bits, plus a
** final pass for the remaining bits. Each pass shifts every element,
** splits off the part >= 10^9 as the carry into the next element and
** appends a new top element if a carry is left over.
** carry_in is OR-ed into the shifted lowest element of the first pass.
** NOTE(review): this acts as an addition only if carry_in fits into the
** bits freed by the shift -- confirm at the call sites.
** Returns the updated ndhi.
*/
124static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k,
125 uint32_t carry_in, SFormat sf)
126{
127 uint32_t i, ndlo = 0, start = 1;
128 /* Performance hacks. */
 /*
 ** For large k and any style other than %f, start is derived from ndhi and
 ** the precision. Whenever a pass appends a new top element, start is
 ** advanced, and if it was equal to ndlo, ndlo is advanced too, so the
 ** lowest elements are no longer updated by later passes.
 ** NOTE(review): presumably those elements cannot influence the requested
 ** digits any more -- confirm.
 */
129 if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) {
130 start = ndhi - (STRFMT_PREC(sf) + 17) / 8;
131 }
132 /* Real logic. */
133 while (k >= ND_MUL2K_MAX_SHIFT) {
134 for (i = ndlo; i <= ndhi; i++) {
135 uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in;
136 carry_in = ND_MUL2K_DIV1E9(val);
137 nd[i] = (uint32_t)val - carry_in * 1000000000;
138 }
139 if (carry_in) {
140 nd[++ndhi] = carry_in; carry_in = 0;
141 if (start++ == ndlo) ++ndlo;
142 }
143 k -= ND_MUL2K_MAX_SHIFT;
144 }
 /* Final pass for the remaining k < ND_MUL2K_MAX_SHIFT bits. */
145 if (k) {
146 for (i = ndlo; i <= ndhi; i++) {
147 uint64_t val = ((uint64_t)nd[i] << k) | carry_in;
148 carry_in = ND_MUL2K_DIV1E9(val);
149 nd[i] = (uint32_t)val - carry_in * 1000000000;
150 }
151 if (carry_in) nd[++ndhi] = carry_in;
152 }
153 return ndhi;
154}
155
156/* Divide nd by 2^k (ndlo is assumed to be zero). */
/*
** nd is the element array, ndhi the index of its highest element in use.
** The division is done in passes of 9 bits, plus a final pass for the
** remaining bits. In each pass the bits shifted out of an element are
** scaled up and added to the next lower element. Indexes wrap around
** modulo 64 (& 0x3f), so elements below index 0 are stored from index 63
** downwards; this is where the fractional part ends up.
** Returns the updated ndlo. The local changes to ndhi are not returned.
*/
157static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf)
158{
159 uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0;
160 /* Performance hacks. */
 /* Single element: zero stays zero; trailing zero bits are shifted out. */
161 if (!ndhi) {
162 if (!nd[0]) {
163 return 0;
164 } else {
165 uint32_t s = lj_ffs(nd[0]);
166 if (s >= k) { nd[0] >>= k; return 0; }
167 nd[0] >>= s; k -= s;
168 }
169 }
 /*
 ** stop1 and stop2 are element indexes derived from the precision; once
 ** ndlo reaches one of them, the 9 bit passes below stop extending the
 ** fraction downwards.
 */
170 if (k > 18) {
171 if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) {
172 stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9;
173 } else {
174 int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k;
175 int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114);
176 stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9;
177 stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8;
178 }
179 }
180 /* Real logic. */
 /*
 ** 10^9 == 2^9 * 1953125, so the 9 bits shifted out of an element are
 ** worth (val & 0x1ff) * 1953125 in the next lower element.
 */
181 while (k >= 9) {
182 uint32_t i = ndhi, carry = 0;
183 for (;;) {
184 uint32_t val = nd[i];
185 nd[i] = (val >> 9) + carry;
186 carry = (val & 0x1ff) * 1953125;
187 if (i == ndlo) break;
188 i = (i - 1) & 0x3f;
189 }
190 if (ndlo != stop1 && ndlo != stop2) {
191 if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
192 if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
193 } else if (!nd[ndhi]) {
194 if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
195 else return ndlo;
196 }
197 k -= 9;
198 }
 /* Final pass for the remaining k < 9 bits; mul is 10^9 / 2^k. */
199 if (k) {
200 uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0;
201 for (;;) {
202 uint32_t val = nd[i];
203 nd[i] = (val >> k) + carry;
204 carry = (val & mask) * mul;
205 if (i == ndlo) break;
206 i = (i - 1) & 0x3f;
207 }
208 if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
209 }
210 return ndlo;
211}
212
213/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */
214static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e)
215{
216 uint32_t i, carry;
217 if (e >= 0) {
218 i = (uint32_t)e/9;
219 carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1);
220 } else {
221 int32_t f = (e-8)/9;
222 i = (uint32_t)(64 + f);
223 carry = m * (ndigits_dec_threshold[e - f*9] + 1);
224 }
225 for (;;) {
226 uint32_t val = nd[i] + carry;
227 if (LJ_UNLIKELY(val >= 1000000000)) {
228 val -= 1000000000;
229 nd[i] = val;
230 if (LJ_UNLIKELY(i == ndhi)) {
231 ndhi = (ndhi + 1) & 0x3f;
232 nd[ndhi] = 1;
233 break;
234 }
235 carry = 1;
236 i = (i + 1) & 0x3f;
237 } else {
238 nd[i] = val;
239 break;
240 }
241 }
242 return ndhi;
243}
244
245/* Test whether two "nd" values are equal in their most significant digits. */
246static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen,
247 MSize prec)
248{
249 char nd9[9], ref9[9];
250 if (hilen <= prec) {
251 if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
252 prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f;
253 if (prec >= 9) {
254 if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
255 prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f;
256 }
257 } else {
258 prec -= hilen - 9;
259 }
260 lj_assertX(prec < 9, "bad precision %d", prec);
261 lj_strfmt_wuint9(nd9, nd[ndhi]);
262 lj_strfmt_wuint9(ref9, *ref);
263 return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5');
264}
265
266/* -- Formatted conversions to buffer ------------------------------------- */
267
268/* Write formatted floating-point number to either sb or p. */
/*
** sf selects the style (%a, %e, %f or %g via STRFMT_FP), the flags, the
** field width and the precision. n is the number to format.
** If p is NULL, space is reserved in sb with lj_buf_more() and the output is
** written there. Otherwise the output is written to p.
** NOTE(review): when p is given, the caller presumably has to provide a
** large enough buffer (lj_strfmt_num() uses STRFMT_MAXBUF_NUM) -- confirm.
** Returns a pointer just past the last character written.
*/
269static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
270{
271 MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len;
272 TValue t;
273 t.n = n;
 /* Sign bit shifted out; all exponent bits set means Inf or NaN. */
274 if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) {
275 /* Handle non-finite values uniformly for %a, %e, %f, %g. */
 /* XOR-ing 0x202020 into the three packed letters turns them upper-case. */
276 int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
277 if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) {
278 ch ^= ('n' << 16) | ('a' << 8) | 'n';
279 if ((sf & STRFMT_F_SPACE)) prefix = ' ';
280 } else {
281 ch ^= ('i' << 16) | ('n' << 8) | 'f';
282 if ((t.u32.hi & 0x80000000)) prefix = '-';
283 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
284 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
285 }
286 len = 3 + (prefix != 0);
287 if (!p) p = lj_buf_more(sb, width > len ? width : len);
288 if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
289 if (prefix) *p++ = prefix;
290 *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
291 } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) {
292 /* %a */
 /* hexdig[16] is the exponent letter and hexdig[17] the prefix letter. */
293 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX"
294 : "0123456789abcdefpx";
295 int32_t e = (t.u32.hi >> 20) & 0x7ff;
296 char prefix = 0, eprefix = '+';
297 if (t.u32.hi & 0x80000000) prefix = '-';
298 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
299 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
300 t.u32.hi &= 0xfffff;
301 if (e) {
302 t.u32.hi |= 0x100000;
303 e -= 1023;
304 } else if (t.u32.lo | t.u32.hi) {
305 /* Non-zero denormal - normalise it. */
306 uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo);
307 e = -1022 - shift;
308 t.u64 <<= shift;
309 }
310 /* abs(n) == t.u64 * 2^(e - 52) */
311 /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */
312 if ((int32_t)prec < 0) {
313 /* Default precision: use smallest precision giving exact result. */
314 prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4;
315 } else if (prec < 13) {
316 /* Precision is sufficiently low as to maybe require rounding. */
317 t.u64 += (((uint64_t)1) << (51 - prec*4));
318 }
319 if (e < 0) {
320 eprefix = '-';
321 e = -e;
322 }
323 len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0)
324 + ((prec | (sf & STRFMT_F_ALT)) != 0);
325 if (!p) p = lj_buf_more(sb, width > len ? width : len);
326 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
327 while (width-- > len) *p++ = ' ';
328 }
329 if (prefix) *p++ = prefix;
330 *p++ = '0';
331 *p++ = hexdig[17]; /* x or X */
332 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
333 while (width-- > len) *p++ = '0';
334 }
335 *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */
336 if ((prec | (sf & STRFMT_F_ALT))) {
337 /* Emit fractional part. */
338 char *q = p + 1 + prec;
339 *p = '.';
340 if (prec < 13) t.u64 >>= (52 - prec*4);
341 else while (prec > 13) p[prec--] = '0';
342 while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; }
343 p = q;
344 }
345 *p++ = hexdig[16]; /* p or P */
346 *p++ = eprefix; /* + or - */
347 p = lj_strfmt_wint(p, e);
348 } else {
349 /* %e or %f or %g - begin by converting n to "nd" format. */
350 uint32_t nd[64];
351 uint32_t ndhi = 0, ndlo, i;
352 int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0;
353 char prefix = 0, *q;
354 if (t.u32.hi & 0x80000000) prefix = '-';
355 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
356 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
 /* An unspecified precision arrives as (MSize)-1, and -1 + 7 == 6. */
357 prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */
358 if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) {
359 /* %g - decrement precision if non-zero (to make it like %e). */
360 prec--;
 /* A precision of 0 has just become -1; the XOR turns it back into 0. */
361 prec ^= (uint32_t)((int32_t)prec >> 31);
362 }
 /* The STRFMT_T_FP_E bit is set for both %e and %g. */
363 if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) {
364 /* Precision is sufficiently low that rescaling will probably work. */
365 if ((ndebias = rescale_e[e >> 6])) {
366 t.n = n * rescale_n[e >> 6];
367 if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10;
368 t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */
369 nd[0] = 0x100000 | (t.u32.hi & 0xfffff);
370 e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29);
 /* rescale_failed is entered by goto: redo the conversion unscaled. */
371 goto load_t_lo; rescale_failed:
372 t.n = n;
373 e = (t.u32.hi >> 20) & 0x7ff;
374 ndebias = ndhi = 0;
375 }
376 }
377 nd[0] = t.u32.hi & 0xfffff;
378 if (e == 0) e++; else nd[0] |= 0x100000;
379 e -= 1043;
380 if (t.u32.lo) {
381 e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo:
382#if ND_MUL2K_MAX_SHIFT >= 29
383 nd[0] = (nd[0] << 3) | (t.u32.lo >> 29);
384 ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf);
385#elif ND_MUL2K_MAX_SHIFT >= 11
386 ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf);
387 ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf);
388 ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf);
389#else
390#error "ND_MUL2K_MAX_SHIFT too small"
391#endif
392 }
393 if (e >= 0) {
394 ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf);
395 ndlo = 0;
396 } else {
397 ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf);
398 if (ndhi && !nd[ndhi]) ndhi--;
399 }
400 /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */
401 if ((sf & STRFMT_T_FP_E)) {
402 /* %e or %g - assume %e and start by calculating nd's exponent (nde). */
403 char eprefix = '+';
404 int32_t nde = -1;
405 MSize hilen;
406 if (ndlo && !nd[ndhi]) {
407 ndhi = 64; do {} while (!nd[--ndhi]);
408 nde -= 64 * 9;
409 }
410 hilen = ndigits_dec(nd[ndhi]);
411 nde += ndhi * 9 + hilen;
412 if (ndebias) {
413 /*
414 ** Rescaling was performed, but this introduced some error, and might
415 ** have pushed us across a rounding boundary. We check whether this
416 ** error affected the result by introducing even more error (2ulp in
417 ** either direction), and seeing whether a rounding boundary was
418 ** crossed. Having already converted the -2ulp case, we save off its
419 ** most significant digits, convert the +2ulp case, and compare them.
420 */
421 int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29)
422 + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12));
423 const int8_t *m_e = four_ulp_m_e + eidx * 2;
424 lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx);
425 nd[33] = nd[ndhi];
426 nd[32] = nd[(ndhi - 1) & 0x3f];
427 nd[31] = nd[(ndhi - 2) & 0x3f];
428 nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]);
429 if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) {
430 goto rescale_failed;
431 }
432 }
433 if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) {
434 /* Precision is sufficiently low as to maybe require rounding. */
435 ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1);
436 nde += (hilen != ndigits_dec(nd[ndhi]));
437 }
438 nde += ndebias;
 /* Within this %e/%g branch the STRFMT_T_FP_F bit is only set for %g. */
439 if ((sf & STRFMT_T_FP_F)) {
440 /* %g */
441 if ((int32_t)prec >= nde && nde >= -4) {
442 if (nde < 0) ndhi = 0;
443 prec -= nde;
444 goto g_format_like_f;
445 } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) {
446 /* Decrease precision in order to strip trailing zeroes. */
447 char tail[9];
448 uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9;
449 if (prec >= maxprec) prec = maxprec;
450 else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f;
451 i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10;
452 lj_strfmt_wuint9(tail, nd[ndlo]);
453 while (prec && tail[--i] == '0') {
454 prec--;
455 if (!i) {
456 if (ndlo == ndhi) { prec = 0; break; }
457 lj_strfmt_wuint9(tail, nd[++ndlo]);
458 i = 9;
459 }
460 }
461 }
462 }
463 if (nde < 0) {
464 /* Make nde non-negative. */
465 eprefix = '-';
466 nde = -nde;
467 }
468 len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10)
469 + ((prec | (sf & STRFMT_F_ALT)) != 0);
470 if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5);
471 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
472 while (width-- > len) *p++ = ' ';
473 }
474 if (prefix) *p++ = prefix;
475 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
476 while (width-- > len) *p++ = '0';
477 }
478 q = lj_strfmt_wint(p + 1, nd[ndhi]);
479 p[0] = p[1]; /* Put leading digit in the correct place. */
480 if ((prec | (sf & STRFMT_F_ALT))) {
481 /* Emit fractional part. */
482 p[1] = '.'; p += 2;
483 prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */
484 /* Then emit chunks of 9 digits (this may emit 8 digits too many). */
485 for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) {
486 i = (i - 1) & 0x3f;
487 p = lj_strfmt_wuint9(p, nd[i]);
488 }
489 if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) {
490 /* %g (and not %#g) - strip trailing zeroes. */
491 p += (int32_t)prec & ((int32_t)prec >> 31);
492 while (p[-1] == '0') p--;
493 if (p[-1] == '.') p--;
494 } else {
495 /* %e (or %#g) - emit trailing zeroes. */
496 while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
497 p += (int32_t)prec;
498 }
499 } else {
500 p++;
501 }
502 *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e';
503 *p++ = eprefix; /* + or - */
504 if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */
505 p = lj_strfmt_wint(p, nde);
506 } else {
507 /* %f (or, shortly, %g in %f style) */
508 if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) {
509 /* Precision is sufficiently low as to maybe require rounding. */
510 ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1);
511 }
512 g_format_like_f:
513 if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) {
514 /* Decrease precision in order to strip trailing zeroes. */
515 if (ndlo) {
516 /* nd has a fractional part; we need to look at its digits. */
517 char tail[9];
518 uint32_t maxprec = (64 - ndlo) * 9;
519 if (prec >= maxprec) prec = maxprec;
520 else ndlo = 64 - (prec + 8) / 9;
521 i = prec - ((63 - ndlo) * 9);
522 lj_strfmt_wuint9(tail, nd[ndlo]);
523 while (prec && tail[--i] == '0') {
524 prec--;
525 if (!i) {
526 if (ndlo == 63) { prec = 0; break; }
527 lj_strfmt_wuint9(tail, nd[++ndlo]);
528 i = 9;
529 }
530 }
531 } else {
532 /* nd has no fractional part, so precision goes straight to zero. */
533 prec = 0;
534 }
535 }
536 len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0)
537 + ((prec | (sf & STRFMT_F_ALT)) != 0);
538 if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8);
539 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
540 while (width-- > len) *p++ = ' ';
541 }
542 if (prefix) *p++ = prefix;
543 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
544 while (width-- > len) *p++ = '0';
545 }
546 /* Emit integer part. */
547 p = lj_strfmt_wint(p, nd[ndhi]);
548 i = ndhi;
549 while (i) p = lj_strfmt_wuint9(p, nd[--i]);
550 if ((prec | (sf & STRFMT_F_ALT))) {
551 /* Emit fractional part. */
552 *p++ = '.';
553 /* Emit chunks of 9 digits (this may emit 8 digits too many). */
554 while ((int32_t)prec > 0 && i != ndlo) {
555 i = (i - 1) & 0x3f;
556 p = lj_strfmt_wuint9(p, nd[i]);
557 prec -= 9;
558 }
559 if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) {
560 /* %g (and not %#g) - strip trailing zeroes. */
561 p += (int32_t)prec & ((int32_t)prec >> 31);
562 while (p[-1] == '0') p--;
563 if (p[-1] == '.') p--;
564 } else {
565 /* %f (or %#g) - emit trailing zeroes. */
566 while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
567 p += (int32_t)prec;
568 }
569 }
570 }
571 }
 /* Left-justified output is padded with spaces on the right. */
572 if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
573 return p;
574}
575
576/* Add formatted floating-point number to buffer. */
577SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
578{
579 sb->w = lj_strfmt_wfnum(sb, sf, n, NULL);
580 return sb;
581}
582
583/* -- Conversions to strings ---------------------------------------------- */
584
585/* Convert number to string. */
586GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
587{
588 char buf[STRFMT_MAXBUF_NUM];
589 MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf);
590 return lj_str_new(L, buf, len);
591}
592
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index ccf74860..f681fbb0 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -79,7 +79,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
79 /* Avoid double rounding for denormals. */ 79 /* Avoid double rounding for denormals. */
80 if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { 80 if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) {
81 /* NYI: all of this generates way too much code on 32 bit CPUs. */ 81 /* NYI: all of this generates way too much code on 32 bit CPUs. */
82#if defined(__GNUC__) && LJ_64 82#if (defined(__GNUC__) || defined(__clang__)) && LJ_64
83 int32_t b = (int32_t)(__builtin_clzll(x)^63); 83 int32_t b = (int32_t)(__builtin_clzll(x)^63);
84#else 84#else
85 int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : 85 int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) :
@@ -93,7 +93,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
93 } 93 }
94 94
95 /* Convert to double using a signed int64_t conversion, then rescale. */ 95 /* Convert to double using a signed int64_t conversion, then rescale. */
96 lua_assert((int64_t)x >= 0); 96 lj_assertX((int64_t)x >= 0, "bad double conversion");
97 n = (double)(int64_t)x; 97 n = (double)(int64_t)x;
98 if (neg) n = -n; 98 if (neg) n = -n;
99 if (ex2) n = ldexp(n, ex2); 99 if (ex2) n = ldexp(n, ex2);
@@ -140,7 +140,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
140 break; 140 break;
141 } 141 }
142 142
143 /* Reduce range then convert to double. */ 143 /* Reduce range, then convert to double. */
144 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } 144 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
145 strscan_double(x, o, ex2, neg); 145 strscan_double(x, o, ex2, neg);
146 return fmt; 146 return fmt;
@@ -262,7 +262,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
262 uint32_t hi = 0, lo = (uint32_t)(xip-xi); 262 uint32_t hi = 0, lo = (uint32_t)(xip-xi);
263 int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); 263 int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1);
264 264
265 lua_assert(lo > 0 && (ex10 & 1) == 0); 265 lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10);
266 266
267 /* Handle simple overflow/underflow. */ 267 /* Handle simple overflow/underflow. */
268 if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } 268 if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; }
@@ -326,10 +326,55 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
326 return fmt; 326 return fmt;
327} 327}
328 328
329/* Parse binary number. */
330static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
331 StrScanFmt fmt, uint32_t opt,
332 int32_t ex2, int32_t neg, uint32_t dig)
333{
334 uint64_t x = 0;
335 uint32_t i;
336
337 if (ex2 || dig > 64) return STRSCAN_ERROR;
338
339 /* Scan binary digits. */
340 for (i = dig; i; i--, p++) {
341 if ((*p & ~1) != '0') return STRSCAN_ERROR;
342 x = (x << 1) | (*p & 1);
343 }
344
345 /* Format-specific handling. */
346 switch (fmt) {
347 case STRSCAN_INT:
348 if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
349 o->i = neg ? -(int32_t)x : (int32_t)x;
350 return STRSCAN_INT; /* Fast path for 32 bit integers. */
351 }
352 if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
353 /* fallthrough */
354 case STRSCAN_U32:
355 if (dig > 32) return STRSCAN_ERROR;
356 o->i = neg ? -(int32_t)x : (int32_t)x;
357 return STRSCAN_U32;
358 case STRSCAN_I64:
359 case STRSCAN_U64:
360 o->u64 = neg ? (uint64_t)-(int64_t)x : x;
361 return fmt;
362 default:
363 break;
364 }
365
366 /* Reduce range, then convert to double. */
367 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
368 strscan_double(x, o, ex2, neg);
369 return fmt;
370}
371
329/* Scan string containing a number. Returns format. Returns value in o. */ 372/* Scan string containing a number. Returns format. Returns value in o. */
330StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) 373StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
374 uint32_t opt)
331{ 375{
332 int32_t neg = 0; 376 int32_t neg = 0;
377 const uint8_t *pe = p + len;
333 378
334 /* Remove leading space, parse sign and non-numbers. */ 379 /* Remove leading space, parse sign and non-numbers. */
335 if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { 380 if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
@@ -347,7 +392,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
347 p += 3; 392 p += 3;
348 } 393 }
349 while (lj_char_isspace(*p)) p++; 394 while (lj_char_isspace(*p)) p++;
350 if (*p) return STRSCAN_ERROR; 395 if (*p || p < pe) return STRSCAN_ERROR;
351 o->u64 = tmp.u64; 396 o->u64 = tmp.u64;
352 return STRSCAN_NUM; 397 return STRSCAN_NUM;
353 } 398 }
@@ -364,8 +409,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
364 409
365 /* Determine base and skip leading zeros. */ 410 /* Determine base and skip leading zeros. */
366 if (LJ_UNLIKELY(*p <= '0')) { 411 if (LJ_UNLIKELY(*p <= '0')) {
367 if (*p == '0' && casecmp(p[1], 'x')) 412 if (*p == '0') {
368 base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; 413 if (casecmp(p[1], 'x'))
414 base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
415 else if (casecmp(p[1], 'b'))
416 base = 2, cmask = LJ_CHAR_DIGIT, p += 2;
417 }
369 for ( ; ; p++) { 418 for ( ; ; p++) {
370 if (*p == '0') { 419 if (*p == '0') {
371 hasdig = 1; 420 hasdig = 1;
@@ -394,6 +443,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
394 443
395 /* Handle decimal point. */ 444 /* Handle decimal point. */
396 if (dp) { 445 if (dp) {
446 if (base == 2) return STRSCAN_ERROR;
397 fmt = STRSCAN_NUM; 447 fmt = STRSCAN_NUM;
398 if (dig) { 448 if (dig) {
399 ex = (int32_t)(dp-(p-1)); dp = p-1; 449 ex = (int32_t)(dp-(p-1)); dp = p-1;
@@ -403,7 +453,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
403 } 453 }
404 454
405 /* Parse exponent. */ 455 /* Parse exponent. */
406 if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { 456 if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
407 uint32_t xx; 457 uint32_t xx;
408 int negx = 0; 458 int negx = 0;
409 fmt = STRSCAN_NUM; p++; 459 fmt = STRSCAN_NUM; p++;
@@ -441,6 +491,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
441 while (lj_char_isspace(*p)) p++; 491 while (lj_char_isspace(*p)) p++;
442 if (*p) return STRSCAN_ERROR; 492 if (*p) return STRSCAN_ERROR;
443 } 493 }
494 if (p < pe) return STRSCAN_ERROR;
444 495
445 /* Fast path for decimal 32 bit integers. */ 496 /* Fast path for decimal 32 bit integers. */
446 if (fmt == STRSCAN_INT && base == 10 && 497 if (fmt == STRSCAN_INT && base == 10 &&
@@ -459,6 +510,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
459 return strscan_oct(sp, o, fmt, neg, dig); 510 return strscan_oct(sp, o, fmt, neg, dig);
460 if (base == 16) 511 if (base == 16)
461 fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); 512 fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig);
513 else if (base == 2)
514 fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig);
462 else 515 else
463 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); 516 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
464 517
@@ -474,18 +527,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
474 527
475int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) 528int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
476{ 529{
477 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, 530 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
478 STRSCAN_OPT_TONUM); 531 STRSCAN_OPT_TONUM);
479 lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); 532 lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format");
480 return (fmt != STRSCAN_ERROR); 533 return (fmt != STRSCAN_ERROR);
481} 534}
482 535
483#if LJ_DUALNUM 536#if LJ_DUALNUM
484int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) 537int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
485{ 538{
486 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, 539 StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
487 STRSCAN_OPT_TOINT); 540 STRSCAN_OPT_TOINT);
488 lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); 541 lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT,
542 "bad scan format");
489 if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); 543 if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
490 return (fmt != STRSCAN_ERROR); 544 return (fmt != STRSCAN_ERROR);
491} 545}
diff --git a/src/lj_strscan.h b/src/lj_strscan.h
index d4d94923..7b7d6fed 100644
--- a/src/lj_strscan.h
+++ b/src/lj_strscan.h
@@ -22,7 +22,8 @@ typedef enum {
22 STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, 22 STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
23} StrScanFmt; 23} StrScanFmt;
24 24
25LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); 25LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
26 uint32_t opt);
26LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); 27LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
27#if LJ_DUALNUM 28#if LJ_DUALNUM
28LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); 29LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 3dbe34ba..ed5fd2dd 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -16,25 +16,10 @@
16 16
17/* -- Object hashing ------------------------------------------------------ */ 17/* -- Object hashing ------------------------------------------------------ */
18 18
19/* Hash values are masked with the table hash mask and used as an index. */
20static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
21{
22 Node *n = noderef(t->node);
23 return &n[hash & t->hmask];
24}
25
26/* String hashes are precomputed when they are interned. */
27#define hashstr(t, s) hashmask(t, (s)->hash)
28
29#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
30#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
31#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS)
32#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
33
34/* Hash an arbitrary key and return its anchor position in the hash table. */ 19/* Hash an arbitrary key and return its anchor position in the hash table. */
35static Node *hashkey(const GCtab *t, cTValue *key) 20static Node *hashkey(const GCtab *t, cTValue *key)
36{ 21{
37 lua_assert(!tvisint(key)); 22 lj_assertX(!tvisint(key), "attempt to hash integer");
38 if (tvisstr(key)) 23 if (tvisstr(key))
39 return hashstr(t, strV(key)); 24 return hashstr(t, strV(key));
40 else if (tvisnum(key)) 25 else if (tvisnum(key))
@@ -53,13 +38,13 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
53{ 38{
54 uint32_t hsize; 39 uint32_t hsize;
55 Node *node; 40 Node *node;
56 lua_assert(hbits != 0); 41 lj_assertL(hbits != 0, "zero hash size");
57 if (hbits > LJ_MAX_HBITS) 42 if (hbits > LJ_MAX_HBITS)
58 lj_err_msg(L, LJ_ERR_TABOV); 43 lj_err_msg(L, LJ_ERR_TABOV);
59 hsize = 1u << hbits; 44 hsize = 1u << hbits;
60 node = lj_mem_newvec(L, hsize, Node); 45 node = lj_mem_newvec(L, hsize, Node);
61 setmref(node->freetop, &node[hsize]);
62 setmref(t->node, node); 46 setmref(t->node, node);
47 setfreetop(t, node, &node[hsize]);
63 t->hmask = hsize-1; 48 t->hmask = hsize-1;
64} 49}
65 50
@@ -74,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t)
74{ 59{
75 uint32_t i, hmask = t->hmask; 60 uint32_t i, hmask = t->hmask;
76 Node *node = noderef(t->node); 61 Node *node = noderef(t->node);
77 lua_assert(t->hmask != 0); 62 lj_assertX(t->hmask != 0, "empty hash part");
78 for (i = 0; i <= hmask; i++) { 63 for (i = 0; i <= hmask; i++) {
79 Node *n = &node[i]; 64 Node *n = &node[i];
80 setmref(n->next, NULL); 65 setmref(n->next, NULL);
@@ -98,7 +83,8 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
98 GCtab *t; 83 GCtab *t;
99 /* First try to colocate the array part. */ 84 /* First try to colocate the array part. */
100 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { 85 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
101 lua_assert((sizeof(GCtab) & 7) == 0); 86 Node *nilnode;
87 lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size");
102 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); 88 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
103 t->gct = ~LJ_TTAB; 89 t->gct = ~LJ_TTAB;
104 t->nomm = (uint8_t)~0; 90 t->nomm = (uint8_t)~0;
@@ -107,8 +93,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
107 setgcrefnull(t->metatable); 93 setgcrefnull(t->metatable);
108 t->asize = asize; 94 t->asize = asize;
109 t->hmask = 0; 95 t->hmask = 0;
110 setmref(t->node, &G(L)->nilnode); 96 nilnode = &G(L)->nilnode;
97 setmref(t->node, nilnode);
98#if LJ_GC64
99 setmref(t->freetop, nilnode);
100#endif
111 } else { /* Otherwise separately allocate the array part. */ 101 } else { /* Otherwise separately allocate the array part. */
102 Node *nilnode;
112 t = lj_mem_newobj(L, GCtab); 103 t = lj_mem_newobj(L, GCtab);
113 t->gct = ~LJ_TTAB; 104 t->gct = ~LJ_TTAB;
114 t->nomm = (uint8_t)~0; 105 t->nomm = (uint8_t)~0;
@@ -117,7 +108,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
117 setgcrefnull(t->metatable); 108 setgcrefnull(t->metatable);
118 t->asize = 0; /* In case the array allocation fails. */ 109 t->asize = 0; /* In case the array allocation fails. */
119 t->hmask = 0; 110 t->hmask = 0;
120 setmref(t->node, &G(L)->nilnode); 111 nilnode = &G(L)->nilnode;
112 setmref(t->node, nilnode);
113#if LJ_GC64
114 setmref(t->freetop, nilnode);
115#endif
121 if (asize > 0) { 116 if (asize > 0) {
122 if (asize > LJ_MAX_ASIZE) 117 if (asize > LJ_MAX_ASIZE)
123 lj_err_msg(L, LJ_ERR_TABOV); 118 lj_err_msg(L, LJ_ERR_TABOV);
@@ -149,6 +144,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
149 return t; 144 return t;
150} 145}
151 146
147/* The API of this function conforms to lua_createtable(). */
148GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h)
149{
150 return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h));
151}
152
152#if LJ_HASJIT 153#if LJ_HASJIT
153GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) 154GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
154{ 155{
@@ -165,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
165 GCtab *t; 166 GCtab *t;
166 uint32_t asize, hmask; 167 uint32_t asize, hmask;
167 t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); 168 t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0);
168 lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); 169 lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask,
170 "mismatched size of table and template");
169 t->nomm = 0; /* Keys with metamethod names may be present. */ 171 t->nomm = 0; /* Keys with metamethod names may be present. */
170 asize = kt->asize; 172 asize = kt->asize;
171 if (asize > 0) { 173 if (asize > 0) {
@@ -185,7 +187,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
185 Node *node = noderef(t->node); 187 Node *node = noderef(t->node);
186 Node *knode = noderef(kt->node); 188 Node *knode = noderef(kt->node);
187 ptrdiff_t d = (char *)node - (char *)knode; 189 ptrdiff_t d = (char *)node - (char *)knode;
188 setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d)); 190 setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d));
189 for (i = 0; i <= hmask; i++) { 191 for (i = 0; i <= hmask; i++) {
190 Node *kn = &knode[i]; 192 Node *kn = &knode[i];
191 Node *n = &node[i]; 193 Node *n = &node[i];
@@ -198,6 +200,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
198 return t; 200 return t;
199} 201}
200 202
203/* Clear a table. */
204void LJ_FASTCALL lj_tab_clear(GCtab *t)
205{
206 clearapart(t);
207 if (t->hmask > 0) {
208 Node *node = noderef(t->node);
209 setfreetop(t, node, &node[t->hmask+1]);
210 clearhpart(t);
211 }
212}
213
201/* Free a table. */ 214/* Free a table. */
202void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) 215void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
203{ 216{
@@ -214,7 +227,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
214/* -- Table resizing ------------------------------------------------------ */ 227/* -- Table resizing ------------------------------------------------------ */
215 228
216/* Resize a table to fit the new array/hash part sizes. */ 229/* Resize a table to fit the new array/hash part sizes. */
217static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) 230void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
218{ 231{
219 Node *oldnode = noderef(t->node); 232 Node *oldnode = noderef(t->node);
220 uint32_t oldasize = t->asize; 233 uint32_t oldasize = t->asize;
@@ -247,6 +260,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
247 } else { 260 } else {
248 global_State *g = G(L); 261 global_State *g = G(L);
249 setmref(t->node, &g->nilnode); 262 setmref(t->node, &g->nilnode);
263#if LJ_GC64
264 setmref(t->freetop, &g->nilnode);
265#endif
250 t->hmask = 0; 266 t->hmask = 0;
251 } 267 }
252 if (asize < oldasize) { /* Array part shrinks? */ 268 if (asize < oldasize) { /* Array part shrinks? */
@@ -276,7 +292,7 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
276 292
277static uint32_t countint(cTValue *key, uint32_t *bins) 293static uint32_t countint(cTValue *key, uint32_t *bins)
278{ 294{
279 lua_assert(!tvisint(key)); 295 lj_assertX(!tvisint(key), "bad integer key");
280 if (tvisnum(key)) { 296 if (tvisnum(key)) {
281 lua_Number nk = numV(key); 297 lua_Number nk = numV(key);
282 int32_t k = lj_num2int(nk); 298 int32_t k = lj_num2int(nk);
@@ -348,7 +364,7 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
348 asize += countint(ek, bins); 364 asize += countint(ek, bins);
349 na = bestasize(bins, &asize); 365 na = bestasize(bins, &asize);
350 total -= na; 366 total -= na;
351 resizetab(L, t, asize, hsize2hbits(total)); 367 lj_tab_resize(L, t, asize, hsize2hbits(total));
352} 368}
353 369
354#if LJ_HASFFI 370#if LJ_HASFFI
@@ -360,7 +376,7 @@ void lj_tab_rehash(lua_State *L, GCtab *t)
360 376
361void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) 377void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
362{ 378{
363 resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); 379 lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
364} 380}
365 381
366/* -- Table getters ------------------------------------------------------- */ 382/* -- Table getters ------------------------------------------------------- */
@@ -378,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
378 return NULL; 394 return NULL;
379} 395}
380 396
381cTValue *lj_tab_getstr(GCtab *t, GCstr *key) 397cTValue *lj_tab_getstr(GCtab *t, const GCstr *key)
382{ 398{
383 Node *n = hashstr(t, key); 399 Node *n = hashstr(t, key);
384 do { 400 do {
@@ -428,16 +444,17 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
428 Node *n = hashkey(t, key); 444 Node *n = hashkey(t, key);
429 if (!tvisnil(&n->val) || t->hmask == 0) { 445 if (!tvisnil(&n->val) || t->hmask == 0) {
430 Node *nodebase = noderef(t->node); 446 Node *nodebase = noderef(t->node);
431 Node *collide, *freenode = noderef(nodebase->freetop); 447 Node *collide, *freenode = getfreetop(t, nodebase);
432 lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); 448 lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1,
449 "bad freenode");
433 do { 450 do {
434 if (freenode == nodebase) { /* No free node found? */ 451 if (freenode == nodebase) { /* No free node found? */
435 rehashtab(L, t, key); /* Rehash table. */ 452 rehashtab(L, t, key); /* Rehash table. */
436 return lj_tab_set(L, t, key); /* Retry key insertion. */ 453 return lj_tab_set(L, t, key); /* Retry key insertion. */
437 } 454 }
438 } while (!tvisnil(&(--freenode)->key)); 455 } while (!tvisnil(&(--freenode)->key));
439 setmref(nodebase->freetop, freenode); 456 setfreetop(t, nodebase, freenode);
440 lua_assert(freenode != &G(L)->nilnode); 457 lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash");
441 collide = hashkey(t, &n->key); 458 collide = hashkey(t, &n->key);
442 if (collide != n) { /* Colliding node not the main node? */ 459 if (collide != n) { /* Colliding node not the main node? */
443 while (noderef(collide->next) != n) /* Find predecessor. */ 460 while (noderef(collide->next) != n) /* Find predecessor. */
@@ -493,7 +510,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
493 if (LJ_UNLIKELY(tvismzero(&n->key))) 510 if (LJ_UNLIKELY(tvismzero(&n->key)))
494 n->key.u64 = 0; 511 n->key.u64 = 0;
495 lj_gc_anybarriert(L, t); 512 lj_gc_anybarriert(L, t);
496 lua_assert(tvisnil(&n->val)); 513 lj_assertL(tvisnil(&n->val), "new hash slot is not empty");
497 return &n->val; 514 return &n->val;
498} 515}
499 516
@@ -510,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
510 return lj_tab_newkey(L, t, &k); 527 return lj_tab_newkey(L, t, &k);
511} 528}
512 529
513TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) 530TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key)
514{ 531{
515 TValue k; 532 TValue k;
516 Node *n = hashstr(t, key); 533 Node *n = hashstr(t, key);
@@ -605,49 +622,62 @@ int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
605 622
606/* -- Table length calculation -------------------------------------------- */ 623/* -- Table length calculation -------------------------------------------- */
607 624
608static MSize unbound_search(GCtab *t, MSize j) 625/* Compute table length. Slow path with mixed array/hash lookups. */
626LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi)
609{ 627{
610 cTValue *tv; 628 cTValue *tv;
611 MSize i = j; /* i is zero or a present index */ 629 size_t lo = hi;
612 j++; 630 hi++;
613 /* find `i' and `j' such that i is present and j is not */ 631 /* Widening search for an upper bound. */
614 while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { 632 while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) {
615 i = j; 633 lo = hi;
616 j *= 2; 634 hi += hi;
617 if (j > (MSize)(INT_MAX-2)) { /* overflow? */ 635 if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */
618 /* table was built with bad purposes: resort to linear search */ 636 lo = 1;
619 i = 1; 637 while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++;
620 while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; 638 return (MSize)(lo - 1);
621 return i - 1;
622 } 639 }
623 } 640 }
624 /* now do a binary search between them */ 641 /* Binary search to find a non-nil to nil transition. */
625 while (j - i > 1) { 642 while (hi - lo > 1) {
626 MSize m = (i+j)/2; 643 size_t mid = (lo+hi) >> 1;
627 cTValue *tvb = lj_tab_getint(t, (int32_t)m); 644 cTValue *tvb = lj_tab_getint(t, (int32_t)mid);
628 if (tvb && !tvisnil(tvb)) i = m; else j = m; 645 if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid;
629 } 646 }
630 return i; 647 return (MSize)lo;
631} 648}
632 649
633/* 650/* Compute table length. Fast path. */
634** Try to find a boundary in table `t'. A `boundary' is an integer index
635** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
636*/
637MSize LJ_FASTCALL lj_tab_len(GCtab *t) 651MSize LJ_FASTCALL lj_tab_len(GCtab *t)
638{ 652{
639 MSize j = (MSize)t->asize; 653 size_t hi = (size_t)t->asize;
640 if (j > 1 && tvisnil(arrayslot(t, j-1))) { 654 if (hi) hi--;
641 MSize i = 1; 655 /* In a growing array the last array element is very likely nil. */
642 while (j - i > 1) { 656 if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) {
643 MSize m = (i+j)/2; 657 /* Binary search to find a non-nil to nil transition in the array. */
644 if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; 658 size_t lo = 0;
659 while (hi - lo > 1) {
660 size_t mid = (lo+hi) >> 1;
661 if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid;
645 } 662 }
646 return i-1; 663 return (MSize)lo;
647 } 664 }
648 if (j) j--; 665 /* Without a hash part, there's an implicit nil after the last element. */
649 if (t->hmask <= 0) 666 return t->hmask ? tab_len_slow(t, hi) : (MSize)hi;
650 return j;
651 return unbound_search(t, j);
652} 667}
653 668
669#if LJ_HASJIT
670/* Verify hinted table length or compute it. */
671MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
672{
673 size_t asize = (size_t)t->asize;
674 cTValue *tv = arrayslot(t, hint);
675 if (LJ_LIKELY(hint+1 < asize)) {
676 if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint;
677 } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) {
678 return (MSize)hint;
679 }
680 return lj_tab_len(t);
681}
682#endif
683
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 059a81eb..1efa9506 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -31,30 +31,52 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
31 return hi; 31 return hi;
32} 32}
33 33
34/* Hash values are masked with the table hash mask and used as an index. */
35static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
36{
37 Node *n = noderef(t->node);
38 return &n[hash & t->hmask];
39}
40
41/* String IDs are generated when a string is interned. */
42#define hashstr(t, s) hashmask(t, (s)->sid)
43
44#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
45#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
46#if LJ_GC64
47#define hashgcref(t, r) \
48 hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
49#else
50#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
51#endif
52
34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) 53#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
35 54
36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); 55LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
56LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h);
37#if LJ_HASJIT 57#if LJ_HASJIT
38LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); 58LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
39#endif 59#endif
40LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); 60LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
61LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
41LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); 62LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
42#if LJ_HASFFI 63#if LJ_HASFFI
43LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); 64LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
44#endif 65#endif
66LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits);
45LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); 67LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
46 68
47/* Caveat: all getters except lj_tab_get() can return NULL! */ 69/* Caveat: all getters except lj_tab_get() can return NULL! */
48 70
49LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); 71LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
50LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); 72LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key);
51LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); 73LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
52 74
53/* Caveat: all setters require a write barrier for the stored value. */ 75/* Caveat: all setters require a write barrier for the stored value. */
54 76
55LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 77LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
56LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 78LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
57LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 79LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key);
58LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 80LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
59 81
60#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) 82#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize)
@@ -66,5 +88,8 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
66 88
67LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); 89LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
68LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); 90LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
91#if LJ_HASJIT
92LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
93#endif
69 94
70#endif 95#endif
diff --git a/src/lj_target.h b/src/lj_target.h
index 043af37f..2d186b14 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -55,7 +55,7 @@ typedef uint32_t RegSP;
55/* Bitset for registers. 32 registers suffice for most architectures. 55/* Bitset for registers. 32 registers suffice for most architectures.
56** Note that one set holds bits for both GPRs and FPRs. 56** Note that one set holds bits for both GPRs and FPRs.
57*/ 57*/
58#if LJ_TARGET_PPC || LJ_TARGET_MIPS 58#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
59typedef uint64_t RegSet; 59typedef uint64_t RegSet;
60#else 60#else
61typedef uint32_t RegSet; 61typedef uint32_t RegSet;
@@ -69,7 +69,7 @@ typedef uint32_t RegSet;
69#define rset_set(rs, r) (rs |= RID2RSET(r)) 69#define rset_set(rs, r) (rs |= RID2RSET(r))
70#define rset_clear(rs, r) (rs &= ~RID2RSET(r)) 70#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
71#define rset_exclude(rs, r) (rs & ~RID2RSET(r)) 71#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
72#if LJ_TARGET_PPC || LJ_TARGET_MIPS 72#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
73#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) 73#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
74#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) 74#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
75#else 75#else
@@ -138,6 +138,8 @@ typedef uint32_t RegCost;
138#include "lj_target_x86.h" 138#include "lj_target_x86.h"
139#elif LJ_TARGET_ARM 139#elif LJ_TARGET_ARM
140#include "lj_target_arm.h" 140#include "lj_target_arm.h"
141#elif LJ_TARGET_ARM64
142#include "lj_target_arm64.h"
141#elif LJ_TARGET_PPC 143#elif LJ_TARGET_PPC
142#include "lj_target_ppc.h" 144#include "lj_target_ppc.h"
143#elif LJ_TARGET_MIPS 145#elif LJ_TARGET_MIPS
@@ -150,7 +152,8 @@ typedef uint32_t RegCost;
150/* Return the address of an exit stub. */ 152/* Return the address of an exit stub. */
151static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) 153static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
152{ 154{
153 lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); 155 lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL,
156 "exit stub group for exit %d uninitialized", exitno);
154 return (char *)group[exitno / EXITSTUBS_PER_GROUP] + 157 return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
155 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); 158 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
156} 159}
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index f87ab635..72516bc2 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -211,6 +211,7 @@ typedef enum ARMIns {
211 /* ARMv6T2 */ 211 /* ARMv6T2 */
212 ARMI_MOVW = 0xe3000000, 212 ARMI_MOVW = 0xe3000000,
213 ARMI_MOVT = 0xe3400000, 213 ARMI_MOVT = 0xe3400000,
214 ARMI_BFI = 0xe7c00010,
214 215
215 /* VFP */ 216 /* VFP */
216 ARMI_VMOV_D = 0xeeb00b40, 217 ARMI_VMOV_D = 0xeeb00b40,
@@ -243,10 +244,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 244 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 245 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 246 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 247 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 248 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 249 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
new file mode 100644
index 00000000..bf568a8d
--- /dev/null
+++ b/src/lj_target_arm64.h
@@ -0,0 +1,334 @@
1/*
2** Definitions for ARM64 CPUs.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TARGET_ARM64_H
7#define _LJ_TARGET_ARM64_H
8
9/* -- Registers IDs ------------------------------------------------------- */
10
11#define GPRDEF(_) \
12 _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
13 _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
14 _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
15 _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP)
16#define FPRDEF(_) \
17 _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
18 _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
19 _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
20 _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31)
21#define VRIDDEF(_)
22
23#define RIDENUM(name) RID_##name,
24
25enum {
26 GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
27 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
28 RID_MAX,
29 RID_TMP = RID_LR,
30 RID_ZERO = RID_SP,
31
32 /* Calling conventions. */
33 RID_RET = RID_X0,
34 RID_FPRET = RID_D0,
35
36 /* These definitions must match with the *.dasc file(s): */
37 RID_BASE = RID_X19, /* Interpreter BASE. */
38 RID_LPC = RID_X21, /* Interpreter PC. */
39 RID_GL = RID_X22, /* Interpreter GL. */
40 RID_LREG = RID_X23, /* Interpreter L. */
41
42 /* Register ranges [min, max) and number of registers. */
43 RID_MIN_GPR = RID_X0,
44 RID_MAX_GPR = RID_SP+1,
45 RID_MIN_FPR = RID_MAX_GPR,
46 RID_MAX_FPR = RID_D31+1,
47 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
48 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
49};
50
51#define RID_NUM_KREF RID_NUM_GPR
52#define RID_MIN_KREF RID_X0
53
54/* -- Register sets ------------------------------------------------------- */
55
56/* Make use of all registers, except for x18, fp, lr and sp. */
57#define RSET_FIXED \
58 (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\
59 RID2RSET(RID_GL))
60#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
61#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
62#define RSET_ALL (RSET_GPR|RSET_FPR)
63#define RSET_INIT RSET_ALL
64
65/* lr is an implicit scratch register. */
66#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1))
67#define RSET_SCRATCH_FPR \
68 (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
69#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
70#define REGARG_FIRSTGPR RID_X0
71#define REGARG_LASTGPR RID_X7
72#define REGARG_NUMGPR 8
73#define REGARG_FIRSTFPR RID_D0
74#define REGARG_LASTFPR RID_D7
75#define REGARG_NUMFPR 8
76
77/* -- Spill slots --------------------------------------------------------- */
78
79/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
80**
81** SPS_FIXED: Available fixed spill slots in interpreter frame.
82** This definition must match with the vm_arm64.dasc file.
83** Pre-allocate some slots to avoid sp adjust in every root trace.
84**
85** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
86*/
87#define SPS_FIXED 4
88#define SPS_FIRST 2
89
90#define SPOFS_TMP 0
91
92#define sps_scale(slot) (4 * (int32_t)(slot))
93#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
94
95/* -- Exit state ---------------------------------------------------------- */
96
97/* This definition must match with the *.dasc file(s). */
98typedef struct {
99 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
100 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
101 int32_t spill[256]; /* Spill slots. */
102} ExitState;
103
104/* Highest exit + 1 indicates stack check. */
105#define EXITSTATE_CHECKEXIT 1
106
107/* Return the address of a per-trace exit stub. */
108static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
109{
110 while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
111 return p + 3 + exitno;
112}
113/* Avoid dependence on lj_jit.h if only including lj_target.h. */
114#define exitstub_trace_addr(T, exitno) \
115 exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
116
117/* -- Instructions -------------------------------------------------------- */
118
119/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
120#if LJ_BE
121#define A64I_LE(x) (lj_bswap(x))
122#else
123#define A64I_LE(x) (x)
124#endif
125
126/* Instruction fields. */
127#define A64F_D(r) (r)
128#define A64F_N(r) ((r) << 5)
129#define A64F_A(r) ((r) << 10)
130#define A64F_M(r) ((r) << 16)
131#define A64F_IMMS(x) ((x) << 10)
132#define A64F_IMMR(x) ((x) << 16)
133#define A64F_U16(x) ((x) << 5)
134#define A64F_U12(x) ((x) << 10)
135#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
136#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
137#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
138#define A64F_S9(x) ((x) << 12)
139#define A64F_BIT(x) ((x) << 19)
140#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
141#define A64F_EX(ex) (A64I_EX | ((ex) << 13))
142#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10))
143#define A64F_FP8(x) ((x) << 13)
144#define A64F_CC(cc) ((cc) << 12)
145#define A64F_LSL16(x) (((x) / 16) << 21)
146#define A64F_BSH(sh) ((sh) << 10)
147
148/* Check for valid field range. */
149#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
150
151typedef enum A64Ins {
152 A64I_S = 0x20000000,
153 A64I_X = 0x80000000,
154 A64I_EX = 0x00200000,
155 A64I_ON = 0x00200000,
156 A64I_K12 = 0x1a000000,
157 A64I_K13 = 0x18000000,
158 A64I_LS_U = 0x01000000,
159 A64I_LS_S = 0x00800000,
160 A64I_LS_R = 0x01200800,
161 A64I_LS_SH = 0x00001000,
162 A64I_LS_UXTWx = 0x00004000,
163 A64I_LS_SXTWx = 0x0000c000,
164 A64I_LS_SXTXx = 0x0000e000,
165 A64I_LS_LSLx = 0x00006000,
166
167 A64I_ADDw = 0x0b000000,
168 A64I_ADDx = 0x8b000000,
169 A64I_ADDSw = 0x2b000000,
170 A64I_ADDSx = 0xab000000,
171 A64I_NEGw = 0x4b0003e0,
172 A64I_NEGx = 0xcb0003e0,
173 A64I_SUBw = 0x4b000000,
174 A64I_SUBx = 0xcb000000,
175 A64I_SUBSw = 0x6b000000,
176 A64I_SUBSx = 0xeb000000,
177
178 A64I_MULw = 0x1b007c00,
179 A64I_MULx = 0x9b007c00,
180 A64I_SMULL = 0x9b207c00,
181
182 A64I_ANDw = 0x0a000000,
183 A64I_ANDx = 0x8a000000,
184 A64I_ANDSw = 0x6a000000,
185 A64I_ANDSx = 0xea000000,
186 A64I_EORw = 0x4a000000,
187 A64I_EORx = 0xca000000,
188 A64I_ORRw = 0x2a000000,
189 A64I_ORRx = 0xaa000000,
190 A64I_TSTw = 0x6a00001f,
191 A64I_TSTx = 0xea00001f,
192
193 A64I_CMPw = 0x6b00001f,
194 A64I_CMPx = 0xeb00001f,
195 A64I_CMNw = 0x2b00001f,
196 A64I_CMNx = 0xab00001f,
197 A64I_CCMPw = 0x7a400000,
198 A64I_CCMPx = 0xfa400000,
199 A64I_CSELw = 0x1a800000,
200 A64I_CSELx = 0x9a800000,
201
202 A64I_ASRw = 0x13007c00,
203 A64I_ASRx = 0x9340fc00,
204 A64I_LSLx = 0xd3400000,
205 A64I_LSRx = 0xd340fc00,
206 A64I_SHRw = 0x1ac02000,
207 A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */
208 A64I_REVw = 0x5ac00800,
209 A64I_REVx = 0xdac00c00,
210
211 A64I_EXTRw = 0x13800000,
212 A64I_EXTRx = 0x93c00000,
213 A64I_BFMw = 0x33000000,
214 A64I_BFMx = 0xb3400000,
215 A64I_SBFMw = 0x13000000,
216 A64I_SBFMx = 0x93400000,
217 A64I_SXTBw = 0x13001c00,
218 A64I_SXTHw = 0x13003c00,
219 A64I_SXTW = 0x93407c00,
220 A64I_UBFMw = 0x53000000,
221 A64I_UBFMx = 0xd3400000,
222 A64I_UXTBw = 0x53001c00,
223 A64I_UXTHw = 0x53003c00,
224
225 A64I_MOVw = 0x2a0003e0,
226 A64I_MOVx = 0xaa0003e0,
227 A64I_MVNw = 0x2a2003e0,
228 A64I_MVNx = 0xaa2003e0,
229 A64I_MOVKw = 0x72800000,
230 A64I_MOVKx = 0xf2800000,
231 A64I_MOVZw = 0x52800000,
232 A64I_MOVZx = 0xd2800000,
233 A64I_MOVNw = 0x12800000,
234 A64I_MOVNx = 0x92800000,
235
236 A64I_LDRB = 0x39400000,
237 A64I_LDRH = 0x79400000,
238 A64I_LDRw = 0xb9400000,
239 A64I_LDRx = 0xf9400000,
240 A64I_LDRLw = 0x18000000,
241 A64I_LDRLx = 0x58000000,
242 A64I_STRB = 0x39000000,
243 A64I_STRH = 0x79000000,
244 A64I_STRw = 0xb9000000,
245 A64I_STRx = 0xf9000000,
246 A64I_STPw = 0x29000000,
247 A64I_STPx = 0xa9000000,
248 A64I_LDPw = 0x29400000,
249 A64I_LDPx = 0xa9400000,
250
251 A64I_B = 0x14000000,
252 A64I_BCC = 0x54000000,
253 A64I_BL = 0x94000000,
254 A64I_BR = 0xd61f0000,
255 A64I_BLR = 0xd63f0000,
256 A64I_TBZ = 0x36000000,
257 A64I_TBNZ = 0x37000000,
258 A64I_CBZ = 0x34000000,
259 A64I_CBNZ = 0x35000000,
260
261 A64I_NOP = 0xd503201f,
262
263 /* FP */
264 A64I_FADDd = 0x1e602800,
265 A64I_FSUBd = 0x1e603800,
266 A64I_FMADDd = 0x1f400000,
267 A64I_FMSUBd = 0x1f408000,
268 A64I_FNMADDd = 0x1f600000,
269 A64I_FNMSUBd = 0x1f608000,
270 A64I_FMULd = 0x1e600800,
271 A64I_FDIVd = 0x1e601800,
272 A64I_FNEGd = 0x1e614000,
273 A64I_FABS = 0x1e60c000,
274 A64I_FSQRTd = 0x1e61c000,
275 A64I_LDRs = 0xbd400000,
276 A64I_LDRd = 0xfd400000,
277 A64I_STRs = 0xbd000000,
278 A64I_STRd = 0xfd000000,
279 A64I_LDPs = 0x2d400000,
280 A64I_LDPd = 0x6d400000,
281 A64I_STPs = 0x2d000000,
282 A64I_STPd = 0x6d000000,
283 A64I_FCMPd = 0x1e602000,
284 A64I_FCMPZd = 0x1e602008,
285 A64I_FCSELd = 0x1e600c00,
286 A64I_FRINTMd = 0x1e654000,
287 A64I_FRINTPd = 0x1e64c000,
288 A64I_FRINTZd = 0x1e65c000,
289
290 A64I_FCVT_F32_F64 = 0x1e624000,
291 A64I_FCVT_F64_F32 = 0x1e22c000,
292 A64I_FCVT_F32_S32 = 0x1e220000,
293 A64I_FCVT_F64_S32 = 0x1e620000,
294 A64I_FCVT_F32_U32 = 0x1e230000,
295 A64I_FCVT_F64_U32 = 0x1e630000,
296 A64I_FCVT_F32_S64 = 0x9e220000,
297 A64I_FCVT_F64_S64 = 0x9e620000,
298 A64I_FCVT_F32_U64 = 0x9e230000,
299 A64I_FCVT_F64_U64 = 0x9e630000,
300 A64I_FCVT_S32_F64 = 0x1e780000,
301 A64I_FCVT_S32_F32 = 0x1e380000,
302 A64I_FCVT_U32_F64 = 0x1e790000,
303 A64I_FCVT_U32_F32 = 0x1e390000,
304 A64I_FCVT_S64_F64 = 0x9e780000,
305 A64I_FCVT_S64_F32 = 0x9e380000,
306 A64I_FCVT_U64_F64 = 0x9e790000,
307 A64I_FCVT_U64_F32 = 0x9e390000,
308
309 A64I_FMOV_S = 0x1e204000,
310 A64I_FMOV_D = 0x1e604000,
311 A64I_FMOV_R_S = 0x1e260000,
312 A64I_FMOV_S_R = 0x1e270000,
313 A64I_FMOV_R_D = 0x9e660000,
314 A64I_FMOV_D_R = 0x9e670000,
315 A64I_FMOV_DI = 0x1e601000,
316} A64Ins;
317
318typedef enum A64Shift {
319 A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
320} A64Shift;
321
322typedef enum A64Extend {
323 A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX,
324 A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX,
325} A64Extend;
326
327/* ARM condition codes. */
328typedef enum A64CC {
329 CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
330 CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
331 CC_HS = CC_CS, CC_LO = CC_CC
332} A64CC;
333
334#endif
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 6fb1613f..5da94605 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -13,11 +13,15 @@
13 _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ 13 _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
14 _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ 14 _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \
15 _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) 15 _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA)
16#if LJ_SOFTFP
17#define FPRDEF(_)
18#else
16#define FPRDEF(_) \ 19#define FPRDEF(_) \
17 _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ 20 _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
18 _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ 21 _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
19 _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ 22 _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
20 _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) 23 _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
24#endif
21#define VRIDDEF(_) 25#define VRIDDEF(_)
22 26
23#define RIDENUM(name) RID_##name, 27#define RIDENUM(name) RID_##name,
@@ -39,7 +43,11 @@ enum {
39 RID_RETHI = RID_R2, 43 RID_RETHI = RID_R2,
40 RID_RETLO = RID_R3, 44 RID_RETLO = RID_R3,
41#endif 45#endif
46#if LJ_SOFTFP
47 RID_FPRET = RID_R2,
48#else
42 RID_FPRET = RID_F0, 49 RID_FPRET = RID_F0,
50#endif
43 RID_CFUNCADDR = RID_R25, 51 RID_CFUNCADDR = RID_R25,
44 52
45 /* These definitions must match with the *.dasc file(s): */ 53 /* These definitions must match with the *.dasc file(s): */
@@ -52,8 +60,12 @@ enum {
52 /* Register ranges [min, max) and number of registers. */ 60 /* Register ranges [min, max) and number of registers. */
53 RID_MIN_GPR = RID_R0, 61 RID_MIN_GPR = RID_R0,
54 RID_MAX_GPR = RID_RA+1, 62 RID_MAX_GPR = RID_RA+1,
55 RID_MIN_FPR = RID_F0, 63 RID_MIN_FPR = RID_MAX_GPR,
64#if LJ_SOFTFP
65 RID_MAX_FPR = RID_MIN_FPR,
66#else
56 RID_MAX_FPR = RID_F31+1, 67 RID_MAX_FPR = RID_F31+1,
68#endif
57 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, 69 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
58 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ 70 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */
59}; 71};
@@ -68,28 +80,60 @@ enum {
68 (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ 80 (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
69 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) 81 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP))
70#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) 82#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
83#if LJ_SOFTFP
84#define RSET_FPR 0
85#else
86#if LJ_32
71#define RSET_FPR \ 87#define RSET_FPR \
72 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ 88 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
73 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ 89 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
74 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ 90 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
75 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) 91 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
76#define RSET_ALL (RSET_GPR|RSET_FPR) 92#else
77#define RSET_INIT RSET_ALL 93#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
94#endif
95#endif
96#define RSET_ALL (RSET_GPR|RSET_FPR)
97#define RSET_INIT RSET_ALL
78 98
79#define RSET_SCRATCH_GPR \ 99#define RSET_SCRATCH_GPR \
80 (RSET_RANGE(RID_R1, RID_R15+1)|\ 100 (RSET_RANGE(RID_R1, RID_R15+1)|\
81 RID2RSET(RID_R24)|RID2RSET(RID_R25)) 101 RID2RSET(RID_R24)|RID2RSET(RID_R25))
102#if LJ_SOFTFP
103#define RSET_SCRATCH_FPR 0
104#else
105#if LJ_32
82#define RSET_SCRATCH_FPR \ 106#define RSET_SCRATCH_FPR \
83 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ 107 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
84 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ 108 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
85 RID2RSET(RID_F16)|RID2RSET(RID_F18)) 109 RID2RSET(RID_F16)|RID2RSET(RID_F18))
110#else
111#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24)
112#endif
113#endif
86#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) 114#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
87#define REGARG_FIRSTGPR RID_R4 115#define REGARG_FIRSTGPR RID_R4
116#if LJ_32
88#define REGARG_LASTGPR RID_R7 117#define REGARG_LASTGPR RID_R7
89#define REGARG_NUMGPR 4 118#define REGARG_NUMGPR 4
119#else
120#define REGARG_LASTGPR RID_R11
121#define REGARG_NUMGPR 8
122#endif
123#if LJ_ABI_SOFTFP
124#define REGARG_FIRSTFPR 0
125#define REGARG_LASTFPR 0
126#define REGARG_NUMFPR 0
127#else
90#define REGARG_FIRSTFPR RID_F12 128#define REGARG_FIRSTFPR RID_F12
129#if LJ_32
91#define REGARG_LASTFPR RID_F14 130#define REGARG_LASTFPR RID_F14
92#define REGARG_NUMFPR 2 131#define REGARG_NUMFPR 2
132#else
133#define REGARG_LASTFPR RID_F19
134#define REGARG_NUMFPR 8
135#endif
136#endif
93 137
94/* -- Spill slots --------------------------------------------------------- */ 138/* -- Spill slots --------------------------------------------------------- */
95 139
@@ -100,7 +144,11 @@ enum {
100** 144**
101** SPS_FIRST: First spill slot for general use. 145** SPS_FIRST: First spill slot for general use.
102*/ 146*/
147#if LJ_32
103#define SPS_FIXED 5 148#define SPS_FIXED 5
149#else
150#define SPS_FIXED 4
151#endif
104#define SPS_FIRST 4 152#define SPS_FIRST 4
105 153
106#define SPOFS_TMP 0 154#define SPOFS_TMP 0
@@ -112,8 +160,10 @@ enum {
112 160
113/* This definition must match with the *.dasc file(s). */ 161/* This definition must match with the *.dasc file(s). */
114typedef struct { 162typedef struct {
163#if !LJ_SOFTFP
115 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 164 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
116 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 165#endif
166 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
117 int32_t spill[256]; /* Spill slots. */ 167 int32_t spill[256]; /* Spill slots. */
118} ExitState; 168} ExitState;
119 169
@@ -142,52 +192,87 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
142#define MIPSF_F(r) ((r) << 6) 192#define MIPSF_F(r) ((r) << 6)
143#define MIPSF_A(n) ((n) << 6) 193#define MIPSF_A(n) ((n) << 6)
144#define MIPSF_M(n) ((n) << 11) 194#define MIPSF_M(n) ((n) << 11)
195#define MIPSF_L(n) ((n) << 6)
145 196
146typedef enum MIPSIns { 197typedef enum MIPSIns {
198 MIPSI_D = 0x38,
199 MIPSI_DV = 0x10,
200 MIPSI_D32 = 0x3c,
147 /* Integer instructions. */ 201 /* Integer instructions. */
148 MIPSI_MOVE = 0x00000021, 202 MIPSI_MOVE = 0x00000025,
149 MIPSI_NOP = 0x00000000, 203 MIPSI_NOP = 0x00000000,
150 204
151 MIPSI_LI = 0x24000000, 205 MIPSI_LI = 0x24000000,
152 MIPSI_LU = 0x34000000, 206 MIPSI_LU = 0x34000000,
153 MIPSI_LUI = 0x3c000000, 207 MIPSI_LUI = 0x3c000000,
154 208
155 MIPSI_ADDIU = 0x24000000, 209 MIPSI_AND = 0x00000024,
156 MIPSI_ANDI = 0x30000000, 210 MIPSI_ANDI = 0x30000000,
211 MIPSI_OR = 0x00000025,
157 MIPSI_ORI = 0x34000000, 212 MIPSI_ORI = 0x34000000,
213 MIPSI_XOR = 0x00000026,
158 MIPSI_XORI = 0x38000000, 214 MIPSI_XORI = 0x38000000,
215 MIPSI_NOR = 0x00000027,
216
217 MIPSI_SLT = 0x0000002a,
218 MIPSI_SLTU = 0x0000002b,
159 MIPSI_SLTI = 0x28000000, 219 MIPSI_SLTI = 0x28000000,
160 MIPSI_SLTIU = 0x2c000000, 220 MIPSI_SLTIU = 0x2c000000,
161 221
162 MIPSI_ADDU = 0x00000021, 222 MIPSI_ADDU = 0x00000021,
223 MIPSI_ADDIU = 0x24000000,
224 MIPSI_SUB = 0x00000022,
163 MIPSI_SUBU = 0x00000023, 225 MIPSI_SUBU = 0x00000023,
226
227#if !LJ_TARGET_MIPSR6
164 MIPSI_MUL = 0x70000002, 228 MIPSI_MUL = 0x70000002,
165 MIPSI_AND = 0x00000024, 229 MIPSI_DIV = 0x0000001a,
166 MIPSI_OR = 0x00000025, 230 MIPSI_DIVU = 0x0000001b,
167 MIPSI_XOR = 0x00000026, 231
168 MIPSI_NOR = 0x00000027,
169 MIPSI_SLT = 0x0000002a,
170 MIPSI_SLTU = 0x0000002b,
171 MIPSI_MOVZ = 0x0000000a, 232 MIPSI_MOVZ = 0x0000000a,
172 MIPSI_MOVN = 0x0000000b, 233 MIPSI_MOVN = 0x0000000b,
234 MIPSI_MFHI = 0x00000010,
235 MIPSI_MFLO = 0x00000012,
236 MIPSI_MULT = 0x00000018,
237#else
238 MIPSI_MUL = 0x00000098,
239 MIPSI_MUH = 0x000000d8,
240 MIPSI_DIV = 0x0000009a,
241 MIPSI_DIVU = 0x0000009b,
242
243 MIPSI_SELEQZ = 0x00000035,
244 MIPSI_SELNEZ = 0x00000037,
245#endif
173 246
174 MIPSI_SLL = 0x00000000, 247 MIPSI_SLL = 0x00000000,
175 MIPSI_SRL = 0x00000002, 248 MIPSI_SRL = 0x00000002,
176 MIPSI_SRA = 0x00000003, 249 MIPSI_SRA = 0x00000003,
177 MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ 250 MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
251 MIPSI_DROTR = 0x0020003a,
252 MIPSI_DROTR32 = 0x0020003e,
178 MIPSI_SLLV = 0x00000004, 253 MIPSI_SLLV = 0x00000004,
179 MIPSI_SRLV = 0x00000006, 254 MIPSI_SRLV = 0x00000006,
180 MIPSI_SRAV = 0x00000007, 255 MIPSI_SRAV = 0x00000007,
181 MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ 256 MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
257 MIPSI_DROTRV = 0x00000056,
258
259 MIPSI_INS = 0x7c000004, /* MIPSXXR2 */
182 260
183 MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ 261 MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
184 MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ 262 MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
185 MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ 263 MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
264 MIPSI_DSBH = 0x7c0000a4,
186 265
187 MIPSI_B = 0x10000000, 266 MIPSI_B = 0x10000000,
188 MIPSI_J = 0x08000000, 267 MIPSI_J = 0x08000000,
189 MIPSI_JAL = 0x0c000000, 268 MIPSI_JAL = 0x0c000000,
269#if !LJ_TARGET_MIPSR6
270 MIPSI_JALX = 0x74000000,
190 MIPSI_JR = 0x00000008, 271 MIPSI_JR = 0x00000008,
272#else
273 MIPSI_JR = 0x00000009,
274 MIPSI_BALC = 0xe8000000,
275#endif
191 MIPSI_JALR = 0x0000f809, 276 MIPSI_JALR = 0x0000f809,
192 277
193 MIPSI_BEQ = 0x10000000, 278 MIPSI_BEQ = 0x10000000,
@@ -199,7 +284,9 @@ typedef enum MIPSIns {
199 284
200 /* Load/store instructions. */ 285 /* Load/store instructions. */
201 MIPSI_LW = 0x8c000000, 286 MIPSI_LW = 0x8c000000,
287 MIPSI_LD = 0xdc000000,
202 MIPSI_SW = 0xac000000, 288 MIPSI_SW = 0xac000000,
289 MIPSI_SD = 0xfc000000,
203 MIPSI_LB = 0x80000000, 290 MIPSI_LB = 0x80000000,
204 MIPSI_SB = 0xa0000000, 291 MIPSI_SB = 0xa0000000,
205 MIPSI_LH = 0x84000000, 292 MIPSI_LH = 0x84000000,
@@ -211,11 +298,69 @@ typedef enum MIPSIns {
211 MIPSI_LDC1 = 0xd4000000, 298 MIPSI_LDC1 = 0xd4000000,
212 MIPSI_SDC1 = 0xf4000000, 299 MIPSI_SDC1 = 0xf4000000,
213 300
301 /* MIPS64 instructions. */
302 MIPSI_DADD = 0x0000002c,
303 MIPSI_DADDU = 0x0000002d,
304 MIPSI_DADDIU = 0x64000000,
305 MIPSI_DSUB = 0x0000002e,
306 MIPSI_DSUBU = 0x0000002f,
307#if !LJ_TARGET_MIPSR6
308 MIPSI_DDIV = 0x0000001e,
309 MIPSI_DDIVU = 0x0000001f,
310 MIPSI_DMULT = 0x0000001c,
311 MIPSI_DMULTU = 0x0000001d,
312#else
313 MIPSI_DDIV = 0x0000009e,
314 MIPSI_DMOD = 0x000000de,
315 MIPSI_DDIVU = 0x0000009f,
316 MIPSI_DMODU = 0x000000df,
317 MIPSI_DMUL = 0x0000009c,
318 MIPSI_DMUH = 0x000000dc,
319#endif
320
321 MIPSI_DSLL = 0x00000038,
322 MIPSI_DSRL = 0x0000003a,
323 MIPSI_DSLLV = 0x00000014,
324 MIPSI_DSRLV = 0x00000016,
325 MIPSI_DSRA = 0x0000003b,
326 MIPSI_DSRAV = 0x00000017,
327 MIPSI_DSRA32 = 0x0000003f,
328 MIPSI_DSLL32 = 0x0000003c,
329 MIPSI_DSRL32 = 0x0000003e,
330 MIPSI_DSHD = 0x7c000164,
331
332 MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU,
333 MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU,
334 MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
335 MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
336 MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
337#if LJ_TARGET_MIPSR6
338 MIPSI_LSA = 0x00000005,
339 MIPSI_DLSA = 0x00000015,
340 MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA,
341#endif
342
343 /* Extract/insert instructions. */
344 MIPSI_DEXTM = 0x7c000001,
345 MIPSI_DEXTU = 0x7c000002,
346 MIPSI_DEXT = 0x7c000003,
347 MIPSI_DINSM = 0x7c000005,
348 MIPSI_DINSU = 0x7c000006,
349 MIPSI_DINS = 0x7c000007,
350
351 MIPSI_FLOOR_D = 0x4620000b,
352
214 /* FP instructions. */ 353 /* FP instructions. */
215 MIPSI_MOV_S = 0x46000006, 354 MIPSI_MOV_S = 0x46000006,
216 MIPSI_MOV_D = 0x46200006, 355 MIPSI_MOV_D = 0x46200006,
356#if !LJ_TARGET_MIPSR6
217 MIPSI_MOVT_D = 0x46210011, 357 MIPSI_MOVT_D = 0x46210011,
218 MIPSI_MOVF_D = 0x46200011, 358 MIPSI_MOVF_D = 0x46200011,
359#else
360 MIPSI_MIN_D = 0x4620001C,
361 MIPSI_MAX_D = 0x4620001E,
362 MIPSI_SEL_D = 0x46200010,
363#endif
219 364
220 MIPSI_ABS_D = 0x46200005, 365 MIPSI_ABS_D = 0x46200005,
221 MIPSI_NEG_D = 0x46200007, 366 MIPSI_NEG_D = 0x46200007,
@@ -235,23 +380,37 @@ typedef enum MIPSIns {
235 MIPSI_CVT_W_D = 0x46200024, 380 MIPSI_CVT_W_D = 0x46200024,
236 MIPSI_CVT_S_W = 0x46800020, 381 MIPSI_CVT_S_W = 0x46800020,
237 MIPSI_CVT_D_W = 0x46800021, 382 MIPSI_CVT_D_W = 0x46800021,
383 MIPSI_CVT_S_L = 0x46a00020,
384 MIPSI_CVT_D_L = 0x46a00021,
238 385
239 MIPSI_TRUNC_W_S = 0x4600000d, 386 MIPSI_TRUNC_W_S = 0x4600000d,
240 MIPSI_TRUNC_W_D = 0x4620000d, 387 MIPSI_TRUNC_W_D = 0x4620000d,
388 MIPSI_TRUNC_L_S = 0x46000009,
389 MIPSI_TRUNC_L_D = 0x46200009,
241 MIPSI_FLOOR_W_S = 0x4600000f, 390 MIPSI_FLOOR_W_S = 0x4600000f,
242 MIPSI_FLOOR_W_D = 0x4620000f, 391 MIPSI_FLOOR_W_D = 0x4620000f,
243 392
244 MIPSI_MFC1 = 0x44000000, 393 MIPSI_MFC1 = 0x44000000,
245 MIPSI_MTC1 = 0x44800000, 394 MIPSI_MTC1 = 0x44800000,
395 MIPSI_DMTC1 = 0x44a00000,
396 MIPSI_DMFC1 = 0x44200000,
246 397
398#if !LJ_TARGET_MIPSR6
247 MIPSI_BC1F = 0x45000000, 399 MIPSI_BC1F = 0x45000000,
248 MIPSI_BC1T = 0x45010000, 400 MIPSI_BC1T = 0x45010000,
249
250 MIPSI_C_EQ_D = 0x46200032, 401 MIPSI_C_EQ_D = 0x46200032,
402 MIPSI_C_OLT_S = 0x46000034,
251 MIPSI_C_OLT_D = 0x46200034, 403 MIPSI_C_OLT_D = 0x46200034,
252 MIPSI_C_ULT_D = 0x46200035, 404 MIPSI_C_ULT_D = 0x46200035,
253 MIPSI_C_OLE_D = 0x46200036, 405 MIPSI_C_OLE_D = 0x46200036,
254 MIPSI_C_ULE_D = 0x46200037, 406 MIPSI_C_ULE_D = 0x46200037,
407#else
408 MIPSI_BC1EQZ = 0x45200000,
409 MIPSI_BC1NEZ = 0x45a00000,
410 MIPSI_CMP_EQ_D = 0x46a00002,
411 MIPSI_CMP_LT_S = 0x46800004,
412 MIPSI_CMP_LT_D = 0x46a00004,
413#endif
255 414
256} MIPSIns; 415} MIPSIns;
257 416
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index b78cc420..c83dcc5e 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -104,7 +104,7 @@ enum {
104/* This definition must match with the *.dasc file(s). */ 104/* This definition must match with the *.dasc file(s). */
105typedef struct { 105typedef struct {
106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
107 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 107 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
108 int32_t spill[256]; /* Spill slots. */ 108 int32_t spill[256]; /* Spill slots. */
109} ExitState; 109} ExitState;
110 110
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 000cae49..a403f820 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -22,7 +22,7 @@
22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) 22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
23#endif 23#endif
24#define VRIDDEF(_) \ 24#define VRIDDEF(_) \
25 _(MRM) 25 _(MRM) _(RIP)
26 26
27#define RIDENUM(name) RID_##name, 27#define RIDENUM(name) RID_##name,
28 28
@@ -31,8 +31,10 @@ enum {
31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ 31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
32 RID_MAX, 32 RID_MAX,
33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
34 RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */
34 35
35 /* Calling conventions. */ 36 /* Calling conventions. */
37 RID_SP = RID_ESP,
36 RID_RET = RID_EAX, 38 RID_RET = RID_EAX,
37#if LJ_64 39#if LJ_64
38 RID_FPRET = RID_XMM0, 40 RID_FPRET = RID_XMM0,
@@ -62,8 +64,10 @@ enum {
62 64
63/* -- Register sets ------------------------------------------------------- */ 65/* -- Register sets ------------------------------------------------------- */
64 66
65/* Make use of all registers, except the stack pointer. */ 67/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
66#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) 68#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
69 - RID2RSET(RID_ESP) \
70 - LJ_GC64*RID2RSET(RID_DISPATCH))
67#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) 71#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
68#define RSET_ALL (RSET_GPR|RSET_FPR) 72#define RSET_ALL (RSET_GPR|RSET_FPR)
69#define RSET_INIT RSET_ALL 73#define RSET_INIT RSET_ALL
@@ -131,7 +135,11 @@ enum {
131#define SPS_FIXED (4*2) 135#define SPS_FIXED (4*2)
132#define SPS_FIRST (4*2) /* Don't use callee register save area. */ 136#define SPS_FIRST (4*2) /* Don't use callee register save area. */
133#else 137#else
138#if LJ_GC64
139#define SPS_FIXED 2
140#else
134#define SPS_FIXED 4 141#define SPS_FIXED 4
142#endif
135#define SPS_FIRST 2 143#define SPS_FIRST 2
136#endif 144#endif
137#else 145#else
@@ -157,6 +165,8 @@ typedef struct {
157#define EXITSTUB_SPACING (2+2) 165#define EXITSTUB_SPACING (2+2)
158#define EXITSTUBS_PER_GROUP 32 166#define EXITSTUBS_PER_GROUP 32
159 167
168#define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */
169
160/* -- x86 ModRM operand encoding ------------------------------------------ */ 170/* -- x86 ModRM operand encoding ------------------------------------------ */
161 171
162typedef enum { 172typedef enum {
@@ -184,12 +194,18 @@ typedef struct {
184#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) 194#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
185#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) 195#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
186 196
197#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24)))
198#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24)))
199#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24)))
200#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24)))
201
187/* This list of x86 opcodes is not intended to be complete. Opcodes are only 202/* This list of x86 opcodes is not intended to be complete. Opcodes are only
188** included when needed. Take a look at DynASM or jit.dis_x86 to see the 203** included when needed. Take a look at DynASM or jit.dis_x86 to see the
189** whole mess. 204** whole mess.
190*/ 205*/
191typedef enum { 206typedef enum {
192 /* Fixed length opcodes. XI_* prefix. */ 207 /* Fixed length opcodes. XI_* prefix. */
208 XI_O16 = 0x66,
193 XI_NOP = 0x90, 209 XI_NOP = 0x90,
194 XI_XCHGa = 0x90, 210 XI_XCHGa = 0x90,
195 XI_CALL = 0xe8, 211 XI_CALL = 0xe8,
@@ -207,26 +223,28 @@ typedef enum {
207 XI_PUSHi8 = 0x6a, 223 XI_PUSHi8 = 0x6a,
208 XI_TESTb = 0x84, 224 XI_TESTb = 0x84,
209 XI_TEST = 0x85, 225 XI_TEST = 0x85,
226 XI_INT3 = 0xcc,
210 XI_MOVmi = 0xc7, 227 XI_MOVmi = 0xc7,
211 XI_GROUP5 = 0xff, 228 XI_GROUP5 = 0xff,
212 229
213 /* Note: little-endian byte-order! */ 230 /* Note: little-endian byte-order! */
214 XI_FLDZ = 0xeed9, 231 XI_FLDZ = 0xeed9,
215 XI_FLD1 = 0xe8d9, 232 XI_FLD1 = 0xe8d9,
216 XI_FLDLG2 = 0xecd9,
217 XI_FLDLN2 = 0xedd9,
218 XI_FDUP = 0xc0d9, /* Really fld st0. */ 233 XI_FDUP = 0xc0d9, /* Really fld st0. */
219 XI_FPOP = 0xd8dd, /* Really fstp st0. */ 234 XI_FPOP = 0xd8dd, /* Really fstp st0. */
220 XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ 235 XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
221 XI_FRNDINT = 0xfcd9, 236 XI_FRNDINT = 0xfcd9,
222 XI_FSIN = 0xfed9,
223 XI_FCOS = 0xffd9,
224 XI_FPTAN = 0xf2d9,
225 XI_FPATAN = 0xf3d9,
226 XI_FSCALE = 0xfdd9, 237 XI_FSCALE = 0xfdd9,
227 XI_FYL2X = 0xf1d9, 238 XI_FYL2X = 0xf1d9,
228 239
240 /* VEX-encoded instructions. XV_* prefix. */
241 XV_RORX = XV_f20f3a(f0),
242 XV_SARX = XV_f30f38(f7),
243 XV_SHLX = XV_660f38(f7),
244 XV_SHRX = XV_f20f38(f7),
245
229 /* Variable-length opcodes. XO_* prefix. */ 246 /* Variable-length opcodes. XO_* prefix. */
247 XO_OR = XO_(0b),
230 XO_MOV = XO_(8b), 248 XO_MOV = XO_(8b),
231 XO_MOVto = XO_(89), 249 XO_MOVto = XO_(89),
232 XO_MOVtow = XO_66(89), 250 XO_MOVtow = XO_66(89),
@@ -277,10 +295,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 295 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 296 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 297 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 298 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 299 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 300 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 301 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 302 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 654d157a..a0ff8864 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -30,6 +30,7 @@
30#include "lj_vm.h" 30#include "lj_vm.h"
31#include "lj_vmevent.h" 31#include "lj_vmevent.h"
32#include "lj_target.h" 32#include "lj_target.h"
33#include "lj_prng.h"
33 34
34/* -- Error handling ------------------------------------------------------ */ 35/* -- Error handling ------------------------------------------------------ */
35 36
@@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T)
104 name++; 105 name++;
105 else 106 else
106 name = "(string)"; 107 name = "(string)";
107 lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); 108 lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
109 "trace PC out of range");
108 lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); 110 lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
109 if (!fp) { 111 if (!fp) {
110 char fname[40]; 112 char fname[40];
@@ -117,15 +119,26 @@ static void perftools_addtrace(GCtrace *T)
117} 119}
118#endif 120#endif
119 121
120/* Allocate space for copy of trace. */ 122/* Allocate space for copy of T. */
121static GCtrace *trace_save_alloc(jit_State *J) 123GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
122{ 124{
123 size_t sztr = ((sizeof(GCtrace)+7)&~7); 125 size_t sztr = ((sizeof(GCtrace)+7)&~7);
124 size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); 126 size_t szins = (T->nins-T->nk)*sizeof(IRIns);
125 size_t sz = sztr + szins + 127 size_t sz = sztr + szins +
126 J->cur.nsnap*sizeof(SnapShot) + 128 T->nsnap*sizeof(SnapShot) +
127 J->cur.nsnapmap*sizeof(SnapEntry); 129 T->nsnapmap*sizeof(SnapEntry);
128 return lj_mem_newt(J->L, (MSize)sz, GCtrace); 130 GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
131 char *p = (char *)T2 + sztr;
132 T2->gct = ~LJ_TTRACE;
133 T2->marked = 0;
134 T2->traceno = 0;
135 T2->ir = (IRIns *)p - T->nk;
136 T2->nins = T->nins;
137 T2->nk = T->nk;
138 T2->nsnap = T->nsnap;
139 T2->nsnapmap = T->nsnapmap;
140 memcpy(p, T->ir + T->nk, szins);
141 return T2;
129} 142}
130 143
131/* Save current trace by copying and compacting it. */ 144/* Save current trace by copying and compacting it. */
@@ -139,12 +152,12 @@ static void trace_save(jit_State *J, GCtrace *T)
139 setgcrefp(J2G(J)->gc.root, T); 152 setgcrefp(J2G(J)->gc.root, T);
140 newwhite(J2G(J), T); 153 newwhite(J2G(J), T);
141 T->gct = ~LJ_TTRACE; 154 T->gct = ~LJ_TTRACE;
142 T->ir = (IRIns *)p - J->cur.nk; 155 T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
143 memcpy(p, J->cur.ir+J->cur.nk, szins);
144 p += szins; 156 p += szins;
145 TRACE_APPENDVEC(snap, nsnap, SnapShot) 157 TRACE_APPENDVEC(snap, nsnap, SnapShot)
146 TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) 158 TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
147 J->cur.traceno = 0; 159 J->cur.traceno = 0;
160 J->curfinal = NULL;
148 setgcrefp(J->trace[T->traceno], T); 161 setgcrefp(J->trace[T->traceno], T);
149 lj_gc_barriertrace(J2G(J), T->traceno); 162 lj_gc_barriertrace(J2G(J), T->traceno);
150 lj_gdbjit_addtrace(J, T); 163 lj_gdbjit_addtrace(J, T);
@@ -172,7 +185,7 @@ void lj_trace_reenableproto(GCproto *pt)
172{ 185{
173 if ((pt->flags & PROTO_ILOOP)) { 186 if ((pt->flags & PROTO_ILOOP)) {
174 BCIns *bc = proto_bc(pt); 187 BCIns *bc = proto_bc(pt);
175 BCPos i, sizebc = pt->sizebc;; 188 BCPos i, sizebc = pt->sizebc;
176 pt->flags &= ~PROTO_ILOOP; 189 pt->flags &= ~PROTO_ILOOP;
177 if (bc_op(bc[0]) == BC_IFUNCF) 190 if (bc_op(bc[0]) == BC_IFUNCF)
178 setbc_op(&bc[0], BC_FUNCF); 191 setbc_op(&bc[0], BC_FUNCF);
@@ -194,27 +207,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
194 return; /* No need to unpatch branches in parent traces (yet). */ 207 return; /* No need to unpatch branches in parent traces (yet). */
195 switch (bc_op(*pc)) { 208 switch (bc_op(*pc)) {
196 case BC_JFORL: 209 case BC_JFORL:
197 lua_assert(traceref(J, bc_d(*pc)) == T); 210 lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace");
198 *pc = T->startins; 211 *pc = T->startins;
199 pc += bc_j(T->startins); 212 pc += bc_j(T->startins);
200 lua_assert(bc_op(*pc) == BC_JFORI); 213 lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI");
201 setbc_op(pc, BC_FORI); 214 setbc_op(pc, BC_FORI);
202 break; 215 break;
203 case BC_JITERL: 216 case BC_JITERL:
204 case BC_JLOOP: 217 case BC_JLOOP:
205 lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op)); 218 lj_assertJ(op == BC_ITERL || op == BC_LOOP || bc_isret(op),
219 "bad original bytecode %d", op);
206 *pc = T->startins; 220 *pc = T->startins;
207 break; 221 break;
208 case BC_JMP: 222 case BC_JMP:
209 lua_assert(op == BC_ITERL); 223 lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op);
210 pc += bc_j(*pc)+2; 224 pc += bc_j(*pc)+2;
211 if (bc_op(*pc) == BC_JITERL) { 225 if (bc_op(*pc) == BC_JITERL) {
212 lua_assert(traceref(J, bc_d(*pc)) == T); 226 lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace");
213 *pc = T->startins; 227 *pc = T->startins;
214 } 228 }
215 break; 229 break;
216 case BC_JFUNCF: 230 case BC_JFUNCF:
217 lua_assert(op == BC_FUNCF); 231 lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op);
218 *pc = T->startins; 232 *pc = T->startins;
219 break; 233 break;
220 default: /* Already unpatched. */ 234 default: /* Already unpatched. */
@@ -226,7 +240,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
226static void trace_flushroot(jit_State *J, GCtrace *T) 240static void trace_flushroot(jit_State *J, GCtrace *T)
227{ 241{
228 GCproto *pt = &gcref(T->startpt)->pt; 242 GCproto *pt = &gcref(T->startpt)->pt;
229 lua_assert(T->root == 0 && pt != NULL); 243 lj_assertJ(T->root == 0, "not a root trace");
244 lj_assertJ(pt != NULL, "trace has no prototype");
230 /* First unpatch any modified bytecode. */ 245 /* First unpatch any modified bytecode. */
231 trace_unpatch(J, T); 246 trace_unpatch(J, T);
232 /* Unlink root trace from chain anchored in prototype. */ 247 /* Unlink root trace from chain anchored in prototype. */
@@ -274,7 +289,7 @@ int lj_trace_flushall(lua_State *L)
274 if (T->root == 0) 289 if (T->root == 0)
275 trace_flushroot(J, T); 290 trace_flushroot(J, T);
276 lj_gdbjit_deltrace(J, T); 291 lj_gdbjit_deltrace(J, T);
277 T->traceno = 0; 292 T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */
278 setgcrefnull(J->trace[i]); 293 setgcrefnull(J->trace[i]);
279 } 294 }
280 } 295 }
@@ -296,13 +311,42 @@ void lj_trace_initstate(global_State *g)
296{ 311{
297 jit_State *J = G2J(g); 312 jit_State *J = G2J(g);
298 TValue *tv; 313 TValue *tv;
299 /* Initialize SIMD constants. */ 314
315 /* Initialize aligned SIMD constants. */
300 tv = LJ_KSIMD(J, LJ_KSIMD_ABS); 316 tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
301 tv[0].u64 = U64x(7fffffff,ffffffff); 317 tv[0].u64 = U64x(7fffffff,ffffffff);
302 tv[1].u64 = U64x(7fffffff,ffffffff); 318 tv[1].u64 = U64x(7fffffff,ffffffff);
303 tv = LJ_KSIMD(J, LJ_KSIMD_NEG); 319 tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
304 tv[0].u64 = U64x(80000000,00000000); 320 tv[0].u64 = U64x(80000000,00000000);
305 tv[1].u64 = U64x(80000000,00000000); 321 tv[1].u64 = U64x(80000000,00000000);
322
323 /* Initialize 32/64 bit constants. */
324#if LJ_TARGET_X86ORX64
325 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
326#if LJ_32
327 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
328#endif
329 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
330 J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
331#endif
332#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
333 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
334#endif
335#if LJ_TARGET_PPC
336 J->k32[LJ_K32_2P52_2P31] = 0x59800004;
337 J->k32[LJ_K32_2P52] = 0x59800000;
338#endif
339#if LJ_TARGET_PPC || LJ_TARGET_MIPS
340 J->k32[LJ_K32_2P31] = 0x4f000000;
341#endif
342#if LJ_TARGET_MIPS
343 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
344#if LJ_64
345 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
346 J->k32[LJ_K32_2P63] = 0x5f000000;
347 J->k32[LJ_K32_M2P64] = 0xdf800000;
348#endif
349#endif
306} 350}
307 351
308/* Free everything associated with the JIT compiler state. */ 352/* Free everything associated with the JIT compiler state. */
@@ -313,11 +357,11 @@ void lj_trace_freestate(global_State *g)
313 { /* This assumes all traces have already been freed. */ 357 { /* This assumes all traces have already been freed. */
314 ptrdiff_t i; 358 ptrdiff_t i;
315 for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) 359 for (i = 1; i < (ptrdiff_t)J->sizetrace; i++)
316 lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); 360 lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL,
361 "trace still allocated");
317 } 362 }
318#endif 363#endif
319 lj_mcode_free(J); 364 lj_mcode_free(J);
320 lj_ir_k64_freeall(J);
321 lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); 365 lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
322 lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); 366 lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
323 lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); 367 lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
@@ -341,7 +385,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e)
341 if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ 385 if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */
342 /* First try to bump its hotcount several times. */ 386 /* First try to bump its hotcount several times. */
343 val = ((uint32_t)J->penalty[i].val << 1) + 387 val = ((uint32_t)J->penalty[i].val << 1) +
344 LJ_PRNG_BITS(J, PENALTY_RNDBITS); 388 (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1));
345 if (val > PENALTY_MAX) { 389 if (val > PENALTY_MAX) {
346 blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ 390 blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */
347 return; 391 return;
@@ -367,10 +411,11 @@ static void trace_start(jit_State *J)
367 TraceNo traceno; 411 TraceNo traceno;
368 412
369 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ 413 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
370 if (J->parent == 0) { 414 if (J->parent == 0 && J->exitno == 0) {
371 /* Lazy bytecode patching to disable hotcount events. */ 415 /* Lazy bytecode patching to disable hotcount events. */
372 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || 416 lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
373 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); 417 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF,
418 "bad hot bytecode %d", bc_op(*J->pc));
374 setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); 419 setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP);
375 J->pt->flags |= PROTO_ILOOP; 420 J->pt->flags |= PROTO_ILOOP;
376 } 421 }
@@ -381,7 +426,8 @@ static void trace_start(jit_State *J)
381 /* Get a new trace number. */ 426 /* Get a new trace number. */
382 traceno = trace_findfree(J); 427 traceno = trace_findfree(J);
383 if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ 428 if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */
384 lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); 429 lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0,
430 "recorder called from GC hook");
385 lj_trace_flushall(J->L); 431 lj_trace_flushall(J->L);
386 J->state = LJ_TRACE_IDLE; /* Silently ignored. */ 432 J->state = LJ_TRACE_IDLE; /* Silently ignored. */
387 return; 433 return;
@@ -401,6 +447,8 @@ static void trace_start(jit_State *J)
401 J->guardemit.irt = 0; 447 J->guardemit.irt = 0;
402 J->postproc = LJ_POST_NONE; 448 J->postproc = LJ_POST_NONE;
403 lj_resetsplit(J); 449 lj_resetsplit(J);
450 J->retryrec = 0;
451 J->ktrace = 0;
404 setgcref(J->cur.startpt, obj2gco(J->pt)); 452 setgcref(J->cur.startpt, obj2gco(J->pt));
405 453
406 L = J->L; 454 L = J->L;
@@ -412,6 +460,12 @@ static void trace_start(jit_State *J)
412 if (J->parent) { 460 if (J->parent) {
413 setintV(L->top++, J->parent); 461 setintV(L->top++, J->parent);
414 setintV(L->top++, J->exitno); 462 setintV(L->top++, J->exitno);
463 } else {
464 BCOp op = bc_op(*J->pc);
465 if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) {
466 setintV(L->top++, J->exitno); /* Parent of stitched trace. */
467 setintV(L->top++, -1);
468 }
415 } 469 }
416 ); 470 );
417 lj_record_setup(J); 471 lj_record_setup(J);
@@ -424,7 +478,7 @@ static void trace_stop(jit_State *J)
424 BCOp op = bc_op(J->cur.startins); 478 BCOp op = bc_op(J->cur.startins);
425 GCproto *pt = &gcref(J->cur.startpt)->pt; 479 GCproto *pt = &gcref(J->cur.startpt)->pt;
426 TraceNo traceno = J->cur.traceno; 480 TraceNo traceno = J->cur.traceno;
427 GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ 481 GCtrace *T = J->curfinal;
428 lua_State *L; 482 lua_State *L;
429 483
430 switch (op) { 484 switch (op) {
@@ -449,7 +503,7 @@ static void trace_stop(jit_State *J)
449 goto addroot; 503 goto addroot;
450 case BC_JMP: 504 case BC_JMP:
451 /* Patch exit branch in parent to side trace entry. */ 505 /* Patch exit branch in parent to side trace entry. */
452 lua_assert(J->parent != 0 && J->cur.root != 0); 506 lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace");
453 lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); 507 lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
454 /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ 508 /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
455 traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE; 509 traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE;
@@ -461,8 +515,14 @@ static void trace_stop(jit_State *J)
461 root->nextside = (TraceNo1)traceno; 515 root->nextside = (TraceNo1)traceno;
462 } 516 }
463 break; 517 break;
518 case BC_CALLM:
519 case BC_CALL:
520 case BC_ITERC:
521 /* Trace stitching: patch link of previous trace. */
522 traceref(J, J->exitno)->link = traceno;
523 break;
464 default: 524 default:
465 lua_assert(0); 525 lj_assertJ(0, "bad stop bytecode %d", op);
466 break; 526 break;
467 } 527 }
468 528
@@ -475,6 +535,7 @@ static void trace_stop(jit_State *J)
475 lj_vmevent_send(L, TRACE, 535 lj_vmevent_send(L, TRACE,
476 setstrV(L, L->top++, lj_str_newlit(L, "stop")); 536 setstrV(L, L->top++, lj_str_newlit(L, "stop"));
477 setintV(L->top++, traceno); 537 setintV(L->top++, traceno);
538 setfuncV(L, L->top++, J->fn);
478 ); 539 );
479} 540}
480 541
@@ -482,8 +543,8 @@ static void trace_stop(jit_State *J)
482static int trace_downrec(jit_State *J) 543static int trace_downrec(jit_State *J)
483{ 544{
484 /* Restart recording at the return instruction. */ 545 /* Restart recording at the return instruction. */
485 lua_assert(J->pt != NULL); 546 lj_assertJ(J->pt != NULL, "no active prototype");
486 lua_assert(bc_isret(bc_op(*J->pc))); 547 lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode");
487 if (bc_op(*J->pc) == BC_RETM) 548 if (bc_op(*J->pc) == BC_RETM)
488 return 0; /* NYI: down-recursion with RETM. */ 549 return 0; /* NYI: down-recursion with RETM. */
489 J->parent = 0; 550 J->parent = 0;
@@ -502,6 +563,10 @@ static int trace_abort(jit_State *J)
502 563
503 J->postproc = LJ_POST_NONE; 564 J->postproc = LJ_POST_NONE;
504 lj_mcode_abort(J); 565 lj_mcode_abort(J);
566 if (J->curfinal) {
567 lj_trace_free(J2G(J), J->curfinal);
568 J->curfinal = NULL;
569 }
505 if (tvisnumber(L->top-1)) 570 if (tvisnumber(L->top-1))
506 e = (TraceError)numberVint(L->top-1); 571 e = (TraceError)numberVint(L->top-1);
507 if (e == LJ_TRERR_MCODELM) { 572 if (e == LJ_TRERR_MCODELM) {
@@ -510,8 +575,17 @@ static int trace_abort(jit_State *J)
510 return 1; /* Retry ASM with new MCode area. */ 575 return 1; /* Retry ASM with new MCode area. */
511 } 576 }
512 /* Penalize or blacklist starting bytecode instruction. */ 577 /* Penalize or blacklist starting bytecode instruction. */
513 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) 578 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
514 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); 579 if (J->exitno == 0) {
580 BCIns *startpc = mref(J->cur.startpc, BCIns);
581 if (e == LJ_TRERR_RETRY)
582 hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */
583 else
584 penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e);
585 } else {
586 traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
587 }
588 }
515 589
516 /* Is there anything to abort? */ 590 /* Is there anything to abort? */
517 traceno = J->cur.traceno; 591 traceno = J->cur.traceno;
@@ -680,15 +754,30 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
680{ 754{
681 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; 755 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
682 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && 756 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
757 isluafunc(curr_func(J->L)) &&
683 snap->count != SNAPCOUNT_DONE && 758 snap->count != SNAPCOUNT_DONE &&
684 ++snap->count >= J->param[JIT_P_hotexit]) { 759 ++snap->count >= J->param[JIT_P_hotexit]) {
685 lua_assert(J->state == LJ_TRACE_IDLE); 760 lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording");
686 /* J->parent is non-zero for a side trace. */ 761 /* J->parent is non-zero for a side trace. */
687 J->state = LJ_TRACE_START; 762 J->state = LJ_TRACE_START;
688 lj_trace_ins(J, pc); 763 lj_trace_ins(J, pc);
689 } 764 }
690} 765}
691 766
767/* Stitch a new trace to the previous trace. */
768void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
769{
770 /* Only start a new trace if not recording or inside __gc call or vmevent. */
771 if (J->state == LJ_TRACE_IDLE &&
772 !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
773 J->parent = 0; /* Have to treat it like a root trace. */
774 /* J->exitno is set to the invoking trace. */
775 J->state = LJ_TRACE_START;
776 lj_trace_ins(J, pc);
777 }
778}
779
780
692/* Tiny struct to pass data to protected call. */ 781/* Tiny struct to pass data to protected call. */
693typedef struct ExitDataCP { 782typedef struct ExitDataCP {
694 jit_State *J; 783 jit_State *J;
@@ -732,7 +821,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex)
732} 821}
733#endif 822#endif
734 823
735#ifdef EXITSTATE_PCREG 824#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE)
736/* Determine trace number from pc of exit instruction. */ 825/* Determine trace number from pc of exit instruction. */
737static TraceNo trace_exit_find(jit_State *J, MCode *pc) 826static TraceNo trace_exit_find(jit_State *J, MCode *pc)
738{ 827{
@@ -742,7 +831,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc)
742 if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) 831 if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode))
743 return traceno; 832 return traceno;
744 } 833 }
745 lua_assert(0); 834 lj_assertJ(0, "bad exit pc");
746 return 0; 835 return 0;
747} 836}
748#endif 837#endif
@@ -754,40 +843,55 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
754 lua_State *L = J->L; 843 lua_State *L = J->L;
755 ExitState *ex = (ExitState *)exptr; 844 ExitState *ex = (ExitState *)exptr;
756 ExitDataCP exd; 845 ExitDataCP exd;
757 int errcode; 846 int errcode, exitcode = J->exitcode;
847 TValue exiterr;
758 const BCIns *pc; 848 const BCIns *pc;
759 void *cf; 849 void *cf;
760 GCtrace *T; 850 GCtrace *T;
851
852 setnilV(&exiterr);
853 if (exitcode) { /* Trace unwound with error code. */
854 J->exitcode = 0;
855 copyTV(L, &exiterr, L->top-1);
856 }
857
761#ifdef EXITSTATE_PCREG 858#ifdef EXITSTATE_PCREG
762 J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); 859 J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
763#endif 860#endif
764 T = traceref(J, J->parent); UNUSED(T); 861 T = traceref(J, J->parent); UNUSED(T);
765#ifdef EXITSTATE_CHECKEXIT 862#ifdef EXITSTATE_CHECKEXIT
766 if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ 863 if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */
767 lua_assert(T->root != 0); 864 lj_assertJ(T->root != 0, "stack check in root trace");
768 J->exitno = T->ir[REF_BASE].op2; 865 J->exitno = T->ir[REF_BASE].op2;
769 J->parent = T->ir[REF_BASE].op1; 866 J->parent = T->ir[REF_BASE].op1;
770 T = traceref(J, J->parent); 867 T = traceref(J, J->parent);
771 } 868 }
772#endif 869#endif
773 lua_assert(T != NULL && J->exitno < T->nsnap); 870 lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number");
774 exd.J = J; 871 exd.J = J;
775 exd.exptr = exptr; 872 exd.exptr = exptr;
776 errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); 873 errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp);
777 if (errcode) 874 if (errcode)
778 return -errcode; /* Return negated error code. */ 875 return -errcode; /* Return negated error code. */
779 876
780 lj_vmevent_send(L, TEXIT, 877 if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */
781 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); 878
782 setintV(L->top++, J->parent); 879 if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
783 setintV(L->top++, J->exitno); 880 lj_vmevent_send(L, TEXIT,
784 trace_exit_regs(L, ex); 881 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
785 ); 882 setintV(L->top++, J->parent);
883 setintV(L->top++, J->exitno);
884 trace_exit_regs(L, ex);
885 );
786 886
787 pc = exd.pc; 887 pc = exd.pc;
788 cf = cframe_raw(L->cframe); 888 cf = cframe_raw(L->cframe);
789 setcframe_pc(cf, pc); 889 setcframe_pc(cf, pc);
790 if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { 890 if (exitcode) {
891 return -exitcode;
892 } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
893 /* Just exit to interpreter. */
894 } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
791 if (!(G(L)->hookmask & HOOK_GC)) 895 if (!(G(L)->hookmask & HOOK_GC))
792 lj_gc_step(L); /* Exited because of GC: drive GC forward. */ 896 lj_gc_step(L); /* Exited because of GC: drive GC forward. */
793 } else { 897 } else {
@@ -811,7 +915,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
811 ERRNO_RESTORE 915 ERRNO_RESTORE
812 switch (bc_op(*pc)) { 916 switch (bc_op(*pc)) {
813 case BC_CALLM: case BC_CALLMT: 917 case BC_CALLM: case BC_CALLMT:
814 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc)); 918 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2);
815 case BC_RETM: 919 case BC_RETM:
816 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); 920 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
817 case BC_TSETM: 921 case BC_TSETM:
@@ -823,4 +927,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
823 } 927 }
824} 928}
825 929
930#if LJ_UNWIND_JIT
931/* Given an mcode address determine trace exit address for unwinding. */
932uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep)
933{
934#if EXITTRACE_VMSTATE
935 TraceNo traceno = J2G(J)->vmstate;
936#else
937 TraceNo traceno = trace_exit_find(J, (MCode *)addr);
938#endif
939 GCtrace *T = traceref(J, traceno);
940 if (T
941#if EXITTRACE_VMSTATE
942 && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode
943#endif
944 ) {
945 SnapShot *snap = T->snap;
946 SnapNo lo = 0, exitno = T->nsnap;
947 uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */
948 /* Rightmost binary search for mcode offset to determine exit number. */
949 do {
950 SnapNo mid = (lo+exitno) >> 1;
951 if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1;
952 } while (lo < exitno);
953 exitno--;
954 *ep = exitno;
955#ifdef EXITSTUBS_PER_GROUP
956 return (uintptr_t)exitstub_addr(J, exitno);
957#else
958 return (uintptr_t)exitstub_trace_addr(T, exitno);
959#endif
960 }
961 /* Cannot correlate addr with trace/exit. This will be fatal. */
962 lj_assertJ(0, "bad exit pc");
963 return 0;
964}
965#endif
966
826#endif 967#endif
diff --git a/src/lj_trace.h b/src/lj_trace.h
index b725a139..e4cf2dc4 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
23LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); 23LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
24 24
25/* Trace management. */ 25/* Trace management. */
26LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T);
26LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); 27LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
27LJ_FUNC void lj_trace_reenableproto(GCproto *pt); 28LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
28LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); 29LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
@@ -34,7 +35,11 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
34/* Event handling. */ 35/* Event handling. */
35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); 36LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); 37LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
38LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
37LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); 39LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
40#if LJ_UNWIND_EXT
41LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep);
42#endif
38 43
39/* Signal asynchronous abort of trace or end of trace. */ 44/* Signal asynchronous abort of trace or end of trace. */
40#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) 45#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE)
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index 477c9ab4..24dcb5c1 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -7,10 +7,12 @@
7 7
8/* Recording. */ 8/* Recording. */
9TREDEF(RECERR, "error thrown or hook called during recording") 9TREDEF(RECERR, "error thrown or hook called during recording")
10TREDEF(TRACEUV, "trace too short")
10TREDEF(TRACEOV, "trace too long") 11TREDEF(TRACEOV, "trace too long")
11TREDEF(STACKOV, "trace too deep") 12TREDEF(STACKOV, "trace too deep")
12TREDEF(SNAPOV, "too many snapshots") 13TREDEF(SNAPOV, "too many snapshots")
13TREDEF(BLACKL, "blacklisted") 14TREDEF(BLACKL, "blacklisted")
15TREDEF(RETRY, "retry recording")
14TREDEF(NYIBC, "NYI: bytecode %d") 16TREDEF(NYIBC, "NYI: bytecode %d")
15 17
16/* Recording loop ops. */ 18/* Recording loop ops. */
@@ -23,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type")
23TREDEF(CJITOFF, "JIT compilation disabled for function") 25TREDEF(CJITOFF, "JIT compilation disabled for function")
24TREDEF(CUNROLL, "call unroll limit reached") 26TREDEF(CUNROLL, "call unroll limit reached")
25TREDEF(DOWNREC, "down-recursion, restarting") 27TREDEF(DOWNREC, "down-recursion, restarting")
26TREDEF(NYICF, "NYI: C function %s")
27TREDEF(NYIFF, "NYI: FastFunc %s")
28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") 28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
29TREDEF(NYIRETL, "NYI: return to lower frame") 29TREDEF(NYIRETL, "NYI: return to lower frame")
30 30
diff --git a/src/lj_udata.c b/src/lj_udata.c
index ec3478c8..a0edd0df 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h"
11#include "lj_udata.h" 12#include "lj_udata.h"
12 13
13GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) 14GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
@@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud)
32 lj_mem_free(g, ud, sizeudata(ud)); 33 lj_mem_free(g, ud, sizeudata(ud));
33} 34}
34 35
36#if LJ_64
37void *lj_lightud_intern(lua_State *L, void *p)
38{
39 global_State *g = G(L);
40 uint64_t u = (uint64_t)p;
41 uint32_t up = lightudup(u);
42 uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
43 MSize segnum = g->gc.lightudnum;
44 if (segmap) {
45 MSize seg;
46 for (seg = 0; seg <= segnum; seg++)
47 if (segmap[seg] == up) /* Fast path. */
48 return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
49 segnum++;
50 /* Leave last segment unused to avoid clash with ITERN key. */
51 if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU);
52 }
53 if (!((segnum-1) & segnum) && segnum != 1) {
54 lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t);
55 setmref(g->gc.lightudseg, segmap);
56 }
57 g->gc.lightudnum = segnum;
58 segmap[segnum] = up;
59 return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
60}
61#endif
62
diff --git a/src/lj_udata.h b/src/lj_udata.h
index 1873b694..78522ecc 100644
--- a/src/lj_udata.h
+++ b/src/lj_udata.h
@@ -10,5 +10,8 @@
10 10
11LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); 11LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env);
12LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); 12LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud);
13#if LJ_64
14LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p);
15#endif
13 16
14#endif 17#endif
diff --git a/src/lj_vm.h b/src/lj_vm.h
index aa985cfa..84348e7a 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -17,11 +17,18 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud,
17LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); 17LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
18LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); 18LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode);
19LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); 19LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe);
20#if LJ_ABI_WIN && LJ_TARGET_X86
21LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec,
22 void *unwinder, int errcode);
23#endif
20LJ_ASMF void lj_vm_unwind_c_eh(void); 24LJ_ASMF void lj_vm_unwind_c_eh(void);
21LJ_ASMF void lj_vm_unwind_ff_eh(void); 25LJ_ASMF void lj_vm_unwind_ff_eh(void);
22#if LJ_TARGET_X86ORX64 26#if LJ_TARGET_X86ORX64
23LJ_ASMF void lj_vm_unwind_rethrow(void); 27LJ_ASMF void lj_vm_unwind_rethrow(void);
24#endif 28#endif
29#if LJ_TARGET_MIPS
30LJ_ASMF void lj_vm_unwind_stub(void);
31#endif
25 32
26/* Miscellaneous functions. */ 33/* Miscellaneous functions. */
27#if LJ_TARGET_X86ORX64 34#if LJ_TARGET_X86ORX64
@@ -43,13 +50,14 @@ LJ_ASMF void lj_vm_record(void);
43LJ_ASMF void lj_vm_inshook(void); 50LJ_ASMF void lj_vm_inshook(void);
44LJ_ASMF void lj_vm_rethook(void); 51LJ_ASMF void lj_vm_rethook(void);
45LJ_ASMF void lj_vm_callhook(void); 52LJ_ASMF void lj_vm_callhook(void);
53LJ_ASMF void lj_vm_profhook(void);
46 54
47/* Trace exit handling. */ 55/* Trace exit handling. */
48LJ_ASMF void lj_vm_exit_handler(void); 56LJ_ASMF void lj_vm_exit_handler(void);
49LJ_ASMF void lj_vm_exit_interp(void); 57LJ_ASMF void lj_vm_exit_interp(void);
50 58
51/* Internal math helper functions. */ 59/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 60#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
53#define lj_vm_floor floor 61#define lj_vm_floor floor
54#define lj_vm_ceil ceil 62#define lj_vm_ceil ceil
55#else 63#else
@@ -60,23 +68,26 @@ LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 68LJ_ASMF double lj_vm_ceil_sf(double);
61#endif 69#endif
62#endif 70#endif
63#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 71#ifdef LUAJIT_NO_LOG2
64LJ_ASMF double lj_vm_log2(double); 72LJ_ASMF double lj_vm_log2(double);
65#else 73#else
66#define lj_vm_log2 log2 74#define lj_vm_log2 log2
67#endif 75#endif
76#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS)
77LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
78#endif
68 79
69#if LJ_HASJIT 80#if LJ_HASJIT
70#if LJ_TARGET_X86ORX64 81#if LJ_TARGET_X86ORX64
71LJ_ASMF void lj_vm_floor_sse(void); 82LJ_ASMF void lj_vm_floor_sse(void);
72LJ_ASMF void lj_vm_ceil_sse(void); 83LJ_ASMF void lj_vm_ceil_sse(void);
73LJ_ASMF void lj_vm_trunc_sse(void); 84LJ_ASMF void lj_vm_trunc_sse(void);
74LJ_ASMF void lj_vm_exp_x87(void);
75LJ_ASMF void lj_vm_exp2_x87(void);
76LJ_ASMF void lj_vm_pow_sse(void);
77LJ_ASMF void lj_vm_powi_sse(void); 85LJ_ASMF void lj_vm_powi_sse(void);
86#define lj_vm_powi NULL
78#else 87#else
79#if LJ_TARGET_PPC 88LJ_ASMF double lj_vm_powi(double, int32_t);
89#endif
90#if LJ_TARGET_PPC || LJ_TARGET_ARM64
80#define lj_vm_trunc trunc 91#define lj_vm_trunc trunc
81#else 92#else
82LJ_ASMF double lj_vm_trunc(double); 93LJ_ASMF double lj_vm_trunc(double);
@@ -84,14 +95,6 @@ LJ_ASMF double lj_vm_trunc(double);
84LJ_ASMF double lj_vm_trunc_sf(double); 95LJ_ASMF double lj_vm_trunc_sf(double);
85#endif 96#endif
86#endif 97#endif
87LJ_ASMF double lj_vm_powi(double, int32_t);
88#ifdef LUAJIT_NO_EXP2
89LJ_ASMF double lj_vm_exp2(double);
90#else
91#define lj_vm_exp2 exp2
92#endif
93#endif
94LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
95#if LJ_HASFFI 98#if LJ_HASFFI
96LJ_ASMF int lj_vm_errno(void); 99LJ_ASMF int lj_vm_errno(void);
97#endif 100#endif
@@ -104,8 +107,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
104LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ 107LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
105LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ 108LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
106LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ 109LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
107 110LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
108enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
109 111
110/* Start of the ASM code. */ 112/* Start of the ASM code. */
111LJ_ASMF char lj_vm_asm_begin[]; 113LJ_ASMF char lj_vm_asm_begin[];
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
index 24952548..45c82096 100644
--- a/src/lj_vmevent.c
+++ b/src/lj_vmevent.c
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
27 if (tv && tvisfunc(tv)) { 27 if (tv && tvisfunc(tv)) {
28 lj_state_checkstack(L, LUA_MINSTACK); 28 lj_state_checkstack(L, LUA_MINSTACK);
29 setfuncV(L, L->top++, funcV(tv)); 29 setfuncV(L, L->top++, funcV(tv));
30 if (LJ_FR2) setnilV(L->top++);
30 return savestack(L, L->top); 31 return savestack(L, L->top);
31 } 32 }
32 } 33 }
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 8bcdc244..23ef0dd2 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -13,16 +13,29 @@
13#include "lj_ir.h" 13#include "lj_ir.h"
14#include "lj_vm.h" 14#include "lj_vm.h"
15 15
16/* -- Helper functions for generated machine code ------------------------- */ 16/* -- Wrapper functions --------------------------------------------------- */
17 17
18#if LJ_TARGET_X86ORX64 18#if LJ_TARGET_X86 && __ELF__ && __PIC__
19/* Wrapper functions to avoid linker issues on OSX. */ 19/* Wrapper functions to deal with the ELF/x86 PIC disaster. */
20LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } 20LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
21LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } 21LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
22LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } 22LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
23LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
24LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
25LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
26LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
27LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
28LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
29LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
33LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
34LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
23#endif 35#endif
24 36
25#if !LJ_TARGET_X86ORX64 37/* -- Helper functions for generated machine code ------------------------- */
38
26double lj_vm_foldarith(double x, double y, int op) 39double lj_vm_foldarith(double x, double y, int op)
27{ 40{
28 switch (op) { 41 switch (op) {
@@ -35,37 +48,20 @@ double lj_vm_foldarith(double x, double y, int op)
35 case IR_NEG - IR_ADD: return -x; break; 48 case IR_NEG - IR_ADD: return -x; break;
36 case IR_ABS - IR_ADD: return fabs(x); break; 49 case IR_ABS - IR_ADD: return fabs(x); break;
37#if LJ_HASJIT 50#if LJ_HASJIT
38 case IR_ATAN2 - IR_ADD: return atan2(x, y); break;
39 case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; 51 case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break;
40 case IR_MIN - IR_ADD: return x > y ? y : x; break; 52 case IR_MIN - IR_ADD: return x < y ? x : y; break;
41 case IR_MAX - IR_ADD: return x < y ? y : x; break; 53 case IR_MAX - IR_ADD: return x > y ? x : y; break;
42#endif 54#endif
43 default: return x; 55 default: return x;
44 } 56 }
45} 57}
46#endif
47
48#if LJ_HASJIT
49 58
50#ifdef LUAJIT_NO_LOG2 59#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
51double lj_vm_log2(double a)
52{
53 return log(a) * 1.4426950408889634074;
54}
55#endif
56
57#ifdef LUAJIT_NO_EXP2
58double lj_vm_exp2(double a)
59{
60 return exp(a * 0.6931471805599453);
61}
62#endif
63
64#if !(LJ_TARGET_ARM || LJ_TARGET_PPC)
65int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) 60int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
66{ 61{
67 uint32_t y, ua, ub; 62 uint32_t y, ua, ub;
68 lua_assert(b != 0); /* This must be checked before using this function. */ 63 /* This must be checked before using this function. */
64 lj_assertX(b != 0, "modulo with zero divisor");
69 ua = a < 0 ? (uint32_t)-a : (uint32_t)a; 65 ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
70 ub = b < 0 ? (uint32_t)-b : (uint32_t)b; 66 ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
71 y = ua % ub; 67 y = ua % ub;
@@ -75,12 +71,21 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
75} 71}
76#endif 72#endif
77 73
74#if LJ_HASJIT
75
76#ifdef LUAJIT_NO_LOG2
77double lj_vm_log2(double a)
78{
79 return log(a) * 1.4426950408889634074;
80}
81#endif
82
78#if !LJ_TARGET_X86ORX64 83#if !LJ_TARGET_X86ORX64
79/* Unsigned x^k. */ 84/* Unsigned x^k. */
80static double lj_vm_powui(double x, uint32_t k) 85static double lj_vm_powui(double x, uint32_t k)
81{ 86{
82 double y; 87 double y;
83 lua_assert(k != 0); 88 lj_assertX(k != 0, "pow with zero exponent");
84 for (; (k & 1) == 0; k >>= 1) x *= x; 89 for (; (k & 1) == 0; k >>= 1) x *= x;
85 y = x; 90 y = x;
86 if ((k >>= 1) != 0) { 91 if ((k >>= 1) != 0) {
@@ -107,6 +112,7 @@ double lj_vm_powi(double x, int32_t k)
107 else 112 else
108 return 1.0 / lj_vm_powui(x, (uint32_t)-k); 113 return 1.0 / lj_vm_powui(x, (uint32_t)-k);
109} 114}
115#endif
110 116
111/* Computes fpm(x) for extended math functions. */ 117/* Computes fpm(x) for extended math functions. */
112double lj_vm_foldfpm(double x, int fpm) 118double lj_vm_foldfpm(double x, int fpm)
@@ -116,19 +122,12 @@ double lj_vm_foldfpm(double x, int fpm)
116 case IRFPM_CEIL: return lj_vm_ceil(x); 122 case IRFPM_CEIL: return lj_vm_ceil(x);
117 case IRFPM_TRUNC: return lj_vm_trunc(x); 123 case IRFPM_TRUNC: return lj_vm_trunc(x);
118 case IRFPM_SQRT: return sqrt(x); 124 case IRFPM_SQRT: return sqrt(x);
119 case IRFPM_EXP: return exp(x);
120 case IRFPM_EXP2: return lj_vm_exp2(x);
121 case IRFPM_LOG: return log(x); 125 case IRFPM_LOG: return log(x);
122 case IRFPM_LOG2: return lj_vm_log2(x); 126 case IRFPM_LOG2: return lj_vm_log2(x);
123 case IRFPM_LOG10: return log10(x); 127 default: lj_assertX(0, "bad fpm %d", fpm);
124 case IRFPM_SIN: return sin(x);
125 case IRFPM_COS: return cos(x);
126 case IRFPM_TAN: return tan(x);
127 default: lua_assert(0);
128 } 128 }
129 return 0; 129 return 0;
130} 130}
131#endif
132 131
133#if LJ_HASFFI 132#if LJ_HASFFI
134int lj_vm_errno(void) 133int lj_vm_errno(void)
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 5a36d102..384b3cc1 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -3,16 +3,6 @@
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/ 4*/
5 5
6/*
7+--------------------------------------------------------------------------+
8| WARNING: Compiling the amalgamation needs a lot of virtual memory |
9| (around 300 MB with GCC 4.x)! If you don't have enough physical memory |
10| your machine will start swapping to disk and the compile will not finish |
11| within a reasonable amount of time. |
12| So either compile on a bigger machine or use the non-amalgamated build. |
13+--------------------------------------------------------------------------+
14*/
15
16#define ljamalg_c 6#define ljamalg_c
17#define LUA_CORE 7#define LUA_CORE
18 8
@@ -28,23 +18,30 @@
28#include "lua.h" 18#include "lua.h"
29#include "lauxlib.h" 19#include "lauxlib.h"
30 20
21#include "lj_assert.c"
31#include "lj_gc.c" 22#include "lj_gc.c"
32#include "lj_err.c" 23#include "lj_err.c"
33#include "lj_char.c" 24#include "lj_char.c"
34#include "lj_bc.c" 25#include "lj_bc.c"
35#include "lj_obj.c" 26#include "lj_obj.c"
27#include "lj_buf.c"
36#include "lj_str.c" 28#include "lj_str.c"
37#include "lj_tab.c" 29#include "lj_tab.c"
38#include "lj_func.c" 30#include "lj_func.c"
39#include "lj_udata.c" 31#include "lj_udata.c"
40#include "lj_meta.c" 32#include "lj_meta.c"
41#include "lj_debug.c" 33#include "lj_debug.c"
34#include "lj_prng.c"
42#include "lj_state.c" 35#include "lj_state.c"
43#include "lj_dispatch.c" 36#include "lj_dispatch.c"
44#include "lj_vmevent.c" 37#include "lj_vmevent.c"
45#include "lj_vmmath.c" 38#include "lj_vmmath.c"
46#include "lj_strscan.c" 39#include "lj_strscan.c"
40#include "lj_strfmt.c"
41#include "lj_strfmt_num.c"
42#include "lj_serialize.c"
47#include "lj_api.c" 43#include "lj_api.c"
44#include "lj_profile.c"
48#include "lj_lex.c" 45#include "lj_lex.c"
49#include "lj_parse.c" 46#include "lj_parse.c"
50#include "lj_bcread.c" 47#include "lj_bcread.c"
@@ -89,5 +86,6 @@
89#include "lib_bit.c" 86#include "lib_bit.c"
90#include "lib_jit.c" 87#include "lib_jit.c"
91#include "lib_ffi.c" 88#include "lib_ffi.c"
89#include "lib_buffer.c"
92#include "lib_init.c" 90#include "lib_init.c"
93 91
diff --git a/src/lua.h b/src/lua.h
index 2bd683c2..6d1634d1 100644
--- a/src/lua.h
+++ b/src/lua.h
@@ -39,7 +39,8 @@
39#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) 39#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i))
40 40
41 41
42/* thread status; 0 is OK */ 42/* thread status */
43#define LUA_OK 0
43#define LUA_YIELD 1 44#define LUA_YIELD 1
44#define LUA_ERRRUN 2 45#define LUA_ERRRUN 2
45#define LUA_ERRSYNTAX 3 46#define LUA_ERRSYNTAX 3
@@ -226,6 +227,7 @@ LUA_API int (lua_status) (lua_State *L);
226#define LUA_GCSTEP 5 227#define LUA_GCSTEP 5
227#define LUA_GCSETPAUSE 6 228#define LUA_GCSETPAUSE 6
228#define LUA_GCSETSTEPMUL 7 229#define LUA_GCSETSTEPMUL 7
230#define LUA_GCISRUNNING 9
229 231
230LUA_API int (lua_gc) (lua_State *L, int what, int data); 232LUA_API int (lua_gc) (lua_State *L, int what, int data);
231 233
@@ -346,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n);
346LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); 348LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2);
347LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, 349LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt,
348 const char *chunkname, const char *mode); 350 const char *chunkname, const char *mode);
351LUA_API const lua_Number *lua_version (lua_State *L);
352LUA_API void lua_copy (lua_State *L, int fromidx, int toidx);
353LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum);
354LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum);
355
356/* From Lua 5.3. */
357LUA_API int lua_isyieldable (lua_State *L);
349 358
350 359
351struct lua_Debug { 360struct lua_Debug {
diff --git a/src/luaconf.h b/src/luaconf.h
index abf40572..5ba6eda9 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -37,7 +37,7 @@
37#endif 37#endif
38#define LUA_LROOT "/usr/local" 38#define LUA_LROOT "/usr/local"
39#define LUA_LUADIR "/lua/5.1/" 39#define LUA_LUADIR "/lua/5.1/"
40#define LUA_LJDIR "/luajit-2.0.5/" 40#define LUA_LJDIR "/luajit-2.1.0-beta3/"
41 41
42#ifdef LUA_ROOT 42#ifdef LUA_ROOT
43#define LUA_JROOT LUA_ROOT 43#define LUA_JROOT LUA_ROOT
@@ -79,7 +79,7 @@
79#define LUA_IGMARK "-" 79#define LUA_IGMARK "-"
80#define LUA_PATH_CONFIG \ 80#define LUA_PATH_CONFIG \
81 LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ 81 LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
82 LUA_EXECDIR "\n" LUA_IGMARK 82 LUA_EXECDIR "\n" LUA_IGMARK "\n"
83 83
84/* Quoting in error messages. */ 84/* Quoting in error messages. */
85#define LUA_QL(x) "'" x "'" 85#define LUA_QL(x) "'" x "'"
@@ -92,10 +92,6 @@
92#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ 92#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
93#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ 93#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */
94 94
95/* Compatibility with older library function names. */
96#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
97#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */
98
99/* Configuration for the frontend (the luajit executable). */ 95/* Configuration for the frontend (the luajit executable). */
100#if defined(luajit_c) 96#if defined(luajit_c)
101#define LUA_PROGNAME "luajit" /* Fallback frontend name. */ 97#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
@@ -140,7 +136,7 @@
140 136
141#define LUALIB_API LUA_API 137#define LUALIB_API LUA_API
142 138
143/* Support for internal assertions. */ 139/* Compatibility support for assertions. */
144#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) 140#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
145#include <assert.h> 141#include <assert.h>
146#endif 142#endif
diff --git a/src/luajit.c b/src/luajit.c
index 97901338..6aed5337 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -61,8 +61,9 @@ static void laction(int i)
61 61
62static void print_usage(void) 62static void print_usage(void)
63{ 63{
64 fprintf(stderr, 64 fputs("usage: ", stderr);
65 "usage: %s [options]... [script [args]...].\n" 65 fputs(progname, stderr);
66 fputs(" [options]... [script [args]...].\n"
66 "Available options are:\n" 67 "Available options are:\n"
67 " -e chunk Execute string " LUA_QL("chunk") ".\n" 68 " -e chunk Execute string " LUA_QL("chunk") ".\n"
68 " -l name Require library " LUA_QL("name") ".\n" 69 " -l name Require library " LUA_QL("name") ".\n"
@@ -73,16 +74,14 @@ static void print_usage(void)
73 " -v Show version information.\n" 74 " -v Show version information.\n"
74 " -E Ignore environment variables.\n" 75 " -E Ignore environment variables.\n"
75 " -- Stop handling options.\n" 76 " -- Stop handling options.\n"
76 " - Execute stdin and stop handling options.\n" 77 " - Execute stdin and stop handling options.\n", stderr);
77 ,
78 progname);
79 fflush(stderr); 78 fflush(stderr);
80} 79}
81 80
82static void l_message(const char *pname, const char *msg) 81static void l_message(const char *pname, const char *msg)
83{ 82{
84 if (pname) fprintf(stderr, "%s: ", pname); 83 if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); }
85 fprintf(stderr, "%s\n", msg); 84 fputs(msg, stderr); fputc('\n', stderr);
86 fflush(stderr); 85 fflush(stderr);
87} 86}
88 87
@@ -125,7 +124,7 @@ static int docall(lua_State *L, int narg, int clear)
125#endif 124#endif
126 lua_remove(L, base); /* remove traceback function */ 125 lua_remove(L, base); /* remove traceback function */
127 /* force a complete garbage collection in case of errors */ 126 /* force a complete garbage collection in case of errors */
128 if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); 127 if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0);
129 return status; 128 return status;
130} 129}
131 130
@@ -154,22 +153,15 @@ static void print_jit_status(lua_State *L)
154 lua_settop(L, 0); /* clear stack */ 153 lua_settop(L, 0); /* clear stack */
155} 154}
156 155
157static int getargs(lua_State *L, char **argv, int n) 156static void createargtable(lua_State *L, char **argv, int argc, int argf)
158{ 157{
159 int narg;
160 int i; 158 int i;
161 int argc = 0; 159 lua_createtable(L, argc - argf, argf);
162 while (argv[argc]) argc++; /* count total number of arguments */
163 narg = argc - (n + 1); /* number of arguments to the script */
164 luaL_checkstack(L, narg + 3, "too many arguments to script");
165 for (i = n+1; i < argc; i++)
166 lua_pushstring(L, argv[i]);
167 lua_createtable(L, narg, n + 1);
168 for (i = 0; i < argc; i++) { 160 for (i = 0; i < argc; i++) {
169 lua_pushstring(L, argv[i]); 161 lua_pushstring(L, argv[i]);
170 lua_rawseti(L, -2, i - n); 162 lua_rawseti(L, -2, i - argf);
171 } 163 }
172 return narg; 164 lua_setglobal(L, "arg");
173} 165}
174 166
175static int dofile(lua_State *L, const char *name) 167static int dofile(lua_State *L, const char *name)
@@ -258,9 +250,9 @@ static void dotty(lua_State *L)
258 const char *oldprogname = progname; 250 const char *oldprogname = progname;
259 progname = NULL; 251 progname = NULL;
260 while ((status = loadline(L)) != -1) { 252 while ((status = loadline(L)) != -1) {
261 if (status == 0) status = docall(L, 0, 0); 253 if (status == LUA_OK) status = docall(L, 0, 0);
262 report(L, status); 254 report(L, status);
263 if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ 255 if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */
264 lua_getglobal(L, "print"); 256 lua_getglobal(L, "print");
265 lua_insert(L, 1); 257 lua_insert(L, 1);
266 if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) 258 if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0)
@@ -275,21 +267,30 @@ static void dotty(lua_State *L)
275 progname = oldprogname; 267 progname = oldprogname;
276} 268}
277 269
278static int handle_script(lua_State *L, char **argv, int n) 270static int handle_script(lua_State *L, char **argx)
279{ 271{
280 int status; 272 int status;
281 const char *fname; 273 const char *fname = argx[0];
282 int narg = getargs(L, argv, n); /* collect arguments */ 274 if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
283 lua_setglobal(L, "arg");
284 fname = argv[n];
285 if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
286 fname = NULL; /* stdin */ 275 fname = NULL; /* stdin */
287 status = luaL_loadfile(L, fname); 276 status = luaL_loadfile(L, fname);
288 lua_insert(L, -(narg+1)); 277 if (status == LUA_OK) {
289 if (status == 0) 278 /* Fetch args from arg table. LUA_INIT or -e might have changed them. */
279 int narg = 0;
280 lua_getglobal(L, "arg");
281 if (lua_istable(L, -1)) {
282 do {
283 narg++;
284 lua_rawgeti(L, -narg, narg);
285 } while (!lua_isnil(L, -1));
286 lua_pop(L, 1);
287 lua_remove(L, -narg);
288 narg--;
289 } else {
290 lua_pop(L, 1);
291 }
290 status = docall(L, narg, 0); 292 status = docall(L, narg, 0);
291 else 293 }
292 lua_pop(L, narg);
293 return report(L, status); 294 return report(L, status);
294} 295}
295 296
@@ -386,7 +387,8 @@ static int dobytecode(lua_State *L, char **argv)
386 } 387 }
387 for (argv++; *argv != NULL; narg++, argv++) 388 for (argv++; *argv != NULL; narg++, argv++)
388 lua_pushstring(L, *argv); 389 lua_pushstring(L, *argv);
389 return report(L, lua_pcall(L, narg, 0, 0)); 390 report(L, lua_pcall(L, narg, 0, 0));
391 return -1;
390} 392}
391 393
392/* check that argument has no extra characters at the end */ 394/* check that argument has no extra characters at the end */
@@ -407,7 +409,7 @@ static int collectargs(char **argv, int *flags)
407 switch (argv[i][1]) { /* Check option. */ 409 switch (argv[i][1]) { /* Check option. */
408 case '-': 410 case '-':
409 notail(argv[i]); 411 notail(argv[i]);
410 return (argv[i+1] != NULL ? i+1 : 0); 412 return i+1;
411 case '\0': 413 case '\0':
412 return i; 414 return i;
413 case 'i': 415 case 'i':
@@ -433,23 +435,23 @@ static int collectargs(char **argv, int *flags)
433 case 'b': /* LuaJIT extension */ 435 case 'b': /* LuaJIT extension */
434 if (*flags) return -1; 436 if (*flags) return -1;
435 *flags |= FLAGS_EXEC; 437 *flags |= FLAGS_EXEC;
436 return 0; 438 return i+1;
437 case 'E': 439 case 'E':
438 *flags |= FLAGS_NOENV; 440 *flags |= FLAGS_NOENV;
439 break; 441 break;
440 default: return -1; /* invalid option */ 442 default: return -1; /* invalid option */
441 } 443 }
442 } 444 }
443 return 0; 445 return i;
444} 446}
445 447
446static int runargs(lua_State *L, char **argv, int n) 448static int runargs(lua_State *L, char **argv, int argn)
447{ 449{
448 int i; 450 int i;
449 for (i = 1; i < n; i++) { 451 for (i = 1; i < argn; i++) {
450 if (argv[i] == NULL) continue; 452 if (argv[i] == NULL) continue;
451 lua_assert(argv[i][0] == '-'); 453 lua_assert(argv[i][0] == '-');
452 switch (argv[i][1]) { /* option */ 454 switch (argv[i][1]) {
453 case 'e': { 455 case 'e': {
454 const char *chunk = argv[i] + 2; 456 const char *chunk = argv[i] + 2;
455 if (*chunk == '\0') chunk = argv[++i]; 457 if (*chunk == '\0') chunk = argv[++i];
@@ -463,10 +465,10 @@ static int runargs(lua_State *L, char **argv, int n)
463 if (*filename == '\0') filename = argv[++i]; 465 if (*filename == '\0') filename = argv[++i];
464 lua_assert(filename != NULL); 466 lua_assert(filename != NULL);
465 if (dolibrary(L, filename)) 467 if (dolibrary(L, filename))
466 return 1; /* stop if file fails */ 468 return 1;
467 break; 469 break;
468 } 470 }
469 case 'j': { /* LuaJIT extension */ 471 case 'j': { /* LuaJIT extension. */
470 const char *cmd = argv[i] + 2; 472 const char *cmd = argv[i] + 2;
471 if (*cmd == '\0') cmd = argv[++i]; 473 if (*cmd == '\0') cmd = argv[++i];
472 lua_assert(cmd != NULL); 474 lua_assert(cmd != NULL);
@@ -474,16 +476,16 @@ static int runargs(lua_State *L, char **argv, int n)
474 return 1; 476 return 1;
475 break; 477 break;
476 } 478 }
477 case 'O': /* LuaJIT extension */ 479 case 'O': /* LuaJIT extension. */
478 if (dojitopt(L, argv[i] + 2)) 480 if (dojitopt(L, argv[i] + 2))
479 return 1; 481 return 1;
480 break; 482 break;
481 case 'b': /* LuaJIT extension */ 483 case 'b': /* LuaJIT extension. */
482 return dobytecode(L, argv+i); 484 return dobytecode(L, argv+i);
483 default: break; 485 default: break;
484 } 486 }
485 } 487 }
486 return 0; 488 return LUA_OK;
487} 489}
488 490
489static int handle_luainit(lua_State *L) 491static int handle_luainit(lua_State *L)
@@ -494,7 +496,7 @@ static int handle_luainit(lua_State *L)
494 const char *init = getenv(LUA_INIT); 496 const char *init = getenv(LUA_INIT);
495#endif 497#endif
496 if (init == NULL) 498 if (init == NULL)
497 return 0; /* status OK */ 499 return LUA_OK;
498 else if (init[0] == '@') 500 else if (init[0] == '@')
499 return dofile(L, init+1); 501 return dofile(L, init+1);
500 else 502 else
@@ -511,45 +513,57 @@ static int pmain(lua_State *L)
511{ 513{
512 struct Smain *s = &smain; 514 struct Smain *s = &smain;
513 char **argv = s->argv; 515 char **argv = s->argv;
514 int script; 516 int argn;
515 int flags = 0; 517 int flags = 0;
516 globalL = L; 518 globalL = L;
517 if (argv[0] && argv[0][0]) progname = argv[0]; 519 if (argv[0] && argv[0][0]) progname = argv[0];
518 LUAJIT_VERSION_SYM(); /* linker-enforced version check */ 520
519 script = collectargs(argv, &flags); 521 LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
520 if (script < 0) { /* invalid args? */ 522
523 argn = collectargs(argv, &flags);
524 if (argn < 0) { /* Invalid args? */
521 print_usage(); 525 print_usage();
522 s->status = 1; 526 s->status = 1;
523 return 0; 527 return 0;
524 } 528 }
529
525 if ((flags & FLAGS_NOENV)) { 530 if ((flags & FLAGS_NOENV)) {
526 lua_pushboolean(L, 1); 531 lua_pushboolean(L, 1);
527 lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); 532 lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
528 } 533 }
529 lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ 534
530 luaL_openlibs(L); /* open libraries */ 535 /* Stop collector during library initialization. */
536 lua_gc(L, LUA_GCSTOP, 0);
537 luaL_openlibs(L);
531 lua_gc(L, LUA_GCRESTART, -1); 538 lua_gc(L, LUA_GCRESTART, -1);
539
540 createargtable(L, argv, s->argc, argn);
541
532 if (!(flags & FLAGS_NOENV)) { 542 if (!(flags & FLAGS_NOENV)) {
533 s->status = handle_luainit(L); 543 s->status = handle_luainit(L);
534 if (s->status != 0) return 0; 544 if (s->status != LUA_OK) return 0;
535 } 545 }
546
536 if ((flags & FLAGS_VERSION)) print_version(); 547 if ((flags & FLAGS_VERSION)) print_version();
537 s->status = runargs(L, argv, (script > 0) ? script : s->argc); 548
538 if (s->status != 0) return 0; 549 s->status = runargs(L, argv, argn);
539 if (script) { 550 if (s->status != LUA_OK) return 0;
540 s->status = handle_script(L, argv, script); 551
541 if (s->status != 0) return 0; 552 if (s->argc > argn) {
553 s->status = handle_script(L, argv + argn);
554 if (s->status != LUA_OK) return 0;
542 } 555 }
556
543 if ((flags & FLAGS_INTERACTIVE)) { 557 if ((flags & FLAGS_INTERACTIVE)) {
544 print_jit_status(L); 558 print_jit_status(L);
545 dotty(L); 559 dotty(L);
546 } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { 560 } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
547 if (lua_stdin_is_tty()) { 561 if (lua_stdin_is_tty()) {
548 print_version(); 562 print_version();
549 print_jit_status(L); 563 print_jit_status(L);
550 dotty(L); 564 dotty(L);
551 } else { 565 } else {
552 dofile(L, NULL); /* executes stdin as a file */ 566 dofile(L, NULL); /* Executes stdin as a file. */
553 } 567 }
554 } 568 }
555 return 0; 569 return 0;
@@ -558,7 +572,7 @@ static int pmain(lua_State *L)
558int main(int argc, char **argv) 572int main(int argc, char **argv)
559{ 573{
560 int status; 574 int status;
561 lua_State *L = lua_open(); /* create state */ 575 lua_State *L = lua_open();
562 if (L == NULL) { 576 if (L == NULL) {
563 l_message(argv[0], "cannot create state: not enough memory"); 577 l_message(argv[0], "cannot create state: not enough memory");
564 return EXIT_FAILURE; 578 return EXIT_FAILURE;
@@ -568,6 +582,6 @@ int main(int argc, char **argv)
568 status = lua_cpcall(L, pmain, NULL); 582 status = lua_cpcall(L, pmain, NULL);
569 report(L, status); 583 report(L, status);
570 lua_close(L); 584 lua_close(L);
571 return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; 585 return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS;
572} 586}
573 587
diff --git a/src/luajit.h b/src/luajit.h
index 73010056..2ee1f908 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.5" 33#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3"
34#define LUAJIT_VERSION_NUM 20005 /* Version 2.0.5 = 02.00.05. */ 34#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_5 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2021 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2021 Mike Pall"
37#define LUAJIT_URL "https://luajit.org/" 37#define LUAJIT_URL "https://luajit.org/"
38 38
@@ -64,6 +64,15 @@ enum {
64/* Control the JIT engine. */ 64/* Control the JIT engine. */
65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); 65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
66 66
67/* Low-overhead profiling API. */
68typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
69 int samples, int vmstate);
70LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
71 luaJIT_profile_callback cb, void *data);
72LUA_API void luaJIT_profile_stop(lua_State *L);
73LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
74 int depth, size_t *len);
75
67/* Enforce (dynamic) linker error for version mismatches. Call from main. */ 76/* Enforce (dynamic) linker error for version mismatches. Call from main. */
68LUA_API void LUAJIT_VERSION_SYM(void); 77LUA_API void LUAJIT_VERSION_SYM(void);
69 78
diff --git a/src/lualib.h b/src/lualib.h
index 9cd39880..5c18e9ec 100644
--- a/src/lualib.h
+++ b/src/lualib.h
@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L);
33LUALIB_API int luaopen_bit(lua_State *L); 33LUALIB_API int luaopen_bit(lua_State *L);
34LUALIB_API int luaopen_jit(lua_State *L); 34LUALIB_API int luaopen_jit(lua_State *L);
35LUALIB_API int luaopen_ffi(lua_State *L); 35LUALIB_API int luaopen_ffi(lua_State *L);
36LUALIB_API int luaopen_string_buffer(lua_State *L);
36 37
37LUALIB_API void luaL_openlibs(lua_State *L); 38LUALIB_API void luaL_openlibs(lua_State *L);
38 39
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 777d695f..7e1a6e04 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -5,6 +5,7 @@
5@rem Then cd to this directory and run this script. Use the following 5@rem Then cd to this directory and run this script. Use the following
6@rem options (in order), if needed. The default is a dynamic release build. 6@rem options (in order), if needed. The default is a dynamic release build.
7@rem 7@rem
8@rem nogc64 disable LJ_GC64 mode for x64
8@rem debug emit debug symbols 9@rem debug emit debug symbols
9@rem amalg amalgamated build 10@rem amalg amalgamated build
10@rem static static linkage 11@rem static static linkage
@@ -20,10 +21,11 @@
20@set LJLIB=lib /nologo /nodefaultlib 21@set LJLIB=lib /nologo /nodefaultlib
21@set DASMDIR=..\dynasm 22@set DASMDIR=..\dynasm
22@set DASM=%DASMDIR%\dynasm.lua 23@set DASM=%DASMDIR%\dynasm.lua
24@set DASC=vm_x64.dasc
23@set LJDLLNAME=lua51.dll 25@set LJDLLNAME=lua51.dll
24@set LJLIBNAME=lua51.lib 26@set LJLIBNAME=lua51.lib
25@set BUILDTYPE=release 27@set BUILDTYPE=release
26@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 28@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
27 29
28%LJCOMPILE% host\minilua.c 30%LJCOMPILE% host\minilua.c
29@if errorlevel 1 goto :BAD 31@if errorlevel 1 goto :BAD
@@ -36,10 +38,17 @@ if exist minilua.exe.manifest^
36@set LJARCH=x64 38@set LJARCH=x64
37@minilua 39@minilua
38@if errorlevel 8 goto :X64 40@if errorlevel 8 goto :X64
41@set DASC=vm_x86.dasc
39@set DASMFLAGS=-D WIN -D JIT -D FFI 42@set DASMFLAGS=-D WIN -D JIT -D FFI
40@set LJARCH=x86 43@set LJARCH=x86
44@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
41:X64 45:X64
42minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 46@if "%1" neq "nogc64" goto :GC64
47@shift
48@set DASC=vm_x86.dasc
49@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
50:GC64
51minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
43@if errorlevel 1 goto :BAD 52@if errorlevel 1 goto :BAD
44 53
45%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c 54%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
@@ -68,6 +77,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
68@shift 77@shift
69@set BUILDTYPE=debug 78@set BUILDTYPE=debug
70@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% 79@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS%
80@set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no
71:NODEBUG 81:NODEBUG
72@set LJLINK=%LJLINK% /%BUILDTYPE% 82@set LJLINK=%LJLINK% /%BUILDTYPE%
73@if "%1"=="amalg" goto :AMALGDLL 83@if "%1"=="amalg" goto :AMALGDLL
diff --git a/src/ps4build.bat b/src/ps4build.bat
index 337a44fa..fdd09d81 100644
--- a/src/ps4build.bat
+++ b/src/ps4build.bat
@@ -2,7 +2,19 @@
2@rem Donated to the public domain. 2@rem Donated to the public domain.
3@rem 3@rem
4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) 4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5@rem or "VS2015 x64 Native Tools Command Prompt".
6@rem
5@rem Then cd to this directory and run this script. 7@rem Then cd to this directory and run this script.
8@rem
9@rem Recommended invocation:
10@rem
11@rem ps4build release build, amalgamated, 64-bit GC
12@rem ps4build debug debug build, amalgamated, 64-bit GC
13@rem
14@rem Additional command-line options (not generally recommended):
15@rem
16@rem gc32 (before debug) 32-bit GC
17@rem noamalg (after debug) non-amalgamated build
6 18
7@if not defined INCLUDE goto :FAIL 19@if not defined INCLUDE goto :FAIL
8@if not defined SCE_ORBIS_SDK_DIR goto :FAIL 20@if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -14,7 +26,15 @@
14@set LJMT=mt /nologo 26@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm 27@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua 28@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 29@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
30@set GC64=
31@set DASC=vm_x64.dasc
32
33@if "%1" neq "gc32" goto :NOGC32
34@shift
35@set GC64=-DLUAJIT_DISABLE_GC64
36@set DASC=vm_x86.dasc
37:NOGC32
18 38
19%LJCOMPILE% host\minilua.c 39%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD 40@if errorlevel 1 goto :BAD
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^
28@if not errorlevel 8 goto :FAIL 48@if not errorlevel 8 goto :FAIL
29 49
30@set DASMFLAGS=-D P64 -D NO_UNWIND 50@set DASMFLAGS=-D P64 -D NO_UNWIND
31minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 51minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
32@if errorlevel 1 goto :BAD 52@if errorlevel 1 goto :BAD
33 53
34%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c 54%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
35@if errorlevel 1 goto :BAD 55@if errorlevel 1 goto :BAD
36%LJLINK% /out:buildvm.exe buildvm*.obj 56%LJLINK% /out:buildvm.exe buildvm*.obj
37@if errorlevel 1 goto :BAD 57@if errorlevel 1 goto :BAD
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
54@if errorlevel 1 goto :BAD 74@if errorlevel 1 goto :BAD
55 75
56@rem ---- Cross compiler ---- 76@rem ---- Cross compiler ----
57@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI 77@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
58@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus 78@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
59@set INCLUDE="" 79@set INCLUDE=""
60 80
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s
63@if "%1" neq "debug" goto :NODEBUG 83@if "%1" neq "debug" goto :NODEBUG
64@shift 84@shift
65@set LJCOMPILE=%LJCOMPILE% -g -O0 85@set LJCOMPILE=%LJCOMPILE% -g -O0
66@set TARGETLIB=libluajitD.a 86@set TARGETLIB=libluajitD_ps4.a
67goto :BUILD 87goto :BUILD
68:NODEBUG 88:NODEBUG
69@set LJCOMPILE=%LJCOMPILE% -O2 89@set LJCOMPILE=%LJCOMPILE% -O2
70@set TARGETLIB=libluajit.a 90@set TARGETLIB=libluajit_ps4.a
71:BUILD 91:BUILD
72del %TARGETLIB% 92del %TARGETLIB%
73@if "%1"=="amalg" goto :AMALG 93@if "%1" neq "noamalg" goto :AMALG
74for %%f in (lj_*.c lib_*.c) do ( 94for %%f in (lj_*.c lib_*.c) do (
75 %LJCOMPILE% %%f 95 %LJCOMPILE% %%f
76 @if errorlevel 1 goto :BAD 96 @if errorlevel 1 goto :BAD
diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat
index 3991dc65..2980e157 100644
--- a/src/psvitabuild.bat
+++ b/src/psvitabuild.bat
@@ -14,7 +14,7 @@
14@set LJMT=mt /nologo 14@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm 15@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua 16@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
18 18
19%LJCOMPILE% host\minilua.c 19%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD 20@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 21d706d8..35ba0e36 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -99,6 +99,7 @@
99|.type NODE, Node 99|.type NODE, Node
100|.type NARGS8, int 100|.type NARGS8, int
101|.type TRACE, GCtrace 101|.type TRACE, GCtrace
102|.type SBUF, SBuf
102| 103|
103|//----------------------------------------------------------------------- 104|//-----------------------------------------------------------------------
104| 105|
@@ -372,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx)
372 | st_vmstate CARG2 373 | st_vmstate CARG2
373 | b ->vm_returnc 374 | b ->vm_returnc
374 | 375 |
376 |->vm_unwind_ext: // Complete external unwind.
377#if !LJ_NO_UNWIND
378 | push {r0, r1, r2, lr}
379 | bl extern _Unwind_Complete
380 | ldr r0, [sp]
381 | bl extern _Unwind_DeleteException
382 | pop {r0, r1, r2, lr}
383 | mov r0, r1
384 | bx r2
385#endif
386 |
375 |//----------------------------------------------------------------------- 387 |//-----------------------------------------------------------------------
376 |//-- Grow stack for calls ----------------------------------------------- 388 |//-- Grow stack for calls -----------------------------------------------
377 |//----------------------------------------------------------------------- 389 |//-----------------------------------------------------------------------
@@ -418,13 +430,14 @@ static void build_subroutines(BuildCtx *ctx)
418 | add CARG2, sp, #CFRAME_RESUME 430 | add CARG2, sp, #CFRAME_RESUME
419 | ldrb CARG1, L->status 431 | ldrb CARG1, L->status
420 | str CARG3, SAVE_ERRF 432 | str CARG3, SAVE_ERRF
421 | str CARG2, L->cframe 433 | str L, SAVE_PC // Any value outside of bytecode is ok.
422 | str CARG3, SAVE_CFRAME 434 | str CARG3, SAVE_CFRAME
423 | cmp CARG1, #0 435 | cmp CARG1, #0
424 | str L, SAVE_PC // Any value outside of bytecode is ok. 436 | str CARG2, L->cframe
425 | beq >3 437 | beq >3
426 | 438 |
427 | // Resume after yield (like a return). 439 | // Resume after yield (like a return).
440 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
428 | mov RA, BASE 441 | mov RA, BASE
429 | ldr BASE, L->base 442 | ldr BASE, L->base
430 | ldr CARG1, L->top 443 | ldr CARG1, L->top
@@ -458,14 +471,15 @@ static void build_subroutines(BuildCtx *ctx)
458 | str CARG3, SAVE_NRES 471 | str CARG3, SAVE_NRES
459 | mov L, CARG1 472 | mov L, CARG1
460 | str CARG1, SAVE_L 473 | str CARG1, SAVE_L
461 | mov BASE, CARG2
462 | str sp, L->cframe // Add our C frame to cframe chain.
463 | ldr DISPATCH, L->glref // Setup pointer to dispatch table. 474 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
475 | mov BASE, CARG2
464 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 476 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
465 | str RC, SAVE_CFRAME 477 | str RC, SAVE_CFRAME
466 | add DISPATCH, DISPATCH, #GG_G2DISP 478 | add DISPATCH, DISPATCH, #GG_G2DISP
479 | str sp, L->cframe // Add our C frame to cframe chain.
467 | 480 |
468 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 481 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
482 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
469 | ldr RB, L->base // RB = old base (for vmeta_call). 483 | ldr RB, L->base // RB = old base (for vmeta_call).
470 | ldr CARG1, L->top 484 | ldr CARG1, L->top
471 | mov MASKR8, #255 485 | mov MASKR8, #255
@@ -491,20 +505,21 @@ static void build_subroutines(BuildCtx *ctx)
491 | mov L, CARG1 505 | mov L, CARG1
492 | ldr RA, L:CARG1->stack 506 | ldr RA, L:CARG1->stack
493 | str CARG1, SAVE_L 507 | str CARG1, SAVE_L
508 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
494 | ldr RB, L->top 509 | ldr RB, L->top
495 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 510 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
496 | ldr RC, L->cframe 511 | ldr RC, L->cframe
512 | add DISPATCH, DISPATCH, #GG_G2DISP
497 | sub RA, RA, RB // Compute -savestack(L, L->top). 513 | sub RA, RA, RB // Compute -savestack(L, L->top).
498 | str sp, L->cframe // Add our C frame to cframe chain.
499 | mov RB, #0 514 | mov RB, #0
500 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. 515 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
501 | str RB, SAVE_ERRF // No error function. 516 | str RB, SAVE_ERRF // No error function.
502 | str RC, SAVE_CFRAME 517 | str RC, SAVE_CFRAME
518 | str sp, L->cframe // Add our C frame to cframe chain.
519 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
503 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) 520 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
504 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
505 | movs BASE, CRET1 521 | movs BASE, CRET1
506 | mov PC, #FRAME_CP 522 | mov PC, #FRAME_CP
507 | add DISPATCH, DISPATCH, #GG_G2DISP
508 | bne <3 // Else continue with the call. 523 | bne <3 // Else continue with the call.
509 | b ->vm_leave_cp // No base? Just remove C frame. 524 | b ->vm_leave_cp // No base? Just remove C frame.
510 | 525 |
@@ -614,6 +629,16 @@ static void build_subroutines(BuildCtx *ctx)
614 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 629 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
615 | b ->vm_call_dispatch_f 630 | b ->vm_call_dispatch_f
616 | 631 |
632 |->vmeta_tgetr:
633 | .IOS mov RC, BASE
634 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
635 | // Returns cTValue * or NULL.
636 | .IOS mov BASE, RC
637 | cmp CRET1, #0
638 | ldrdne CARG12, [CRET1]
639 | mvneq CARG2, #~LJ_TNIL
640 | b ->BC_TGETR_Z
641 |
617 |//----------------------------------------------------------------------- 642 |//-----------------------------------------------------------------------
618 | 643 |
619 |->vmeta_tsets1: 644 |->vmeta_tsets1:
@@ -671,6 +696,16 @@ static void build_subroutines(BuildCtx *ctx)
671 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 696 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
672 | b ->vm_call_dispatch_f 697 | b ->vm_call_dispatch_f
673 | 698 |
699 |->vmeta_tsetr:
700 | str BASE, L->base
701 | .IOS mov RC, BASE
702 | mov CARG1, L
703 | str PC, SAVE_PC
704 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
705 | // Returns TValue *.
706 | .IOS mov BASE, RC
707 | b ->BC_TSETR_Z
708 |
674 |//-- Comparison metamethods --------------------------------------------- 709 |//-- Comparison metamethods ---------------------------------------------
675 | 710 |
676 |->vmeta_comp: 711 |->vmeta_comp:
@@ -735,6 +770,17 @@ static void build_subroutines(BuildCtx *ctx)
735 | b <3 770 | b <3
736 |.endif 771 |.endif
737 | 772 |
773 |->vmeta_istype:
774 | sub PC, PC, #4
775 | str BASE, L->base
776 | mov CARG1, L
777 | lsr CARG2, RA, #3
778 | mov CARG3, RC
779 | str PC, SAVE_PC
780 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
781 | .IOS ldr BASE, L->base
782 | b ->cont_nop
783 |
738 |//-- Arithmetic metamethods --------------------------------------------- 784 |//-- Arithmetic metamethods ---------------------------------------------
739 | 785 |
740 |->vmeta_arith_vn: 786 |->vmeta_arith_vn:
@@ -966,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx)
966 | cmp TAB:RB, #0 1012 | cmp TAB:RB, #0
967 | beq ->fff_restv 1013 | beq ->fff_restv
968 | ldr CARG3, TAB:RB->hmask 1014 | ldr CARG3, TAB:RB->hmask
969 | ldr CARG4, STR:RC->hash 1015 | ldr CARG4, STR:RC->sid
970 | ldr NODE:INS, TAB:RB->node 1016 | ldr NODE:INS, TAB:RB->node
971 | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask 1017 | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
972 | add CARG3, CARG3, CARG3, lsl #1 1018 | add CARG3, CARG3, CARG3, lsl #1
973 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 1019 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
974 |3: // Rearranged logic, because we expect _not_ to find the key. 1020 |3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1052,7 +1098,7 @@ static void build_subroutines(BuildCtx *ctx)
1052 | ffgccheck 1098 | ffgccheck
1053 | mov CARG1, L 1099 | mov CARG1, L
1054 | mov CARG2, BASE 1100 | mov CARG2, BASE
1055 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1101 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1056 | // Returns GCstr *. 1102 | // Returns GCstr *.
1057 | ldr BASE, L->base 1103 | ldr BASE, L->base
1058 | mvn CARG2, #~LJ_TSTR 1104 | mvn CARG2, #~LJ_TSTR
@@ -1230,9 +1276,10 @@ static void build_subroutines(BuildCtx *ctx)
1230 | ldr CARG3, L:RA->base 1276 | ldr CARG3, L:RA->base
1231 | mv_vmstate CARG2, INTERP 1277 | mv_vmstate CARG2, INTERP
1232 | ldr CARG4, L:RA->top 1278 | ldr CARG4, L:RA->top
1233 | st_vmstate CARG2
1234 | cmp CRET1, #LUA_YIELD 1279 | cmp CRET1, #LUA_YIELD
1235 | ldr BASE, L->base 1280 | ldr BASE, L->base
1281 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
1282 | st_vmstate CARG2
1236 | bhi >8 1283 | bhi >8
1237 | subs RC, CARG4, CARG3 1284 | subs RC, CARG4, CARG3
1238 | ldr CARG1, L->maxstack 1285 | ldr CARG1, L->maxstack
@@ -1500,19 +1547,6 @@ static void build_subroutines(BuildCtx *ctx)
1500 | math_extern2 atan2 1547 | math_extern2 atan2
1501 | math_extern2 fmod 1548 | math_extern2 fmod
1502 | 1549 |
1503 |->ff_math_deg:
1504 |.if FPU
1505 | .ffunc_d math_rad
1506 | vldr d1, CFUNC:CARG3->upvalue[0]
1507 | vmul.f64 d0, d0, d1
1508 | b ->fff_resd
1509 |.else
1510 | .ffunc_n math_rad
1511 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1512 | bl extern __aeabi_dmul
1513 | b ->fff_restv
1514 |.endif
1515 |
1516 |.if HFABI 1550 |.if HFABI
1517 | .ffunc math_ldexp 1551 | .ffunc math_ldexp
1518 | ldr CARG4, [BASE, #4] 1552 | ldr CARG4, [BASE, #4]
@@ -1682,17 +1716,11 @@ static void build_subroutines(BuildCtx *ctx)
1682 |.endif 1716 |.endif
1683 |.endmacro 1717 |.endmacro
1684 | 1718 |
1685 | math_minmax math_min, gt, hi 1719 | math_minmax math_min, gt, pl
1686 | math_minmax math_max, lt, lo 1720 | math_minmax math_max, lt, le
1687 | 1721 |
1688 |//-- String library ----------------------------------------------------- 1722 |//-- String library -----------------------------------------------------
1689 | 1723 |
1690 |.ffunc_1 string_len
1691 | checkstr CARG2, ->fff_fallback
1692 | ldr CARG1, STR:CARG1->len
1693 | mvn CARG2, #~LJ_TISNUM
1694 | b ->fff_restv
1695 |
1696 |.ffunc string_byte // Only handle the 1-arg case here. 1724 |.ffunc string_byte // Only handle the 1-arg case here.
1697 | ldrd CARG12, [BASE] 1725 | ldrd CARG12, [BASE]
1698 | ldr PC, [BASE, FRAME_PC] 1726 | ldr PC, [BASE, FRAME_PC]
@@ -1725,6 +1753,7 @@ static void build_subroutines(BuildCtx *ctx)
1725 | mov CARG1, L 1753 | mov CARG1, L
1726 | str PC, SAVE_PC 1754 | str PC, SAVE_PC
1727 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 1755 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1756 |->fff_resstr:
1728 | // Returns GCstr *. 1757 | // Returns GCstr *.
1729 | ldr BASE, L->base 1758 | ldr BASE, L->base
1730 | mvn CARG2, #~LJ_TSTR 1759 | mvn CARG2, #~LJ_TSTR
@@ -1768,91 +1797,28 @@ static void build_subroutines(BuildCtx *ctx)
1768 | mvn CARG2, #~LJ_TSTR 1797 | mvn CARG2, #~LJ_TSTR
1769 | b ->fff_restv 1798 | b ->fff_restv
1770 | 1799 |
1771 |.ffunc string_rep // Only handle the 1-char case inline. 1800 |.macro ffstring_op, name
1772 | ffgccheck 1801 | .ffunc string_ .. name
1773 | ldrd CARG12, [BASE]
1774 | ldrd CARG34, [BASE, #8]
1775 | cmp NARGS8:RC, #16
1776 | bne ->fff_fallback // Exactly 2 arguments
1777 | checktp CARG2, LJ_TSTR
1778 | checktpeq CARG4, LJ_TISNUM
1779 | bne ->fff_fallback
1780 | subs CARG4, CARG3, #1
1781 | ldr CARG2, STR:CARG1->len
1782 | blt ->fff_emptystr // Count <= 0?
1783 | cmp CARG2, #1
1784 | blo ->fff_emptystr // Zero-length string?
1785 | bne ->fff_fallback // Fallback for > 1-char strings.
1786 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1787 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1788 | ldr CARG1, STR:CARG1[1]
1789 | cmp RB, CARG3
1790 | blo ->fff_fallback
1791 |1: // Fill buffer with char.
1792 | strb CARG1, [CARG2, CARG4]
1793 | subs CARG4, CARG4, #1
1794 | bge <1
1795 | b ->fff_newstr
1796 |
1797 |.ffunc string_reverse
1798 | ffgccheck 1802 | ffgccheck
1799 | ldrd CARG12, [BASE] 1803 | ldr CARG3, [BASE, #4]
1800 | cmp NARGS8:RC, #8 1804 | cmp NARGS8:RC, #8
1805 | ldr STR:CARG2, [BASE]
1801 | blo ->fff_fallback 1806 | blo ->fff_fallback
1802 | checkstr CARG2, ->fff_fallback 1807 | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
1803 | ldr CARG3, STR:CARG1->len 1808 | checkstr CARG3, ->fff_fallback
1804 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1809 | ldr CARG4, SBUF:CARG1->b
1805 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1810 | str BASE, L->base
1806 | mov CARG4, CARG3 1811 | str PC, SAVE_PC
1807 | add CARG1, STR:CARG1, #sizeof(GCstr) 1812 | str L, SBUF:CARG1->L
1808 | cmp RB, CARG3 1813 | str CARG4, SBUF:CARG1->w
1809 | blo ->fff_fallback 1814 | bl extern lj_buf_putstr_ .. name
1810 |1: // Reverse string copy. 1815 | bl extern lj_buf_tostr
1811 | ldrb RB, [CARG1], #1 1816 | b ->fff_resstr
1812 | subs CARG4, CARG4, #1
1813 | blt ->fff_newstr
1814 | strb RB, [CARG2, CARG4]
1815 | b <1
1816 |
1817 |.macro ffstring_case, name, lo
1818 | .ffunc name
1819 | ffgccheck
1820 | ldrd CARG12, [BASE]
1821 | cmp NARGS8:RC, #8
1822 | blo ->fff_fallback
1823 | checkstr CARG2, ->fff_fallback
1824 | ldr CARG3, STR:CARG1->len
1825 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1826 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1827 | mov CARG4, #0
1828 | add CARG1, STR:CARG1, #sizeof(GCstr)
1829 | cmp RB, CARG3
1830 | blo ->fff_fallback
1831 |1: // ASCII case conversion.
1832 | ldrb RB, [CARG1, CARG4]
1833 | cmp CARG4, CARG3
1834 | bhs ->fff_newstr
1835 | sub RC, RB, #lo
1836 | cmp RC, #26
1837 | eorlo RB, RB, #0x20
1838 | strb RB, [CARG2, CARG4]
1839 | add CARG4, CARG4, #1
1840 | b <1
1841 |.endmacro 1817 |.endmacro
1842 | 1818 |
1843 |ffstring_case string_lower, 65 1819 |ffstring_op reverse
1844 |ffstring_case string_upper, 97 1820 |ffstring_op lower
1845 | 1821 |ffstring_op upper
1846 |//-- Table library ------------------------------------------------------
1847 |
1848 |.ffunc_1 table_getn
1849 | checktab CARG2, ->fff_fallback
1850 | .IOS mov RA, BASE
1851 | bl extern lj_tab_len // (GCtab *t)
1852 | // Returns uint32_t (but less than 2^31).
1853 | .IOS mov BASE, RA
1854 | mvn CARG2, #~LJ_TISNUM
1855 | b ->fff_restv
1856 | 1822 |
1857 |//-- Bit library -------------------------------------------------------- 1823 |//-- Bit library --------------------------------------------------------
1858 | 1824 |
@@ -2127,6 +2093,66 @@ static void build_subroutines(BuildCtx *ctx)
2127 | ldr INS, [PC, #-4] 2093 | ldr INS, [PC, #-4]
2128 | bx CRET1 2094 | bx CRET1
2129 | 2095 |
2096 |->cont_stitch: // Trace stitching.
2097 |.if JIT
2098 | // RA = resultptr, CARG4 = meta base
2099 | ldr RB, SAVE_MULTRES
2100 | ldr INS, [PC, #-4]
2101 | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
2102 | subs RB, RB, #8
2103 | decode_RA8 RC, INS // Call base.
2104 | beq >2
2105 |1: // Move results down.
2106 | ldrd CARG12, [RA]
2107 | add RA, RA, #8
2108 | subs RB, RB, #8
2109 | strd CARG12, [BASE, RC]
2110 | add RC, RC, #8
2111 | bne <1
2112 |2:
2113 | decode_RA8 RA, INS
2114 | decode_RB8 RB, INS
2115 | add RA, RA, RB
2116 |3:
2117 | cmp RA, RC
2118 | mvn CARG2, #~LJ_TNIL
2119 | bhi >9 // More results wanted?
2120 |
2121 | ldrh RA, TRACE:CARG3->traceno
2122 | ldrh RC, TRACE:CARG3->link
2123 | cmp RC, RA
2124 | beq ->cont_nop // Blacklisted.
2125 | cmp RC, #0
2126 | bne =>BC_JLOOP // Jump to stitched trace.
2127 |
2128 | // Stitch a new trace to the previous trace.
2129 | str RA, [DISPATCH, #DISPATCH_J(exitno)]
2130 | str L, [DISPATCH, #DISPATCH_J(L)]
2131 | str BASE, L->base
2132 | sub CARG1, DISPATCH, #-GG_DISP2J
2133 | mov CARG2, PC
2134 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2135 | ldr BASE, L->base
2136 | b ->cont_nop
2137 |
2138 |9: // Fill up results with nil.
2139 | strd CARG12, [BASE, RC]
2140 | add RC, RC, #8
2141 | b <3
2142 |.endif
2143 |
2144 |->vm_profhook: // Dispatch target for profiler hook.
2145#if LJ_HASPROFILE
2146 | mov CARG1, L
2147 | str BASE, L->base
2148 | mov CARG2, PC
2149 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2150 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2151 | ldr BASE, L->base
2152 | sub PC, PC, #4
2153 | b ->cont_nop
2154#endif
2155 |
2130 |//----------------------------------------------------------------------- 2156 |//-----------------------------------------------------------------------
2131 |//-- Trace exit handler ------------------------------------------------- 2157 |//-- Trace exit handler -------------------------------------------------
2132 |//----------------------------------------------------------------------- 2158 |//-----------------------------------------------------------------------
@@ -2151,14 +2177,14 @@ static void build_subroutines(BuildCtx *ctx)
2151 | add CARG1, CARG1, CARG2, asr #6 2177 | add CARG1, CARG1, CARG2, asr #6
2152 | ldr CARG2, [lr, #4] // Load exit stub group offset. 2178 | ldr CARG2, [lr, #4] // Load exit stub group offset.
2153 | sub CARG1, CARG1, lr 2179 | sub CARG1, CARG1, lr
2154 | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] 2180 | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
2155 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 2181 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
2156 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 2182 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
2157 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] 2183 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
2158 | mov CARG4, #0 2184 | mov CARG4, #0
2159 | str L, [DISPATCH, #DISPATCH_J(L)]
2160 | str BASE, L->base 2185 | str BASE, L->base
2161 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] 2186 | str L, [DISPATCH, #DISPATCH_J(L)]
2187 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
2162 | sub CARG1, DISPATCH, #-GG_DISP2J 2188 | sub CARG1, DISPATCH, #-GG_DISP2J
2163 | mov CARG2, sp 2189 | mov CARG2, sp
2164 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 2190 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
@@ -2177,13 +2203,14 @@ static void build_subroutines(BuildCtx *ctx)
2177 | ldr L, SAVE_L 2203 | ldr L, SAVE_L
2178 |1: 2204 |1:
2179 | cmp CARG1, #0 2205 | cmp CARG1, #0
2180 | blt >3 // Check for error from exit. 2206 | blt >9 // Check for error from exit.
2181 | lsl RC, CARG1, #3 2207 | lsl RC, CARG1, #3
2182 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] 2208 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2183 | str RC, SAVE_MULTRES 2209 | str RC, SAVE_MULTRES
2184 | mov CARG3, #0 2210 | mov CARG3, #0
2211 | str BASE, L->base
2185 | ldr CARG2, LFUNC:CARG2->field_pc 2212 | ldr CARG2, LFUNC:CARG2->field_pc
2186 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] 2213 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
2187 | mv_vmstate CARG4, INTERP 2214 | mv_vmstate CARG4, INTERP
2188 | ldr KBASE, [CARG2, #PC2PROTO(k)] 2215 | ldr KBASE, [CARG2, #PC2PROTO(k)]
2189 | // Modified copy of ins_next which handles function header dispatch, too. 2216 | // Modified copy of ins_next which handles function header dispatch, too.
@@ -2192,17 +2219,35 @@ static void build_subroutines(BuildCtx *ctx)
2192 | ldr INS, [PC], #4 2219 | ldr INS, [PC], #4
2193 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 2220 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
2194 | st_vmstate CARG4 2221 | st_vmstate CARG4
2222 | cmp OP, #BC_FUNCC+2 // Fast function?
2223 | bhs >4
2224 |2:
2195 | cmp OP, #BC_FUNCF // Function header? 2225 | cmp OP, #BC_FUNCF // Function header?
2196 | ldr OP, [DISPATCH, OP, lsl #2] 2226 | ldr OP, [DISPATCH, OP, lsl #2]
2197 | decode_RA8 RA, INS 2227 | decode_RA8 RA, INS
2198 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. 2228 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
2199 | subhs RC, RC, #8 2229 | subhs RC, RC, #8
2200 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 2230 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
2231 | ldrhs CARG3, [BASE, FRAME_FUNC]
2201 | bx OP 2232 | bx OP
2202 | 2233 |
2203 |3: // Rethrow error from the right C frame. 2234 |4: // Check frame below fast function.
2235 | ldr CARG1, [BASE, FRAME_PC]
2236 | ands CARG2, CARG1, #FRAME_TYPE
2237 | bne <2 // Trace stitching continuation?
2238 | // Otherwise set KBASE for Lua function below fast function.
2239 | ldr CARG3, [CARG1, #-4]
2240 | decode_RA8 CARG1, CARG3
2241 | sub CARG2, BASE, CARG1
2242 | ldr LFUNC:CARG3, [CARG2, #-16]
2243 | ldr CARG3, LFUNC:CARG3->field_pc
2244 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2245 | b <2
2246 |
2247 |9: // Rethrow error from the right C frame.
2248 | rsb CARG2, CARG1, #0
2204 | mov CARG1, L 2249 | mov CARG1, L
2205 | bl extern lj_err_run // (lua_State *L) 2250 | bl extern lj_err_trace // (lua_State *L, int errcode)
2206 |.endif 2251 |.endif
2207 | 2252 |
2208 |//----------------------------------------------------------------------- 2253 |//-----------------------------------------------------------------------
@@ -2832,6 +2877,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2832 | ins_next 2877 | ins_next
2833 break; 2878 break;
2834 2879
2880 case BC_ISTYPE:
2881 | // RA = src*8, RC = -type
2882 | ldrd CARG12, [BASE, RA]
2883 | ins_next1
2884 | cmn CARG2, RC
2885 | ins_next2
2886 | bne ->vmeta_istype
2887 | ins_next3
2888 break;
2889 case BC_ISNUM:
2890 | // RA = src*8, RC = -(TISNUM-1)
2891 | ldrd CARG12, [BASE, RA]
2892 | ins_next1
2893 | checktp CARG2, LJ_TISNUM
2894 | ins_next2
2895 | bhs ->vmeta_istype
2896 | ins_next3
2897 break;
2898
2835 /* -- Unary ops --------------------------------------------------------- */ 2899 /* -- Unary ops --------------------------------------------------------- */
2836 2900
2837 case BC_MOV: 2901 case BC_MOV:
@@ -3436,10 +3500,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3436 |->BC_TGETS_Z: 3500 |->BC_TGETS_Z:
3437 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 3501 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
3438 | ldr CARG3, TAB:CARG1->hmask 3502 | ldr CARG3, TAB:CARG1->hmask
3439 | ldr CARG4, STR:RC->hash 3503 | ldr CARG4, STR:RC->sid
3440 | ldr NODE:INS, TAB:CARG1->node 3504 | ldr NODE:INS, TAB:CARG1->node
3441 | mov TAB:RB, TAB:CARG1 3505 | mov TAB:RB, TAB:CARG1
3442 | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask 3506 | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
3443 | add CARG3, CARG3, CARG3, lsl #1 3507 | add CARG3, CARG3, CARG3, lsl #1
3444 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 3508 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
3445 |1: 3509 |1:
@@ -3502,6 +3566,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3502 | bne <1 // 'no __index' flag set: done. 3566 | bne <1 // 'no __index' flag set: done.
3503 | b ->vmeta_tgetb 3567 | b ->vmeta_tgetb
3504 break; 3568 break;
3569 case BC_TGETR:
3570 | decode_RB8 RB, INS
3571 | decode_RC8 RC, INS
3572 | // RA = dst*8, RB = table*8, RC = key*8
3573 | ldr TAB:CARG1, [BASE, RB]
3574 | ldr CARG2, [BASE, RC]
3575 | ldr CARG4, TAB:CARG1->array
3576 | ldr CARG3, TAB:CARG1->asize
3577 | add CARG4, CARG4, CARG2, lsl #3
3578 | cmp CARG2, CARG3 // In array part?
3579 | bhs ->vmeta_tgetr
3580 | ldrd CARG12, [CARG4]
3581 |->BC_TGETR_Z:
3582 | ins_next1
3583 | ins_next2
3584 | strd CARG12, [BASE, RA]
3585 | ins_next3
3586 break;
3505 3587
3506 case BC_TSETV: 3588 case BC_TSETV:
3507 | decode_RB8 RB, INS 3589 | decode_RB8 RB, INS
@@ -3565,10 +3647,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3565 |->BC_TSETS_Z: 3647 |->BC_TSETS_Z:
3566 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 3648 | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
3567 | ldr CARG3, TAB:CARG1->hmask 3649 | ldr CARG3, TAB:CARG1->hmask
3568 | ldr CARG4, STR:RC->hash 3650 | ldr CARG4, STR:RC->sid
3569 | ldr NODE:INS, TAB:CARG1->node 3651 | ldr NODE:INS, TAB:CARG1->node
3570 | mov TAB:RB, TAB:CARG1 3652 | mov TAB:RB, TAB:CARG1
3571 | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask 3653 | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
3572 | add CARG3, CARG3, CARG3, lsl #1 3654 | add CARG3, CARG3, CARG3, lsl #1
3573 | mov CARG4, #0 3655 | mov CARG4, #0
3574 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 3656 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
@@ -3672,6 +3754,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3672 | barrierback TAB:CARG1, INS, CARG3 3754 | barrierback TAB:CARG1, INS, CARG3
3673 | b <2 3755 | b <2
3674 break; 3756 break;
3757 case BC_TSETR:
3758 | decode_RB8 RB, INS
3759 | decode_RC8 RC, INS
3760 | // RA = src*8, RB = table*8, RC = key*8
3761 | ldr TAB:CARG2, [BASE, RB]
3762 | ldr CARG3, [BASE, RC]
3763 | ldrb INS, TAB:CARG2->marked
3764 | ldr CARG1, TAB:CARG2->array
3765 | ldr CARG4, TAB:CARG2->asize
3766 | tst INS, #LJ_GC_BLACK // isblack(table)
3767 | add CARG1, CARG1, CARG3, lsl #3
3768 | bne >7
3769 |2:
3770 | cmp CARG3, CARG4 // In array part?
3771 | bhs ->vmeta_tsetr
3772 |->BC_TSETR_Z:
3773 | ldrd CARG34, [BASE, RA]
3774 | ins_next1
3775 | ins_next2
3776 | strd CARG34, [CARG1]
3777 | ins_next3
3778 |
3779 |7: // Possible table write barrier for the value. Skip valiswhite check.
3780 | barrierback TAB:CARG2, INS, RB
3781 | b <2
3782 break;
3675 3783
3676 case BC_TSETM: 3784 case BC_TSETM:
3677 | // RA = base*8 (table at base-1), RC = num_const (start index) 3785 | // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -4269,7 +4377,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4269 | st_vmstate CARG2 4377 | st_vmstate CARG2
4270 | ldr RA, TRACE:RC->mcode 4378 | ldr RA, TRACE:RC->mcode
4271 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 4379 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
4272 | str L, [DISPATCH, #DISPATCH_GL(jit_L)] 4380 | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
4273 | bx RA 4381 | bx RA
4274 |.endif 4382 |.endif
4275 break; 4383 break;
@@ -4387,6 +4495,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4387 | ldr BASE, L->base 4495 | ldr BASE, L->base
4388 | mv_vmstate CARG3, INTERP 4496 | mv_vmstate CARG3, INTERP
4389 | ldr CRET2, L->top 4497 | ldr CRET2, L->top
4498 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
4390 | lsl RC, CRET1, #3 4499 | lsl RC, CRET1, #3
4391 | st_vmstate CARG3 4500 | st_vmstate CARG3
4392 | ldr PC, [BASE, FRAME_PC] 4501 | ldr PC, [BASE, FRAME_PC]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
new file mode 100644
index 00000000..92f89cd6
--- /dev/null
+++ b/src/vm_arm64.dasc
@@ -0,0 +1,3989 @@
1|// Low-level VM code for ARM64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch arm64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|// The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
19|//
20|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
21|// x18 is reserved on most platforms. Don't use it, save it or restore it.
22|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
23|// depending on the instruction.
24|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
25|//
26|// x0-x7/v0-v7 hold parameters and results.
27|
28|// Fixed register assignments for the interpreter.
29|
30|// The following must be C callee-save.
31|.define BASE, x19 // Base of current Lua stack frame.
32|.define KBASE, x20 // Constants of current Lua function.
33|.define PC, x21 // Next PC.
34|.define GLREG, x22 // Global state.
35|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
36|.define TISNUM, x24 // Constant LJ_TISNUM << 47.
37|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15.
38|.define TISNIL, x26 // Constant -1LL.
39|.define fp, x29 // Yes, we have to maintain a frame pointer.
40|
41|.define ST_INTERP, w26 // Constant -1.
42|
43|// The following temporaries are not saved across C calls, except for RA/RC.
44|.define RA, x27
45|.define RC, x28
46|.define RB, x17
47|.define RAw, w27
48|.define RCw, w28
49|.define RBw, w17
50|.define INS, x16
51|.define INSw, w16
52|.define ITYPE, x15
53|.define TMP0, x8
54|.define TMP1, x9
55|.define TMP2, x10
56|.define TMP3, x11
57|.define TMP0w, w8
58|.define TMP1w, w9
59|.define TMP2w, w10
60|.define TMP3w, w11
61|
62|// Calling conventions. Also used as temporaries.
63|.define CARG1, x0
64|.define CARG2, x1
65|.define CARG3, x2
66|.define CARG4, x3
67|.define CARG5, x4
68|.define CARG1w, w0
69|.define CARG2w, w1
70|.define CARG3w, w2
71|.define CARG4w, w3
72|.define CARG5w, w4
73|
74|.define FARG1, d0
75|.define FARG2, d1
76|
77|.define CRET1, x0
78|.define CRET1w, w0
79|
80|// Stack layout while in interpreter. Must match with lj_frame.h.
81|
82|.define CFRAME_SPACE, 208
83|//----- 16 byte aligned, <-- sp entering interpreter
84|// Unused [sp, #204] // 32 bit values
85|.define SAVE_NRES, [sp, #200]
86|.define SAVE_ERRF, [sp, #196]
87|.define SAVE_MULTRES, [sp, #192]
88|.define TMPD, [sp, #184] // 64 bit values
89|.define SAVE_L, [sp, #176]
90|.define SAVE_PC, [sp, #168]
91|.define SAVE_CFRAME, [sp, #160]
92|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves
93|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves
94|.define SAVE_LR, [sp, #8]
95|.define SAVE_FP, [sp]
96|//----- 16 byte aligned, <-- sp while in interpreter.
97|
98|.define TMPDofs, #184
99|
100|.macro save_, gpr1, gpr2, fpr1, fpr2 // Store one FPR pair and one GPR pair into the frame save areas.
101| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] // FPR slot offset is counted from d8.
102| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] // GPR slot offset is counted from x19.
103|.endmacro
104|.macro rest_, gpr1, gpr2, fpr1, fpr2 // Reload one FPR pair and one GPR pair from the frame save areas.
105| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] // FPR slot offset is counted from d8.
106| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] // GPR slot offset is counted from x19.
107|.endmacro
108|
109|.macro saveregs // Allocate CFRAME_SPACE bytes and save fp, lr, x19-x28 and d8-d15.
110| stp fp, lr, [sp, #-CFRAME_SPACE]! // Pre-decrement sp; fp/lr are stored at [sp] and [sp, #8].
111| add fp, sp, #0 // fp = sp.
112| stp x19, x20, [sp, # SAVE_GPR_] // First GPR pair goes at the start of the GPR save area.
113| save_ 21, 22, 8, 9
114| save_ 23, 24, 10, 11
115| save_ 25, 26, 12, 13
116| save_ 27, 28, 14, 15
117|.endmacro
118|.macro restoreregs // Reload x19-x28, d8-d15, fp and lr, then release CFRAME_SPACE bytes.
119| ldp x19, x20, [sp, # SAVE_GPR_] // First GPR pair sits at the start of the GPR save area.
120| rest_ 21, 22, 8, 9
121| rest_ 23, 24, 10, 11
122| rest_ 25, 26, 12, 13
123| rest_ 27, 28, 14, 15
124| ldp fp, lr, [sp], # CFRAME_SPACE // Load fp/lr from [sp], then post-increment sp.
125|.endmacro
126|
127|// Type definitions. Some of these are only used for documentation.
128|.type L, lua_State, LREG
129|.type GL, global_State, GLREG
130|.type TVALUE, TValue
131|.type GCOBJ, GCobj
132|.type STR, GCstr
133|.type TAB, GCtab
134|.type LFUNC, GCfuncL
135|.type CFUNC, GCfuncC
136|.type PROTO, GCproto
137|.type UPVAL, GCupval
138|.type NODE, Node
139|.type NARGS8, int
140|.type TRACE, GCtrace
141|.type SBUF, SBuf
142|
143|//-----------------------------------------------------------------------
144|
145|// Trap for not-yet-implemented parts.
146|.macro NYI; brk; .endmacro
147|
148|//-----------------------------------------------------------------------
149|
150|// Access to frame relative to BASE.
151|.define FRAME_FUNC, #-16
152|.define FRAME_PC, #-8
153|
154|// Endian-specific defines.
155|.if ENDIAN_LE
156|.define LO, 0
157|.define OFS_RD, 2
158|.define OFS_RB, 3
159|.define OFS_RA, 1
160|.define OFS_OP, 0
161|.else
162|.define LO, 4
163|.define OFS_RD, 0
164|.define OFS_RB, 0
165|.define OFS_RA, 2
166|.define OFS_OP, 3
167|.endif
168|
169|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro // dst = bits 8..15 of ins.
170|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro // dst = bits 24..31 of ins.
171|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro // dst = bits 16..23 of ins.
172|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro // dst = bits 16..31 of ins.
173|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro // dst = (src & 0xff) << 3.
174|
175|// Instruction decode+dispatch.
176|.macro ins_NEXT // Fetch the instruction at PC, decode operands A and D, and branch to its handler.
177| ldr INSw, [PC], #4 // Load 32 bit instruction word, then post-increment PC by 4.
178| add TMP1, GL, INS, uxtb #3 // TMP1 = GL + (INS & 0xff)*8.
179| decode_RA RA, INS
180| ldr TMP0, [TMP1, #GG_G2DISP] // Load the branch target from [TMP1 + GG_G2DISP].
181| decode_RD RC, INS
182| br TMP0
183|.endmacro
184|
185|// Instruction footer.
186|.if 1
187| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
188| .define ins_next, ins_NEXT
189| .define ins_next_, ins_NEXT
190|.else
191| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
192| // Affects only certain kinds of benchmarks (and only with -j off).
193| .macro ins_next
194| b ->ins_next
195| .endmacro
196| .macro ins_next_
197| ->ins_next:
198| ins_NEXT
199| .endmacro
200|.endif
201|
202|// Call decode and dispatch.
203|.macro ins_callt // Enter the function in CARG3: load its first instruction and branch to its handler.
204| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
205| ldr PC, LFUNC:CARG3->pc // PC = pc field of the function object.
206| ldr INSw, [PC], #4 // Load 32 bit instruction word, then post-increment PC by 4.
207| add TMP1, GL, INS, uxtb #3 // TMP1 = GL + (INS & 0xff)*8.
208| decode_RA RA, INS
209| ldr TMP0, [TMP1, #GG_G2DISP] // Load the branch target from [TMP1 + GG_G2DISP].
210| add RA, BASE, RA, lsl #3 // RA = BASE + A*8.
211| br TMP0
212|.endmacro
213|
214|.macro ins_call // Like ins_callt, but first stores the caller PC into the new frame.
215| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
216| str PC, [BASE, FRAME_PC] // FRAME_PC is the slot at BASE-8.
217| ins_callt
218|.endmacro
219|
220|//-----------------------------------------------------------------------
221|
222|// Macros to check the TValue type and extract the GCobj. Branch on failure. Both forms overwrite ITYPE.
223|.macro checktp, reg, tp, target // In-place form: reg is replaced by the masked pointer.
224| asr ITYPE, reg, #47 // ITYPE = sign-extended tag taken from bits 47..63.
225| cmn ITYPE, #-tp // Sets flags for ITYPE - tp; eq iff the tag matches.
226| and reg, reg, #LJ_GCVMASK // Strip the tag (the flags from cmn are preserved).
227| bne target
228|.endmacro
229|.macro checktp, dst, reg, tp, target // Two-register form: reg is preserved, dst receives the masked pointer.
230| asr ITYPE, reg, #47
231| cmn ITYPE, #-tp
232| and dst, reg, #LJ_GCVMASK // dst is written even when the check fails.
233| bne target
234|.endmacro
235|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro // In-place check against LJ_TSTR.
236|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro // In-place check against LJ_TTAB.
237|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro // In-place check against LJ_TFUNC.
238|.macro checkint, reg, target // Branch to target unless the upper 32 bits of reg equal TISNUMhi.
239| cmp TISNUMhi, reg, lsr #32 // Compare TISNUMhi with the upper word of reg.
240| bne target
241|.endmacro
242|.macro checknum, reg, target // Branch to target if TISNUMhi <= upper word of reg (unsigned).
243| cmp TISNUMhi, reg, lsr #32
244| bls target
245|.endmacro
246|.macro checknumber, reg, target // Branch to target only if TISNUMhi < upper word of reg (unsigned); equality passes.
247| cmp TISNUMhi, reg, lsr #32
248| blo target
249|.endmacro
250|
251|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro // reg = ~(0x8000 << 32) = 0xffff7fffffffffff.
252|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro // reg = ~(0x0001 << 48) = 0xfffeffffffffffff.
253|
254#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) /* GG_G2J plus the offset of a jit_State field. */
255|
256#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Offset of a GCproto field minus sizeof(GCproto); used as a displacement from a function's pc pointer. */
257|
258|.macro hotcheck, delta // Decrement the 16 bit counter selected by PC by delta; leaves the subs flags for the caller. Clobbers CARG1, CARG2.
259| lsr CARG1, PC, #1
260| and CARG1, CARG1, #126 // CARG1 = (PC >> 1) & 126: an even byte index 0..126 derived from PC bits 2..7.
261| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT // Make it an offset relative to GL.
262| ldrh CARG2w, [GL, CARG1] // Load counter (zero-extended).
263| subs CARG2, CARG2, #delta // Carry is clear (lo) iff counter < delta.
264| strh CARG2w, [GL, CARG1] // Store back the low 16 bits.
265|.endmacro
266|
267|.macro hotloop // Count a loop iteration; go to vm_hotloop when the counter underflows.
268| hotcheck HOTCOUNT_LOOP
269| blo ->vm_hotloop
270|.endmacro
271|
272|.macro hotcall // Count a call; go to vm_hotcall when the counter underflows.
273| hotcheck HOTCOUNT_CALL
274| blo ->vm_hotcall
275|.endmacro
276|
277|// Set current VM state. mv_vmstate materializes the value, st_vmstate stores it.
278|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro // reg = ~LJ_VMST_<st> (name built by concatenation).
279|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro // Store reg to GL->vmstate.
280|
281|// Move table write barrier back. Overwrites mark and tmp. mark must hold tab->marked on entry.
282|.macro barrierback, tab, mark, tmp
283| ldr tmp, GL->gc.grayagain // tmp = current head of the grayagain list.
284| and mark, mark, #~LJ_GC_BLACK // black2gray(tab)
285| str tab, GL->gc.grayagain // tab becomes the new list head.
286| strb mark, tab->marked // Write back the mark byte with the black bit cleared.
287| str tmp, tab->gclist // Link the old head behind tab.
288|.endmacro
289|
290|//-----------------------------------------------------------------------
291
292#if !LJ_DUALNUM
293#error "Only dual-number mode supported for ARM64 target"
294#endif
295
296/* Generate subroutines used by opcodes and other parts of the VM. */
297/* The .code_sub section should be last to help static branch prediction. */
298static void build_subroutines(BuildCtx *ctx)
299{
300 |.code_sub
301 |
302 |//-----------------------------------------------------------------------
303 |//-- Return handling ----------------------------------------------------
304 |//-----------------------------------------------------------------------
305 |
306 |->vm_returnp: // Reached from vm_return when the frame type is not FRAME_C.
307 | // See vm_return. Also: RB = previous base.
308 | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0?
309 |
310 | // Return from pcall or xpcall fast func.
311 | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
312 | mov_true TMP0
313 | mov BASE, RB
314 | // Prepending may overwrite the pcall frame, so do it at the end.
315 | str TMP0, [RA, #-8]! // Prepend true to results.
316 |
317 |->vm_returnc:
318 | adds RC, RC, #8 // RC = (nresults+1)*8.
319 | mov CRET1, #LUA_YIELD
320 | beq ->vm_unwind_c_eh // Taken if the adds result is zero; CRET1 = LUA_YIELD is the status.
321 | str RCw, SAVE_MULTRES
322 | ands CARG1, PC, #FRAME_TYPE
323 | beq ->BC_RET_Z // Handle regular return to Lua.
324 |
325 |->vm_return:
326 | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
327 | // CARG1 = PC & FRAME_TYPE
328 | and RB, PC, #~FRAME_TYPEP // RB = frame delta (type bits cleared).
329 | cmp CARG1, #FRAME_C
330 | sub RB, BASE, RB // RB = previous base.
331 | bne ->vm_returnp // Not a C frame (flags still from the cmp above).
332 |
333 | str RB, L->base
334 | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
335 | mv_vmstate TMP0w, C
336 | sub BASE, BASE, #16 // Results are copied down starting at BASE-16.
337 | subs TMP2, RC, #8 // TMP2 = nresults*8; zero means nothing to copy.
338 | st_vmstate TMP0w
339 | beq >2
340 |1: // Copy loop: one 8 byte result per iteration from [RA] to [BASE].
341 | subs TMP2, TMP2, #8
342 | ldr TMP0, [RA], #8
343 | str TMP0, [BASE], #8
344 | bne <1
345 |2:
346 | cmp RC, CARG2, lsl #3 // More/less results wanted?
347 | bne >6
348 |3:
349 | str BASE, L->top // Store new top.
350 |
351 |->vm_leave_cp:
352 | ldr RC, SAVE_CFRAME // Restore previous C frame.
353 | mov CRET1, #0 // Ok return status for vm_pcall.
354 | str RC, L->cframe
355 |
356 |->vm_leave_unw:
357 | restoreregs // Macro defined outside this section.
358 | ret
359 |
360 |6:
361 | bgt >7 // Less results wanted?
362 | // More results wanted. Check stack size and fill up results with nil.
363 | ldr CARG3, L->maxstack
364 | cmp BASE, CARG3
365 | bhs >8
366 | str TISNIL, [BASE], #8 // Append one nil and advance BASE.
367 | add RC, RC, #8
368 | b <2 // Re-check the result count.
387 |
388 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
389 | // (void *cframe, int errcode)
390 | mov sp, CARG1 // sp = cframe.
391 | mov CRET1, CARG2 // Return value = errcode.
392 |->vm_unwind_c_eh: // Landing pad for external unwinder.
393 | ldr L, SAVE_L
394 | mv_vmstate TMP0w, C
395 | ldr GL, L->glref
396 | st_vmstate TMP0w // Set the VM state to C before leaving.
397 | b ->vm_leave_unw
398 |
399 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
400 | // (void *cframe)
401 | and sp, CARG1, #CFRAME_RAWMASK // sp = cframe masked with CFRAME_RAWMASK.
402 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
403 | ldr L, SAVE_L
404 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Re-create the constant registers: value placed in bits 48..63.
405 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 // Same 16 bit value placed in bits 16..31.
406 | movn TISNIL, #0 // TISNIL = all ones.
407 | mov RC, #16 // 2 results: false + error message.
408 | ldr BASE, L->base
409 | ldr GL, L->glref // Setup pointer to global state.
410 | mov_false TMP0
411 | sub RA, BASE, #8 // Results start at BASE-8.
412 | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
413 | str TMP0, [BASE, #-8] // Prepend false to error message.
414 | st_vmstate ST_INTERP
415 | b ->vm_returnc
416 |
417 |//-----------------------------------------------------------------------
418 |//-- Grow stack for calls -----------------------------------------------
419 |//-----------------------------------------------------------------------
420 |
421 |->vm_growstack_c: // Grow stack for C function.
422 | // CARG1 = L
423 | mov CARG2, #LUA_MINSTACK // Request LUA_MINSTACK slots.
424 | b >2
425 |
426 |->vm_growstack_l: // Grow stack for Lua function.
427 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
428 | add RC, BASE, RC // RC = BASE + nargs*8 (the top).
429 | sub RA, RA, BASE // RA = framesize*8.
430 | mov CARG1, L
431 | stp BASE, RC, L->base // Store base and the field following it (top, per the comment below).
432 | add PC, PC, #4 // Must point after first instruction.
433 | lsr CARG2, RA, #3 // CARG2 = framesize in slots.
434 |2:
435 | // L->base = new base, L->top = top
436 | str PC, SAVE_PC
437 | bl extern lj_state_growstack // (lua_State *L, int n)
438 | ldp BASE, RC, L->base // Reload base and top after the call.
439 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
440 | sub NARGS8:RC, RC, BASE // Recompute nargs*8 = top - base.
441 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
442 | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
443 | ins_callt // Just retry the call.
444 |
445 |//-----------------------------------------------------------------------
446 |//-- Entry points into the assembler VM ---------------------------------
447 |//-----------------------------------------------------------------------
448 |
449 |->vm_resume: // Setup C frame and resume thread.
450 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
451 | saveregs // Macro defined outside this section.
452 | mov L, CARG1
453 | ldr GL, L->glref // Setup pointer to global state.
454 | mov BASE, CARG2
455 | str L, SAVE_L
456 | mov PC, #FRAME_CP
457 | str wzr, SAVE_NRES // nres1 = 0.
458 | add TMP0, sp, #CFRAME_RESUME // TMP0 = sp + CFRAME_RESUME, stored as L->cframe below.
459 | ldrb TMP1w, L->status
460 | str wzr, SAVE_ERRF // ef = 0.
461 | str L, SAVE_PC // Any value outside of bytecode is ok.
462 | str xzr, SAVE_CFRAME // No previous C frame.
463 | str TMP0, L->cframe
464 | cbz TMP1w, >3 // Status 0: continue at local label 3 (defined after this block).
465 |
466 | // Resume after yield (like a return).
467 | str L, GL->cur_L
468 | mov RA, BASE // RA = base argument, used as the result pointer.
469 | ldp BASE, CARG1, L->base // BASE = L->base, CARG1 = the field following it (top).
470 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
471 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
472 | ldr PC, [BASE, FRAME_PC]
473 | strb wzr, L->status // Clear the thread status.
474 | movn TISNIL, #0
475 | sub RC, CARG1, BASE
476 | ands CARG1, PC, #FRAME_TYPE
477 | add RC, RC, #8 // RC = (top - base) + 8.
478 | st_vmstate ST_INTERP
479 | str RCw, SAVE_MULTRES
480 | beq ->BC_RET_Z // Frame type bits are zero (flags from the ands above).
481 | b ->vm_return
482 |
483 |->vm_pcall: // Setup protected C frame and enter VM.
484 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
485 | saveregs
486 | mov PC, #FRAME_CP
487 | str CARG4w, SAVE_ERRF // Save ef (low 32 bits).
488 | b >1
489 |
490 |->vm_call: // Setup C frame and enter VM.
491 | // (lua_State *L, TValue *base, int nres1)
492 | saveregs
493 | mov PC, #FRAME_C
494 |
495 |1: // Entry point for vm_pcall above (PC = ftype).
496 | ldr RC, L:CARG1->cframe // RC = previous C frame.
497 | str CARG3w, SAVE_NRES
498 | mov L, CARG1
499 | str CARG1, SAVE_L
500 | ldr GL, L->glref // Setup pointer to global state.
501 | mov BASE, CARG2
502 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
503 | str RC, SAVE_CFRAME
504 | str fp, L->cframe // Add our C frame to cframe chain.
505 |
506 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
507 | str L, GL->cur_L
508 | ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
509 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
510 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
511 | add PC, PC, BASE
512 | movn TISNIL, #0
513 | sub PC, PC, RB // PC = frame delta + frame type
514 | sub NARGS8:RC, CARG1, BASE // nargs*8 = top - new base.
515 | st_vmstate ST_INTERP
516 |
517 |->vm_call_dispatch:
518 | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
519 | ldr CARG3, [BASE, FRAME_FUNC]
520 | checkfunc CARG3, ->vmeta_call // Not tagged as a function: resolve via vmeta_call. CARG3 is masked either way.
521 |
522 |->vm_call_dispatch_f:
523 | ins_call
524 | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
525 |
526 |->vm_cpcall: // Setup protected C frame, call C.
527 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
528 | saveregs
529 | mov L, CARG1
530 | ldr RA, L:CARG1->stack
531 | str CARG1, SAVE_L
532 | ldr GL, L->glref // Setup pointer to global state.
533 | ldr RB, L->top
534 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | ldr RC, L->cframe
536 | sub RA, RA, RB // Compute -savestack(L, L->top).
537 | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
538 | str wzr, SAVE_ERRF // No error function.
539 | str RC, SAVE_CFRAME
540 | str fp, L->cframe // Add our C frame to cframe chain.
541 | str L, GL->cur_L
542 | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
543 | mov BASE, CRET1 // The return value of cp is the new base (or zero).
544 | mov PC, #FRAME_CP
545 | cbnz BASE, <3 // Else continue with the call.
546 | b ->vm_leave_cp // No base? Just remove C frame.
547 |
548 |//-----------------------------------------------------------------------
549 |//-- Metamethod handling ------------------------------------------------
550 |//-----------------------------------------------------------------------
551 |
552 |//-- Continuation dispatch ----------------------------------------------
553 |
554 |->cont_dispatch: // Return into a continuation frame: restore the caller state and jump to the stored continuation.
555 | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
556 | ldr LFUNC:CARG3, [RB, FRAME_FUNC]
557 | ldr CARG1, [BASE, #-32] // Get continuation.
558 | mov CARG4, BASE // Keep the meta base in CARG4.
559 | mov BASE, RB // Restore caller BASE.
560 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
561 |.if FFI
562 | cmp CARG1, #1 // Flags are consumed by bls/beq below.
563 |.endif
564 | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC].
565 | ldr CARG3, LFUNC:CARG3->pc
566 | add TMP0, RA, RC
567 | str TISNIL, [TMP0, #-8] // Ensure one valid arg.
568 |.if FFI
569 | bls >1 // cont <= 1: special values handled at label 1.
570 |.endif
571 | ldr KBASE, [CARG3, #PC2PROTO(k)] // KBASE = k field of the prototype, addressed relative to its pc pointer.
572 | // BASE = base, RA = resultptr, CARG4 = meta base
573 | br CARG1
574 |
575 |.if FFI
576 |1:
577 | beq ->cont_ffi_callback // cont = 1: return from FFI callback.
578 | // cont = 0: tailcall from C function.
579 | sub CARG4, CARG4, #32
580 | sub RC, CARG4, BASE
581 | b ->vm_call_tail
582 |.endif
583 |
584 |->cont_cat: // RA = resultptr, CARG4 = meta base
585 | ldr INSw, [PC, #-4] // Reload the instruction preceding PC.
586 | sub CARG2, CARG4, #32
587 | ldr TMP0, [RA] // TMP0 = result value.
588 | str BASE, L->base
589 | decode_RB RB, INS
590 | decode_RA RA, INS
591 | add TMP1, BASE, RB, lsl #3 // TMP1 = address of slot B.
592 | subs TMP1, CARG2, TMP1 // TMP1 = byte distance between CARG2 and slot B.
593 | beq >1 // Zero distance: store the result into slot A.
594 | str TMP0, [CARG2] // Otherwise store the result at CARG2 and continue in BC_CAT_Z.
595 | lsr CARG3, TMP1, #3 // CARG3 = that distance in slots.
596 | b ->BC_CAT_Z
597 |
598 |1:
599 | str TMP0, [BASE, RA, lsl #3] // Store result in slot A.
600 | b ->cont_nop
601 |
602 |//-- Table indexing metamethods -----------------------------------------
603 |
604 |->vmeta_tgets1: // String key, table taken from slot RB.
605 | movn CARG4, #~LJ_TSTR // CARG4 = LJ_TSTR.
606 | add CARG2, BASE, RB, lsl #3 // CARG2 = address of slot RB.
607 | add CARG4, STR:RC, CARG4, lsl #47 // CARG4 = RC + (LJ_TSTR << 47): the string with its tag added.
608 | b >2
609 |
610 |->vmeta_tgets: // String key; the object in CARG2 is tagged and spilled to GL->tmptv. CARG4 must already hold the key.
611 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 // Insert the table tag into bits 48..63.
612 | str CARG2, GL->tmptv
613 | add CARG2, GL, #offsetof(global_State, tmptv) // CARG2 = &GL->tmptv.
614 |2:
615 | add CARG3, sp, TMPDofs // CARG3 = address of the TMPD stack slot.
616 | str CARG4, TMPD // Key is passed via TMPD.
617 | b >1
618 |
619 |->vmeta_tgetb: // RB = table, RC = index
620 | add RC, RC, TISNUM // Add TISNUM to the index to form the key value.
621 | add CARG2, BASE, RB, lsl #3
622 | add CARG3, sp, TMPDofs
623 | str RC, TMPD
624 | b >1
625 |
626 |->vmeta_tgetv: // RB = table, RC = key
627 | add CARG2, BASE, RB, lsl #3
628 | add CARG3, BASE, RC, lsl #3
629 |1:
630 | str BASE, L->base
631 | mov CARG1, L
632 | str PC, SAVE_PC
633 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
634 | // Returns TValue * (finished) or NULL (metamethod).
635 | cbz CRET1, >3
636 | ldr TMP0, [CRET1]
637 | str TMP0, [BASE, RA, lsl #3] // Copy the result into slot RA.
638 | ins_next
639 |
640 |3: // Call __index metamethod.
641 | // BASE = base, L->top = new base, stack = cont/func/t/k
642 | sub TMP1, BASE, #FRAME_CONT
643 | ldr BASE, L->top
644 | mov NARGS8:RC, #16 // 2 args for func(t, k).
645 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
646 | str PC, [BASE, #-24] // [cont|PC]
647 | sub PC, BASE, TMP1 // PC = (new base - old base) + FRAME_CONT.
648 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
649 | b ->vm_call_dispatch_f
650 |
651 |->vmeta_tgetr: // NOTE(review): CARG1 is expected to hold the table already (not set here) -- confirm at the caller.
652 | sxtw CARG2, TMP1w // Key = sign-extended 32 bit value from TMP1.
653 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
654 | // Returns cTValue * or NULL.
655 | mov TMP0, TISNIL // Default result.
656 | cbz CRET1, ->BC_TGETR_Z
657 | ldr TMP0, [CRET1]
658 | b ->BC_TGETR_Z
659 |
660 |//-----------------------------------------------------------------------
661 |
662 |->vmeta_tsets1: // String key, table taken from slot RB.
663 | movn CARG4, #~LJ_TSTR // CARG4 = LJ_TSTR.
664 | add CARG2, BASE, RB, lsl #3
665 | add CARG4, STR:RC, CARG4, lsl #47 // CARG4 = RC + (LJ_TSTR << 47): the string with its tag added.
666 | b >2
667 |
668 |->vmeta_tsets: // String key; the object in CARG2 is tagged and spilled to GL->tmptv. CARG4 must already hold the key.
669 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
670 | str CARG2, GL->tmptv
671 | add CARG2, GL, #offsetof(global_State, tmptv)
672 |2:
673 | add CARG3, sp, TMPDofs // CARG3 = address of the TMPD stack slot.
674 | str CARG4, TMPD
675 | b >1
676 |
677 |->vmeta_tsetb: // RB = table, RC = index
678 | add RC, RC, TISNUM // Add TISNUM to the index to form the key value.
679 | add CARG2, BASE, RB, lsl #3
680 | add CARG3, sp, TMPDofs
681 | str RC, TMPD
682 | b >1
683 |
684 |->vmeta_tsetv: // Table in slot RB, key in slot RC.
685 | add CARG2, BASE, RB, lsl #3
686 | add CARG3, BASE, RC, lsl #3
687 |1:
688 | str BASE, L->base
689 | mov CARG1, L
690 | str PC, SAVE_PC
691 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
692 | // Returns TValue * (finished) or NULL (metamethod).
693 | ldr TMP0, [BASE, RA, lsl #3] // TMP0 = value to store (slot RA); needed on both paths.
694 | cbz CRET1, >3
695 | // NOBARRIER: lj_meta_tset ensures the table is not black.
696 | str TMP0, [CRET1]
697 | ins_next
698 |
699 |3: // Call __newindex metamethod.
700 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
701 | sub TMP1, BASE, #FRAME_CONT
702 | ldr BASE, L->top
703 | mov NARGS8:RC, #24 // 3 args for func(t, k, v).
704 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
705 | str TMP0, [BASE, #16] // Copy value to third argument.
706 | str PC, [BASE, #-24] // [cont|PC]
707 | sub PC, BASE, TMP1 // PC = (new base - old base) + FRAME_CONT.
708 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
709 | b ->vm_call_dispatch_f
710 |
711 |->vmeta_tsetr: // NOTE(review): CARG2 is expected to hold the table already (not set here) -- confirm at the caller.
712 | sxtw CARG3, TMP1w // Key = sign-extended 32 bit value from TMP1.
713 | str BASE, L->base
714 | mov CARG1, L
715 | str PC, SAVE_PC
716 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
717 | // Returns TValue *.
718 | b ->BC_TSETR_Z
719 |
720 |//-- Comparison metamethods ---------------------------------------------
721 |
722 |->vmeta_comp: // Comparison fallback: calls lj_meta_comp on slots RA and RC.
723 | add CARG2, BASE, RA, lsl #3
724 | sub PC, PC, #4 // Step PC back by one instruction.
725 | add CARG3, BASE, RC, lsl #3
726 | str BASE, L->base
727 | mov CARG1, L
728 | str PC, SAVE_PC
729 | uxtb CARG4w, INSw // op = low byte of the instruction.
730 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
731 | // Returns 0/1 or TValue * (metamethod).
732 |3:
733 | cmp CRET1, #1
734 | bhi ->vmeta_binop // Result > 1: it is a pointer, call the metamethod.
735 |4: // Entered with flags set: 'lo' keeps PC+4, otherwise the branch target is taken.
736 | ldrh RBw, [PC, # OFS_RD] // Load the 16 bit D operand of the instruction at PC.
737 | add PC, PC, #4
738 | add RB, PC, RB, lsl #2
739 | sub RB, RB, #0x20000 // RB = PC + D*4 - 0x20000.
740 | csel PC, PC, RB, lo
741 |->cont_nop:
742 | ins_next
743 |
744 |->cont_ra: // RA = resultptr
745 | ldr INSw, [PC, #-4] // Reload the instruction preceding PC.
746 | ldr TMP0, [RA] // TMP0 = first result.
747 | decode_RA TMP1, INS
748 | str TMP0, [BASE, TMP1, lsl #3] // Store it into slot A of that instruction.
749 | b ->cont_nop
750 |
751 |->cont_condt: // RA = resultptr
752 | ldr TMP0, [RA]
753 | mov_true TMP1
754 | cmp TMP1, TMP0 // Branch if result is true.
755 | b <4 // Local label 4 precedes this block; it consumes the flags ('lo' = no branch).
756 |
757 |->cont_condf: // RA = resultptr
758 | ldr TMP0, [RA]
759 | mov_false TMP1
760 | cmp TMP0, TMP1 // Branch if result is false.
761 | b <4 // Note the swapped operand order compared to cont_condt.
762 |
763 |->vmeta_equal: // Equality fallback via lj_meta_equal.
764 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
765 | and TAB:CARG3, CARG3, #LJ_GCVMASK // Strip the tag from the second operand.
766 | sub PC, PC, #4 // Step PC back by one instruction.
767 | str BASE, L->base
768 | mov CARG1, L
769 | str PC, SAVE_PC
770 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
771 | // Returns 0/1 or TValue * (metamethod).
772 | b <3 // Local label 3 precedes this block; it handles the result.
773 |
774 |->vmeta_equal_cd: // The label is always emitted; the body is only assembled with FFI.
775 |.if FFI
776 | sub PC, PC, #4 // Step PC back by one instruction.
777 | str BASE, L->base
778 | mov CARG1, L
779 | mov CARG2, INS // Pass the whole instruction word.
780 | str PC, SAVE_PC
781 | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
782 | // Returns 0/1 or TValue * (metamethod).
783 | b <3 // Local label 3 precedes this block; it handles the result.
784 |.endif
785 |
786 |->vmeta_istype: // Calls lj_meta_istype with RA and RC, then continues with the next instruction.
787 | sub PC, PC, #4 // SAVE_PC gets PC-4.
788 | str BASE, L->base
789 | mov CARG1, L
790 | mov CARG2, RA
791 | mov CARG3, RC
792 | str PC, SAVE_PC
793 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
794 | b ->cont_nop // NOTE(review): PC is not re-incremented here; presumably fine because the call does not return normally -- confirm.
795 |
796 |//-- Arithmetic metamethods ---------------------------------------------
797 |
798 |->vmeta_arith_vn: // Left operand from stack slot RB, right operand from constant RC.
799 | add CARG3, BASE, RB, lsl #3
800 | add CARG4, KBASE, RC, lsl #3
801 | b >1
802 |
803 |->vmeta_arith_nv: // Left operand from constant RC, right operand from stack slot RB.
804 | add CARG4, BASE, RB, lsl #3
805 | add CARG3, KBASE, RC, lsl #3
806 | b >1
807 |
808 |->vmeta_unm: // Unary case: both operand pointers refer to slot RC.
809 | add CARG3, BASE, RC, lsl #3
810 | mov CARG4, CARG3
811 | b >1
812 |
813 |->vmeta_arith_vv: // Both operands from stack slots RB and RC.
814 | add CARG3, BASE, RB, lsl #3
815 | add CARG4, BASE, RC, lsl #3
816 |1:
817 | uxtb CARG5w, INSw // op = low byte of the instruction.
818 | add CARG2, BASE, RA, lsl #3 // Destination = slot RA.
819 | str BASE, L->base
820 | mov CARG1, L
821 | str PC, SAVE_PC
822 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
823 | // Returns NULL (finished) or TValue * (metamethod).
824 | cbz CRET1, ->cont_nop
825 |
826 | // Call metamethod for binary op.
827 |->vmeta_binop:
828 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
829 | sub TMP1, CRET1, BASE
830 | str PC, [CRET1, #-24] // [cont|PC]
831 | add PC, TMP1, #FRAME_CONT // PC = frame delta + FRAME_CONT.
832 | mov BASE, CRET1
833 | mov NARGS8:RC, #16 // 2 args for func(o1, o2).
834 | b ->vm_call_dispatch
835 |
836 |->vmeta_len: // Length fallback via lj_meta_len on slot RC.
837 | add CARG2, BASE, RC, lsl #3
838#if LJ_52
839 | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types).
840#endif
841 | str BASE, L->base
842 | mov CARG1, L
843 | str PC, SAVE_PC
844 | bl extern lj_meta_len // (lua_State *L, TValue *o)
845 | // Returns NULL (retry) or TValue * (metamethod base).
846#if LJ_52
847 | cbnz CRET1, ->vmeta_binop // Binop call for compatibility.
848 | mov TAB:CARG1, TAB:RC // NULL result: restore the table and retry in BC_LEN_Z.
849 | b ->BC_LEN_Z
850#else
851 | b ->vmeta_binop // Binop call for compatibility.
852#endif
853 |
854 |//-- Call metamethod ----------------------------------------------------
855 |
856 |->vmeta_call: // Resolve and call __call metamethod.
857 | // RB = old base, BASE = new base, RC = nargs*8
858 | mov CARG1, L
859 | str RB, L->base // This is the caller's base!
860 | sub CARG2, BASE, #16 // func = BASE-16.
861 | str PC, SAVE_PC
862 | add CARG3, BASE, NARGS8:RC // top = BASE + nargs*8.
863 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
864 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
865 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
866 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
867 | ins_call
868 |
869 |->vmeta_callt: // Resolve __call for BC_CALLT.
870 | // BASE = old base, RA = new base, RC = nargs*8
871 | mov CARG1, L
872 | str BASE, L->base
873 | sub CARG2, RA, #16 // func = RA-16.
874 | str PC, SAVE_PC
875 | add CARG3, RA, NARGS8:RC // top = RA + nargs*8.
876 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
877 | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here.
878 | ldr PC, [BASE, FRAME_PC] // Reload the frame PC of the current frame.
879 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
880 | and LFUNC:CARG3, TMP1, #LJ_GCVMASK
881 | b ->BC_CALLT2_Z
882 |
883 |//-- Argument coercion for 'for' statement ------------------------------
884 |
885 |->vmeta_for: // Calls lj_meta_for on the slots at RA, then re-dispatches the preceding instruction.
886 | mov CARG1, L
887 | str BASE, L->base
888 | mov CARG2, RA // RA is passed as the TValue *base argument.
889 | str PC, SAVE_PC
890 | bl extern lj_meta_for // (lua_State *L, TValue *base)
891 | ldr INSw, [PC, #-4] // Reload the instruction preceding PC.
892 |.if JIT
893 | uxtb TMP0w, INSw // TMP0 = opcode.
894 |.endif
895 | decode_RA RA, INS
896 | decode_RD RC, INS
897 |.if JIT
898 | cmp TMP0, #BC_JFORI
899 | beq =>BC_JFORI // Re-enter the BC_JFORI handler if that was the opcode.
900 |.endif
901 | b =>BC_FORI // Otherwise re-enter the BC_FORI handler.
902 |
903 |//-----------------------------------------------------------------------
904 |//-- Fast functions -----------------------------------------------------
905 |//-----------------------------------------------------------------------
906 |
907 |.macro .ffunc, name // Emit only the global label ff_<name>.
908 |->ff_ .. name:
909 |.endmacro
910 |
911 |.macro .ffunc_1, name // Label + load first argument into CARG1; fallback if nargs*8 < 8.
912 |->ff_ .. name:
913 | ldr CARG1, [BASE]
914 | cmp NARGS8:RC, #8
915 | blo ->fff_fallback
916 |.endmacro
917 |
918 |.macro .ffunc_2, name // Label + load first two arguments into CARG1/CARG2; fallback if nargs*8 < 16.
919 |->ff_ .. name:
920 | ldp CARG1, CARG2, [BASE]
921 | cmp NARGS8:RC, #16
922 | blo ->fff_fallback
923 |.endmacro
924 |
925 |.macro .ffunc_n, name // One argument, loaded into both CARG1 and FARG1; fallback if missing or checknum fails.
926 | .ffunc name
927 | ldr CARG1, [BASE]
928 | cmp NARGS8:RC, #8
929 | ldr FARG1, [BASE] // Same slot loaded into FARG1 as well.
930 | blo ->fff_fallback
931 | checknum CARG1, ->fff_fallback
932 |.endmacro
933 |
934 |.macro .ffunc_nn, name // Two arguments, loaded into CARG1/CARG2 and FARG1/FARG2; fallback if missing or checknum fails.
935 | .ffunc name
936 | ldp CARG1, CARG2, [BASE]
937 | cmp NARGS8:RC, #16
938 | ldp FARG1, FARG2, [BASE]
939 | blo ->fff_fallback
940 | checknum CARG1, ->fff_fallback
941 | checknum CARG2, ->fff_fallback
942 |.endmacro
943 |
944 |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. Calls fff_gcstep unless total < threshold.
945 |.macro ffgccheck
946 | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total.
947 | cmp CARG1, CARG2
948 | blt >1 // Signed compare: skip the step while total < threshold.
949 | bl ->fff_gcstep
950 |1:
951 |.endmacro
952 |
953 |//-- Base library: checks -----------------------------------------------
954 |
955 |.ffunc_1 assert // Fast path: returns all arguments when the first one compares below the 'false' value.
956 | ldr PC, [BASE, FRAME_PC]
957 | mov_false TMP1
958 | cmp CARG1, TMP1
959 | bhs ->fff_fallback // First argument >= false value (unsigned): take the fallback.
960 | str CARG1, [BASE, #-16] // First result goes to BASE-16.
961 | sub RB, BASE, #8
962 | subs RA, NARGS8:RC, #8 // RA = bytes of remaining arguments.
963 | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8.
964 | cbz RA, ->fff_res // Done if exactly 1 argument.
965 |1: // Move the remaining arguments down by 16 bytes, one per iteration.
966 | ldr CARG1, [RB, #16]
967 | sub RA, RA, #8
968 | str CARG1, [RB], #8
969 | cbnz RA, <1
970 | b ->fff_res
971 |
972 |.ffunc_1 type // Returns an upvalue of this C function, indexed by the inverted type tag of the argument.
973 | mov TMP0, #~LJ_TISNUM
974 | asr ITYPE, CARG1, #47 // ITYPE = sign-extended tag from bits 47..63.
975 | cmn ITYPE, #~LJ_TISNUM
976 | csinv TMP1, TMP0, ITYPE, lo // TMP1 = ~LJ_TISNUM if 'lo', else ~ITYPE.
977 | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 // Turn it into an 8 byte index relative to the function object.
978 | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] // Load upvalue[TMP1]. NOTE(review): presumably the type name string -- confirm.
979 | b ->fff_restv
980 |
981 |//-- Base library: getters and setters ---------------------------------
982 |
983 |.ffunc_1 getmetatable // Looks up the metatable, then its field keyed by the MM_metatable name string.
984 | asr ITYPE, CARG1, #47
985 | cmn ITYPE, #-LJ_TTAB
986 | ccmn ITYPE, #-LJ_TUDATA, #4, ne // If not a table, test for udata; otherwise force 'eq' (nzcv = #4).
987 | and TAB:CARG1, CARG1, #LJ_GCVMASK
988 | bne >6 // Neither table nor udata: use the per-type base metatable.
989 |1: // Field metatable must be at same offset for GCtab and GCudata!
990 | ldr TAB:RB, TAB:CARG1->metatable
991 |2:
992 | mov CARG1, TISNIL // Default result: nil.
993 | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
994 | cbz TAB:RB, ->fff_restv // No metatable: return nil.
995 | ldr TMP1w, TAB:RB->hmask
996 | ldr TMP2w, STR:RC->sid
997 | ldr NODE:CARG3, TAB:RB->node
998 | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
999 | add TMP1, TMP1, TMP1, lsl #1 // TMP1 = idx*3.
1000 | movn CARG4, #~LJ_TSTR
1001 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
1002 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
1003 |3: // Rearranged logic, because we expect _not_ to find the key.
1004 | ldp CARG1, TMP0, NODE:CARG3->val // CARG1 = node value, TMP0 = the following field (compared as the key).
1005 | ldr NODE:CARG3, NODE:CARG3->next
1006 | cmp TMP0, CARG4
1007 | beq >5
1008 | cbnz NODE:CARG3, <3 // Follow the chain until next is NULL.
1009 |4:
1010 | mov CARG1, RB // Use metatable as default result.
1011 | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 // Add the table tag.
1012 | b ->fff_restv
1013 |5: // NOTE(review): TMP0 equals the tagged key here, so this compare looks always 'ne' and CARG1 is returned as loaded -- confirm intent.
1014 | cmp TMP0, TISNIL
1015 | bne ->fff_restv
1016 | b <4
1017 |
1018 |6:
1019 | movn TMP0, #~LJ_TISNUM // TMP0 = LJ_TISNUM.
1020 | cmp ITYPE, TMP0
1021 | csel ITYPE, ITYPE, TMP0, hs // ITYPE = ITYPE if ITYPE >= LJ_TISNUM (unsigned), else LJ_TISNUM.
1022 | sub TMP1, GL, ITYPE, lsl #3 // TMP1 = GL - ITYPE*8.
1023 | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] // Load from the gcroot array starting at GCROOT_BASEMT, indexed by the tag.
1024 | b <2
1025 |
1026 |.ffunc_2 setmetatable
1027 | // Fast path: no mt for table yet and not clearing the mt.
1028 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback // TMP1 = untagged table pointer; CARG1 keeps the tagged value.
1029 | ldr TAB:TMP0, TAB:TMP1->metatable
1030 | asr ITYPE, CARG2, #47
1031 | ldrb TMP2w, TAB:TMP1->marked
1032 | cmn ITYPE, #-LJ_TTAB // Is the second argument a table?
1033 | and TAB:CARG2, CARG2, #LJ_GCVMASK
1034 | ccmp TAB:TMP0, #0, #0, eq // If so, test that the current metatable is NULL; else force 'ne'.
1035 | bne ->fff_fallback
1036 | str TAB:CARG2, TAB:TMP1->metatable
1037 | tbz TMP2w, #2, ->fff_restv // isblack(table)
1038 | barrierback TAB:TMP1, TMP2w, TMP0
1039 | b ->fff_restv
1040 |
1041 |.ffunc rawget // Needs 2 arguments and a table as the first; returns the value found by lj_tab_get.
1042 | ldr CARG2, [BASE]
1043 | cmp NARGS8:RC, #16
1044 | blo ->fff_fallback
1045 | checktab CARG2, ->fff_fallback // CARG2 becomes the untagged table pointer.
1046 | mov CARG1, L
1047 | add CARG3, BASE, #8 // key = address of the second argument.
1048 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1049 | // Returns cTValue *.
1050 | ldr CARG1, [CRET1]
1051 | b ->fff_restv
1052 |
1053 |//-- Base library: conversions ------------------------------------------
1054 |
1055 |.ffunc tonumber
1056 | // Only handles the number case inline (without a base argument).
1057 | ldr CARG1, [BASE]
1058 | cmp NARGS8:RC, #8
1059 | bne ->fff_fallback // Exactly one argument is required for the fast path.
1060 | checknumber CARG1, ->fff_fallback
1061 | b ->fff_restv // Return the argument unchanged.
1062 |
1063 |.ffunc_1 tostring
1064 | // Only handles the string or number case inline.
1065 | asr ITYPE, CARG1, #47
1066 | cmn ITYPE, #-LJ_TSTR
1067 | // A __tostring method in the string base metatable is ignored.
1068 | beq ->fff_restv // Strings are returned unchanged.
1069 | // Handle numbers inline, unless a number base metatable is present.
1070 | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
1071 | str BASE, L->base
1072 | cmn ITYPE, #-LJ_TISNUM
1073 | ccmp TMP1, #0, #0, ls // If 'ls' after the tag compare, test the base metatable for NULL; else force 'ne'.
1074 | str PC, SAVE_PC // Redundant (but a defined value).
1075 | bne ->fff_fallback
1076 | ffgccheck
1077 | mov CARG1, L
1078 | mov CARG2, BASE // o = address of the first argument.
1079 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1080 | // Returns GCstr *.
1081 | movn TMP1, #~LJ_TSTR
1082 | ldr BASE, L->base // Reload BASE after the call.
1083 | add CARG1, CARG1, TMP1, lsl #47 // Add the string tag to the returned pointer.
1084 | b ->fff_restv
1085 |
1086 |//-- Base library: iterators -------------------------------------------
1087 |
1088 |.ffunc_1 next
1089 | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback // CARG2 = untagged table pointer.
1090 | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
1091 | ldr PC, [BASE, FRAME_PC]
1092 | stp BASE, BASE, L->base // Add frame since C call can throw.
1093 | mov CARG1, L
1094 | add CARG3, BASE, #8 // key = address of the second argument slot.
1095 | str PC, SAVE_PC
1096 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1097 | // Returns 0 at end of traversal.
1098 | str TISNIL, [BASE, #-16] // Pre-store nil as the single result.
1099 | cbz CRET1, ->fff_res1 // End of traversal: return nil.
1100 | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results.
1101 | mov RC, #(2+1)*8
1102 | stp CARG1, CARG2, [BASE, #-16]
1103 | b ->fff_res
1104 |
1105 |.ffunc_1 pairs // Returns 3 results: upvalue[0], the table and nil.
1106 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1107#if LJ_52
1108 | ldr TAB:CARG2, TAB:TMP1->metatable
1109#endif
1110 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1111 | ldr PC, [BASE, FRAME_PC]
1112#if LJ_52
1113 | cbnz TAB:CARG2, ->fff_fallback // With LJ_52, tables that have a metatable take the fallback.
1114#endif
1115 | mov RC, #(3+1)*8
1116 | stp CARG1, TISNIL, [BASE, #-8] // Results 2 and 3: the table and nil.
1117 | str CFUNC:CARG4, [BASE, #-16] // Result 1: upvalue[0].
1118 | b ->fff_res
1119 |
1120 |.ffunc_2 ipairs_aux // Returns index+1 and the value at that index, or no results if the value is nil/absent.
1121 | checktab CARG1, ->fff_fallback
1122 | checkint CARG2, ->fff_fallback
1123 | ldr TMP1w, TAB:CARG1->asize
1124 | ldr CARG3, TAB:CARG1->array
1125 | ldr TMP0w, TAB:CARG1->hmask
1126 | add CARG2w, CARG2w, #1 // 32 bit add; this also clears the upper word of CARG2.
1127 | cmp CARG2w, TMP1w // Flags are consumed by the bhs below.
1128 | ldr PC, [BASE, FRAME_PC]
1129 | add TMP2, CARG2, TISNUM // Form the new index value by adding TISNUM.
1130 | mov RC, #(0+1)*8 // Default: no results.
1131 | str TMP2, [BASE, #-16] // First result slot: the new index.
1132 | bhs >2 // Not in array part?
1133 | ldr TMP0, [CARG3, CARG2, lsl #3]
1134 |1:
1135 | mov TMP1, #(2+1)*8
1136 | cmp TMP0, TISNIL
1137 | str TMP0, [BASE, #-8] // Second result slot: the value.
1138 | csel RC, RC, TMP1, eq // Keep 0 results if the value is nil, else 2 results.
1139 | b ->fff_res
1140 |2: // Check for empty hash part first. Otherwise call C function.
1141 | cbz TMP0w, ->fff_res
1142 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1143 | // Returns cTValue * or NULL.
1144 | cbz CRET1, ->fff_res
1145 | ldr TMP0, [CRET1]
1146 | b <1
1147 |
1148 |.ffunc_1 ipairs // Returns 3 results: upvalue[0], the table and the value held in TISNUM.
1149 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1150#if LJ_52
1151 | ldr TAB:CARG2, TAB:TMP1->metatable
1152#endif
1153 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1154 | ldr PC, [BASE, FRAME_PC]
1155#if LJ_52
1156 | cbnz TAB:CARG2, ->fff_fallback // With LJ_52, tables that have a metatable take the fallback.
1157#endif
1158 | mov RC, #(3+1)*8
1159 | stp CARG1, TISNUM, [BASE, #-8] // Results 2 and 3: the table and TISNUM.
1160 | str CFUNC:CARG4, [BASE, #-16] // Result 1: upvalue[0].
1161 | b ->fff_res
1162 |
1163 |//-- Base library: catch errors ----------------------------------------
1164 |
1165 |.ffunc pcall // Sets up a new frame 16 bytes up and dispatches the call with a FRAME_PCALL type PC.
1166 | ldrb TMP0w, GL->hookmask
1167 | subs NARGS8:RC, NARGS8:RC, #8 // Remove the function from the argument count.
1168 | blo ->fff_fallback // No arguments at all.
1169 | mov RB, BASE // RB = old base.
1170 | add BASE, BASE, #16
1171 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 // TMP0 = the single hookmask bit at HOOK_ACTIVE_SHIFT.
1172 | add PC, TMP0, #16+FRAME_PCALL // PC = frame delta 16 + FRAME_PCALL + that bit.
1173 | beq ->vm_call_dispatch // No further arguments (flags still from the subs above).
1174 |1:
1175 | add TMP2, BASE, NARGS8:RC
1176 |2: // Shift the arguments up by 8 bytes, from the last one down to BASE.
1177 | ldr TMP0, [TMP2, #-16]
1178 | str TMP0, [TMP2, #-8]!
1179 | cmp TMP2, BASE
1180 | bne <2
1181 | b ->vm_call_dispatch
1182 |
1183 |.ffunc xpcall // Like pcall, with the second argument required to be a function; frame delta is 24.
1184 | ldp CARG1, CARG2, [BASE]
1185 | ldrb TMP0w, GL->hookmask
1186 | subs NARGS8:TMP1, NARGS8:RC, #16 // TMP1 = nargs*8 without the first two arguments.
1187 | blo ->fff_fallback
1188 | mov RB, BASE // RB = old base.
1189 | asr ITYPE, CARG2, #47
1190 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
1191 | cmn ITYPE, #-LJ_TFUNC
1192 | add PC, TMP0, #24+FRAME_PCALL
1193 | bne ->fff_fallback // Traceback must be a function.
1194 | mov NARGS8:RC, NARGS8:TMP1
1195 | add BASE, BASE, #24
1196 | stp CARG2, CARG1, [RB] // Swap function and traceback.
1197 | cbz NARGS8:RC, ->vm_call_dispatch
1198 | b <1 // Reuse the argument shifting loop at the preceding local label 1.
1199 |
1200 |//-- Coroutine library --------------------------------------------------
1201 |
1202 |.macro coroutine_resume_wrap, resume // Shared body: resume=1 emits ff_coroutine_resume, resume=0 emits ff_coroutine_wrap_aux.
1203 |.if resume
1204 |.ffunc_1 coroutine_resume
1205 | checktp CARG1, LJ_TTHREAD, ->fff_fallback // CARG1 = untagged thread pointer.
1206 |.else
1207 |.ffunc coroutine_wrap_aux
1208 | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr // The thread comes from upvalue[0].
1209 | and L:CARG1, CARG1, #LJ_GCVMASK
1210 |.endif
1211 | ldr PC, [BASE, FRAME_PC]
1212 | str BASE, L->base
1213 | ldp RB, CARG2, L:CARG1->base // RB = co->base, CARG2 = the following field (stored back as co->top below).
1214 | ldrb TMP1w, L:CARG1->status
1215 | add TMP0, CARG2, TMP1
1216 | str PC, SAVE_PC
1217 | cmp TMP0, RB
1218 | beq ->fff_fallback // Fallback if co->top + co->status == co->base.
1219 | cmp TMP1, #LUA_YIELD
1220 | add TMP0, CARG2, #8
1221 | csel CARG2, CARG2, TMP0, hs // CARG2 = top if status >= LUA_YIELD, else top+8.
1222 | ldr CARG4, L:CARG1->maxstack
1223 | add CARG3, CARG2, NARGS8:RC // CARG3 = prospective new top of the coroutine.
1224 | ldr RB, L:CARG1->cframe
1225 | ccmp CARG3, CARG4, #2, ls // Chain: status <= LUA_YIELD, then new top <= maxstack ...
1226 | ccmp RB, #0, #2, ls // ... then cframe == 0. Any failure leaves 'hi'.
1227 | bhi ->fff_fallback
1228 |.if resume
1229 | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC.
1230 | add BASE, BASE, #8
1231 | sub NARGS8:RC, NARGS8:RC, #8
1232 |.endif
1233 | str CARG3, L:CARG1->top
1234 | str BASE, L->top
1235 | cbz NARGS8:RC, >3
1236 |2: // Move args to coroutine.
1237 | ldr TMP0, [BASE, RB] // RB is zero on entry (the cframe check above passed) and serves as the byte index.
1238 | cmp RB, NARGS8:RC
1239 | str TMP0, [CARG2, RB]
1240 | add RB, RB, #8
1241 | bne <2
1242 |3:
1243 | mov CARG3, #0
1244 | mov L:RA, L:CARG1 // Keep the coroutine pointer in RA across the call.
1245 | mov CARG4, #0
1246 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1247 | // Returns thread status.
1248 |4:
1249 | ldp CARG3, CARG4, L:RA->base // CARG3 = co->base, CARG4 = the following field (top).
1250 | cmp CRET1, #LUA_YIELD
1251 | ldr BASE, L->base
1252 | str L, GL->cur_L
1253 | st_vmstate ST_INTERP
1254 | bhi >8 // Status > LUA_YIELD: error.
1255 | sub RC, CARG4, CARG3 // RC = size of the results in bytes.
1256 | ldr CARG1, L->maxstack
1257 | add CARG2, BASE, RC
1258 | cbz RC, >6 // No results?
1259 | cmp CARG2, CARG1
1260 | mov RB, #0
1261 | bhi >9 // Need to grow stack?
1262 |
1263 | sub CARG4, RC, #8 // Byte index of the last result.
1264 | str CARG3, L:RA->top // Clear coroutine stack.
1265 |5: // Move results from coroutine.
1266 | ldr TMP0, [CARG3, RB]
1267 | cmp RB, CARG4
1268 | str TMP0, [BASE, RB]
1269 | add RB, RB, #8
1270 | bne <5
1271 |6:
1272 |.if resume
1273 | mov_true TMP1
1274 | add RC, RC, #16 // One extra result (true) plus the +1 of (nresults+1)*8.
1275 |7:
1276 | str TMP1, [BASE, #-8] // Prepend true/false to results.
1277 | sub RA, BASE, #8
1278 |.else
1279 | mov RA, BASE
1280 | add RC, RC, #8 // RC = (nresults+1)*8.
1281 |.endif
1282 | ands CARG1, PC, #FRAME_TYPE
1283 | str PC, SAVE_PC
1284 | str RCw, SAVE_MULTRES
1285 | beq ->BC_RET_Z // Frame type bits are zero (flags from the ands above).
1286 | b ->vm_return
1287 |
1288 |8: // Coroutine returned with error (at co->top-1).
1289 |.if resume
1290 | ldr TMP0, [CARG4, #-8]! // Load the error value and step CARG4 back to it.
1291 | mov_false TMP1
1292 | mov RC, #(2+1)*8
1293 | str CARG4, L:RA->top // Remove error from coroutine stack.
1294 | str TMP0, [BASE] // Copy error message.
1295 | b <7
1296 |.else
1297 | mov CARG1, L
1298 | mov CARG2, L:RA
1299 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1300 | // Never returns.
1301 |.endif
1302 |
1303 |9: // Handle stack expansion on return from yield.
1304 | mov CARG1, L
1305 | lsr CARG2, RC, #3 // n = number of result slots.
1306 | bl extern lj_state_growstack // (lua_State *L, int n)
1307 | mov CRET1, #0 // Fake a zero status, then redo the result handling.
1308 | b <4
1309 |.endmacro
1310 |
1311 | coroutine_resume_wrap 1 // coroutine.resume
1312 | coroutine_resume_wrap 0 // coroutine.wrap
1313 | // coroutine.yield fast path: record base/top, mark the thread as yielded and leave via vm_leave_unw.
1314 |.ffunc coroutine_yield
1315 | ldr TMP0, L->cframe
1316 | add TMP1, BASE, NARGS8:RC
1317 | mov CRET1, #LUA_YIELD
1318 | stp BASE, TMP1, L->base // L->base = BASE, L->top = BASE + nargs*8.
1319 | tbz TMP0, #0, ->fff_fallback // Bit 0 of L->cframe clear: take the fallback.
1320 | str xzr, L->cframe
1321 | strb CRET1w, L->status // L->status = LUA_YIELD.
1322 | b ->vm_leave_unw
1323 |
1324 |//-- Math library -------------------------------------------------------
1325 | // math_round: emits the fast function math_<func> using the FP rounding instruction passed as <round>.
1326 |.macro math_round, func, round
1327 | .ffunc math_ .. func
1328 | ldr CARG1, [BASE] // First argument as raw 64 bit value ...
1329 | cmp NARGS8:RC, #8
1330 | ldr d0, [BASE] // ... and the same slot as a double.
1331 | blo ->fff_fallback // Fewer than one argument.
1332 | cmp TISNUMhi, CARG1, lsr #32
1333 | beq ->fff_restv // High word equals TISNUMhi (integer tag): return the argument unchanged.
1334 | blo ->fff_fallback // High word above TISNUMhi: take the fallback.
1335 | round d0, d0
1336 | b ->fff_resn
1337 |.endmacro
1338 |
1339 | math_round floor, frintm
1340 | math_round ceil, frintp
1341 | // math.abs. Its integer path falls through into the shared result epilogue (fff_restv / fff_res1 / fff_res).
1342 |.ffunc_1 math_abs
1343 | checknumber CARG1, ->fff_fallback
1344 | and CARG1, CARG1, #U64x(7fffffff,ffffffff) // Clear bit 63. Plain and leaves the flags alone.
1345 | bne ->fff_restv // Flags are from checknumber. Presumably ne means a non-integer number (TODO confirm against the macro).
1346 | eor CARG2w, CARG1w, CARG1w, asr #31 // x ^ (x >> 31)
1347 | movz CARG3, #0x41e0, lsl #48 // 2^31.
1348 | subs CARG1w, CARG2w, CARG1w, asr #31 // minus (x >> 31) yields the 32 bit absolute value and sets the flags.
1349 | add CARG1, CARG1, TISNUM // Add the integer tag.
1350 | csel CARG1, CARG1, CARG3, pl // Result still negative: use the 2^31 constant instead.
1351 | // Fallthrough.
1352 |
1353 |->fff_restv:
1354 | // CARG1 = TValue result.
1355 | ldr PC, [BASE, FRAME_PC]
1356 | str CARG1, [BASE, #-16] // The result slot is BASE-16.
1357 |->fff_res1:
1358 | // PC = return.
1359 | mov RC, #(1+1)*8
1360 |->fff_res:
1361 | // RC = (nresults+1)*8, PC = return.
1362 | ands CARG1, PC, #FRAME_TYPE
1363 | str RCw, SAVE_MULTRES
1364 | sub RA, BASE, #16
1365 | bne ->vm_return // Frame type bits set in PC: return via vm_return.
1366 | ldr INSw, [PC, #-4] // Otherwise fetch the instruction just before the return PC.
1367 | decode_RB RB, INS
1368 |5:
1369 | cmp RC, RB, lsl #3 // More results expected?
1370 | blo >6
1371 | decode_RA TMP1, INS
1372 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1373 | sub BASE, RA, TMP1, lsl #3
1374 | ins_next
1375 |
1376 |6: // Fill up results with nil.
1377 | add TMP1, RA, RC
1378 | add RC, RC, #8
1379 | str TISNIL, [TMP1, #-8]
1380 | b <5
1381 | // math_extern / math_extern2: emit math_<func> as a call to the extern C function <func>. Argument setup comes from .ffunc_n / .ffunc_nn, the result is returned through fff_resn.
1382 |.macro math_extern, func
1383 | .ffunc_n math_ .. func
1384 | bl extern func
1385 | b ->fff_resn
1386 |.endmacro
1387 |
1388 |.macro math_extern2, func
1389 | .ffunc_nn math_ .. func
1390 | bl extern func
1391 | b ->fff_resn
1392 |.endmacro
1393 | // math.sqrt, followed by fff_resn: the shared exit that returns the double in d0 as the single result.
1394 |.ffunc_n math_sqrt
1395 | fsqrt d0, d0
1396 |->fff_resn:
1397 | ldr PC, [BASE, FRAME_PC]
1398 | str d0, [BASE, #-16] // Store d0 into the result slot at BASE-16.
1399 | b ->fff_res1
1400 | // math.log fast path, only for exactly one argument. Everything else goes to the fallback.
1401 |.ffunc math_log
1402 | ldr CARG1, [BASE] // Raw value for the type check ...
1403 | cmp NARGS8:RC, #8
1404 | ldr FARG1, [BASE] // ... and the same slot as the FP argument.
1405 | bne ->fff_fallback // Need exactly 1 argument.
1406 | checknum CARG1, ->fff_fallback
1407 | bl extern log
1408 | b ->fff_resn
1409 | // Fast functions that are plain calls to the C function of the same name (one argument, then two arguments).
1410 | math_extern log10
1411 | math_extern exp
1412 | math_extern sin
1413 | math_extern cos
1414 | math_extern tan
1415 | math_extern asin
1416 | math_extern acos
1417 | math_extern atan
1418 | math_extern sinh
1419 | math_extern cosh
1420 | math_extern tanh
1421 | math_extern2 pow
1422 | math_extern2 atan2
1423 | math_extern2 fmod
1424 | // math.ldexp: first argument must pass checknum, second must pass checkint.
1425 |.ffunc_2 math_ldexp
1426 | ldr FARG1, [BASE]
1427 | checknum CARG1, ->fff_fallback
1428 | checkint CARG2, ->fff_fallback
1429 | sxtw CARG1, CARG2w // Sign-extended low word of argument 2 becomes the integer argument of ldexp.
1430 | bl extern ldexp // (double x, int exp)
1431 | b ->fff_resn
1432 | // math.frexp: returns two results, the double from frexp and the exponent as a tagged integer.
1433 |.ffunc_n math_frexp
1434 | add CARG1, sp, TMPDofs // Pointer to the TMPD slot, passed to frexp for the exponent.
1435 | bl extern frexp
1436 | ldr CARG2w, TMPD // Exponent written by frexp.
1437 | ldr PC, [BASE, FRAME_PC]
1438 | str d0, [BASE, #-16] // First result.
1439 | mov RC, #(2+1)*8
1440 | add CARG2, CARG2, TISNUM // Add the integer tag.
1441 | str CARG2, [BASE, #-8] // Second result.
1442 | b ->fff_res
1443 | // math.modf: returns two results.
1444 |.ffunc_n math_modf
1445 | sub CARG1, BASE, #16 // modf writes through this pointer, i.e. straight into the first result slot.
1446 | ldr PC, [BASE, FRAME_PC]
1447 | bl extern modf
1448 | mov RC, #(2+1)*8
1449 | str d0, [BASE, #-8] // Return value of modf becomes the second result.
1450 | b ->fff_res
1451 | // math_minmax: emits fast function <name>. cond selects in the integer loop, fcond in the FP loop.
1452 |.macro math_minmax, name, cond, fcond
1453 | .ffunc_1 name
1454 | add RB, BASE, RC // RB = end of the arguments.
1455 | add RA, BASE, #8 // RA = pointer to the next argument to look at.
1456 | checkint CARG1, >4
1457 |1: // Handle integers.
1458 | ldr CARG2, [RA]
1459 | cmp RA, RB
1460 | bhs ->fff_restv // All arguments consumed: return the integer in CARG1.
1461 | checkint CARG2, >3
1462 | cmp CARG1w, CARG2w
1463 | add RA, RA, #8
1464 | csel CARG1, CARG2, CARG1, cond // Take the new argument if the comparison satisfies cond.
1465 | b <1
1466 |3: // Convert intermediate result to number and continue below.
1467 | scvtf d0, CARG1w
1468 | blo ->fff_fallback // Flags are still those left by the failed checkint. Presumably lo means not a number (TODO confirm).
1469 | ldr d1, [RA]
1470 | b >6
1471 |
1472 |4:
1473 | ldr d0, [BASE]
1474 | blo ->fff_fallback // Same flag convention as at label 3.
1475 |5: // Handle numbers.
1476 | ldr CARG2, [RA]
1477 | ldr d1, [RA]
1478 | cmp RA, RB
1479 | bhs ->fff_resn // All arguments consumed: return the double in d0.
1480 | checknum CARG2, >7
1481 |6:
1482 | fcmp d0, d1
1483 | add RA, RA, #8
1484 | fcsel d0, d1, d0, fcond // Take the new argument if the FP comparison satisfies fcond.
1485 | b <5
1486 |7: // Convert integer to number and continue above.
1487 | scvtf d1, CARG2w
1488 | blo ->fff_fallback // Flags are from the failed checknum (TODO confirm the lo convention against the macro).
1489 | b <6
1490 |.endmacro
1491 |
1492 | math_minmax math_min, gt, pl
1493 | math_minmax math_max, lt, le
1494 |
1495 |//-- String library -----------------------------------------------------
1496 | // string.byte fast path: returns the first byte as a tagged integer, or no result for an empty string.
1497 |.ffunc string_byte // Only handle the 1-arg case here.
1498 | ldp PC, CARG1, [BASE, FRAME_PC] // PC and the slot after it, which is used as the first argument.
1499 | cmp NARGS8:RC, #8
1500 | asr ITYPE, CARG1, #47
1501 | ccmn ITYPE, #-LJ_TSTR, #0, eq // Only compares the type if exactly one argument was passed, else forces ne.
1502 | and STR:CARG1, CARG1, #LJ_GCVMASK
1503 | bne ->fff_fallback // Not exactly one argument or type tag is not LJ_TSTR.
1504 | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
1505 | ldr CARG3w, STR:CARG1->len
1506 | add TMP0, TMP0, TISNUM // Add the integer tag.
1507 | str TMP0, [BASE, #-16]
1508 | mov RC, #(0+1)*8 // Zero results, used when the length is zero.
1509 | cbz CARG3, ->fff_res
1510 | b ->fff_res1 // Otherwise one result (fff_res1 sets RC itself).
1511 | // string.char fast path, followed by the shared string creation exits fff_newstr and fff_resstr.
1512 |.ffunc string_char // Only handle the 1-arg case here.
1513 | ffgccheck
1514 | ldp PC, CARG1, [BASE, FRAME_PC]
1515 | cmp CARG1w, #255
1516 | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument.
1517 | bne ->fff_fallback // Low word above 255 (unsigned) or argument count is not one.
1518 | checkint CARG1, ->fff_fallback
1519 | mov CARG3, #1
1520 | // Point to the char inside the integer in the stack slot.
1521 |.if ENDIAN_LE
1522 | mov CARG2, BASE
1523 |.else
1524 | add CARG2, BASE, #7
1525 |.endif
1526 |->fff_newstr:
1527 | // CARG2 = str, CARG3 = len.
1528 | str BASE, L->base
1529 | mov CARG1, L
1530 | str PC, SAVE_PC
1531 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1532 |->fff_resstr:
1533 | // Returns GCstr *.
1534 | ldr BASE, L->base
1535 | movn TMP1, #~LJ_TSTR // movn of the complement materializes LJ_TSTR.
1536 | add CARG1, CARG1, TMP1, lsl #47 // Combine the string tag (shifted to bit 47) with the pointer.
1537 | b ->fff_restv
1538 | // string.sub fast path for (str, start) and (str, start, end) with integer indexes.
1539 |.ffunc string_sub
1540 | ffgccheck
1541 | ldr CARG1, [BASE]
1542 | ldr CARG3, [BASE, #16]
1543 | cmp NARGS8:RC, #16
1544 | movn RB, #0 // Default end = -1.
1545 | beq >1 // Exactly two arguments: keep the default end.
1546 | blo ->fff_fallback // Fewer than two arguments.
1547 | checkint CARG3, ->fff_fallback
1548 | sxtw RB, CARG3w
1549 |1:
1550 | ldr CARG2, [BASE, #8]
1551 | checkstr CARG1, ->fff_fallback
1552 | ldr TMP1w, STR:CARG1->len
1553 | checkint CARG2, ->fff_fallback
1554 | sxtw CARG2, CARG2w
1555 | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
1556 | add TMP2, RB, TMP1
1557 | cmp RB, #0
1558 | add TMP0, CARG2, TMP1
1559 | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1
1560 | cmp CARG2, #0
1561 | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1
1562 | cmp RB, #0
1563 | csel RB, RB, xzr, ge // if (end < 0) end = 0
1564 | cmp CARG2, #1
1565 | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1
1566 | cmp RB, TMP1
1567 | csel RB, RB, TMP1, le // if (end > len) end = len
1568 | add CARG1, STR:CARG1, #sizeof(GCstr)-1 // Bias by -1 so that adding the 1-based start gives the data pointer.
1569 | subs CARG3, RB, CARG2 // len = end - start
1570 | add CARG2, CARG1, CARG2
1571 | add CARG3, CARG3, #1 // len += 1
1572 | bge ->fff_newstr // Flags from the subs: end >= start, create the substring.
1573 | add STR:CARG1, GL, #offsetof(global_State, strempty) // Otherwise return the strempty object embedded in global_State.
1574 | movn TMP1, #~LJ_TSTR
1575 | add CARG1, CARG1, TMP1, lsl #47 // Add the string tag.
1576 | b ->fff_restv
1577 | // ffstring_op: emits string_<name> as lj_buf_putstr_<name> into the global tmpbuf followed by lj_buf_tostr.
1578 |.macro ffstring_op, name
1579 | .ffunc string_ .. name
1580 | ffgccheck
1581 | ldr CARG2, [BASE]
1582 | cmp NARGS8:RC, #8
1583 | asr ITYPE, CARG2, #47
1584 | ccmn ITYPE, #-LJ_TSTR, #0, hs // Only compares the type if at least one argument was passed, else forces ne.
1585 | and STR:CARG2, CARG2, #LJ_GCVMASK
1586 | bne ->fff_fallback
1587 | ldr TMP0, GL->tmpbuf.b
1588 | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf)
1589 | str BASE, L->base
1590 | str PC, SAVE_PC
1591 | str L, GL->tmpbuf.L
1592 | str TMP0, GL->tmpbuf.w // tmpbuf.w = tmpbuf.b.
1593 | bl extern lj_buf_putstr_ .. name
1594 | bl extern lj_buf_tostr
1595 | b ->fff_resstr
1596 |.endmacro
1597 |
1598 |ffstring_op reverse
1599 |ffstring_op lower
1600 |ffstring_op upper
1601 |
1602 |//-- Bit library --------------------------------------------------------
1603 | // Reached by a branch from a failed checkint with the return address preloaded in lr (see the adr lr users below).
1604 |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3
1605 |->vm_tobit_fb:
1606 | bls ->fff_fallback // Flags are from the checkint of the caller. Presumably ls means not a number (TODO confirm).
1607 | add CARG2, CARG1, CARG1 // Shift the sign bit out.
1608 | mov CARG3, #1076
1609 | sub CARG3, CARG3, CARG2, lsr #53 // Shift count = 1076 - biased exponent.
1610 | cmp CARG3, #53
1611 | bhi >1 // Shift count outside 0..53: result is zero.
1612 | and CARG2, CARG2, #U64x(001fffff,ffffffff) // Keep the mantissa bits (now at bits 52..1).
1613 | orr CARG2, CARG2, #U64x(00200000,00000000) // Set the implicit leading one at bit 53.
1614 | cmp CARG1, #0
1615 | lsr CARG2, CARG2, CARG3
1616 | cneg CARG1w, CARG2w, mi // Negate if the original value had its sign bit set.
1617 | br lr
1618 |1:
1619 | mov CARG1w, #0
1620 | br lr
1621 | // Bit operation helpers. A non-integer argument detours through vm_tobit_fb, which returns to the address in lr.
1622 |.macro .ffunc_bit, name
1623 | .ffunc_1 bit_..name
1624 | adr lr, >1 // Return address for vm_tobit_fb.
1625 | checkint CARG1, ->vm_tobit_fb
1626 |1:
1627 |.endmacro
1628 |
1629 |.macro .ffunc_bit_op, name, ins
1630 | .ffunc_bit name
1631 | mov RA, #8 // Byte offset of the next argument.
1632 | mov TMP0w, CARG1w // Accumulator starts with the first argument.
1633 | adr lr, >2
1634 |1:
1635 | ldr CARG1, [BASE, RA]
1636 | cmp RA, NARGS8:RC
1637 | add RA, RA, #8
1638 | bge >9 // All arguments consumed: label 9 lives in bit_tobit below.
1639 | checkint CARG1, ->vm_tobit_fb
1640 |2:
1641 | ins TMP0w, TMP0w, CARG1w
1642 | b <1
1643 |.endmacro
1644 |
1645 |.ffunc_bit_op band, and
1646 |.ffunc_bit_op bor, orr
1647 |.ffunc_bit_op bxor, eor
1648 |
1649 |.ffunc_bit tobit
1650 | mov TMP0w, CARG1w
1651 |9: // Label reused by .ffunc_bit_op users.
1652 | add CARG1, TMP0, TISNUM // Add the integer tag to the 32 bit result.
1653 | b ->fff_restv
1654 |
1655 |.ffunc_bit bswap
1656 | rev TMP0w, CARG1w
1657 | add CARG1, TMP0, TISNUM
1658 | b ->fff_restv
1659 |
1660 |.ffunc_bit bnot
1661 | mvn TMP0w, CARG1w
1662 | add CARG1, TMP0, TISNUM
1663 | b ->fff_restv
1664 | // .ffunc_bit_sh: emits bit_<name> as <ins> value, count. A nonzero shmod negates the count first.
1665 |.macro .ffunc_bit_sh, name, ins, shmod
1666 | .ffunc bit_..name
1667 | ldp TMP0, CARG1, [BASE] // TMP0 = value (argument 1), CARG1 = count (argument 2).
1668 | cmp NARGS8:RC, #16
1669 | blo ->fff_fallback // Fewer than two arguments.
1670 | adr lr, >1
1671 | checkint CARG1, ->vm_tobit_fb // Convert the count first.
1672 |1:
1673 |.if shmod == 0
1674 | mov TMP1, CARG1
1675 |.else
1676 | neg TMP1, CARG1
1677 |.endif
1678 | mov CARG1, TMP0
1679 | adr lr, >2
1680 | checkint CARG1, ->vm_tobit_fb // Then the value.
1681 |2:
1682 | ins TMP0w, CARG1w, TMP1w
1683 | add CARG1, TMP0, TISNUM // Add the integer tag.
1684 | b ->fff_restv
1685 |.endmacro
1686 |
1687 |.ffunc_bit_sh lshift, lsl, 0
1688 |.ffunc_bit_sh rshift, lsr, 0
1689 |.ffunc_bit_sh arshift, asr, 0
1690 |.ffunc_bit_sh rol, ror, 1 // Rotate left is emitted as ror with the negated count.
1691 |.ffunc_bit_sh ror, ror, 0
1692 |
1693 |//-----------------------------------------------------------------------
1694 | // Common slow path of all fast functions: call the C handler stored in the function object.
1695 |->fff_fallback: // Call fast function fallback handler.
1696 | // BASE = new base, RC = nargs*8
1697 | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC.
1698 | ldr TMP2, L->maxstack
1699 | add TMP1, BASE, NARGS8:RC
1700 | stp BASE, TMP1, L->base // L->base = BASE, L->top = BASE + nargs*8.
1701 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1702 | add TMP1, TMP1, #8*LUA_MINSTACK
1703 | ldr CARG3, CFUNC:CARG3->f
1704 | str PC, SAVE_PC // Redundant (but a defined value).
1705 | cmp TMP1, TMP2
1706 | mov CARG1, L
1707 | bhi >5 // Need to grow stack.
1708 | blr CARG3 // (lua_State *L)
1709 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1710 | ldr BASE, L->base
1711 | cmp CRET1w, #0
1712 | lsl RC, CRET1, #3
1713 | sub RA, BASE, #16
1714 | bgt ->fff_res // Returned nresults+1?
1715 |1: // Returned 0 or -1: retry fast path.
1716 | ldr CARG1, L->top
1717 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1718 | sub NARGS8:RC, CARG1, BASE
1719 | bne ->vm_call_tail // Returned -1?
1720 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1721 | ins_callt // Returned 0: retry fast path.
1722 |
1723 |// Reconstruct previous base for vmeta_call during tailcall.
1724 |->vm_call_tail:
1725 | ands TMP0, PC, #FRAME_TYPE
1726 | and TMP1, PC, #~FRAME_TYPEP // Frame type bits set: the delta is PC with the FRAME_TYPEP bits cleared.
1727 | bne >3
1728 | ldrb RAw, [PC, #-4+OFS_RA] // Otherwise use the RA operand of the instruction before PC.
1729 | lsl RA, RA, #3
1730 | add TMP1, RA, #16 // Delta = RA*8 + 16.
1731 |3:
1732 | sub RB, BASE, TMP1
1733 | b ->vm_call_dispatch // Resolve again for tailcall.
1734 |
1735 |5: // Grow stack for fallback handler.
1736 | mov CARG2, #LUA_MINSTACK
1737 | bl extern lj_state_growstack // (lua_State *L, int n)
1738 | ldr BASE, L->base
1739 | cmp CARG1, CARG1 // Set zero-flag to force retry.
1740 | b <1
1741 | // Called with bl. Runs one GC step and returns to the caller with BASE, RC and CARG3 recomputed.
1742 |->fff_gcstep: // Call GC step function.
1743 | // BASE = new base, RC = nargs*8
1744 | add CARG2, BASE, NARGS8:RC // Calculate L->top.
1745 | mov RA, lr // Preserve the return address in RA across the call below.
1746 | stp BASE, CARG2, L->base
1747 | str PC, SAVE_PC // Redundant (but a defined value).
1748 | mov CARG1, L
1749 | bl extern lj_gc_step // (lua_State *L)
1750 | ldp BASE, CARG2, L->base
1751 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1752 | mov lr, RA // Help return address predictor.
1753 | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8.
1754 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1755 | ret
1756 |
1757 |//-----------------------------------------------------------------------
1758 |//-- Special dispatch targets -------------------------------------------
1759 |//-----------------------------------------------------------------------
1760 | // vm_record, vm_rethook and vm_inshook share the local labels 1 (call lj_dispatch_ins) and 5 (re-dispatch to static ins).
1761 |->vm_record: // Dispatch target for recording phase.
1762 |.if JIT
1763 | ldrb CARG1w, GL->hookmask
1764 | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
1765 | bne >5
1766 | // Decrement the hookcount for consistency, but always do the call.
1767 | ldr CARG2w, GL->hookcount
1768 | tst CARG1, #HOOK_ACTIVE
1769 | bne >1 // Hook active: do not touch the count.
1770 | sub CARG2w, CARG2w, #1
1771 | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
1772 | beq >1 // Neither line nor count hook set: the decremented count is not stored.
1773 | str CARG2w, GL->hookcount
1774 | b >1
1775 |.endif
1776 |
1777 |->vm_rethook: // Dispatch target for return hooks.
1778 | ldrb TMP2w, GL->hookmask
1779 | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
1780 |5: // Re-dispatch to static ins.
1781 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] // TMP1 is not set in this block. Presumably the dispatcher left GL + opcode*8 in it (TODO confirm).
1782 | br TMP0
1783 |
1784 |->vm_inshook: // Dispatch target for instr/line hooks.
1785 | ldrb TMP2w, GL->hookmask
1786 | ldr TMP3w, GL->hookcount
1787 | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
1788 | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
1789 | beq <5 // Neither line nor count hook set.
1790 | sub TMP3w, TMP3w, #1
1791 | str TMP3w, GL->hookcount
1792 | cbz TMP3w, >1 // Count reached zero: call the dispatcher.
1793 | tbz TMP2w, #LUA_HOOKLINE, <5 // Bit LUA_HOOKLINE of the mask clear: nothing to do.
1794 |1:
1795 | mov CARG1, L
1796 | str BASE, L->base
1797 | mov CARG2, PC
1798 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1799 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
1800 |3:
1801 | ldr BASE, L->base
1802 |4: // Re-dispatch to static ins.
1803 | ldr INSw, [PC, #-4] // Refetch the current instruction.
1804 | add TMP1, GL, INS, uxtb #3 // GL + opcode*8.
1805 | decode_RA RA, INS
1806 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
1807 | decode_RD RC, INS
1808 | br TMP0
1809 |
1810 |->cont_hook: // Continue from hook yield.
1811 | ldr CARG1, [CARG4, #-40] // Value saved at offset -40 from CARG4, used as MULTRES below.
1812 | add PC, PC, #4
1813 | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
1814 | b <4
1815 | // Only emitted with JIT: hands the hot loop to lj_trace_hot, then re-dispatches through label 3 of vm_inshook.
1816 |->vm_hotloop: // Hot loop counter underflow.
1817 |.if JIT
1818 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
1819 | add CARG1, GL, #GG_G2DISP+GG_DISP2J
1820 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1821 | str PC, SAVE_PC
1822 | ldr CARG3, LFUNC:CARG3->pc
1823 | mov CARG2, PC
1824 | str L, [GL, #GL_J(L)]
1825 | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] // framesize field of the prototype of the function.
1826 | str BASE, L->base
1827 | add CARG3, BASE, CARG3, lsl #3
1828 | str CARG3, L->top // L->top = BASE + framesize*8.
1829 | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
1830 | b <3
1831 |.endif
1832 | // vm_callhook and vm_hotcall share the lj_dispatch_call sequence. They differ only in the pc argument.
1833 |->vm_callhook: // Dispatch target for call hooks.
1834 | mov CARG2, PC
1835 |.if JIT
1836 | b >1
1837 |.endif
1838 |
1839 |->vm_hotcall: // Hot call counter underflow.
1840 |.if JIT
1841 | orr CARG2, PC, #1 // Hot call: pass the pc with bit 0 set.
1842 |1:
1843 |.endif
1844 | add TMP1, BASE, NARGS8:RC
1845 | str PC, SAVE_PC
1846 | mov CARG1, L
1847 | sub RA, RA, BASE // Keep RA relative to BASE across the call. It is rebased after BASE is reloaded.
1848 | stp BASE, TMP1, L->base
1849 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
1850 | // Returns ASMFunction.
1851 | ldp BASE, TMP1, L->base
1852 | str xzr, SAVE_PC // Invalidate for subsequent line hook.
1853 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
1854 | add RA, BASE, RA
1855 | sub NARGS8:RC, TMP1, BASE
1856 | ldr INSw, [PC, #-4]
1857 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1858 | br CRET1 // Continue at the returned ASMFunction.
1859 | // Only emitted with JIT: moves the call results into place, then continues with a linked trace or starts stitching a new one.
1860 |->cont_stitch: // Trace stitching.
1861 |.if JIT
1862 | // RA = resultptr, CARG4 = meta base
1863 | ldr RBw, SAVE_MULTRES
1864 | ldr INSw, [PC, #-4]
1865 | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
1866 | subs RB, RB, #8 // MULTRES - 8: zero means there are no results to move.
1867 | decode_RA RC, INS // Call base.
1868 | and CARG3, CARG3, #LJ_GCVMASK
1869 | beq >2
1870 |1: // Move results down.
1871 | ldr CARG1, [RA]
1872 | add RA, RA, #8
1873 | subs RB, RB, #8
1874 | str CARG1, [BASE, RC, lsl #3]
1875 | add RC, RC, #1
1876 | bne <1
1877 |2:
1878 | decode_RA RA, INS
1879 | decode_RB RB, INS
1880 | add RA, RA, RB // RA operand + RB operand: slot index compared against RC below.
1881 |3:
1882 | cmp RA, RC
1883 | bhi >9 // More results wanted?
1884 |
1885 | ldrh RAw, TRACE:CARG3->traceno
1886 | ldrh RCw, TRACE:CARG3->link
1887 | cmp RCw, RAw
1888 | beq ->cont_nop // Blacklisted.
1889 | cmp RCw, #0
1890 | bne =>BC_JLOOP // Jump to stitched trace.
1891 |
1892 | // Stitch a new trace to the previous trace.
1893 | mov CARG1, #GL_J(exitno)
1894 | str RAw, [GL, CARG1] // J->exitno = trace number of the previous trace.
1895 | mov CARG1, #GL_J(L)
1896 | str L, [GL, CARG1]
1897 | str BASE, L->base
1898 | add CARG1, GL, #GG_G2J
1899 | mov CARG2, PC
1900 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
1901 | ldr BASE, L->base
1902 | b ->cont_nop
1903 |
1904 |9: // Fill up results with nil.
1905 | str TISNIL, [BASE, RC, lsl #3]
1906 | add RC, RC, #1
1907 | b <3
1908 |.endif
1909 | // Body is only compiled in when LJ_HASPROFILE is set (C preprocessor conditional, not a DynASM .if).
1910 |->vm_profhook: // Dispatch target for profiler hook.
1911#if LJ_HASPROFILE
1912 | mov CARG1, L
1913 | str BASE, L->base
1914 | mov CARG2, PC
1915 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
1916 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
1917 | ldr BASE, L->base
1918 | sub PC, PC, #4 // Step back one instruction so that it gets dispatched again.
1919 | b ->cont_nop
1920#endif
1921 |
1922 |//-----------------------------------------------------------------------
1923 |//-- Trace exit handler -------------------------------------------------
1924 |//-----------------------------------------------------------------------
1925 | // savex_ a, b: store the FP pair d<a>/d<b> at sp + a*8 and the GPR pair x<a>/x<b> at sp + 32*8 + a*8.
1926 |.macro savex_, a, b
1927 | stp d..a, d..b, [sp, #a*8]
1928 | stp x..a, x..b, [sp, #32*8+a*8]
1929 |.endmacro
1930 |
1931 |->vm_exit_handler:
1932 |.if JIT
1933 | sub sp, sp, #(64*8) // Room for 32 FP and 32 general purpose register slots.
1934 | savex_, 0, 1
1935 | savex_, 2, 3
1936 | savex_, 4, 5
1937 | savex_, 6, 7
1938 | savex_, 8, 9
1939 | savex_, 10, 11
1940 | savex_, 12, 13
1941 | savex_, 14, 15
1942 | savex_, 16, 17
1943 | savex_, 18, 19
1944 | savex_, 20, 21
1945 | savex_, 22, 23
1946 | savex_, 24, 25
1947 | savex_, 26, 27
1948 | savex_, 28, 29
1949 | stp d30, d31, [sp, #30*8] // x30/sp are not saved as-is, their slots are filled in below.
1950 | ldr CARG1, [sp, #64*8] // Load original value of lr.
1951 | add CARG3, sp, #64*8 // Recompute original value of sp.
1952 | mv_vmstate CARG4w, EXIT
1953 | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP.
1954 | sub CARG1, CARG1, lr
1955 | ldr L, GL->cur_L
1956 | lsr CARG1, CARG1, #2
1957 | ldr BASE, GL->jit_base
1958 | sub CARG1, CARG1, #2 // Exit number = (original lr - lr) / 4 - 2.
1959 | ldr CARG2w, [lr] // Load trace number.
1960 | st_vmstate CARG4w
1961 |.if ENDIAN_BE
1962 | rev32 CARG2, CARG2
1963 |.endif
1964 | str BASE, L->base
1965 | ubfx CARG2w, CARG2w, #5, #16 // The trace number is the 16 bit field starting at bit 5 of that word.
1966 | str CARG1w, [GL, #GL_J(exitno)]
1967 | str CARG2w, [GL, #GL_J(parent)]
1968 | str L, [GL, #GL_J(L)]
1969 | str xzr, GL->jit_base
1970 | add CARG1, GL, #GG_G2J
1971 | mov CARG2, sp // The register save area on the stack is passed as the ExitState.
1972 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
1973 | // Returns MULTRES (unscaled) or negated error code.
1974 | ldr CARG2, L->cframe
1975 | ldr BASE, L->base
1976 | and sp, CARG2, #CFRAME_RAWMASK // Reset sp to the C frame of L, dropping the save area.
1977 | ldr PC, SAVE_PC // Get SAVE_PC.
1978 | str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
1979 | b >1 // Joins vm_exit_interp at its label 1.
1980 |.endif
1981 | // Only emitted with JIT: restores interpreter state after a trace exit and dispatches the instruction at PC.
1982 |->vm_exit_interp:
1983 | // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
1984 |.if JIT
1985 | ldr L, SAVE_L
1986 |1:
1987 | cmp CARG1w, #0
1988 | blt >9 // Check for error from exit.
1989 | lsl RC, CARG1, #3 // Scale MULTRES by 8.
1990 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
1991 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Reload the constant registers TISNUM, TISNUMhi and TISNIL.
1992 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
1993 | movn TISNIL, #0
1994 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
1995 | str RCw, SAVE_MULTRES
1996 | str BASE, L->base
1997 | ldr CARG2, LFUNC:CARG2->pc
1998 | str xzr, GL->jit_base
1999 | mv_vmstate CARG4w, INTERP
2000 | ldr KBASE, [CARG2, #PC2PROTO(k)]
2001 | // Modified copy of ins_next which handles function header dispatch, too.
2002 | ldrb RBw, [PC, # OFS_OP]
2003 | ldr INSw, [PC], #4
2004 | st_vmstate CARG4w
2005 | cmp RBw, #BC_FUNCC+2 // Fast function?
2006 | add TMP1, GL, INS, uxtb #3
2007 | bhs >4
2008 |2:
2009 | cmp RBw, #BC_FUNCF // Function header?
2010 | add TMP0, GL, RB, uxtb #3
2011 | ldr RB, [TMP0, #GG_G2DISP] // Dispatch target for this opcode.
2012 | decode_RA RA, INS
2013 | lsr TMP0, INS, #16
2014 | csel RC, TMP0, RC, lo // Not a function header: RC = upper 16 bits of INS.
2015 | blo >5
2016 | ldr CARG3, [BASE, FRAME_FUNC]
2017 | sub RC, RC, #8
2018 | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8
2019 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2020 |5:
2021 | br RB
2022 |
2023 |4: // Check frame below fast function.
2024 | ldr CARG1, [BASE, FRAME_PC]
2025 | ands CARG2, CARG1, #FRAME_TYPE
2026 | bne <2 // Trace stitching continuation?
2027 | // Otherwise set KBASE for Lua function below fast function.
2028 | ldr CARG3w, [CARG1, #-4]
2029 | decode_RA CARG1, CARG3
2030 | sub CARG2, BASE, CARG1, lsl #3
2031 | ldr LFUNC:CARG3, [CARG2, #-32]
2032 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2033 | ldr CARG3, LFUNC:CARG3->pc
2034 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2035 | b <2
2036 |
2037 |9: // Rethrow error from the right C frame.
2038 | neg CARG2w, CARG1w // Turn the negated error code back into a positive one.
2039 | mov CARG1, L
2040 | bl extern lj_err_trace // (lua_State *L, int errcode)
2041 |.endif
2042 |
2043 |//-----------------------------------------------------------------------
2044 |//-- Math helper functions ----------------------------------------------
2045 |//-----------------------------------------------------------------------
2046 | // Integer modulo whose nonzero result takes the sign of the divisor: unsigned remainder of the absolute values, then two sign fixups.
2047 | // int lj_vm_modi(int dividend, int divisor);
2048 |->vm_modi:
2049 | eor CARG4w, CARG1w, CARG2w
2050 | cmp CARG4w, #0 // mi if the operand signs differ. These flags survive until the ccmp below.
2051 | eor CARG3w, CARG1w, CARG1w, asr #31
2052 | eor CARG4w, CARG2w, CARG2w, asr #31
2053 | sub CARG3w, CARG3w, CARG1w, asr #31 // CARG3w = abs(dividend).
2054 | sub CARG4w, CARG4w, CARG2w, asr #31 // CARG4w = abs(divisor).
2055 | udiv CARG1w, CARG3w, CARG4w
2056 | msub CARG1w, CARG1w, CARG4w, CARG3w // Remainder of the absolute values.
2057 | ccmp CARG1w, #0, #4, mi // Signs differ: test remainder against 0. Otherwise force eq.
2058 | sub CARG3w, CARG1w, CARG4w
2059 | csel CARG1w, CARG1w, CARG3w, eq // Signs differ and remainder nonzero: remainder - abs(divisor).
2060 | eor CARG3w, CARG1w, CARG2w
2061 | cmp CARG3w, #0
2062 | cneg CARG1w, CARG1w, mi // Negate if the sign differs from the sign of the divisor.
2063 | ret
2064 |
2065 |//-----------------------------------------------------------------------
2066 |//-- Miscellaneous functions --------------------------------------------
2067 |//-----------------------------------------------------------------------
2068 |
2069 |//-----------------------------------------------------------------------
2070 |//-- FFI helper functions -----------------------------------------------
2071 |//-----------------------------------------------------------------------
2072 |
2073 |// Handler for callback functions.
2074 |// Entry state as used below: callback slot number in w9, g in x10. The saveregs macro is invoked here.
2075 |->vm_ffi_callback:
2076 |.if FFI
2077 |.type CTSTATE, CTState, PC
2078 | saveregs
2079 | ldr CTSTATE, GL:x10->ctype_state
2080 | mov GL, x10
2081 | add x10, sp, # CFRAME_SPACE // Address just above the C frame, recorded as cb.stack below.
2082 | str w9, CTSTATE->cb.slot
2083 | stp x0, x1, CTSTATE->cb.gpr[0] // Save the incoming argument registers x0-x7 and d0-d7.
2084 | stp d0, d1, CTSTATE->cb.fpr[0]
2085 | stp x2, x3, CTSTATE->cb.gpr[2]
2086 | stp d2, d3, CTSTATE->cb.fpr[2]
2087 | stp x4, x5, CTSTATE->cb.gpr[4]
2088 | stp d4, d5, CTSTATE->cb.fpr[4]
2089 | stp x6, x7, CTSTATE->cb.gpr[6]
2090 | stp d6, d7, CTSTATE->cb.fpr[6]
2091 | str x10, CTSTATE->cb.stack
2092 | mov CARG1, CTSTATE
2093 | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
2094 | mov CARG2, sp
2095 | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
2096 | // Returns lua_State *.
2097 | ldp BASE, RC, L:CRET1->base // BASE = L->base, RC = L->top.
2098 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Set up the constant registers TISNUM, TISNUMhi and TISNIL.
2099 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
2100 | movn TISNIL, #0
2101 | mov L, CRET1
2102 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2103 | sub RC, RC, BASE // RC = L->top - L->base.
2104 | st_vmstate ST_INTERP
2105 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2106 | ins_callt
2107 |.endif
2108 | // Only emitted with FFI: hands the result to lj_ccallback_leave, reloads the return registers and leaves via vm_leave_unw.
2109 |->cont_ffi_callback: // Return from FFI callback.
2110 |.if FFI
2111 | ldr CTSTATE, GL->ctype_state
2112 | stp BASE, CARG4, L->base // L->base = BASE, L->top = CARG4.
2113 | str L, CTSTATE->L
2114 | mov CARG1, CTSTATE
2115 | mov CARG2, RA
2116 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2117 | ldp x0, x1, CTSTATE->cb.gpr[0] // Load the return registers from the callback state.
2118 | ldp d0, d1, CTSTATE->cb.fpr[0]
2119 | b ->vm_leave_unw
2120 |.endif
2121 | // Only emitted with FFI. Takes the CCallState pointer in x0.
2122 |->vm_ffi_call: // Call C function via FFI.
2123 | // Caveat: needs special frame unwinding, see below.
2124 |.if FFI
2125 | .type CCSTATE, CCallState, x19
2126 | stp fp, lr, [sp, #-32]! // 32 byte frame: fp and lr at offset 0, saved x19 at offset 16.
2127 | add fp, sp, #0
2128 | str CCSTATE, [sp, #16]
2129 | mov CCSTATE, x0
2130 | ldr TMP0w, CCSTATE:x0->spadj
2131 | ldrb TMP1w, CCSTATE->nsp
2132 | add TMP2, CCSTATE, #offsetof(CCallState, stack)
2133 | subs TMP1, TMP1, #1 // Index of the last stack slot, negative if there are none.
2134 | ldr TMP3, CCSTATE->func
2135 | sub sp, fp, TMP0 // Make room for spadj bytes below the frame.
2136 | bmi >2
2137 |1: // Copy stack slots
2138 | ldr TMP0, [TMP2, TMP1, lsl #3]
2139 | str TMP0, [sp, TMP1, lsl #3]
2140 | subs TMP1, TMP1, #1
2141 | bpl <1
2142 |2:
2143 | ldp x0, x1, CCSTATE->gpr[0] // Load the argument registers x0-x7 and d0-d7 from the call state.
2144 | ldp d0, d1, CCSTATE->fpr[0]
2145 | ldp x2, x3, CCSTATE->gpr[2]
2146 | ldp d2, d3, CCSTATE->fpr[2]
2147 | ldp x4, x5, CCSTATE->gpr[4]
2148 | ldp d4, d5, CCSTATE->fpr[4]
2149 | ldp x6, x7, CCSTATE->gpr[6]
2150 | ldp d6, d7, CCSTATE->fpr[6]
2151 | ldr x8, CCSTATE->retp
2152 | blr TMP3
2153 | mov sp, fp // Drop the outgoing stack area.
2154 | stp x0, x1, CCSTATE->gpr[0] // Store x0/x1 and d0-d3 back into the call state after the call.
2155 | stp d0, d1, CCSTATE->fpr[0]
2156 | stp d2, d3, CCSTATE->fpr[2]
2157 | ldr CCSTATE, [sp, #16]
2158 | ldp fp, lr, [sp], #32
2159 | ret
2160 |.endif
2161 |// Note: vm_ffi_call must be the last function in this object file!
2162 |
2163 |//-----------------------------------------------------------------------
2164}
2165
2166/* Generate the code for a single instruction. */
2167static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2168{
2169 int vk = 0;
2170 |=>defop:
2171
2172 switch (op) {
2173
2174 /* -- Comparison ops ---------------------------------------------------- */
2175
2176 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2177
2178 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2179 | // RA = src1, RC = src2, JMP with RC = target
2180 | ldr CARG1, [BASE, RA, lsl #3]
2181 | ldrh RBw, [PC, # OFS_RD]
2182 | ldr CARG2, [BASE, RC, lsl #3]
2183 | add PC, PC, #4
2184 | add RB, PC, RB, lsl #2
2185 | sub RB, RB, #0x20000
2186 | checkint CARG1, >3
2187 | checkint CARG2, >4
2188 | cmp CARG1w, CARG2w
2189 if (op == BC_ISLT) {
2190 | csel PC, RB, PC, lt
2191 } else if (op == BC_ISGE) {
2192 | csel PC, RB, PC, ge
2193 } else if (op == BC_ISLE) {
2194 | csel PC, RB, PC, le
2195 } else {
2196 | csel PC, RB, PC, gt
2197 }
2198 |1:
2199 | ins_next
2200 |
2201 |3: // RA not int.
2202 | ldr FARG1, [BASE, RA, lsl #3]
2203 | blo ->vmeta_comp
2204 | ldr FARG2, [BASE, RC, lsl #3]
2205 | cmp TISNUMhi, CARG2, lsr #32
2206 | bhi >5
2207 | bne ->vmeta_comp
2208 | // RA number, RC int.
2209 | scvtf FARG2, CARG2w
2210 | b >5
2211 |
2212 |4: // RA int, RC not int
2213 | ldr FARG2, [BASE, RC, lsl #3]
2214 | blo ->vmeta_comp
2215 | // RA int, RC number.
2216 | scvtf FARG1, CARG1w
2217 |
2218 |5: // RA number, RC number
2219 | fcmp FARG1, FARG2
2220 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2221 if (op == BC_ISLT) {
2222 | csel PC, RB, PC, lo
2223 } else if (op == BC_ISGE) {
2224 | csel PC, RB, PC, hs
2225 } else if (op == BC_ISLE) {
2226 | csel PC, RB, PC, ls
2227 } else {
2228 | csel PC, RB, PC, hi
2229 }
2230 | b <1
2231 break;
2232
2233 case BC_ISEQV: case BC_ISNEV:
2234 vk = op == BC_ISEQV;
2235 | // RA = src1, RC = src2, JMP with RC = target
2236 | ldr CARG1, [BASE, RA, lsl #3]
2237 | add RC, BASE, RC, lsl #3
2238 | ldrh RBw, [PC, # OFS_RD]
2239 | ldr CARG3, [RC]
2240 | add PC, PC, #4
2241 | add RB, PC, RB, lsl #2
2242 | sub RB, RB, #0x20000
2243 | asr ITYPE, CARG3, #47
2244 | cmn ITYPE, #-LJ_TISNUM
2245 if (vk) {
2246 | bls ->BC_ISEQN_Z
2247 } else {
2248 | bls ->BC_ISNEN_Z
2249 }
2250 | // RC is not a number.
2251 | asr TMP0, CARG1, #47
2252 |.if FFI
2253 | // Check if RC or RA is a cdata.
2254 | cmn ITYPE, #-LJ_TCDATA
2255 | ccmn TMP0, #-LJ_TCDATA, #4, ne
2256 | beq ->vmeta_equal_cd
2257 |.endif
2258 | cmp CARG1, CARG3
2259 | bne >2
2260 | // Tag and value are equal.
2261 if (vk) {
2262 |->BC_ISEQV_Z:
2263 | mov PC, RB // Perform branch.
2264 }
2265 |1:
2266 | ins_next
2267 |
2268 |2: // Check if the tags are the same and it's a table or userdata.
2269 | cmp ITYPE, TMP0
2270 | ccmn ITYPE, #-LJ_TISTABUD, #2, eq
2271 if (vk) {
2272 | bhi <1
2273 } else {
2274 | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction.
2275 }
2276 | // Different tables or userdatas. Need to check __eq metamethod.
2277 | // Field metatable must be at same offset for GCtab and GCudata!
2278 | and TAB:CARG2, CARG1, #LJ_GCVMASK
2279 | ldr TAB:TMP2, TAB:CARG2->metatable
2280 if (vk) {
2281 | cbz TAB:TMP2, <1 // No metatable?
2282 | ldrb TMP1w, TAB:TMP2->nomm
2283 | mov CARG4, #0 // ne = 0
2284 | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done.
2285 } else {
2286 | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable?
2287 | ldrb TMP1w, TAB:TMP2->nomm
2288 | mov CARG4, #1 // ne = 1.
2289 | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done.
2290 }
2291 | b ->vmeta_equal
2292 break;
2293
2294 case BC_ISEQS: case BC_ISNES:
2295 vk = op == BC_ISEQS;
2296 | // RA = src, RC = str_const (~), JMP with RC = target
2297 | ldr CARG1, [BASE, RA, lsl #3]
2298 | mvn RC, RC
2299 | ldrh RBw, [PC, # OFS_RD]
2300 | ldr CARG2, [KBASE, RC, lsl #3]
2301 | add PC, PC, #4
2302 | movn TMP0, #~LJ_TSTR
2303 |.if FFI
2304 | asr ITYPE, CARG1, #47
2305 |.endif
2306 | add RB, PC, RB, lsl #2
2307 | add CARG2, CARG2, TMP0, lsl #47
2308 | sub RB, RB, #0x20000
2309 |.if FFI
2310 | cmn ITYPE, #-LJ_TCDATA
2311 | beq ->vmeta_equal_cd
2312 |.endif
2313 | cmp CARG1, CARG2
2314 if (vk) {
2315 | csel PC, RB, PC, eq
2316 } else {
2317 | csel PC, RB, PC, ne
2318 }
2319 | ins_next
2320 break;
2321
2322 case BC_ISEQN: case BC_ISNEN:
2323 vk = op == BC_ISEQN;
2324 | // RA = src, RC = num_const (~), JMP with RC = target
2325 | ldr CARG1, [BASE, RA, lsl #3]
2326 | add RC, KBASE, RC, lsl #3
2327 | ldrh RBw, [PC, # OFS_RD]
2328 | ldr CARG3, [RC]
2329 | add PC, PC, #4
2330 | add RB, PC, RB, lsl #2
2331 | sub RB, RB, #0x20000
2332 if (vk) {
2333 |->BC_ISEQN_Z:
2334 } else {
2335 |->BC_ISNEN_Z:
2336 }
2337 | checkint CARG1, >4
2338 | checkint CARG3, >6
2339 | cmp CARG1w, CARG3w
2340 |1:
2341 if (vk) {
2342 | csel PC, RB, PC, eq
2343 |2:
2344 } else {
2345 |2:
2346 | csel PC, RB, PC, ne
2347 }
2348 |3:
2349 | ins_next
2350 |
2351 |4: // RA not int.
2352 |.if FFI
2353 | blo >7
2354 |.else
2355 | blo <2
2356 |.endif
2357 | ldr FARG1, [BASE, RA, lsl #3]
2358 | ldr FARG2, [RC]
2359 | cmp TISNUMhi, CARG3, lsr #32
2360 | bne >5
2361 | // RA number, RC int.
2362 | scvtf FARG2, CARG3w
2363 |5:
2364 | // RA number, RC number.
2365 | fcmp FARG1, FARG2
2366 | b <1
2367 |
2368 |6: // RA int, RC number
2369 | ldr FARG2, [RC]
2370 | scvtf FARG1, CARG1w
2371 | fcmp FARG1, FARG2
2372 | b <1
2373 |
2374 |.if FFI
2375 |7:
2376 | asr ITYPE, CARG1, #47
2377 | cmn ITYPE, #-LJ_TCDATA
2378 | bne <2
2379 | b ->vmeta_equal_cd
2380 |.endif
2381 break;
2382
2383 case BC_ISEQP: case BC_ISNEP:
2384 vk = op == BC_ISEQP;
2385 | // RA = src, RC = primitive_type (~), JMP with RC = target
2386 | ldr TMP0, [BASE, RA, lsl #3]
2387 | ldrh RBw, [PC, # OFS_RD]
2388 | add PC, PC, #4
2389 | add RC, RC, #1
2390 | add RB, PC, RB, lsl #2
2391 |.if FFI
2392 | asr ITYPE, TMP0, #47
2393 | cmn ITYPE, #-LJ_TCDATA
2394 | beq ->vmeta_equal_cd
2395 | cmn RC, ITYPE
2396 |.else
2397 | cmn RC, TMP0, asr #47
2398 |.endif
2399 | sub RB, RB, #0x20000
2400 if (vk) {
2401 | csel PC, RB, PC, eq
2402 } else {
2403 | csel PC, RB, PC, ne
2404 }
2405 | ins_next
2406 break;
2407
2408 /* -- Unary test and copy ops ------------------------------------------- */
2409
2410 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2411 | // RA = dst or unused, RC = src, JMP with RC = target
2412 | ldrh RBw, [PC, # OFS_RD]
2413 | ldr TMP0, [BASE, RC, lsl #3]
2414 | add PC, PC, #4
2415 | mov_false TMP1
2416 | add RB, PC, RB, lsl #2
2417 | cmp TMP0, TMP1
2418 | sub RB, RB, #0x20000
2419 if (op == BC_ISTC || op == BC_IST) {
2420 if (op == BC_ISTC) {
2421 | csel RA, RA, RC, lo
2422 }
2423 | csel PC, RB, PC, lo
2424 } else {
2425 if (op == BC_ISFC) {
2426 | csel RA, RA, RC, hs
2427 }
2428 | csel PC, RB, PC, hs
2429 }
2430 if (op == BC_ISTC || op == BC_ISFC) {
2431 | str TMP0, [BASE, RA, lsl #3]
2432 }
2433 | ins_next
2434 break;
2435
2436 case BC_ISTYPE:
2437 | // RA = src, RC = -type
2438 | ldr TMP0, [BASE, RA, lsl #3]
2439 | cmn RC, TMP0, asr #47
2440 | bne ->vmeta_istype
2441 | ins_next
2442 break;
2443 case BC_ISNUM:
2444 | // RA = src, RC = -(TISNUM-1)
2445 | ldr TMP0, [BASE, RA]
2446 | checknum TMP0, ->vmeta_istype
2447 | ins_next
2448 break;
2449
2450 /* -- Unary ops --------------------------------------------------------- */
2451
2452 case BC_MOV:
2453 | // RA = dst, RC = src
2454 | ldr TMP0, [BASE, RC, lsl #3]
2455 | str TMP0, [BASE, RA, lsl #3]
2456 | ins_next
2457 break;
2458 case BC_NOT:
2459 | // RA = dst, RC = src
2460 | ldr TMP0, [BASE, RC, lsl #3]
2461 | mov_false TMP1
2462 | mov_true TMP2
2463 | cmp TMP0, TMP1
2464 | csel TMP0, TMP1, TMP2, lo
2465 | str TMP0, [BASE, RA, lsl #3]
2466 | ins_next
2467 break;
2468 case BC_UNM:
2469 | // RA = dst, RC = src
2470 | ldr TMP0, [BASE, RC, lsl #3]
2471 | asr ITYPE, TMP0, #47
2472 | cmn ITYPE, #-LJ_TISNUM
2473 | bhi ->vmeta_unm
2474 | eor TMP0, TMP0, #U64x(80000000,00000000)
2475 | bne >5
2476 | negs TMP0w, TMP0w
2477 | movz CARG3, #0x41e0, lsl #48 // 2^31.
2478 | add TMP0, TMP0, TISNUM
2479 | csel TMP0, TMP0, CARG3, vc
2480 |5:
2481 | str TMP0, [BASE, RA, lsl #3]
2482 | ins_next
2483 break;
2484 case BC_LEN:
2485 | // RA = dst, RC = src
2486 | ldr CARG1, [BASE, RC, lsl #3]
2487 | asr ITYPE, CARG1, #47
2488 | cmn ITYPE, #-LJ_TSTR
2489 | and CARG1, CARG1, #LJ_GCVMASK
2490 | bne >2
2491 | ldr CARG1w, STR:CARG1->len
2492 |1:
2493 | add CARG1, CARG1, TISNUM
2494 | str CARG1, [BASE, RA, lsl #3]
2495 | ins_next
2496 |
2497 |2:
2498 | cmn ITYPE, #-LJ_TTAB
2499 | bne ->vmeta_len
2500#if LJ_52
2501 | ldr TAB:CARG2, TAB:CARG1->metatable
2502 | cbnz TAB:CARG2, >9
2503 |3:
2504#endif
2505 |->BC_LEN_Z:
2506 | bl extern lj_tab_len // (GCtab *t)
2507 | // Returns uint32_t (but less than 2^31).
2508 | b <1
2509 |
2510#if LJ_52
2511 |9:
2512 | ldrb TMP1w, TAB:CARG2->nomm
2513 | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done.
2514 | b ->vmeta_len
2515#endif
2516 break;
2517
2518 /* -- Binary ops -------------------------------------------------------- */
2519
2520 |.macro ins_arithcheck_int, target
2521 | checkint CARG1, target
2522 | checkint CARG2, target
2523 |.endmacro
2524 |
2525 |.macro ins_arithcheck_num, target
2526 | checknum CARG1, target
2527 | checknum CARG2, target
2528 |.endmacro
2529 |
2530 |.macro ins_arithcheck_nzdiv, target
2531 | cbz CARG2w, target
2532 |.endmacro
2533 |
2534 |.macro ins_arithhead
2535 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2536 ||if (vk == 1) {
2537 | and RC, RC, #255
2538 | decode_RB RB, INS
2539 ||} else {
2540 | decode_RB RB, INS
2541 | and RC, RC, #255
2542 ||}
2543 |.endmacro
2544 |
2545 |.macro ins_arithload, reg1, reg2
2546 | // RA = dst, RB = src1, RC = src2 | num_const
2547 ||switch (vk) {
2548 ||case 0:
2549 | ldr reg1, [BASE, RB, lsl #3]
2550 | ldr reg2, [KBASE, RC, lsl #3]
2551 || break;
2552 ||case 1:
2553 | ldr reg1, [KBASE, RC, lsl #3]
2554 | ldr reg2, [BASE, RB, lsl #3]
2555 || break;
2556 ||default:
2557 | ldr reg1, [BASE, RB, lsl #3]
2558 | ldr reg2, [BASE, RC, lsl #3]
2559 || break;
2560 ||}
2561 |.endmacro
2562 |
2563 |.macro ins_arithfallback, ins
2564 ||switch (vk) {
2565 ||case 0:
2566 | ins ->vmeta_arith_vn
2567 || break;
2568 ||case 1:
2569 | ins ->vmeta_arith_nv
2570 || break;
2571 ||default:
2572 | ins ->vmeta_arith_vv
2573 || break;
2574 ||}
2575 |.endmacro
2576 |
2577 |.macro ins_arithmod, res, reg1, reg2
2578 | fdiv d2, reg1, reg2
2579 | frintm d2, d2
2580 | fmsub res, d2, reg2, reg1
2581 |.endmacro
2582 |
2583 |.macro ins_arithdn, intins, fpins
2584 | ins_arithhead
2585 | ins_arithload CARG1, CARG2
2586 | ins_arithcheck_int >5
2587 |.if "intins" == "smull"
2588 | smull CARG1, CARG1w, CARG2w
2589 | cmp CARG1, CARG1, sxtw
2590 | mov CARG1w, CARG1w
2591 | ins_arithfallback bne
2592 |.elif "intins" == "ins_arithmodi"
2593 | ins_arithfallback ins_arithcheck_nzdiv
2594 | bl ->vm_modi
2595 |.else
2596 | intins CARG1w, CARG1w, CARG2w
2597 | ins_arithfallback bvs
2598 |.endif
2599 | add CARG1, CARG1, TISNUM
2600 | str CARG1, [BASE, RA, lsl #3]
2601 |4:
2602 | ins_next
2603 |
2604 |5: // FP variant.
2605 | ins_arithload FARG1, FARG2
2606 | ins_arithfallback ins_arithcheck_num
2607 | fpins FARG1, FARG1, FARG2
2608 | str FARG1, [BASE, RA, lsl #3]
2609 | b <4
2610 |.endmacro
2611 |
2612 |.macro ins_arithfp, fpins
2613 | ins_arithhead
2614 | ins_arithload CARG1, CARG2
2615 | ins_arithload FARG1, FARG2
2616 | ins_arithfallback ins_arithcheck_num
2617 |.if "fpins" == "fpow"
2618 | bl extern pow
2619 |.else
2620 | fpins FARG1, FARG1, FARG2
2621 |.endif
2622 | str FARG1, [BASE, RA, lsl #3]
2623 | ins_next
2624 |.endmacro
2625
2626 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2627 | ins_arithdn adds, fadd
2628 break;
2629 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2630 | ins_arithdn subs, fsub
2631 break;
2632 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2633 | ins_arithdn smull, fmul
2634 break;
2635 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2636 | ins_arithfp fdiv
2637 break;
2638 case BC_MODVN: case BC_MODNV: case BC_MODVV:
2639 | ins_arithdn ins_arithmodi, ins_arithmod
2640 break;
2641 case BC_POW:
2642 | // NYI: (partial) integer arithmetic.
2643 | ins_arithfp fpow
2644 break;
2645
2646 case BC_CAT:
2647 | decode_RB RB, INS
2648 | and RC, RC, #255
2649 | // RA = dst, RB = src_start, RC = src_end
2650 | str BASE, L->base
2651 | sub CARG3, RC, RB
2652 | add CARG2, BASE, RC, lsl #3
2653 |->BC_CAT_Z:
2654 | // RA = dst, CARG2 = top-1, CARG3 = left
2655 | mov CARG1, L
2656 | str PC, SAVE_PC
2657 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2658 | // Returns NULL (finished) or TValue * (metamethod).
2659 | ldrb RBw, [PC, #-4+OFS_RB]
2660 | ldr BASE, L->base
2661 | cbnz CRET1, ->vmeta_binop
2662 | ldr TMP0, [BASE, RB, lsl #3]
2663 | str TMP0, [BASE, RA, lsl #3] // Copy result to RA.
2664 | ins_next
2665 break;
2666
2667 /* -- Constant ops ------------------------------------------------------ */
2668
2669 case BC_KSTR:
2670 | // RA = dst, RC = str_const (~)
2671 | mvn RC, RC
2672 | ldr TMP0, [KBASE, RC, lsl #3]
2673 | movn TMP1, #~LJ_TSTR
2674 | add TMP0, TMP0, TMP1, lsl #47
2675 | str TMP0, [BASE, RA, lsl #3]
2676 | ins_next
2677 break;
2678 case BC_KCDATA:
2679 |.if FFI
2680 | // RA = dst, RC = cdata_const (~)
2681 | mvn RC, RC
2682 | ldr TMP0, [KBASE, RC, lsl #3]
2683 | movn TMP1, #~LJ_TCDATA
2684 | add TMP0, TMP0, TMP1, lsl #47
2685 | str TMP0, [BASE, RA, lsl #3]
2686 | ins_next
2687 |.endif
2688 break;
2689 case BC_KSHORT:
2690 | // RA = dst, RC = int16_literal
2691 | sxth RCw, RCw
2692 | add TMP0, RC, TISNUM
2693 | str TMP0, [BASE, RA, lsl #3]
2694 | ins_next
2695 break;
2696 case BC_KNUM:
2697 | // RA = dst, RC = num_const
2698 | ldr TMP0, [KBASE, RC, lsl #3]
2699 | str TMP0, [BASE, RA, lsl #3]
2700 | ins_next
2701 break;
2702 case BC_KPRI:
2703 | // RA = dst, RC = primitive_type (~)
2704 | mvn TMP0, RC, lsl #47
2705 | str TMP0, [BASE, RA, lsl #3]
2706 | ins_next
2707 break;
2708 case BC_KNIL:
2709 | // RA = base, RC = end
2710 | add RA, BASE, RA, lsl #3
2711 | add RC, BASE, RC, lsl #3
2712 | str TISNIL, [RA], #8
2713 |1:
2714 | cmp RA, RC
2715 | str TISNIL, [RA], #8
2716 | blt <1
2717 | ins_next_
2718 break;
2719
2720 /* -- Upvalue and function ops ------------------------------------------ */
2721
2722 case BC_UGET:
2723 | // RA = dst, RC = uvnum
2724 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2725 | add RC, RC, #offsetof(GCfuncL, uvptr)/8
2726 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2727 | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3]
2728 | ldr CARG2, UPVAL:CARG2->v
2729 | ldr TMP0, [CARG2]
2730 | str TMP0, [BASE, RA, lsl #3]
2731 | ins_next
2732 break;
2733 case BC_USETV:
2734 | // RA = uvnum, RC = src
2735 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2736 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2737 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2738 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2739 | ldr CARG3, [BASE, RC, lsl #3]
2740 | ldr CARG2, UPVAL:CARG1->v
2741 | ldrb TMP2w, UPVAL:CARG1->marked
2742 | ldrb TMP0w, UPVAL:CARG1->closed
2743 | asr ITYPE, CARG3, #47
2744 | str CARG3, [CARG2]
2745 | add ITYPE, ITYPE, #-LJ_TISGCV
2746 | tst TMP2w, #LJ_GC_BLACK // isblack(uv)
2747 | ccmp TMP0w, #0, #4, ne // && uv->closed
2748 | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v)
2749 | bhi >2
2750 |1:
2751 | ins_next
2752 |
2753 |2: // Check if new value is white.
2754 | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK
2755 | ldrb TMP1w, GCOBJ:CARG3->gch.marked
2756 | tst TMP1w, #LJ_GC_WHITES // iswhite(v)
2757 | beq <1
2758 | // Crossed a write barrier. Move the barrier forward.
2759 | mov CARG1, GL
2760 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2761 | b <1
2762 break;
2763 case BC_USETS:
2764 | // RA = uvnum, RC = str_const (~)
2765 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2766 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2767 | mvn RC, RC
2768 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2769 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
2770 | ldr STR:CARG3, [KBASE, RC, lsl #3]
2771 | movn TMP0, #~LJ_TSTR
2772 | ldr CARG2, UPVAL:CARG1->v
2773 | ldrb TMP2w, UPVAL:CARG1->marked
2774 | add TMP0, STR:CARG3, TMP0, lsl #47
2775 | ldrb TMP1w, STR:CARG3->marked
2776 | str TMP0, [CARG2]
2777 | tbnz TMP2w, #2, >2 // isblack(uv)
2778 |1:
2779 | ins_next
2780 |
2781 |2: // Check if string is white and ensure upvalue is closed.
2782 | ldrb TMP0w, UPVAL:CARG1->closed
2783 | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
2784 | ccmp TMP0w, #0, #4, ne
2785 | beq <1
2786 | // Crossed a write barrier. Move the barrier forward.
2787 | mov CARG1, GL
2788 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2789 | b <1
2790 break;
2791 case BC_USETN:
2792 | // RA = uvnum, RC = num_const
2793 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2794 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2795 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2796 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2797 | ldr TMP0, [KBASE, RC, lsl #3]
2798 | ldr CARG2, UPVAL:CARG2->v
2799 | str TMP0, [CARG2]
2800 | ins_next
2801 break;
2802 case BC_USETP:
2803 | // RA = uvnum, RC = primitive_type (~)
2804 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2805 | add RA, RA, #offsetof(GCfuncL, uvptr)/8
2806 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
2807 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
2808 | mvn TMP0, RC, lsl #47
2809 | ldr CARG2, UPVAL:CARG2->v
2810 | str TMP0, [CARG2]
2811 | ins_next
2812 break;
2813
2814 case BC_UCLO:
2815 | // RA = level, RC = target
2816 | ldr CARG3, L->openupval
2817 | add RC, PC, RC, lsl #2
2818 | str BASE, L->base
2819 | sub PC, RC, #0x20000
2820 | cbz CARG3, >1
2821 | mov CARG1, L
2822 | add CARG2, BASE, RA, lsl #3
2823 | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
2824 | ldr BASE, L->base
2825 |1:
2826 | ins_next
2827 break;
2828
2829 case BC_FNEW:
2830 | // RA = dst, RC = proto_const (~) (holding function prototype)
2831 | mvn RC, RC
2832 | str BASE, L->base
2833 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2834 | str PC, SAVE_PC
2835 | ldr CARG2, [KBASE, RC, lsl #3]
2836 | mov CARG1, L
2837 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2838 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2839 | bl extern lj_func_newL_gc
2840 | // Returns GCfuncL *.
2841 | ldr BASE, L->base
2842 | movn TMP0, #~LJ_TFUNC
2843 | add CRET1, CRET1, TMP0, lsl #47
2844 | str CRET1, [BASE, RA, lsl #3]
2845 | ins_next
2846 break;
2847
2848 /* -- Table ops --------------------------------------------------------- */
2849
2850 case BC_TNEW:
2851 case BC_TDUP:
2852 | // RA = dst, RC = (hbits|asize) | tab_const (~)
2853 | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total.
2854 | str BASE, L->base
2855 | str PC, SAVE_PC
2856 | mov CARG1, L
2857 | cmp CARG3, CARG4
2858 | bhs >5
2859 |1:
2860 if (op == BC_TNEW) {
2861 | and CARG2, RC, #0x7ff
2862 | lsr CARG3, RC, #11
2863 | cmp CARG2, #0x7ff
2864 | mov TMP0, #0x801
2865 | csel CARG2, CARG2, TMP0, ne
2866 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2867 | // Returns GCtab *.
2868 } else {
2869 | mvn RC, RC
2870 | ldr CARG2, [KBASE, RC, lsl #3]
2871 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
2872 | // Returns GCtab *.
2873 }
2874 | ldr BASE, L->base
2875 | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48
2876 | str CRET1, [BASE, RA, lsl #3]
2877 | ins_next
2878 |
2879 |5:
2880 | bl extern lj_gc_step_fixtop // (lua_State *L)
2881 | mov CARG1, L
2882 | b <1
2883 break;
2884
2885 case BC_GGET:
2886 | // RA = dst, RC = str_const (~)
2887 case BC_GSET:
2888 | // RA = src, RC = str_const (~)
2889 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
2890 | mvn RC, RC
2891 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
2892 | ldr TAB:CARG2, LFUNC:CARG1->env
2893 | ldr STR:RC, [KBASE, RC, lsl #3]
2894 if (op == BC_GGET) {
2895 | b ->BC_TGETS_Z
2896 } else {
2897 | b ->BC_TSETS_Z
2898 }
2899 break;
2900
2901 case BC_TGETV:
2902 | decode_RB RB, INS
2903 | and RC, RC, #255
2904 | // RA = dst, RB = table, RC = key
2905 | ldr CARG2, [BASE, RB, lsl #3]
2906 | ldr TMP1, [BASE, RC, lsl #3]
2907 | checktab CARG2, ->vmeta_tgetv
2908 | checkint TMP1, >9 // Integer key?
2909 | ldr CARG3, TAB:CARG2->array
2910 | ldr CARG1w, TAB:CARG2->asize
2911 | add CARG3, CARG3, TMP1, uxtw #3
2912 | cmp TMP1w, CARG1w // In array part?
2913 | bhs ->vmeta_tgetv
2914 | ldr TMP0, [CARG3]
2915 | cmp TMP0, TISNIL
2916 | beq >5
2917 |1:
2918 | str TMP0, [BASE, RA, lsl #3]
2919 | ins_next
2920 |
2921 |5: // Check for __index if table value is nil.
2922 | ldr TAB:CARG1, TAB:CARG2->metatable
2923 | cbz TAB:CARG1, <1 // No metatable: done.
2924 | ldrb TMP1w, TAB:CARG1->nomm
2925 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
2926 | b ->vmeta_tgetv
2927 |
2928 |9:
2929 | asr ITYPE, TMP1, #47
2930 | cmn ITYPE, #-LJ_TSTR // String key?
2931 | bne ->vmeta_tgetv
2932 | and STR:RC, TMP1, #LJ_GCVMASK
2933 | b ->BC_TGETS_Z
2934 break;
2935 case BC_TGETS:
2936 | decode_RB RB, INS
2937 | and RC, RC, #255
2938 | // RA = dst, RB = table, RC = str_const (~)
2939 | ldr CARG2, [BASE, RB, lsl #3]
2940 | mvn RC, RC
2941 | ldr STR:RC, [KBASE, RC, lsl #3]
2942 | checktab CARG2, ->vmeta_tgets1
2943 |->BC_TGETS_Z:
2944 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
2945 | ldr TMP1w, TAB:CARG2->hmask
2946 | ldr TMP2w, STR:RC->sid
2947 | ldr NODE:CARG3, TAB:CARG2->node
2948 | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
2949 | add TMP1, TMP1, TMP1, lsl #1
2950 | movn CARG4, #~LJ_TSTR
2951 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
2952 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
2953 |1:
2954 | ldp TMP0, CARG1, NODE:CARG3->val
2955 | ldr NODE:CARG3, NODE:CARG3->next
2956 | cmp CARG1, CARG4
2957 | bne >4
2958 | cmp TMP0, TISNIL
2959 | beq >5
2960 |3:
2961 | str TMP0, [BASE, RA, lsl #3]
2962 | ins_next
2963 |
2964 |4: // Follow hash chain.
2965 | cbnz NODE:CARG3, <1
2966 | // End of hash chain: key not found, nil result.
2967 | mov TMP0, TISNIL
2968 |
2969 |5: // Check for __index if table value is nil.
2970 | ldr TAB:CARG1, TAB:CARG2->metatable
2971 | cbz TAB:CARG1, <3 // No metatable: done.
2972 | ldrb TMP1w, TAB:CARG1->nomm
2973 | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done.
2974 | b ->vmeta_tgets
2975 break;
2976 case BC_TGETB:
2977 | decode_RB RB, INS
2978 | and RC, RC, #255
2979 | // RA = dst, RB = table, RC = index
2980 | ldr CARG2, [BASE, RB, lsl #3]
2981 | checktab CARG2, ->vmeta_tgetb
2982 | ldr CARG3, TAB:CARG2->array
2983 | ldr CARG1w, TAB:CARG2->asize
2984 | add CARG3, CARG3, RC, lsl #3
2985 | cmp RCw, CARG1w // In array part?
2986 | bhs ->vmeta_tgetb
2987 | ldr TMP0, [CARG3]
2988 | cmp TMP0, TISNIL
2989 | beq >5
2990 |1:
2991 | str TMP0, [BASE, RA, lsl #3]
2992 | ins_next
2993 |
2994 |5: // Check for __index if table value is nil.
2995 | ldr TAB:CARG1, TAB:CARG2->metatable
2996 | cbz TAB:CARG1, <1 // No metatable: done.
2997 | ldrb TMP1w, TAB:CARG1->nomm
2998 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
2999 | b ->vmeta_tgetb
3000 break;
3001 case BC_TGETR:
3002 | decode_RB RB, INS
3003 | and RC, RC, #255
3004 | // RA = dst, RB = table, RC = key
3005 | ldr CARG1, [BASE, RB, lsl #3]
3006 | ldr TMP1, [BASE, RC, lsl #3]
3007 | and TAB:CARG1, CARG1, #LJ_GCVMASK
3008 | ldr CARG3, TAB:CARG1->array
3009 | ldr TMP2w, TAB:CARG1->asize
3010 | add CARG3, CARG3, TMP1w, uxtw #3
3011 | cmp TMP1w, TMP2w // In array part?
3012 | bhs ->vmeta_tgetr
3013 | ldr TMP0, [CARG3]
3014 |->BC_TGETR_Z:
3015 | str TMP0, [BASE, RA, lsl #3]
3016 | ins_next
3017 break;
3018
3019 case BC_TSETV:
3020 | decode_RB RB, INS
3021 | and RC, RC, #255
3022 | // RA = src, RB = table, RC = key
3023 | ldr CARG2, [BASE, RB, lsl #3]
3024 | ldr TMP1, [BASE, RC, lsl #3]
3025 | checktab CARG2, ->vmeta_tsetv
3026 | checkint TMP1, >9 // Integer key?
3027 | ldr CARG3, TAB:CARG2->array
3028 | ldr CARG1w, TAB:CARG2->asize
3029 | add CARG3, CARG3, TMP1, uxtw #3
3030 | cmp TMP1w, CARG1w // In array part?
3031 | bhs ->vmeta_tsetv
3032 | ldr TMP1, [CARG3]
3033 | ldr TMP0, [BASE, RA, lsl #3]
3034 | ldrb TMP2w, TAB:CARG2->marked
3035 | cmp TMP1, TISNIL // Previous value is nil?
3036 | beq >5
3037 |1:
3038 | str TMP0, [CARG3]
3039 | tbnz TMP2w, #2, >7 // isblack(table)
3040 |2:
3041 | ins_next
3042 |
3043 |5: // Check for __newindex if previous value is nil.
3044 | ldr TAB:CARG1, TAB:CARG2->metatable
3045 | cbz TAB:CARG1, <1 // No metatable: done.
3046 | ldrb TMP1w, TAB:CARG1->nomm
3047 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
3048 | b ->vmeta_tsetv
3049 |
3050 |7: // Possible table write barrier for the value. Skip valiswhite check.
3051 | barrierback TAB:CARG2, TMP2w, TMP1
3052 | b <2
3053 |
3054 |9:
3055 | asr ITYPE, TMP1, #47
3056 | cmn ITYPE, #-LJ_TSTR // String key?
3057 | bne ->vmeta_tsetv
3058 | and STR:RC, TMP1, #LJ_GCVMASK
3059 | b ->BC_TSETS_Z
3060 break;
3061 case BC_TSETS:
3062 | decode_RB RB, INS
3063 | and RC, RC, #255
3064 | // RA = src, RB = table, RC = str_const (~)
3065 | ldr CARG2, [BASE, RB, lsl #3]
3066 | mvn RC, RC
3067 | ldr STR:RC, [KBASE, RC, lsl #3]
3068 | checktab CARG2, ->vmeta_tsets1
3069 |->BC_TSETS_Z:
3070 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
3071 | ldr TMP1w, TAB:CARG2->hmask
3072 | ldr TMP2w, STR:RC->sid
3073 | ldr NODE:CARG3, TAB:CARG2->node
3074 | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
3075 | add TMP1, TMP1, TMP1, lsl #1
3076 | movn CARG4, #~LJ_TSTR
3077 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
3078 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
3079 | strb wzr, TAB:CARG2->nomm // Clear metamethod cache.
3080 |1:
3081 | ldp TMP1, CARG1, NODE:CARG3->val
3082 | ldr NODE:TMP3, NODE:CARG3->next
3083 | ldrb TMP2w, TAB:CARG2->marked
3084 | cmp CARG1, CARG4
3085 | bne >5
3086 | ldr TMP0, [BASE, RA, lsl #3]
3087 | cmp TMP1, TISNIL // Previous value is nil?
3088 | beq >4
3089 |2:
3090 | str TMP0, NODE:CARG3->val
3091 | tbnz TMP2w, #2, >7 // isblack(table)
3092 |3:
3093 | ins_next
3094 |
3095 |4: // Check for __newindex if previous value is nil.
3096 | ldr TAB:CARG1, TAB:CARG2->metatable
3097 | cbz TAB:CARG1, <2 // No metatable: done.
3098 | ldrb TMP1w, TAB:CARG1->nomm
3099 | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done.
3100 | b ->vmeta_tsets
3101 |
3102 |5: // Follow hash chain.
3103 | mov NODE:CARG3, NODE:TMP3
3104 | cbnz NODE:TMP3, <1
3105 | // End of hash chain: key not found, add a new one.
3106 |
3107 | // But check for __newindex first.
3108 | ldr TAB:CARG1, TAB:CARG2->metatable
3109 | cbz TAB:CARG1, >6 // No metatable: continue.
3110 | ldrb TMP1w, TAB:CARG1->nomm
3111 | // 'no __newindex' flag NOT set: check.
3112 | tbz TMP1w, #MM_newindex, ->vmeta_tsets
3113 |6:
3114 | movn TMP1, #~LJ_TSTR
3115 | str PC, SAVE_PC
3116 | add TMP0, STR:RC, TMP1, lsl #47
3117 | str BASE, L->base
3118 | mov CARG1, L
3119 | str TMP0, TMPD
3120 | add CARG3, sp, TMPDofs
3121 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3122 | // Returns TValue *.
3123 | ldr BASE, L->base
3124 | ldr TMP0, [BASE, RA, lsl #3]
3125 | str TMP0, [CRET1]
3126 | b <3 // No 2nd write barrier needed.
3127 |
3128 |7: // Possible table write barrier for the value. Skip valiswhite check.
3129 | barrierback TAB:CARG2, TMP2w, TMP1
3130 | b <3
3131 break;
3132 case BC_TSETB:
3133 | decode_RB RB, INS
3134 | and RC, RC, #255
3135 | // RA = src, RB = table, RC = index
3136 | ldr CARG2, [BASE, RB, lsl #3]
3137 | checktab CARG2, ->vmeta_tsetb
3138 | ldr CARG3, TAB:CARG2->array
3139 | ldr CARG1w, TAB:CARG2->asize
3140 | add CARG3, CARG3, RC, lsl #3
3141 | cmp RCw, CARG1w // In array part?
3142 | bhs ->vmeta_tsetb
3143 | ldr TMP1, [CARG3]
3144 | ldr TMP0, [BASE, RA, lsl #3]
3145 | ldrb TMP2w, TAB:CARG2->marked
3146 | cmp TMP1, TISNIL // Previous value is nil?
3147 | beq >5
3148 |1:
3149 | str TMP0, [CARG3]
3150 | tbnz TMP2w, #2, >7 // isblack(table)
3151 |2:
3152 | ins_next
3153 |
3154 |5: // Check for __newindex if previous value is nil.
3155 | ldr TAB:CARG1, TAB:CARG2->metatable
3156 | cbz TAB:CARG1, <1 // No metatable: done.
3157 | ldrb TMP1w, TAB:CARG1->nomm
3158 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
3159 | b ->vmeta_tsetb
3160 |
3161 |7: // Possible table write barrier for the value. Skip valiswhite check.
3162 | barrierback TAB:CARG2, TMP2w, TMP1
3163 | b <2
3164 break;
3165 case BC_TSETR:
3166 | decode_RB RB, INS
3167 | and RC, RC, #255
3168 | // RA = src, RB = table, RC = key
3169 | ldr CARG2, [BASE, RB, lsl #3]
3170 | ldr TMP1, [BASE, RC, lsl #3]
3171 | and TAB:CARG2, CARG2, #LJ_GCVMASK
3172 | ldr CARG1, TAB:CARG2->array
3173 | ldrb TMP2w, TAB:CARG2->marked
3174 | ldr CARG4w, TAB:CARG2->asize
3175 | add CARG1, CARG1, TMP1, uxtw #3
3176 | tbnz TMP2w, #2, >7 // isblack(table)
3177 |2:
3178 | cmp TMP1w, CARG4w // In array part?
3179 | bhs ->vmeta_tsetr
3180 |->BC_TSETR_Z:
3181 | ldr TMP0, [BASE, RA, lsl #3]
3182 | str TMP0, [CARG1]
3183 | ins_next
3184 |
3185 |7: // Possible table write barrier for the value. Skip valiswhite check.
3186 | barrierback TAB:CARG2, TMP2w, TMP0
3187 | b <2
3188 break;
3189
3190 case BC_TSETM:
3191 | // RA = base (table at base-1), RC = num_const (start index)
3192 | add RA, BASE, RA, lsl #3
3193 |1:
3194 | ldr RBw, SAVE_MULTRES
3195 | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
3196 | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
3197 | sub RB, RB, #8
3198 | cbz RB, >4 // Nothing to copy?
3199 | and TAB:CARG2, CARG2, #LJ_GCVMASK
3200 | ldr CARG1w, TAB:CARG2->asize
3201 | add CARG3w, TMP1w, RBw, lsr #3
3202 | ldr CARG4, TAB:CARG2->array
3203 | cmp CARG3, CARG1
3204 | add RB, RA, RB
3205 | bhi >5
3206 | add TMP1, CARG4, TMP1w, uxtw #3
3207 | ldrb TMP2w, TAB:CARG2->marked
3208 |3: // Copy result slots to table.
3209 | ldr TMP0, [RA], #8
3210 | str TMP0, [TMP1], #8
3211 | cmp RA, RB
3212 | blo <3
3213 | tbnz TMP2w, #2, >7 // isblack(table)
3214 |4:
3215 | ins_next
3216 |
3217 |5: // Need to resize array part.
3218 | str BASE, L->base
3219 | mov CARG1, L
3220 | str PC, SAVE_PC
3221 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3222 | // Must not reallocate the stack.
3223 | b <1
3224 |
3225 |7: // Possible table write barrier for any value. Skip valiswhite check.
3226 | barrierback TAB:CARG2, TMP2w, TMP1
3227 | b <4
3228 break;
3229
3230 /* -- Calls and vararg handling ----------------------------------------- */
3231
3232 case BC_CALLM:
3233 | // RA = base, (RB = nresults+1,) RC = extra_nargs
3234 | ldr TMP0w, SAVE_MULTRES
3235 | decode_RC8RD NARGS8:RC, RC
3236 | add NARGS8:RC, NARGS8:RC, TMP0
3237 | b ->BC_CALL_Z
3238 break;
3239 case BC_CALL:
3240 | decode_RC8RD NARGS8:RC, RC
3241 | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
3242 |->BC_CALL_Z:
3243 | mov RB, BASE // Save old BASE for vmeta_call.
3244 | add BASE, BASE, RA, lsl #3
3245 | ldr CARG3, [BASE]
3246 | sub NARGS8:RC, NARGS8:RC, #8
3247 | add BASE, BASE, #16
3248 | checkfunc CARG3, ->vmeta_call
3249 | ins_call
3250 break;
3251
3252 case BC_CALLMT:
3253 | // RA = base, (RB = 0,) RC = extra_nargs
3254 | ldr TMP0w, SAVE_MULTRES
3255 | add NARGS8:RC, TMP0, RC, lsl #3
3256 | b ->BC_CALLT1_Z
3257 break;
3258 case BC_CALLT:
3259 | lsl NARGS8:RC, RC, #3
3260 | // RA = base, (RB = 0,) RC = (nargs+1)*8
3261 |->BC_CALLT1_Z:
3262 | add RA, BASE, RA, lsl #3
3263 | ldr TMP1, [RA]
3264 | sub NARGS8:RC, NARGS8:RC, #8
3265 | add RA, RA, #16
3266 | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
3267 | ldr PC, [BASE, FRAME_PC]
3268 |->BC_CALLT2_Z:
3269 | mov RB, #0
3270 | ldrb TMP2w, LFUNC:CARG3->ffid
3271 | tst PC, #FRAME_TYPE
3272 | bne >7
3273 |1:
3274 | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
3275 | cbz NARGS8:RC, >3
3276 |2:
3277 | ldr TMP0, [RA, RB]
3278 | add TMP1, RB, #8
3279 | cmp TMP1, NARGS8:RC
3280 | str TMP0, [BASE, RB]
3281 | mov RB, TMP1
3282 | bne <2
3283 |3:
3284 | cmp TMP2, #1 // (> FF_C) Calling a fast function?
3285 | bhi >5
3286 |4:
3287 | ins_callt
3288 |
3289 |5: // Tailcall to a fast function with a Lua frame below.
3290 | ldrb RAw, [PC, #-4+OFS_RA]
3291 | sub CARG1, BASE, RA, lsl #3
3292 | ldr LFUNC:CARG1, [CARG1, #-32]
3293 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3294 | ldr CARG1, LFUNC:CARG1->pc
3295 | ldr KBASE, [CARG1, #PC2PROTO(k)]
3296 | b <4
3297 |
3298 |7: // Tailcall from a vararg function.
3299 | eor PC, PC, #FRAME_VARG
3300 | tst PC, #FRAME_TYPEP // Vararg frame below?
3301 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3302 | bne <1
3303 | sub BASE, BASE, PC
3304 | ldr PC, [BASE, FRAME_PC]
3305 | tst PC, #FRAME_TYPE
3306 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3307 | b <1
3308 break;
3309
3310 case BC_ITERC:
3311 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3312 | add RA, BASE, RA, lsl #3
3313 | ldr CARG3, [RA, #-24]
3314 | mov RB, BASE // Save old BASE for vmeta_call.
3315 | ldp CARG1, CARG2, [RA, #-16]
3316 | add BASE, RA, #16
3317 | mov NARGS8:RC, #16 // Iterators get 2 arguments.
3318 | str CARG3, [RA] // Copy callable.
3319 | stp CARG1, CARG2, [RA, #16] // Copy state and control var.
3320 | checkfunc CARG3, ->vmeta_call
3321 | ins_call
3322 break;
3323
3324 case BC_ITERN:
3325 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3326 |.if JIT
3327 | // NYI: add hotloop, record BC_ITERN.
3328 |.endif
3329 | add RA, BASE, RA, lsl #3
3330 | ldr TAB:RB, [RA, #-16]
3331 | ldrh TMP3w, [PC, # OFS_RD]
3332 | ldr CARG1w, [RA, #-8+LO] // Get index from control var.
3333 | add PC, PC, #4
3334 | add TMP3, PC, TMP3, lsl #2
3335 | and TAB:RB, RB, #LJ_GCVMASK
3336 | sub TMP3, TMP3, #0x20000
3337 | ldr TMP1w, TAB:RB->asize
3338 | ldr CARG2, TAB:RB->array
3339 |1: // Traverse array part.
3340 | subs RC, CARG1, TMP1
3341 | add CARG3, CARG2, CARG1, lsl #3
3342 | bhs >5 // Index points after array part?
3343 | ldr TMP0, [CARG3]
3344 | cmp TMP0, TISNIL
3345 | cinc CARG1, CARG1, eq // Skip holes in array part.
3346 | beq <1
3347 | add CARG1, CARG1, TISNUM
3348 | stp CARG1, TMP0, [RA]
3349 | add CARG1, CARG1, #1
3350 |3:
3351 | str CARG1w, [RA, #-8+LO] // Update control var.
3352 | mov PC, TMP3
3353 |4:
3354 | ins_next
3355 |
3356 |5: // Traverse hash part.
3357 | ldr TMP2w, TAB:RB->hmask
3358 | ldr NODE:RB, TAB:RB->node
3359 |6:
3360 | add CARG1, RC, RC, lsl #1
3361 | cmp RC, TMP2 // End of iteration? Branch to ITERN+1.
3362 | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
3363 | bhi <4
3364 | ldp TMP0, CARG1, NODE:CARG3->val
3365 | cmp TMP0, TISNIL
3366 | add RC, RC, #1
3367 | beq <6 // Skip holes in hash part.
3368 | stp CARG1, TMP0, [RA]
3369 | add CARG1, RC, TMP1
3370 | b <3
3371 break;
3372
3373 case BC_ISNEXT:
3374 | // RA = base, RC = target (points to ITERN)
3375 | add RA, BASE, RA, lsl #3
3376 | ldr CFUNC:CARG1, [RA, #-24]
3377 | add RC, PC, RC, lsl #2
3378 | ldp TAB:CARG3, CARG4, [RA, #-16]
3379 | sub RC, RC, #0x20000
3380 | checkfunc CFUNC:CARG1, >5
3381 | asr TMP0, TAB:CARG3, #47
3382 | ldrb TMP1w, CFUNC:CARG1->ffid
3383 | cmn TMP0, #-LJ_TTAB
3384 | ccmp CARG4, TISNIL, #0, eq
3385 | ccmp TMP1w, #FF_next_N, #0, eq
3386 | bne >5
3387 | mov TMP0w, #0xfffe7fff
3388 | lsl TMP0, TMP0, #32
3389 | str TMP0, [RA, #-8] // Initialize control var.
3390 |1:
3391 | mov PC, RC
3392 | ins_next
3393 |
3394 |5: // Despecialize bytecode if any of the checks fail.
3395 | mov TMP0, #BC_JMP
3396 | mov TMP1, #BC_ITERC
3397 | strb TMP0w, [PC, #-4+OFS_OP]
3398 | strb TMP1w, [RC, # OFS_OP]
3399 | b <1
3400 break;
3401
3402 case BC_VARG:
3403 | decode_RB RB, INS
3404 | and RC, RC, #255
3405 | // RA = base, RB = (nresults+1), RC = numparams
3406 | ldr TMP1, [BASE, FRAME_PC]
3407 | add RC, BASE, RC, lsl #3
3408 | add RA, BASE, RA, lsl #3
3409 | add RC, RC, #FRAME_VARG
3410 | add TMP2, RA, RB, lsl #3
3411 | sub RC, RC, TMP1 // RC = vbase
3412 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
3413 | sub TMP3, BASE, #16 // TMP3 = vtop
3414 | cbz RB, >5
3415 | sub TMP2, TMP2, #16
3416 |1: // Copy vararg slots to destination slots.
3417 | cmp RC, TMP3
3418 | ldr TMP0, [RC], #8
3419 | csel TMP0, TMP0, TISNIL, lo
3420 | cmp RA, TMP2
3421 | str TMP0, [RA], #8
3422 | blo <1
3423 |2:
3424 | ins_next
3425 |
3426 |5: // Copy all varargs.
3427 | ldr TMP0, L->maxstack
3428 | subs TMP2, TMP3, RC
3429 | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
3430 | add RB, RB, #8
3431 | add TMP1, RA, TMP2
3432 | str RBw, SAVE_MULTRES
3433 | ble <2 // Nothing to copy.
3434 | cmp TMP1, TMP0
3435 | bhi >7
3436 |6:
3437 | ldr TMP0, [RC], #8
3438 | str TMP0, [RA], #8
3439 | cmp RC, TMP3
3440 | blo <6
3441 | b <2
3442 |
3443 |7: // Grow stack for varargs.
3444 | lsr CARG2, TMP2, #3
3445 | stp BASE, RA, L->base
3446 | mov CARG1, L
3447 | sub RC, RC, BASE // Need delta, because BASE may change.
3448 | str PC, SAVE_PC
3449 | bl extern lj_state_growstack // (lua_State *L, int n)
3450 | ldp BASE, RA, L->base
3451 | add RC, BASE, RC
3452 | sub TMP3, BASE, #16
3453 | b <6
3454 break;
3455
3456 /* -- Returns ----------------------------------------------------------- */
3457
3458 case BC_RETM:
3459 | // RA = results, RC = extra results
3460 | ldr TMP0w, SAVE_MULTRES
3461 | ldr PC, [BASE, FRAME_PC]
3462 | add RA, BASE, RA, lsl #3
3463 | add RC, TMP0, RC, lsl #3
3464 | b ->BC_RETM_Z
3465 break;
3466
3467 case BC_RET:
3468 | // RA = results, RC = nresults+1
3469 | ldr PC, [BASE, FRAME_PC]
3470 | lsl RC, RC, #3
3471 | add RA, BASE, RA, lsl #3
3472 |->BC_RETM_Z:
3473 | str RCw, SAVE_MULTRES
3474 |1:
3475 | ands CARG1, PC, #FRAME_TYPE
3476 | eor CARG2, PC, #FRAME_VARG
3477 | bne ->BC_RETV2_Z
3478 |
3479 |->BC_RET_Z:
3480 | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
3481 | ldr INSw, [PC, #-4]
3482 | subs TMP1, RC, #8
3483 | sub CARG3, BASE, #16
3484 | beq >3
3485 |2:
3486 | ldr TMP0, [RA], #8
3487 | add BASE, BASE, #8
3488 | sub TMP1, TMP1, #8
3489 | str TMP0, [BASE, #-24]
3490 | cbnz TMP1, <2
3491 |3:
3492 | decode_RA RA, INS
3493 | sub CARG4, CARG3, RA, lsl #3
3494 | decode_RB RB, INS
3495 | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
3496 |5:
3497 | cmp RC, RB, lsl #3 // More results expected?
3498 | blo >6
3499 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3500 | mov BASE, CARG4
3501 | ldr CARG2, LFUNC:CARG1->pc
3502 | ldr KBASE, [CARG2, #PC2PROTO(k)]
3503 | ins_next
3504 |
3505 |6: // Fill up results with nil.
3506 | add BASE, BASE, #8
3507 | add RC, RC, #8
3508 | str TISNIL, [BASE, #-24]
3509 | b <5
3510 |
3511 |->BC_RETV1_Z: // Non-standard return case.
3512 | add RA, BASE, RA, lsl #3
3513 |->BC_RETV2_Z:
3514 | tst CARG2, #FRAME_TYPEP
3515 | bne ->vm_return
3516 | // Return from vararg function: relocate BASE down.
3517 | sub BASE, BASE, CARG2
3518 | ldr PC, [BASE, FRAME_PC]
3519 | b <1
3520 break;
3521
3522 case BC_RET0: case BC_RET1:
3523 | // RA = results, RC = nresults+1
3524 | ldr PC, [BASE, FRAME_PC]
3525 | lsl RC, RC, #3
3526 | str RCw, SAVE_MULTRES
3527 | ands CARG1, PC, #FRAME_TYPE
3528 | eor CARG2, PC, #FRAME_VARG
3529 | bne ->BC_RETV1_Z
3530 | ldr INSw, [PC, #-4]
3531 if (op == BC_RET1) {
3532 | ldr TMP0, [BASE, RA, lsl #3]
3533 }
3534 | sub CARG4, BASE, #16
3535 | decode_RA RA, INS
3536 | sub BASE, CARG4, RA, lsl #3
3537 if (op == BC_RET1) {
3538 | str TMP0, [CARG4], #8
3539 }
3540 | decode_RB RB, INS
3541 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
3542 |5:
3543 | cmp RC, RB, lsl #3
3544 | blo >6
3545 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
3546 | ldr CARG2, LFUNC:CARG1->pc
3547 | ldr KBASE, [CARG2, #PC2PROTO(k)]
3548 | ins_next
3549 |
3550 |6: // Fill up results with nil.
3551 | add RC, RC, #8
3552 | str TISNIL, [CARG4], #8
3553 | b <5
3554 break;
3555
3556 /* -- Loops and branches ------------------------------------------------ */
3557
3558 |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4]
3559 |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12]
3560 |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
3561 |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28]
3562
3563 case BC_FORL:
3564 |.if JIT
3565 | hotloop
3566 |.endif
3567 | // Fall through. Assumes BC_IFORL follows.
3568 break;
3569
3570 case BC_JFORI:
3571 case BC_JFORL:
3572#if !LJ_HASJIT
3573 break;
3574#endif
3575 case BC_FORI:
3576 case BC_IFORL:
3577 | // RA = base, RC = target (after end of loop or start of loop)
3578 vk = (op == BC_IFORL || op == BC_JFORL);
3579 | add RA, BASE, RA, lsl #3
3580 | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP
3581 | ldr CARG3, FOR_STEP // CARG3 = STEP
3582 if (op != BC_JFORL) {
3583 | add RC, PC, RC, lsl #2
3584 | sub RC, RC, #0x20000
3585 }
3586 | checkint CARG1, >5
3587 if (!vk) {
3588 | checkint CARG2, ->vmeta_for
3589 | checkint CARG3, ->vmeta_for
3590 | tbnz CARG3w, #31, >4
3591 | cmp CARG1w, CARG2w
3592 } else {
3593 | adds CARG1w, CARG1w, CARG3w
3594 | bvs >2
3595 | add TMP0, CARG1, TISNUM
3596 | tbnz CARG3w, #31, >4
3597 | cmp CARG1w, CARG2w
3598 }
3599 |1:
3600 if (op == BC_FORI) {
3601 | csel PC, RC, PC, gt
3602 } else if (op == BC_JFORI) {
3603 | mov PC, RC
3604 | ldrh RCw, [RC, #-4+OFS_RD]
3605 } else if (op == BC_IFORL) {
3606 | csel PC, RC, PC, le
3607 }
3608 if (vk) {
3609 | str TMP0, FOR_IDX
3610 | str TMP0, FOR_EXT
3611 } else {
3612 | str CARG1, FOR_EXT
3613 }
3614 if (op == BC_JFORI || op == BC_JFORL) {
3615 | ble =>BC_JLOOP
3616 }
3617 |2:
3618 | ins_next
3619 |
3620 |4: // Invert check for negative step.
3621 | cmp CARG2w, CARG1w
3622 | b <1
3623 |
3624 |5: // FP loop.
3625 | ldp d0, d1, FOR_IDX
3626 | blo ->vmeta_for
3627 if (!vk) {
3628 | checknum CARG2, ->vmeta_for
3629 | checknum CARG3, ->vmeta_for
3630 | str d0, FOR_EXT
3631 } else {
3632 | ldr d2, FOR_STEP
3633 | fadd d0, d0, d2
3634 }
3635 | tbnz CARG3, #63, >7
3636 | fcmp d0, d1
3637 |6:
3638 if (vk) {
3639 | str d0, FOR_IDX
3640 | str d0, FOR_EXT
3641 }
3642 if (op == BC_FORI) {
3643 | csel PC, RC, PC, hi
3644 } else if (op == BC_JFORI) {
3645 | ldrh RCw, [RC, #-4+OFS_RD]
3646 | bls =>BC_JLOOP
3647 } else if (op == BC_IFORL) {
3648 | csel PC, RC, PC, ls
3649 } else {
3650 | bls =>BC_JLOOP
3651 }
3652 | b <2
3653 |
3654 |7: // Invert check for negative step.
3655 | fcmp d1, d0
3656 | b <6
3657 break;
3658
3659 case BC_ITERL:
3660 |.if JIT
3661 | hotloop
3662 |.endif
3663 | // Fall through. Assumes BC_IITERL follows.
3664 break;
3665
3666 case BC_JITERL:
3667#if !LJ_HASJIT
3668 break;
3669#endif
3670 case BC_IITERL:
3671 | // RA = base, RC = target
3672 | ldr CARG1, [BASE, RA, lsl #3]
3673 | add TMP1, BASE, RA, lsl #3
3674 | cmp CARG1, TISNIL
3675 | beq >1 // Stop if iterator returned nil.
3676 if (op == BC_JITERL) {
3677 | str CARG1, [TMP1, #-8]
3678 | b =>BC_JLOOP
3679 } else {
3680 | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch.
3681 | sub PC, TMP0, #0x20000
3682 | str CARG1, [TMP1, #-8]
3683 }
3684 |1:
3685 | ins_next
3686 break;
3687
3688 case BC_LOOP:
3689 | // RA = base, RC = target (loop extent)
3690 | // Note: RA/RC is only used by trace recorder to determine scope/extent
3691 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
3692 |.if JIT
3693 | hotloop
3694 |.endif
3695 | // Fall through. Assumes BC_ILOOP follows.
3696 break;
3697
3698 case BC_ILOOP:
3699 | // RA = base, RC = target (loop extent)
3700 | ins_next
3701 break;
3702
3703 case BC_JLOOP:
3704 |.if JIT
3705 | // RA = base (ignored), RC = traceno
3706 | ldr CARG1, [GL, #GL_J(trace)]
3707 | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0.
3708 | ldr TRACE:RC, [CARG1, RC, lsl #3]
3709 | st_vmstate CARG2w
3710 | ldr RA, TRACE:RC->mcode
3711 | str BASE, GL->jit_base
3712 | str L, GL->tmpbuf.L
3713 | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace.
3714 | br RA
3715 |.endif
3716 break;
3717
3718 case BC_JMP:
3719 | // RA = base (only used by trace recorder), RC = target
3720 | add RC, PC, RC, lsl #2
3721 | sub PC, RC, #0x20000
3722 | ins_next
3723 break;
3724
3725 /* -- Function headers -------------------------------------------------- */
3726
3727 case BC_FUNCF:
3728 |.if JIT
3729 | hotcall
3730 |.endif
3731 case BC_FUNCV: /* NYI: compiled vararg functions. */
3732 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3733 break;
3734
3735 case BC_JFUNCF:
3736#if !LJ_HASJIT
3737 break;
3738#endif
3739 case BC_IFUNCF:
3740 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3741 | ldr CARG1, L->maxstack
3742 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3743 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3744 | cmp RA, CARG1
3745 | bhi ->vm_growstack_l
3746 |2:
3747 | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters.
3748 | blo >3
3749 if (op == BC_JFUNCF) {
3750 | decode_RD RC, INS
3751 | b =>BC_JLOOP
3752 } else {
3753 | ins_next
3754 }
3755 |
3756 |3: // Clear missing parameters.
3757 | str TISNIL, [BASE, NARGS8:RC]
3758 | add NARGS8:RC, NARGS8:RC, #8
3759 | b <2
3760 break;
3761
3762 case BC_JFUNCV:
3763#if !LJ_HASJIT
3764 break;
3765#endif
3766 | NYI // NYI: compiled vararg functions
3767 break; /* NYI: compiled vararg functions. */
3768
3769 case BC_IFUNCV:
3770 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3771 | ldr CARG1, L->maxstack
3772 | movn TMP0, #~LJ_TFUNC
3773 | add TMP2, BASE, RC
3774 | add LFUNC:CARG3, CARG3, TMP0, lsl #47
3775 | add RA, RA, RC
3776 | add TMP0, RC, #16+FRAME_VARG
3777 | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
3778 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3779 | cmp RA, CARG1
3780 | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
3781 | bhs ->vm_growstack_l
3782 | sub RC, TMP2, #16
3783 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3784 | mov RA, BASE
3785 | mov BASE, TMP2
3786 | cbz TMP1, >2
3787 |1:
3788 | cmp RA, RC // Less args than parameters?
3789 | bhs >3
3790 | ldr TMP0, [RA]
3791 | sub TMP1, TMP1, #1
3792 | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC).
3793 | str TMP0, [TMP2], #8
3794 | cbnz TMP1, <1
3795 |2:
3796 | ins_next
3797 |
3798 |3:
3799 | sub TMP1, TMP1, #1
3800 | str TISNIL, [TMP2], #8
3801 | cbz TMP1, <2
3802 | b <3
3803 break;
3804
3805 case BC_FUNCC:
3806 case BC_FUNCCW:
3807 | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
3808 if (op == BC_FUNCC) {
3809 | ldr CARG4, CFUNC:CARG3->f
3810 } else {
3811 | ldr CARG4, GL->wrapf
3812 }
3813 | add CARG2, RA, NARGS8:RC
3814 | ldr CARG1, L->maxstack
3815 | add RC, BASE, NARGS8:RC
3816 | cmp CARG2, CARG1
3817 | stp BASE, RC, L->base
3818 if (op == BC_FUNCCW) {
3819 | ldr CARG2, CFUNC:CARG3->f
3820 }
3821 | mv_vmstate TMP0w, C
3822 | mov CARG1, L
3823 | bhi ->vm_growstack_c // Need to grow stack.
3824 | st_vmstate TMP0w
3825 | blr CARG4 // (lua_State *L [, lua_CFunction f])
3826 | // Returns nresults.
3827 | ldp BASE, TMP1, L->base
3828 | str L, GL->cur_L
3829 | sbfiz RC, CRET1, #3, #32
3830 | st_vmstate ST_INTERP
3831 | ldr PC, [BASE, FRAME_PC]
3832 | sub RA, TMP1, RC // RA = L->top - nresults*8
3833 | b ->vm_returnc
3834 break;
3835
3836 /* ---------------------------------------------------------------------- */
3837
3838 default:
3839 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3840 exit(2);
3841 break;
3842 }
3843}
3844
/*
** Generate the whole VM backend for this architecture.
**
** Order of work, as visible below:
**   1. Grow the DynASM pc-label table to BC__MAX entries (the opcode
**      handlers branch to per-opcode pc labels such as =>BC_JLOOP).
**   2. Emit the helper subroutines via build_subroutines().
**   3. Switch to the code_op section and emit one handler per bytecode
**      opcode, in opcode order 0 .. BC__MAX-1.
**
** Returns BC__MAX (presumably the number of pc labels in use -- confirm
** against the caller).
*/
3845static int build_backend(BuildCtx *ctx)
3846{
3847 int op;
3848
 /* One pc label per bytecode opcode. */
3849 dasm_growpc(Dst, BC__MAX);
3850
3851 build_subroutines(ctx);
3852
 /* DynASM directive: the opcode handlers emitted below go to section code_op. */
3853 |.code_op
3854 for (op = 0; op < BC__MAX; op++)
3855 build_ins(ctx, (BCOp)op, op);
3856
3857 return BC__MAX;
3858}
3859
3860/* Emit pseudo frame-info for all assembler functions. */
/*
** Writes DWARF call-frame information as assembler text to ctx->fp.
** Output is produced only for ctx->mode == BUILD_elfasm; every other
** mode emits nothing.
**
** Two sections are written, each describing the same two code ranges:
**   .debug_frame: CIE .Lframe0, FDE0 (.Lbegin, fcofs bytes) and, with
**                 LJ_HASFFI, FDE1 (lj_vm_ffi_call, codesz-fcofs bytes).
**   .eh_frame:    CIE .Lframe1 (augmentation "zPR", personality routine
**                 lj_err_unwind_dwarf) with FDE2 for .Lbegin, and, with
**                 LJ_HASFFI, CIE .Lframe2 (augmentation "zR", i.e. no
**                 personality) with FDE3 for lj_vm_ffi_call.
**
** All CIEs use a data alignment factor of -8, so every register offset
** below is counted in 8-byte slots below the CFA.
*/
3861static void emit_asm_debug(BuildCtx *ctx)
3862{
 /* Byte offset of the vm_ffi_call global from the start of the code. */
3863 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
 /* cf = C frame size expressed in 8-byte slots. */
3864 int i, cf = CFRAME_SIZE >> 3;
3865 switch (ctx->mode) {
3866 case BUILD_elfasm:
 /* ---- .debug_frame ---- */
3867 fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
 /* CIE 0: id 0xffffffff, version 1, empty augmentation string, code
 ** alignment 1, data alignment -8, return address column 30. */
3868 fprintf(ctx->fp,
3869 ".Lframe0:\n"
3870 "\t.long .LECIE0-.LSCIE0\n"
3871 ".LSCIE0:\n"
3872 "\t.long 0xffffffff\n"
3873 "\t.byte 0x1\n"
3874 "\t.string \"\"\n"
3875 "\t.uleb128 0x1\n"
3876 "\t.sleb128 -8\n"
3877 "\t.byte 30\n" /* Return address is in lr. */
3878 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3879 "\t.align 3\n"
3880 ".LECIE0:\n\n");
 /* FDE 0: covers fcofs bytes starting at .Lbegin. The CFA is
 ** sp + CFRAME_SIZE; fp is saved at slot cf, lr at slot cf-1.
 ** 0x9d/0x9e are 0x80+29 and 0x80+30, i.e. "offset" ops for x29/x30. */
3881 fprintf(ctx->fp,
3882 ".LSFDE0:\n"
3883 "\t.long .LEFDE0-.LASFDE0\n"
3884 ".LASFDE0:\n"
3885 "\t.long .Lframe0\n"
3886 "\t.quad .Lbegin\n"
3887 "\t.quad %d\n"
3888 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
3889 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
3890 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
3891 fcofs, CFRAME_SIZE, cf, cf-1);
 /* x19..x28 occupy slots cf-2 down to cf-11 (opcode byte is 0x80+reg). */
3892 for (i = 19; i <= 28; i++) /* offset x19-x28 */
3893 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
 /* d8..d15 occupy slots cf-12 down to cf-19. Their register numbers
 ** (64+i) do not fit the short form, so the extended opcode 5 with an
 ** explicit uleb128 register operand is used. */
3894 for (i = 8; i <= 15; i++) /* offset d8-d15 */
3895 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
3896 64+i, cf-i-4);
3897 fprintf(ctx->fp,
3898 "\t.align 3\n"
3899 ".LEFDE0:\n\n");
3900#if LJ_HASFFI
 /* FDE 1: covers lj_vm_ffi_call, from offset fcofs to the end of the
 ** generated code (codesz - fcofs bytes). Fixed 32 byte frame with
 ** fp, lr and x19 saved in slots 4, 3 and 2. */
3901 fprintf(ctx->fp,
3902 ".LSFDE1:\n"
3903 "\t.long .LEFDE1-.LASFDE1\n"
3904 ".LASFDE1:\n"
3905 "\t.long .Lframe0\n"
3906 "\t.quad lj_vm_ffi_call\n"
3907 "\t.quad %d\n"
3908 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3909 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3910 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3911 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3912 "\t.align 3\n"
3913 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
3914#endif
 /* ---- .eh_frame ---- */
3915 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
 /* CIE 1: id 0, augmentation "zPR". The 6 bytes of augmentation data
 ** are: personality encoding (1), pc-relative pointer to
 ** lj_err_unwind_dwarf (4) and the FDE pointer encoding (1). */
3916 fprintf(ctx->fp,
3917 ".Lframe1:\n"
3918 "\t.long .LECIE1-.LSCIE1\n"
3919 ".LSCIE1:\n"
3920 "\t.long 0\n"
3921 "\t.byte 0x1\n"
3922 "\t.string \"zPR\"\n"
3923 "\t.uleb128 0x1\n"
3924 "\t.sleb128 -8\n"
3925 "\t.byte 30\n" /* Return address is in lr. */
3926 "\t.uleb128 6\n" /* augmentation length */
3927 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3928 "\t.long lj_err_unwind_dwarf-.\n"
3929 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3930 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3931 "\t.align 3\n"
3932 ".LECIE1:\n\n");
 /* FDE 2: same range and register rules as FDE 0, but the CIE is
 ** referenced by a backwards offset and the start address is
 ** pc-relative, both 32 bit wide. */
3933 fprintf(ctx->fp,
3934 ".LSFDE2:\n"
3935 "\t.long .LEFDE2-.LASFDE2\n"
3936 ".LASFDE2:\n"
3937 "\t.long .LASFDE2-.Lframe1\n"
3938 "\t.long .Lbegin-.\n"
3939 "\t.long %d\n"
3940 "\t.uleb128 0\n" /* augmentation length */
3941 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
3942 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
3943 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
3944 fcofs, CFRAME_SIZE, cf, cf-1);
 /* Callee-saved register slots: identical to the .debug_frame FDE 0. */
3945 for (i = 19; i <= 28; i++) /* offset x19-x28 */
3946 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
3947 for (i = 8; i <= 15; i++) /* offset d8-d15 */
3948 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
3949 64+i, cf-i-4);
3950 fprintf(ctx->fp,
3951 "\t.align 3\n"
3952 ".LEFDE2:\n\n");
3953#if LJ_HASFFI
 /* CIE 2: like CIE 1 but with augmentation "zR" only, so it carries
 ** just the FDE pointer encoding and no personality routine. */
3954 fprintf(ctx->fp,
3955 ".Lframe2:\n"
3956 "\t.long .LECIE2-.LSCIE2\n"
3957 ".LSCIE2:\n"
3958 "\t.long 0\n"
3959 "\t.byte 0x1\n"
3960 "\t.string \"zR\"\n"
3961 "\t.uleb128 0x1\n"
3962 "\t.sleb128 -8\n"
3963 "\t.byte 30\n" /* Return address is in lr. */
3964 "\t.uleb128 1\n" /* augmentation length */
3965 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3966 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3967 "\t.align 3\n"
3968 ".LECIE2:\n\n");
 /* FDE 3: lj_vm_ffi_call, same frame rules as the .debug_frame FDE 1. */
3969 fprintf(ctx->fp,
3970 ".LSFDE3:\n"
3971 "\t.long .LEFDE3-.LASFDE3\n"
3972 ".LASFDE3:\n"
3973 "\t.long .LASFDE3-.Lframe2\n"
3974 "\t.long lj_vm_ffi_call-.\n"
3975 "\t.long %d\n"
3976 "\t.uleb128 0\n" /* augmentation length */
3977 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3978 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3979 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3980 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3981 "\t.align 3\n"
3982 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
3983#endif
3984 break;
 /* All other build modes: no frame info is emitted. */
3985 default:
3986 break;
3987 }
3988}
3989
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 8cb4bd3d..7bd86514 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,9 @@
1|// Low-level VM code for MIPS CPUs. 1|// Low-level VM code for MIPS CPUs.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// MIPS soft-float support contributed by Djordje Kovacevic and
6|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4| 7|
5|.arch mips 8|.arch mips
6|.section code_op, code_sub 9|.section code_op, code_sub
@@ -18,6 +21,12 @@
18|// Fixed register assignments for the interpreter. 21|// Fixed register assignments for the interpreter.
19|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra 22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20| 23|
24|.macro .FPU, a, b
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
21|// The following must be C callee-save (but BASE is often refetched). 30|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r16 // Base of current Lua stack frame. 31|.define BASE, r16 // Base of current Lua stack frame.
23|.define KBASE, r17 // Constants of current Lua function. 32|.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
25|.define DISPATCH, r19 // Opcode dispatch table. 34|.define DISPATCH, r19 // Opcode dispatch table.
26|.define LREG, r20 // Register holding lua_State (also in SAVE_L). 35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. 36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
28|// NYI: r22 currently unused.
29| 37|
30|.define JGL, r30 // On-trace: global_State + 32768. 38|.define JGL, r30 // On-trace: global_State + 32768.
31| 39|
32|// Constants for type-comparisons, stores and conversions. C callee-save. 40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNUM, r22
33|.define TISNIL, r30 42|.define TISNIL, r30
43|.if FPU
34|.define TOBIT, f30 // 2^52 + 2^51. 44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
35| 46|
36|// The following temporaries are not saved across C calls, except for RA. 47|// The following temporaries are not saved across C calls, except for RA.
37|.define RA, r23 // Callee-save. 48|.define RA, r23 // Callee-save.
@@ -46,7 +57,7 @@
46|.define TMP2, r14 57|.define TMP2, r14
47|.define TMP3, r15 58|.define TMP3, r15
48| 59|
49|// Calling conventions. 60|// MIPS o32 calling convention.
50|.define CFUNCADDR, r25 61|.define CFUNCADDR, r25
51|.define CARG1, r4 62|.define CARG1, r4
52|.define CARG2, r5 63|.define CARG2, r5
@@ -56,13 +67,33 @@
56|.define CRET1, r2 67|.define CRET1, r2
57|.define CRET2, r3 68|.define CRET2, r3
58| 69|
70|.if ENDIAN_LE
71|.define SFRETLO, CRET1
72|.define SFRETHI, CRET2
73|.define SFARG1LO, CARG1
74|.define SFARG1HI, CARG2
75|.define SFARG2LO, CARG3
76|.define SFARG2HI, CARG4
77|.else
78|.define SFRETLO, CRET2
79|.define SFRETHI, CRET1
80|.define SFARG1LO, CARG2
81|.define SFARG1HI, CARG1
82|.define SFARG2LO, CARG4
83|.define SFARG2HI, CARG3
84|.endif
85|
86|.if FPU
59|.define FARG1, f12 87|.define FARG1, f12
60|.define FARG2, f14 88|.define FARG2, f14
61| 89|
62|.define FRET1, f0 90|.define FRET1, f0
63|.define FRET2, f2 91|.define FRET2, f2
92|.endif
64| 93|
65|// Stack layout while in interpreter. Must match with lj_frame.h. 94|// Stack layout while in interpreter. Must match with lj_frame.h.
95|.if FPU // MIPS32 hard-float.
96|
66|.define CFRAME_SPACE, 112 // Delta for sp. 97|.define CFRAME_SPACE, 112 // Delta for sp.
67| 98|
68|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. 99|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
72|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. 103|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
73|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. 104|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
74|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. 105|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
106|
107|.else // MIPS32 soft-float
108|
109|.define CFRAME_SPACE, 64 // Delta for sp.
110|
111|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
112|.define SAVE_NRES, 72(sp)
113|.define SAVE_CFRAME, 68(sp)
114|.define SAVE_L, 64(sp)
115|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
116|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
117|
118|.endif
119|
75|.define SAVE_PC, 20(sp) 120|.define SAVE_PC, 20(sp)
76|.define ARG5, 16(sp) 121|.define ARG5, 16(sp)
77|.define CSAVE_4, 12(sp) 122|.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
83|.define ARG5_OFS, 16 128|.define ARG5_OFS, 16
84|.define SAVE_MULTRES, ARG5 129|.define SAVE_MULTRES, ARG5
85| 130|
131|//-----------------------------------------------------------------------
132|
86|.macro saveregs 133|.macro saveregs
87| addiu sp, sp, -CFRAME_SPACE 134| addiu sp, sp, -CFRAME_SPACE
88| sw ra, SAVE_GPR_+9*4(sp) 135| sw ra, SAVE_GPR_+9*4(sp)
89| sw r30, SAVE_GPR_+8*4(sp) 136| sw r30, SAVE_GPR_+8*4(sp)
90| sdc1 f30, SAVE_FPR_+5*8(sp) 137| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91| sw r23, SAVE_GPR_+7*4(sp) 138| sw r23, SAVE_GPR_+7*4(sp)
92| sw r22, SAVE_GPR_+6*4(sp) 139| sw r22, SAVE_GPR_+6*4(sp)
93| sdc1 f28, SAVE_FPR_+4*8(sp) 140| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94| sw r21, SAVE_GPR_+5*4(sp) 141| sw r21, SAVE_GPR_+5*4(sp)
95| sw r20, SAVE_GPR_+4*4(sp) 142| sw r20, SAVE_GPR_+4*4(sp)
96| sdc1 f26, SAVE_FPR_+3*8(sp) 143| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97| sw r19, SAVE_GPR_+3*4(sp) 144| sw r19, SAVE_GPR_+3*4(sp)
98| sw r18, SAVE_GPR_+2*4(sp) 145| sw r18, SAVE_GPR_+2*4(sp)
99| sdc1 f24, SAVE_FPR_+2*8(sp) 146| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100| sw r17, SAVE_GPR_+1*4(sp) 147| sw r17, SAVE_GPR_+1*4(sp)
101| sw r16, SAVE_GPR_+0*4(sp) 148| sw r16, SAVE_GPR_+0*4(sp)
102| sdc1 f22, SAVE_FPR_+1*8(sp) 149| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
103| sdc1 f20, SAVE_FPR_+0*8(sp) 150| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104|.endmacro 151|.endmacro
105| 152|
106|.macro restoreregs_ret 153|.macro restoreregs_ret
107| lw ra, SAVE_GPR_+9*4(sp) 154| lw ra, SAVE_GPR_+9*4(sp)
108| lw r30, SAVE_GPR_+8*4(sp) 155| lw r30, SAVE_GPR_+8*4(sp)
109| ldc1 f30, SAVE_FPR_+5*8(sp) 156| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110| lw r23, SAVE_GPR_+7*4(sp) 157| lw r23, SAVE_GPR_+7*4(sp)
111| lw r22, SAVE_GPR_+6*4(sp) 158| lw r22, SAVE_GPR_+6*4(sp)
112| ldc1 f28, SAVE_FPR_+4*8(sp) 159| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113| lw r21, SAVE_GPR_+5*4(sp) 160| lw r21, SAVE_GPR_+5*4(sp)
114| lw r20, SAVE_GPR_+4*4(sp) 161| lw r20, SAVE_GPR_+4*4(sp)
115| ldc1 f26, SAVE_FPR_+3*8(sp) 162| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116| lw r19, SAVE_GPR_+3*4(sp) 163| lw r19, SAVE_GPR_+3*4(sp)
117| lw r18, SAVE_GPR_+2*4(sp) 164| lw r18, SAVE_GPR_+2*4(sp)
118| ldc1 f24, SAVE_FPR_+2*8(sp) 165| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119| lw r17, SAVE_GPR_+1*4(sp) 166| lw r17, SAVE_GPR_+1*4(sp)
120| lw r16, SAVE_GPR_+0*4(sp) 167| lw r16, SAVE_GPR_+0*4(sp)
121| ldc1 f22, SAVE_FPR_+1*8(sp) 168| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
122| ldc1 f20, SAVE_FPR_+0*8(sp) 169| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123| jr ra 170| jr ra
124| addiu sp, sp, CFRAME_SPACE 171| addiu sp, sp, CFRAME_SPACE
125|.endmacro 172|.endmacro
@@ -138,6 +185,7 @@
138|.type NODE, Node 185|.type NODE, Node
139|.type NARGS8, int 186|.type NARGS8, int
140|.type TRACE, GCtrace 187|.type TRACE, GCtrace
188|.type SBUF, SBuf
141| 189|
142|//----------------------------------------------------------------------- 190|//-----------------------------------------------------------------------
143| 191|
@@ -152,13 +200,23 @@
152|//----------------------------------------------------------------------- 200|//-----------------------------------------------------------------------
153| 201|
154|// Endian-specific defines. 202|// Endian-specific defines.
155|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) 203|.if ENDIAN_LE
156|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) 204|.define FRAME_PC, -4
157|.define HI, LJ_ENDIAN_SELECT(4,0) 205|.define FRAME_FUNC, -8
158|.define LO, LJ_ENDIAN_SELECT(0,4) 206|.define HI, 4
159|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) 207|.define LO, 0
160|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) 208|.define OFS_RD, 2
161|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) 209|.define OFS_RA, 1
210|.define OFS_OP, 0
211|.else
212|.define FRAME_PC, -8
213|.define FRAME_FUNC, -4
214|.define HI, 0
215|.define LO, 4
216|.define OFS_RD, 0
217|.define OFS_RA, 2
218|.define OFS_OP, 3
219|.endif
162| 220|
163|// Instruction decode. 221|// Instruction decode.
164|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro 222|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
353 |. sll TMP2, TMP2, 3 411 |. sll TMP2, TMP2, 3
354 |1: 412 |1:
355 | addiu TMP1, TMP1, -8 413 | addiu TMP1, TMP1, -8
356 | ldc1 f0, 0(RA) 414 | lw SFRETHI, HI(RA)
415 | lw SFRETLO, LO(RA)
357 | addiu RA, RA, 8 416 | addiu RA, RA, 8
358 | sdc1 f0, 0(BASE) 417 | sw SFRETHI, HI(BASE)
418 | sw SFRETLO, LO(BASE)
359 | bnez TMP1, <1 419 | bnez TMP1, <1
360 |. addiu BASE, BASE, 8 420 |. addiu BASE, BASE, 8
361 | 421 |
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
424 | and sp, CARG1, AT 484 | and sp, CARG1, AT
425 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 485 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
426 | lw L, SAVE_L 486 | lw L, SAVE_L
427 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 487 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
428 | li TISNIL, LJ_TNIL 489 | li TISNIL, LJ_TNIL
429 | lw BASE, L->base 490 | lw BASE, L->base
430 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 491 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
431 | mtc1 TMP3, TOBIT 492 | .FPU mtc1 TMP3, TOBIT
432 | li TMP1, LJ_TFALSE 493 | li TMP1, LJ_TFALSE
433 | li_vmstate INTERP 494 | li_vmstate INTERP
434 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. 495 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
435 | cvt.d.s TOBIT, TOBIT 496 | .FPU cvt.d.s TOBIT, TOBIT
436 | addiu RA, BASE, -8 // Results start at BASE-8. 497 | addiu RA, BASE, -8 // Results start at BASE-8.
437 | addiu DISPATCH, DISPATCH, GG_G2DISP 498 | addiu DISPATCH, DISPATCH, GG_G2DISP
438 | sw TMP1, HI(RA) // Prepend false to error message. 499 | sw TMP1, HI(RA) // Prepend false to error message.
@@ -440,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx)
440 | b ->vm_returnc 501 | b ->vm_returnc
441 |. li RD, 16 // 2 results: false + error message. 502 |. li RD, 16 // 2 results: false + error message.
442 | 503 |
504 |->vm_unwind_stub: // Jump to exit stub from unwinder.
505 | jr CARG1
506 |. move ra, CARG2
507 |
443 |//----------------------------------------------------------------------- 508 |//-----------------------------------------------------------------------
444 |//-- Grow stack for calls ----------------------------------------------- 509 |//-- Grow stack for calls -----------------------------------------------
445 |//----------------------------------------------------------------------- 510 |//-----------------------------------------------------------------------
@@ -486,21 +551,23 @@ static void build_subroutines(BuildCtx *ctx)
486 | addiu DISPATCH, DISPATCH, GG_G2DISP 551 | addiu DISPATCH, DISPATCH, GG_G2DISP
487 | sw r0, SAVE_NRES 552 | sw r0, SAVE_NRES
488 | sw r0, SAVE_ERRF 553 | sw r0, SAVE_ERRF
489 | sw TMP0, L->cframe 554 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
490 | sw r0, SAVE_CFRAME 555 | sw r0, SAVE_CFRAME
491 | beqz TMP1, >3 556 | beqz TMP1, >3
492 |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 557 |. sw TMP0, L->cframe
493 | 558 |
494 | // Resume after yield (like a return). 559 | // Resume after yield (like a return).
560 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
495 | move RA, BASE 561 | move RA, BASE
496 | lw BASE, L->base 562 | lw BASE, L->base
563 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
497 | lw TMP1, L->top 564 | lw TMP1, L->top
498 | lw PC, FRAME_PC(BASE) 565 | lw PC, FRAME_PC(BASE)
499 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 566 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
500 | subu RD, TMP1, BASE 567 | subu RD, TMP1, BASE
501 | mtc1 TMP3, TOBIT 568 | .FPU mtc1 TMP3, TOBIT
502 | sb r0, L->status 569 | sb r0, L->status
503 | cvt.d.s TOBIT, TOBIT 570 | .FPU cvt.d.s TOBIT, TOBIT
504 | li_vmstate INTERP 571 | li_vmstate INTERP
505 | addiu RD, RD, 8 572 | addiu RD, RD, 8
506 | st_vmstate 573 | st_vmstate
@@ -525,25 +592,27 @@ static void build_subroutines(BuildCtx *ctx)
525 | 592 |
526 |1: // Entry point for vm_pcall above (PC = ftype). 593 |1: // Entry point for vm_pcall above (PC = ftype).
527 | lw TMP1, L:CARG1->cframe 594 | lw TMP1, L:CARG1->cframe
528 | sw CARG3, SAVE_NRES
529 | move L, CARG1 595 | move L, CARG1
530 | sw CARG1, SAVE_L 596 | sw CARG3, SAVE_NRES
531 | move BASE, CARG2
532 | sw sp, L->cframe // Add our C frame to cframe chain.
533 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 597 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
598 | sw CARG1, SAVE_L
599 | move BASE, CARG2
600 | addiu DISPATCH, DISPATCH, GG_G2DISP
534 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 601 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | sw TMP1, SAVE_CFRAME 602 | sw TMP1, SAVE_CFRAME
536 | addiu DISPATCH, DISPATCH, GG_G2DISP 603 | sw sp, L->cframe // Add our C frame to cframe chain.
537 | 604 |
538 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 605 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
606 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
539 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 607 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
540 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 608 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
609 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
541 | lw TMP1, L->top 610 | lw TMP1, L->top
542 | mtc1 TMP3, TOBIT 611 | .FPU mtc1 TMP3, TOBIT
543 | addu PC, PC, BASE 612 | addu PC, PC, BASE
544 | subu NARGS8:RC, TMP1, BASE 613 | subu NARGS8:RC, TMP1, BASE
545 | subu PC, PC, TMP2 // PC = frame delta + frame type 614 | subu PC, PC, TMP2 // PC = frame delta + frame type
546 | cvt.d.s TOBIT, TOBIT 615 | .FPU cvt.d.s TOBIT, TOBIT
547 | li_vmstate INTERP 616 | li_vmstate INTERP
548 | li TISNIL, LJ_TNIL 617 | li TISNIL, LJ_TNIL
549 | st_vmstate 618 | st_vmstate
@@ -566,20 +635,21 @@ static void build_subroutines(BuildCtx *ctx)
566 | lw TMP0, L:CARG1->stack 635 | lw TMP0, L:CARG1->stack
567 | sw CARG1, SAVE_L 636 | sw CARG1, SAVE_L
568 | lw TMP1, L->top 637 | lw TMP1, L->top
638 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
569 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 639 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
570 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 640 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
571 | lw TMP1, L->cframe 641 | lw TMP1, L->cframe
572 | sw sp, L->cframe // Add our C frame to cframe chain. 642 | addiu DISPATCH, DISPATCH, GG_G2DISP
573 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 643 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
574 | sw r0, SAVE_ERRF // No error function. 644 | sw r0, SAVE_ERRF // No error function.
575 | move CFUNCADDR, CARG4 645 | sw TMP1, SAVE_CFRAME
646 | sw sp, L->cframe // Add our C frame to cframe chain.
647 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
576 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) 648 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
577 |. sw TMP1, SAVE_CFRAME 649 |. move CFUNCADDR, CARG4
578 | move BASE, CRET1 650 | move BASE, CRET1
579 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
580 | li PC, FRAME_CP
581 | bnez CRET1, <3 // Else continue with the call. 651 | bnez CRET1, <3 // Else continue with the call.
582 |. addiu DISPATCH, DISPATCH, GG_G2DISP 652 |. li PC, FRAME_CP
583 | b ->vm_leave_cp // No base? Just remove C frame. 653 | b ->vm_leave_cp // No base? Just remove C frame.
584 |. nop 654 |. nop
585 | 655 |
@@ -624,7 +694,8 @@ static void build_subroutines(BuildCtx *ctx)
624 |->cont_cat: // RA = resultptr, RB = meta base 694 |->cont_cat: // RA = resultptr, RB = meta base
625 | lw INS, -4(PC) 695 | lw INS, -4(PC)
626 | addiu CARG2, RB, -16 696 | addiu CARG2, RB, -16
627 | ldc1 f0, 0(RA) 697 | lw SFRETHI, HI(RA)
698 | lw SFRETLO, LO(RA)
628 | decode_RB8a MULTRES, INS 699 | decode_RB8a MULTRES, INS
629 | decode_RA8a RA, INS 700 | decode_RA8a RA, INS
630 | decode_RB8b MULTRES 701 | decode_RB8b MULTRES
@@ -632,11 +703,13 @@ static void build_subroutines(BuildCtx *ctx)
632 | addu TMP1, BASE, MULTRES 703 | addu TMP1, BASE, MULTRES
633 | sw BASE, L->base 704 | sw BASE, L->base
634 | subu CARG3, CARG2, TMP1 705 | subu CARG3, CARG2, TMP1
706 | sw SFRETHI, HI(CARG2)
635 | bne TMP1, CARG2, ->BC_CAT_Z 707 | bne TMP1, CARG2, ->BC_CAT_Z
636 |. sdc1 f0, 0(CARG2) 708 |. sw SFRETLO, LO(CARG2)
637 | addu RA, BASE, RA 709 | addu RA, BASE, RA
710 | sw SFRETHI, HI(RA)
638 | b ->cont_nop 711 | b ->cont_nop
639 |. sdc1 f0, 0(RA) 712 |. sw SFRETLO, LO(RA)
640 | 713 |
641 |//-- Table indexing metamethods ----------------------------------------- 714 |//-- Table indexing metamethods -----------------------------------------
642 | 715 |
@@ -659,10 +732,9 @@ static void build_subroutines(BuildCtx *ctx)
659 |. sw TMP1, HI(CARG3) 732 |. sw TMP1, HI(CARG3)
660 | 733 |
661 |->vmeta_tgetb: // TMP0 = index 734 |->vmeta_tgetb: // TMP0 = index
662 | mtc1 TMP0, f0
663 | cvt.d.w f0, f0
664 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 735 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
665 | sdc1 f0, 0(CARG3) 736 | sw TMP0, LO(CARG3)
737 | sw TISNUM, HI(CARG3)
666 | 738 |
667 |->vmeta_tgetv: 739 |->vmeta_tgetv:
668 |1: 740 |1:
@@ -674,9 +746,11 @@ static void build_subroutines(BuildCtx *ctx)
674 | // Returns TValue * (finished) or NULL (metamethod). 746 | // Returns TValue * (finished) or NULL (metamethod).
675 | beqz CRET1, >3 747 | beqz CRET1, >3
676 |. addiu TMP1, BASE, -FRAME_CONT 748 |. addiu TMP1, BASE, -FRAME_CONT
677 | ldc1 f0, 0(CRET1) 749 | lw SFARG1HI, HI(CRET1)
750 | lw SFARG2HI, LO(CRET1)
678 | ins_next1 751 | ins_next1
679 | sdc1 f0, 0(RA) 752 | sw SFARG1HI, HI(RA)
753 | sw SFARG2HI, LO(RA)
680 | ins_next2 754 | ins_next2
681 | 755 |
682 |3: // Call __index metamethod. 756 |3: // Call __index metamethod.
@@ -688,6 +762,17 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 762 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 763 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 764 |
765 |->vmeta_tgetr:
766 | load_got lj_tab_getinth
767 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
768 |. nop
769 | // Returns cTValue * or NULL.
770 | beqz CRET1, ->BC_TGETR_Z
771 |. move SFARG2HI, TISNIL
772 | lw SFARG2HI, HI(CRET1)
773 | b ->BC_TGETR_Z
774 |. lw SFARG2LO, LO(CRET1)
775 |
691 |//----------------------------------------------------------------------- 776 |//-----------------------------------------------------------------------
692 | 777 |
693 |->vmeta_tsets1: 778 |->vmeta_tsets1:
@@ -709,10 +794,9 @@ static void build_subroutines(BuildCtx *ctx)
709 |. sw TMP1, HI(CARG3) 794 |. sw TMP1, HI(CARG3)
710 | 795 |
711 |->vmeta_tsetb: // TMP0 = index 796 |->vmeta_tsetb: // TMP0 = index
712 | mtc1 TMP0, f0
713 | cvt.d.w f0, f0
714 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 797 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
715 | sdc1 f0, 0(CARG3) 798 | sw TMP0, LO(CARG3)
799 | sw TISNUM, HI(CARG3)
716 | 800 |
717 |->vmeta_tsetv: 801 |->vmeta_tsetv:
718 |1: 802 |1:
@@ -722,11 +806,13 @@ static void build_subroutines(BuildCtx *ctx)
722 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 806 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
723 |. move CARG1, L 807 |. move CARG1, L
724 | // Returns TValue * (finished) or NULL (metamethod). 808 | // Returns TValue * (finished) or NULL (metamethod).
809 | lw SFARG1HI, HI(RA)
725 | beqz CRET1, >3 810 | beqz CRET1, >3
726 |. ldc1 f0, 0(RA) 811 |. lw SFARG1LO, LO(RA)
727 | // NOBARRIER: lj_meta_tset ensures the table is not black. 812 | // NOBARRIER: lj_meta_tset ensures the table is not black.
728 | ins_next1 813 | ins_next1
729 | sdc1 f0, 0(CRET1) 814 | sw SFARG1HI, HI(CRET1)
815 | sw SFARG1LO, LO(CRET1)
730 | ins_next2 816 | ins_next2
731 | 817 |
732 |3: // Call __newindex metamethod. 818 |3: // Call __newindex metamethod.
@@ -736,14 +822,27 @@ static void build_subroutines(BuildCtx *ctx)
736 | sw PC, -16+HI(BASE) // [cont|PC] 822 | sw PC, -16+HI(BASE) // [cont|PC]
737 | subu PC, BASE, TMP1 823 | subu PC, BASE, TMP1
738 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 824 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
739 | sdc1 f0, 16(BASE) // Copy value to third argument. 825 | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
826 | sw SFARG1LO, 16+LO(BASE)
740 | b ->vm_call_dispatch_f 827 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 828 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 829 |
830 |->vmeta_tsetr:
831 | load_got lj_tab_setinth
832 | sw BASE, L->base
833 | sw PC, SAVE_PC
834 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
835 |. move CARG1, L
836 | // Returns TValue *.
837 | b ->BC_TSETR_Z
838 |. nop
839 |
743 |//-- Comparison metamethods --------------------------------------------- 840 |//-- Comparison metamethods ---------------------------------------------
744 | 841 |
745 |->vmeta_comp: 842 |->vmeta_comp:
746 | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. 843 | // RA/RD point to o1/o2.
844 | move CARG2, RA
845 | move CARG3, RD
747 | load_got lj_meta_comp 846 | load_got lj_meta_comp
748 | addiu PC, PC, -4 847 | addiu PC, PC, -4
749 | sw BASE, L->base 848 | sw BASE, L->base
@@ -769,11 +868,13 @@ static void build_subroutines(BuildCtx *ctx)
769 | 868 |
770 |->cont_ra: // RA = resultptr 869 |->cont_ra: // RA = resultptr
771 | lbu TMP1, -4+OFS_RA(PC) 870 | lbu TMP1, -4+OFS_RA(PC)
772 | ldc1 f0, 0(RA) 871 | lw SFRETHI, HI(RA)
872 | lw SFRETLO, LO(RA)
773 | sll TMP1, TMP1, 3 873 | sll TMP1, TMP1, 3
774 | addu TMP1, BASE, TMP1 874 | addu TMP1, BASE, TMP1
875 | sw SFRETHI, HI(TMP1)
775 | b ->cont_nop 876 | b ->cont_nop
776 |. sdc1 f0, 0(TMP1) 877 |. sw SFRETLO, LO(TMP1)
777 | 878 |
778 |->cont_condt: // RA = resultptr 879 |->cont_condt: // RA = resultptr
779 | lw TMP0, HI(RA) 880 | lw TMP0, HI(RA)
@@ -788,8 +889,11 @@ static void build_subroutines(BuildCtx *ctx)
788 |. addiu TMP2, AT, -1 // Branch if result is false. 889 |. addiu TMP2, AT, -1 // Branch if result is false.
789 | 890 |
790 |->vmeta_equal: 891 |->vmeta_equal:
791 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. 892 | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
792 | load_got lj_meta_equal 893 | load_got lj_meta_equal
894 | move CARG2, SFARG1LO
895 | move CARG3, SFARG2LO
896 | move CARG4, TMP0
793 | addiu PC, PC, -4 897 | addiu PC, PC, -4
794 | sw BASE, L->base 898 | sw BASE, L->base
795 | sw PC, SAVE_PC 899 | sw PC, SAVE_PC
@@ -813,17 +917,31 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 917 |. nop
814 |.endif 918 |.endif
815 | 919 |
920 |->vmeta_istype:
921 | load_got lj_meta_istype
922 | addiu PC, PC, -4
923 | sw BASE, L->base
924 | srl CARG2, RA, 3
925 | srl CARG3, RD, 3
926 | sw PC, SAVE_PC
927 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
928 |. move CARG1, L
929 | b ->cont_nop
930 |. nop
931 |
816 |//-- Arithmetic metamethods --------------------------------------------- 932 |//-- Arithmetic metamethods ---------------------------------------------
817 | 933 |
818 |->vmeta_unm: 934 |->vmeta_unm:
819 | move CARG4, CARG3 935 | move RC, RB
820 | 936 |
821 |->vmeta_arith: 937 |->vmeta_arith:
822 | load_got lj_meta_arith 938 | load_got lj_meta_arith
823 | decode_OP1 TMP0, INS 939 | decode_OP1 TMP0, INS
824 | sw BASE, L->base 940 | sw BASE, L->base
825 | sw PC, SAVE_PC
826 | move CARG2, RA 941 | move CARG2, RA
942 | sw PC, SAVE_PC
943 | move CARG3, RB
944 | move CARG4, RC
827 | sw TMP0, ARG5 945 | sw TMP0, ARG5
828 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 946 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
829 |. move CARG1, L 947 |. move CARG1, L
@@ -931,40 +1049,52 @@ static void build_subroutines(BuildCtx *ctx)
931 | 1049 |
932 |.macro .ffunc_1, name 1050 |.macro .ffunc_1, name
933 |->ff_ .. name: 1051 |->ff_ .. name:
1052 | lw SFARG1HI, HI(BASE)
934 | beqz NARGS8:RC, ->fff_fallback 1053 | beqz NARGS8:RC, ->fff_fallback
935 |. lw CARG3, HI(BASE) 1054 |. lw SFARG1LO, LO(BASE)
936 | lw CARG1, LO(BASE)
937 |.endmacro 1055 |.endmacro
938 | 1056 |
939 |.macro .ffunc_2, name 1057 |.macro .ffunc_2, name
940 |->ff_ .. name: 1058 |->ff_ .. name:
941 | sltiu AT, NARGS8:RC, 16 1059 | sltiu AT, NARGS8:RC, 16
942 | lw CARG3, HI(BASE) 1060 | lw SFARG1HI, HI(BASE)
943 | bnez AT, ->fff_fallback 1061 | bnez AT, ->fff_fallback
944 |. lw CARG4, 8+HI(BASE) 1062 |. lw SFARG2HI, 8+HI(BASE)
945 | lw CARG1, LO(BASE) 1063 | lw SFARG1LO, LO(BASE)
946 | lw CARG2, 8+LO(BASE) 1064 | lw SFARG2LO, 8+LO(BASE)
947 |.endmacro 1065 |.endmacro
948 | 1066 |
949 |.macro .ffunc_n, name // Caveat: has delay slot! 1067 |.macro .ffunc_n, name // Caveat: has delay slot!
950 |->ff_ .. name: 1068 |->ff_ .. name:
951 | lw CARG3, HI(BASE) 1069 | lw SFARG1HI, HI(BASE)
1070 |.if FPU
1071 | ldc1 FARG1, 0(BASE)
1072 |.else
1073 | lw SFARG1LO, LO(BASE)
1074 |.endif
952 | beqz NARGS8:RC, ->fff_fallback 1075 | beqz NARGS8:RC, ->fff_fallback
953 |. ldc1 FARG1, 0(BASE) 1076 |. sltiu AT, SFARG1HI, LJ_TISNUM
954 | sltiu AT, CARG3, LJ_TISNUM
955 | beqz AT, ->fff_fallback 1077 | beqz AT, ->fff_fallback
956 |.endmacro 1078 |.endmacro
957 | 1079 |
958 |.macro .ffunc_nn, name // Caveat: has delay slot! 1080 |.macro .ffunc_nn, name // Caveat: has delay slot!
959 |->ff_ .. name: 1081 |->ff_ .. name:
960 | sltiu AT, NARGS8:RC, 16 1082 | sltiu AT, NARGS8:RC, 16
961 | lw CARG3, HI(BASE) 1083 | lw SFARG1HI, HI(BASE)
962 | bnez AT, ->fff_fallback 1084 | bnez AT, ->fff_fallback
963 |. lw CARG4, 8+HI(BASE) 1085 |. lw SFARG2HI, 8+HI(BASE)
964 | ldc1 FARG1, 0(BASE) 1086 | sltiu TMP0, SFARG1HI, LJ_TISNUM
965 | ldc1 FARG2, 8(BASE) 1087 |.if FPU
966 | sltiu TMP0, CARG3, LJ_TISNUM 1088 | ldc1 FARG1, 0(BASE)
967 | sltiu TMP1, CARG4, LJ_TISNUM 1089 |.else
1090 | lw SFARG1LO, LO(BASE)
1091 |.endif
1092 | sltiu TMP1, SFARG2HI, LJ_TISNUM
1093 |.if FPU
1094 | ldc1 FARG2, 8(BASE)
1095 |.else
1096 | lw SFARG2LO, 8+LO(BASE)
1097 |.endif
968 | and TMP0, TMP0, TMP1 1098 | and TMP0, TMP0, TMP1
969 | beqz TMP0, ->fff_fallback 1099 | beqz TMP0, ->fff_fallback
970 |.endmacro 1100 |.endmacro
@@ -980,53 +1110,55 @@ static void build_subroutines(BuildCtx *ctx)
980 |//-- Base library: checks ----------------------------------------------- 1110 |//-- Base library: checks -----------------------------------------------
981 | 1111 |
982 |.ffunc_1 assert 1112 |.ffunc_1 assert
983 | sltiu AT, CARG3, LJ_TISTRUECOND 1113 | sltiu AT, SFARG1HI, LJ_TISTRUECOND
984 | beqz AT, ->fff_fallback 1114 | beqz AT, ->fff_fallback
985 |. addiu RA, BASE, -8 1115 |. addiu RA, BASE, -8
986 | lw PC, FRAME_PC(BASE) 1116 | lw PC, FRAME_PC(BASE)
987 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1117 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
988 | addu TMP2, RA, NARGS8:RC 1118 | addu TMP2, RA, NARGS8:RC
989 | sw CARG3, HI(RA) 1119 | sw SFARG1HI, HI(RA)
990 | addiu TMP1, BASE, 8 1120 | addiu TMP1, BASE, 8
991 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 1121 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
992 |. sw CARG1, LO(RA) 1122 |. sw SFARG1LO, LO(RA)
993 |1: 1123 |1:
994 | ldc1 f0, 0(TMP1) 1124 | lw SFRETHI, HI(TMP1)
995 | sdc1 f0, -8(TMP1) 1125 | lw SFRETLO, LO(TMP1)
1126 | sw SFRETHI, -8+HI(TMP1)
1127 | sw SFRETLO, -8+LO(TMP1)
996 | bne TMP1, TMP2, <1 1128 | bne TMP1, TMP2, <1
997 |. addiu TMP1, TMP1, 8 1129 |. addiu TMP1, TMP1, 8
998 | b ->fff_res 1130 | b ->fff_res
999 |. nop 1131 |. nop
1000 | 1132 |
1001 |.ffunc type 1133 |.ffunc type
1002 | lw CARG3, HI(BASE) 1134 | lw SFARG1HI, HI(BASE)
1003 | li TMP1, LJ_TISNUM
1004 | beqz NARGS8:RC, ->fff_fallback 1135 | beqz NARGS8:RC, ->fff_fallback
1005 |. sltiu TMP0, CARG3, LJ_TISNUM 1136 |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1006 | movz TMP1, CARG3, TMP0 1137 | movn SFARG1HI, TISNUM, TMP0
1007 | not TMP1, TMP1 1138 | not TMP1, SFARG1HI
1008 | sll TMP1, TMP1, 3 1139 | sll TMP1, TMP1, 3
1009 | addu TMP1, CFUNC:RB, TMP1 1140 | addu TMP1, CFUNC:RB, TMP1
1010 | b ->fff_resn 1141 | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1011 |. ldc1 FRET1, CFUNC:TMP1->upvalue 1142 | b ->fff_restv
1143 |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1012 | 1144 |
1013 |//-- Base library: getters and setters --------------------------------- 1145 |//-- Base library: getters and setters ---------------------------------
1014 | 1146 |
1015 |.ffunc_1 getmetatable 1147 |.ffunc_1 getmetatable
1016 | li AT, LJ_TTAB 1148 | li AT, LJ_TTAB
1017 | bne CARG3, AT, >6 1149 | bne SFARG1HI, AT, >6
1018 |. li AT, LJ_TUDATA 1150 |. li AT, LJ_TUDATA
1019 |1: // Field metatable must be at same offset for GCtab and GCudata! 1151 |1: // Field metatable must be at same offset for GCtab and GCudata!
1020 | lw TAB:CARG1, TAB:CARG1->metatable 1152 | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1021 |2: 1153 |2:
1022 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1154 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1023 | beqz TAB:CARG1, ->fff_restv 1155 | beqz TAB:SFARG1LO, ->fff_restv
1024 |. li CARG3, LJ_TNIL 1156 |. li SFARG1HI, LJ_TNIL
1025 | lw TMP0, TAB:CARG1->hmask 1157 | lw TMP0, TAB:SFARG1LO->hmask
1026 | li CARG3, LJ_TTAB // Use metatable as default result. 1158 | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1027 | lw TMP1, STR:RC->hash 1159 | lw TMP1, STR:RC->sid
1028 | lw NODE:TMP2, TAB:CARG1->node 1160 | lw NODE:TMP2, TAB:SFARG1LO->node
1029 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1161 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
1030 | sll TMP0, TMP1, 5 1162 | sll TMP0, TMP1, 5
1031 | sll TMP1, TMP1, 3 1163 | sll TMP1, TMP1, 3
1032 | subu TMP1, TMP0, TMP1 1164 | subu TMP1, TMP0, TMP1
@@ -1037,7 +1169,7 @@ static void build_subroutines(BuildCtx *ctx)
1037 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 1169 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1038 | lw NODE:TMP3, NODE:TMP2->next 1170 | lw NODE:TMP3, NODE:TMP2->next
1039 | bne CARG4, AT, >4 1171 | bne CARG4, AT, >4
1040 |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 1172 |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1041 | beq TMP0, STR:RC, >5 1173 | beq TMP0, STR:RC, >5
1042 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) 1174 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1043 |4: 1175 |4:
@@ -1046,36 +1178,35 @@ static void build_subroutines(BuildCtx *ctx)
1046 | b <3 1178 | b <3
1047 |. nop 1179 |. nop
1048 |5: 1180 |5:
1049 | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. 1181 | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1050 |. nop 1182 |. nop
1051 | move CARG3, CARG2 // Return value of mt.__metatable. 1183 | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1052 | b ->fff_restv 1184 | b ->fff_restv
1053 |. move CARG1, TMP1 1185 |. move SFARG1LO, TMP1
1054 | 1186 |
1055 |6: 1187 |6:
1056 | beq CARG3, AT, <1 1188 | beq SFARG1HI, AT, <1
1057 |. sltiu TMP0, CARG3, LJ_TISNUM 1189 |. sltu AT, TISNUM, SFARG1HI
1058 | li TMP1, LJ_TISNUM 1190 | movz SFARG1HI, TISNUM, AT
1059 | movz TMP1, CARG3, TMP0 1191 | not TMP1, SFARG1HI
1060 | not TMP1, TMP1
1061 | sll TMP1, TMP1, 2 1192 | sll TMP1, TMP1, 2
1062 | addu TMP1, DISPATCH, TMP1 1193 | addu TMP1, DISPATCH, TMP1
1063 | b <2 1194 | b <2
1064 |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) 1195 |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1065 | 1196 |
1066 |.ffunc_2 setmetatable 1197 |.ffunc_2 setmetatable
1067 | // Fast path: no mt for table yet and not clearing the mt. 1198 | // Fast path: no mt for table yet and not clearing the mt.
1068 | li AT, LJ_TTAB 1199 | li AT, LJ_TTAB
1069 | bne CARG3, AT, ->fff_fallback 1200 | bne SFARG1HI, AT, ->fff_fallback
1070 |. addiu CARG4, CARG4, -LJ_TTAB 1201 |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
1071 | lw TAB:TMP1, TAB:CARG1->metatable 1202 | lw TAB:TMP1, TAB:SFARG1LO->metatable
1072 | lbu TMP3, TAB:CARG1->marked 1203 | lbu TMP3, TAB:SFARG1LO->marked
1073 | or AT, CARG4, TAB:TMP1 1204 | or AT, SFARG2HI, TAB:TMP1
1074 | bnez AT, ->fff_fallback 1205 | bnez AT, ->fff_fallback
1075 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) 1206 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1076 | beqz AT, ->fff_restv 1207 | beqz AT, ->fff_restv
1077 |. sw TAB:CARG2, TAB:CARG1->metatable 1208 |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
1078 | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv 1209 | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1079 | 1210 |
1080 |.ffunc rawget 1211 |.ffunc rawget
1081 | lw CARG4, HI(BASE) 1212 | lw CARG4, HI(BASE)
@@ -1089,44 +1220,44 @@ static void build_subroutines(BuildCtx *ctx)
1089 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1220 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1090 |. move CARG1, L 1221 |. move CARG1, L
1091 | // Returns cTValue *. 1222 | // Returns cTValue *.
1092 | b ->fff_resn 1223 | lw SFARG1HI, HI(CRET1)
1093 |. ldc1 FRET1, 0(CRET1) 1224 | b ->fff_restv
1225 |. lw SFARG1LO, LO(CRET1)
1094 | 1226 |
1095 |//-- Base library: conversions ------------------------------------------ 1227 |//-- Base library: conversions ------------------------------------------
1096 | 1228 |
1097 |.ffunc tonumber 1229 |.ffunc tonumber
1098 | // Only handles the number case inline (without a base argument). 1230 | // Only handles the number case inline (without a base argument).
1099 | lw CARG1, HI(BASE) 1231 | lw CARG1, HI(BASE)
1100 | xori AT, NARGS8:RC, 8 1232 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1101 | sltiu CARG1, CARG1, LJ_TISNUM 1233 | sltu TMP0, TISNUM, CARG1
1102 | movn CARG1, r0, AT 1234 | or AT, AT, TMP0
1103 | beqz CARG1, ->fff_fallback // Exactly one number argument. 1235 | bnez AT, ->fff_fallback
1104 |. ldc1 FRET1, 0(BASE) 1236 |. lw SFARG1HI, HI(BASE)
1105 | b ->fff_resn 1237 | b ->fff_restv
1106 |. nop 1238 |. lw SFARG1LO, LO(BASE)
1107 | 1239 |
1108 |.ffunc_1 tostring 1240 |.ffunc_1 tostring
1109 | // Only handles the string or number case inline. 1241 | // Only handles the string or number case inline.
1110 | li AT, LJ_TSTR 1242 | li AT, LJ_TSTR
1111 | // A __tostring method in the string base metatable is ignored. 1243 | // A __tostring method in the string base metatable is ignored.
1112 | beq CARG3, AT, ->fff_restv // String key? 1244 | beq SFARG1HI, AT, ->fff_restv // String key?
1113 | // Handle numbers inline, unless a number base metatable is present. 1245 | // Handle numbers inline, unless a number base metatable is present.
1114 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1246 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1115 | sltiu TMP0, CARG3, LJ_TISNUM 1247 | sltu TMP0, TISNUM, SFARG1HI
1116 | sltiu TMP1, TMP1, 1 1248 | or TMP0, TMP0, TMP1
1117 | and TMP0, TMP0, TMP1 1249 | bnez TMP0, ->fff_fallback
1118 | beqz TMP0, ->fff_fallback
1119 |. sw BASE, L->base // Add frame since C call can throw. 1250 |. sw BASE, L->base // Add frame since C call can throw.
1120 | ffgccheck 1251 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1252 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1253 | load_got lj_strfmt_number
1123 | move CARG1, L 1254 | move CARG1, L
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1255 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1125 |. move CARG2, BASE 1256 |. move CARG2, BASE
1126 | // Returns GCstr *. 1257 | // Returns GCstr *.
1127 | li CARG3, LJ_TSTR 1258 | li SFARG1HI, LJ_TSTR
1128 | b ->fff_restv 1259 | b ->fff_restv
1129 |. move CARG1, CRET1 1260 |. move SFARG1LO, CRET1
1130 | 1261 |
1131 |//-- Base library: iterators ------------------------------------------- 1262 |//-- Base library: iterators -------------------------------------------
1132 | 1263 |
@@ -1148,31 +1279,38 @@ static void build_subroutines(BuildCtx *ctx)
1148 |. move CARG1, L 1279 |. move CARG1, L
1149 | // Returns 0 at end of traversal. 1280 | // Returns 0 at end of traversal.
1150 | beqz CRET1, ->fff_restv // End of traversal: return nil. 1281 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1151 |. li CARG3, LJ_TNIL 1282 |. li SFARG1HI, LJ_TNIL
1152 | ldc1 f0, 8(BASE) // Copy key and value to results. 1283 | lw TMP0, 8+HI(BASE)
1284 | lw TMP1, 8+LO(BASE)
1153 | addiu RA, BASE, -8 1285 | addiu RA, BASE, -8
1154 | ldc1 f2, 16(BASE) 1286 | lw TMP2, 16+HI(BASE)
1155 | li RD, (2+1)*8 1287 | lw TMP3, 16+LO(BASE)
1156 | sdc1 f0, 0(RA) 1288 | sw TMP0, HI(RA)
1289 | sw TMP1, LO(RA)
1290 | sw TMP2, 8+HI(RA)
1291 | sw TMP3, 8+LO(RA)
1157 | b ->fff_res 1292 | b ->fff_res
1158 |. sdc1 f2, 8(RA) 1293 |. li RD, (2+1)*8
1159 | 1294 |
1160 |.ffunc_1 pairs 1295 |.ffunc_1 pairs
1161 | li AT, LJ_TTAB 1296 | li AT, LJ_TTAB
1162 | bne CARG3, AT, ->fff_fallback 1297 | bne SFARG1HI, AT, ->fff_fallback
1163 |. lw PC, FRAME_PC(BASE) 1298 |. lw PC, FRAME_PC(BASE)
1164#if LJ_52 1299#if LJ_52
1165 | lw TAB:TMP2, TAB:CARG1->metatable 1300 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1166 | ldc1 f0, CFUNC:RB->upvalue[0] 1301 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1302 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1167 | bnez TAB:TMP2, ->fff_fallback 1303 | bnez TAB:TMP2, ->fff_fallback
1168#else 1304#else
1169 | ldc1 f0, CFUNC:RB->upvalue[0] 1305 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1306 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1170#endif 1307#endif
1171 |. addiu RA, BASE, -8 1308 |. addiu RA, BASE, -8
1172 | sw TISNIL, 8+HI(BASE) 1309 | sw TISNIL, 8+HI(BASE)
1173 | li RD, (3+1)*8 1310 | sw TMP0, HI(RA)
1311 | sw TMP1, LO(RA)
1174 | b ->fff_res 1312 | b ->fff_res
1175 |. sdc1 f0, 0(RA) 1313 |. li RD, (3+1)*8
1176 | 1314 |
1177 |.ffunc ipairs_aux 1315 |.ffunc ipairs_aux
1178 | sltiu AT, NARGS8:RC, 16 1316 | sltiu AT, NARGS8:RC, 16
@@ -1180,35 +1318,32 @@ static void build_subroutines(BuildCtx *ctx)
1180 | lw TAB:CARG1, LO(BASE) 1318 | lw TAB:CARG1, LO(BASE)
1181 | lw CARG4, 8+HI(BASE) 1319 | lw CARG4, 8+HI(BASE)
1182 | bnez AT, ->fff_fallback 1320 | bnez AT, ->fff_fallback
1183 |. ldc1 FARG2, 8(BASE) 1321 |. addiu CARG3, CARG3, -LJ_TTAB
1184 | addiu CARG3, CARG3, -LJ_TTAB 1322 | xor CARG4, CARG4, TISNUM
1185 | sltiu AT, CARG4, LJ_TISNUM 1323 | and AT, CARG3, CARG4
1186 | li TMP0, 1 1324 | bnez AT, ->fff_fallback
1187 | movn AT, r0, CARG3
1188 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1325 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1326 | lw TMP2, 8+LO(BASE)
1192 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1327 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1328 | lw TMP1, TAB:CARG1->array
1195 | mfc1 TMP2, FRET1
1196 | addiu RA, BASE, -8
1197 | add.d FARG2, FARG2, FARG1
1198 | addiu TMP2, TMP2, 1 1329 | addiu TMP2, TMP2, 1
1330 | sw TISNUM, -8+HI(BASE)
1199 | sltu AT, TMP2, TMP0 1331 | sltu AT, TMP2, TMP0
1332 | sw TMP2, -8+LO(BASE)
1333 | beqz AT, >2 // Not in array part?
1334 |. addiu RA, BASE, -8
1200 | sll TMP3, TMP2, 3 1335 | sll TMP3, TMP2, 3
1201 | addu TMP3, TMP1, TMP3 1336 | addu TMP3, TMP1, TMP3
1202 | beqz AT, >2 // Not in array part? 1337 | lw TMP1, HI(TMP3)
1203 |. sdc1 FARG2, 0(RA) 1338 | lw TMP2, LO(TMP3)
1204 | lw TMP2, HI(TMP3)
1205 | ldc1 f0, 0(TMP3)
1206 |1: 1339 |1:
1207 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. 1340 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1208 |. li RD, (0+1)*8 1341 |. li RD, (0+1)*8
1209 | li RD, (2+1)*8 1342 | sw TMP1, 8+HI(RA)
1343 | sw TMP2, 8+LO(RA)
1210 | b ->fff_res 1344 | b ->fff_res
1211 |. sdc1 f0, 8(RA) 1345 |. li RD, (2+1)*8
1346 |
1212 |2: // Check for empty hash part first. Otherwise call C function. 1347 |2: // Check for empty hash part first. Otherwise call C function.
1213 | lw TMP0, TAB:CARG1->hmask 1348 | lw TMP0, TAB:CARG1->hmask
1214 | load_got lj_tab_getinth 1349 | load_got lj_tab_getinth
@@ -1219,27 +1354,30 @@ static void build_subroutines(BuildCtx *ctx)
1219 | // Returns cTValue * or NULL. 1354 | // Returns cTValue * or NULL.
1220 | beqz CRET1, ->fff_res 1355 | beqz CRET1, ->fff_res
1221 |. li RD, (0+1)*8 1356 |. li RD, (0+1)*8
1222 | lw TMP2, HI(CRET1) 1357 | lw TMP1, HI(CRET1)
1223 | b <1 1358 | b <1
1224 |. ldc1 f0, 0(CRET1) 1359 |. lw TMP2, LO(CRET1)
1225 | 1360 |
1226 |.ffunc_1 ipairs 1361 |.ffunc_1 ipairs
1227 | li AT, LJ_TTAB 1362 | li AT, LJ_TTAB
1228 | bne CARG3, AT, ->fff_fallback 1363 | bne SFARG1HI, AT, ->fff_fallback
1229 |. lw PC, FRAME_PC(BASE) 1364 |. lw PC, FRAME_PC(BASE)
1230#if LJ_52 1365#if LJ_52
1231 | lw TAB:TMP2, TAB:CARG1->metatable 1366 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1232 | ldc1 f0, CFUNC:RB->upvalue[0] 1367 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1368 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1233 | bnez TAB:TMP2, ->fff_fallback 1369 | bnez TAB:TMP2, ->fff_fallback
1234#else 1370#else
1235 | ldc1 f0, CFUNC:RB->upvalue[0] 1371 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1372 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1236#endif 1373#endif
1237 |. addiu RA, BASE, -8 1374 |. addiu RA, BASE, -8
1238 | sw r0, 8+HI(BASE) 1375 | sw TISNUM, 8+HI(BASE)
1239 | sw r0, 8+LO(BASE) 1376 | sw r0, 8+LO(BASE)
1240 | li RD, (3+1)*8 1377 | sw TMP0, HI(RA)
1378 | sw TMP1, LO(RA)
1241 | b ->fff_res 1379 | b ->fff_res
1242 |. sdc1 f0, 0(RA) 1380 |. li RD, (3+1)*8
1243 | 1381 |
1244 |//-- Base library: catch errors ---------------------------------------- 1382 |//-- Base library: catch errors ----------------------------------------
1245 | 1383 |
@@ -1259,8 +1397,9 @@ static void build_subroutines(BuildCtx *ctx)
1259 | sltiu AT, NARGS8:RC, 16 1397 | sltiu AT, NARGS8:RC, 16
1260 | lw CARG4, 8+HI(BASE) 1398 | lw CARG4, 8+HI(BASE)
1261 | bnez AT, ->fff_fallback 1399 | bnez AT, ->fff_fallback
1262 |. ldc1 FARG2, 8(BASE) 1400 |. lw CARG3, 8+LO(BASE)
1263 | ldc1 FARG1, 0(BASE) 1401 | lw CARG1, LO(BASE)
1402 | lw CARG2, HI(BASE)
1264 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1403 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1265 | li AT, LJ_TFUNC 1404 | li AT, LJ_TFUNC
1266 | move TMP2, BASE 1405 | move TMP2, BASE
@@ -1268,9 +1407,11 @@ static void build_subroutines(BuildCtx *ctx)
1268 | addiu BASE, BASE, 16 1407 | addiu BASE, BASE, 16
1269 | // Remember active hook before pcall. 1408 | // Remember active hook before pcall.
1270 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT 1409 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1271 | sdc1 FARG2, 0(TMP2) // Swap function and traceback. 1410 | sw CARG3, LO(TMP2) // Swap function and traceback.
1411 | sw CARG4, HI(TMP2)
1272 | andi TMP3, TMP3, 1 1412 | andi TMP3, TMP3, 1
1273 | sdc1 FARG1, 8(TMP2) 1413 | sw CARG1, 8+LO(TMP2)
1414 | sw CARG2, 8+HI(TMP2)
1274 | addiu PC, TMP3, 16+FRAME_PCALL 1415 | addiu PC, TMP3, 16+FRAME_PCALL
1275 | b ->vm_call_dispatch 1416 | b ->vm_call_dispatch
1276 |. addiu NARGS8:RC, NARGS8:RC, -16 1417 |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1279,7 +1420,10 @@ static void build_subroutines(BuildCtx *ctx)
1279 | 1420 |
1280 |.macro coroutine_resume_wrap, resume 1421 |.macro coroutine_resume_wrap, resume
1281 |.if resume 1422 |.if resume
1282 |.ffunc_1 coroutine_resume 1423 |.ffunc coroutine_resume
1424 | lw CARG3, HI(BASE)
1425 | beqz NARGS8:RC, ->fff_fallback
1426 |. lw CARG1, LO(BASE)
1283 | li AT, LJ_TTHREAD 1427 | li AT, LJ_TTHREAD
1284 | bne CARG3, AT, ->fff_fallback 1428 | bne CARG3, AT, ->fff_fallback
1285 |.else 1429 |.else
@@ -1314,11 +1458,13 @@ static void build_subroutines(BuildCtx *ctx)
1314 | move CARG3, CARG2 1458 | move CARG3, CARG2
1315 | sw BASE, L->top 1459 | sw BASE, L->top
1316 |2: // Move args to coroutine. 1460 |2: // Move args to coroutine.
1317 | ldc1 f0, 0(BASE) 1461 | lw SFRETHI, HI(BASE)
1462 | lw SFRETLO, LO(BASE)
1318 | sltu AT, BASE, TMP1 1463 | sltu AT, BASE, TMP1
1319 | beqz AT, >3 1464 | beqz AT, >3
1320 |. addiu BASE, BASE, 8 1465 |. addiu BASE, BASE, 8
1321 | sdc1 f0, 0(CARG3) 1466 | sw SFRETHI, HI(CARG3)
1467 | sw SFRETLO, LO(CARG3)
1322 | b <2 1468 | b <2
1323 |. addiu CARG3, CARG3, 8 1469 |. addiu CARG3, CARG3, 8
1324 |3: 1470 |3:
@@ -1331,6 +1477,7 @@ static void build_subroutines(BuildCtx *ctx)
1331 | lw TMP3, L:RA->top 1477 | lw TMP3, L:RA->top
1332 | li_vmstate INTERP 1478 | li_vmstate INTERP
1333 | lw BASE, L->base 1479 | lw BASE, L->base
1480 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
1334 | st_vmstate 1481 | st_vmstate
1335 | beqz AT, >8 1482 | beqz AT, >8
1336 |. subu RD, TMP3, TMP2 1483 |. subu RD, TMP3, TMP2
@@ -1343,10 +1490,12 @@ static void build_subroutines(BuildCtx *ctx)
1343 | sw TMP2, L:RA->top // Clear coroutine stack. 1490 | sw TMP2, L:RA->top // Clear coroutine stack.
1344 | move TMP1, BASE 1491 | move TMP1, BASE
1345 |5: // Move results from coroutine. 1492 |5: // Move results from coroutine.
1346 | ldc1 f0, 0(TMP2) 1493 | lw SFRETHI, HI(TMP2)
1494 | lw SFRETLO, LO(TMP2)
1347 | addiu TMP2, TMP2, 8 1495 | addiu TMP2, TMP2, 8
1348 | sltu AT, TMP2, TMP3 1496 | sltu AT, TMP2, TMP3
1349 | sdc1 f0, 0(TMP1) 1497 | sw SFRETHI, HI(TMP1)
1498 | sw SFRETLO, LO(TMP1)
1350 | bnez AT, <5 1499 | bnez AT, <5
1351 |. addiu TMP1, TMP1, 8 1500 |. addiu TMP1, TMP1, 8
1352 |6: 1501 |6:
@@ -1371,12 +1520,14 @@ static void build_subroutines(BuildCtx *ctx)
1371 |.if resume 1520 |.if resume
1372 | addiu TMP3, TMP3, -8 1521 | addiu TMP3, TMP3, -8
1373 | li TMP1, LJ_TFALSE 1522 | li TMP1, LJ_TFALSE
1374 | ldc1 f0, 0(TMP3) 1523 | lw SFRETHI, HI(TMP3)
1524 | lw SFRETLO, LO(TMP3)
1375 | sw TMP3, L:RA->top // Remove error from coroutine stack. 1525 | sw TMP3, L:RA->top // Remove error from coroutine stack.
1376 | li RD, (2+1)*8 1526 | li RD, (2+1)*8
1377 | sw TMP1, -8+HI(BASE) // Prepend false to results. 1527 | sw TMP1, -8+HI(BASE) // Prepend false to results.
1378 | addiu RA, BASE, -8 1528 | addiu RA, BASE, -8
1379 | sdc1 f0, 0(BASE) // Copy error message. 1529 | sw SFRETHI, HI(BASE) // Copy error message.
1530 | sw SFRETLO, LO(BASE)
1380 | b <7 1531 | b <7
1381 |. andi TMP0, PC, FRAME_TYPE 1532 |. andi TMP0, PC, FRAME_TYPE
1382 |.else 1533 |.else
@@ -1412,20 +1563,29 @@ static void build_subroutines(BuildCtx *ctx)
1412 | 1563 |
1413 |//-- Math library ------------------------------------------------------- 1564 |//-- Math library -------------------------------------------------------
1414 | 1565 |
1415 |.ffunc_n math_abs 1566 |.ffunc_1 math_abs
1416 |. abs.d FRET1, FARG1 1567 | bne SFARG1HI, TISNUM, >1
1417 |->fff_resn: 1568 |. sra TMP0, SFARG1LO, 31
1418 | lw PC, FRAME_PC(BASE) 1569 | xor TMP1, SFARG1LO, TMP0
1419 | addiu RA, BASE, -8 1570 | subu SFARG1LO, TMP1, TMP0
1420 | b ->fff_res1 1571 | bgez SFARG1LO, ->fff_restv
1421 |. sdc1 FRET1, -8(BASE) 1572 |. nop
1573 | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1574 | b ->fff_restv
1575 |. li SFARG1LO, 0
1576 |1:
1577 | sltiu AT, SFARG1HI, LJ_TISNUM
1578 | beqz AT, ->fff_fallback
1579 |. sll SFARG1HI, SFARG1HI, 1
1580 | srl SFARG1HI, SFARG1HI, 1
1581 |// fallthrough
1422 | 1582 |
1423 |->fff_restv: 1583 |->fff_restv:
1424 | // CARG3/CARG1 = TValue result. 1584 | // SFARG1LO/SFARG1HI = TValue result.
1425 | lw PC, FRAME_PC(BASE) 1585 | lw PC, FRAME_PC(BASE)
1426 | sw CARG3, -8+HI(BASE) 1586 | sw SFARG1HI, -8+HI(BASE)
1427 | addiu RA, BASE, -8 1587 | addiu RA, BASE, -8
1428 | sw CARG1, -8+LO(BASE) 1588 | sw SFARG1LO, -8+LO(BASE)
1429 |->fff_res1: 1589 |->fff_res1:
1430 | // RA = results, PC = return. 1590 | // RA = results, PC = return.
1431 | li RD, (1+1)*8 1591 | li RD, (1+1)*8
@@ -1454,15 +1614,19 @@ static void build_subroutines(BuildCtx *ctx)
1454 |. sw TISNIL, -8+HI(TMP1) 1614 |. sw TISNIL, -8+HI(TMP1)
1455 | 1615 |
1456 |.macro math_extern, func 1616 |.macro math_extern, func
1457 |->ff_math_ .. func: 1617 | .ffunc math_ .. func
1458 | lw CARG3, HI(BASE) 1618 | lw SFARG1HI, HI(BASE)
1459 | beqz NARGS8:RC, ->fff_fallback 1619 | beqz NARGS8:RC, ->fff_fallback
1460 |. load_got func 1620 |. load_got func
1461 | sltiu AT, CARG3, LJ_TISNUM 1621 | sltiu AT, SFARG1HI, LJ_TISNUM
1462 | beqz AT, ->fff_fallback 1622 | beqz AT, ->fff_fallback
1463 |. nop 1623 |.if FPU
1464 | call_extern
1465 |. ldc1 FARG1, 0(BASE) 1624 |. ldc1 FARG1, 0(BASE)
1625 |.else
1626 |. lw SFARG1LO, LO(BASE)
1627 |.endif
1628 | call_extern
1629 |. nop
1466 | b ->fff_resn 1630 | b ->fff_resn
1467 |. nop 1631 |. nop
1468 |.endmacro 1632 |.endmacro
@@ -1476,10 +1640,22 @@ static void build_subroutines(BuildCtx *ctx)
1476 |. nop 1640 |. nop
1477 |.endmacro 1641 |.endmacro
1478 | 1642 |
1643 |// TODO: Return integer type if result is integer (own sf implementation).
1479 |.macro math_round, func 1644 |.macro math_round, func
1480 | .ffunc_n math_ .. func 1645 |->ff_math_ .. func:
1481 |. nop 1646 | lw SFARG1HI, HI(BASE)
1647 | beqz NARGS8:RC, ->fff_fallback
1648 |. lw SFARG1LO, LO(BASE)
1649 | beq SFARG1HI, TISNUM, ->fff_restv
1650 |. sltu AT, SFARG1HI, TISNUM
1651 | beqz AT, ->fff_fallback
1652 |.if FPU
1653 |. ldc1 FARG1, 0(BASE)
1482 | bal ->vm_ .. func 1654 | bal ->vm_ .. func
1655 |.else
1656 |. load_got func
1657 | call_extern
1658 |.endif
1483 |. nop 1659 |. nop
1484 | b ->fff_resn 1660 | b ->fff_resn
1485 |. nop 1661 |. nop
@@ -1489,15 +1665,19 @@ static void build_subroutines(BuildCtx *ctx)
1489 | math_round ceil 1665 | math_round ceil
1490 | 1666 |
1491 |.ffunc math_log 1667 |.ffunc math_log
1492 | lw CARG3, HI(BASE)
1493 | li AT, 8 1668 | li AT, 8
1494 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1669 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1495 |. load_got log 1670 |. lw SFARG1HI, HI(BASE)
1496 | sltiu AT, CARG3, LJ_TISNUM 1671 | sltiu AT, SFARG1HI, LJ_TISNUM
1497 | beqz AT, ->fff_fallback 1672 | beqz AT, ->fff_fallback
1498 |. nop 1673 |. load_got log
1674 |.if FPU
1499 | call_extern 1675 | call_extern
1500 |. ldc1 FARG1, 0(BASE) 1676 |. ldc1 FARG1, 0(BASE)
1677 |.else
1678 | call_extern
1679 |. lw SFARG1LO, LO(BASE)
1680 |.endif
1501 | b ->fff_resn 1681 | b ->fff_resn
1502 |. nop 1682 |. nop
1503 | 1683 |
@@ -1516,23 +1696,43 @@ static void build_subroutines(BuildCtx *ctx)
1516 | math_extern2 atan2 1696 | math_extern2 atan2
1517 | math_extern2 fmod 1697 | math_extern2 fmod
1518 | 1698 |
1699 |.if FPU
1519 |.ffunc_n math_sqrt 1700 |.ffunc_n math_sqrt
1520 |. sqrt.d FRET1, FARG1 1701 |. sqrt.d FRET1, FARG1
1521 | b ->fff_resn 1702 |// fallthrough to ->fff_resn
1522 |. nop 1703 |.else
1704 | math_extern sqrt
1705 |.endif
1706 |
1707 |->fff_resn:
1708 | lw PC, FRAME_PC(BASE)
1709 | addiu RA, BASE, -8
1710 |.if FPU
1711 | b ->fff_res1
1712 |. sdc1 FRET1, -8(BASE)
1713 |.else
1714 | sw SFRETHI, -8+HI(BASE)
1715 | b ->fff_res1
1716 |. sw SFRETLO, -8+LO(BASE)
1717 |.endif
1523 | 1718 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 | 1719 |
1530 |.ffunc_nn math_ldexp 1720 |.ffunc math_ldexp
1531 | cvt.w.d FARG2, FARG2 1721 | sltiu AT, NARGS8:RC, 16
1722 | lw SFARG1HI, HI(BASE)
1723 | bnez AT, ->fff_fallback
1724 |. lw CARG4, 8+HI(BASE)
1725 | bne CARG4, TISNUM, ->fff_fallback
1532 | load_got ldexp 1726 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1727 |. sltu AT, SFARG1HI, TISNUM
1728 | beqz AT, ->fff_fallback
1729 |.if FPU
1730 |. ldc1 FARG1, 0(BASE)
1731 |.else
1732 |. lw SFARG1LO, LO(BASE)
1733 |.endif
1534 | call_extern 1734 | call_extern
1535 |. nop 1735 |. lw CARG3, 8+LO(BASE)
1536 | b ->fff_resn 1736 | b ->fff_resn
1537 |. nop 1737 |. nop
1538 | 1738 |
@@ -1543,10 +1743,17 @@ static void build_subroutines(BuildCtx *ctx)
1543 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 1743 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1544 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1744 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1545 | addiu RA, BASE, -8 1745 | addiu RA, BASE, -8
1746 |.if FPU
1546 | mtc1 TMP1, FARG2 1747 | mtc1 TMP1, FARG2
1547 | sdc1 FRET1, 0(RA) 1748 | sdc1 FRET1, 0(RA)
1548 | cvt.d.w FARG2, FARG2 1749 | cvt.d.w FARG2, FARG2
1549 | sdc1 FARG2, 8(RA) 1750 | sdc1 FARG2, 8(RA)
1751 |.else
1752 | sw SFRETLO, LO(RA)
1753 | sw SFRETHI, HI(RA)
1754 | sw TMP1, 8+LO(RA)
1755 | sw TISNUM, 8+HI(RA)
1756 |.endif
1550 | b ->fff_res 1757 | b ->fff_res
1551 |. li RD, (2+1)*8 1758 |. li RD, (2+1)*8
1552 | 1759 |
@@ -1556,49 +1763,109 @@ static void build_subroutines(BuildCtx *ctx)
1556 | call_extern 1763 | call_extern
1557 |. addiu CARG3, BASE, -8 1764 |. addiu CARG3, BASE, -8
1558 | addiu RA, BASE, -8 1765 | addiu RA, BASE, -8
1766 |.if FPU
1559 | sdc1 FRET1, 0(BASE) 1767 | sdc1 FRET1, 0(BASE)
1768 |.else
1769 | sw SFRETLO, LO(BASE)
1770 | sw SFRETHI, HI(BASE)
1771 |.endif
1560 | b ->fff_res 1772 | b ->fff_res
1561 |. li RD, (2+1)*8 1773 |. li RD, (2+1)*8
1562 | 1774 |
1563 |.macro math_minmax, name, ismax 1775 |.macro math_minmax, name, intins, ismax
1564 |->ff_ .. name: 1776 | .ffunc_1 name
1565 | lw CARG3, HI(BASE) 1777 | addu TMP3, BASE, NARGS8:RC
1566 | beqz NARGS8:RC, ->fff_fallback 1778 | bne SFARG1HI, TISNUM, >5
1567 |. ldc1 FRET1, 0(BASE) 1779 |. addiu TMP2, BASE, 8
1568 | sltiu AT, CARG3, LJ_TISNUM 1780 |1: // Handle integers.
1781 |. lw SFARG2HI, HI(TMP2)
1782 | beq TMP2, TMP3, ->fff_restv
1783 |. lw SFARG2LO, LO(TMP2)
1784 | bne SFARG2HI, TISNUM, >3
1785 |. slt AT, SFARG1LO, SFARG2LO
1786 | intins SFARG1LO, SFARG2LO, AT
1787 | b <1
1788 |. addiu TMP2, TMP2, 8
1789 |
1790 |3: // Convert intermediate result to number and continue with number loop.
1791 | sltiu AT, SFARG2HI, LJ_TISNUM
1569 | beqz AT, ->fff_fallback 1792 | beqz AT, ->fff_fallback
1570 |. addu TMP2, BASE, NARGS8:RC 1793 |.if FPU
1571 | addiu TMP1, BASE, 8 1794 |. mtc1 SFARG1LO, FRET1
1572 | beq TMP1, TMP2, ->fff_resn 1795 | cvt.d.w FRET1, FRET1
1573 |1: 1796 | b >7
1574 |. lw CARG3, HI(TMP1) 1797 |. ldc1 FARG1, 0(TMP2)
1575 | ldc1 FARG1, 0(TMP1) 1798 |.else
1576 | addiu TMP1, TMP1, 8 1799 |. nop
1577 | sltiu AT, CARG3, LJ_TISNUM 1800 | bal ->vm_sfi2d_1
1801 |. nop
1802 | b >7
1803 |. nop
1804 |.endif
1805 |
1806 |5:
1807 |. sltiu AT, SFARG1HI, LJ_TISNUM
1578 | beqz AT, ->fff_fallback 1808 | beqz AT, ->fff_fallback
1809 |.if FPU
1810 |. ldc1 FRET1, 0(BASE)
1811 |.endif
1812 |
1813 |6: // Handle numbers.
1814 |. lw SFARG2HI, HI(TMP2)
1815 |.if FPU
1816 | beq TMP2, TMP3, ->fff_resn
1817 |.else
1818 | beq TMP2, TMP3, ->fff_restv
1819 |.endif
1820 |. sltiu AT, SFARG2HI, LJ_TISNUM
1821 | beqz AT, >8
1822 |.if FPU
1823 |. ldc1 FARG1, 0(TMP2)
1824 |.else
1825 |. lw SFARG2LO, LO(TMP2)
1826 |.endif
1827 |7:
1828 |.if FPU
1579 |.if ismax 1829 |.if ismax
1580 |. c.olt.d FARG1, FRET1 1830 | c.olt.d FARG1, FRET1
1581 |.else 1831 |.else
1582 |. c.olt.d FRET1, FARG1 1832 | c.olt.d FRET1, FARG1
1833 |.endif
1834 | movf.d FRET1, FARG1
1835 |.else
1836 |.if ismax
1837 | bal ->vm_sfcmpogt
1838 |.else
1839 | bal ->vm_sfcmpolt
1583 |.endif 1840 |.endif
1584 | bne TMP1, TMP2, <1
1585 |. movf.d FRET1, FARG1
1586 | b ->fff_resn
1587 |. nop 1841 |. nop
1842 | movz SFARG1LO, SFARG2LO, CRET1
1843 | movz SFARG1HI, SFARG2HI, CRET1
1844 |.endif
1845 | b <6
1846 |. addiu TMP2, TMP2, 8
1847 |
1848 |8: // Convert integer to number and continue with number loop.
1849 | bne SFARG2HI, TISNUM, ->fff_fallback
1850 |.if FPU
1851 |. lwc1 FARG1, LO(TMP2)
1852 | b <7
1853 |. cvt.d.w FARG1, FARG1
1854 |.else
1855 |. nop
1856 | bal ->vm_sfi2d_2
1857 |. nop
1858 | b <7
1859 |. nop
1860 |.endif
1861 |
1588 |.endmacro 1862 |.endmacro
1589 | 1863 |
1590 | math_minmax math_min, 0 1864 | math_minmax math_min, movz, 0
1591 | math_minmax math_max, 1 1865 | math_minmax math_max, movn, 1
1592 | 1866 |
1593 |//-- String library ----------------------------------------------------- 1867 |//-- String library -----------------------------------------------------
1594 | 1868 |
1595 |.ffunc_1 string_len
1596 | li AT, LJ_TSTR
1597 | bne CARG3, AT, ->fff_fallback
1598 |. nop
1599 | b ->fff_resi
1600 |. lw CRET1, STR:CARG1->len
1601 |
1602 |.ffunc string_byte // Only handle the 1-arg case here. 1869 |.ffunc string_byte // Only handle the 1-arg case here.
1603 | lw CARG3, HI(BASE) 1870 | lw CARG3, HI(BASE)
1604 | lw STR:CARG1, LO(BASE) 1871 | lw STR:CARG1, LO(BASE)
@@ -1608,33 +1875,31 @@ static void build_subroutines(BuildCtx *ctx)
1608 | bnez AT, ->fff_fallback // Need exactly 1 string argument. 1875 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1609 |. nop 1876 |. nop
1610 | lw TMP0, STR:CARG1->len 1877 | lw TMP0, STR:CARG1->len
1611 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1612 | addiu RA, BASE, -8 1878 | addiu RA, BASE, -8
1879 | lw PC, FRAME_PC(BASE)
1613 | sltu RD, r0, TMP0 1880 | sltu RD, r0, TMP0
1614 | mtc1 TMP1, f0 1881 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1615 | addiu RD, RD, 1 1882 | addiu RD, RD, 1
1616 | cvt.d.w f0, f0
1617 | lw PC, FRAME_PC(BASE)
1618 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 1883 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1884 | sw TISNUM, HI(RA)
1619 | b ->fff_res 1885 | b ->fff_res
1620 |. sdc1 f0, 0(RA) 1886 |. sw TMP1, LO(RA)
1621 | 1887 |
1622 |.ffunc string_char // Only handle the 1-arg case here. 1888 |.ffunc string_char // Only handle the 1-arg case here.
1623 | ffgccheck 1889 | ffgccheck
1624 |. nop 1890 |. nop
1625 | lw CARG3, HI(BASE) 1891 | lw CARG3, HI(BASE)
1626 | ldc1 FARG1, 0(BASE) 1892 | lw CARG1, LO(BASE)
1627 | li AT, 8 1893 | li TMP1, 255
1628 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1894 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1629 |. sltiu AT, CARG3, LJ_TISNUM 1895 | xor TMP0, CARG3, TISNUM // Integer.
1630 | beqz AT, ->fff_fallback 1896 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1897 | or AT, AT, TMP0
1898 | or AT, AT, TMP1
1899 | bnez AT, ->fff_fallback
1631 |. li CARG3, 1 1900 |. li CARG3, 1
1632 | cvt.w.d FARG1, FARG1
1633 | addiu CARG2, sp, ARG5_OFS 1901 | addiu CARG2, sp, ARG5_OFS
1634 | sltiu AT, TMP0, 256 1902 | sb CARG1, ARG5
1635 | mfc1 TMP0, FARG1
1636 | beqz AT, ->fff_fallback
1637 |. sw TMP0, ARG5
1638 |->fff_newstr: 1903 |->fff_newstr:
1639 | load_got lj_str_new 1904 | load_got lj_str_new
1640 | sw BASE, L->base 1905 | sw BASE, L->base
@@ -1643,35 +1908,30 @@ static void build_subroutines(BuildCtx *ctx)
1643 |. move CARG1, L 1908 |. move CARG1, L
1644 | // Returns GCstr *. 1909 | // Returns GCstr *.
1645 | lw BASE, L->base 1910 | lw BASE, L->base
1646 | move CARG1, CRET1 1911 |->fff_resstr:
1912 | move SFARG1LO, CRET1
1647 | b ->fff_restv 1913 | b ->fff_restv
1648 |. li CARG3, LJ_TSTR 1914 |. li SFARG1HI, LJ_TSTR
1649 | 1915 |
1650 |.ffunc string_sub 1916 |.ffunc string_sub
1651 | ffgccheck 1917 | ffgccheck
1652 |. nop 1918 |. nop
1653 | addiu AT, NARGS8:RC, -16 1919 | addiu AT, NARGS8:RC, -16
1654 | lw CARG3, 16+HI(BASE) 1920 | lw CARG3, 16+HI(BASE)
1655 | ldc1 f0, 16(BASE)
1656 | lw TMP0, HI(BASE) 1921 | lw TMP0, HI(BASE)
1657 | lw STR:CARG1, LO(BASE) 1922 | lw STR:CARG1, LO(BASE)
1658 | bltz AT, ->fff_fallback 1923 | bltz AT, ->fff_fallback
1659 | lw CARG2, 8+HI(BASE) 1924 |. lw CARG2, 8+HI(BASE)
1660 | ldc1 f2, 8(BASE)
1661 | beqz AT, >1 1925 | beqz AT, >1
1662 |. li CARG4, -1 1926 |. li CARG4, -1
1663 | cvt.w.d f0, f0 1927 | bne CARG3, TISNUM, ->fff_fallback
1664 | sltiu AT, CARG3, LJ_TISNUM 1928 |. lw CARG4, 16+LO(BASE)
1665 | beqz AT, ->fff_fallback
1666 |. mfc1 CARG4, f0
1667 |1: 1929 |1:
1668 | sltiu AT, CARG2, LJ_TISNUM 1930 | bne CARG2, TISNUM, ->fff_fallback
1669 | beqz AT, ->fff_fallback
1670 |. li AT, LJ_TSTR 1931 |. li AT, LJ_TSTR
1671 | cvt.w.d f2, f2
1672 | bne TMP0, AT, ->fff_fallback 1932 | bne TMP0, AT, ->fff_fallback
1673 |. lw CARG2, STR:CARG1->len 1933 |. lw CARG3, 8+LO(BASE)
1674 | mfc1 CARG3, f2 1934 | lw CARG2, STR:CARG1->len
1675 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end 1935 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1676 | slt AT, CARG4, r0 1936 | slt AT, CARG4, r0
1677 | addiu TMP0, CARG2, 1 1937 | addiu TMP0, CARG2, 1
@@ -1693,139 +1953,130 @@ static void build_subroutines(BuildCtx *ctx)
1693 | bgez CARG3, ->fff_newstr 1953 | bgez CARG3, ->fff_newstr
1694 |. addiu CARG3, CARG3, 1 // len++ 1954 |. addiu CARG3, CARG3, 1 // len++
1695 |->fff_emptystr: // Return empty string. 1955 |->fff_emptystr: // Return empty string.
1696 | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) 1956 | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
1697 | b ->fff_restv 1957 | b ->fff_restv
1698 |. li CARG3, LJ_TSTR 1958 |. li SFARG1HI, LJ_TSTR
1699 |
1700 |.ffunc string_rep // Only handle the 1-char case inline.
1701 | ffgccheck
1702 |. nop
1703 | lw TMP0, HI(BASE)
1704 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1705 | lw CARG4, 8+HI(BASE)
1706 | lw STR:CARG1, LO(BASE)
1707 | addiu TMP0, TMP0, -LJ_TSTR
1708 | ldc1 f0, 8(BASE)
1709 | or AT, AT, TMP0
1710 | bnez AT, ->fff_fallback
1711 |. sltiu AT, CARG4, LJ_TISNUM
1712 | cvt.w.d f0, f0
1713 | beqz AT, ->fff_fallback
1714 |. lw TMP0, STR:CARG1->len
1715 | mfc1 CARG3, f0
1716 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1717 | li AT, 1
1718 | blez CARG3, ->fff_emptystr // Count <= 0?
1719 |. sltu AT, AT, TMP0
1720 | beqz TMP0, ->fff_emptystr // Zero length string?
1721 |. sltu TMP0, TMP1, CARG3
1722 | or AT, AT, TMP0
1723 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1724 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1725 |. lbu TMP0, STR:CARG1[1]
1726 | addu TMP2, CARG2, CARG3
1727 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1728 | addiu TMP2, TMP2, -1
1729 | sltu AT, CARG2, TMP2
1730 | bnez AT, <1
1731 |. sb TMP0, 0(TMP2)
1732 | b ->fff_newstr
1733 |. nop
1734 | 1959 |
1735 |.ffunc string_reverse 1960 |.macro ffstring_op, name
1961 | .ffunc string_ .. name
1736 | ffgccheck 1962 | ffgccheck
1737 |. nop 1963 |. nop
1738 | lw CARG3, HI(BASE) 1964 | lw CARG3, HI(BASE)
1739 | lw STR:CARG1, LO(BASE) 1965 | lw STR:CARG2, LO(BASE)
1740 | beqz NARGS8:RC, ->fff_fallback 1966 | beqz NARGS8:RC, ->fff_fallback
1741 |. li AT, LJ_TSTR 1967 |. li AT, LJ_TSTR
1742 | bne CARG3, AT, ->fff_fallback 1968 | bne CARG3, AT, ->fff_fallback
1743 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1969 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
1744 | lw CARG3, STR:CARG1->len 1970 | load_got lj_buf_putstr_ .. name
1745 | addiu CARG1, STR:CARG1, #STR 1971 | lw TMP0, SBUF:CARG1->b
1746 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1972 | sw L, SBUF:CARG1->L
1747 | sltu AT, TMP1, CARG3 1973 | sw BASE, L->base
1748 | bnez AT, ->fff_fallback 1974 | sw TMP0, SBUF:CARG1->w
1749 |. addu TMP3, CARG1, CARG3 1975 | call_intern extern lj_buf_putstr_ .. name
1750 | addu CARG4, CARG2, CARG3 1976 |. sw PC, SAVE_PC
1751 |1: // Reverse string copy. 1977 | load_got lj_buf_tostr
1752 | lbu TMP1, 0(CARG1) 1978 | call_intern lj_buf_tostr
1753 | sltu AT, CARG1, TMP3 1979 |. move SBUF:CARG1, SBUF:CRET1
1754 | beqz AT, ->fff_newstr 1980 | b ->fff_resstr
1755 |. addiu CARG1, CARG1, 1 1981 |. lw BASE, L->base
1756 | addiu CARG4, CARG4, -1
1757 | b <1
1758 | sb TMP1, 0(CARG4)
1759 |
1760 |.macro ffstring_case, name, lo
1761 | .ffunc name
1762 | ffgccheck
1763 |. nop
1764 | lw CARG3, HI(BASE)
1765 | lw STR:CARG1, LO(BASE)
1766 | beqz NARGS8:RC, ->fff_fallback
1767 |. li AT, LJ_TSTR
1768 | bne CARG3, AT, ->fff_fallback
1769 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1770 | lw CARG3, STR:CARG1->len
1771 | addiu CARG1, STR:CARG1, #STR
1772 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1773 | sltu AT, TMP1, CARG3
1774 | bnez AT, ->fff_fallback
1775 |. addu TMP3, CARG1, CARG3
1776 | move CARG4, CARG2
1777 |1: // ASCII case conversion.
1778 | lbu TMP1, 0(CARG1)
1779 | sltu AT, CARG1, TMP3
1780 | beqz AT, ->fff_newstr
1781 |. addiu TMP0, TMP1, -lo
1782 | xori TMP2, TMP1, 0x20
1783 | sltiu AT, TMP0, 26
1784 | movn TMP1, TMP2, AT
1785 | addiu CARG1, CARG1, 1
1786 | sb TMP1, 0(CARG4)
1787 | b <1
1788 |. addiu CARG4, CARG4, 1
1789 |.endmacro 1982 |.endmacro
1790 | 1983 |
1791 |ffstring_case string_lower, 65 1984 |ffstring_op reverse
1792 |ffstring_case string_upper, 97 1985 |ffstring_op lower
1986 |ffstring_op upper
1793 | 1987 |
1794 |//-- Table library ------------------------------------------------------ 1988 |//-- Bit library --------------------------------------------------------
1795 | 1989 |
1796 |.ffunc_1 table_getn 1990 |->vm_tobit_fb:
1797 | li AT, LJ_TTAB 1991 | beqz TMP1, ->fff_fallback
1798 | bne CARG3, AT, ->fff_fallback 1992 |.if FPU
1799 |. load_got lj_tab_len 1993 |. ldc1 FARG1, 0(BASE)
1800 | call_intern lj_tab_len // (GCtab *t) 1994 | add.d FARG1, FARG1, TOBIT
1801 |. nop 1995 | jr ra
1802 | // Returns uint32_t (but less than 2^31). 1996 |. mfc1 CRET1, FARG1
1803 | b ->fff_resi 1997 |.else
1998 |// FP number to bit conversion for soft-float.
1999 |->vm_tobit:
2000 | sll TMP0, SFARG1HI, 1
2001 | lui AT, 0x0020
2002 | addu TMP0, TMP0, AT
2003 | slt AT, TMP0, r0
2004 | movz SFARG1LO, r0, AT
2005 | beqz AT, >2
2006 |. li TMP1, 0x3e0
2007 | not TMP1, TMP1
2008 | sra TMP0, TMP0, 21
2009 | subu TMP0, TMP1, TMP0
2010 | slt AT, TMP0, r0
2011 | bnez AT, >1
2012 |. sll TMP1, SFARG1HI, 11
2013 | lui AT, 0x8000
2014 | or TMP1, TMP1, AT
2015 | srl AT, SFARG1LO, 21
2016 | or TMP1, TMP1, AT
2017 | slt AT, SFARG1HI, r0
2018 | beqz AT, >2
2019 |. srlv SFARG1LO, TMP1, TMP0
2020 | subu SFARG1LO, r0, SFARG1LO
2021 |2:
2022 | jr ra
2023 |. move CRET1, SFARG1LO
2024 |1:
2025 | addiu TMP0, TMP0, 21
2026 | srlv TMP1, SFARG1LO, TMP0
2027 | li AT, 20
2028 | subu TMP0, AT, TMP0
2029 | sll SFARG1LO, SFARG1HI, 12
2030 | sllv AT, SFARG1LO, TMP0
2031 | or SFARG1LO, TMP1, AT
2032 | slt AT, SFARG1HI, r0
2033 | beqz AT, <2
1804 |. nop 2034 |. nop
1805 | 2035 | jr ra
1806 |//-- Bit library -------------------------------------------------------- 2036 |. subu CRET1, r0, SFARG1LO
2037 |.endif
1807 | 2038 |
1808 |.macro .ffunc_bit, name 2039 |.macro .ffunc_bit, name
1809 | .ffunc_n bit_..name 2040 | .ffunc_1 bit_..name
1810 |. add.d FARG1, FARG1, TOBIT 2041 | beq SFARG1HI, TISNUM, >6
1811 | mfc1 CRET1, FARG1 2042 |. move CRET1, SFARG1LO
2043 | bal ->vm_tobit_fb
2044 |. sltu TMP1, SFARG1HI, TISNUM
2045 |6:
1812 |.endmacro 2046 |.endmacro
1813 | 2047 |
1814 |.macro .ffunc_bit_op, name, ins 2048 |.macro .ffunc_bit_op, name, ins
1815 | .ffunc_bit name 2049 | .ffunc_bit name
1816 | addiu TMP1, BASE, 8 2050 | addiu TMP2, BASE, 8
1817 | addu TMP2, BASE, NARGS8:RC 2051 | addu TMP3, BASE, NARGS8:RC
1818 |1: 2052 |1:
1819 | lw CARG4, HI(TMP1) 2053 | lw SFARG1HI, HI(TMP2)
1820 | beq TMP1, TMP2, ->fff_resi 2054 | beq TMP2, TMP3, ->fff_resi
1821 |. ldc1 FARG1, 0(TMP1) 2055 |. lw SFARG1LO, LO(TMP2)
1822 | sltiu AT, CARG4, LJ_TISNUM 2056 |.if FPU
1823 | beqz AT, ->fff_fallback 2057 | bne SFARG1HI, TISNUM, >2
1824 | add.d FARG1, FARG1, TOBIT 2058 |. addiu TMP2, TMP2, 8
1825 | mfc1 CARG2, FARG1
1826 | ins CRET1, CRET1, CARG2
1827 | b <1 2059 | b <1
1828 |. addiu TMP1, TMP1, 8 2060 |. ins CRET1, CRET1, SFARG1LO
2061 |2:
2062 | ldc1 FARG1, -8(TMP2)
2063 | sltu TMP1, SFARG1HI, TISNUM
2064 | beqz TMP1, ->fff_fallback
2065 |. add.d FARG1, FARG1, TOBIT
2066 | mfc1 SFARG1LO, FARG1
2067 | b <1
2068 |. ins CRET1, CRET1, SFARG1LO
2069 |.else
2070 | beq SFARG1HI, TISNUM, >2
2071 |. move CRET2, CRET1
2072 | bal ->vm_tobit_fb
2073 |. sltu TMP1, SFARG1HI, TISNUM
2074 | move SFARG1LO, CRET2
2075 |2:
2076 | ins CRET1, CRET1, SFARG1LO
2077 | b <1
2078 |. addiu TMP2, TMP2, 8
2079 |.endif
1829 |.endmacro 2080 |.endmacro
1830 | 2081 |
1831 |.ffunc_bit_op band, and 2082 |.ffunc_bit_op band, and
@@ -1849,24 +2100,28 @@ static void build_subroutines(BuildCtx *ctx)
1849 |. not CRET1, CRET1 2100 |. not CRET1, CRET1
1850 | 2101 |
1851 |.macro .ffunc_bit_sh, name, ins, shmod 2102 |.macro .ffunc_bit_sh, name, ins, shmod
1852 | .ffunc_nn bit_..name 2103 | .ffunc_2 bit_..name
1853 |. add.d FARG1, FARG1, TOBIT 2104 | beq SFARG1HI, TISNUM, >1
1854 | add.d FARG2, FARG2, TOBIT 2105 |. nop
1855 | mfc1 CARG1, FARG1 2106 | bal ->vm_tobit_fb
1856 | mfc1 CARG2, FARG2 2107 |. sltu TMP1, SFARG1HI, TISNUM
2108 | move SFARG1LO, CRET1
2109 |1:
2110 | bne SFARG2HI, TISNUM, ->fff_fallback
2111 |. nop
1857 |.if shmod == 1 2112 |.if shmod == 1
1858 | li AT, 32 2113 | li AT, 32
1859 | subu TMP0, AT, CARG2 2114 | subu TMP0, AT, SFARG2LO
1860 | sllv CARG2, CARG1, CARG2 2115 | sllv SFARG2LO, SFARG1LO, SFARG2LO
1861 | srlv CARG1, CARG1, TMP0 2116 | srlv SFARG1LO, SFARG1LO, TMP0
1862 |.elif shmod == 2 2117 |.elif shmod == 2
1863 | li AT, 32 2118 | li AT, 32
1864 | subu TMP0, AT, CARG2 2119 | subu TMP0, AT, SFARG2LO
1865 | srlv CARG2, CARG1, CARG2 2120 | srlv SFARG2LO, SFARG1LO, SFARG2LO
1866 | sllv CARG1, CARG1, TMP0 2121 | sllv SFARG1LO, SFARG1LO, TMP0
1867 |.endif 2122 |.endif
1868 | b ->fff_resi 2123 | b ->fff_resi
1869 |. ins CRET1, CARG1, CARG2 2124 |. ins CRET1, SFARG1LO, SFARG2LO
1870 |.endmacro 2125 |.endmacro
1871 | 2126 |
1872 |.ffunc_bit_sh lshift, sllv, 0 2127 |.ffunc_bit_sh lshift, sllv, 0
@@ -1878,9 +2133,11 @@ static void build_subroutines(BuildCtx *ctx)
1878 | 2133 |
1879 |.ffunc_bit tobit 2134 |.ffunc_bit tobit
1880 |->fff_resi: 2135 |->fff_resi:
1881 | mtc1 CRET1, FRET1 2136 | lw PC, FRAME_PC(BASE)
1882 | b ->fff_resn 2137 | addiu RA, BASE, -8
1883 |. cvt.d.w FRET1, FRET1 2138 | sw TISNUM, -8+HI(BASE)
2139 | b ->fff_res1
2140 |. sw CRET1, -8+LO(BASE)
1884 | 2141 |
1885 |//----------------------------------------------------------------------- 2142 |//-----------------------------------------------------------------------
1886 | 2143 |
@@ -2067,19 +2324,96 @@ static void build_subroutines(BuildCtx *ctx)
2067 | jr CRET1 2324 | jr CRET1
2068 |. lw INS, -4(PC) 2325 |. lw INS, -4(PC)
2069 | 2326 |
2327 |->cont_stitch: // Trace stitching.
2328 |.if JIT
2329 | // RA = resultptr, RB = meta base
2330 | lw INS, -4(PC)
2331 | lw TMP2, -24+LO(RB) // Save previous trace.
2332 | decode_RA8a RC, INS
2333 | addiu AT, MULTRES, -8
2334 | decode_RA8b RC
2335 | beqz AT, >2
2336 |. addu RC, BASE, RC // Call base.
2337 |1: // Move results down.
2338 | lw SFRETHI, HI(RA)
2339 | lw SFRETLO, LO(RA)
2340 | addiu AT, AT, -8
2341 | addiu RA, RA, 8
2342 | sw SFRETHI, HI(RC)
2343 | sw SFRETLO, LO(RC)
2344 | bnez AT, <1
2345 |. addiu RC, RC, 8
2346 |2:
2347 | decode_RA8a RA, INS
2348 | decode_RB8a RB, INS
2349 | decode_RA8b RA
2350 | decode_RB8b RB
2351 | addu RA, RA, RB
2352 | addu RA, BASE, RA
2353 |3:
2354 | sltu AT, RC, RA
2355 | bnez AT, >9 // More results wanted?
2356 |. nop
2357 |
2358 | lhu TMP3, TRACE:TMP2->traceno
2359 | lhu RD, TRACE:TMP2->link
2360 | beq RD, TMP3, ->cont_nop // Blacklisted.
2361 |. load_got lj_dispatch_stitch
2362 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2363 |. sll RD, RD, 3
2364 |
2365 | // Stitch a new trace to the previous trace.
2366 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2367 | sw L, DISPATCH_J(L)(DISPATCH)
2368 | sw BASE, L->base
2369 | addiu CARG1, DISPATCH, GG_DISP2J
2370 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2371 |. move CARG2, PC
2372 | b ->cont_nop
2373 |. lw BASE, L->base
2374 |
2375 |9:
2376 | sw TISNIL, HI(RC)
2377 | b <3
2378 |. addiu RC, RC, 8
2379 |.endif
2380 |
2381 |->vm_profhook: // Dispatch target for profiler hook.
2382#if LJ_HASPROFILE
2383 | load_got lj_dispatch_profile
2384 | sw MULTRES, SAVE_MULTRES
2385 | move CARG2, PC
2386 | sw BASE, L->base
2387 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2388 |. move CARG1, L
2389 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2390 | addiu PC, PC, -4
2391 | b ->cont_nop
2392 |. lw BASE, L->base
2393#endif
2394 |
2070 |//----------------------------------------------------------------------- 2395 |//-----------------------------------------------------------------------
2071 |//-- Trace exit handler ------------------------------------------------- 2396 |//-- Trace exit handler -------------------------------------------------
2072 |//----------------------------------------------------------------------- 2397 |//-----------------------------------------------------------------------
2073 | 2398 |
2074 |.macro savex_, a, b 2399 |.macro savex_, a, b
2400 |.if FPU
2075 | sdc1 f..a, 16+a*8(sp) 2401 | sdc1 f..a, 16+a*8(sp)
2076 | sw r..a, 16+32*8+a*4(sp) 2402 | sw r..a, 16+32*8+a*4(sp)
2077 | sw r..b, 16+32*8+b*4(sp) 2403 | sw r..b, 16+32*8+b*4(sp)
2404 |.else
2405 | sw r..a, 16+a*4(sp)
2406 | sw r..b, 16+b*4(sp)
2407 |.endif
2078 |.endmacro 2408 |.endmacro
2079 | 2409 |
2080 |->vm_exit_handler: 2410 |->vm_exit_handler:
2081 |.if JIT 2411 |.if JIT
2412 |.if FPU
2082 | addiu sp, sp, -(16+32*8+32*4) 2413 | addiu sp, sp, -(16+32*8+32*4)
2414 |.else
2415 | addiu sp, sp, -(16+32*4)
2416 |.endif
2083 | savex_ 0, 1 2417 | savex_ 0, 1
2084 | savex_ 2, 3 2418 | savex_ 2, 3
2085 | savex_ 4, 5 2419 | savex_ 4, 5
@@ -2094,25 +2428,34 @@ static void build_subroutines(BuildCtx *ctx)
2094 | savex_ 22, 23 2428 | savex_ 22, 23
2095 | savex_ 24, 25 2429 | savex_ 24, 25
2096 | savex_ 26, 27 2430 | savex_ 26, 27
2431 |.if FPU
2097 | sdc1 f28, 16+28*8(sp) 2432 | sdc1 f28, 16+28*8(sp)
2098 | sw r28, 16+32*8+28*4(sp)
2099 | sdc1 f30, 16+30*8(sp) 2433 | sdc1 f30, 16+30*8(sp)
2434 | sw r28, 16+32*8+28*4(sp)
2100 | sw r30, 16+32*8+30*4(sp) 2435 | sw r30, 16+32*8+30*4(sp)
2101 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. 2436 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2437 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2438 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2439 |.else
2440 | sw r28, 16+28*4(sp)
2441 | sw r30, 16+30*4(sp)
2442 | sw r0, 16+31*4(sp) // Clear RID_TMP.
2443 | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2444 | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2445 |.endif
2102 | li_vmstate EXIT 2446 | li_vmstate EXIT
2103 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2104 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2447 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2105 | lw TMP1, 0(TMP2) // Load exit number. 2448 | lw TMP1, 0(TMP2) // Load exit number.
2106 | st_vmstate 2449 | st_vmstate
2107 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. 2450 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2108 | lw L, DISPATCH_GL(jit_L)(DISPATCH) 2451 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2109 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2110 | load_got lj_trace_exit 2452 | load_got lj_trace_exit
2111 | sw L, DISPATCH_J(L)(DISPATCH) 2453 | sw L, DISPATCH_J(L)(DISPATCH)
2112 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. 2454 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2455 | sw BASE, L->base
2113 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 2456 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2114 | addiu CARG1, DISPATCH, GG_DISP2J 2457 | addiu CARG1, DISPATCH, GG_DISP2J
2115 | sw BASE, L->base 2458 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2116 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) 2459 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2117 |. addiu CARG2, sp, 16 2460 |. addiu CARG2, sp, 16
2118 | // Returns MULTRES (unscaled) or negated error code. 2461 | // Returns MULTRES (unscaled) or negated error code.
@@ -2128,19 +2471,21 @@ static void build_subroutines(BuildCtx *ctx)
2128 |.if JIT 2471 |.if JIT
2129 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. 2472 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2130 | lw L, SAVE_L 2473 | lw L, SAVE_L
2131 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2474 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2475 | sw BASE, L->base
2132 |1: 2476 |1:
2133 | bltz CRET1, >3 // Check for error from exit. 2477 | bltz CRET1, >9 // Check for error from exit.
2134 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2478 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2135 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2479 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2136 | sll MULTRES, CRET1, 3 2480 | sll MULTRES, CRET1, 3
2137 | li TISNIL, LJ_TNIL 2481 | li TISNIL, LJ_TNIL
2482 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2138 | sw MULTRES, SAVE_MULTRES 2483 | sw MULTRES, SAVE_MULTRES
2139 | mtc1 TMP3, TOBIT 2484 | .FPU mtc1 TMP3, TOBIT
2140 | lw TMP1, LFUNC:TMP1->pc 2485 | lw TMP1, LFUNC:RB->pc
2141 | sw r0, DISPATCH_GL(jit_L)(DISPATCH) 2486 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2142 | lw KBASE, PC2PROTO(k)(TMP1) 2487 | lw KBASE, PC2PROTO(k)(TMP1)
2143 | cvt.d.s TOBIT, TOBIT 2488 | .FPU cvt.d.s TOBIT, TOBIT
2144 | // Modified copy of ins_next which handles function header dispatch, too. 2489 | // Modified copy of ins_next which handles function header dispatch, too.
2145 | lw INS, 0(PC) 2490 | lw INS, 0(PC)
2146 | addiu PC, PC, 4 2491 | addiu PC, PC, 4
@@ -2148,7 +2493,7 @@ static void build_subroutines(BuildCtx *ctx)
2148 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2493 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2149 | decode_OP4a TMP1, INS 2494 | decode_OP4a TMP1, INS
2150 | decode_OP4b TMP1 2495 | decode_OP4b TMP1
2151 | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? 2496 | sltiu TMP2, TMP1, BC_FUNCF*4
2152 | addu TMP0, DISPATCH, TMP1 2497 | addu TMP0, DISPATCH, TMP1
2153 | decode_RD8a RD, INS 2498 | decode_RD8a RD, INS
2154 | lw AT, 0(TMP0) 2499 | lw AT, 0(TMP0)
@@ -2158,13 +2503,30 @@ static void build_subroutines(BuildCtx *ctx)
2158 | jr AT 2503 | jr AT
2159 |. decode_RD8b RD 2504 |. decode_RD8b RD
2160 |2: 2505 |2:
2506 | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
2507 | bnez TMP2, >3
2508 |. lw TMP1, FRAME_PC(BASE)
2509 | // Check frame below fast function.
2510 | andi TMP0, TMP1, FRAME_TYPE
2511 | bnez TMP0, >3 // Trace stitching continuation?
2512 |. nop
2513 | // Otherwise set KBASE for Lua function below fast function.
2514 | lw TMP2, -4(TMP1)
2515 | decode_RA8a TMP0, TMP2
2516 | decode_RA8b TMP0
2517 | subu TMP1, BASE, TMP0
2518 | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
2519 | lw TMP1, LFUNC:TMP2->pc
2520 | lw KBASE, PC2PROTO(k)(TMP1)
2521 |3:
2161 | addiu RC, MULTRES, -8 2522 | addiu RC, MULTRES, -8
2162 | jr AT 2523 | jr AT
2163 |. addu RA, RA, BASE 2524 |. addu RA, RA, BASE
2164 | 2525 |
2165 |3: // Rethrow error from the right C frame. 2526 |9: // Rethrow error from the right C frame.
2166 | load_got lj_err_run 2527 | load_got lj_err_trace
2167 | call_intern lj_err_run // (lua_State *L) 2528 | sub CARG2, r0, CRET1
2529 | call_intern lj_err_trace // (lua_State *L, int errcode)
2168 |. move CARG1, L 2530 |. move CARG1, L
2169 |.endif 2531 |.endif
2170 | 2532 |
@@ -2172,8 +2534,9 @@ static void build_subroutines(BuildCtx *ctx)
2172 |//-- Math helper functions ---------------------------------------------- 2534 |//-- Math helper functions ----------------------------------------------
2173 |//----------------------------------------------------------------------- 2535 |//-----------------------------------------------------------------------
2174 | 2536 |
2537 |// Hard-float round to integer.
2175 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2538 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2176 |.macro vm_round, func 2539 |.macro vm_round_hf, func
2177 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2540 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2178 | mtc1 r0, f4 2541 | mtc1 r0, f4
2179 | mtc1 TMP0, f5 2542 | mtc1 TMP0, f5
@@ -2215,6 +2578,12 @@ static void build_subroutines(BuildCtx *ctx)
2215 |. mov.d FRET1, FARG1 2578 |. mov.d FRET1, FARG1
2216 |.endmacro 2579 |.endmacro
2217 | 2580 |
2581 |.macro vm_round, func
2582 |.if FPU
2583 | vm_round_hf, func
2584 |.endif
2585 |.endmacro
2586 |
2218 |->vm_floor: 2587 |->vm_floor:
2219 | vm_round floor 2588 | vm_round floor
2220 |->vm_ceil: 2589 |->vm_ceil:
@@ -2224,6 +2593,215 @@ static void build_subroutines(BuildCtx *ctx)
2224 | vm_round trunc 2593 | vm_round trunc
2225 |.endif 2594 |.endif
2226 | 2595 |
2596 |// Soft-float integer to number conversion.
2597 |.macro sfi2d, AHI, ALO
2598 |.if not FPU
2599 | beqz ALO, >9 // Handle zero first.
2600 |. sra TMP0, ALO, 31
2601 | xor TMP1, ALO, TMP0
2602 | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2603 | clz AHI, TMP1
2604 | andi TMP0, TMP0, 0x800 // Mask sign bit.
2605 | li AT, 0x3ff+31-1
2606 | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2607 | subu AHI, AT, AHI // Exponent - 1 in AHI.
2608 | sll ALO, TMP1, 21
2609 | or AHI, AHI, TMP0 // Sign | Exponent.
2610 | srl TMP1, TMP1, 11
2611 | sll AHI, AHI, 20 // Align left.
2612 | jr ra
2613 |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
2614 |9:
2615 | jr ra
2616 |. li AHI, 0
2617 |.endif
2618 |.endmacro
2619 |
2620 |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
2621 |->vm_sfi2d_1:
2622 | sfi2d SFARG1HI, SFARG1LO
2623 |
2624 |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2625 |->vm_sfi2d_2:
2626 | sfi2d SFARG2HI, SFARG2LO
2627 |
2628 |// Soft-float comparison. Equivalent to c.eq.d.
2629 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2630 |->vm_sfcmpeq:
2631 |.if not FPU
2632 | sll AT, SFARG1HI, 1
2633 | sll TMP0, SFARG2HI, 1
2634 | or CRET1, SFARG1LO, SFARG2LO
2635 | or TMP1, AT, TMP0
2636 | or TMP1, TMP1, CRET1
2637 | beqz TMP1, >8 // Both args +-0: return 1.
2638 |. sltu CRET1, r0, SFARG1LO
2639 | lui TMP1, 0xffe0
2640 | addu AT, AT, CRET1
2641 | sltu CRET1, r0, SFARG2LO
2642 | sltu AT, TMP1, AT
2643 | addu TMP0, TMP0, CRET1
2644 | sltu TMP0, TMP1, TMP0
2645 | or TMP1, AT, TMP0
2646 | bnez TMP1, >9 // Either arg is NaN: return 0;
2647 |. xor TMP0, SFARG1HI, SFARG2HI
2648 | xor TMP1, SFARG1LO, SFARG2LO
2649 | or AT, TMP0, TMP1
2650 | jr ra
2651 |. sltiu CRET1, AT, 1 // Same values: return 1.
2652 |8:
2653 | jr ra
2654 |. li CRET1, 1
2655 |9:
2656 | jr ra
2657 |. li CRET1, 0
2658 |.endif
2659 |
2660 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2661 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2662 |->vm_sfcmpult:
2663 |.if not FPU
2664 | b >1
2665 |. li CRET2, 1
2666 |.endif
2667 |
2668 |->vm_sfcmpolt:
2669 |.if not FPU
2670 | li CRET2, 0
2671 |1:
2672 | sll AT, SFARG1HI, 1
2673 | sll TMP0, SFARG2HI, 1
2674 | or CRET1, SFARG1LO, SFARG2LO
2675 | or TMP1, AT, TMP0
2676 | or TMP1, TMP1, CRET1
2677 | beqz TMP1, >8 // Both args +-0: return 0.
2678 |. sltu CRET1, r0, SFARG1LO
2679 | lui TMP1, 0xffe0
2680 | addu AT, AT, CRET1
2681 | sltu CRET1, r0, SFARG2LO
2682 | sltu AT, TMP1, AT
2683 | addu TMP0, TMP0, CRET1
2684 | sltu TMP0, TMP1, TMP0
2685 | or TMP1, AT, TMP0
2686 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2687 |. and AT, SFARG1HI, SFARG2HI
2688 | bltz AT, >5 // Both args negative?
2689 |. nop
2690 | beq SFARG1HI, SFARG2HI, >8
2691 |. sltu CRET1, SFARG1LO, SFARG2LO
2692 | jr ra
2693 |. slt CRET1, SFARG1HI, SFARG2HI
2694 |5: // Swap conditions if both operands are negative.
2695 | beq SFARG1HI, SFARG2HI, >8
2696 |. sltu CRET1, SFARG2LO, SFARG1LO
2697 | jr ra
2698 |. slt CRET1, SFARG2HI, SFARG1HI
2699 |8:
2700 | jr ra
2701 |. nop
2702 |9:
2703 | jr ra
2704 |. move CRET1, CRET2
2705 |.endif
2706 |
2707 |->vm_sfcmpogt:
2708 |.if not FPU
2709 | sll AT, SFARG2HI, 1
2710 | sll TMP0, SFARG1HI, 1
2711 | or CRET1, SFARG2LO, SFARG1LO
2712 | or TMP1, AT, TMP0
2713 | or TMP1, TMP1, CRET1
2714 | beqz TMP1, >8 // Both args +-0: return 0.
2715 |. sltu CRET1, r0, SFARG2LO
2716 | lui TMP1, 0xffe0
2717 | addu AT, AT, CRET1
2718 | sltu CRET1, r0, SFARG1LO
2719 | sltu AT, TMP1, AT
2720 | addu TMP0, TMP0, CRET1
2721 | sltu TMP0, TMP1, TMP0
2722 | or TMP1, AT, TMP0
2723 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2724 |. and AT, SFARG2HI, SFARG1HI
2725 | bltz AT, >5 // Both args negative?
2726 |. nop
2727 | beq SFARG2HI, SFARG1HI, >8
2728 |. sltu CRET1, SFARG2LO, SFARG1LO
2729 | jr ra
2730 |. slt CRET1, SFARG2HI, SFARG1HI
2731 |5: // Swap conditions if both operands are negative.
2732 | beq SFARG2HI, SFARG1HI, >8
2733 |. sltu CRET1, SFARG1LO, SFARG2LO
2734 | jr ra
2735 |. slt CRET1, SFARG1HI, SFARG2HI
2736 |8:
2737 | jr ra
2738 |. nop
2739 |9:
2740 | jr ra
2741 |. li CRET1, 0
2742 |.endif
2743 |
2744 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2745 |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2746 |->vm_sfcmpolex:
2747 |.if not FPU
2748 | sll AT, SFARG1HI, 1
2749 | sll TMP0, SFARG2HI, 1
2750 | or CRET1, SFARG1LO, SFARG2LO
2751 | or TMP1, AT, TMP0
2752 | or TMP1, TMP1, CRET1
2753 | beqz TMP1, >8 // Both args +-0: return 1.
2754 |. sltu CRET1, r0, SFARG1LO
2755 | lui TMP1, 0xffe0
2756 | addu AT, AT, CRET1
2757 | sltu CRET1, r0, SFARG2LO
2758 | sltu AT, TMP1, AT
2759 | addu TMP0, TMP0, CRET1
2760 | sltu TMP0, TMP1, TMP0
2761 | or TMP1, AT, TMP0
2762 | bnez TMP1, >9 // Either arg is NaN: return 0;
2763 |. and AT, SFARG1HI, SFARG2HI
2764 | xor AT, AT, TMP3
2765 | bltz AT, >5 // Both args negative?
2766 |. nop
2767 | beq SFARG1HI, SFARG2HI, >6
2768 |. sltu CRET1, SFARG2LO, SFARG1LO
2769 | jr ra
2770 |. slt CRET1, SFARG2HI, SFARG1HI
2771 |5: // Swap conditions if both operands are negative.
2772 | beq SFARG1HI, SFARG2HI, >6
2773 |. sltu CRET1, SFARG1LO, SFARG2LO
2774 | slt CRET1, SFARG1HI, SFARG2HI
2775 |6:
2776 | jr ra
2777 |. nop
2778 |8:
2779 | jr ra
2780 |. li CRET1, 1
2781 |9:
2782 | jr ra
2783 |. li CRET1, 0
2784 |.endif
2785 |
2786 |.macro sfmin_max, name, fpcall
2787 |->vm_sf .. name:
2788 |.if JIT and not FPU
2789 | move TMP2, ra
2790 | bal ->fpcall
2791 |. nop
2792 | move TMP0, CRET1
2793 | move SFRETHI, SFARG1HI
2794 | move SFRETLO, SFARG1LO
2795 | move ra, TMP2
2796 | movz SFRETHI, SFARG2HI, TMP0
2797 | jr ra
2798 |. movz SFRETLO, SFARG2LO, TMP0
2799 |.endif
2800 |.endmacro
2801 |
2802 | sfmin_max min, vm_sfcmpolt
2803 | sfmin_max max, vm_sfcmpogt
2804 |
2227 |//----------------------------------------------------------------------- 2805 |//-----------------------------------------------------------------------
2228 |//-- Miscellaneous functions -------------------------------------------- 2806 |//-- Miscellaneous functions --------------------------------------------
2229 |//----------------------------------------------------------------------- 2807 |//-----------------------------------------------------------------------
@@ -2243,10 +2821,10 @@ static void build_subroutines(BuildCtx *ctx)
2243 | sw r1, CTSTATE->cb.slot 2821 | sw r1, CTSTATE->cb.slot
2244 | sw CARG1, CTSTATE->cb.gpr[0] 2822 | sw CARG1, CTSTATE->cb.gpr[0]
2245 | sw CARG2, CTSTATE->cb.gpr[1] 2823 | sw CARG2, CTSTATE->cb.gpr[1]
2246 | sdc1 FARG1, CTSTATE->cb.fpr[0] 2824 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2247 | sw CARG3, CTSTATE->cb.gpr[2] 2825 | sw CARG3, CTSTATE->cb.gpr[2]
2248 | sw CARG4, CTSTATE->cb.gpr[3] 2826 | sw CARG4, CTSTATE->cb.gpr[3]
2249 | sdc1 FARG2, CTSTATE->cb.fpr[1] 2827 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2250 | addiu TMP0, sp, CFRAME_SPACE+16 2828 | addiu TMP0, sp, CFRAME_SPACE+16
2251 | sw TMP0, CTSTATE->cb.stack 2829 | sw TMP0, CTSTATE->cb.stack
2252 | sw r0, SAVE_PC // Any value outside of bytecode is ok. 2830 | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2256,15 +2834,16 @@ static void build_subroutines(BuildCtx *ctx)
2256 | // Returns lua_State *. 2834 | // Returns lua_State *.
2257 | lw BASE, L:CRET1->base 2835 | lw BASE, L:CRET1->base
2258 | lw RC, L:CRET1->top 2836 | lw RC, L:CRET1->top
2837 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2259 | move L, CRET1 2838 | move L, CRET1
2260 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2839 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2261 | lw LFUNC:RB, FRAME_FUNC(BASE) 2840 | lw LFUNC:RB, FRAME_FUNC(BASE)
2262 | mtc1 TMP3, TOBIT 2841 | .FPU mtc1 TMP3, TOBIT
2263 | li_vmstate INTERP 2842 | li_vmstate INTERP
2264 | li TISNIL, LJ_TNIL 2843 | li TISNIL, LJ_TNIL
2265 | subu RC, RC, BASE 2844 | subu RC, RC, BASE
2266 | st_vmstate 2845 | st_vmstate
2267 | cvt.d.s TOBIT, TOBIT 2846 | .FPU cvt.d.s TOBIT, TOBIT
2268 | ins_callt 2847 | ins_callt
2269 |.endif 2848 |.endif
2270 | 2849 |
@@ -2278,11 +2857,11 @@ static void build_subroutines(BuildCtx *ctx)
2278 | move CARG2, RA 2857 | move CARG2, RA
2279 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) 2858 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2280 |. move CARG1, CTSTATE 2859 |. move CARG1, CTSTATE
2860 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2281 | lw CRET1, CTSTATE->cb.gpr[0] 2861 | lw CRET1, CTSTATE->cb.gpr[0]
2282 | ldc1 FRET1, CTSTATE->cb.fpr[0] 2862 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2283 | lw CRET2, CTSTATE->cb.gpr[1]
2284 | b ->vm_leave_unw 2863 | b ->vm_leave_unw
2285 |. ldc1 FRET2, CTSTATE->cb.fpr[1] 2864 |. lw CRET2, CTSTATE->cb.gpr[1]
2286 |.endif 2865 |.endif
2287 | 2866 |
2288 |->vm_ffi_call: // Call C function via FFI. 2867 |->vm_ffi_call: // Call C function via FFI.
@@ -2314,8 +2893,8 @@ static void build_subroutines(BuildCtx *ctx)
2314 | lw CARG2, CCSTATE->gpr[1] 2893 | lw CARG2, CCSTATE->gpr[1]
2315 | lw CARG3, CCSTATE->gpr[2] 2894 | lw CARG3, CCSTATE->gpr[2]
2316 | lw CARG4, CCSTATE->gpr[3] 2895 | lw CARG4, CCSTATE->gpr[3]
2317 | ldc1 FARG1, CCSTATE->fpr[0] 2896 | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2318 | ldc1 FARG2, CCSTATE->fpr[1] 2897 | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2319 | jalr CFUNCADDR 2898 | jalr CFUNCADDR
2320 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2899 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2321 | lw CCSTATE:TMP1, -12(r16) 2900 | lw CCSTATE:TMP1, -12(r16)
@@ -2323,8 +2902,13 @@ static void build_subroutines(BuildCtx *ctx)
2323 | lw ra, -4(r16) 2902 | lw ra, -4(r16)
2324 | sw CRET1, CCSTATE:TMP1->gpr[0] 2903 | sw CRET1, CCSTATE:TMP1->gpr[0]
2325 | sw CRET2, CCSTATE:TMP1->gpr[1] 2904 | sw CRET2, CCSTATE:TMP1->gpr[1]
2905 |.if FPU
2326 | sdc1 FRET1, CCSTATE:TMP1->fpr[0] 2906 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2327 | sdc1 FRET2, CCSTATE:TMP1->fpr[1] 2907 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2908 |.else
2909 | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2910 | sw CARG2, CCSTATE:TMP1->gpr[3]
2911 |.endif
2328 | move sp, r16 2912 | move sp, r16
2329 | jr ra 2913 | jr ra
2330 |. move r16, TMP2 2914 |. move r16, TMP2
@@ -2348,82 +2932,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2348 2932
2349 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2933 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2350 | // RA = src1*8, RD = src2*8, JMP with RD = target 2934 | // RA = src1*8, RD = src2*8, JMP with RD = target
2351 | addu CARG2, BASE, RA 2935 |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2352 | addu CARG3, BASE, RD 2936 | addu RA, BASE, RA
2353 | lw TMP0, HI(CARG2) 2937 | addu RD, BASE, RD
2354 | lw TMP1, HI(CARG3) 2938 | lw RAHI, HI(RA)
2355 | ldc1 f0, 0(CARG2) 2939 | lw RDHI, HI(RD)
2356 | ldc1 f2, 0(CARG3)
2357 | sltiu TMP0, TMP0, LJ_TISNUM
2358 | sltiu TMP1, TMP1, LJ_TISNUM
2359 | lhu TMP2, OFS_RD(PC) 2940 | lhu TMP2, OFS_RD(PC)
2360 | and TMP0, TMP0, TMP1
2361 | addiu PC, PC, 4 2941 | addiu PC, PC, 4
2362 | beqz TMP0, ->vmeta_comp 2942 | bne RAHI, TISNUM, >2
2363 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) 2943 |. lw RALO, LO(RA)
2364 | decode_RD4b TMP2 2944 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2365 | addu TMP2, TMP2, TMP1 2945 | lw RDLO, LO(RD)
2366 if (op == BC_ISLT || op == BC_ISGE) { 2946 | bne RDHI, TISNUM, >5
2367 | c.olt.d f0, f2 2947 |. decode_RD4b TMP2
2368 } else { 2948 | slt AT, SFARG1LO, SFARG2LO
2369 | c.ole.d f0, f2 2949 | addu TMP2, TMP2, TMP3
2370 } 2950 | movop TMP2, r0, AT
2371 if (op == BC_ISLT || op == BC_ISLE) {
2372 | movf TMP2, r0
2373 } else {
2374 | movt TMP2, r0
2375 }
2376 | addu PC, PC, TMP2
2377 |1: 2951 |1:
2952 | addu PC, PC, TMP2
2378 | ins_next 2953 | ins_next
2954 |
2955 |2: // RA is not an integer.
2956 | sltiu AT, RAHI, LJ_TISNUM
2957 | beqz AT, ->vmeta_comp
2958 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2959 | sltiu AT, RDHI, LJ_TISNUM
2960 |.if FPU
2961 | ldc1 FRA, 0(RA)
2962 | ldc1 FRD, 0(RD)
2963 |.else
2964 | lw RDLO, LO(RD)
2965 |.endif
2966 | beqz AT, >4
2967 |. decode_RD4b TMP2
2968 |3: // RA and RD are both numbers.
2969 |.if FPU
2970 | fcomp f20, f22
2971 | addu TMP2, TMP2, TMP3
2972 | b <1
2973 |. fmovop TMP2, r0
2974 |.else
2975 | bal sfcomp
2976 |. addu TMP2, TMP2, TMP3
2977 | b <1
2978 |. movop TMP2, r0, CRET1
2979 |.endif
2980 |
2981 |4: // RA is a number, RD is not a number.
2982 | bne RDHI, TISNUM, ->vmeta_comp
2983 | // RA is a number, RD is an integer. Convert RD to a number.
2984 |.if FPU
2985 |. lwc1 FRD, LO(RD)
2986 | b <3
2987 |. cvt.d.w FRD, FRD
2988 |.else
2989 |. nop
2990 |.if "RDHI" == "SFARG1HI"
2991 | bal ->vm_sfi2d_1
2992 |.else
2993 | bal ->vm_sfi2d_2
2994 |.endif
2995 |. nop
2996 | b <3
2997 |. nop
2998 |.endif
2999 |
3000 |5: // RA is an integer, RD is not an integer
3001 | sltiu AT, RDHI, LJ_TISNUM
3002 | beqz AT, ->vmeta_comp
3003 | // RA is an integer, RD is a number. Convert RA to a number.
3004 |.if FPU
3005 |. mtc1 RALO, FRA
3006 | ldc1 FRD, 0(RD)
3007 | b <3
3008 | cvt.d.w FRA, FRA
3009 |.else
3010 |. nop
3011 |.if "RAHI" == "SFARG1HI"
3012 | bal ->vm_sfi2d_1
3013 |.else
3014 | bal ->vm_sfi2d_2
3015 |.endif
3016 |. nop
3017 | b <3
3018 |. nop
3019 |.endif
3020 |.endmacro
3021 |
3022 if (op == BC_ISLT) {
3023 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
3024 } else if (op == BC_ISGE) {
3025 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
3026 } else if (op == BC_ISLE) {
3027 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
3028 } else {
3029 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
3030 }
2379 break; 3031 break;
2380 3032
2381 case BC_ISEQV: case BC_ISNEV: 3033 case BC_ISEQV: case BC_ISNEV:
2382 vk = op == BC_ISEQV; 3034 vk = op == BC_ISEQV;
2383 | // RA = src1*8, RD = src2*8, JMP with RD = target 3035 | // RA = src1*8, RD = src2*8, JMP with RD = target
2384 | addu RA, BASE, RA 3036 | addu RA, BASE, RA
2385 | addiu PC, PC, 4 3037 | addiu PC, PC, 4
2386 | lw TMP0, HI(RA)
2387 | ldc1 f0, 0(RA)
2388 | addu RD, BASE, RD 3038 | addu RD, BASE, RD
3039 | lw SFARG1HI, HI(RA)
2389 | lhu TMP2, -4+OFS_RD(PC) 3040 | lhu TMP2, -4+OFS_RD(PC)
2390 | lw TMP1, HI(RD) 3041 | lw SFARG2HI, HI(RD)
2391 | ldc1 f2, 0(RD)
2392 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3042 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2393 | sltiu AT, TMP0, LJ_TISNUM 3043 | sltu AT, TISNUM, SFARG1HI
2394 | sltiu CARG1, TMP1, LJ_TISNUM 3044 | sltu TMP0, TISNUM, SFARG2HI
2395 | decode_RD4b TMP2 3045 | or AT, AT, TMP0
2396 | and AT, AT, CARG1
2397 | beqz AT, >5
2398 |. addu TMP2, TMP2, TMP3
2399 | c.eq.d f0, f2
2400 if (vk) { 3046 if (vk) {
2401 | movf TMP2, r0 3047 | beqz AT, ->BC_ISEQN_Z
2402 } else { 3048 } else {
2403 | movt TMP2, r0 3049 | beqz AT, ->BC_ISNEN_Z
2404 } 3050 }
2405 |1: 3051 |. decode_RD4b TMP2
2406 | addu PC, PC, TMP2 3052 | // Either or both types are not numbers.
2407 | ins_next 3053 | lw SFARG1LO, LO(RA)
2408 |5: // Either or both types are not numbers. 3054 | lw SFARG2LO, LO(RD)
2409 | lw CARG2, LO(RA) 3055 | addu TMP2, TMP2, TMP3
2410 | lw CARG3, LO(RD)
2411 |.if FFI 3056 |.if FFI
2412 | li TMP3, LJ_TCDATA 3057 | li TMP3, LJ_TCDATA
2413 | beq TMP0, TMP3, ->vmeta_equal_cd 3058 | beq SFARG1HI, TMP3, ->vmeta_equal_cd
2414 |.endif 3059 |.endif
2415 |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? 3060 |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
2416 |.if FFI 3061 |.if FFI
2417 | beq TMP1, TMP3, ->vmeta_equal_cd 3062 | beq SFARG2HI, TMP3, ->vmeta_equal_cd
2418 |.endif 3063 |.endif
2419 |. xor TMP3, CARG2, CARG3 // Same tv? 3064 |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
2420 | xor TMP1, TMP1, TMP0 // Same type? 3065 | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
2421 | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? 3066 | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
2422 | movz TMP3, r0, AT // Ignore tv if primitive. 3067 | movz TMP3, r0, AT // Ignore tv if primitive.
2423 | movn CARG1, r0, TMP1 // Tab/ud and same type? 3068 | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
2424 | or AT, TMP1, TMP3 // Same type && (pri||same tv). 3069 | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
2425 | movz CARG1, r0, AT 3070 | movz TMP0, r0, AT
2426 | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. 3071 | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
2427 if (vk) { 3072 if (vk) {
2428 |. movn TMP2, r0, AT 3073 |. movn TMP2, r0, AT
2429 } else { 3074 } else {
@@ -2431,15 +3076,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2431 } 3076 }
2432 | // Different tables or userdatas. Need to check __eq metamethod. 3077 | // Different tables or userdatas. Need to check __eq metamethod.
2433 | // Field metatable must be at same offset for GCtab and GCudata! 3078 | // Field metatable must be at same offset for GCtab and GCudata!
2434 | lw TAB:TMP1, TAB:CARG2->metatable 3079 | lw TAB:TMP1, TAB:SFARG1LO->metatable
2435 | beqz TAB:TMP1, <1 // No metatable? 3080 | beqz TAB:TMP1, >1 // No metatable?
2436 |. nop 3081 |. nop
2437 | lbu TMP1, TAB:TMP1->nomm 3082 | lbu TMP1, TAB:TMP1->nomm
2438 | andi TMP1, TMP1, 1<<MM_eq 3083 | andi TMP1, TMP1, 1<<MM_eq
2439 | bnez TMP1, <1 // Or 'no __eq' flag set? 3084 | bnez TMP1, >1 // Or 'no __eq' flag set?
2440 |. nop 3085 |. nop
2441 | b ->vmeta_equal // Handle __eq metamethod. 3086 | b ->vmeta_equal // Handle __eq metamethod.
2442 |. li CARG4, 1-vk // ne = 0 or 1. 3087 |. li TMP0, 1-vk // ne = 0 or 1.
3088 |1:
3089 | addu PC, PC, TMP2
3090 | ins_next
2443 break; 3091 break;
2444 3092
2445 case BC_ISEQS: case BC_ISNES: 3093 case BC_ISEQS: case BC_ISNES:
@@ -2476,38 +3124,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2476 vk = op == BC_ISEQN; 3124 vk = op == BC_ISEQN;
2477 | // RA = src*8, RD = num_const*8, JMP with RD = target 3125 | // RA = src*8, RD = num_const*8, JMP with RD = target
2478 | addu RA, BASE, RA 3126 | addu RA, BASE, RA
2479 | addiu PC, PC, 4 3127 | addu RD, KBASE, RD
2480 | lw TMP0, HI(RA) 3128 | lw SFARG1HI, HI(RA)
2481 | ldc1 f0, 0(RA) 3129 | lw SFARG2HI, HI(RD)
2482 | addu RD, KBASE, RD 3130 | lhu TMP2, OFS_RD(PC)
2483 | lhu TMP2, -4+OFS_RD(PC) 3131 | addiu PC, PC, 4
2484 | ldc1 f2, 0(RD)
2485 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3132 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2486 | sltiu AT, TMP0, LJ_TISNUM
2487 | decode_RD4b TMP2 3133 | decode_RD4b TMP2
2488 |.if FFI
2489 | beqz AT, >5
2490 |.else
2491 | beqz AT, >1
2492 |.endif
2493 |. addu TMP2, TMP2, TMP3
2494 | c.eq.d f0, f2
2495 if (vk) { 3134 if (vk) {
2496 | movf TMP2, r0 3135 |->BC_ISEQN_Z:
2497 | addu PC, PC, TMP2 3136 } else {
3137 |->BC_ISNEN_Z:
3138 }
3139 | bne SFARG1HI, TISNUM, >3
3140 |. lw SFARG1LO, LO(RA)
3141 | lw SFARG2LO, LO(RD)
3142 | addu TMP2, TMP2, TMP3
3143 | bne SFARG2HI, TISNUM, >6
3144 |. xor AT, SFARG1LO, SFARG2LO
3145 if (vk) {
3146 | movn TMP2, r0, AT
2498 |1: 3147 |1:
3148 | addu PC, PC, TMP2
3149 |2:
2499 } else { 3150 } else {
2500 | movt TMP2, r0 3151 | movz TMP2, r0, AT
2501 |1: 3152 |1:
3153 |2:
2502 | addu PC, PC, TMP2 3154 | addu PC, PC, TMP2
2503 } 3155 }
2504 | ins_next 3156 | ins_next
3157 |
3158 |3: // RA is not an integer.
3159 | sltiu AT, SFARG1HI, LJ_TISNUM
2505 |.if FFI 3160 |.if FFI
2506 |5: 3161 | beqz AT, >8
2507 | li AT, LJ_TCDATA 3162 |.else
2508 | beq TMP0, AT, ->vmeta_equal_cd 3163 | beqz AT, <2
3164 |.endif
3165 |. addu TMP2, TMP2, TMP3
3166 | sltiu AT, SFARG2HI, LJ_TISNUM
3167 |.if FPU
3168 | ldc1 f20, 0(RA)
3169 | ldc1 f22, 0(RD)
3170 |.endif
3171 | beqz AT, >5
3172 |. lw SFARG2LO, LO(RD)
3173 |4: // RA and RD are both numbers.
3174 |.if FPU
3175 | c.eq.d f20, f22
3176 | b <1
3177 if (vk) {
3178 |. movf TMP2, r0
3179 } else {
3180 |. movt TMP2, r0
3181 }
3182 |.else
3183 | bal ->vm_sfcmpeq
2509 |. nop 3184 |. nop
2510 | b <1 3185 | b <1
3186 if (vk) {
3187 |. movz TMP2, r0, CRET1
3188 } else {
3189 |. movn TMP2, r0, CRET1
3190 }
3191 |.endif
3192 |
3193 |5: // RA is a number, RD is not a number.
3194 |.if FFI
3195 | bne SFARG2HI, TISNUM, >9
3196 |.else
3197 | bne SFARG2HI, TISNUM, <2
3198 |.endif
3199 | // RA is a number, RD is an integer. Convert RD to a number.
3200 |.if FPU
3201 |. lwc1 f22, LO(RD)
3202 | b <4
3203 |. cvt.d.w f22, f22
3204 |.else
3205 |. nop
3206 | bal ->vm_sfi2d_2
3207 |. nop
3208 | b <4
3209 |. nop
3210 |.endif
3211 |
3212 |6: // RA is an integer, RD is not an integer
3213 | sltiu AT, SFARG2HI, LJ_TISNUM
3214 |.if FFI
3215 | beqz AT, >9
3216 |.else
3217 | beqz AT, <2
3218 |.endif
3219 | // RA is an integer, RD is a number. Convert RA to a number.
3220 |.if FPU
3221 |. mtc1 SFARG1LO, f20
3222 | ldc1 f22, 0(RD)
3223 | b <4
3224 | cvt.d.w f20, f20
3225 |.else
3226 |. nop
3227 | bal ->vm_sfi2d_1
3228 |. nop
3229 | b <4
3230 |. nop
3231 |.endif
3232 |
3233 |.if FFI
3234 |8:
3235 | li AT, LJ_TCDATA
3236 | bne SFARG1HI, AT, <2
3237 |. nop
3238 | b ->vmeta_equal_cd
3239 |. nop
3240 |9:
3241 | li AT, LJ_TCDATA
3242 | bne SFARG2HI, AT, <2
3243 |. nop
3244 | b ->vmeta_equal_cd
2511 |. nop 3245 |. nop
2512 |.endif 3246 |.endif
2513 break; 3247 break;
@@ -2559,7 +3293,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2559 | addu PC, PC, TMP2 3293 | addu PC, PC, TMP2
2560 } else { 3294 } else {
2561 | sltiu TMP0, TMP0, LJ_TISTRUECOND 3295 | sltiu TMP0, TMP0, LJ_TISTRUECOND
2562 | ldc1 f0, 0(RD) 3296 | lw SFRETHI, HI(RD)
3297 | lw SFRETLO, LO(RD)
2563 if (op == BC_ISTC) { 3298 if (op == BC_ISTC) {
2564 | beqz TMP0, >1 3299 | beqz TMP0, >1
2565 } else { 3300 } else {
@@ -2569,22 +3304,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2569 | decode_RD4b TMP2 3304 | decode_RD4b TMP2
2570 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3305 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2571 | addu TMP2, TMP2, TMP3 3306 | addu TMP2, TMP2, TMP3
2572 | sdc1 f0, 0(RA) 3307 | sw SFRETHI, HI(RA)
3308 | sw SFRETLO, LO(RA)
2573 | addu PC, PC, TMP2 3309 | addu PC, PC, TMP2
2574 |1: 3310 |1:
2575 } 3311 }
2576 | ins_next 3312 | ins_next
2577 break; 3313 break;
2578 3314
3315 case BC_ISTYPE:
3316 | // RA = src*8, RD = -type*8
3317 | addu TMP2, BASE, RA
3318 | srl TMP1, RD, 3
3319 | lw TMP0, HI(TMP2)
3320 | ins_next1
3321 | addu AT, TMP0, TMP1
3322 | bnez AT, ->vmeta_istype
3323 |. ins_next2
3324 break;
3325 case BC_ISNUM:
3326 | // RA = src*8, RD = -(TISNUM-1)*8
3327 | addu TMP2, BASE, RA
3328 | lw TMP0, HI(TMP2)
3329 | ins_next1
3330 | sltiu AT, TMP0, LJ_TISNUM
3331 | beqz AT, ->vmeta_istype
3332 |. ins_next2
3333 break;
3334
2579 /* -- Unary ops --------------------------------------------------------- */ 3335 /* -- Unary ops --------------------------------------------------------- */
2580 3336
2581 case BC_MOV: 3337 case BC_MOV:
2582 | // RA = dst*8, RD = src*8 3338 | // RA = dst*8, RD = src*8
2583 | addu RD, BASE, RD 3339 | addu RD, BASE, RD
2584 | addu RA, BASE, RA 3340 | addu RA, BASE, RA
2585 | ldc1 f0, 0(RD) 3341 | lw SFRETHI, HI(RD)
3342 | lw SFRETLO, LO(RD)
2586 | ins_next1 3343 | ins_next1
2587 | sdc1 f0, 0(RA) 3344 | sw SFRETHI, HI(RA)
3345 | sw SFRETLO, LO(RA)
2588 | ins_next2 3346 | ins_next2
2589 break; 3347 break;
2590 case BC_NOT: 3348 case BC_NOT:
@@ -2601,16 +3359,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2601 break; 3359 break;
2602 case BC_UNM: 3360 case BC_UNM:
2603 | // RA = dst*8, RD = src*8 3361 | // RA = dst*8, RD = src*8
2604 | addu CARG3, BASE, RD 3362 | addu RB, BASE, RD
3363 | lw SFARG1HI, HI(RB)
2605 | addu RA, BASE, RA 3364 | addu RA, BASE, RA
2606 | lw TMP0, HI(CARG3) 3365 | bne SFARG1HI, TISNUM, >2
2607 | ldc1 f0, 0(CARG3) 3366 |. lw SFARG1LO, LO(RB)
2608 | sltiu AT, TMP0, LJ_TISNUM 3367 | lui TMP1, 0x8000
2609 | beqz AT, ->vmeta_unm 3368 | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
2610 |. neg.d f0, f0 3369 |. negu SFARG1LO, SFARG1LO
3370 |1:
2611 | ins_next1 3371 | ins_next1
2612 | sdc1 f0, 0(RA) 3372 | sw SFARG1HI, HI(RA)
3373 | sw SFARG1LO, LO(RA)
2613 | ins_next2 3374 | ins_next2
3375 |2:
3376 | sltiu AT, SFARG1HI, LJ_TISNUM
3377 | beqz AT, ->vmeta_unm
3378 |. lui TMP1, 0x8000
3379 | b <1
3380 |. xor SFARG1HI, SFARG1HI, TMP1
2614 break; 3381 break;
2615 case BC_LEN: 3382 case BC_LEN:
2616 | // RA = dst*8, RD = src*8 3383 | // RA = dst*8, RD = src*8
@@ -2621,12 +3388,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2621 | li AT, LJ_TSTR 3388 | li AT, LJ_TSTR
2622 | bne TMP0, AT, >2 3389 | bne TMP0, AT, >2
2623 |. li AT, LJ_TTAB 3390 |. li AT, LJ_TTAB
2624 | lw CRET1, STR:CARG1->len 3391 | lw CRET1, STR:CARG1->len
2625 |1: 3392 |1:
2626 | mtc1 CRET1, f0
2627 | cvt.d.w f0, f0
2628 | ins_next1 3393 | ins_next1
2629 | sdc1 f0, 0(RA) 3394 | sw TISNUM, HI(RA)
3395 | sw CRET1, LO(RA)
2630 | ins_next2 3396 | ins_next2
2631 |2: 3397 |2:
2632 | bne TMP0, AT, ->vmeta_len 3398 | bne TMP0, AT, ->vmeta_len
@@ -2657,104 +3423,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2657 3423
2658 /* -- Binary ops -------------------------------------------------------- */ 3424 /* -- Binary ops -------------------------------------------------------- */
2659 3425
2660 |.macro ins_arithpre 3426 |.macro fpmod, a, b, c
3427 | bal ->vm_floor // floor(b/c)
3428 |. div.d FARG1, b, c
3429 | mul.d a, FRET1, c
3430 | sub.d a, b, a // b - floor(b/c)*c
3431 |.endmacro
3432
3433 |.macro sfpmod
3434 | addiu sp, sp, -16
3435 |
3436 | load_got __divdf3
3437 | sw SFARG1HI, HI(sp)
3438 | sw SFARG1LO, LO(sp)
3439 | sw SFARG2HI, 8+HI(sp)
3440 | call_extern
3441 |. sw SFARG2LO, 8+LO(sp)
3442 |
3443 | load_got floor
3444 | move SFARG1HI, SFRETHI
3445 | call_extern
3446 |. move SFARG1LO, SFRETLO
3447 |
3448 | load_got __muldf3
3449 | move SFARG1HI, SFRETHI
3450 | move SFARG1LO, SFRETLO
3451 | lw SFARG2HI, 8+HI(sp)
3452 | call_extern
3453 |. lw SFARG2LO, 8+LO(sp)
3454 |
3455 | load_got __subdf3
3456 | lw SFARG1HI, HI(sp)
3457 | lw SFARG1LO, LO(sp)
3458 | move SFARG2HI, SFRETHI
3459 | call_extern
3460 |. move SFARG2LO, SFRETLO
3461 |
3462 | addiu sp, sp, 16
3463 |.endmacro
3464
3465 |.macro ins_arithpre, label
2661 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3466 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2662 | decode_RB8a RB, INS
2663 | decode_RB8b RB
2664 | decode_RDtoRC8 RC, RD
2665 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3467 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2666 ||switch (vk) { 3468 ||switch (vk) {
2667 ||case 0: 3469 ||case 0:
2668 | addu CARG3, BASE, RB 3470 | decode_RB8a RB, INS
2669 | addu CARG4, KBASE, RC 3471 | decode_RB8b RB
2670 | lw TMP1, HI(CARG3) 3472 | decode_RDtoRC8 RC, RD
2671 | ldc1 f20, 0(CARG3) 3473 | // RA = dst*8, RB = src1*8, RC = num_const*8
2672 | ldc1 f22, 0(CARG4) 3474 | addu RB, BASE, RB
2673 | sltiu AT, TMP1, LJ_TISNUM 3475 |.if "label" ~= "none"
3476 | b label
3477 |.endif
3478 |. addu RC, KBASE, RC
2674 || break; 3479 || break;
2675 ||case 1: 3480 ||case 1:
2676 | addu CARG4, BASE, RB 3481 | decode_RB8a RC, INS
2677 | addu CARG3, KBASE, RC 3482 | decode_RB8b RC
2678 | lw TMP1, HI(CARG4) 3483 | decode_RDtoRC8 RB, RD
2679 | ldc1 f22, 0(CARG4) 3484 | // RA = dst*8, RB = num_const*8, RC = src1*8
2680 | ldc1 f20, 0(CARG3) 3485 | addu RC, BASE, RC
2681 | sltiu AT, TMP1, LJ_TISNUM 3486 |.if "label" ~= "none"
3487 | b label
3488 |.endif
3489 |. addu RB, KBASE, RB
2682 || break; 3490 || break;
2683 ||default: 3491 ||default:
2684 | addu CARG3, BASE, RB 3492 | decode_RB8a RB, INS
2685 | addu CARG4, BASE, RC 3493 | decode_RB8b RB
2686 | lw TMP1, HI(CARG3) 3494 | decode_RDtoRC8 RC, RD
2687 | lw TMP2, HI(CARG4) 3495 | // RA = dst*8, RB = src1*8, RC = src2*8
2688 | ldc1 f20, 0(CARG3) 3496 | addu RB, BASE, RB
2689 | ldc1 f22, 0(CARG4) 3497 |.if "label" ~= "none"
2690 | sltiu AT, TMP1, LJ_TISNUM 3498 | b label
2691 | sltiu TMP0, TMP2, LJ_TISNUM 3499 |.endif
2692 | and AT, AT, TMP0 3500 |. addu RC, BASE, RC
2693 || break; 3501 || break;
2694 ||} 3502 ||}
2695 | beqz AT, ->vmeta_arith
2696 |. addu RA, BASE, RA
2697 |.endmacro 3503 |.endmacro
2698 | 3504 |
2699 |.macro fpmod, a, b, c 3505 |.macro ins_arith, intins, fpins, fpcall, label
2700 |->BC_MODVN_Z: 3506 | ins_arithpre none
2701 | bal ->vm_floor // floor(b/c)
2702 |. div.d FARG1, b, c
2703 | mul.d a, FRET1, c
2704 | sub.d a, b, a // b - floor(b/c)*c
2705 |.endmacro
2706 | 3507 |
2707 |.macro ins_arith, ins 3508 |.if "label" ~= "none"
2708 | ins_arithpre 3509 |label:
2709 |.if "ins" == "fpmod_" 3510 |.endif
2710 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3511 |
2711 |. nop 3512 | lw SFARG1HI, HI(RB)
3513 | lw SFARG2HI, HI(RC)
3514 |
3515 |.if "intins" ~= "div"
3516 |
3517 | // Check for two integers.
3518 | lw SFARG1LO, LO(RB)
3519 | bne SFARG1HI, TISNUM, >5
3520 |. lw SFARG2LO, LO(RC)
3521 | bne SFARG2HI, TISNUM, >5
3522 |
3523 |.if "intins" == "addu"
3524 |. intins CRET1, SFARG1LO, SFARG2LO
3525 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3526 | xor TMP2, CRET1, SFARG2LO
3527 | and TMP1, TMP1, TMP2
3528 | bltz TMP1, ->vmeta_arith
3529 |. addu RA, BASE, RA
3530 |.elif "intins" == "subu"
3531 |. intins CRET1, SFARG1LO, SFARG2LO
3532 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3533 | xor TMP2, SFARG1LO, SFARG2LO
3534 | and TMP1, TMP1, TMP2
3535 | bltz TMP1, ->vmeta_arith
3536 |. addu RA, BASE, RA
3537 |.elif "intins" == "mult"
3538 |. intins SFARG1LO, SFARG2LO
3539 | mflo CRET1
3540 | mfhi TMP2
3541 | sra TMP1, CRET1, 31
3542 | bne TMP1, TMP2, ->vmeta_arith
3543 |. addu RA, BASE, RA
2712 |.else 3544 |.else
2713 | ins f0, f20, f22 3545 |. load_got lj_vm_modi
3546 | beqz SFARG2LO, ->vmeta_arith
3547 |. addu RA, BASE, RA
3548 |.if ENDIAN_BE
3549 | move CARG1, SFARG1LO
3550 |.endif
3551 | call_extern
3552 |. move CARG2, SFARG2LO
3553 |.endif
3554 |
2714 | ins_next1 3555 | ins_next1
2715 | sdc1 f0, 0(RA) 3556 | sw TISNUM, HI(RA)
3557 | sw CRET1, LO(RA)
3558 |3:
2716 | ins_next2 3559 | ins_next2
3560 |
3561 |.elif not FPU
3562 |
3563 | lw SFARG1LO, LO(RB)
3564 | lw SFARG2LO, LO(RC)
3565 |
2717 |.endif 3566 |.endif
3567 |
3568 |5: // Check for two numbers.
3569 | .FPU ldc1 f20, 0(RB)
3570 | sltiu AT, SFARG1HI, LJ_TISNUM
3571 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3572 | .FPU ldc1 f22, 0(RC)
3573 | and AT, AT, TMP0
3574 | beqz AT, ->vmeta_arith
3575 |. addu RA, BASE, RA
3576 |
3577 |.if FPU
3578 | fpins FRET1, f20, f22
3579 |.elif "fpcall" == "sfpmod"
3580 | sfpmod
3581 |.else
3582 | load_got fpcall
3583 | call_extern
3584 |. nop
3585 |.endif
3586 |
3587 | ins_next1
3588 |.if not FPU
3589 | sw SFRETHI, HI(RA)
3590 |.endif
3591 |.if "intins" ~= "div"
3592 | b <3
3593 |.endif
3594 |.if FPU
3595 |. sdc1 FRET1, 0(RA)
3596 |.else
3597 |. sw SFRETLO, LO(RA)
3598 |.endif
3599 |.if "intins" == "div"
3600 | ins_next2
3601 |.endif
3602 |
2718 |.endmacro 3603 |.endmacro
2719 3604
2720 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3605 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2721 | ins_arith add.d 3606 | ins_arith addu, add.d, __adddf3, none
2722 break; 3607 break;
2723 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3608 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2724 | ins_arith sub.d 3609 | ins_arith subu, sub.d, __subdf3, none
2725 break; 3610 break;
2726 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3611 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2727 | ins_arith mul.d 3612 | ins_arith mult, mul.d, __muldf3, none
2728 break; 3613 break;
2729 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3614 case BC_DIVVN:
2730 | ins_arith div.d 3615 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3616 break;
3617 case BC_DIVNV: case BC_DIVVV:
3618 | ins_arithpre ->BC_DIVVN_Z
2731 break; 3619 break;
2732 case BC_MODVN: 3620 case BC_MODVN:
2733 | ins_arith fpmod 3621 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
2734 break; 3622 break;
2735 case BC_MODNV: case BC_MODVV: 3623 case BC_MODNV: case BC_MODVV:
2736 | ins_arith fpmod_ 3624 | ins_arithpre ->BC_MODVN_Z
2737 break; 3625 break;
2738 case BC_POW: 3626 case BC_POW:
2739 | decode_RB8a RB, INS 3627 | ins_arithpre none
2740 | decode_RB8b RB 3628 | lw SFARG1HI, HI(RB)
2741 | decode_RDtoRC8 RC, RD 3629 | lw SFARG2HI, HI(RC)
2742 | addu CARG3, BASE, RB 3630 | sltiu AT, SFARG1HI, LJ_TISNUM
2743 | addu CARG4, BASE, RC 3631 | sltiu TMP0, SFARG2HI, LJ_TISNUM
2744 | lw TMP1, HI(CARG3)
2745 | lw TMP2, HI(CARG4)
2746 | ldc1 FARG1, 0(CARG3)
2747 | ldc1 FARG2, 0(CARG4)
2748 | sltiu AT, TMP1, LJ_TISNUM
2749 | sltiu TMP0, TMP2, LJ_TISNUM
2750 | and AT, AT, TMP0 3632 | and AT, AT, TMP0
2751 | load_got pow 3633 | load_got pow
2752 | beqz AT, ->vmeta_arith 3634 | beqz AT, ->vmeta_arith
2753 |. addu RA, BASE, RA 3635 |. addu RA, BASE, RA
3636 |.if FPU
3637 | ldc1 FARG1, 0(RB)
3638 | ldc1 FARG2, 0(RC)
3639 |.else
3640 | lw SFARG1LO, LO(RB)
3641 | lw SFARG2LO, LO(RC)
3642 |.endif
2754 | call_extern 3643 | call_extern
2755 |. nop 3644 |. nop
2756 | ins_next1 3645 | ins_next1
3646 |.if FPU
2757 | sdc1 FRET1, 0(RA) 3647 | sdc1 FRET1, 0(RA)
3648 |.else
3649 | sw SFRETHI, HI(RA)
3650 | sw SFRETLO, LO(RA)
3651 |.endif
2758 | ins_next2 3652 | ins_next2
2759 break; 3653 break;
2760 3654
@@ -2777,10 +3671,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2777 | bnez CRET1, ->vmeta_binop 3671 | bnez CRET1, ->vmeta_binop
2778 |. lw BASE, L->base 3672 |. lw BASE, L->base
2779 | addu RB, BASE, MULTRES 3673 | addu RB, BASE, MULTRES
2780 | ldc1 f0, 0(RB) 3674 | lw SFRETHI, HI(RB)
3675 | lw SFRETLO, LO(RB)
2781 | addu RA, BASE, RA 3676 | addu RA, BASE, RA
2782 | ins_next1 3677 | ins_next1
2783 | sdc1 f0, 0(RA) // Copy result from RB to RA. 3678 | sw SFRETHI, HI(RA)
3679 | sw SFRETLO, LO(RA)
2784 | ins_next2 3680 | ins_next2
2785 break; 3681 break;
2786 3682
@@ -2815,20 +3711,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2815 case BC_KSHORT: 3711 case BC_KSHORT:
2816 | // RA = dst*8, RD = int16_literal*8 3712 | // RA = dst*8, RD = int16_literal*8
2817 | sra RD, INS, 16 3713 | sra RD, INS, 16
2818 | mtc1 RD, f0
2819 | addu RA, BASE, RA 3714 | addu RA, BASE, RA
2820 | cvt.d.w f0, f0
2821 | ins_next1 3715 | ins_next1
2822 | sdc1 f0, 0(RA) 3716 | sw TISNUM, HI(RA)
3717 | sw RD, LO(RA)
2823 | ins_next2 3718 | ins_next2
2824 break; 3719 break;
2825 case BC_KNUM: 3720 case BC_KNUM:
2826 | // RA = dst*8, RD = num_const*8 3721 | // RA = dst*8, RD = num_const*8
2827 | addu RD, KBASE, RD 3722 | addu RD, KBASE, RD
2828 | addu RA, BASE, RA 3723 | addu RA, BASE, RA
2829 | ldc1 f0, 0(RD) 3724 | lw SFRETHI, HI(RD)
3725 | lw SFRETLO, LO(RD)
2830 | ins_next1 3726 | ins_next1
2831 | sdc1 f0, 0(RA) 3727 | sw SFRETHI, HI(RA)
3728 | sw SFRETLO, LO(RA)
2832 | ins_next2 3729 | ins_next2
2833 break; 3730 break;
2834 case BC_KPRI: 3731 case BC_KPRI:
@@ -2864,9 +3761,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2864 | lw UPVAL:RB, LFUNC:RD->uvptr 3761 | lw UPVAL:RB, LFUNC:RD->uvptr
2865 | ins_next1 3762 | ins_next1
2866 | lw TMP1, UPVAL:RB->v 3763 | lw TMP1, UPVAL:RB->v
2867 | ldc1 f0, 0(TMP1) 3764 | lw SFRETHI, HI(TMP1)
3765 | lw SFRETLO, LO(TMP1)
2868 | addu RA, BASE, RA 3766 | addu RA, BASE, RA
2869 | sdc1 f0, 0(RA) 3767 | sw SFRETHI, HI(RA)
3768 | sw SFRETLO, LO(RA)
2870 | ins_next2 3769 | ins_next2
2871 break; 3770 break;
2872 case BC_USETV: 3771 case BC_USETV:
@@ -2875,26 +3774,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2875 | srl RA, RA, 1 3774 | srl RA, RA, 1
2876 | addu RD, BASE, RD 3775 | addu RD, BASE, RD
2877 | addu RA, RA, LFUNC:RB 3776 | addu RA, RA, LFUNC:RB
2878 | ldc1 f0, 0(RD)
2879 | lw UPVAL:RB, LFUNC:RA->uvptr 3777 | lw UPVAL:RB, LFUNC:RA->uvptr
3778 | lw SFRETHI, HI(RD)
3779 | lw SFRETLO, LO(RD)
2880 | lbu TMP3, UPVAL:RB->marked 3780 | lbu TMP3, UPVAL:RB->marked
2881 | lw CARG2, UPVAL:RB->v 3781 | lw CARG2, UPVAL:RB->v
2882 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3782 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2883 | lbu TMP0, UPVAL:RB->closed 3783 | lbu TMP0, UPVAL:RB->closed
2884 | lw TMP2, HI(RD) 3784 | sw SFRETHI, HI(CARG2)
2885 | sdc1 f0, 0(CARG2) 3785 | sw SFRETLO, LO(CARG2)
2886 | li AT, LJ_GC_BLACK|1 3786 | li AT, LJ_GC_BLACK|1
2887 | or TMP3, TMP3, TMP0 3787 | or TMP3, TMP3, TMP0
2888 | beq TMP3, AT, >2 // Upvalue is closed and black? 3788 | beq TMP3, AT, >2 // Upvalue is closed and black?
2889 |. addiu TMP2, TMP2, -(LJ_TNUMX+1) 3789 |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
2890 |1: 3790 |1:
2891 | ins_next 3791 | ins_next
2892 | 3792 |
2893 |2: // Check if new value is collectable. 3793 |2: // Check if new value is collectable.
2894 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3794 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
2895 | beqz AT, <1 // tvisgcv(v) 3795 | beqz AT, <1 // tvisgcv(v)
2896 |. lw TMP1, LO(RD) 3796 |. nop
2897 | lbu TMP3, GCOBJ:TMP1->gch.marked 3797 | lbu TMP3, GCOBJ:SFRETLO->gch.marked
2898 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3798 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2899 | beqz TMP3, <1 3799 | beqz TMP3, <1
2900 |. load_got lj_gc_barrieruv 3800 |. load_got lj_gc_barrieruv
@@ -2942,11 +3842,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2942 | srl RA, RA, 1 3842 | srl RA, RA, 1
2943 | addu RD, KBASE, RD 3843 | addu RD, KBASE, RD
2944 | addu RA, RA, LFUNC:RB 3844 | addu RA, RA, LFUNC:RB
2945 | ldc1 f0, 0(RD) 3845 | lw UPVAL:RB, LFUNC:RA->uvptr
2946 | lw UPVAL:RB, LFUNC:RA->uvptr 3846 | lw SFRETHI, HI(RD)
3847 | lw SFRETLO, LO(RD)
3848 | lw TMP1, UPVAL:RB->v
2947 | ins_next1 3849 | ins_next1
2948 | lw TMP1, UPVAL:RB->v 3850 | sw SFRETHI, HI(TMP1)
2949 | sdc1 f0, 0(TMP1) 3851 | sw SFRETLO, LO(TMP1)
2950 | ins_next2 3852 | ins_next2
2951 break; 3853 break;
2952 case BC_USETP: 3854 case BC_USETP:
@@ -2956,10 +3858,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2956 | srl TMP0, RD, 3 3858 | srl TMP0, RD, 3
2957 | addu RA, RA, LFUNC:RB 3859 | addu RA, RA, LFUNC:RB
2958 | not TMP0, TMP0 3860 | not TMP0, TMP0
2959 | lw UPVAL:RB, LFUNC:RA->uvptr 3861 | lw UPVAL:RB, LFUNC:RA->uvptr
2960 | ins_next1 3862 | ins_next1
2961 | lw TMP1, UPVAL:RB->v 3863 | lw TMP1, UPVAL:RB->v
2962 | sw TMP0, HI(TMP1) 3864 | sw TMP0, HI(TMP1)
2963 | ins_next2 3865 | ins_next2
2964 break; 3866 break;
2965 3867
@@ -2995,8 +3897,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2995 | li TMP0, LJ_TFUNC 3897 | li TMP0, LJ_TFUNC
2996 | ins_next1 3898 | ins_next1
2997 | addu RA, BASE, RA 3899 | addu RA, BASE, RA
2998 | sw TMP0, HI(RA)
2999 | sw LFUNC:CRET1, LO(RA) 3900 | sw LFUNC:CRET1, LO(RA)
3901 | sw TMP0, HI(RA)
3000 | ins_next2 3902 | ins_next2
3001 break; 3903 break;
3002 3904
@@ -3077,31 +3979,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3077 | lw TMP2, HI(CARG3) 3979 | lw TMP2, HI(CARG3)
3078 | lw TAB:RB, LO(CARG2) 3980 | lw TAB:RB, LO(CARG2)
3079 | li AT, LJ_TTAB 3981 | li AT, LJ_TTAB
3080 | ldc1 f0, 0(CARG3)
3081 | bne TMP1, AT, ->vmeta_tgetv 3982 | bne TMP1, AT, ->vmeta_tgetv
3082 |. addu RA, BASE, RA 3983 |. addu RA, BASE, RA
3083 | sltiu AT, TMP2, LJ_TISNUM 3984 | bne TMP2, TISNUM, >5
3084 | beqz AT, >5 3985 |. lw RC, LO(CARG3)
3085 |. li AT, LJ_TSTR 3986 | lw TMP0, TAB:RB->asize
3086 |
3087 | // Convert number key to integer, check for integerness and range.
3088 | cvt.w.d f2, f0
3089 | lw TMP0, TAB:RB->asize
3090 | mfc1 TMP2, f2
3091 | cvt.d.w f4, f2
3092 | lw TMP1, TAB:RB->array 3987 | lw TMP1, TAB:RB->array
3093 | c.eq.d f0, f4 3988 | sltu AT, RC, TMP0
3094 | sltu AT, TMP2, TMP0 3989 | sll TMP2, RC, 3
3095 | movf AT, r0
3096 | sll TMP2, TMP2, 3
3097 | beqz AT, ->vmeta_tgetv // Integer key and in array part? 3990 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3098 |. addu TMP2, TMP1, TMP2 3991 |. addu TMP2, TMP1, TMP2
3099 | lw TMP0, HI(TMP2) 3992 | lw SFRETHI, HI(TMP2)
3100 | beq TMP0, TISNIL, >2 3993 | beq SFRETHI, TISNIL, >2
3101 |. ldc1 f0, 0(TMP2) 3994 |. lw SFRETLO, LO(TMP2)
3102 |1: 3995 |1:
3103 | ins_next1 3996 | ins_next1
3104 | sdc1 f0, 0(RA) 3997 | sw SFRETHI, HI(RA)
3998 | sw SFRETLO, LO(RA)
3105 | ins_next2 3999 | ins_next2
3106 | 4000 |
3107 |2: // Check for __index if table value is nil. 4001 |2: // Check for __index if table value is nil.
@@ -3116,8 +4010,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3116 |. nop 4010 |. nop
3117 | 4011 |
3118 |5: 4012 |5:
4013 | li AT, LJ_TSTR
3119 | bne TMP2, AT, ->vmeta_tgetv 4014 | bne TMP2, AT, ->vmeta_tgetv
3120 |. lw STR:RC, LO(CARG3) 4015 |. nop
3121 | b ->BC_TGETS_Z // String key? 4016 | b ->BC_TGETS_Z // String key?
3122 |. nop 4017 |. nop
3123 break; 4018 break;
@@ -3138,9 +4033,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3138 |->BC_TGETS_Z: 4033 |->BC_TGETS_Z:
3139 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 4034 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
3140 | lw TMP0, TAB:RB->hmask 4035 | lw TMP0, TAB:RB->hmask
3141 | lw TMP1, STR:RC->hash 4036 | lw TMP1, STR:RC->sid
3142 | lw NODE:TMP2, TAB:RB->node 4037 | lw NODE:TMP2, TAB:RB->node
3143 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4038 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3144 | sll TMP0, TMP1, 5 4039 | sll TMP0, TMP1, 5
3145 | sll TMP1, TMP1, 3 4040 | sll TMP1, TMP1, 3
3146 | subu TMP1, TMP0, TMP1 4041 | subu TMP1, TMP0, TMP1
@@ -3149,18 +4044,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3149 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4044 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3150 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4045 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3151 | lw NODE:TMP1, NODE:TMP2->next 4046 | lw NODE:TMP1, NODE:TMP2->next
3152 | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 4047 | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3153 | addiu CARG1, CARG1, -LJ_TSTR 4048 | addiu CARG1, CARG1, -LJ_TSTR
3154 | xor TMP0, TMP0, STR:RC 4049 | xor TMP0, TMP0, STR:RC
3155 | or AT, CARG1, TMP0 4050 | or AT, CARG1, TMP0
3156 | bnez AT, >4 4051 | bnez AT, >4
3157 |. lw TAB:TMP3, TAB:RB->metatable 4052 |. lw TAB:TMP3, TAB:RB->metatable
3158 | beq CARG2, TISNIL, >5 // Key found, but nil value? 4053 | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
3159 |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) 4054 |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3160 |3: 4055 |3:
3161 | ins_next1 4056 | ins_next1
3162 | sw CARG2, HI(RA) 4057 | sw SFRETHI, HI(RA)
3163 | sw CARG1, LO(RA) 4058 | sw SFRETLO, LO(RA)
3164 | ins_next2 4059 | ins_next2
3165 | 4060 |
3166 |4: // Follow hash chain. 4061 |4: // Follow hash chain.
@@ -3170,7 +4065,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3170 | 4065 |
3171 |5: // Check for __index if table value is nil. 4066 |5: // Check for __index if table value is nil.
3172 | beqz TAB:TMP3, <3 // No metatable: done. 4067 | beqz TAB:TMP3, <3 // No metatable: done.
3173 |. li CARG2, LJ_TNIL 4068 |. li SFRETHI, LJ_TNIL
3174 | lbu TMP0, TAB:TMP3->nomm 4069 | lbu TMP0, TAB:TMP3->nomm
3175 | andi TMP0, TMP0, 1<<MM_index 4070 | andi TMP0, TMP0, 1<<MM_index
3176 | bnez TMP0, <3 // 'no __index' flag set: done. 4071 | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3195,12 +4090,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3195 | sltu AT, TMP0, TMP1 4090 | sltu AT, TMP0, TMP1
3196 | beqz AT, ->vmeta_tgetb 4091 | beqz AT, ->vmeta_tgetb
3197 |. addu RC, TMP2, RC 4092 |. addu RC, TMP2, RC
3198 | lw TMP1, HI(RC) 4093 | lw SFRETHI, HI(RC)
3199 | beq TMP1, TISNIL, >5 4094 | beq SFRETHI, TISNIL, >5
3200 |. ldc1 f0, 0(RC) 4095 |. lw SFRETLO, LO(RC)
3201 |1: 4096 |1:
3202 | ins_next1 4097 | ins_next1
3203 | sdc1 f0, 0(RA) 4098 | sw SFRETHI, HI(RA)
4099 | sw SFRETLO, LO(RA)
3204 | ins_next2 4100 | ins_next2
3205 | 4101 |
3206 |5: // Check for __index if table value is nil. 4102 |5: // Check for __index if table value is nil.
@@ -3211,9 +4107,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3211 | andi TMP1, TMP1, 1<<MM_index 4107 | andi TMP1, TMP1, 1<<MM_index
3212 | bnez TMP1, <1 // 'no __index' flag set: done. 4108 | bnez TMP1, <1 // 'no __index' flag set: done.
3213 |. nop 4109 |. nop
3214 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4110 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3215 |. nop 4111 |. nop
3216 break; 4112 break;
4113 case BC_TGETR:
4114 | // RA = dst*8, RB = table*8, RC = key*8
4115 | decode_RB8a RB, INS
4116 | decode_RB8b RB
4117 | decode_RDtoRC8 RC, RD
4118 | addu RB, BASE, RB
4119 | addu RC, BASE, RC
4120 | lw TAB:CARG1, LO(RB)
4121 | lw CARG2, LO(RC)
4122 | addu RA, BASE, RA
4123 | lw TMP0, TAB:CARG1->asize
4124 | lw TMP1, TAB:CARG1->array
4125 | sltu AT, CARG2, TMP0
4126 | sll TMP2, CARG2, 3
4127 | beqz AT, ->vmeta_tgetr // In array part?
4128 |. addu CRET1, TMP1, TMP2
4129 | lw SFARG2HI, HI(CRET1)
4130 | lw SFARG2LO, LO(CRET1)
4131 |->BC_TGETR_Z:
4132 | ins_next1
4133 | sw SFARG2HI, HI(RA)
4134 | sw SFARG2LO, LO(RA)
4135 | ins_next2
4136 break;
3217 4137
3218 case BC_TSETV: 4138 case BC_TSETV:
3219 | // RA = src*8, RB = table*8, RC = key*8 4139 | // RA = src*8, RB = table*8, RC = key*8
@@ -3226,33 +4146,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3226 | lw TMP2, HI(CARG3) 4146 | lw TMP2, HI(CARG3)
3227 | lw TAB:RB, LO(CARG2) 4147 | lw TAB:RB, LO(CARG2)
3228 | li AT, LJ_TTAB 4148 | li AT, LJ_TTAB
3229 | ldc1 f0, 0(CARG3)
3230 | bne TMP1, AT, ->vmeta_tsetv 4149 | bne TMP1, AT, ->vmeta_tsetv
3231 |. addu RA, BASE, RA 4150 |. addu RA, BASE, RA
3232 | sltiu AT, TMP2, LJ_TISNUM 4151 | bne TMP2, TISNUM, >5
3233 | beqz AT, >5 4152 |. lw RC, LO(CARG3)
3234 |. li AT, LJ_TSTR 4153 | lw TMP0, TAB:RB->asize
3235 |
3236 | // Convert number key to integer, check for integerness and range.
3237 | cvt.w.d f2, f0
3238 | lw TMP0, TAB:RB->asize
3239 | mfc1 TMP2, f2
3240 | cvt.d.w f4, f2
3241 | lw TMP1, TAB:RB->array 4154 | lw TMP1, TAB:RB->array
3242 | c.eq.d f0, f4 4155 | sltu AT, RC, TMP0
3243 | sltu AT, TMP2, TMP0 4156 | sll TMP2, RC, 3
3244 | movf AT, r0
3245 | sll TMP2, TMP2, 3
3246 | beqz AT, ->vmeta_tsetv // Integer key and in array part? 4157 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
3247 |. addu TMP1, TMP1, TMP2 4158 |. addu TMP1, TMP1, TMP2
3248 | lbu TMP3, TAB:RB->marked
3249 | lw TMP0, HI(TMP1) 4159 | lw TMP0, HI(TMP1)
4160 | lbu TMP3, TAB:RB->marked
4161 | lw SFRETHI, HI(RA)
3250 | beq TMP0, TISNIL, >3 4162 | beq TMP0, TISNIL, >3
3251 |. ldc1 f0, 0(RA) 4163 |. lw SFRETLO, LO(RA)
3252 |1: 4164 |1:
3253 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4165 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3254 | bnez AT, >7 4166 | sw SFRETHI, HI(TMP1)
3255 |. sdc1 f0, 0(TMP1) 4167 | bnez AT, >7
4168 |. sw SFRETLO, LO(TMP1)
3256 |2: 4169 |2:
3257 | ins_next 4170 | ins_next
3258 | 4171 |
@@ -3268,8 +4181,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3268 |. nop 4181 |. nop
3269 | 4182 |
3270 |5: 4183 |5:
4184 | li AT, LJ_TSTR
3271 | bne TMP2, AT, ->vmeta_tsetv 4185 | bne TMP2, AT, ->vmeta_tsetv
3272 |. lw STR:RC, LO(CARG3) 4186 |. nop
3273 | b ->BC_TSETS_Z // String key? 4187 | b ->BC_TSETS_Z // String key?
3274 |. nop 4188 |. nop
3275 | 4189 |
@@ -3293,15 +4207,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3293 |->BC_TSETS_Z: 4207 |->BC_TSETS_Z:
3294 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 4208 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
3295 | lw TMP0, TAB:RB->hmask 4209 | lw TMP0, TAB:RB->hmask
3296 | lw TMP1, STR:RC->hash 4210 | lw TMP1, STR:RC->sid
3297 | lw NODE:TMP2, TAB:RB->node 4211 | lw NODE:TMP2, TAB:RB->node
3298 | sb r0, TAB:RB->nomm // Clear metamethod cache. 4212 | sb r0, TAB:RB->nomm // Clear metamethod cache.
3299 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4213 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3300 | sll TMP0, TMP1, 5 4214 | sll TMP0, TMP1, 5
3301 | sll TMP1, TMP1, 3 4215 | sll TMP1, TMP1, 3
3302 | subu TMP1, TMP0, TMP1 4216 | subu TMP1, TMP0, TMP1
3303 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4217 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4218 |.if FPU
3304 | ldc1 f20, 0(RA) 4219 | ldc1 f20, 0(RA)
4220 |.else
4221 | lw SFRETHI, HI(RA)
4222 | lw SFRETLO, LO(RA)
4223 |.endif
3305 |1: 4224 |1:
3306 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4225 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3307 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4226 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3315,8 +4234,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3315 |. lw TAB:TMP0, TAB:RB->metatable 4234 |. lw TAB:TMP0, TAB:RB->metatable
3316 |2: 4235 |2:
3317 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4236 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4237 |.if FPU
3318 | bnez AT, >7 4238 | bnez AT, >7
3319 |. sdc1 f20, NODE:TMP2->val 4239 |. sdc1 f20, NODE:TMP2->val
4240 |.else
4241 | sw SFRETHI, NODE:TMP2->val.u32.hi
4242 | bnez AT, >7
4243 |. sw SFRETLO, NODE:TMP2->val.u32.lo
4244 |.endif
3320 |3: 4245 |3:
3321 | ins_next 4246 | ins_next
3322 | 4247 |
@@ -3354,8 +4279,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3354 |. move CARG1, L 4279 |. move CARG1, L
3355 | // Returns TValue *. 4280 | // Returns TValue *.
3356 | lw BASE, L->base 4281 | lw BASE, L->base
4282 |.if FPU
3357 | b <3 // No 2nd write barrier needed. 4283 | b <3 // No 2nd write barrier needed.
3358 |. sdc1 f20, 0(CRET1) 4284 |. sdc1 f20, 0(CRET1)
4285 |.else
4286 | lw SFARG1HI, HI(RA)
4287 | lw SFARG1LO, LO(RA)
4288 | sw SFARG1HI, HI(CRET1)
4289 | b <3 // No 2nd write barrier needed.
4290 |. sw SFARG1LO, LO(CRET1)
4291 |.endif
3359 | 4292 |
3360 |7: // Possible table write barrier for the value. Skip valiswhite check. 4293 |7: // Possible table write barrier for the value. Skip valiswhite check.
3361 | barrierback TAB:RB, TMP3, TMP0, <3 4294 | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3380,11 +4313,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3380 | lw TMP1, HI(RC) 4313 | lw TMP1, HI(RC)
3381 | lbu TMP3, TAB:RB->marked 4314 | lbu TMP3, TAB:RB->marked
3382 | beq TMP1, TISNIL, >5 4315 | beq TMP1, TISNIL, >5
3383 |. ldc1 f0, 0(RA)
3384 |1: 4316 |1:
4317 |. lw SFRETHI, HI(RA)
4318 | lw SFRETLO, LO(RA)
3385 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4319 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4320 | sw SFRETHI, HI(RC)
3386 | bnez AT, >7 4321 | bnez AT, >7
3387 |. sdc1 f0, 0(RC) 4322 |. sw SFRETLO, LO(RC)
3388 |2: 4323 |2:
3389 | ins_next 4324 | ins_next
3390 | 4325 |
@@ -3396,12 +4331,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3396 | andi TMP1, TMP1, 1<<MM_newindex 4331 | andi TMP1, TMP1, 1<<MM_newindex
3397 | bnez TMP1, <1 // 'no __newindex' flag set: done. 4332 | bnez TMP1, <1 // 'no __newindex' flag set: done.
3398 |. nop 4333 |. nop
3399 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4334 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
3400 |. nop 4335 |. nop
3401 | 4336 |
3402 |7: // Possible table write barrier for the value. Skip valiswhite check. 4337 |7: // Possible table write barrier for the value. Skip valiswhite check.
3403 | barrierback TAB:RB, TMP3, TMP0, <2 4338 | barrierback TAB:RB, TMP3, TMP0, <2
3404 break; 4339 break;
4340 case BC_TSETR:
4341 | // RA = dst*8, RB = table*8, RC = key*8
4342 | decode_RB8a RB, INS
4343 | decode_RB8b RB
4344 | decode_RDtoRC8 RC, RD
4345 | addu CARG1, BASE, RB
4346 | addu CARG3, BASE, RC
4347 | lw TAB:CARG2, LO(CARG1)
4348 | lw CARG3, LO(CARG3)
4349 | lbu TMP3, TAB:CARG2->marked
4350 | lw TMP0, TAB:CARG2->asize
4351 | lw TMP1, TAB:CARG2->array
4352 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4353 | bnez AT, >7
4354 |. addu RA, BASE, RA
4355 |2:
4356 | sltu AT, CARG3, TMP0
4357 | sll TMP2, CARG3, 3
4358 | beqz AT, ->vmeta_tsetr // In array part?
4359 |. addu CRET1, TMP1, TMP2
4360 |->BC_TSETR_Z:
4361 | lw SFARG1HI, HI(RA)
4362 | lw SFARG1LO, LO(RA)
4363 | ins_next1
4364 | sw SFARG1HI, HI(CRET1)
4365 | sw SFARG1LO, LO(CRET1)
4366 | ins_next2
4367 |
4368 |7: // Possible table write barrier for the value. Skip valiswhite check.
4369 | barrierback TAB:CARG2, TMP3, CRET1, <2
4370 break;
3405 4371
3406 case BC_TSETM: 4372 case BC_TSETM:
3407 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4373 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3424,10 +4390,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3424 | addu TMP1, TMP1, CARG1 4390 | addu TMP1, TMP1, CARG1
3425 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4391 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3426 |3: // Copy result slots to table. 4392 |3: // Copy result slots to table.
3427 | ldc1 f0, 0(RA) 4393 | lw SFRETHI, HI(RA)
4394 | lw SFRETLO, LO(RA)
3428 | addiu RA, RA, 8 4395 | addiu RA, RA, 8
3429 | sltu AT, RA, TMP2 4396 | sltu AT, RA, TMP2
3430 | sdc1 f0, 0(TMP1) 4397 | sw SFRETHI, HI(TMP1)
4398 | sw SFRETLO, LO(TMP1)
3431 | bnez AT, <3 4399 | bnez AT, <3
3432 |. addiu TMP1, TMP1, 8 4400 |. addiu TMP1, TMP1, 8
3433 | bnez TMP0, >7 4401 | bnez TMP0, >7
@@ -3502,10 +4470,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3502 | beqz NARGS8:RC, >3 4470 | beqz NARGS8:RC, >3
3503 |. move TMP3, NARGS8:RC 4471 |. move TMP3, NARGS8:RC
3504 |2: 4472 |2:
3505 | ldc1 f0, 0(RA) 4473 | lw SFRETHI, HI(RA)
4474 | lw SFRETLO, LO(RA)
3506 | addiu RA, RA, 8 4475 | addiu RA, RA, 8
3507 | addiu TMP3, TMP3, -8 4476 | addiu TMP3, TMP3, -8
3508 | sdc1 f0, 0(TMP2) 4477 | sw SFRETHI, HI(TMP2)
4478 | sw SFRETLO, LO(TMP2)
3509 | bnez TMP3, <2 4479 | bnez TMP3, <2
3510 |. addiu TMP2, TMP2, 8 4480 |. addiu TMP2, TMP2, 8
3511 |3: 4481 |3:
@@ -3542,12 +4512,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3542 | li AT, LJ_TFUNC 4512 | li AT, LJ_TFUNC
3543 | lw TMP1, -24+HI(BASE) 4513 | lw TMP1, -24+HI(BASE)
3544 | lw LFUNC:RB, -24+LO(BASE) 4514 | lw LFUNC:RB, -24+LO(BASE)
3545 | ldc1 f2, -8(BASE) 4515 | lw SFARG1HI, -16+HI(BASE)
3546 | ldc1 f0, -16(BASE) 4516 | lw SFARG1LO, -16+LO(BASE)
4517 | lw SFARG2HI, -8+HI(BASE)
4518 | lw SFARG2LO, -8+LO(BASE)
3547 | sw TMP1, HI(BASE) // Copy callable. 4519 | sw TMP1, HI(BASE) // Copy callable.
3548 | sw LFUNC:RB, LO(BASE) 4520 | sw LFUNC:RB, LO(BASE)
3549 | sdc1 f2, 16(BASE) // Copy control var. 4521 | sw SFARG1HI, 8+HI(BASE) // Copy state.
3550 | sdc1 f0, 8(BASE) // Copy state. 4522 | sw SFARG1LO, 8+LO(BASE)
4523 | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4524 | sw SFARG2LO, 16+LO(BASE)
3551 | addiu BASE, BASE, 8 4525 | addiu BASE, BASE, 8
3552 | bne TMP1, AT, ->vmeta_call 4526 | bne TMP1, AT, ->vmeta_call
3553 |. li NARGS8:RC, 16 // Iterators get 2 arguments. 4527 |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3570,20 +4544,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3570 | beqz AT, >5 // Index points after array part? 4544 | beqz AT, >5 // Index points after array part?
3571 |. sll TMP3, RC, 3 4545 |. sll TMP3, RC, 3
3572 | addu TMP3, TMP1, TMP3 4546 | addu TMP3, TMP1, TMP3
3573 | lw TMP2, HI(TMP3) 4547 | lw SFARG1HI, HI(TMP3)
3574 | ldc1 f0, 0(TMP3) 4548 | lw SFARG1LO, LO(TMP3)
3575 | mtc1 RC, f2
3576 | lhu RD, -4+OFS_RD(PC) 4549 | lhu RD, -4+OFS_RD(PC)
3577 | beq TMP2, TISNIL, <1 // Skip holes in array part. 4550 | sw TISNUM, HI(RA)
4551 | sw RC, LO(RA)
4552 | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
3578 |. addiu RC, RC, 1 4553 |. addiu RC, RC, 1
3579 | cvt.d.w f2, f2 4554 | sw SFARG1HI, 8+HI(RA)
4555 | sw SFARG1LO, 8+LO(RA)
3580 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4556 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3581 | sdc1 f0, 8(RA)
3582 | decode_RD4b RD 4557 | decode_RD4b RD
3583 | addu RD, RD, TMP3 4558 | addu RD, RD, TMP3
3584 | sw RC, -8+LO(RA) // Update control var. 4559 | sw RC, -8+LO(RA) // Update control var.
3585 | addu PC, PC, RD 4560 | addu PC, PC, RD
3586 | sdc1 f2, 0(RA)
3587 |3: 4561 |3:
3588 | ins_next 4562 | ins_next
3589 | 4563 |
@@ -3598,18 +4572,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3598 | sll RB, RC, 3 4572 | sll RB, RC, 3
3599 | subu TMP3, TMP3, RB 4573 | subu TMP3, TMP3, RB
3600 | addu NODE:TMP3, TMP3, TMP2 4574 | addu NODE:TMP3, TMP3, TMP2
3601 | lw RB, HI(NODE:TMP3) 4575 | lw SFARG1HI, NODE:TMP3->val.u32.hi
3602 | ldc1 f0, 0(NODE:TMP3) 4576 | lw SFARG1LO, NODE:TMP3->val.u32.lo
3603 | lhu RD, -4+OFS_RD(PC) 4577 | lhu RD, -4+OFS_RD(PC)
3604 | beq RB, TISNIL, <6 // Skip holes in hash part. 4578 | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
3605 |. addiu RC, RC, 1 4579 |. addiu RC, RC, 1
3606 | ldc1 f2, NODE:TMP3->key 4580 | lw SFARG2HI, NODE:TMP3->key.u32.hi
4581 | lw SFARG2LO, NODE:TMP3->key.u32.lo
3607 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4582 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3608 | sdc1 f0, 8(RA) 4583 | sw SFARG1HI, 8+HI(RA)
4584 | sw SFARG1LO, 8+LO(RA)
3609 | addu RC, RC, TMP0 4585 | addu RC, RC, TMP0
3610 | decode_RD4b RD 4586 | decode_RD4b RD
3611 | addu RD, RD, TMP3 4587 | addu RD, RD, TMP3
3612 | sdc1 f2, 0(RA) 4588 | sw SFARG2HI, HI(RA)
4589 | sw SFARG2LO, LO(RA)
3613 | addu PC, PC, RD 4590 | addu PC, PC, RD
3614 | b <3 4591 | b <3
3615 |. sw RC, -8+LO(RA) // Update control var. 4592 |. sw RC, -8+LO(RA) // Update control var.
@@ -3689,9 +4666,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3689 | bnez AT, >7 4666 | bnez AT, >7
3690 |. addiu MULTRES, TMP1, 8 4667 |. addiu MULTRES, TMP1, 8
3691 |6: 4668 |6:
3692 | ldc1 f0, 0(RC) 4669 | lw SFRETHI, HI(RC)
4670 | lw SFRETLO, LO(RC)
3693 | addiu RC, RC, 8 4671 | addiu RC, RC, 8
3694 | sdc1 f0, 0(RA) 4672 | sw SFRETHI, HI(RA)
4673 | sw SFRETLO, LO(RA)
3695 | sltu AT, RC, TMP3 4674 | sltu AT, RC, TMP3
3696 | bnez AT, <6 // More vararg slots? 4675 | bnez AT, <6 // More vararg slots?
3697 |. addiu RA, RA, 8 4676 |. addiu RA, RA, 8
@@ -3747,10 +4726,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3747 | beqz RC, >3 4726 | beqz RC, >3
3748 |. subu BASE, TMP2, TMP0 4727 |. subu BASE, TMP2, TMP0
3749 |2: 4728 |2:
3750 | ldc1 f0, 0(RA) 4729 | lw SFRETHI, HI(RA)
4730 | lw SFRETLO, LO(RA)
3751 | addiu RA, RA, 8 4731 | addiu RA, RA, 8
3752 | addiu RC, RC, -8 4732 | addiu RC, RC, -8
3753 | sdc1 f0, 0(TMP2) 4733 | sw SFRETHI, HI(TMP2)
4734 | sw SFRETLO, LO(TMP2)
3754 | bnez RC, <2 4735 | bnez RC, <2
3755 |. addiu TMP2, TMP2, 8 4736 |. addiu TMP2, TMP2, 8
3756 |3: 4737 |3:
@@ -3791,14 +4772,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3791 | lw INS, -4(PC) 4772 | lw INS, -4(PC)
3792 | addiu TMP2, BASE, -8 4773 | addiu TMP2, BASE, -8
3793 if (op == BC_RET1) { 4774 if (op == BC_RET1) {
3794 | ldc1 f0, 0(RA) 4775 | lw SFRETHI, HI(RA)
4776 | lw SFRETLO, LO(RA)
3795 } 4777 }
3796 | decode_RB8a RB, INS 4778 | decode_RB8a RB, INS
3797 | decode_RA8a RA, INS 4779 | decode_RA8a RA, INS
3798 | decode_RB8b RB 4780 | decode_RB8b RB
3799 | decode_RA8b RA 4781 | decode_RA8b RA
3800 if (op == BC_RET1) { 4782 if (op == BC_RET1) {
3801 | sdc1 f0, 0(TMP2) 4783 | sw SFRETHI, HI(TMP2)
4784 | sw SFRETLO, LO(TMP2)
3802 } 4785 }
3803 | subu BASE, TMP2, RA 4786 | subu BASE, TMP2, RA
3804 |5: 4787 |5:
@@ -3840,69 +4823,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3840 | // RA = base*8, RD = target (after end of loop or start of loop) 4823 | // RA = base*8, RD = target (after end of loop or start of loop)
3841 vk = (op == BC_IFORL || op == BC_JFORL); 4824 vk = (op == BC_IFORL || op == BC_JFORL);
3842 | addu RA, BASE, RA 4825 | addu RA, BASE, RA
3843 if (vk) { 4826 | lw SFARG1HI, FORL_IDX*8+HI(RA)
3844 | ldc1 f0, FORL_IDX*8(RA) 4827 | lw SFARG1LO, FORL_IDX*8+LO(RA)
3845 | ldc1 f4, FORL_STEP*8(RA)
3846 | ldc1 f2, FORL_STOP*8(RA)
3847 | lw TMP3, FORL_STEP*8+HI(RA)
3848 | add.d f0, f0, f4
3849 | sdc1 f0, FORL_IDX*8(RA)
3850 } else {
3851 | lw TMP1, FORL_IDX*8+HI(RA)
3852 | lw TMP3, FORL_STEP*8+HI(RA)
3853 | lw TMP2, FORL_STOP*8+HI(RA)
3854 | sltiu TMP1, TMP1, LJ_TISNUM
3855 | sltiu TMP0, TMP3, LJ_TISNUM
3856 | sltiu TMP2, TMP2, LJ_TISNUM
3857 | and TMP1, TMP1, TMP0
3858 | and TMP1, TMP1, TMP2
3859 | ldc1 f0, FORL_IDX*8(RA)
3860 | beqz TMP1, ->vmeta_for
3861 |. ldc1 f2, FORL_STOP*8(RA)
3862 }
3863 if (op != BC_JFORL) { 4828 if (op != BC_JFORL) {
3864 | srl RD, RD, 1 4829 | srl RD, RD, 1
3865 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) 4830 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4831 | addu TMP2, RD, TMP2
4832 }
4833 if (!vk) {
4834 | lw SFARG2HI, FORL_STOP*8+HI(RA)
4835 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4836 | bne SFARG1HI, TISNUM, >5
4837 |. lw SFRETHI, FORL_STEP*8+HI(RA)
4838 | xor AT, SFARG2HI, TISNUM
4839 | lw SFRETLO, FORL_STEP*8+LO(RA)
4840 | xor TMP0, SFRETHI, TISNUM
4841 | or AT, AT, TMP0
4842 | bnez AT, ->vmeta_for
4843 |. slt AT, SFRETLO, r0
4844 | slt CRET1, SFARG2LO, SFARG1LO
4845 | slt TMP1, SFARG1LO, SFARG2LO
4846 | movn CRET1, TMP1, AT
4847 } else {
4848 | bne SFARG1HI, TISNUM, >5
4849 |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4850 | lw SFRETLO, FORL_STOP*8+LO(RA)
4851 | move TMP3, SFARG1LO
4852 | addu SFARG1LO, SFARG1LO, SFARG2LO
4853 | xor TMP0, SFARG1LO, TMP3
4854 | xor TMP1, SFARG1LO, SFARG2LO
4855 | and TMP0, TMP0, TMP1
4856 | slt TMP1, SFARG1LO, SFRETLO
4857 | slt CRET1, SFRETLO, SFARG1LO
4858 | slt AT, SFARG2LO, r0
4859 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4860 | movn CRET1, TMP1, AT
4861 | or CRET1, CRET1, TMP0
4862 }
4863 |1:
4864 if (op == BC_FORI) {
4865 | movz TMP2, r0, CRET1
4866 | addu PC, PC, TMP2
4867 } else if (op == BC_JFORI) {
4868 | addu PC, PC, TMP2
4869 | lhu RD, -4+OFS_RD(PC)
4870 } else if (op == BC_IFORL) {
4871 | movn TMP2, r0, CRET1
4872 | addu PC, PC, TMP2
3866 } 4873 }
3867 | c.le.d 0, f0, f2 4874 if (vk) {
3868 | c.le.d 1, f2, f0 4875 | sw SFARG1HI, FORL_IDX*8+HI(RA)
3869 | sdc1 f0, FORL_EXT*8(RA) 4876 | sw SFARG1LO, FORL_IDX*8+LO(RA)
4877 }
4878 | ins_next1
4879 | sw SFARG1HI, FORL_EXT*8+HI(RA)
4880 | sw SFARG1LO, FORL_EXT*8+LO(RA)
4881 |2:
3870 if (op == BC_JFORI) { 4882 if (op == BC_JFORI) {
3871 | li TMP1, 1 4883 | beqz CRET1, =>BC_JLOOP
3872 | li TMP2, 1
3873 | addu TMP0, RD, TMP0
3874 | slt TMP3, TMP3, r0
3875 | movf TMP1, r0, 0
3876 | addu PC, PC, TMP0
3877 | movf TMP2, r0, 1
3878 | lhu RD, -4+OFS_RD(PC)
3879 | movn TMP1, TMP2, TMP3
3880 | bnez TMP1, =>BC_JLOOP
3881 |. decode_RD8b RD 4884 |. decode_RD8b RD
3882 } else if (op == BC_JFORL) { 4885 } else if (op == BC_JFORL) {
3883 | li TMP1, 1 4886 | beqz CRET1, =>BC_JLOOP
3884 | li TMP2, 1 4887 }
3885 | slt TMP3, TMP3, r0 4888 | ins_next2
3886 | movf TMP1, r0, 0 4889 |
3887 | movf TMP2, r0, 1 4890 |5: // FP loop.
3888 | movn TMP1, TMP2, TMP3 4891 |.if FPU
3889 | bnez TMP1, =>BC_JLOOP 4892 if (!vk) {
4893 | ldc1 f0, FORL_IDX*8(RA)
4894 | ldc1 f2, FORL_STOP*8(RA)
4895 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4896 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4897 | sltiu AT, SFRETHI, LJ_TISNUM
4898 | and TMP0, TMP0, TMP1
4899 | and AT, AT, TMP0
4900 | beqz AT, ->vmeta_for
4901 |. slt TMP3, SFRETHI, r0
4902 | c.ole.d 0, f0, f2
4903 | c.ole.d 1, f2, f0
4904 | li CRET1, 1
4905 | movt CRET1, r0, 0
4906 | movt AT, r0, 1
4907 | b <1
4908 |. movn CRET1, AT, TMP3
4909 } else {
4910 | ldc1 f0, FORL_IDX*8(RA)
4911 | ldc1 f4, FORL_STEP*8(RA)
4912 | ldc1 f2, FORL_STOP*8(RA)
4913 | lw SFARG2HI, FORL_STEP*8+HI(RA)
4914 | add.d f0, f0, f4
4915 | c.ole.d 0, f0, f2
4916 | c.ole.d 1, f2, f0
4917 | slt TMP3, SFARG2HI, r0
4918 | li CRET1, 1
4919 | li AT, 1
4920 | movt CRET1, r0, 0
4921 | movt AT, r0, 1
4922 | movn CRET1, AT, TMP3
4923 if (op == BC_IFORL) {
4924 | movn TMP2, r0, CRET1
4925 | addu PC, PC, TMP2
4926 }
4927 | sdc1 f0, FORL_IDX*8(RA)
4928 | ins_next1
4929 | b <2
4930 |. sdc1 f0, FORL_EXT*8(RA)
4931 }
4932 |.else
4933 if (!vk) {
4934 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4935 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4936 | sltiu AT, SFRETHI, LJ_TISNUM
4937 | and TMP0, TMP0, TMP1
4938 | and AT, AT, TMP0
4939 | beqz AT, ->vmeta_for
4940 |. nop
4941 | bal ->vm_sfcmpolex
4942 |. move TMP3, SFRETHI
4943 | b <1
3890 |. nop 4944 |. nop
3891 } else { 4945 } else {
3892 | addu TMP1, RD, TMP0 4946 | lw SFARG2HI, FORL_STEP*8+HI(RA)
3893 | slt TMP3, TMP3, r0 4947 | load_got __adddf3
3894 | move TMP2, TMP1 4948 | call_extern
3895 if (op == BC_FORI) { 4949 |. sw TMP2, ARG5
3896 | movt TMP1, r0, 0 4950 | lw SFARG2HI, FORL_STOP*8+HI(RA)
3897 | movt TMP2, r0, 1 4951 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4952 | move SFARG1HI, SFRETHI
4953 | move SFARG1LO, SFRETLO
4954 | bal ->vm_sfcmpolex
4955 |. lw TMP3, FORL_STEP*8+HI(RA)
4956 if ( op == BC_JFORL ) {
4957 | lhu RD, -4+OFS_RD(PC)
4958 | lw TMP2, ARG5
4959 | b <1
4960 |. decode_RD8b RD
3898 } else { 4961 } else {
3899 | movf TMP1, r0, 0 4962 | b <1
3900 | movf TMP2, r0, 1 4963 |. lw TMP2, ARG5
3901 } 4964 }
3902 | movn TMP1, TMP2, TMP3
3903 | addu PC, PC, TMP1
3904 } 4965 }
3905 | ins_next 4966 |.endif
3906 break; 4967 break;
3907 4968
3908 case BC_ITERL: 4969 case BC_ITERL:
@@ -3961,8 +5022,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3961 | sw AT, DISPATCH_GL(vmstate)(DISPATCH) 5022 | sw AT, DISPATCH_GL(vmstate)(DISPATCH)
3962 | lw TRACE:TMP2, 0(TMP1) 5023 | lw TRACE:TMP2, 0(TMP1)
3963 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5024 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3964 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3965 | lw TMP2, TRACE:TMP2->mcode 5025 | lw TMP2, TRACE:TMP2->mcode
5026 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3966 | jr TMP2 5027 | jr TMP2
3967 |. addiu JGL, DISPATCH, GG_DISP2G+32768 5028 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3968 |.endif 5029 |.endif
@@ -4088,6 +5149,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4088 | li_vmstate INTERP 5149 | li_vmstate INTERP
4089 | lw PC, FRAME_PC(BASE) // Fetch PC of caller. 5150 | lw PC, FRAME_PC(BASE) // Fetch PC of caller.
4090 | subu RA, TMP1, RD // RA = L->top - nresults*8 5151 | subu RA, TMP1, RD // RA = L->top - nresults*8
5152 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
4091 | b ->vm_returnc 5153 | b ->vm_returnc
4092 |. st_vmstate 5154 |. st_vmstate
4093 break; 5155 break;
@@ -4150,8 +5212,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4150 fcofs, CFRAME_SIZE); 5212 fcofs, CFRAME_SIZE);
4151 for (i = 23; i >= 16; i--) 5213 for (i = 23; i >= 16; i--)
4152 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5214 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5215#if !LJ_SOFTFP
4153 for (i = 30; i >= 20; i -= 2) 5216 for (i = 30; i >= 20; i -= 2)
4154 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5217 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5218#endif
4155 fprintf(ctx->fp, 5219 fprintf(ctx->fp,
4156 "\t.align 2\n" 5220 "\t.align 2\n"
4157 ".LEFDE0:\n\n"); 5221 ".LEFDE0:\n\n");
@@ -4203,8 +5267,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4203 fcofs, CFRAME_SIZE); 5267 fcofs, CFRAME_SIZE);
4204 for (i = 23; i >= 16; i--) 5268 for (i = 23; i >= 16; i--)
4205 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5269 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5270#if !LJ_SOFTFP
4206 for (i = 30; i >= 20; i -= 2) 5271 for (i = 30; i >= 20; i -= 2)
4207 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5272 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5273#endif
4208 fprintf(ctx->fp, 5274 fprintf(ctx->fp,
4209 "\t.align 2\n" 5275 "\t.align 2\n"
4210 ".LEFDE2:\n\n"); 5276 ".LEFDE2:\n\n");
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
new file mode 100644
index 00000000..05395ffd
--- /dev/null
+++ b/src/vm_mips64.dasc
@@ -0,0 +1,5458 @@
1|// Low-level VM code for MIPS64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6|// Sponsored by Cisco Systems, Inc.
7|
8|.arch mips64
9|.section code_op, code_sub
10|
11|.actionlist build_actionlist
12|.globals GLOB_
13|.globalnames globnames
14|.externnames extnames
15|
16|// Note: The ragged indentation of the instructions is intentional.
17|// The starting columns indicate data dependencies.
18|
19|//-----------------------------------------------------------------------
20|
21|// Fixed register assignments for the interpreter.
22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
23|
24|.macro .FPU, a, b // Emits "a, b" only when FPU is set; expands to nothing otherwise.
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
30|// The following must be C callee-save (but BASE is often refetched).
31|.define BASE, r16 // Base of current Lua stack frame.
32|.define KBASE, r17 // Constants of current Lua function.
33|.define PC, r18 // Next PC.
34|.define DISPATCH, r19 // Opcode dispatch table.
35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
37|
38|.define JGL, r30 // On-trace: global_State + 32768. Same register as TISNIL.
39|
40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNIL, r30 // Loaded with LJ_TNIL by the VM entry code.
42|.define TISNUM, r22 // Loaded with LJ_TISNUM by the VM entry code.
43|.if FPU
44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
46|
47|// The following temporaries are not saved across C calls, except for RA.
48|.define RA, r23 // Callee-save.
49|.define RB, r8 // Same register as CARG5.
50|.define RC, r9 // Same register as CARG6.
51|.define RD, r10 // Same register as CARG7.
52|.define INS, r11 // Same register as CARG8.
53|
54|.define AT, r1 // Assembler temporary.
55|.define TMP0, r12
56|.define TMP1, r13
57|.define TMP2, r14
58|.define TMP3, r15
59|
60|// MIPS n64 calling convention.
61|.define CFUNCADDR, r25 // Jump target register used by call_intern/call_extern/jmp_extern.
62|.define CARG1, r4
63|.define CARG2, r5
64|.define CARG3, r6
65|.define CARG4, r7
66|.define CARG5, r8 // Same register as RB.
67|.define CARG6, r9 // Same register as RC.
68|.define CARG7, r10 // Same register as RD.
69|.define CARG8, r11 // Same register as INS.
70|
71|.define CRET1, r2
72|.define CRET2, r3
73|
74|.if FPU
75|.define FARG1, f12
76|.define FARG2, f13
77|.define FARG3, f14
78|.define FARG4, f15
79|.define FARG5, f16
80|.define FARG6, f17
81|.define FARG7, f18
82|.define FARG8, f19
83|
84|.define FRET1, f0
85|.define FRET2, f2
86|
87|.define FTMP0, f20
88|.define FTMP1, f21
89|.define FTMP2, f22
90|.endif
91|
92|// Stack layout while in interpreter. Must match with lj_frame.h.
93|.if FPU // MIPS64 hard-float.
94|
95|.define CFRAME_SPACE, 192 // Delta for sp (subtracted by saveregs, added back by restoreregs_ret).
96|
97|//----- 16 byte aligned, <-- sp entering interpreter
98|.define SAVE_ERRF, 188(sp) // 32 bit values.
99|.define SAVE_NRES, 184(sp)
100|.define SAVE_CFRAME, 176(sp) // 64 bit values.
101|.define SAVE_L, 168(sp)
102|.define SAVE_PC, 160(sp)
103|//----- 16 byte aligned
104|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves.
105|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves.
106|
107|.else // MIPS64 soft-float
108|
109|.define CFRAME_SPACE, 128 // Delta for sp (no FPR save area in this layout).
110|
111|//----- 16 byte aligned, <-- sp entering interpreter
112|.define SAVE_ERRF, 124(sp) // 32 bit values.
113|.define SAVE_NRES, 120(sp)
114|.define SAVE_CFRAME, 112(sp) // 64 bit values.
115|.define SAVE_L, 104(sp)
116|.define SAVE_PC, 96(sp)
117|//----- 16 byte aligned
118|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves.
119|
120|.endif
121|
122|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code.
123|.define TMPD, 0(sp)
124|//----- 16 byte aligned
125|
126|.define TMPD_OFS, 0 // Offset of TMPD relative to sp.
127|
128|.define SAVE_MULTRES, TMPD // Shares the TMPD slot.
129|
130|//-----------------------------------------------------------------------
131|
132|.macro saveregs // Allocates CFRAME_SPACE on the stack, then saves ra, r30 and r16-r23 (plus f24-f31 via .FPU).
133| daddiu sp, sp, -CFRAME_SPACE
134| sd ra, SAVE_GPR_+9*8(sp)
135| sd r30, SAVE_GPR_+8*8(sp)
136| .FPU sdc1 f31, SAVE_FPR_+7*8(sp)
137| sd r23, SAVE_GPR_+7*8(sp)
138| .FPU sdc1 f30, SAVE_FPR_+6*8(sp)
139| sd r22, SAVE_GPR_+6*8(sp)
140| .FPU sdc1 f29, SAVE_FPR_+5*8(sp)
141| sd r21, SAVE_GPR_+5*8(sp)
142| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
143| sd r20, SAVE_GPR_+4*8(sp)
144| .FPU sdc1 f27, SAVE_FPR_+3*8(sp)
145| sd r19, SAVE_GPR_+3*8(sp)
146| .FPU sdc1 f26, SAVE_FPR_+2*8(sp)
147| sd r18, SAVE_GPR_+2*8(sp)
148| .FPU sdc1 f25, SAVE_FPR_+1*8(sp)
149| sd r17, SAVE_GPR_+1*8(sp)
150| .FPU sdc1 f24, SAVE_FPR_+0*8(sp)
151| sd r16, SAVE_GPR_+0*8(sp)
152|.endmacro
153|
154|.macro restoreregs_ret // Reloads every register stored by saveregs, then returns through ra.
155| ld ra, SAVE_GPR_+9*8(sp)
156| ld r30, SAVE_GPR_+8*8(sp)
157| ld r23, SAVE_GPR_+7*8(sp)
158| .FPU ldc1 f31, SAVE_FPR_+7*8(sp)
159| ld r22, SAVE_GPR_+6*8(sp)
160| .FPU ldc1 f30, SAVE_FPR_+6*8(sp)
161| ld r21, SAVE_GPR_+5*8(sp)
162| .FPU ldc1 f29, SAVE_FPR_+5*8(sp)
163| ld r20, SAVE_GPR_+4*8(sp)
164| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
165| ld r19, SAVE_GPR_+3*8(sp)
166| .FPU ldc1 f27, SAVE_FPR_+3*8(sp)
167| ld r18, SAVE_GPR_+2*8(sp)
168| .FPU ldc1 f26, SAVE_FPR_+2*8(sp)
169| ld r17, SAVE_GPR_+1*8(sp)
170| .FPU ldc1 f25, SAVE_FPR_+1*8(sp)
171| ld r16, SAVE_GPR_+0*8(sp)
172| .FPU ldc1 f24, SAVE_FPR_+0*8(sp)
173| jr ra
174| daddiu sp, sp, CFRAME_SPACE // Placed after the jr (delay slot): releases the frame allocated by saveregs.
175|.endmacro
176|
177|// Type definitions. Some of these are only used for documentation.
178|.type L, lua_State, LREG // Bound to LREG: "L->field" and plain "L" both refer to r20.
179|.type GL, global_State // No bound register: used as GL:reg->field.
180|.type TVALUE, TValue
181|.type GCOBJ, GCobj
182|.type STR, GCstr
183|.type TAB, GCtab
184|.type LFUNC, GCfuncL
185|.type CFUNC, GCfuncC
186|.type PROTO, GCproto
187|.type UPVAL, GCupval
188|.type NODE, Node
189|.type NARGS8, int // Plain int; used as an annotation such as NARGS8:RC.
190|.type TRACE, GCtrace
191|.type SBUF, SBuf
192|
193|//-----------------------------------------------------------------------
194|
195|// Trap for not-yet-implemented parts.
196|.macro NYI; .long 0xf0f0f0f0; .endmacro // Emits the fixed word 0xf0f0f0f0.
197|
198|// Macros to mark delay slots. Each one just re-emits its arguments unchanged.
199|.macro ., a; a; .endmacro
200|.macro ., a,b; a,b; .endmacro
201|.macro ., a,b,c; a,b,c; .endmacro
202|.macro ., a,b,c,d; a,b,c,d; .endmacro
203|
204|.define FRAME_PC, -8 // Byte offset from BASE of the frame's PC slot.
205|.define FRAME_FUNC, -16 // Byte offset from BASE of the frame's function slot.
206|
207|//-----------------------------------------------------------------------
208|
209|// Endian-specific defines. HI/LO and OFS_* are byte offsets added to a base address.
210|.if ENDIAN_LE
211|.define HI, 4
212|.define LO, 0
213|.define OFS_RD, 2 // Used with lhu to read the 16 bit RD field of an instruction.
214|.define OFS_RA, 1 // Used with lbu to read the 8 bit RA field of an instruction.
215|.define OFS_OP, 0
216|.else
217|.define HI, 0
218|.define LO, 4
219|.define OFS_RD, 0
220|.define OFS_RA, 2
221|.define OFS_OP, 3
222|.endif
223|
224|// Instruction decode. The *8a/*8b pairs together yield the field value times 8.
225|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro // dst = ins bits 0-7.
226|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro // dst = ins bits 0-7.
227|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro // dst *= 8.
228|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro // With RC8b: (ins bits 16-23) * 8.
229|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro
230|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro // dst *= 4.
231|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro // With RA8b: (ins bits 8-15) * 8.
232|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro
233|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro // With RB8b: (ins bits 24-31) * 8.
234|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro
235|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro // With RD8b: (ins bits 16-31) * 8.
236|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro
237|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro // Keeps bits 3-10 of src.
238|
239|// Instruction fetch. Loads the 32 bit word at PC into INS and advances PC by 4.
240|.macro ins_NEXT1
241| lw INS, 0(PC)
242| daddiu PC, PC, 4
243|.endmacro
244|// Instruction decode+dispatch. Leaves RA and RD scaled by 8 and jumps to the handler.
245|.macro ins_NEXT2
246| decode_OP8a TMP1, INS
247| decode_OP8b TMP1
248| daddu TMP0, DISPATCH, TMP1 // TMP0 = &DISPATCH[opcode] (8 byte entries).
249| decode_RD8a RD, INS
250| ld AT, 0(TMP0) // AT = handler address.
251| decode_RA8a RA, INS
252| decode_RD8b RD
253| jr AT
254| decode_RA8b RA // Placed after the jr (delay slot).
255|.endmacro
256|.macro ins_NEXT // Fetch followed by decode+dispatch.
257| ins_NEXT1
258| ins_NEXT2
259|.endmacro
260|
261|// Instruction footer.
262|.if 1 // Constant condition: the replicated-dispatch branch below is the one assembled.
263| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
264| .define ins_next, ins_NEXT
265| .define ins_next_, ins_NEXT
266| .define ins_next1, ins_NEXT1
267| .define ins_next2, ins_NEXT2
268|.else
269| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
270| // Affects only certain kinds of benchmarks (and only with -j off).
271| .macro ins_next
272| b ->ins_next
273| .endmacro
274| .macro ins_next1 // Intentionally empty in this variant.
275| .endmacro
276| .macro ins_next2
277| b ->ins_next
278| .endmacro
279| .macro ins_next_ // Defines the shared ->ins_next label that the other variants branch to.
280| ->ins_next:
281| ins_NEXT
282| .endmacro
283|.endif
284|
285|// Call decode and dispatch.
286|.macro ins_callt
287| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
288| ld PC, LFUNC:RB->pc // PC = pc field of the function in RB.
289| lw INS, 0(PC)
290| daddiu PC, PC, 4
291| decode_OP8a TMP1, INS
292| decode_RA8a RA, INS
293| decode_OP8b TMP1
294| decode_RA8b RA
295| daddu TMP0, DISPATCH, TMP1
296| ld TMP0, 0(TMP0) // TMP0 = handler address from the dispatch table.
297| jr TMP0
298| daddu RA, RA, BASE // Placed after the jr (delay slot): RA = BASE + (RA field)*8.
299|.endmacro
300|
301|.macro ins_call // Stores the caller PC into the new frame, then dispatches like ins_callt.
302| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
303| sd PC, FRAME_PC(BASE)
304| ins_callt
305|.endmacro
306|
307|//-----------------------------------------------------------------------
308|
309|.macro branch_RD // Adds a biased jump offset derived from RD to PC. Uses TMP0 and AT.
310| srl TMP0, RD, 1 // Halve RD (presumably RD is the field*8 from ins_NEXT2, giving field*4 — confirm at call sites).
311| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) // Upper half of AT = low 16 bits of -(BCBIAS_J*4 >> 16).
312| addu TMP0, TMP0, AT // 32 bit add of the negated bias.
313| daddu PC, PC, TMP0
314|.endmacro
315|
316|// Assumes DISPATCH is relative to GL. These macros yield byte offsets to be used as DISPATCH displacements.
317#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
318#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
319#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch))
320#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
321|// PC2PROTO(field): offset of a GCproto field measured from the end of the GCproto struct (a negative value).
322#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
323|
324|.macro load_got, func // Loads the GOT entry for func into CFUNCADDR.
325| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH)
326|.endmacro
327|// Much faster. Sadly, there's no easy way to force the required code layout.
328|// .macro call_intern, func; bal extern func; .endmacro
329|.macro call_intern, func; jalr CFUNCADDR; .endmacro // func is not used here; CFUNCADDR must already be loaded.
330|.macro call_extern; jalr CFUNCADDR; .endmacro // Call through CFUNCADDR.
331|.macro jmp_extern; jr CFUNCADDR; .endmacro // Jump (no link) through CFUNCADDR.
332|
333|.macro hotcheck, delta, target // Subtracts delta from a 16 bit counter selected by PC; branches to target if the result is negative. Uses TMP1, TMP2.
334| dsrl TMP1, PC, 1
335| andi TMP1, TMP1, 126 // TMP1 = (PC >> 1) & 126: an even byte offset in the range 0..126.
336| daddu TMP1, TMP1, DISPATCH
337| lhu TMP2, GG_DISP2HOT(TMP1)
338| addiu TMP2, TMP2, -delta
339| bltz TMP2, target
340|. sh TMP2, GG_DISP2HOT(TMP1) // Delay slot: the updated counter is written back either way.
341|.endmacro
342|
343|.macro hotloop // hotcheck with HOTCOUNT_LOOP, branching to ->vm_hotloop.
344| hotcheck HOTCOUNT_LOOP, ->vm_hotloop
345|.endmacro
346|
347|.macro hotcall // hotcheck with HOTCOUNT_CALL, branching to ->vm_hotcall.
348| hotcheck HOTCOUNT_CALL, ->vm_hotcall
349|.endmacro
350|
351|// Set current VM state. Uses TMP0. li_vmstate loads the value, st_vmstate stores it.
352|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro // TMP0 = ~LJ_VMST_<st>.
353|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro // 32 bit store of TMP0 to the vmstate field.
354|
355|// Move table write barrier back. Overwrites mark and tmp.
356|.macro barrierback, tab, mark, tmp, target
357| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) // tmp = current gc.grayagain value.
358| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab)
359| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) // gc.grayagain = tab.
360| sb mark, tab->marked
361| b target
362|. sd tmp, tab->gclist // Delay slot: tab->gclist = previous gc.grayagain value.
363|.endmacro
364|
365|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg.
366|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro // In-place form.
367|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro // Two-operand form: result goes to dst, reg is preserved.
368|
369|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst.
370|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro
371|
372|// Extract (negative) type tag. The arithmetic shift by 47 sign-extends the tag.
373|.macro gettp, dst, src; dsra dst, src, 47; .endmacro
374|
375|// Macros to check the TValue type and extract the GCobj. Branch on failure.
376|.macro checktp, reg, tp, target // tp is the negated tag (e.g. -LJ_TSTR), so tag + tp is 0 on a match. Clobbers AT.
377| gettp AT, reg
378| daddiu AT, AT, tp
379| bnez AT, target
380|. cleartp reg // Delay slot: the tag is cleared whether or not the branch is taken.
381|.endmacro
382|.macro checktp, dst, reg, tp, target // As above, but the untagged value goes to dst and reg is kept.
383| gettp AT, reg
384| daddiu AT, AT, tp
385| bnez AT, target
386|. cleartp dst, reg
387|.endmacro
388|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
389|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
390|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
391|.macro checkint, reg, target // Caveat: has delay slot!
392| gettp AT, reg
393| bne AT, TISNUM, target // Branch if the tag differs from TISNUM. Caller supplies the next (delay slot) instruction.
394|.endmacro
395|.macro checknum, reg, target // Caveat: has delay slot!
396| gettp AT, reg
397| sltiu AT, AT, LJ_TISNUM // AT = 1 if the tag is below LJ_TISNUM (unsigned compare).
398| beqz AT, target // Branch if not below. Caller supplies the next (delay slot) instruction.
399|.endmacro
400|
401|.macro mov_false, reg // Builds a constant with exactly one zero bit in reg.
402| lu reg, 0x8000 // NOTE(review): assumes lu loads 0x8000 zero-extended (not shifted like lui) — confirm in dasm_mips.lua.
403| dsll reg, reg, 32 // Under that assumption: reg = 1 << 47.
404| not reg, reg // reg = ~(1 << 47).
405|.endmacro
406|.macro mov_true, reg
407| li reg, 0x0001
408| dsll reg, reg, 48 // reg = 1 << 48.
409| not reg, reg // reg = ~(1 << 48).
410|.endmacro
411|
412|//-----------------------------------------------------------------------
413
414/* Generate subroutines used by opcodes and other parts of the VM. */
415/* The .code_sub section should be last to help static branch prediction. */
416static void build_subroutines(BuildCtx *ctx)
417{
418 |.code_sub
419 |
420 |//-----------------------------------------------------------------------
421 |//-- Return handling ----------------------------------------------------
422 |//-----------------------------------------------------------------------
423 |
424 |->vm_returnp:
425 | // See vm_return. Also: TMP2 = previous base.
426 | andi AT, PC, FRAME_P
427 | beqz AT, ->cont_dispatch
428 |
429 | // Return from pcall or xpcall fast func.
430 |. mov_true TMP1
431 | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
432 | move BASE, TMP2 // Restore caller base.
433 | // Prepending may overwrite the pcall frame, so do it at the end.
434 | sd TMP1, -8(RA) // Prepend true to results.
435 | daddiu RA, RA, -8
436 |
437 |->vm_returnc:
438 | addiu RD, RD, 8 // RD = (nresults+1)*8.
439 | andi TMP0, PC, FRAME_TYPE
440 | beqz RD, ->vm_unwind_c_eh
441 |. li CRET1, LUA_YIELD
442 | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
443 |. move MULTRES, RD
444 |
445 |->vm_return:
446 | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
447 | // TMP0 = PC & FRAME_TYPE
448 | li TMP2, -8
449 | xori AT, TMP0, FRAME_C
450 | and TMP2, PC, TMP2
451 | bnez AT, ->vm_returnp
452 | dsubu TMP2, BASE, TMP2 // TMP2 = previous base.
453 |
454 | addiu TMP1, RD, -8
455 | sd TMP2, L->base
456 | li_vmstate C
457 | lw TMP2, SAVE_NRES
458 | daddiu BASE, BASE, -16
459 | st_vmstate
460 | beqz TMP1, >2
461 |. sll TMP2, TMP2, 3
462 |1:
463 | addiu TMP1, TMP1, -8
464 | ld CRET1, 0(RA)
465 | daddiu RA, RA, 8
466 | sd CRET1, 0(BASE)
467 | bnez TMP1, <1
468 |. daddiu BASE, BASE, 8
469 |
470 |2:
471 | bne TMP2, RD, >6
472 |3:
473 |. sd BASE, L->top // Store new top.
474 |
475 |->vm_leave_cp:
476 | ld TMP0, SAVE_CFRAME // Restore previous C frame.
477 | move CRET1, r0 // Ok return status for vm_pcall.
478 | sd TMP0, L->cframe
479 |
480 |->vm_leave_unw:
481 | restoreregs_ret
482 |
483 |6:
484 | ld TMP1, L->maxstack
485 | slt AT, TMP2, RD
486 | bnez AT, >7 // Less results wanted?
487 | // More results wanted. Check stack size and fill up results with nil.
488 |. slt AT, BASE, TMP1
489 | beqz AT, >8
490 |. nop
491 | sd TISNIL, 0(BASE)
492 | addiu RD, RD, 8
493 | b <2
494 |. daddiu BASE, BASE, 8
495 |
496 |7: // Less results wanted.
497 | subu TMP0, RD, TMP2
498 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it.
499 |.if MIPSR6
500 | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
501 | seleqz BASE, BASE, TMP2
502 | b <3
503 |. or BASE, BASE, TMP0
504 |.else
505 | b <3
506 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case?
507 |.endif
508 |
509 |8: // Corner case: need to grow stack for filling up results.
510 | // This can happen if:
511 | // - A C function grows the stack (a lot).
512 | // - The GC shrinks the stack in between.
513 | // - A return back from a lua_call() with (high) nresults adjustment.
514 | load_got lj_state_growstack
515 | move MULTRES, RD
516 | srl CARG2, TMP2, 3
517 | call_intern lj_state_growstack // (lua_State *L, int n)
518 |. move CARG1, L
519 | lw TMP2, SAVE_NRES
520 | ld BASE, L->top // Need the (realloced) L->top in BASE.
521 | move RD, MULTRES
522 | b <2
523 |. sll TMP2, TMP2, 3
524 |
525 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
526 | // (void *cframe, int errcode)
527 | move sp, CARG1
528 | move CRET1, CARG2
529 |->vm_unwind_c_eh: // Landing pad for external unwinder.
530 | ld L, SAVE_L
531 | li TMP0, ~LJ_VMST_C
532 | ld GL:TMP1, L->glref
533 | b ->vm_leave_unw
534 |. sw TMP0, GL:TMP1->vmstate
535 |
536 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
537 | // (void *cframe)
538 | li AT, -4
539 | and sp, CARG1, AT
540 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
541 | ld L, SAVE_L
542 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
543 | li TISNIL, LJ_TNIL
544 | li TISNUM, LJ_TISNUM
545 | ld BASE, L->base
546 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
547 | .FPU mtc1 TMP3, TOBIT
548 | mov_false TMP1
549 | li_vmstate INTERP
550 | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame.
551 | .FPU cvt.d.s TOBIT, TOBIT
552 | daddiu RA, BASE, -8 // Results start at BASE-8.
553 | daddiu DISPATCH, DISPATCH, GG_G2DISP
554 | sd TMP1, 0(RA) // Prepend false to error message.
555 | st_vmstate
556 | b ->vm_returnc
557 |. li RD, 16 // 2 results: false + error message.
558 |
559 |->vm_unwind_stub: // Jump to exit stub from unwinder.
560 | jr CARG1
561 |. move ra, CARG2
562 |
563 |//-----------------------------------------------------------------------
564 |//-- Grow stack for calls -----------------------------------------------
565 |//-----------------------------------------------------------------------
566 |
567 |->vm_growstack_c: // Grow stack for C function.
568 | b >2
569 |. li CARG2, LUA_MINSTACK
570 |
571 |->vm_growstack_l: // Grow stack for Lua function.
572 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
573 | daddu RC, BASE, RC
574 | dsubu RA, RA, BASE
575 | sd BASE, L->base
576 | daddiu PC, PC, 4 // Must point after first instruction.
577 | sd RC, L->top
578 | srl CARG2, RA, 3
579 |2:
580 | // L->base = new base, L->top = top
581 | load_got lj_state_growstack
582 | sd PC, SAVE_PC
583 | call_intern lj_state_growstack // (lua_State *L, int n)
584 |. move CARG1, L
585 | ld BASE, L->base
586 | ld RC, L->top
587 | ld LFUNC:RB, FRAME_FUNC(BASE)
588 | dsubu RC, RC, BASE
589 | cleartp LFUNC:RB
590 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
591 | ins_callt // Just retry the call.
592 |
593 |//-----------------------------------------------------------------------
594 |//-- Entry points into the assembler VM ---------------------------------
595 |//-----------------------------------------------------------------------
596 |
597 |->vm_resume: // Setup C frame and resume thread.
598 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
599 | saveregs
600 | move L, CARG1
601 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
602 | move BASE, CARG2
603 | lbu TMP1, L->status
604 | sd L, SAVE_L
605 | li PC, FRAME_CP
606 | daddiu TMP0, sp, CFRAME_RESUME
607 | daddiu DISPATCH, DISPATCH, GG_G2DISP
608 | sw r0, SAVE_NRES
609 | sw r0, SAVE_ERRF
610 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
611 | sd r0, SAVE_CFRAME
612 | beqz TMP1, >3
613 |. sd TMP0, L->cframe
614 |
615 | // Resume after yield (like a return).
616 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
617 | move RA, BASE
618 | ld BASE, L->base
619 | ld TMP1, L->top
620 | ld PC, FRAME_PC(BASE)
621 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
622 | dsubu RD, TMP1, BASE
623 | .FPU mtc1 TMP3, TOBIT
624 | sb r0, L->status
625 | .FPU cvt.d.s TOBIT, TOBIT
626 | li_vmstate INTERP
627 | daddiu RD, RD, 8
628 | st_vmstate
629 | move MULTRES, RD
630 | andi TMP0, PC, FRAME_TYPE
631 | li TISNIL, LJ_TNIL
632 | beqz TMP0, ->BC_RET_Z
633 |. li TISNUM, LJ_TISNUM
634 | b ->vm_return
635 |. nop
636 |
637 |->vm_pcall: // Setup protected C frame and enter VM.
638 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
639 | saveregs
640 | sw CARG4, SAVE_ERRF
641 | b >1
642 |. li PC, FRAME_CP
643 |
644 |->vm_call: // Setup C frame and enter VM.
645 | // (lua_State *L, TValue *base, int nres1)
646 | saveregs
647 | li PC, FRAME_C
648 |
649 |1: // Entry point for vm_pcall above (PC = ftype).
650 | ld TMP1, L:CARG1->cframe
651 | move L, CARG1
652 | sw CARG3, SAVE_NRES
653 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
654 | sd CARG1, SAVE_L
655 | move BASE, CARG2
656 | daddiu DISPATCH, DISPATCH, GG_G2DISP
657 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
658 | sd TMP1, SAVE_CFRAME
659 | sd sp, L->cframe // Add our C frame to cframe chain.
660 |
661 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
662 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
663 | ld TMP2, L->base // TMP2 = old base (used in vmeta_call).
664 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
665 | ld TMP1, L->top
666 | .FPU mtc1 TMP3, TOBIT
667 | daddu PC, PC, BASE
668 | dsubu NARGS8:RC, TMP1, BASE
669 | li TISNUM, LJ_TISNUM
670 | dsubu PC, PC, TMP2 // PC = frame delta + frame type
671 | .FPU cvt.d.s TOBIT, TOBIT
672 | li_vmstate INTERP
673 | li TISNIL, LJ_TNIL
674 | st_vmstate
675 |
676 |->vm_call_dispatch:
677 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
678 | ld LFUNC:RB, FRAME_FUNC(BASE)
679 | checkfunc LFUNC:RB, ->vmeta_call
680 |
681 |->vm_call_dispatch_f:
682 | ins_call
683 | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
684 |
685 |->vm_cpcall: // Setup protected C frame, call C.
686 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
687 | saveregs
688 | move L, CARG1
689 | ld TMP0, L:CARG1->stack
690 | sd CARG1, SAVE_L
691 | ld TMP1, L->top
692 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
693 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
694 | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
695 | ld TMP1, L->cframe
696 | daddiu DISPATCH, DISPATCH, GG_G2DISP
697 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
698 | sw r0, SAVE_ERRF // No error function.
699 | sd TMP1, SAVE_CFRAME
700 | sd sp, L->cframe // Add our C frame to cframe chain.
701 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
702 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
703 |. move CFUNCADDR, CARG4
704 | move BASE, CRET1
705 | bnez CRET1, <3 // Else continue with the call.
706 |. li PC, FRAME_CP
707 | b ->vm_leave_cp // No base? Just remove C frame.
708 |. nop
709 |
710 |//-----------------------------------------------------------------------
711 |//-- Metamethod handling ------------------------------------------------
712 |//-----------------------------------------------------------------------
713 |
714 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
715 |// stack, so BASE doesn't need to be reloaded across these calls.
716 |
717 |//-- Continuation dispatch ----------------------------------------------
718 |
719 |->cont_dispatch:
720 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
721 | ld TMP0, -32(BASE) // Continuation.
722 | move RB, BASE
723 | move BASE, TMP2 // Restore caller BASE.
724 | ld LFUNC:TMP1, FRAME_FUNC(TMP2)
725 |.if FFI
726 | sltiu AT, TMP0, 2
727 |.endif
728 | ld PC, -24(RB) // Restore PC from [cont|PC].
729 | cleartp LFUNC:TMP1
730 | daddu TMP2, RA, RD
731 | ld TMP1, LFUNC:TMP1->pc
732 |.if FFI
733 | bnez AT, >1
734 |.endif
735 |. sd TISNIL, -8(TMP2) // Ensure one valid arg.
736 | // BASE = base, RA = resultptr, RB = meta base
737 | jr TMP0 // Jump to continuation.
738 |. ld KBASE, PC2PROTO(k)(TMP1)
739 |
740 |.if FFI
741 |1:
742 | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
743 | // cont = 0: tailcall from C function.
744 |. daddiu TMP1, RB, -32
745 | b ->vm_call_tail
746 |. dsubu RC, TMP1, BASE
747 |.endif
748 |
749 |->cont_cat: // RA = resultptr, RB = meta base
750 | lw INS, -4(PC)
751 | daddiu CARG2, RB, -32
752 | ld CRET1, 0(RA)
753 | decode_RB8a MULTRES, INS
754 | decode_RA8a RA, INS
755 | decode_RB8b MULTRES
756 | decode_RA8b RA
757 | daddu TMP1, BASE, MULTRES
758 | sd BASE, L->base
759 | dsubu CARG3, CARG2, TMP1
760 | bne TMP1, CARG2, ->BC_CAT_Z
761 |. sd CRET1, 0(CARG2)
762 | daddu RA, BASE, RA
763 | b ->cont_nop
764 |. sd CRET1, 0(RA)
765 |
766 |//-- Table indexing metamethods -----------------------------------------
767 |
768 |->vmeta_tgets1:
769 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
770 | li TMP0, LJ_TSTR
771 | settp STR:RC, TMP0
772 | b >1
773 |. sd STR:RC, 0(CARG3)
774 |
775 |->vmeta_tgets:
776 | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
777 | li TMP0, LJ_TTAB
778 | li TMP1, LJ_TSTR
779 | settp TAB:RB, TMP0
780 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
781 | sd TAB:RB, 0(CARG2)
782 | settp STR:RC, TMP1
783 | b >1
784 |. sd STR:RC, 0(CARG3)
785 |
786 |->vmeta_tgetb: // TMP0 = index
787 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
788 | settp TMP0, TISNUM
789 | sd TMP0, 0(CARG3)
790 |
791 |->vmeta_tgetv:
792 |1:
793 | load_got lj_meta_tget
794 | sd BASE, L->base
795 | sd PC, SAVE_PC
796 | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
797 |. move CARG1, L
798 | // Returns TValue * (finished) or NULL (metamethod).
799 | beqz CRET1, >3
800 |. daddiu TMP1, BASE, -FRAME_CONT
801 | ld CARG1, 0(CRET1)
802 | ins_next1
803 | sd CARG1, 0(RA)
804 | ins_next2
805 |
806 |3: // Call __index metamethod.
807 | // BASE = base, L->top = new base, stack = cont/func/t/k
808 | ld BASE, L->top
809 | sd PC, -24(BASE) // [cont|PC]
810 | dsubu PC, BASE, TMP1
811 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
812 | cleartp LFUNC:RB
813 | b ->vm_call_dispatch_f
814 |. li NARGS8:RC, 16 // 2 args for func(t, k).
815 |
816 |->vmeta_tgetr:
817 | load_got lj_tab_getinth
818 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
819 |. nop
820 | // Returns cTValue * or NULL.
821 | beqz CRET1, ->BC_TGETR_Z
822 |. move CARG2, TISNIL
823 | b ->BC_TGETR_Z
824 |. ld CARG2, 0(CRET1)
825 |
826 |//-----------------------------------------------------------------------
827 |
828 |->vmeta_tsets1:
829 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
830 | li TMP0, LJ_TSTR
831 | settp STR:RC, TMP0
832 | b >1
833 |. sd STR:RC, 0(CARG3)
834 |
835 |->vmeta_tsets:
836 | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
837 | li TMP0, LJ_TTAB
838 | li TMP1, LJ_TSTR
839 | settp TAB:RB, TMP0
840 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
841 | sd TAB:RB, 0(CARG2)
842 | settp STR:RC, TMP1
843 | b >1
844 |. sd STR:RC, 0(CARG3)
845 |
846 |->vmeta_tsetb: // TMP0 = index
847 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
848 | settp TMP0, TISNUM
849 | sd TMP0, 0(CARG3)
850 |
851 |->vmeta_tsetv:
852 |1:
853 | load_got lj_meta_tset
854 | sd BASE, L->base
855 | sd PC, SAVE_PC
856 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
857 |. move CARG1, L
858 | // Returns TValue * (finished) or NULL (metamethod).
859 | beqz CRET1, >3
860 |. ld CARG1, 0(RA)
861 | // NOBARRIER: lj_meta_tset ensures the table is not black.
862 | ins_next1
863 | sd CARG1, 0(CRET1)
864 | ins_next2
865 |
866 |3: // Call __newindex metamethod.
867 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
868 | daddiu TMP1, BASE, -FRAME_CONT
869 | ld BASE, L->top
870 | sd PC, -24(BASE) // [cont|PC]
871 | dsubu PC, BASE, TMP1
872 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
873 | cleartp LFUNC:RB
874 | sd CARG1, 16(BASE) // Copy value to third argument.
875 | b ->vm_call_dispatch_f
876 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
877 |
878 |->vmeta_tsetr:
879 | load_got lj_tab_setinth
880 | sd BASE, L->base
881 | sd PC, SAVE_PC
882 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
883 |. move CARG1, L
884 | // Returns TValue *.
885 | b ->BC_TSETR_Z
886 |. nop
887 |
888 |//-- Comparison metamethods ---------------------------------------------
889 |
890 |->vmeta_comp:
891 | // RA/RD point to o1/o2.
892 | move CARG2, RA
893 | move CARG3, RD
894 | load_got lj_meta_comp
895 | daddiu PC, PC, -4
896 | sd BASE, L->base
897 | sd PC, SAVE_PC
898 | decode_OP1 CARG4, INS
899 | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
900 |. move CARG1, L
901 | // Returns 0/1 or TValue * (metamethod).
902 |3:
903 | sltiu AT, CRET1, 2
904 | beqz AT, ->vmeta_binop
905 | negu TMP2, CRET1
906 |4:
907 | lhu RD, OFS_RD(PC)
908 | daddiu PC, PC, 4
909 | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
910 | sll RD, RD, 2
911 | addu RD, RD, TMP1
912 | and RD, RD, TMP2
913 | daddu PC, PC, RD
914 |->cont_nop:
915 | ins_next
916 |
917 |->cont_ra: // RA = resultptr
918 | lbu TMP1, -4+OFS_RA(PC)
919 | ld CRET1, 0(RA)
920 | sll TMP1, TMP1, 3
921 | daddu TMP1, BASE, TMP1
922 | b ->cont_nop
923 |. sd CRET1, 0(TMP1)
924 |
925 |->cont_condt: // RA = resultptr
926 | ld TMP0, 0(RA)
927 | gettp TMP0, TMP0
928 | sltiu AT, TMP0, LJ_TISTRUECOND
929 | b <4
930 |. negu TMP2, AT // Branch if result is true.
931 |
932 |->cont_condf: // RA = resultptr
933 | ld TMP0, 0(RA)
934 | gettp TMP0, TMP0
935 | sltiu AT, TMP0, LJ_TISTRUECOND
936 | b <4
937 |. addiu TMP2, AT, -1 // Branch if result is false.
938 |
939 |->vmeta_equal:
940 | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
941 | load_got lj_meta_equal
942 | cleartp LFUNC:CARG3, CARG2
943 | cleartp LFUNC:CARG2, CARG1
944 | move CARG4, TMP0
945 | daddiu PC, PC, -4
946 | sd BASE, L->base
947 | sd PC, SAVE_PC
948 | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
949 |. move CARG1, L
950 | // Returns 0/1 or TValue * (metamethod).
951 | b <3
952 |. nop
953 |
954 |->vmeta_equal_cd:
955 |.if FFI
956 | load_got lj_meta_equal_cd
957 | move CARG2, INS
958 | daddiu PC, PC, -4
959 | sd BASE, L->base
960 | sd PC, SAVE_PC
961 | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op)
962 |. move CARG1, L
963 | // Returns 0/1 or TValue * (metamethod).
964 | b <3
965 |. nop
966 |.endif
967 |
968 |->vmeta_istype:
969 | load_got lj_meta_istype
970 | daddiu PC, PC, -4
971 | sd BASE, L->base
972 | srl CARG2, RA, 3
973 | srl CARG3, RD, 3
974 | sd PC, SAVE_PC
975 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
976 |. move CARG1, L
977 | b ->cont_nop
978 |. nop
979 |
980 |//-- Arithmetic metamethods ---------------------------------------------
981 |
982 |->vmeta_unm: // Unary entry: duplicates RB into RC, then falls through to vmeta_arith.
983 | move RC, RB
984 |
985 |->vmeta_arith: // Calls lj_meta_arith(L, ra, rb, rc, op); a non-NULL result falls through to vmeta_binop.
986 | load_got lj_meta_arith
987 | sd BASE, L->base
988 | move CARG2, RA
989 | sd PC, SAVE_PC
990 | move CARG3, RB
991 | move CARG4, RC
992 | decode_OP1 CARG5, INS // CARG5 == RB.
993 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
994 |. move CARG1, L
995 | // Returns NULL (finished) or TValue * (metamethod).
996 | beqz CRET1, ->cont_nop // NULL: nothing more to do here.
997 |. nop
998 |
999 | // Call metamethod for binary op.
1000 |->vmeta_binop: // Also reached by fall-through from vmeta_arith and by branch from vmeta_len.
1001 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
1002 | dsubu TMP1, CRET1, BASE // Byte distance from the old base to the new base.
1003 | sd PC, -24(CRET1) // [cont|PC]
1004 | move TMP2, BASE // Keep the old base in TMP2.
1005 | daddiu PC, TMP1, FRAME_CONT // PC now holds the base delta plus FRAME_CONT.
1006 | move BASE, CRET1
1007 | b ->vm_call_dispatch
1008 |. li NARGS8:RC, 16 // 2 args for func(o1, o2).
1009 |
1010 |->vmeta_len: // Slow path for the length operator via lj_meta_len.
1011 | // CARG2 already set by BC_LEN.
1012#if LJ_52
1013 | move MULTRES, CARG1 // Keep CARG1 across the call; it is restored on the retry path below.
1014#endif
1015 | load_got lj_meta_len
1016 | sd BASE, L->base
1017 | sd PC, SAVE_PC
1018 | call_intern lj_meta_len // (lua_State *L, TValue *o)
1019 |. move CARG1, L
1020 | // Returns NULL (retry) or TValue * (metamethod base).
1021#if LJ_52
1022 | bnez CRET1, ->vmeta_binop // Binop call for compatibility.
1023 |. nop
1024 | b ->BC_LEN_Z // NULL result: retry at BC_LEN_Z ...
1025 |. move CARG1, MULTRES // ... with the CARG1 saved above.
1026#else
1027 | b ->vmeta_binop // Binop call for compatibility.
1028 |. nop
1029#endif
1030 |
1031 |//-- Call metamethod ----------------------------------------------------
1032 |
1033 |->vmeta_call: // Resolve and call __call metamethod.
1034 | // TMP2 = old base, BASE = new base, RC = nargs*8
1035 | load_got lj_meta_call
1036 | sd TMP2, L->base // This is the callers base!
1037 | daddiu CARG2, BASE, -16 // 'func' argument: the slot 16 bytes below the new base.
1038 | sd PC, SAVE_PC
1039 | daddu CARG3, BASE, RC // 'top' argument: just past the last argument.
1040 | move MULTRES, NARGS8:RC // Keep nargs*8 in MULTRES across the C call.
1041 | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1042 |. move CARG1, L
1043 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
1044 | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
1045 | cleartp LFUNC:RB
1046 | ins_call
1047 |
1048 |->vmeta_callt: // Resolve __call for BC_CALLT.
1049 | // BASE = old base, RA = new base, RC = nargs*8
1050 | load_got lj_meta_call
1051 | sd BASE, L->base
1052 | daddiu CARG2, RA, -16 // 'func' argument: the slot 16 bytes below the new base.
1053 | sd PC, SAVE_PC
1054 | daddu CARG3, RA, RC // 'top' argument: just past the last argument.
1055 | move MULTRES, NARGS8:RC // Keep nargs*8 in MULTRES across the C call.
1056 | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1057 |. move CARG1, L
1058 | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
1059 | ld TMP1, FRAME_PC(BASE)
1060 | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
1061 | b ->BC_CALLT_Z
1062 |. cleartp LFUNC:CARG3, RB // Delay slot: CARG3 receives the function loaded into RB above.
1063 |
1064 |//-- Argument coercion for 'for' statement ------------------------------
1065 |
1066 |->vmeta_for: // Calls lj_meta_for on the loop slots, then re-decodes the saved instruction and re-enters the FORI handler.
1067 | load_got lj_meta_for
1068 | sd BASE, L->base
1069 | move CARG2, RA
1070 | sd PC, SAVE_PC
1071 | move MULTRES, INS // Keep the instruction word in MULTRES across the C call.
1072 | call_intern lj_meta_for // (lua_State *L, TValue *base)
1073 |. move CARG1, L
1074 |.if JIT
1075 | decode_OP1 TMP0, MULTRES
1076 | li AT, BC_JFORI
1077 |.endif
1078 | decode_RA8a RA, MULTRES // RA and RD are rebuilt from the saved instruction word.
1079 | decode_RD8a RD, MULTRES
1080 | decode_RA8b RA
1081 |.if JIT
1082 | beq TMP0, AT, =>BC_JFORI // With JIT, opcode BC_JFORI goes to its own handler.
1083 |. decode_RD8b RD
1084 | b =>BC_FORI
1085 |. nop
1086 |.else
1087 | b =>BC_FORI
1088 |. decode_RD8b RD
1089 |.endif
1090 |
1091 |//-----------------------------------------------------------------------
1092 |//-- Fast functions -----------------------------------------------------
1093 |//-----------------------------------------------------------------------
1094 |
1095 |.macro .ffunc, name // Emits only the global label ff_<name>; no argument checks.
1096 |->ff_ .. name:
1097 |.endmacro
1098 |
1099 |.macro .ffunc_1, name // Label ff_<name>; falls back if NARGS8:RC is 0; leaves 0(BASE) in CARG1.
1100 |->ff_ .. name:
1101 | beqz NARGS8:RC, ->fff_fallback
1102 |. ld CARG1, 0(BASE) // Delay slot: loaded on both paths.
1103 |.endmacro
1104 |
1105 |.macro .ffunc_2, name // Label ff_<name>; falls back if NARGS8:RC < 16; leaves 0(BASE)/8(BASE) in CARG1/CARG2.
1106 |->ff_ .. name:
1107 | sltiu AT, NARGS8:RC, 16 // AT = 1 if fewer than two 8-byte arguments.
1108 | ld CARG1, 0(BASE)
1109 | bnez AT, ->fff_fallback
1110 |. ld CARG2, 8(BASE) // Delay slot: loaded on both paths.
1111 |.endmacro
1112 |
1113 |.macro .ffunc_n, name // Caveat: has delay slot! One argument that must pass checknum; CARG1 (and FARG1 with FPU) hold it.
1114 |->ff_ .. name:
1115 | ld CARG1, 0(BASE)
1116 | beqz NARGS8:RC, ->fff_fallback
1117 | // Either ldc1 or the 1st instruction of checknum is in the delay slot.
1118 | .FPU ldc1 FARG1, 0(BASE)
1119 | checknum CARG1, ->fff_fallback // Last item of the macro: the line after the macro use becomes its delay slot.
1120 |.endmacro
1121 |
1122 |.macro .ffunc_nn, name // Caveat: has delay slot! Two arguments, both with tag < LJ_TISNUM (unsigned), else fallback.
1123 |->ff_ .. name:
1124 | ld CARG1, 0(BASE)
1125 | sltiu AT, NARGS8:RC, 16 // AT = 1 if fewer than two 8-byte arguments.
1126 | ld CARG2, 8(BASE)
1127 | bnez AT, ->fff_fallback
1128 |. gettp TMP0, CARG1
1129 | gettp TMP1, CARG2
1130 | sltiu TMP0, TMP0, LJ_TISNUM
1131 | sltiu TMP1, TMP1, LJ_TISNUM
1132 | .FPU ldc1 FARG1, 0(BASE)
1133 | and TMP0, TMP0, TMP1 // 1 only if both tag checks passed.
1134 | .FPU ldc1 FARG2, 8(BASE)
1135 | beqz TMP0, ->fff_fallback // Last instruction of the macro: the line after the macro use is its delay slot.
1136 |.endmacro
1137 |
1138 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
1139 |// MIPSR6: no delay slot, but a forbidden slot.
1140 |.macro ffgccheck // Branch-and-link to fff_gcstep when gc.total - gc.threshold >= 0.
1141 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
1142 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
1143 | dsubu AT, TMP0, TMP1
1144 |.if MIPSR6
1145 | bgezalc AT, ->fff_gcstep
1146 |.else
1147 | bgezal AT, ->fff_gcstep // The line following the macro use fills this delay slot.
1148 |.endif
1149 |.endmacro
1150 |
1151 |//-- Base library: checks -----------------------------------------------
1152 |.ffunc_1 assert // Fast path: the 1st argument's tag is below LJ_TISTRUECOND; returns all arguments.
1153 | gettp AT, CARG1
1154 | sltiu AT, AT, LJ_TISTRUECOND
1155 | beqz AT, ->fff_fallback // Otherwise leave the work to the fallback.
1156 |. daddiu RA, BASE, -16
1157 | ld PC, FRAME_PC(BASE)
1158 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1159 | daddu TMP2, RA, RD
1160 | daddiu TMP1, BASE, 8 // TMP1 walks the remaining arguments, starting at the 2nd.
1161 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
1162 |. sd CARG1, 0(RA)
1163 |1: // Copy each remaining argument 16 bytes down, into the result area.
1164 | ld CRET1, 0(TMP1)
1165 | sd CRET1, -16(TMP1)
1166 | bne TMP1, TMP2, <1
1167 |. daddiu TMP1, TMP1, 8
1168 | b ->fff_res
1169 |. nop
1170 |
1171 |.ffunc_1 type // Returns the upvalue of this C function selected by the argument's type tag.
1172 | gettp TMP0, CARG1
1173 | sltu TMP1, TISNUM, TMP0 // TMP1 = 1 if tag > TISNUM (unsigned).
1174 | not TMP2, TMP0 // Candidate index: ~tag.
1175 | li TMP3, ~LJ_TISNUM // Index used when TMP1 is 0.
1176 |.if MIPSR6
1177 | selnez TMP2, TMP2, TMP1
1178 | seleqz TMP3, TMP3, TMP1
1179 | or TMP2, TMP2, TMP3 // Same selection as the movz below, without conditional moves.
1180 |.else
1181 | movz TMP2, TMP3, TMP1 // If TMP1 == 0, use ~LJ_TISNUM as the index.
1182 |.endif
1183 | dsll TMP2, TMP2, 3 // Index -> byte offset (8-byte entries).
1184 | daddu TMP2, CFUNC:RB, TMP2
1185 | b ->fff_restv
1186 |. ld CARG1, CFUNC:TMP2->upvalue
1187 |
1188 |//-- Base library: getters and setters ---------------------------------
1189 |
1190 |.ffunc_1 getmetatable // Finds the metatable (per object or per type) and honors a __metatable-style field stored in it.
1191 | gettp TMP2, CARG1
1192 | daddiu TMP0, TMP2, -LJ_TTAB
1193 | daddiu TMP1, TMP2, -LJ_TUDATA
1194 |.if MIPSR6
1195 | selnez TMP0, TMP1, TMP0
1196 |.else
1197 | movn TMP0, TMP1, TMP0 // TMP0 ends up 0 only if the tag is LJ_TTAB or LJ_TUDATA.
1198 |.endif
1199 | bnez TMP0, >6 // Any other type: look up the base metatable at label 6.
1200 |. cleartp TAB:CARG1
1201 |1: // Field metatable must be at same offset for GCtab and GCudata!
1202 | ld TAB:RB, TAB:CARG1->metatable
1203 |2: // TAB:RB = metatable or NULL.
1204 | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1205 | beqz TAB:RB, ->fff_restv // No metatable: return nil.
1206 |. li CARG1, LJ_TNIL
1207 | lw TMP0, TAB:RB->hmask
1208 | lw TMP1, STR:RC->sid
1209 | ld NODE:TMP2, TAB:RB->node
1210 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
1211 | dsll TMP0, TMP1, 5
1212 | dsll TMP1, TMP1, 3
1213 | dsubu TMP1, TMP0, TMP1 // idx*32 - idx*8 = idx*24.
1214 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
1215 | li CARG4, LJ_TSTR
1216 | settp STR:RC, CARG4 // Tagged key to look for.
1217 |3: // Rearranged logic, because we expect _not_ to find the key.
1218 | ld TMP0, NODE:TMP2->key
1219 | ld CARG1, NODE:TMP2->val
1220 | ld NODE:TMP2, NODE:TMP2->next
1221 | beq RC, TMP0, >5 // Key matched: check its value at label 5.
1222 |. li AT, LJ_TTAB
1223 | bnez NODE:TMP2, <3 // Follow the chain until next is NULL.
1224 |. nop
1225 |4: // Return the metatable itself, tagged as a table.
1226 | move CARG1, RB
1227 | b ->fff_restv // Not found, keep default result.
1228 |. settp CARG1, AT
1229 |5: // Key found: return its value unless that value is nil.
1230 | bne CARG1, TISNIL, ->fff_restv
1231 |. nop
1232 | b <4 // Ditto for nil value.
1233 |. nop
1234 |
1235 |6: // Neither table nor userdata: pick the base metatable by tag.
1236 | sltiu AT, TMP2, LJ_TISNUM
1237 |.if MIPSR6
1238 | selnez TMP0, TISNUM, AT
1239 | seleqz AT, TMP2, AT
1240 | or TMP2, TMP0, AT
1241 |.else
1242 | movn TMP2, TISNUM, AT // Tags below LJ_TISNUM are all treated as TISNUM.
1243 |.endif
1244 | dsll TMP2, TMP2, 3 // Tag -> byte offset (8-byte entries).
1245 | dsubu TMP0, DISPATCH, TMP2 // The offset is subtracted, not added.
1246 | b <2
1247 |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0)
1248 |
1249 |.ffunc_2 setmetatable // Inline only when arg 1 is a table without a metatable and arg 2 is a table.
1250 | // Fast path: no mt for table yet and not clearing the mt.
1251 | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1252 | gettp TMP3, CARG2
1253 | ld TAB:TMP0, TAB:TMP1->metatable
1254 | lbu TMP2, TAB:TMP1->marked
1255 | daddiu AT, TMP3, -LJ_TTAB // 0 if the 2nd argument is a table.
1256 | cleartp TAB:CARG2
1257 | or AT, AT, TAB:TMP0 // Non-zero if arg 2 is not a table or a metatable is already set.
1258 | bnez AT, ->fff_fallback
1259 |. andi AT, TMP2, LJ_GC_BLACK // isblack(table)
1260 | beqz AT, ->fff_restv // Not black: no barrier needed; CARG1 (the table) is the result.
1261 |. sd TAB:CARG2, TAB:TMP1->metatable // Delay slot: the store happens on both paths.
1262 | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
1263 |
1264 |.ffunc rawget // Needs at least 2 arguments with a table first; looks the key up with lj_tab_get.
1265 | ld CARG2, 0(BASE)
1266 | sltiu AT, NARGS8:RC, 16 // AT = 1 if fewer than two 8-byte arguments.
1267 | load_got lj_tab_get
1268 | gettp TMP0, CARG2
1269 | cleartp CARG2
1270 | daddiu TMP0, TMP0, -LJ_TTAB // 0 if the 1st argument is a table.
1271 | or AT, AT, TMP0 // Both conditions folded into a single test.
1272 | bnez AT, ->fff_fallback
1273 |. daddiu CARG3, BASE, 8 // Key argument: address of the 2nd stack slot.
1274 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1275 |. move CARG1, L
1276 | b ->fff_restv
1277 |. ld CARG1, 0(CRET1) // Result: the value the returned pointer refers to.
1278 |
1279 |//-- Base library: conversions ------------------------------------------
1280 |
1281 |.ffunc tonumber // Returns the argument unchanged when it is the only argument and its tag is <= TISNUM (unsigned).
1282 | // Only handles the number case inline (without a base argument).
1283 | ld CARG1, 0(BASE)
1284 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1285 | gettp TMP1, CARG1
1286 | sltu TMP0, TISNUM, TMP1 // TMP0 = 1 if tag > TISNUM (unsigned).
1287 | or AT, AT, TMP0
1288 | bnez AT, ->fff_fallback
1289 |. nop
1290 | b ->fff_restv
1291 |. nop
1292 |
1293 |.ffunc_1 tostring // Strings are returned as-is; numbers are formatted with lj_strfmt_number.
1294 | // Only handles the string or number case inline.
1295 | gettp TMP0, CARG1
1296 | daddiu AT, TMP0, -LJ_TSTR
1297 | // A __tostring method in the string base metatable is ignored.
1298 | beqz AT, ->fff_restv // String key?
1299 | // Handle numbers inline, unless a number base metatable is present.
1300 |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1301 | sltu TMP0, TISNUM, TMP0 // 1 if tag > TISNUM (unsigned).
1302 | or TMP0, TMP0, TMP1 // Non-zero if that holds or a number base metatable exists.
1303 | bnez TMP0, ->fff_fallback
1304 |. sd BASE, L->base // Add frame since C call can throw.
1305 |.if MIPSR6
1306 | sd PC, SAVE_PC // Redundant (but a defined value).
1307 | ffgccheck
1308 |.else
1309 | ffgccheck
1310 |. sd PC, SAVE_PC // Redundant (but a defined value).
1311 |.endif
1312 | load_got lj_strfmt_number
1313 | move CARG1, L
1314 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1315 |. move CARG2, BASE // The argument lives in the first stack slot.
1316 | // Returns GCstr *.
1317 | li AT, LJ_TSTR
1318 | settp CRET1, AT // Tag the returned pointer as a string.
1319 | b ->fff_restv
1320 |. move CARG1, CRET1
1321 |
1322 |//-- Base library: iterators -------------------------------------------
1323 |
1324 |.ffunc_1 next // Table traversal step via lj_tab_next; returns nil at the end, otherwise two values.
1325 | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback
1326 | daddu TMP2, BASE, NARGS8:RC
1327 | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil.
1328 | ld PC, FRAME_PC(BASE)
1329 | load_got lj_tab_next
1330 | sd BASE, L->base // Add frame since C call can throw.
1331 | sd BASE, L->top // Dummy frame length is ok.
1332 | daddiu CARG3, BASE, 8 // Key argument: address of the 2nd stack slot.
1333 | sd PC, SAVE_PC
1334 | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1335 |. move CARG1, L
1336 | // Returns 0 at end of traversal.
1337 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1338 |. move CARG1, TISNIL
1339 | ld TMP0, 8(BASE) // The two results are read from stack slots 1 and 2 ...
1340 | daddiu RA, BASE, -16
1341 | ld TMP2, 16(BASE)
1342 | sd TMP0, 0(RA) // ... and copied to the result area at BASE-16.
1343 | sd TMP2, 8(RA)
1344 | b ->fff_res
1345 |. li RD, (2+1)*8
1346 |
1347 |.ffunc_1 pairs // Returns three values: upvalue[0] of this function, the table, nil.
1348 | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1349 | ld PC, FRAME_PC(BASE)
1350#if LJ_52
1351 | ld TAB:TMP2, TAB:TMP1->metatable
1352 | ld TMP0, CFUNC:RB->upvalue[0]
1353 | bnez TAB:TMP2, ->fff_fallback
1354#else
1355 | ld TMP0, CFUNC:RB->upvalue[0]
1356#endif
1357 |. daddiu RA, BASE, -16 // Delay slot of the bnez in the LJ_52 build; a plain instruction otherwise.
1358 | sd TISNIL, 0(BASE)
1359 | sd CARG1, -8(BASE)
1360 | sd TMP0, 0(RA)
1361 | b ->fff_res
1362 |. li RD, (3+1)*8
1363 |
1364 |.ffunc_2 ipairs_aux // Iterator step: returns (i+1, t[i+1]), or no results when that value is nil or absent.
1365 | checktab CARG1, ->fff_fallback
1366 | checkint CARG2, ->fff_fallback
1367 |. lw TMP0, TAB:CARG1->asize
1368 | ld TMP1, TAB:CARG1->array
1369 | ld PC, FRAME_PC(BASE)
1370 | sextw TMP2, CARG2
1371 | addiu TMP2, TMP2, 1 // Next index.
1372 | sltu AT, TMP2, TMP0 // AT = 1 if the index is below asize (unsigned compare).
1373 | daddiu RA, BASE, -16
1374 | zextw TMP0, TMP2
1375 | settp TMP0, TISNUM // Tagged form of the new index.
1376 | beqz AT, >2 // Not in array part?
1377 |. sd TMP0, 0(RA) // Delay slot: the index is stored as the 1st result on both paths.
1378 | dsll TMP3, TMP2, 3 // Index -> byte offset (8-byte slots).
1379 | daddu TMP3, TMP1, TMP3
1380 | ld TMP1, 0(TMP3)
1381 |1: // TMP1 = value found for the new index.
1382 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1383 |. li RD, (0+1)*8
1384 | sd TMP1, -8(BASE)
1385 | b ->fff_res
1386 |. li RD, (2+1)*8
1387 |2: // Check for empty hash part first. Otherwise call C function.
1388 | lw TMP0, TAB:CARG1->hmask
1389 | load_got lj_tab_getinth
1390 | beqz TMP0, ->fff_res
1391 |. li RD, (0+1)*8
1392 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
1393 |. move CARG2, TMP2
1394 | // Returns cTValue * or NULL.
1395 | beqz CRET1, ->fff_res
1396 |. li RD, (0+1)*8
1397 | b <1
1398 |. ld TMP1, 0(CRET1)
1399 |
1400 |.ffunc_1 ipairs // Returns three values: upvalue[0] of this function, the table, and TISNUM<<47.
1401 | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1402 | ld PC, FRAME_PC(BASE)
1403#if LJ_52
1404 | ld TAB:TMP2, TAB:TMP1->metatable
1405 | ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
1406 | bnez TAB:TMP2, ->fff_fallback
1407#else
1408 | ld TMP0, CFUNC:RB->upvalue[0]
1409#endif
1410 | daddiu RA, BASE, -16
1411 | dsll AT, TISNUM, 47 // Presumably the tagged integer 0 (initial control value) -- confirm against the TValue layout.
1412 | sd CARG1, -8(BASE)
1413 | sd AT, 0(BASE)
1414 | sd CFUNC:TMP0, 0(RA)
1415 | b ->fff_res
1416 |. li RD, (3+1)*8
1417 |
1418 |//-- Base library: catch errors ----------------------------------------
1419 |
1420 |.ffunc pcall // Sets up a FRAME_PCALL frame and dispatches the call; local label 1 is also entered from xpcall.
1421 | daddiu NARGS8:RC, NARGS8:RC, -8 // Drop the function itself from the argument count.
1422 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1423 | bltz NARGS8:RC, ->fff_fallback // No arguments at all.
1424 |. move TMP2, BASE // TMP2 = old base.
1425 | daddiu BASE, BASE, 16
1426 | // Remember active hook before pcall.
1427 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1428 | andi TMP3, TMP3, 1
1429 | daddiu PC, TMP3, 16+FRAME_PCALL // PC = hook-active bit + frame delta (16) + FRAME_PCALL.
1430 | beqz NARGS8:RC, ->vm_call_dispatch
1431 |1:
1432 |. daddu TMP0, BASE, NARGS8:RC // Delay slot of the beqz above, and first instruction when entered via label 1.
1433 |2: // Move each argument up by one slot, from the top down.
1434 | ld TMP1, -16(TMP0)
1435 | sd TMP1, -8(TMP0)
1436 | daddiu TMP0, TMP0, -8
1437 | bne TMP0, BASE, <2
1438 |. nop
1439 | b ->vm_call_dispatch
1440 |. nop
1441 |
1442 |.ffunc xpcall // Like pcall, but with a traceback function as 2nd argument; shares pcall's argument-move loop.
1443 | daddiu NARGS8:TMP0, NARGS8:RC, -16 // Argument bytes beyond (function, traceback).
1444 | ld CARG1, 0(BASE)
1445 | ld CARG2, 8(BASE)
1446 | bltz NARGS8:TMP0, ->fff_fallback // Fewer than two arguments.
1447 |. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) // Must land in TMP3: that is the register the srl/andi below consume.
1448 | gettp AT, CARG2
1449 | daddiu AT, AT, -LJ_TFUNC
1450 | bnez AT, ->fff_fallback // Traceback must be a function.
1451 |. move TMP2, BASE // TMP2 = old base.
1452 | move NARGS8:RC, NARGS8:TMP0 // RC is only updated once no fallback can happen any more.
1453 | daddiu BASE, BASE, 24
1454 | // Remember active hook before pcall.
1455 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1456 | sd CARG2, 0(TMP2) // Swap function and traceback.
1457 | andi TMP3, TMP3, 1
1458 | sd CARG1, 8(TMP2)
1459 | beqz NARGS8:RC, ->vm_call_dispatch
1460 |. daddiu PC, TMP3, 24+FRAME_PCALL // PC = hook-active bit + frame delta (24) + FRAME_PCALL.
1461 | b <1 // Remaining arguments are moved by the loop at pcall's local label 1.
1462 |. nop
1463 |
1464 |//-- Coroutine library --------------------------------------------------
1465 |
1466 |.macro coroutine_resume_wrap, resume // One body for both entry points: resume=1 emits ff_coroutine_resume, resume=0 emits ff_coroutine_wrap_aux.
1467 |.if resume
1468 |.ffunc_1 coroutine_resume
1469 | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
1470 |.else
1471 |.ffunc coroutine_wrap_aux
1472 | ld L:CARG1, CFUNC:RB->upvalue[0].gcr // The coroutine comes from the function's first upvalue.
1473 | cleartp L:CARG1
1474 |.endif
1475 | lbu TMP0, L:CARG1->status
1476 | ld TMP1, L:CARG1->cframe
1477 | ld CARG2, L:CARG1->top
1478 | ld TMP2, L:CARG1->base
1479 | addiu AT, TMP0, -LUA_YIELD
1480 | daddu CARG3, CARG2, TMP0 // top + status; equals base only if status is 0 and base == top.
1481 | daddiu TMP3, CARG2, 8
1482 |.if MIPSR6
1483 | seleqz CARG2, CARG2, AT
1484 | selnez TMP3, TMP3, AT
1485 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1486 |. or CARG2, TMP3, CARG2
1487 |.else
1488 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1489 |. movn CARG2, TMP3, AT // If status != LUA_YIELD, use top+8 instead of top.
1490 |.endif
1491 | xor TMP2, TMP2, CARG3
1492 | bnez TMP1, ->fff_fallback // cframe != 0?
1493 |. or AT, TMP2, TMP0
1494 | ld TMP0, L:CARG1->maxstack
1495 | beqz AT, ->fff_fallback // base == top && st == 0?
1496 |. ld PC, FRAME_PC(BASE)
1497 | daddu TMP2, CARG2, NARGS8:RC // Coroutine top after the arguments are copied in.
1498 | sltu AT, TMP0, TMP2
1499 | bnez AT, ->fff_fallback // Stack overflow?
1500 |. sd PC, SAVE_PC
1501 | sd BASE, L->base
1502 |1:
1503 |.if resume
1504 | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC.
1505 | daddiu NARGS8:RC, NARGS8:RC, -8 // The thread argument itself is not passed on.
1506 | daddiu TMP2, TMP2, -8
1507 |.endif
1508 | sd TMP2, L:CARG1->top
1509 | daddu TMP1, BASE, NARGS8:RC // End of the caller's arguments.
1510 | move CARG3, CARG2 // Destination cursor in the coroutine stack.
1511 | sd BASE, L->top
1512 |2: // Move args to coroutine.
1513 | ld CRET1, 0(BASE)
1514 | sltu AT, BASE, TMP1
1515 | beqz AT, >3
1516 |. daddiu BASE, BASE, 8
1517 | sd CRET1, 0(CARG3)
1518 | b <2
1519 |. daddiu CARG3, CARG3, 8
1520 |3:
1521 | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1522 |. move L:RA, L:CARG1 // Keep the coroutine in RA for the code after the call.
1523 | // Returns thread status.
1524 |4:
1525 | ld TMP2, L:RA->base
1526 | sltiu AT, CRET1, LUA_YIELD+1 // AT = 1 if status <= LUA_YIELD.
1527 | ld TMP3, L:RA->top
1528 | li_vmstate INTERP
1529 | ld BASE, L->base
1530 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
1531 | st_vmstate
1532 | beqz AT, >8 // Status above LUA_YIELD: error handling at label 8.
1533 |. dsubu RD, TMP3, TMP2 // Result bytes = coroutine top - base.
1534 | ld TMP0, L->maxstack
1535 | beqz RD, >6 // No results?
1536 |. daddu TMP1, BASE, RD
1537 | sltu AT, TMP0, TMP1
1538 | bnez AT, >9 // Need to grow stack?
1539 |. daddu TMP3, TMP2, RD
1540 | sd TMP2, L:RA->top // Clear coroutine stack.
1541 | move TMP1, BASE
1542 |5: // Move results from coroutine.
1543 | ld CRET1, 0(TMP2)
1544 | daddiu TMP2, TMP2, 8
1545 | sltu AT, TMP2, TMP3
1546 | sd CRET1, 0(TMP1)
1547 | bnez AT, <5
1548 |. daddiu TMP1, TMP1, 8
1549 |6:
1550 | andi TMP0, PC, FRAME_TYPE
1551 |.if resume
1552 | mov_true TMP1
1553 | daddiu RA, BASE, -8
1554 | sd TMP1, -8(BASE) // Prepend true to results.
1555 | daddiu RD, RD, 16
1556 |.else
1557 | move RA, BASE
1558 | daddiu RD, RD, 8
1559 |.endif
1560 |7: // Common return: RA = results, RD = (nresults+1)*8, TMP0 = frame type bits of PC.
1561 | sd PC, SAVE_PC
1562 | beqz TMP0, ->BC_RET_Z
1563 |. move MULTRES, RD
1564 | b ->vm_return
1565 |. nop
1566 |
1567 |8: // Coroutine returned with error (at co->top-1).
1568 |.if resume
1569 | daddiu TMP3, TMP3, -8
1570 | mov_false TMP1
1571 | ld CRET1, 0(TMP3)
1572 | sd TMP3, L:RA->top // Remove error from coroutine stack.
1573 | li RD, (2+1)*8
1574 | sd TMP1, -8(BASE) // Prepend false to results.
1575 | daddiu RA, BASE, -8
1576 | sd CRET1, 0(BASE) // Copy error message.
1577 | b <7
1578 |. andi TMP0, PC, FRAME_TYPE
1579 |.else
1580 | load_got lj_ffh_coroutine_wrap_err
1581 | move CARG2, L:RA
1582 | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1583 |. move CARG1, L // NOTE(review): no branch follows this call; presumably it never returns -- confirm.
1584 |.endif
1585 |
1586 |9: // Handle stack expansion on return from yield.
1587 | load_got lj_state_growstack
1588 | srl CARG2, RD, 3 // Result bytes -> number of slots.
1589 | call_intern lj_state_growstack // (lua_State *L, int n)
1590 |. move CARG1, L
1591 | b <4 // Redo the result handling from label 4 ...
1592 |. li CRET1, 0 // ... with a status of 0.
1593 |.endmacro
1594 |
1595 | coroutine_resume_wrap 1 // coroutine.resume
1596 | coroutine_resume_wrap 0 // coroutine.wrap
1597 |
1598 |.ffunc coroutine_yield // Inline yield: only when L->cframe has CFRAME_RESUME set, otherwise fallback.
1599 | ld TMP0, L->cframe
1600 | daddu TMP1, BASE, NARGS8:RC // End of the arguments.
1601 | sd BASE, L->base
1602 | andi TMP0, TMP0, CFRAME_RESUME
1603 | sd TMP1, L->top
1604 | beqz TMP0, ->fff_fallback
1605 |. li CRET1, LUA_YIELD
1606 | sd r0, L->cframe // Clear cframe.
1607 | b ->vm_leave_unw
1608 |. sb CRET1, L->status // Delay slot: status = LUA_YIELD.
1609 |
1610 |//-- Math library -------------------------------------------------------
1611 |
1612 |.ffunc_1 math_abs // Integer and number paths; the number path falls through into fff_restv.
1613 | gettp CARG2, CARG1
1614 | daddiu AT, CARG2, -LJ_TISNUM
1615 | bnez AT, >1 // Tag is not LJ_TISNUM: try the number path at label 1.
1616 |. sextw TMP1, CARG1
1617 | sra TMP0, TMP1, 31 // Extract sign.
1618 | xor TMP1, TMP1, TMP0
1619 | dsubu CARG1, TMP1, TMP0 // (x ^ sign) - sign = |x|.
1620 | dsll TMP3, CARG1, 32 // Moves bit 31 of the result into the sign bit.
1621 | bgez TMP3, ->fff_restv // Bit 31 clear: the result fits a 32-bit integer.
1622 |. settp CARG1, TISNUM
1623 | li CARG1, 0x41e0 // 2^31 as a double.
1624 | b ->fff_restv
1625 |. dsll CARG1, CARG1, 48
1626 |1:
1627 | sltiu AT, CARG2, LJ_TISNUM
1628 | beqz AT, ->fff_fallback
1629 |. dextm CARG1, CARG1, 0, 30 // NOTE(review): presumably clears the double's sign bit -- confirm dextm's size operand encoding.
1630 |// fallthrough
1631 |
1632 |->fff_restv: // Common return for fast functions with exactly one result.
1633 | // CARG1 = TValue result.
1634 | ld PC, FRAME_PC(BASE)
1635 | daddiu RA, BASE, -16
1636 | sd CARG1, -16(BASE)
1637 |->fff_res1:
1638 | // RA = results, PC = return.
1639 | li RD, (1+1)*8
1640 |->fff_res:
1641 | // RA = results, RD = (nresults+1)*8, PC = return.
1642 | andi TMP0, PC, FRAME_TYPE
1643 | bnez TMP0, ->vm_return // Non-zero frame type bits: leave through vm_return.
1644 |. move MULTRES, RD
1645 | lw INS, -4(PC) // Instruction word just before the return PC.
1646 | decode_RB8a RB, INS
1647 | decode_RB8b RB
1648 |5:
1649 | sltu AT, RD, RB
1650 | bnez AT, >6 // More results expected?
1651 |. decode_RA8a TMP0, INS
1652 | decode_RA8b TMP0
1653 | ins_next1
1654 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1655 | dsubu BASE, RA, TMP0
1656 | ins_next2
1657 |
1658 |6: // Fill up results with nil.
1659 | daddu TMP1, RA, RD
1660 | daddiu RD, RD, 8
1661 | b <5
1662 |. sd TISNIL, -8(TMP1) // Stored at RA + old RD - 8, i.e. the first missing result slot.
1663 |
1664 |.macro math_extern, func // Emits ff_math_<func>: one number argument, calls the external 'func', returns via fff_resn.
1665 | .ffunc_n math_ .. func
1666 | load_got func
1667 | call_extern
1668 |. nop
1669 | b ->fff_resn
1670 |. nop
1671 |.endmacro
1672 |
1673 |.macro math_extern2, func // Emits ff_math_<func>: two number arguments, calls the external 'func', returns via fff_resn.
1674 | .ffunc_nn math_ .. func
1675 |. load_got func // Fills the delay slot left open by .ffunc_nn.
1676 | call_extern
1677 |. nop
1678 | b ->fff_resn
1679 |. nop
1680 |.endmacro
1681 |
1682 |// TODO: Return integer type if result is integer (own sf implementation).
1683 |.macro math_round, func // Emits ff_math_<func>; instantiated below for floor and ceil.
1684 |->ff_math_ .. func:
1685 | ld CARG1, 0(BASE)
1686 | beqz NARGS8:RC, ->fff_fallback
1687 |. gettp TMP0, CARG1
1688 | beq TMP0, TISNUM, ->fff_restv // Tag equals TISNUM: return the argument unchanged.
1689 |. sltu AT, TMP0, TISNUM
1690 | beqz AT, ->fff_fallback // Tag not below TISNUM: fallback.
1691 |.if FPU
1692 |. ldc1 FARG1, 0(BASE)
1693 | bal ->vm_ .. func // FPU build: uses the internal vm_<func> routine.
1694 |. nop
1695 |.else
1696 |. load_got func
1697 | call_extern // Soft-float build: calls the external 'func'.
1698 |. nop
1699 |.endif
1700 | b ->fff_resn
1701 |. nop
1702 |.endmacro
1703 |
1704 | math_round floor
1705 | math_round ceil
1706 |
1707 |.ffunc math_log // Inline only for exactly one number argument; calls the external log.
1708 | li AT, 8
1709 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1710 |. ld CARG1, 0(BASE)
1711 | checknum CARG1, ->fff_fallback
1712 |. load_got log
1713 |.if FPU
1714 | call_extern
1715 |. ldc1 FARG1, 0(BASE) // Delay slot: the argument is loaded into the FP argument register.
1716 |.else
1717 | call_extern
1718 |. nop
1719 |.endif
1720 | b ->fff_resn
1721 |. nop
1722 |
1723 | math_extern log10 // Each line expands to one ff_math_<name> entry point.
1724 | math_extern exp
1725 | math_extern sin
1726 | math_extern cos
1727 | math_extern tan
1728 | math_extern asin
1729 | math_extern acos
1730 | math_extern atan
1731 | math_extern sinh
1732 | math_extern cosh
1733 | math_extern tanh
1734 | math_extern2 pow // Two-argument variants.
1735 | math_extern2 atan2
1736 | math_extern2 fmod
1737 |
1738 |.if FPU // With FPU: a single sqrt.d. Without: call the external sqrt like the other math functions.
1739 |.ffunc_n math_sqrt
1740 |. sqrt.d FRET1, FARG1 // Fills the delay slot left open by .ffunc_n.
1741 |// fallthrough to ->fff_resn
1742 |.else
1743 | math_extern sqrt
1744 |.endif
1745 |
1746 |->fff_resn: // Common return for a single number result held in FRET1 (FPU) or CRET1 (soft-float).
1747 | ld PC, FRAME_PC(BASE)
1748 | daddiu RA, BASE, -16
1749 | b ->fff_res1
1750 |.if FPU
1751 |. sdc1 FRET1, 0(RA) // Delay slot: store the result before continuing at fff_res1.
1752 |.else
1753 |. sd CRET1, 0(RA)
1754 |.endif
1755 |
1756 |
1757 |.ffunc_2 math_ldexp // Requires (number, integer); calls the external ldexp.
1758 | checknum CARG1, ->fff_fallback
1759 | checkint CARG2, ->fff_fallback
1760 |. load_got ldexp
1761 | .FPU ldc1 FARG1, 0(BASE)
1762 | call_extern
1763 |. lw CARG2, 8+LO(BASE) // Delay slot: 32-bit word at offset LO of the 2nd argument slot.
1764 | b ->fff_resn
1765 |. nop
1766 |
1767 |.ffunc_n math_frexp // Calls the external frexp with tmptv as the output slot; returns two values.
1768 | load_got frexp
1769 | ld PC, FRAME_PC(BASE)
1770 | call_extern
1771 |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) // 2nd C argument: address of the tmptv scratch slot.
1772 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) // Read back the 32-bit value written there.
1773 | daddiu RA, BASE, -16
1774 |.if FPU
1775 | mtc1 TMP1, FARG2
1776 | sdc1 FRET1, 0(RA)
1777 | cvt.d.w FARG2, FARG2 // Convert the integer to a double for the 2nd result.
1778 | sdc1 FARG2, 8(RA)
1779 |.else
1780 | sd CRET1, 0(RA)
1781 | zextw TMP1, TMP1
1782 | settp TMP1, TISNUM // Soft-float: the 2nd result is stored with the TISNUM tag instead.
1783 | sd TMP1, 8(RA)
1784 |.endif
1785 | b ->fff_res
1786 |. li RD, (2+1)*8
1787 |
1788 |.ffunc_n math_modf // Calls the external modf; the callee writes result 1 directly, result 2 is its return value.
1789 | load_got modf
1790 | ld PC, FRAME_PC(BASE)
1791 | call_extern
1792 |. daddiu CARG2, BASE, -16 // 2nd C argument: address of the first result slot.
1793 | daddiu RA, BASE, -16
1794 |.if FPU
1795 | sdc1 FRET1, -8(BASE) // Return value goes to the second result slot.
1796 |.else
1797 | sd CRET1, -8(BASE)
1798 |.endif
1799 | b ->fff_res
1800 |. li RD, (2+1)*8
1801 |
1802 |.macro math_minmax, name, intins, intinsc, fpins // Emits ff_<name>: stays in an integer loop until a non-integer argument appears, then continues in a number loop.
1803 | .ffunc_1 name
1804 | daddu TMP3, BASE, NARGS8:RC // TMP3 = end of the arguments.
1805 | checkint CARG1, >5
1806 |. daddiu TMP2, BASE, 8 // TMP2 = cursor, starting at the 2nd argument.
1807 |1: // Handle integers.
1808 | beq TMP2, TMP3, ->fff_restv // All arguments consumed: CARG1 is the result.
1809 |. ld CARG2, 0(TMP2)
1810 | checkint CARG2, >3
1811 |. sextw CARG1, CARG1
1812 | lw CARG2, LO(TMP2)
1813 |. slt AT, CARG1, CARG2
1814 |.if MIPSR6
1815 | intins TMP1, CARG2, AT
1816 | intinsc CARG1, CARG1, AT
1817 | or CARG1, CARG1, TMP1
1818 |.else
1819 | intins CARG1, CARG2, AT // movz (min) or movn (max), selected by the macro argument.
1820 |.endif
1821 | daddiu TMP2, TMP2, 8
1822 | zextw CARG1, CARG1
1823 | b <1
1824 |. settp CARG1, TISNUM // Re-tag the running result.
1825 |
1826 |3: // Convert intermediate result to number and continue with number loop.
1827 | checknum CARG2, ->fff_fallback
1828 |.if FPU
1829 |. mtc1 CARG1, FRET1
1830 | cvt.d.w FRET1, FRET1
1831 | b >7
1832 |. ldc1 FARG1, 0(TMP2)
1833 |.else
1834 |. nop
1835 | bal ->vm_sfi2d_1
1836 |. nop
1837 | b >7
1838 |. nop
1839 |.endif
1840 |
1841 |5: // First argument is not an integer: it must be a number.
1842 | .FPU ldc1 FRET1, 0(BASE)
1843 | checknum CARG1, ->fff_fallback
1844 |6: // Handle numbers.
1845 |. ld CARG2, 0(TMP2)
1846 | beq TMP2, TMP3, ->fff_resn // All arguments consumed: return the number result.
1847 |.if FPU
1848 | ldc1 FARG1, 0(TMP2)
1849 |.else
1850 | move CRET1, CARG1
1851 |.endif
1852 | checknum CARG2, >8
1853 |. nop
1854 |7:
1855 |.if FPU
1856 |.if MIPSR6
1857 | fpins FRET1, FRET1, FARG1 // R6: min.d or max.d, passed in by the macro argument.
1858 |.else
1859 |.if fpins // ismax
1860 | c.olt.d FARG1, FRET1
1861 |.else
1862 | c.olt.d FRET1, FARG1
1863 |.endif
1864 | movf.d FRET1, FARG1 // Take the new value when the ordered less-than compare is false.
1865 |.endif
1866 |.else
1867 |.if fpins // ismax
1868 | bal ->vm_sfcmpogt
1869 |.else
1870 | bal ->vm_sfcmpolt
1871 |.endif
1872 |. nop
1873 |.if MIPSR6
1874 | seleqz AT, CARG2, CRET1
1875 | selnez CARG1, CARG1, CRET1
1876 | or CARG1, CARG1, AT
1877 |.else
1878 | movz CARG1, CARG2, CRET1 // Take the new value when the compare result is 0.
1879 |.endif
1880 |.endif
1881 | b <6
1882 |. daddiu TMP2, TMP2, 8
1883 |
1884 |8: // Convert integer to number and continue with number loop.
1885 | checkint CARG2, ->fff_fallback
1886 |.if FPU
1887 |. lwc1 FARG1, LO(TMP2)
1888 | b <7
1889 |. cvt.d.w FARG1, FARG1
1890 |.else
1891 |. lw CARG2, LO(TMP2)
1892 | bal ->vm_sfi2d_2
1893 |. nop
1894 | b <7
1895 |. nop
1896 |.endif
1897 |
1898 |.endmacro
1899 |
1900 |.if MIPSR6 // R6 has no movz/movn/movf: pass select instructions and min.d/max.d instead.
1901 | math_minmax math_min, seleqz, selnez, min.d
1902 | math_minmax math_max, selnez, seleqz, max.d
1903 |.else
1904 | math_minmax math_min, movz, _, 0 // fpins is used as the 'ismax' flag here; intinsc is unused.
1905 | math_minmax math_max, movn, _, 1
1906 |.endif
1907 |
1908 |//-- String library -----------------------------------------------------
1909 |
1910 |.ffunc string_byte // Only handle the 1-arg case here.
1911 | ld CARG1, 0(BASE)
1912 | gettp TMP0, CARG1
1913 | xori AT, NARGS8:RC, 8 // 0 if exactly one argument.
1914 | daddiu TMP0, TMP0, -LJ_TSTR // 0 if the argument is a string.
1915 | or AT, AT, TMP0
1916 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1917 |. cleartp STR:CARG1
1918 | lw TMP0, STR:CARG1->len
1919 | daddiu RA, BASE, -16
1920 | ld PC, FRAME_PC(BASE)
1921 | sltu RD, r0, TMP0 // RD = (len != 0).
1922 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1923 | addiu RD, RD, 1
1924 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1925 | settp TMP1, TISNUM // Tag the byte value as an integer.
1926 | b ->fff_res
1927 |. sd TMP1, 0(RA)
1928 |
1929 |.ffunc string_char // Only handle the 1-arg case here.
1930 | ffgccheck
1931 |.if not MIPSR6
1932 |. nop
1933 |.endif
1934 | ld CARG1, 0(BASE)
1935 | gettp TMP0, CARG1
1936 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1937 | daddiu TMP0, TMP0, -LJ_TISNUM // Integer.
1938 | li TMP1, 255
1939 | sextw CARG1, CARG1
1940 | or AT, AT, TMP0
1941 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1942 | or AT, AT, TMP1
1943 | bnez AT, ->fff_fallback
1944 |. li CARG3, 1 // String length 1.
1945 | daddiu CARG2, sp, TMPD_OFS // The byte is staged in the TMPD stack slot.
1946 | sb CARG1, TMPD
1947 |->fff_newstr: // CARG2 = char *, CARG3 = length; also entered from string_sub.
1948 | load_got lj_str_new
1949 | sd BASE, L->base
1950 | sd PC, SAVE_PC
1951 | call_intern lj_str_new // (lua_State *L, char *str, size_t l)
1952 |. move CARG1, L
1953 | // Returns GCstr *.
1954 | ld BASE, L->base
1955 |->fff_resstr: // CRET1 = GCstr *; tags it as a string and returns it.
1956 | li AT, LJ_TSTR
1957 | settp CRET1, AT
1958 | b ->fff_restv
1959 |. move CARG1, CRET1
1960 |
1961 |.ffunc string_sub // string.sub(s, start [, end]) with integer indices; clamps both, then creates the substring via fff_newstr.
1962 | ffgccheck
1963 |.if not MIPSR6
1964 |. nop
1965 |.endif
1966 | addiu AT, NARGS8:RC, -16
1967 | ld TMP0, 0(BASE)
1968 | bltz AT, ->fff_fallback // Fewer than two arguments.
1969 |. gettp TMP3, TMP0
1970 | cleartp STR:CARG1, TMP0
1971 | ld CARG2, 8(BASE)
1972 | beqz AT, >1 // Exactly two arguments: end defaults to -1.
1973 |. li CARG4, -1
1974 | ld CARG3, 16(BASE)
1975 | checkint CARG3, ->fff_fallback
1976 |. sextw CARG4, CARG3
1977 |1:
1978 | checkint CARG2, ->fff_fallback
1979 |. li AT, LJ_TSTR
1980 | bne TMP3, AT, ->fff_fallback // 1st argument must be a string.
1981 |. sextw CARG3, CARG2
1982 | lw CARG2, STR:CARG1->len
1983 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1984 | slt AT, CARG4, r0
1985 | addiu TMP0, CARG2, 1 // len+1, used to rebase negative indices.
1986 | addu TMP1, CARG4, TMP0
1987 | slt TMP3, CARG3, r0
1988 |.if MIPSR6
1989 | seleqz CARG4, CARG4, AT
1990 | selnez TMP1, TMP1, AT
1991 | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1
1992 |.else
1993 | movn CARG4, TMP1, AT // if (end < 0) end += len+1
1994 |.endif
1995 | addu TMP1, CARG3, TMP0
1996 |.if MIPSR6
1997 | selnez TMP1, TMP1, TMP3
1998 | seleqz CARG3, CARG3, TMP3
1999 | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1
2000 | li TMP2, 1
2001 | slt AT, CARG4, r0
2002 | slt TMP3, r0, CARG3
2003 | seleqz CARG4, CARG4, AT // if (end < 0) end = 0
2004 | selnez CARG3, CARG3, TMP3
2005 | seleqz TMP2, TMP2, TMP3
2006 | or CARG3, TMP2, CARG3 // if (start < 1) start = 1
2007 | slt AT, CARG2, CARG4
2008 | seleqz CARG4, CARG4, AT
2009 | selnez CARG2, CARG2, AT
2010 | or CARG4, CARG2, CARG4 // if (end > len) end = len
2011 |.else
2012 | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1
2013 | li TMP2, 1
2014 | slt AT, CARG4, r0
2015 | slt TMP3, r0, CARG3
2016 | movn CARG4, r0, AT // if (end < 0) end = 0
2017 | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1
2018 | slt AT, CARG2, CARG4
2019 | movn CARG4, CARG2, AT // if (end > len) end = len
2020 |.endif
2021 | daddu CARG2, STR:CARG1, CARG3
2022 | subu CARG3, CARG4, CARG3 // len = end - start
2023 | daddiu CARG2, CARG2, sizeof(GCstr)-1 // CARG2 = str + sizeof(GCstr) + start - 1: the first byte of the substring.
2024 | bgez CARG3, ->fff_newstr
2025 |. addiu CARG3, CARG3, 1 // len++
2026 |->fff_emptystr: // Return empty string.
2027 | li AT, LJ_TSTR
2028 | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
2029 | b ->fff_restv
2030 |. settp CARG1, AT
2031 |
2032 |.macro ffstring_op, name // Emits ff_string_<name>: runs lj_buf_putstr_<name> on the tmpbuf buffer, then lj_buf_tostr.
2033 | .ffunc string_ .. name
2034 | ffgccheck
2035 |. nop // NOTE(review): unconditional here, while string_char/string_sub guard this nop with '.if not MIPSR6'.
2036 | beqz NARGS8:RC, ->fff_fallback
2037 |. ld CARG2, 0(BASE)
2038 | checkstr STR:CARG2, ->fff_fallback
2039 | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) // 1st C argument: address of tmpbuf.
2040 | load_got lj_buf_putstr_ .. name
2041 | ld TMP0, SBUF:CARG1->b
2042 | sd L, SBUF:CARG1->L
2043 | sd BASE, L->base
2044 | sd TMP0, SBUF:CARG1->w // w = b: resets the buffer's w field to its b field before use.
2045 | call_intern extern lj_buf_putstr_ .. name
2046 |. sd PC, SAVE_PC
2047 | load_got lj_buf_tostr
2048 | call_intern lj_buf_tostr
2049 |. move SBUF:CARG1, SBUF:CRET1 // The first call's return value becomes the argument of lj_buf_tostr.
2050 | b ->fff_resstr
2051 |. ld BASE, L->base
2052 |.endmacro
2053 |
2054 |ffstring_op reverse
2055 |ffstring_op lower
2056 |ffstring_op upper
2057 |
2058 |//-- Bit library --------------------------------------------------------
2059 |
2060 |->vm_tobit_fb: // Helper for the bit functions: TMP1 == 0 means fallback; otherwise converts the number at 0(BASE)/CARG1 into CRET1.
2061 | beqz TMP1, ->fff_fallback
2062 |.if FPU
2063 |. ldc1 FARG1, 0(BASE)
2064 | add.d FARG1, FARG1, TOBIT // Adding TOBIT leaves the integer in the low word (read by mfc1 below).
2065 | mfc1 CRET1, FARG1
2066 | jr ra
2067 |. zextw CRET1, CRET1
2068 |.else
2069 |// FP number to bit conversion for soft-float.
2070 |->vm_tobit: // Soft-float build: vm_tobit_fb falls straight into this routine.
2071 | dsll TMP0, CARG1, 1 // Shift out the sign bit.
2072 | li CARG3, 1076
2073 | dsrl AT, TMP0, 53 // AT = the 11 bits that followed the sign bit (biased exponent).
2074 | dsubu CARG3, CARG3, AT // Right-shift amount = 1076 - exponent.
2075 | sltiu AT, CARG3, 54
2076 | beqz AT, >1 // Shift amount outside 0..53: result is 0.
2077 |. dextm TMP0, TMP0, 0, 20
2078 | dinsu TMP0, AT, 21, 21
2079 | slt AT, CARG1, r0 // AT = 1 if the input is negative.
2080 | dsrlv CRET1, TMP0, CARG3
2081 | dsubu TMP0, r0, CRET1 // Negated magnitude.
2082 |.if MIPSR6
2083 | selnez TMP0, TMP0, AT
2084 | seleqz CRET1, CRET1, AT
2085 | or CRET1, CRET1, TMP0
2086 |.else
2087 | movn CRET1, TMP0, AT // Use the negated value for negative inputs.
2088 |.endif
2089 | jr ra
2090 |. zextw CRET1, CRET1
2091 |1:
2092 | jr ra
2093 |. move CRET1, r0
2094 |
2095 |// FP number to int conversion with a check for soft-float.
2096 |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
2097 |->vm_tointg: // Result in CRET1; CRET2 is computed by the integer/range checks below (0 only if both pass).
2098 |.if JIT
2099 | dsll CRET2, CARG1, 1 // Shift out the sign bit.
2100 | beqz CRET2, >2 // +0 or -0: result is 0.
2101 |. li TMP0, 1076
2102 | dsrl AT, CRET2, 53
2103 | dsubu TMP0, TMP0, AT // Right-shift amount = 1076 - exponent.
2104 | sltiu AT, TMP0, 54
2105 | beqz AT, >1 // Shift amount outside 0..53: return with CRET2 = 1.
2106 |. dextm CRET2, CRET2, 0, 20
2107 | dinsu CRET2, AT, 21, 21
2108 | slt AT, CARG1, r0 // AT = 1 if the input is negative.
2109 | dsrlv CRET1, CRET2, TMP0
2110 | dsubu CARG1, r0, CRET1 // Negated magnitude.
2111 |.if MIPSR6
2112 | seleqz CRET1, CRET1, AT
2113 | selnez CARG1, CARG1, AT
2114 | or CRET1, CRET1, CARG1
2115 |.else
2116 | movn CRET1, CARG1, AT // Use the negated value for negative inputs.
2117 |.endif
2118 | li CARG1, 64
2119 | subu TMP0, CARG1, TMP0
2120 | dsllv CRET2, CRET2, TMP0 // Integer check.
2121 | sextw AT, CRET1
2122 | xor AT, CRET1, AT // Range check.
2123 |.if MIPSR6
2124 | seleqz AT, AT, CRET2
2125 | selnez CRET2, CRET2, CRET2
2126 | jr ra
2127 |. or CRET2, AT, CRET2
2128 |.else
2129 | jr ra
2130 |. movz CRET2, AT, CRET2 // If the integer check gave 0, CRET2 takes the range-check result.
2131 |.endif
2132 |1:
2133 | jr ra
2134 |. li CRET2, 1
2135 |2:
2136 | jr ra
2137 |. move CRET1, r0
2138 |.endif
2139 |.endif
2140 |
2141 |.macro .ffunc_bit, name
2142 | .ffunc_1 bit_..name
2143 | gettp TMP0, CARG1 // TMP0 = type tag of the first argument.
2144 | beq TMP0, TISNUM, >6 // Integer: skip the conversion call.
2145 |. zextw CRET1, CARG1
2146 | bal ->vm_tobit_fb // Otherwise convert via vm_tobit_fb.
2147 |. sltiu TMP1, TMP0, LJ_TISNUM // Delay slot: TMP1 = (tag < LJ_TISNUM), tested by vm_tobit_fb.
2148 |6:
2149 |.endmacro
2150 |
2151 |.macro .ffunc_bit_op, name, bins
2152 | .ffunc_bit name // First argument ends up in CRET1 (the accumulator).
2153 | daddiu TMP2, BASE, 8 // TMP2 = BASE+8: next argument slot.
2154 | daddu TMP3, BASE, NARGS8:RC // TMP3 = BASE + nargs*8: end of arguments.
2155 |1:
2156 | beq TMP2, TMP3, ->fff_resi // All arguments consumed: return CRET1.
2157 |. ld CARG1, 0(TMP2)
2158 | gettp TMP0, CARG1
2159 |.if FPU
2160 | bne TMP0, TISNUM, >2 // Not an integer: try the FP path.
2161 |. daddiu TMP2, TMP2, 8 // Delay slot: advance to the next slot.
2162 | zextw CARG1, CARG1
2163 | b <1
2164 |. bins CRET1, CRET1, CARG1 // Delay slot: combine with the accumulator.
2165 |2:
2166 | ldc1 FARG1, -8(TMP2) // TMP2 was already advanced, hence the -8.
2167 | sltiu AT, TMP0, LJ_TISNUM
2168 | beqz AT, ->fff_fallback // Tag is not below LJ_TISNUM: take the fallback.
2169 |. add.d FARG1, FARG1, TOBIT
2170 | mfc1 CARG1, FARG1
2171 | zextw CARG1, CARG1
2172 | b <1
2173 |. bins CRET1, CRET1, CARG1 // Delay slot: combine with the accumulator.
2174 |.else
2175 | beq TMP0, TISNUM, >2
2176 |. move CRET2, CRET1 // Delay slot: save the accumulator in CRET2.
2177 | bal ->vm_tobit_fb
2178 |. sltiu TMP1, TMP0, LJ_TISNUM
2179 | move CARG1, CRET2 // CARG1 = saved accumulator; CRET1 holds the converted argument.
2180 |2:
2181 | zextw CARG1, CARG1
2182 | bins CRET1, CRET1, CARG1
2183 | b <1
2184 |. daddiu TMP2, TMP2, 8 // Delay slot: advance to the next slot.
2185 |.endif
2186 |.endmacro
2187 |
2188 |.ffunc_bit_op band, and
2189 |.ffunc_bit_op bor, or
2190 |.ffunc_bit_op bxor, xor
2191 |
2192 |.ffunc_bit bswap // Argument arrives in CRET1 (see .ffunc_bit).
2193 | dsrl TMP0, CRET1, 8
2194 | dsrl TMP1, CRET1, 24
2195 | andi TMP2, TMP0, 0xff00
2196 | dins TMP1, CRET1, 24, 31
2197 | dins TMP2, TMP0, 16, 23
2198 | b ->fff_resi
2199 |. or CRET1, TMP1, TMP2 // Delay slot: merge the two partial results.
2200 |
2201 |.ffunc_bit bnot // Argument arrives in CRET1 (see .ffunc_bit).
2202 | not CRET1, CRET1
2203 | b ->fff_resi
2204 |. zextw CRET1, CRET1 // Delay slot: executed before control reaches fff_resi.
2205 |
2206 |.macro .ffunc_bit_sh, name, shins, shmod
2207 | .ffunc_2 bit_..name
2208 | gettp TMP0, CARG1
2209 | beq TMP0, TISNUM, >1 // First argument is an integer: no conversion.
2210 |. nop
2211 | bal ->vm_tobit_fb
2212 |. sltiu TMP1, TMP0, LJ_TISNUM // Delay slot: TMP1 = (tag < LJ_TISNUM), tested by vm_tobit_fb.
2213 | move CARG1, CRET1 // Use the converted value as the first operand.
2214 |1:
2215 | gettp TMP0, CARG2
2216 | bne TMP0, TISNUM, ->fff_fallback // Second argument must be an integer.
2217 |. zextw CARG2, CARG2
2218 | sextw CARG1, CARG1
2219 |.if shmod == 1
2220 | negu CARG2, CARG2 // Negate the count (shmod 1 is used for rol via rotrv).
2221 |.endif
2222 | shins CRET1, CARG1, CARG2
2223 | b ->fff_resi
2224 |. zextw CRET1, CRET1
2225 |.endmacro
2226 |
2227 |.ffunc_bit_sh lshift, sllv, 0
2228 |.ffunc_bit_sh rshift, srlv, 0
2229 |.ffunc_bit_sh arshift, srav, 0
2230 |.ffunc_bit_sh rol, rotrv, 1
2231 |.ffunc_bit_sh ror, rotrv, 0
2232 |
2233 |.ffunc_bit tobit // Falls through into fff_resi with the value in CRET1.
2234 |->fff_resi: // Return the integer in CRET1 as a single result.
2235 | ld PC, FRAME_PC(BASE)
2236 | daddiu RA, BASE, -16
2237 | settp CRET1, TISNUM // Tag the value with TISNUM.
2238 | b ->fff_res1
2239 |. sd CRET1, -16(BASE) // Delay slot: store the result at BASE-16 (same address as RA).
2240 |
2241 |//-----------------------------------------------------------------------
2242 |->fff_fallback: // Call fast function fallback handler.
2243 | // BASE = new base, RB = CFUNC, RC = nargs*8
2244 | ld TMP3, CFUNC:RB->f
2245 | daddu TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, stored as L->top below.
2246 | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC.
2247 | daddiu TMP0, TMP1, 8*LUA_MINSTACK
2248 | ld TMP2, L->maxstack
2249 | sd PC, SAVE_PC // Redundant (but a defined value).
2250 | sltu AT, TMP2, TMP0 // AT = 1 if maxstack < top + 8*LUA_MINSTACK.
2251 | sd BASE, L->base
2252 | sd TMP1, L->top
2253 | bnez AT, >5 // Need to grow stack.
2254 |. move CFUNCADDR, TMP3
2255 | jalr TMP3 // (lua_State *L)
2256 |. move CARG1, L
2257 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2258 | ld BASE, L->base
2259 | sll RD, CRET1, 3 // RD = return value * 8.
2260 | bgtz CRET1, ->fff_res // Returned nresults+1?
2261 |. daddiu RA, BASE, -16
2262 |1: // Returned 0 or -1: retry fast path.
2263 | ld LFUNC:RB, FRAME_FUNC(BASE)
2264 | ld TMP0, L->top
2265 | cleartp LFUNC:RB
2266 | bnez CRET1, ->vm_call_tail // Returned -1?
2267 |. dsubu NARGS8:RC, TMP0, BASE // Delay slot: RC = L->top - BASE.
2268 | ins_callt // Returned 0: retry fast path.
2269 |
2270 |// Reconstruct previous base for vmeta_call during tailcall.
2271 |->vm_call_tail:
2272 | andi TMP0, PC, FRAME_TYPE
2273 | li AT, -4
2274 | bnez TMP0, >3 // Frame type bits set: use the masked PC as the delta.
2275 |. and TMP1, PC, AT // Delay slot: TMP1 = PC with the low two bits cleared.
2276 | lbu TMP1, OFS_RA(PC)
2277 | sll TMP1, TMP1, 3
2278 | addiu TMP1, TMP1, 16 // Otherwise delta = byte at OFS_RA(PC) * 8 + 16.
2279 |3:
2280 | b ->vm_call_dispatch // Resolve again for tailcall.
2281 |. dsubu TMP2, BASE, TMP1 // Delay slot: TMP2 = BASE - delta.
2282 |
2283 |5: // Grow stack for fallback handler.
2284 | load_got lj_state_growstack
2285 | li CARG2, LUA_MINSTACK
2286 | call_intern lj_state_growstack // (lua_State *L, int n)
2287 |. move CARG1, L
2288 | ld BASE, L->base // Reload BASE from L->base after the call.
2289 | b <1
2290 |. li CRET1, 0 // Force retry.
2291 |
2292 |->fff_gcstep: // Call GC step function.
2293 | // BASE = new base, RC = nargs*8
2294 | move MULTRES, ra // Keep the return address in MULTRES across the call.
2295 | load_got lj_gc_step
2296 | sd BASE, L->base
2297 | daddu TMP0, BASE, NARGS8:RC
2298 | sd PC, SAVE_PC // Redundant (but a defined value).
2299 | sd TMP0, L->top // L->top = BASE + nargs*8.
2300 | call_intern lj_gc_step // (lua_State *L)
2301 |. move CARG1, L
2302 | ld BASE, L->base
2303 | move ra, MULTRES // Restore the return address.
2304 | ld TMP0, L->top
2305 | ld CFUNC:RB, FRAME_FUNC(BASE)
2306 | cleartp CFUNC:RB
2307 | jr ra
2308 |. dsubu NARGS8:RC, TMP0, BASE // Delay slot: recompute RC = L->top - BASE.
2309 |
2310 |//-----------------------------------------------------------------------
2311 |//-- Special dispatch targets -------------------------------------------
2312 |//-----------------------------------------------------------------------
2313 |
2314 |->vm_record: // Dispatch target for recording phase.
2315 |.if JIT
2316 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2317 | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent.
2318 | bnez AT, >5
2319 | // Decrement the hookcount for consistency, but always do the call.
2320 |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2321 | andi AT, TMP3, HOOK_ACTIVE
2322 | bnez AT, >1 // Hook active: skip the hookcount store.
2323 |. addiu TMP2, TMP2, -1
2324 | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2325 | beqz AT, >1 // Neither line nor count mask set: skip the hookcount store.
2326 |. nop
2327 | b >1
2328 |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) // Delay slot: store the decremented count.
2329 |.endif
2330 |
2331 |->vm_rethook: // Dispatch target for return hooks.
2332 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2333 | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
2334 | beqz AT, >1 // Not active: continue at the next label 1 below.
2335 |5: // Re-dispatch to static ins.
2336 |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4.
2337 | jr AT
2338 |. nop
2339 |
2340 |->vm_inshook: // Dispatch target for instr/line hooks.
2341 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2342 | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2343 | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
2344 | bnez AT, <5
2345 |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2346 | beqz AT, <5 // Neither line nor count mask set: re-dispatch.
2347 |. addiu TMP2, TMP2, -1
2348 | beqz TMP2, >1 // Count reached zero: call the hook dispatcher.
2349 |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) // Delay slot: store the decremented count.
2350 | andi AT, TMP3, LUA_MASKLINE
2351 | beqz AT, <5 // No line mask: re-dispatch.
2352 |1:
2353 |. load_got lj_dispatch_ins // Delay slot of the branch above and first instruction at label 1.
2354 | sw MULTRES, SAVE_MULTRES
2355 | move CARG2, PC
2356 | sd BASE, L->base
2357 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2358 | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2359 |. move CARG1, L
2360 |3:
2361 | ld BASE, L->base
2362 |4: // Re-dispatch to static ins.
2363 | lw INS, -4(PC) // Reload the instruction word at PC-4.
2364 | decode_OP8a TMP1, INS
2365 | decode_OP8b TMP1
2366 | daddu TMP0, DISPATCH, TMP1
2367 | decode_RD8a RD, INS
2368 | ld AT, GG_DISP2STATIC(TMP0) // Handler address from the static dispatch table.
2369 | decode_RA8a RA, INS
2370 | decode_RD8b RD
2371 | jr AT
2372 | decode_RA8b RA
2373 |
2374 |->cont_hook: // Continue from hook yield.
2375 | daddiu PC, PC, 4 // Advance PC by one instruction before re-dispatching via label 4.
2376 | b <4
2377 |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins.
2378 |
2379 |->vm_hotloop: // Hot loop counter underflow.
2380 |.if JIT
2381 | ld LFUNC:TMP1, FRAME_FUNC(BASE)
2382 | daddiu CARG1, DISPATCH, GG_DISP2J // CARG1 = jit_State pointer (first argument).
2383 | cleartp LFUNC:TMP1
2384 | sd PC, SAVE_PC
2385 | ld TMP1, LFUNC:TMP1->pc
2386 | move CARG2, PC
2387 | sd L, DISPATCH_J(L)(DISPATCH)
2388 | lbu TMP1, PC2PROTO(framesize)(TMP1)
2389 | load_got lj_trace_hot
2390 | sd BASE, L->base
2391 | dsll TMP1, TMP1, 3
2392 | daddu TMP1, BASE, TMP1 // TMP1 = BASE + framesize*8.
2393 | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc)
2394 |. sd TMP1, L->top // Delay slot: set L->top to BASE + framesize*8.
2395 | b <3 // Reload BASE and re-dispatch (labels 3 and 4 above).
2396 |. nop
2397 |.endif
2398 |
2399 |
2400 |->vm_callhook: // Dispatch target for call hooks.
2401 |.if JIT
2402 | b >1
2403 |.endif
2404 |. move CARG2, PC // Delay slot of the branch with JIT; a plain instruction otherwise.
2405 |
2406 |->vm_hotcall: // Hot call counter underflow.
2407 |.if JIT
2408 | ori CARG2, PC, 1 // Pass PC with the low bit set.
2409 |1:
2410 |.endif
2411 | load_got lj_dispatch_call
2412 | daddu TMP0, BASE, RC
2413 | sd PC, SAVE_PC
2414 | sd BASE, L->base
2415 | dsubu RA, RA, BASE // Keep RA as an offset from BASE across the call.
2416 | sd TMP0, L->top // L->top = BASE + RC.
2417 | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2418 |. move CARG1, L
2419 | // Returns ASMFunction.
2420 | ld BASE, L->base
2421 | ld TMP0, L->top
2422 | sd r0, SAVE_PC // Invalidate for subsequent line hook.
2423 | dsubu NARGS8:RC, TMP0, BASE
2424 | daddu RA, BASE, RA // Rebase RA on the reloaded BASE.
2425 | ld LFUNC:RB, FRAME_FUNC(BASE)
2426 | cleartp LFUNC:RB
2427 | jr CRET1 // Jump to the returned ASMFunction.
2428 |. lw INS, -4(PC)
2429 |
2430 |->cont_stitch: // Trace stitching.
2431 |.if JIT
2432 | // RA = resultptr, RB = meta base
2433 | lw INS, -4(PC)
2434 | ld TRACE:TMP2, -40(RB) // Save previous trace.
2435 | decode_RA8a RC, INS
2436 | daddiu AT, MULTRES, -8 // AT = MULTRES - 8: bytes of results left to move.
2437 | cleartp TRACE:TMP2
2438 | decode_RA8b RC
2439 | beqz AT, >2 // Nothing to move.
2440 |. daddu RC, BASE, RC // Call base.
2441 |1: // Move results down.
2442 | ld CARG1, 0(RA)
2443 | daddiu AT, AT, -8
2444 | daddiu RA, RA, 8
2445 | sd CARG1, 0(RC)
2446 | bnez AT, <1
2447 |. daddiu RC, RC, 8
2448 |2:
2449 | decode_RA8a RA, INS
2450 | decode_RB8a RB, INS
2451 | decode_RA8b RA
2452 | decode_RB8b RB
2453 | daddu RA, RA, RB
2454 | daddu RA, BASE, RA // RA = BASE + RA operand + RB operand: end of wanted results.
2455 |3:
2456 | sltu AT, RC, RA
2457 | bnez AT, >9 // More results wanted?
2458 |. nop
2459 |
2460 | lhu TMP3, TRACE:TMP2->traceno
2461 | lhu RD, TRACE:TMP2->link
2462 | beq RD, TMP3, ->cont_nop // Blacklisted.
2463 |. load_got lj_dispatch_stitch
2464 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2465 |. sll RD, RD, 3
2466 |
2467 | // Stitch a new trace to the previous trace.
2468 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2469 | sd L, DISPATCH_J(L)(DISPATCH)
2470 | sd BASE, L->base
2471 | daddiu CARG1, DISPATCH, GG_DISP2J
2472 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2473 |. move CARG2, PC
2474 | b ->cont_nop
2475 |. ld BASE, L->base
2476 |
2477 |9: // Fill the next wanted slot with TISNIL and re-check.
2478 | sd TISNIL, 0(RC)
2479 | b <3
2480 |. daddiu RC, RC, 8
2481 |.endif
2482 |
2483 |->vm_profhook: // Dispatch target for profiler hook.
2484#if LJ_HASPROFILE
2485 | load_got lj_dispatch_profile
2486 | sw MULTRES, SAVE_MULTRES // Spill MULTRES to its save slot.
2487 | move CARG2, PC
2488 | sd BASE, L->base
2489 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2490 |. move CARG1, L
2491 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2492 | daddiu PC, PC, -4 // Step PC back by one instruction.
2493 | b ->cont_nop
2494 |. ld BASE, L->base // Delay slot: reload BASE.
2495#endif
2496 |
2497 |//-----------------------------------------------------------------------
2498 |//-- Trace exit handler -------------------------------------------------
2499 |//-----------------------------------------------------------------------
2500 |
2501 |.macro savex_, a, b
2502 |.if FPU
2503 | sdc1 f..a, a*8(sp) // FPRs go in the first 32*8 bytes at sp.
2504 | sdc1 f..b, b*8(sp)
2505 | sd r..a, 32*8+a*8(sp) // GPRs follow at offset 32*8.
2506 | sd r..b, 32*8+b*8(sp)
2507 |.else
2508 | sd r..a, a*8(sp) // Without FPU only the GPRs are saved, starting at sp.
2509 | sd r..b, b*8(sp)
2510 |.endif
2511 |.endmacro
2512 |
2513 |->vm_exit_handler:
2514 |.if JIT
2515 |.if FPU
2516 | daddiu sp, sp, -(32*8+32*8) // Reserve room for 32 FPRs plus 32 GPRs.
2517 |.else
2518 | daddiu sp, sp, -(32*8) // Reserve room for 32 GPRs.
2519 |.endif
2520 | savex_ 0, 1
2521 | savex_ 2, 3
2522 | savex_ 4, 5
2523 | savex_ 6, 7
2524 | savex_ 8, 9
2525 | savex_ 10, 11
2526 | savex_ 12, 13
2527 | savex_ 14, 15
2528 | savex_ 16, 17
2529 | savex_ 18, 19
2530 | savex_ 20, 21
2531 | savex_ 22, 23
2532 | savex_ 24, 25
2533 | savex_ 26, 27
2534 | savex_ 28, 30
2535 |.if FPU
2536 | sdc1 f29, 29*8(sp) // f29/f31 are saved here; the r29/r31 slots are filled specially below.
2537 | sdc1 f31, 31*8(sp)
2538 | sd r0, 32*8+31*8(sp) // Clear RID_TMP.
2539 | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp.
2540 | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP
2541 |.else
2542 | sd r0, 31*8(sp) // Clear RID_TMP.
2543 | daddiu TMP2, sp, 32*8 // Recompute original value of sp.
2544 | sd TMP2, 29*8(sp) // Store sp in RID_SP
2545 |.endif
2546 | li_vmstate EXIT
2547 | daddiu DISPATCH, JGL, -GG_DISP2G-32768
2548 | lw TMP1, 0(TMP2) // Load exit number.
2549 | st_vmstate
2550 | ld L, DISPATCH_GL(cur_L)(DISPATCH)
2551 | ld BASE, DISPATCH_GL(jit_base)(DISPATCH)
2552 | load_got lj_trace_exit
2553 | sd L, DISPATCH_J(L)(DISPATCH)
2554 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2555 | sd BASE, L->base
2556 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2557 | daddiu CARG1, DISPATCH, GG_DISP2J
2558 | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2559 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2560 |. move CARG2, sp // Delay slot: the save area at sp is passed as ex.
2561 | // Returns MULTRES (unscaled) or negated error code.
2562 | ld TMP1, L->cframe
2563 | li AT, -4
2564 | ld BASE, L->base
2565 | and sp, TMP1, AT // sp = L->cframe with the low two bits cleared.
2566 | ld PC, SAVE_PC // Get SAVE_PC.
2567 | b >1 // Continue at label 1 inside vm_exit_interp.
2568 |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield).
2569 |.endif
2570 |->vm_exit_interp:
2571 |.if JIT
2572 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2573 | ld L, SAVE_L
2574 | daddiu DISPATCH, JGL, -GG_DISP2G-32768
2575 | sd BASE, L->base
2576 |1:
2577 | bltz CRET1, >9 // Check for error from exit.
2578 |. ld LFUNC:RB, FRAME_FUNC(BASE)
2579 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2580 | dsll MULTRES, CRET1, 3 // Scale the unscaled MULTRES by 8.
2581 | cleartp LFUNC:RB
2582 | sw MULTRES, SAVE_MULTRES
2583 | li TISNIL, LJ_TNIL
2584 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2585 | .FPU mtc1 TMP3, TOBIT
2586 | ld TMP1, LFUNC:RB->pc
2587 | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
2588 | ld KBASE, PC2PROTO(k)(TMP1)
2589 | .FPU cvt.d.s TOBIT, TOBIT // Convert the single-precision constant to double.
2590 | // Modified copy of ins_next which handles function header dispatch, too.
2591 | lw INS, 0(PC)
2592 | daddiu PC, PC, 4
2593 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
2594 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2595 | decode_OP8a TMP1, INS
2596 | decode_OP8b TMP1
2597 | sltiu TMP2, TMP1, BC_FUNCF*8 // TMP2 = 1 for opcodes below BC_FUNCF.
2598 | daddu TMP0, DISPATCH, TMP1
2599 | decode_RD8a RD, INS
2600 | ld AT, 0(TMP0)
2601 | decode_RA8a RA, INS
2602 | beqz TMP2, >2 // Opcode >= BC_FUNCF: take the function header path.
2603 |. decode_RA8b RA
2604 | jr AT
2605 |. decode_RD8b RD
2606 |2:
2607 | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
2608 | bnez TMP2, >3
2609 |. ld TMP1, FRAME_PC(BASE)
2610 | // Check frame below fast function.
2611 | andi TMP0, TMP1, FRAME_TYPE
2612 | bnez TMP0, >3 // Trace stitching continuation?
2613 |. nop
2614 | // Otherwise set KBASE for Lua function below fast function.
2615 | lw TMP2, -4(TMP1)
2616 | decode_RA8a TMP0, TMP2
2617 | decode_RA8b TMP0
2618 | dsubu TMP1, BASE, TMP0
2619 | ld LFUNC:TMP2, -32(TMP1)
2620 | cleartp LFUNC:TMP2
2621 | ld TMP1, LFUNC:TMP2->pc
2622 | ld KBASE, PC2PROTO(k)(TMP1)
2623 |3:
2624 | daddiu RC, MULTRES, -8
2625 | jr AT
2626 |. daddu RA, RA, BASE
2627 |
2628 |9: // Rethrow error from the right C frame.
2629 | load_got lj_err_trace
2630 | sub CARG2, r0, CRET1 // CARG2 = -CRET1: the positive error code.
2631 | call_intern lj_err_trace // (lua_State *L, int errcode)
2632 |. move CARG1, L
2633 |.endif
2634 |
2635 |//-----------------------------------------------------------------------
2636 |//-- Math helper functions ----------------------------------------------
2637 |//-----------------------------------------------------------------------
2638 |
2639 |// Hard-float round to integer.
2640 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2641 |// MIPSR6: Modifies FTMP1, too.
2642 |.macro vm_round_hf, func
2643 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2644 | dsll TMP0, TMP0, 32
2645 | dmtc1 TMP0, f4 // f4 = 2^52.
2646 | abs.d FRET2, FARG1 // |x|
2647 | dmfc1 AT, FARG1 // AT = raw bits of x, used for the sign test below.
2648 |.if MIPSR6
2649 | cmp.lt.d FTMP1, FRET2, f4
2650 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2651 | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52.
2652 |.else
2653 | c.olt.d 0, FRET2, f4
2654 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2655 | bc1f 0, >1 // Truncate only if |x| < 2^52.
2656 |.endif
2657 |. sub.d FRET1, FRET1, f4
2658 | slt AT, AT, r0 // AT = 1 if the sign bit of x is set.
2659 |.if "func" == "ceil"
2660 | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0.
2661 |.else
2662 | lui TMP0, 0x3ff0 // Hiword of +1 (double).
2663 |.endif
2664 |.if "func" == "trunc"
2665 | dsll TMP0, TMP0, 32
2666 | dmtc1 TMP0, f4 // f4 = +1.
2667 |.if MIPSR6
2668 | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result?
2669 | sub.d FRET2, FRET1, f4
2670 | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1.
2671 | dmtc1 AT, FRET1
2672 | neg.d FRET2, FTMP1
2673 | jr ra
2674 |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in.
2675 |.else
2676 | c.olt.d 0, FRET2, FRET1 // |x| < result?
2677 | sub.d FRET2, FRET1, f4
2678 | movt.d FRET1, FRET2, 0 // If yes, subtract +1.
2679 | neg.d FRET2, FRET1
2680 | jr ra
2681 |. movn.d FRET1, FRET2, AT // Merge sign bit back in.
2682 |.endif
2683 |.else
2684 | neg.d FRET2, FRET1
2685 | dsll TMP0, TMP0, 32
2686 | dmtc1 TMP0, f4 // f4 = -1 for ceil, +1 otherwise.
2687 |.if MIPSR6
2688 | dmtc1 AT, FTMP1
2689 | sel.d FTMP1, FRET1, FRET2
2690 |.if "func" == "ceil"
2691 | cmp.lt.d FRET1, FTMP1, FARG1 // x > result?
2692 |.else
2693 | cmp.lt.d FRET1, FARG1, FTMP1 // x < result?
2694 |.endif
2695 | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1.
2696 | jr ra
2697 |. sel.d FRET1, FTMP1, FRET2
2698 |.else
2699 | movn.d FRET1, FRET2, AT // Merge sign bit back in.
2700 |.if "func" == "ceil"
2701 | c.olt.d 0, FRET1, FARG1 // x > result?
2702 |.else
2703 | c.olt.d 0, FARG1, FRET1 // x < result?
2704 |.endif
2705 | sub.d FRET2, FRET1, f4 // If yes, subtract +-1.
2706 | jr ra
2707 |. movt.d FRET1, FRET2, 0
2708 |.endif
2709 |.endif
2710 |1: // The |x| < 2^52 test failed: return x unchanged.
2711 | jr ra
2712 |. mov.d FRET1, FARG1
2713 |.endmacro
2714 |
2715 |.macro vm_round, func
2716 |.if FPU
2717 | vm_round_hf, func // Only expands to code when FPU is set.
2718 |.endif
2719 |.endmacro
2720 |
2721 |->vm_floor:
2722 | vm_round floor
2723 |->vm_ceil:
2724 | vm_round ceil
2725 |->vm_trunc: // Body is only assembled when JIT is set.
2726 |.if JIT
2727 | vm_round trunc
2728 |.endif
2729 |
2730 |// Soft-float integer to number conversion.
2731 |.macro sfi2d, ARG
2732 |.if not FPU
2733 | beqz ARG, >9 // Handle zero first.
2734 |. sra TMP0, ARG, 31 // Delay slot: TMP0 = sign mask (0 or -1).
2735 | xor TMP1, ARG, TMP0
2736 | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2737 | dclz ARG, TMP1 // Count leading zeros of the absolute value.
2738 | addiu ARG, ARG, -11
2739 | li AT, 0x3ff+63-11-1
2740 | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1.
2741 | subu ARG, AT, ARG // Exponent - 1.
2742 | ins ARG, TMP0, 11, 11 // Sign | Exponent.
2743 | dsll ARG, ARG, 52 // Align left.
2744 | jr ra
2745 |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent.
2746 |9: // ARG is zero: return it unchanged.
2747 | jr ra
2748 |. nop
2749 |.endif
2750 |.endmacro
2751 |
2752 |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1.
2753 |->vm_sfi2d_1:
2754 | sfi2d CARG1
2755 |
2756 |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1.
2757 |->vm_sfi2d_2:
2758 | sfi2d CARG2
2759 |
2760 |// Soft-float comparison. Equivalent to c.eq.d.
2761 |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2762 |->vm_sfcmpeq:
2763 |.if not FPU
2764 | dsll AT, CARG1, 1 // Drop the sign bits of both operands.
2765 | dsll TMP0, CARG2, 1
2766 | or TMP1, AT, TMP0
2767 | beqz TMP1, >8 // Both args +-0: return 1.
2768 |. lui TMP1, 0xffe0
2769 | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0 << 48: NaN threshold for the sign-stripped values.
2770 | sltu AT, TMP1, AT
2771 | sltu TMP0, TMP1, TMP0
2772 | or TMP1, AT, TMP0
2773 | bnez TMP1, >9 // Either arg is NaN: return 0;
2774 |. xor AT, CARG1, CARG2
2775 | jr ra
2776 |. sltiu CRET1, AT, 1 // Same values: return 1.
2777 |8:
2778 | jr ra
2779 |. li CRET1, 1
2780 |9:
2781 | jr ra
2782 |. li CRET1, 0
2783 |.endif
2784 |
2785 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2786 |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2787 |->vm_sfcmpult:
2788 |.if not FPU
2789 | b >1
2790 |. li CRET2, 1 // Delay slot: CRET2 = result to return for NaN operands.
2791 |.endif
2792 |
2793 |->vm_sfcmpolt:
2794 |.if not FPU
2795 | li CRET2, 0 // CRET2 = result to return for NaN operands.
2796 |1:
2797 | dsll AT, CARG1, 1 // Drop the sign bits of both operands.
2798 | dsll TMP0, CARG2, 1
2799 | or TMP1, AT, TMP0
2800 | beqz TMP1, >8 // Both args +-0: return 0.
2801 |. lui TMP1, 0xffe0
2802 | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0 << 48: NaN threshold for the sign-stripped values.
2803 | sltu AT, TMP1, AT
2804 | sltu TMP0, TMP1, TMP0
2805 | or TMP1, AT, TMP0
2806 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2807 |. and AT, CARG1, CARG2
2808 | bltz AT, >5 // Both args negative?
2809 |. nop
2810 | jr ra
2811 |. slt CRET1, CARG1, CARG2
2812 |5: // Swap conditions if both operands are negative.
2813 | jr ra
2814 |. slt CRET1, CARG2, CARG1
2815 |8:
2816 | jr ra
2817 |. li CRET1, 0
2818 |9:
2819 | jr ra
2820 |. move CRET1, CRET2 // Return the NaN result chosen at entry.
2821 |.endif
2822 |
2823 |->vm_sfcmpogt: // Input: CARG1, CARG2. Output: CRET1. Same sequence as vm_sfcmpolt with the operands swapped.
2824 |.if not FPU
2825 | dsll AT, CARG2, 1 // Drop the sign bits of both operands.
2826 | dsll TMP0, CARG1, 1
2827 | or TMP1, AT, TMP0
2828 | beqz TMP1, >8 // Both args +-0: return 0.
2829 |. lui TMP1, 0xffe0
2830 | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0 << 48: NaN threshold for the sign-stripped values.
2831 | sltu AT, TMP1, AT
2832 | sltu TMP0, TMP1, TMP0
2833 | or TMP1, AT, TMP0
2834 | bnez TMP1, >9 // Either arg is NaN: return 0.
2835 |. and AT, CARG2, CARG1
2836 | bltz AT, >5 // Both args negative?
2837 |. nop
2838 | jr ra
2839 |. slt CRET1, CARG2, CARG1
2840 |5: // Swap conditions if both operands are negative.
2841 | jr ra
2842 |. slt CRET1, CARG1, CARG2
2843 |8:
2844 | jr ra
2845 |. li CRET1, 0
2846 |9:
2847 | jr ra
2848 |. li CRET1, 0
2849 |.endif
2850 |
2851 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2852 |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2853 |->vm_sfcmpolex:
2854 |.if not FPU
2855 | dsll AT, CARG1, 1 // Drop the sign bits of both operands.
2856 | dsll TMP0, CARG2, 1
2857 | or TMP1, AT, TMP0
2858 | beqz TMP1, >8 // Both args +-0: return 1.
2859 |. lui TMP1, 0xffe0
2860 | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0 << 48: NaN threshold for the sign-stripped values.
2861 | sltu AT, TMP1, AT
2862 | sltu TMP0, TMP1, TMP0
2863 | or TMP1, AT, TMP0
2864 | bnez TMP1, >9 // Either arg is NaN: return 0;
2865 |. and AT, CARG1, CARG2
2866 | xor AT, AT, TMP3 // The sign bit of TMP3 flips which of the two compares below is used.
2867 | bltz AT, >5 // Both args negative?
2868 |. nop
2869 | jr ra
2870 |. slt CRET1, CARG2, CARG1
2871 |5: // Swap conditions if both operands are negative.
2872 | jr ra
2873 |. slt CRET1, CARG1, CARG2
2874 |8:
2875 | jr ra
2876 |. li CRET1, 1
2877 |9:
2878 | jr ra
2879 |. li CRET1, 0
2880 |.endif
2881 |
2882 |.macro sfmin_max, name, fpcall
2883 |->vm_sf .. name:
2884 |.if JIT and not FPU
2885 | move TMP2, ra // Save ra around the nested call.
2886 | bal ->fpcall // Compare CARG1 and CARG2; result in CRET1.
2887 |. nop
2888 | move ra, TMP2
2889 | move TMP0, CRET1 // TMP0 = comparison result.
2890 | move CRET1, CARG1
2891 |.if MIPSR6
2892 | selnez CRET1, CRET1, TMP0
2893 | seleqz TMP0, CARG2, TMP0
2894 | jr ra
2895 |. or CRET1, CRET1, TMP0 // CRET1 = CARG1 if the comparison was non-zero, else CARG2.
2896 |.else
2897 | jr ra
2898 |. movz CRET1, CARG2, TMP0 // CRET1 = CARG1 if the comparison was non-zero, else CARG2.
2899 |.endif
2900 |.endif
2901 |.endmacro
2902 |
2903 | sfmin_max min, vm_sfcmpolt
2904 | sfmin_max max, vm_sfcmpogt
2905 |
2906 |//-----------------------------------------------------------------------
2907 |//-- Miscellaneous functions --------------------------------------------
2908 |//-----------------------------------------------------------------------
2909 |
2910 |//-----------------------------------------------------------------------
2911 |//-- FFI helper functions -----------------------------------------------
2912 |//-----------------------------------------------------------------------
2913 |
2914 |// Handler for callback functions. Callback slot number in r1, g in r2.
2915 |->vm_ffi_callback:
2916 |.if FFI
2917 |.type CTSTATE, CTState, PC
2918 | saveregs
2919 | ld CTSTATE, GL:r2->ctype_state
2920 | daddiu DISPATCH, r2, GG_G2DISP
2921 | load_got lj_ccallback_enter
2922 | sw r1, CTSTATE->cb.slot
2923 | sd CARG1, CTSTATE->cb.gpr[0] // Save the eight integer (and, with FPU, eight FP) argument registers.
2924 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2925 | sd CARG2, CTSTATE->cb.gpr[1]
2926 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2927 | sd CARG3, CTSTATE->cb.gpr[2]
2928 | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2]
2929 | sd CARG4, CTSTATE->cb.gpr[3]
2930 | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3]
2931 | sd CARG5, CTSTATE->cb.gpr[4]
2932 | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4]
2933 | sd CARG6, CTSTATE->cb.gpr[5]
2934 | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5]
2935 | sd CARG7, CTSTATE->cb.gpr[6]
2936 | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6]
2937 | sd CARG8, CTSTATE->cb.gpr[7]
2938 | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7]
2939 | daddiu TMP0, sp, CFRAME_SPACE
2940 | sd TMP0, CTSTATE->cb.stack // cb.stack = sp + CFRAME_SPACE.
2941 | sd r0, SAVE_PC // Any value outside of bytecode is ok.
2942 | move CARG2, sp
2943 | call_intern lj_ccallback_enter // (CTState *cts, void *cf)
2944 |. move CARG1, CTSTATE
2945 | // Returns lua_State *.
2946 | ld BASE, L:CRET1->base
2947 | ld RC, L:CRET1->top
2948 | move L, CRET1
2949 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2950 | ld LFUNC:RB, FRAME_FUNC(BASE)
2951 | .FPU mtc1 TMP3, TOBIT
2952 | li TISNIL, LJ_TNIL
2953 | li TISNUM, LJ_TISNUM
2954 | li_vmstate INTERP
2955 | subu RC, RC, BASE // RC = top - base.
2956 | cleartp LFUNC:RB
2957 | st_vmstate
2958 | .FPU cvt.d.s TOBIT, TOBIT // Convert the single-precision constant to double.
2959 | ins_callt
2960 |.endif
2961 |
2962 |->cont_ffi_callback: // Return from FFI callback.
2963 |.if FFI
2964 | load_got lj_ccallback_leave
2965 | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH)
2966 | sd BASE, L->base
2967 | sd RB, L->top
2968 | sd L, CTSTATE->L
2969 | move CARG2, RA
2970 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2971 |. move CARG1, CTSTATE
2972 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] // Load the return registers from the callback state.
2973 | ld CRET1, CTSTATE->cb.gpr[0]
2974 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2975 | b ->vm_leave_unw
2976 |. ld CRET2, CTSTATE->cb.gpr[1]
2977 |.endif
2978 |
2979 |->vm_ffi_call: // Call C function via FFI.
2980 | // Caveat: needs special frame unwinding, see below.
2981 |.if FFI
2982 | .type CCSTATE, CCallState, CARG1
2983 | lw TMP1, CCSTATE->spadj
2984 | lbu CARG2, CCSTATE->nsp
2985 | move TMP2, sp // TMP2 = original sp.
2986 | dsubu sp, sp, TMP1 // Lower sp by spadj.
2987 | sd ra, -8(TMP2) // Save ra, r16 and CCSTATE just below the original sp.
2988 | sll CARG2, CARG2, 3 // CARG2 = nsp * 8.
2989 | sd r16, -16(TMP2)
2990 | sd CCSTATE, -24(TMP2)
2991 | move r16, TMP2 // r16 holds the original sp across the call.
2992 | daddiu TMP1, CCSTATE, offsetof(CCallState, stack)
2993 | move TMP2, sp
2994 | beqz CARG2, >2 // Nothing to copy.
2995 |. daddu TMP3, TMP1, CARG2 // Delay slot: TMP3 = end of the source area.
2996 |1: // Copy nsp*8 bytes from CCSTATE->stack to the new sp.
2997 | ld TMP0, 0(TMP1)
2998 | daddiu TMP1, TMP1, 8
2999 | sltu AT, TMP1, TMP3
3000 | sd TMP0, 0(TMP2)
3001 | bnez AT, <1
3002 |. daddiu TMP2, TMP2, 8
3003 |2:
3004 | ld CFUNCADDR, CCSTATE->func
3005 | .FPU ldc1 FARG1, CCSTATE->gpr[0] // FP argument registers are loaded from the same gpr[] slots.
3006 | ld CARG2, CCSTATE->gpr[1]
3007 | .FPU ldc1 FARG2, CCSTATE->gpr[1]
3008 | ld CARG3, CCSTATE->gpr[2]
3009 | .FPU ldc1 FARG3, CCSTATE->gpr[2]
3010 | ld CARG4, CCSTATE->gpr[3]
3011 | .FPU ldc1 FARG4, CCSTATE->gpr[3]
3012 | ld CARG5, CCSTATE->gpr[4]
3013 | .FPU ldc1 FARG5, CCSTATE->gpr[4]
3014 | ld CARG6, CCSTATE->gpr[5]
3015 | .FPU ldc1 FARG6, CCSTATE->gpr[5]
3016 | ld CARG7, CCSTATE->gpr[6]
3017 | .FPU ldc1 FARG7, CCSTATE->gpr[6]
3018 | ld CARG8, CCSTATE->gpr[7]
3019 | .FPU ldc1 FARG8, CCSTATE->gpr[7]
3020 | jalr CFUNCADDR
3021 |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
3022 | ld CCSTATE:TMP1, -24(r16) // Reload the saved CCSTATE pointer.
3023 | ld TMP2, -16(r16) // TMP2 = saved r16.
3024 | ld ra, -8(r16)
3025 | sd CRET1, CCSTATE:TMP1->gpr[0] // Store the return registers back into the call state.
3026 | sd CRET2, CCSTATE:TMP1->gpr[1]
3027 |.if FPU
3028 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
3029 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
3030 |.else
3031 | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float.
3032 |.endif
3033 | move sp, r16 // Restore the original sp.
3034 | jr ra
3035 |. move r16, TMP2 // Delay slot: restore r16.
3036 |.endif
3037 |// Note: vm_ffi_call must be the last function in this object file!
3038 |
3039 |//-----------------------------------------------------------------------
3040}
3041
3042/* Generate the code for a single instruction. */
3043static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3044{
3045 int vk = 0;
3046 |=>defop:
3047
3048 switch (op) {
3049
3050 /* -- Comparison ops ---------------------------------------------------- */
3051
3052 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3053
3054 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3055 | // RA = src1*8, RD = src2*8, JMP with RD = target
3056 |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp
3057 | daddu RA, BASE, RA
3058 | daddu RD, BASE, RD
3059 | ld ARGRA, 0(RA)
3060 | ld ARGRD, 0(RD)
3061 | lhu TMP2, OFS_RD(PC)
3062 | gettp CARG3, ARGRA
3063 | gettp CARG4, ARGRD
3064 | bne CARG3, TISNUM, >2
3065 |. daddiu PC, PC, 4
3066 | bne CARG4, TISNUM, >5
3067 |. decode_RD4b TMP2
3068 | sextw ARGRA, ARGRA
3069 | sextw ARGRD, ARGRD
3070 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3071 | slt AT, CARG1, CARG2
3072 | addu TMP2, TMP2, TMP3
3073 |.if MIPSR6
3074 | movop TMP2, TMP2, AT
3075 |.else
3076 | movop TMP2, r0, AT
3077 |.endif
3078 |1:
3079 | daddu PC, PC, TMP2
3080 | ins_next
3081 |
3082 |2: // RA is not an integer.
3083 | sltiu AT, CARG3, LJ_TISNUM
3084 | beqz AT, ->vmeta_comp
3085 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3086 | sltiu AT, CARG4, LJ_TISNUM
3087 | beqz AT, >4
3088 |. decode_RD4b TMP2
3089 |.if FPU
3090 | ldc1 FRA, 0(RA)
3091 | ldc1 FRD, 0(RD)
3092 |.endif
3093 |3: // RA and RD are both numbers.
3094 |.if FPU
3095 |.if MIPSR6
3096 | fcomp FTMP0, FTMP0, FTMP2
3097 | addu TMP2, TMP2, TMP3
3098 | mfc1 TMP3, FTMP0
3099 | b <1
3100 |. fmovop TMP2, TMP2, TMP3
3101 |.else
3102 | fcomp FTMP0, FTMP2
3103 | addu TMP2, TMP2, TMP3
3104 | b <1
3105 |. fmovop TMP2, r0
3106 |.endif
3107 |.else
3108 | bal sfcomp
3109 |. addu TMP2, TMP2, TMP3
3110 | b <1
3111 |.if MIPSR6
3112 |. movop TMP2, TMP2, CRET1
3113 |.else
3114 |. movop TMP2, r0, CRET1
3115 |.endif
3116 |.endif
3117 |
3118 |4: // RA is a number, RD is not a number.
3119 | bne CARG4, TISNUM, ->vmeta_comp
3120 | // RA is a number, RD is an integer. Convert RD to a number.
3121 |.if FPU
3122 |. lwc1 FRD, LO(RD)
3123 | ldc1 FRA, 0(RA)
3124 | b <3
3125 |. cvt.d.w FRD, FRD
3126 |.else
3127 |.if "ARGRD" == "CARG1"
3128 |. sextw CARG1, CARG1
3129 | bal ->vm_sfi2d_1
3130 |. nop
3131 |.else
3132 |. sextw CARG2, CARG2
3133 | bal ->vm_sfi2d_2
3134 |. nop
3135 |.endif
3136 | b <3
3137 |. nop
3138 |.endif
3139 |
3140 |5: // RA is an integer, RD is not an integer
3141 | sltiu AT, CARG4, LJ_TISNUM
3142 | beqz AT, ->vmeta_comp
3143 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3144 | // RA is an integer, RD is a number. Convert RA to a number.
3145 |.if FPU
3146 | lwc1 FRA, LO(RA)
3147 | ldc1 FRD, 0(RD)
3148 | b <3
3149 | cvt.d.w FRA, FRA
3150 |.else
3151 |.if "ARGRA" == "CARG1"
3152 | bal ->vm_sfi2d_1
3153 |. sextw CARG1, CARG1
3154 |.else
3155 | bal ->vm_sfi2d_2
3156 |. sextw CARG2, CARG2
3157 |.endif
3158 | b <3
3159 |. nop
3160 |.endif
3161 |.endmacro
3162 |
3163 |.if MIPSR6
3164 if (op == BC_ISLT) {
3165 | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
3166 } else if (op == BC_ISGE) {
3167 | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
3168 } else if (op == BC_ISLE) {
3169 | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
3170 } else {
3171 | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
3172 }
3173 |.else
3174 if (op == BC_ISLT) {
3175 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
3176 } else if (op == BC_ISGE) {
3177 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
3178 } else if (op == BC_ISLE) {
3179 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
3180 } else {
3181 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
3182 }
3183 |.endif
3184 break;
3185
3186 case BC_ISEQV: case BC_ISNEV: /* Compare two stack slots for (in)equality and conditionally jump. */
3187 vk = op == BC_ISEQV; /* vk = 1 for ISEQV, 0 for ISNEV. */
3188 | // RA = src1*8, RD = src2*8, JMP with RD = target
3189 | daddu RA, BASE, RA
3190 | daddiu PC, PC, 4
3191 | daddu RD, BASE, RD
3192 | ld CARG1, 0(RA)
3193 | lhu TMP2, -4+OFS_RD(PC) // -4 compensates for the PC increment above.
3194 | ld CARG2, 0(RD)
3195 | gettp CARG3, CARG1
3196 | gettp CARG4, CARG2
3197 | sltu AT, TISNUM, CARG3
3198 | sltu TMP1, TISNUM, CARG4
3199 | or AT, AT, TMP1 // AT != 0 if either tag is above TISNUM (unsigned).
3200 if (vk) {
3201 | beqz AT, ->BC_ISEQN_Z // Neither tag is above TISNUM: continue in BC_ISEQN.
3202 } else {
3203 | beqz AT, ->BC_ISNEN_Z // Neither tag is above TISNUM: continue in BC_ISNEN.
3204 }
3205 | // Either or both types are not numbers.
3206 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3207 |.if FFI
3208 |. li AT, LJ_TCDATA
3209 | beq CARG3, AT, ->vmeta_equal_cd
3210 |.endif
3211 | decode_RD4b TMP2
3212 |.if FFI
3213 | beq CARG4, AT, ->vmeta_equal_cd
3214 |. nop
3215 |.endif
3216 | bne CARG1, CARG2, >2
3217 |. addu TMP2, TMP2, TMP3
3218 | // Tag and value are equal.
3219 if (vk) {
3220 |->BC_ISEQV_Z:
3221 | daddu PC, PC, TMP2 // Take the jump (only emitted for ISEQV; ISNEV branches here).
3222 }
3223 |1:
3224 | ins_next
3225 |
3226 |2: // Check if the tags are the same and it's a table or userdata.
3227 | xor AT, CARG3, CARG4 // Same type?
3228 | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata?
3229 |.if MIPSR6
3230 | seleqz TMP0, TMP0, AT // TMP0 = 0 if the tags differ.
3231 |.else
3232 | movn TMP0, r0, AT // TMP0 = 0 if the tags differ.
3233 |.endif
3234 if (vk) {
3235 | beqz TMP0, <1 // Not equal: continue without jumping.
3236 } else {
3237 | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
3238 }
3239 | // Different tables or userdatas. Need to check __eq metamethod.
3240 | // Field metatable must be at same offset for GCtab and GCudata!
3241 |. cleartp TAB:TMP1, CARG1
3242 | ld TAB:TMP3, TAB:TMP1->metatable
3243 if (vk) {
3244 | beqz TAB:TMP3, <1 // No metatable?
3245 |. nop
3246 | lbu TMP3, TAB:TMP3->nomm
3247 | andi TMP3, TMP3, 1<<MM_eq
3248 | bnez TMP3, <1 // Or 'no __eq' flag set? Use this arm's own label 1, like the branch above.
3249 } else {
3250 | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
3251 |. nop
3252 | lbu TMP3, TAB:TMP3->nomm
3253 | andi TMP3, TMP3, 1<<MM_eq
3254 | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
3255 }
3256 |. nop
3257 | b ->vmeta_equal // Handle __eq metamethod.
3258 |. li TMP0, 1-vk // ne = 0 or 1.
3259 break;
3260
3261 case BC_ISEQS: case BC_ISNES: /* Compare a stack slot against a string constant and conditionally jump. */
3262 vk = op == BC_ISEQS; /* vk = 1 for ISEQS, 0 for ISNES. */
3263 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
3264 | daddu RA, BASE, RA
3265 | daddiu PC, PC, 4
3266 | ld CARG1, 0(RA)
3267 | dsubu RD, KBASE, RD
3268 | lhu TMP2, -4+OFS_RD(PC) // -4 compensates for the PC increment above.
3269 | ld CARG2, -8(RD) // KBASE-8-str_const*8
3270 |.if FFI
3271 | gettp TMP0, CARG1
3272 | li AT, LJ_TCDATA
3273 |.endif
3274 | li TMP1, LJ_TSTR
3275 | decode_RD4b TMP2
3276 |.if FFI
3277 | beq TMP0, AT, ->vmeta_equal_cd // cdata operand: handled by vmeta_equal_cd.
3278 |.endif
3279 |. settp CARG2, TMP1 // Tag the constant with LJ_TSTR so it can be compared as a whole TValue.
3280 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3281 | xor TMP1, CARG1, CARG2 // TMP1 == 0 iff src equals the tagged string constant.
3282 | addu TMP2, TMP2, TMP3
3283 |.if MIPSR6
3284 if (vk) {
3285 | seleqz TMP2, TMP2, TMP1 // ISEQS: zero the jump offset if not equal.
3286 } else {
3287 | selnez TMP2, TMP2, TMP1 // ISNES: zero the jump offset if equal.
3288 }
3289 |.else
3290 if (vk) {
3291 | movn TMP2, r0, TMP1 // ISEQS: zero the jump offset if not equal.
3292 } else {
3293 | movz TMP2, r0, TMP1 // ISNES: zero the jump offset if equal.
3294 }
3295 |.endif
3296 | daddu PC, PC, TMP2
3297 | ins_next
3298 break;
3299
3300 case BC_ISEQN: case BC_ISNEN: /* Compare a stack slot against a number constant and conditionally jump. */
3301 vk = op == BC_ISEQN; /* vk = 1 for ISEQN, 0 for ISNEN. */
3302 | // RA = src*8, RD = num_const*8, JMP with RD = target
3303 | daddu RA, BASE, RA
3304 | daddu RD, KBASE, RD
3305 | ld CARG1, 0(RA)
3306 | ld CARG2, 0(RD)
3307 | lhu TMP2, OFS_RD(PC)
3308 | gettp CARG3, CARG1
3309 | gettp CARG4, CARG2
3310 | daddiu PC, PC, 4
3311 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3312 if (vk) {
3313 |->BC_ISEQN_Z: // Also entered from BC_ISEQV with RA/RD pointing at the two operands.
3314 } else {
3315 |->BC_ISNEN_Z: // Also entered from BC_ISNEV with RA/RD pointing at the two operands.
3316 }
3317 | bne CARG3, TISNUM, >3
3318 |. decode_RD4b TMP2
3319 | bne CARG4, TISNUM, >6
3320 |. addu TMP2, TMP2, TMP3
3321 | xor AT, CARG1, CARG2 // Both tags equal TISNUM: AT == 0 iff the TValues are identical.
3322 |.if MIPSR6
3323 if (vk) {
3324 | seleqz TMP2, TMP2, AT
3325 |1:
3326 | daddu PC, PC, TMP2
3327 |2: // ISEQN: label 2 skips the jump.
3328 } else {
3329 | selnez TMP2, TMP2, AT
3330 |1:
3331 |2: // ISNEN: label 2 takes the jump.
3332 | daddu PC, PC, TMP2
3333 }
3334 |.else
3335 if (vk) {
3336 | movn TMP2, r0, AT
3337 |1:
3338 | daddu PC, PC, TMP2
3339 |2: // ISEQN: label 2 skips the jump.
3340 } else {
3341 | movz TMP2, r0, AT
3342 |1:
3343 |2: // ISNEN: label 2 takes the jump.
3344 | daddu PC, PC, TMP2
3345 }
3346 |.endif
3347 | ins_next
3348 |
3349 |3: // RA is not an integer.
3350 | sltu AT, CARG3, TISNUM // AT = 1 if RA's tag is below TISNUM.
3351 |.if FFI
3352 | beqz AT, >8
3353 |.else
3354 | beqz AT, <2
3355 |.endif
3356 |. addu TMP2, TMP2, TMP3
3357 | sltu AT, CARG4, TISNUM // AT = 1 if RD's tag is below TISNUM.
3358 |.if FPU
3359 | ldc1 FTMP0, 0(RA)
3360 | ldc1 FTMP2, 0(RD)
3361 |.endif
3362 | beqz AT, >5
3363 |. nop
3364 |4: // RA and RD are both numbers.
3365 |.if FPU
3366 |.if MIPSR6
3367 | cmp.eq.d FTMP0, FTMP0, FTMP2
3368 | dmfc1 TMP1, FTMP0 // Move the compare result to a GPR for selnez/seleqz.
3369 | b <1
3370 if (vk) {
3371 |. selnez TMP2, TMP2, TMP1
3372 } else {
3373 |. seleqz TMP2, TMP2, TMP1
3374 }
3375 |.else
3376 | c.eq.d FTMP0, FTMP2
3377 | b <1
3378 if (vk) {
3379 |. movf TMP2, r0
3380 } else {
3381 |. movt TMP2, r0
3382 }
3383 |.endif
3384 |.else
3385 | bal ->vm_sfcmpeq // Soft-float compare; result is tested via CRET1 below.
3386 |. nop
3387 | b <1
3388 |.if MIPSR6
3389 if (vk) {
3390 |. selnez TMP2, TMP2, CRET1
3391 } else {
3392 |. seleqz TMP2, TMP2, CRET1
3393 }
3394 |.else
3395 if (vk) {
3396 |. movz TMP2, r0, CRET1
3397 } else {
3398 |. movn TMP2, r0, CRET1
3399 }
3400 |.endif
3401 |.endif
3402 |
3403 |5: // RA is a number, RD is not a number.
3404 |.if FFI
3405 | bne CARG4, TISNUM, >9
3406 |.else
3407 | bne CARG4, TISNUM, <2
3408 |.endif
3409 | // RA is a number, RD is an integer. Convert RD to a number.
3410 |.if FPU
3411 |. lwc1 FTMP2, LO(RD)
3412 | b <4
3413 |. cvt.d.w FTMP2, FTMP2
3414 |.else
3415 |. sextw CARG2, CARG2
3416 | bal ->vm_sfi2d_2
3417 |. nop
3418 | b <4
3419 |. nop
3420 |.endif
3421 |
3422 |6: // RA is an integer, RD is not an integer
3423 | sltu AT, CARG4, TISNUM // AT = 1 if RD's tag is below TISNUM.
3424 |.if FFI
3425 | beqz AT, >9
3426 |.else
3427 | beqz AT, <2
3428 |.endif
3429 | // RA is an integer, RD is a number. Convert RA to a number.
3430 |.if FPU
3431 |. lwc1 FTMP0, LO(RA)
3432 | ldc1 FTMP2, 0(RD)
3433 | b <4
3434 | cvt.d.w FTMP0, FTMP0 // Emitted directly after the branch above.
3435 |.else
3436 |. sextw CARG1, CARG1
3437 | bal ->vm_sfi2d_1
3438 |. nop
3439 | b <4
3440 |. nop
3441 |.endif
3442 |
3443 |.if FFI
3444 |8: // RA is neither integer nor number: only cdata needs special handling.
3445 | li AT, LJ_TCDATA
3446 | bne CARG3, AT, <2
3447 |. nop
3448 | b ->vmeta_equal_cd
3449 |. nop
3450 |9: // RD is neither integer nor number: only cdata needs special handling.
3451 | li AT, LJ_TCDATA
3452 | bne CARG4, AT, <2
3453 |. nop
3454 | b ->vmeta_equal_cd
3455 |. nop
3456 |.endif
3457 break;
3458
3459 case BC_ISEQP: case BC_ISNEP: /* Compare a stack slot's tag against a primitive type and conditionally jump. */
3460 vk = op == BC_ISEQP; /* vk = 1 for ISEQP, 0 for ISNEP. */
3461 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
3462 | daddu RA, BASE, RA
3463 | srl TMP1, RD, 3 // Undo the *8 scaling of the operand.
3464 | ld TMP0, 0(RA)
3465 | lhu TMP2, OFS_RD(PC)
3466 | not TMP1, TMP1 // Complement to get the tag to compare against.
3467 | gettp TMP0, TMP0
3468 | daddiu PC, PC, 4
3469 |.if FFI
3470 | li AT, LJ_TCDATA
3471 | beq TMP0, AT, ->vmeta_equal_cd // cdata operand: handled by vmeta_equal_cd.
3472 |.endif
3473 |. xor TMP0, TMP0, TMP1 // TMP0 == 0 iff the tags match.
3474 | decode_RD4b TMP2
3475 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3476 | addu TMP2, TMP2, TMP3
3477 |.if MIPSR6
3478 if (vk) {
3479 | seleqz TMP2, TMP2, TMP0 // ISEQP: zero the jump offset if the tags differ.
3480 } else {
3481 | selnez TMP2, TMP2, TMP0 // ISNEP: zero the jump offset if the tags match.
3482 }
3483 |.else
3484 if (vk) {
3485 | movn TMP2, r0, TMP0 // ISEQP: zero the jump offset if the tags differ.
3486 } else {
3487 | movz TMP2, r0, TMP0 // ISNEP: zero the jump offset if the tags match.
3488 }
3489 |.endif
3490 | daddu PC, PC, TMP2
3491 | ins_next
3492 break;
3493
3494 /* -- Unary test and copy ops ------------------------------------------- */
3495
3496 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: /* Test a slot's tag against LJ_TISTRUECOND; the *C variants also copy the slot to RA when jumping. */
3497 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
3498 | daddu RD, BASE, RD
3499 | lhu TMP2, OFS_RD(PC)
3500 | ld TMP0, 0(RD)
3501 | daddiu PC, PC, 4
3502 | gettp TMP0, TMP0
3503 | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0 = 1 if tag < LJ_TISTRUECOND (unsigned), else 0.
3504 if (op == BC_IST || op == BC_ISF) {
3505 | decode_RD4b TMP2
3506 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3507 | addu TMP2, TMP2, TMP3
3508 |.if MIPSR6
3509 if (op == BC_IST) {
3510 | selnez TMP2, TMP2, TMP0;
3511 } else {
3512 | seleqz TMP2, TMP2, TMP0;
3513 }
3514 |.else
3515 if (op == BC_IST) {
3516 | movz TMP2, r0, TMP0 // IST: zero the jump offset if TMP0 == 0.
3517 } else {
3518 | movn TMP2, r0, TMP0 // ISF: zero the jump offset if TMP0 != 0.
3519 }
3520 |.endif
3521 | daddu PC, PC, TMP2
3522 } else {
3523 | ld CRET1, 0(RD)
3524 if (op == BC_ISTC) {
3525 | beqz TMP0, >1 // ISTC: skip copy and jump if TMP0 == 0.
3526 } else {
3527 | bnez TMP0, >1 // ISFC: skip copy and jump if TMP0 != 0.
3528 }
3529 |. daddu RA, BASE, RA
3530 | decode_RD4b TMP2
3531 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3532 | addu TMP2, TMP2, TMP3
3533 | sd CRET1, 0(RA) // Copy the tested value to the destination slot.
3534 | daddu PC, PC, TMP2
3535 |1:
3536 }
3537 | ins_next
3538 break;
3539
3540 case BC_ISTYPE: /* Check a slot's tag against an expected type; mismatches go to vmeta_istype. */
3541 | // RA = src*8, RD = -type*8
3542 | daddu TMP2, BASE, RA
3543 | srl TMP1, RD, 3 // Undo the *8 scaling of the operand.
3544 | ld TMP0, 0(TMP2)
3545 | ins_next1
3546 | gettp TMP0, TMP0
3547 | daddu AT, TMP0, TMP1 // Sum of the tag and the negated type operand.
3548 | bnez AT, ->vmeta_istype // Nonzero sum: tag does not match the expected type.
3549 |. ins_next2
3550 break;
3551 case BC_ISNUM: /* Check that a slot passes the checknum test; otherwise go to vmeta_istype. */
3552 | // RA = src*8, RD = -(TISNUM-1)*8
3553 | daddu TMP2, BASE, RA
3554 | ld TMP0, 0(TMP2)
3555 | ins_next1
3556 | checknum TMP0, ->vmeta_istype
3557 |. ins_next2
3558 break;
3559
3560 /* -- Unary ops --------------------------------------------------------- */
3561
3562 case BC_MOV: /* Copy one stack slot to another. */
3563 | // RA = dst*8, RD = src*8
3564 | daddu RD, BASE, RD
3565 | daddu RA, BASE, RA
3566 | ld CRET1, 0(RD) // Load the whole 64 bit slot.
3567 | ins_next1
3568 | sd CRET1, 0(RA)
3569 | ins_next2
3570 break;
3571 case BC_NOT: /* Store one of two tag-only values to dst, selected by comparing src's tag with LJ_TTRUE. */
3572 | // RA = dst*8, RD = src*8
3573 | daddu RD, BASE, RD
3574 | daddu RA, BASE, RA
3575 | ld TMP0, 0(RD)
3576 | li AT, LJ_TTRUE
3577 | gettp TMP0, TMP0
3578 | sltu TMP0, AT, TMP0 // TMP0 = 1 if the tag is above LJ_TTRUE (unsigned), else 0.
3579 | addiu TMP0, TMP0, 1 // TMP0 = 1 or 2.
3580 | dsll TMP0, TMP0, 47
3581 | not TMP0, TMP0 // Result = ~((1 or 2) << 47).
3582 | ins_next1
3583 | sd TMP0, 0(RA)
3584 | ins_next2
3585 break;
3586 case BC_UNM: /* Unary minus for integers and numbers; other types go to vmeta_unm. */
3587 | // RA = dst*8, RD = src*8
3588 | daddu RB, BASE, RD
3589 | ld CARG1, 0(RB)
3590 | daddu RA, BASE, RA
3591 | gettp CARG3, CARG1
3592 | bne CARG3, TISNUM, >2
3593 |. lui TMP1, 0x8000 // Upper half 0x8000: compared against the integer below, shifted into a mask at 2.
3594 | sextw CARG1, CARG1
3595 | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
3596 |. negu CARG1, CARG1
3597 | zextw CARG1, CARG1
3598 | settp CARG1, TISNUM // Re-tag the negated 32 bit result with TISNUM.
3599 |1:
3600 | ins_next1
3601 | sd CARG1, 0(RA)
3602 | ins_next2
3603 |2: // Not an integer.
3604 | sltiu AT, CARG3, LJ_TISNUM
3605 | beqz AT, ->vmeta_unm // Tag not below LJ_TISNUM: use the metamethod handler.
3606 |. dsll TMP1, TMP1, 32 // Shift the constant up by 32 bits.
3607 | b <1
3608 |. xor CARG1, CARG1, TMP1 // Toggle the bits selected by the shifted mask.
3609 break;
3610 case BC_LEN: /* Length operator: inline for strings, lj_tab_len for tables, vmeta_len otherwise. */
3611 | // RA = dst*8, RD = src*8
3612 | daddu CARG2, BASE, RD
3613 | daddu RA, BASE, RA
3614 | ld TMP0, 0(CARG2)
3615 | gettp TMP1, TMP0
3616 | daddiu AT, TMP1, -LJ_TSTR
3617 | bnez AT, >2 // Not a string.
3618 |. cleartp STR:CARG1, TMP0
3619 | lw CRET1, STR:CARG1->len
3620 |1:
3621 | settp CRET1, TISNUM // Tag the length with TISNUM.
3622 | ins_next1
3623 | sd CRET1, 0(RA)
3624 | ins_next2
3625 |2:
3626 | daddiu AT, TMP1, -LJ_TTAB
3627 | bnez AT, ->vmeta_len // Neither string nor table.
3628 |. nop
3629#if LJ_52
3630 | ld TAB:TMP2, TAB:CARG1->metatable
3631 | bnez TAB:TMP2, >9 // Has a metatable: check for __len at 9.
3632 |. nop
3633 |3:
3634#endif
3635 |->BC_LEN_Z:
3636 | load_got lj_tab_len
3637 | call_intern lj_tab_len // (GCtab *t)
3638 |. nop
3639 | // Returns uint32_t (but less than 2^31).
3640 | b <1
3641 |. nop
3642#if LJ_52
3643 |9:
3644 | lbu TMP0, TAB:TMP2->nomm
3645 | andi TMP0, TMP0, 1<<MM_len
3646 | bnez TMP0, <3 // 'no __len' flag set: done.
3647 |. nop
3648 | b ->vmeta_len
3649 |. nop
3650#endif
3651 break;
3652
3653 /* -- Binary ops -------------------------------------------------------- */
3654
3655 |.macro fpmod, a, b, c // Hard-float modulo: a = b - floor(b/c)*c, using ->vm_floor.
3656 | bal ->vm_floor // floor(b/c)
3657 |. div.d FARG1, b, c
3658 | mul.d a, FRET1, c
3659 | sub.d a, b, a // b - floor(b/c)*c
3660 |.endmacro
3661
3662 |.macro sfpmod // Soft-float modulo of CARG1 by CARG2 via __divdf3, floor, __muldf3 and __subdf3.
3663 | daddiu sp, sp, -16 // Reserve two stack slots for the operands.
3664 |
3665 | load_got __divdf3
3666 | sd CARG1, 0(sp) // Save the first operand.
3667 | call_extern
3668 |. sd CARG2, 8(sp) // Save the second operand.
3669 |
3670 | load_got floor
3671 | call_extern
3672 |. move CARG1, CRET1 // Pass the quotient on to floor.
3673 |
3674 | load_got __muldf3
3675 | move CARG1, CRET1
3676 | call_extern
3677 |. ld CARG2, 8(sp) // Reload the second operand.
3678 |
3679 | load_got __subdf3
3680 | ld CARG1, 0(sp) // Reload the first operand.
3681 | call_extern
3682 |. move CARG2, CRET1
3683 |
3684 | daddiu sp, sp, 16 // Release the stack slots.
3685 |.endmacro
3686
3687 |.macro ins_arithpre, label // Decode the operand addresses into RB/RC by opcode variant; branch to label unless it is "none".
3688 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3689 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3690 ||switch (vk) {
3691 ||case 0:
3692 | decode_RB8a RB, INS
3693 | decode_RB8b RB
3694 | decode_RDtoRC8 RC, RD
3695 | // RA = dst*8, RB = src1*8, RC = num_const*8
3696 | daddu RB, BASE, RB
3697 |.if "label" ~= "none"
3698 | b label
3699 |.endif
3700 |. daddu RC, KBASE, RC // Constant operand is addressed relative to KBASE.
3701 || break;
3702 ||case 1:
3703 | decode_RB8a RC, INS
3704 | decode_RB8b RC
3705 | decode_RDtoRC8 RB, RD
3706 | // RA = dst*8, RB = num_const*8, RC = src1*8
3707 | daddu RC, BASE, RC
3708 |.if "label" ~= "none"
3709 | b label
3710 |.endif
3711 |. daddu RB, KBASE, RB // Constant operand is addressed relative to KBASE.
3712 || break;
3713 ||default:
3714 | decode_RB8a RB, INS
3715 | decode_RB8b RB
3716 | decode_RDtoRC8 RC, RD
3717 | // RA = dst*8, RB = src1*8, RC = src2*8
3718 | daddu RB, BASE, RB
3719 |.if "label" ~= "none"
3720 | b label
3721 |.endif
3722 |. daddu RC, BASE, RC // Both operands are stack slots.
3723 || break;
3724 ||}
3725 |.endmacro
3726 |
3727 |.macro ins_arith, intins, fpins, fpcall, label // Arithmetic op body: integer fast path (except "div"), then number path, else vmeta_arith.
3728 | ins_arithpre none
3729 |
3730 |.if "label" ~= "none"
3731 |label:
3732 |.endif
3733 |
3734 |// The values and tags loaded here are also used at label 5.
3735 | ld CARG1, 0(RB)
3736 | ld CARG2, 0(RC)
3737 | gettp TMP0, CARG1
3738 | gettp TMP1, CARG2
3739 |
3740 |.if "intins" ~= "div"
3741 |
3742 | // Check for two integers.
3743 | sextw CARG3, CARG1
3744 | bne TMP0, TISNUM, >5
3745 |. sextw CARG4, CARG2
3746 | bne TMP1, TISNUM, >5
3747 |
3748 |.if "intins" == "addu"
3749 |. intins CRET1, CARG3, CARG4
3750 | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
3751 | xor TMP2, CRET1, CARG4
3752 | and TMP1, TMP1, TMP2
3753 | bltz TMP1, ->vmeta_arith
3754 |. daddu RA, BASE, RA
3755 |.elif "intins" == "subu"
3756 |. intins CRET1, CARG3, CARG4
3757 | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
3758 | xor TMP2, CARG3, CARG4
3759 | and TMP1, TMP1, TMP2
3760 | bltz TMP1, ->vmeta_arith
3761 |. daddu RA, BASE, RA
3762 |.elif "intins" == "mult"
3763 |.if MIPSR6
3764 |. nop
3765 | mul CRET1, CARG3, CARG4
3766 | muh TMP2, CARG3, CARG4
3767 |.else
3768 |. intins CARG3, CARG4
3769 | mflo CRET1
3770 | mfhi TMP2
3771 |.endif
3772 | sra TMP1, CRET1, 31 // Sign of the low word, replicated.
3773 | bne TMP1, TMP2, ->vmeta_arith // High word is not the sign extension of the low word: overflow.
3774 |. daddu RA, BASE, RA
3775 |.else
3776 |. load_got lj_vm_modi
3777 | beqz CARG4, ->vmeta_arith // Zero divisor is left to vmeta_arith.
3778 |. daddu RA, BASE, RA
3779 | move CARG1, CARG3
3780 | call_extern
3781 |. move CARG2, CARG4
3782 |.endif
3783 |
3784 | zextw CRET1, CRET1
3785 | settp CRET1, TISNUM // Tag the 32 bit result with TISNUM.
3786 | ins_next1
3787 | sd CRET1, 0(RA)
3788 |3:
3789 | ins_next2
3790 |
3791 |.endif
3792 |
3793 |5: // Check for two numbers.
3794 | .FPU ldc1 FTMP0, 0(RB)
3795 | sltu AT, TMP0, TISNUM
3796 | sltu TMP0, TMP1, TISNUM
3797 | .FPU ldc1 FTMP2, 0(RC)
3798 | and AT, AT, TMP0 // AT = 1 only if both tags are below TISNUM.
3799 | beqz AT, ->vmeta_arith
3800 |. daddu RA, BASE, RA
3801 |
3802 |.if FPU
3803 | fpins FRET1, FTMP0, FTMP2
3804 |.elif "fpcall" == "sfpmod"
3805 | sfpmod
3806 |.else
3807 | load_got fpcall
3808 | call_extern
3809 |. nop
3810 |.endif
3811 |
3812 | ins_next1
3813 |.if "intins" ~= "div"
3814 | b <3 // Share the ins_next2 tail of the integer path.
3815 |.endif
3816 |.if FPU
3817 |. sdc1 FRET1, 0(RA)
3818 |.else
3819 |. sd CRET1, 0(RA)
3820 |.endif
3821 |.if "intins" == "div"
3822 | ins_next2
3823 |.endif
3824 |
3825 |.endmacro
3826
3827 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* Addition: addu / add.d / __adddf3. */
3828 | ins_arith addu, add.d, __adddf3, none
3829 break;
3830 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: /* Subtraction: subu / sub.d / __subdf3. */
3831 | ins_arith subu, sub.d, __subdf3, none
3832 break;
3833 case BC_MULVN: case BC_MULNV: case BC_MULVV: /* Multiplication: mult / mul.d / __muldf3. */
3834 | ins_arith mult, mul.d, __muldf3, none
3835 break;
3836 case BC_DIVVN: /* Division: no integer path; defines the shared body at ->BC_DIVVN_Z. */
3837 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3838 break;
3839 case BC_DIVNV: case BC_DIVVV: /* Decode operands, then branch to the shared body at ->BC_DIVVN_Z. */
3840 | ins_arithpre ->BC_DIVVN_Z
3841 break;
3842 case BC_MODVN: /* Modulo: lj_vm_modi for integers, fpmod/sfpmod for numbers; defines ->BC_MODVN_Z. */
3843 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
3844 break;
3845 case BC_MODNV: case BC_MODVV: /* Decode operands, then branch to the shared body at ->BC_MODVN_Z. */
3846 | ins_arithpre ->BC_MODVN_Z
3847 break;
3848 case BC_POW: /* Power operator: calls pow when both tags are below LJ_TISNUM, else vmeta_arith. */
3849 | ins_arithpre none
3850 | ld CARG1, 0(RB)
3851 | ld CARG2, 0(RC)
3852 | gettp TMP0, CARG1
3853 | gettp TMP1, CARG2
3854 | sltiu TMP0, TMP0, LJ_TISNUM
3855 | sltiu TMP1, TMP1, LJ_TISNUM
3856 | and AT, TMP0, TMP1 // AT = 1 only if both tags are below LJ_TISNUM.
3857 | load_got pow
3858 | beqz AT, ->vmeta_arith
3859 |. daddu RA, BASE, RA
3860 |.if FPU
3861 | ldc1 FARG1, 0(RB) // Hard-float: pass the operands in FP argument registers.
3862 | ldc1 FARG2, 0(RC)
3863 |.endif
3864 | call_extern
3865 |. nop
3866 | ins_next1
3867 |.if FPU
3868 | sdc1 FRET1, 0(RA)
3869 |.else
3870 | sd CRET1, 0(RA)
3871 |.endif
3872 | ins_next2
3873 break;
3874
3875 case BC_CAT: /* Concatenate a range of slots via lj_meta_cat and copy the result to dst. */
3876 | // RA = dst*8, RB = src_start*8, RC = src_end*8
3877 | decode_RB8a RB, INS
3878 | decode_RB8b RB
3879 | decode_RDtoRC8 RC, RD
3880 | dsubu CARG3, RC, RB // (src_end - src_start)*8; scaled down below.
3881 | sd BASE, L->base
3882 | daddu CARG2, BASE, RC
3883 | move MULTRES, RB // Keep src_start*8 across the call.
3884 |->BC_CAT_Z:
3885 | load_got lj_meta_cat
3886 | srl CARG3, CARG3, 3
3887 | sd PC, SAVE_PC
3888 | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left)
3889 |. move CARG1, L
3890 | // Returns NULL (finished) or TValue * (metamethod).
3891 | bnez CRET1, ->vmeta_binop
3892 |. ld BASE, L->base // Reload BASE after the call.
3893 | daddu RB, BASE, MULTRES
3894 | ld CRET1, 0(RB) // Result is in the src_start slot.
3895 | daddu RA, BASE, RA
3896 | ins_next1
3897 | sd CRET1, 0(RA)
3898 | ins_next2
3899 break;
3900
3901 /* -- Constant ops ------------------------------------------------------ */
3902
3903 case BC_KSTR: /* Load a string constant, tag it with LJ_TSTR and store it to dst. */
3904 | // RA = dst*8, RD = str_const*8 (~)
3905 | dsubu TMP1, KBASE, RD
3906 | ins_next1
3907 | li TMP2, LJ_TSTR
3908 | ld TMP0, -8(TMP1) // KBASE-8-str_const*8
3909 | daddu RA, BASE, RA
3910 | settp TMP0, TMP2
3911 | sd TMP0, 0(RA)
3912 | ins_next2
3913 break;
3914 case BC_KCDATA: /* Load a cdata constant, tag it with LJ_TCDATA and store it to dst (FFI builds only). */
3915 |.if FFI
3916 | // RA = dst*8, RD = cdata_const*8 (~)
3917 | dsubu TMP1, KBASE, RD
3918 | ins_next1
3919 | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8
3920 | li TMP2, LJ_TCDATA
3921 | daddu RA, BASE, RA
3922 | settp TMP0, TMP2
3923 | sd TMP0, 0(RA)
3924 | ins_next2
3925 |.endif
3926 break;
3927 case BC_KSHORT: /* Store a 16 bit literal from the instruction word as a TISNUM-tagged value. */
3928 | // RA = dst*8, RD = int16_literal*8
3929 | sra RD, INS, 16 // Arithmetic shift keeps the sign of the literal.
3930 | daddu RA, BASE, RA
3931 | zextw RD, RD
3932 | ins_next1
3933 | settp RD, TISNUM
3934 | sd RD, 0(RA)
3935 | ins_next2
3936 break;
3937 case BC_KNUM: /* Copy a number constant from the constant area to dst. */
3938 | // RA = dst*8, RD = num_const*8
3939 | daddu RD, KBASE, RD
3940 | daddu RA, BASE, RA
3941 | ld CRET1, 0(RD)
3942 | ins_next1
3943 | sd CRET1, 0(RA)
3944 | ins_next2
3945 break;
3946 case BC_KPRI: /* Store a primitive value built from the type operand to dst. */
3947 | // RA = dst*8, RD = primitive_type*8 (~)
3948 | daddu RA, BASE, RA
3949 | dsll TMP0, RD, 44 // RD is pre-scaled by 8, so this is type << 47.
3950 | not TMP0, TMP0 // Complement to form the stored value.
3951 | ins_next1
3952 | sd TMP0, 0(RA)
3953 | ins_next2
3954 break;
3955 case BC_KNIL: /* Fill a range of stack slots with TISNIL. */
3956 | // RA = base*8, RD = end*8
3957 | daddu RA, BASE, RA
3958 | sd TISNIL, 0(RA) // First slot.
3959 | daddiu RA, RA, 8
3960 | daddu RD, BASE, RD
3961 |1:
3962 | sd TISNIL, 0(RA)
3963 | slt AT, RA, RD // Continue while the current slot is below the end slot.
3964 | bnez AT, <1
3965 |. daddiu RA, RA, 8
3966 | ins_next_
3967 break;
3968
3969 /* -- Upvalue and function ops ------------------------------------------ */
3970
3971 case BC_UGET: /* Load the value of an upvalue of the current function into dst. */
3972 | // RA = dst*8, RD = uvnum*8
3973 | ld LFUNC:RB, FRAME_FUNC(BASE)
3974 | daddu RA, BASE, RA
3975 | cleartp LFUNC:RB
3976 | daddu RD, RD, LFUNC:RB // Index the uvptr array by uvnum*8.
3977 | ld UPVAL:RB, LFUNC:RD->uvptr
3978 | ins_next1
3979 | ld TMP1, UPVAL:RB->v // Pointer to the upvalue's TValue.
3980 | ld CRET1, 0(TMP1)
3981 | sd CRET1, 0(RA)
3982 | ins_next2
3983 break;
3984 case BC_USETV: /* Store a stack slot into an upvalue, with a GC write barrier where needed. */
3985 | // RA = uvnum*8, RD = src*8
3986 | ld LFUNC:RB, FRAME_FUNC(BASE)
3987 | daddu RD, BASE, RD
3988 | cleartp LFUNC:RB
3989 | daddu RA, RA, LFUNC:RB // Index the uvptr array by uvnum*8.
3990 | ld UPVAL:RB, LFUNC:RA->uvptr
3991 | ld CRET1, 0(RD)
3992 | lbu TMP3, UPVAL:RB->marked
3993 | ld CARG2, UPVAL:RB->v
3994 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3995 | lbu TMP0, UPVAL:RB->closed
3996 | gettp TMP2, CRET1
3997 | sd CRET1, 0(CARG2) // The store itself is unconditional.
3998 | li AT, LJ_GC_BLACK|1
3999 | or TMP3, TMP3, TMP0
4000 | beq TMP3, AT, >2 // Upvalue is closed and black?
4001 |. daddiu TMP2, TMP2, -(LJ_TNUMX+1)
4002 |1:
4003 | ins_next
4004 |
4005 |2: // Check if new value is collectable.
4006 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
4007 | beqz AT, <1 // tvisgcv(v)
4008 |. cleartp GCOBJ:CRET1, CRET1
4009 | lbu TMP3, GCOBJ:CRET1->gch.marked
4010 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
4011 | beqz TMP3, <1
4012 |. load_got lj_gc_barrieruv
4013 | // Crossed a write barrier. Move the barrier forward.
4014 | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
4015 |. daddiu CARG1, DISPATCH, GG_DISP2G
4016 | b <1
4017 |. nop
4018 break;
4019 case BC_USETS: /* Store a string constant into an upvalue, with a GC write barrier where needed. */
4020 | // RA = uvnum*8, RD = str_const*8 (~)
4021 | ld LFUNC:RB, FRAME_FUNC(BASE)
4022 | dsubu TMP1, KBASE, RD
4023 | cleartp LFUNC:RB
4024 | daddu RA, RA, LFUNC:RB // Index the uvptr array by uvnum*8.
4025 | ld UPVAL:RB, LFUNC:RA->uvptr
4026 | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
4027 | lbu TMP2, UPVAL:RB->marked
4028 | ld CARG2, UPVAL:RB->v
4029 | lbu TMP3, STR:TMP1->marked
4030 | andi AT, TMP2, LJ_GC_BLACK // isblack(uv)
4031 | lbu TMP2, UPVAL:RB->closed
4032 | li TMP0, LJ_TSTR
4033 | settp TMP1, TMP0 // Tag the string pointer with LJ_TSTR.
4034 | bnez AT, >2
4035 |. sd TMP1, 0(CARG2) // The store itself is unconditional.
4036 |1:
4037 | ins_next
4038 |
4039 |2: // Check if string is white and ensure upvalue is closed.
4040 | beqz TMP2, <1
4041 |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str)
4042 | beqz AT, <1
4043 |. load_got lj_gc_barrieruv
4044 | // Crossed a write barrier. Move the barrier forward.
4045 | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
4046 |. daddiu CARG1, DISPATCH, GG_DISP2G
4047 | b <1
4048 |. nop
4049 break;
4050 case BC_USETN: /* Store a number constant into an upvalue (no barrier code is emitted here). */
4051 | // RA = uvnum*8, RD = num_const*8
4052 | ld LFUNC:RB, FRAME_FUNC(BASE)
4053 | daddu RD, KBASE, RD
4054 | cleartp LFUNC:RB
4055 | daddu RA, RA, LFUNC:RB // Index the uvptr array by uvnum*8.
4056 | ld UPVAL:RB, LFUNC:RA->uvptr
4057 | ld CRET1, 0(RD)
4058 | ld TMP1, UPVAL:RB->v
4059 | ins_next1
4060 | sd CRET1, 0(TMP1)
4061 | ins_next2
4062 break;
4063 case BC_USETP: /* Store a primitive value into an upvalue (no barrier code is emitted here). */
4064 | // RA = uvnum*8, RD = primitive_type*8 (~)
4065 | ld LFUNC:RB, FRAME_FUNC(BASE)
4066 | dsll TMP0, RD, 44 // RD is pre-scaled by 8, so this is type << 47.
4067 | cleartp LFUNC:RB
4068 | daddu RA, RA, LFUNC:RB // Index the uvptr array by uvnum*8.
4069 | not TMP0, TMP0 // Complement to form the stored value.
4070 | ld UPVAL:RB, LFUNC:RA->uvptr
4071 | ins_next1
4072 | ld TMP1, UPVAL:RB->v
4073 | sd TMP0, 0(TMP1)
4074 | ins_next2
4075 break;
4076
4077 case BC_UCLO: /* Close upvalues via lj_func_closeuv if any are open, then jump to the target. */
4078 | // RA = level*8, RD = target
4079 | ld TMP2, L->openupval
4080 | branch_RD // Do this first since RD is not saved.
4081 | load_got lj_func_closeuv
4082 | sd BASE, L->base
4083 | beqz TMP2, >1 // No open upvalues: skip the call.
4084 |. move CARG1, L
4085 | call_intern lj_func_closeuv // (lua_State *L, TValue *level)
4086 |. daddu CARG2, BASE, RA
4087 | ld BASE, L->base // Reload BASE after the call.
4088 |1:
4089 | ins_next
4090 break;
4091
4092 case BC_FNEW: /* Create a new closure from a prototype constant via lj_func_newL_gc and store it to dst. */
4093 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
4094 | load_got lj_func_newL_gc
4095 | dsubu TMP1, KBASE, RD
4096 | ld CARG3, FRAME_FUNC(BASE)
4097 | ld CARG2, -8(TMP1) // KBASE-8-proto_const*8
4098 | sd BASE, L->base
4099 | sd PC, SAVE_PC
4100 | cleartp CARG3 // Strip the tag from the parent function.
4101 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
4102 | call_intern lj_func_newL_gc
4103 |. move CARG1, L
4104 | // Returns GCfuncL *.
4105 | li TMP0, LJ_TFUNC
4106 | ld BASE, L->base // Reload BASE after the call.
4107 | ins_next1
4108 | settp CRET1, TMP0 // Tag the result with LJ_TFUNC.
4109 | daddu RA, BASE, RA
4110 | sd CRET1, 0(RA)
4111 | ins_next2
4112 break;
4113
4114 /* -- Table ops --------------------------------------------------------- */
4115
4116 case BC_TNEW: /* Create a new table (TNEW) or duplicate a template table (TDUP) and store it to dst. */
4117 case BC_TDUP:
4118 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
4119 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
4120 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
4121 | sd BASE, L->base
4122 | sd PC, SAVE_PC
4123 | sltu AT, TMP0, TMP1
4124 | beqz AT, >5 // gc.total >= gc.threshold: run a GC step first.
4125 |1:
4126 if (op == BC_TNEW) {
4127 | load_got lj_tab_new
4128 | srl CARG2, RD, 3
4129 | andi CARG2, CARG2, 0x7ff // asize = low 11 bits of the operand.
4130 | li TMP0, 0x801
4131 | addiu AT, CARG2, -0x7ff
4132 | srl CARG3, RD, 14 // hbits = remaining upper bits of the operand.
4133 |.if MIPSR6
4134 | seleqz TMP0, TMP0, AT
4135 | selnez CARG2, CARG2, AT
4136 | or CARG2, CARG2, TMP0 // asize == 0x7ff is replaced by 0x801.
4137 |.else
4138 | movz CARG2, TMP0, AT // asize == 0x7ff is replaced by 0x801.
4139 |.endif
4140 | // (lua_State *L, int32_t asize, uint32_t hbits)
4141 | call_intern lj_tab_new
4142 |. move CARG1, L
4143 | // Returns Table *.
4144 } else {
4145 | load_got lj_tab_dup
4146 | dsubu TMP1, KBASE, RD
4147 | move CARG1, L
4148 | call_intern lj_tab_dup // (lua_State *L, Table *kt)
4149 |. ld CARG2, -8(TMP1) // KBASE-8-tab_const*8
4150 | // Returns Table *.
4151 }
4152 | li TMP0, LJ_TTAB
4153 | ld BASE, L->base // Reload BASE after the call.
4154 | ins_next1
4155 | daddu RA, BASE, RA
4156 | settp CRET1, TMP0 // Tag the result with LJ_TTAB.
4157 | sd CRET1, 0(RA)
4158 | ins_next2
4159 |5:
4160 | load_got lj_gc_step_fixtop
4161 | move MULTRES, RD // Preserve RD across the call.
4162 | call_intern lj_gc_step_fixtop // (lua_State *L)
4163 |. move CARG1, L
4164 | b <1
4165 |. move RD, MULTRES
4166 break;
4167
4168 case BC_GGET: /* Global get: load the function's env table and key, then continue at ->BC_TGETS_Z. */
4169 | // RA = dst*8, RD = str_const*8 (~)
4170 case BC_GSET: /* Global set: same setup, then continue at ->BC_TSETS_Z. */
4171 | // RA = src*8, RD = str_const*8 (~)
4172 | ld LFUNC:TMP2, FRAME_FUNC(BASE)
4173 | dsubu TMP1, KBASE, RD
4174 | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8
4175 | cleartp LFUNC:TMP2
4176 | ld TAB:RB, LFUNC:TMP2->env
4177 if (op == BC_GGET) {
4178 | b ->BC_TGETS_Z
4179 } else {
4180 | b ->BC_TSETS_Z
4181 }
4182 |. daddu RA, BASE, RA
4183 break;
4184
4185 case BC_TGETV: /* Table get with a variable key: inline array lookup for integer keys, string keys go to ->BC_TGETS_Z. */
4186 | // RA = dst*8, RB = table*8, RC = key*8
4187 | decode_RB8a RB, INS
4188 | decode_RB8b RB
4189 | decode_RDtoRC8 RC, RD
4190 | daddu CARG2, BASE, RB
4191 | daddu CARG3, BASE, RC
4192 | ld TAB:RB, 0(CARG2)
4193 | ld TMP2, 0(CARG3)
4194 | daddu RA, BASE, RA
4195 | checktab TAB:RB, ->vmeta_tgetv
4196 | gettp TMP3, TMP2
4197 | bne TMP3, TISNUM, >5 // Integer key?
4198 |. lw TMP0, TAB:RB->asize
4199 | sextw TMP2, TMP2
4200 | ld TMP1, TAB:RB->array
4201 | sltu AT, TMP2, TMP0 // Unsigned compare of the key against asize.
4202 | sll TMP2, TMP2, 3
4203 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
4204 |. daddu TMP2, TMP1, TMP2
4205 | ld AT, 0(TMP2)
4206 | beq AT, TISNIL, >2
4207 |. ld CRET1, 0(TMP2)
4208 |1:
4209 | ins_next1
4210 | sd CRET1, 0(RA)
4211 | ins_next2
4212 |
4213 |2: // Check for __index if table value is nil.
4214 | ld TAB:TMP2, TAB:RB->metatable
4215 | beqz TAB:TMP2, <1 // No metatable: done.
4216 |. nop
4217 | lbu TMP0, TAB:TMP2->nomm
4218 | andi TMP0, TMP0, 1<<MM_index
4219 | bnez TMP0, <1 // 'no __index' flag set: done.
4220 |. nop
4221 | b ->vmeta_tgetv
4222 |. nop
4223 |
4224 |5: // Not an integer key.
4225 | li AT, LJ_TSTR
4226 | bne TMP3, AT, ->vmeta_tgetv
4227 |. cleartp RC, TMP2
4228 | b ->BC_TGETS_Z // String key?
4229 |. nop
4230 break;
4231 case BC_TGETS: /* Table get with a string constant key: inline hash chain lookup. */
4232 | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
4233 | decode_RB8a RB, INS
4234 | decode_RB8b RB
4235 | decode_RC8a RC, INS
4236 | daddu CARG2, BASE, RB
4237 | decode_RC8b RC
4238 | ld TAB:RB, 0(CARG2)
4239 | dsubu CARG3, KBASE, RC
4240 | daddu RA, BASE, RA
4241 | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8
4242 | checktab TAB:RB, ->vmeta_tgets1
4243 |->BC_TGETS_Z:
4244 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
4245 | lw TMP0, TAB:RB->hmask
4246 | lw TMP1, STR:RC->sid
4247 | ld NODE:TMP2, TAB:RB->node
4248 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
4249 | sll TMP0, TMP1, 5
4250 | sll TMP1, TMP1, 3
4251 | subu TMP1, TMP0, TMP1 // idx*32 - idx*8 = idx*24.
4252 | li TMP3, LJ_TSTR
4253 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4254 | settp STR:RC, TMP3 // Tagged key to look for.
4255 |1:
4256 | ld CARG1, NODE:TMP2->key
4257 | ld CRET1, NODE:TMP2->val
4258 | ld NODE:TMP1, NODE:TMP2->next
4259 | bne CARG1, RC, >4
4260 |. ld TAB:TMP3, TAB:RB->metatable
4261 | beq CRET1, TISNIL, >5 // Key found, but nil value?
4262 |. nop
4263 |3:
4264 | ins_next1
4265 | sd CRET1, 0(RA)
4266 | ins_next2
4267 |
4268 |4: // Follow hash chain.
4269 | bnez NODE:TMP1, <1
4270 |. move NODE:TMP2, NODE:TMP1
4271 | // End of hash chain: key not found, nil result.
4272 |
4273 |5: // Check for __index if table value is nil.
4274 | beqz TAB:TMP3, <3 // No metatable: done.
4275 |. move CRET1, TISNIL
4276 | lbu TMP0, TAB:TMP3->nomm
4277 | andi TMP0, TMP0, 1<<MM_index
4278 | bnez TMP0, <3 // 'no __index' flag set: done.
4279 |. nop
4280 | b ->vmeta_tgets
4281 |. nop
4282 break;
4283 case BC_TGETB: /* Table get with a literal index operand: inline array lookup. */
4284 | // RA = dst*8, RB = table*8, RC = index*8
4285 | decode_RB8a RB, INS
4286 | decode_RB8b RB
4287 | daddu CARG2, BASE, RB
4288 | decode_RDtoRC8 RC, RD
4289 | ld TAB:RB, 0(CARG2)
4290 | daddu RA, BASE, RA
4291 | srl TMP0, RC, 3 // TMP0 = unscaled index.
4292 | checktab TAB:RB, ->vmeta_tgetb
4293 | lw TMP1, TAB:RB->asize
4294 | ld TMP2, TAB:RB->array
4295 | sltu AT, TMP0, TMP1
4296 | beqz AT, ->vmeta_tgetb // Index not below asize.
4297 |. daddu RC, TMP2, RC // RC = address of the array slot.
4298 | ld AT, 0(RC)
4299 | beq AT, TISNIL, >5
4300 |. ld CRET1, 0(RC)
4301 |1:
4302 | ins_next1
4303 | sd CRET1, 0(RA)
4304 | ins_next2
4305 |
4306 |5: // Check for __index if table value is nil.
4307 | ld TAB:TMP2, TAB:RB->metatable
4308 | beqz TAB:TMP2, <1 // No metatable: done.
4309 |. nop
4310 | lbu TMP1, TAB:TMP2->nomm
4311 | andi TMP1, TMP1, 1<<MM_index
4312 | bnez TMP1, <1 // 'no __index' flag set: done.
4313 |. nop
4314 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
4315 |. nop
4316 break;
4317 case BC_TGETR: /* Table get from the array part without type or metatable checks; falls back to vmeta_tgetr. */
4318 | // RA = dst*8, RB = table*8, RC = key*8
4319 | decode_RB8a RB, INS
4320 | decode_RB8b RB
4321 | decode_RDtoRC8 RC, RD
4322 | daddu RB, BASE, RB
4323 | daddu RC, BASE, RC
4324 | ld TAB:CARG1, 0(RB)
4325 | lw CARG2, LO(RC) // Only the low word of the key slot is read.
4326 | daddu RA, BASE, RA
4327 | cleartp TAB:CARG1
4328 | lw TMP0, TAB:CARG1->asize
4329 | ld TMP1, TAB:CARG1->array
4330 | sltu AT, CARG2, TMP0
4331 | sll TMP2, CARG2, 3
4332 | beqz AT, ->vmeta_tgetr // In array part?
4333 |. daddu CRET1, TMP1, TMP2
4334 | ld CARG2, 0(CRET1)
4335 |->BC_TGETR_Z:
4336 | ins_next1
4337 | sd CARG2, 0(RA)
4338 | ins_next2
4339 break;
4340
4341 case BC_TSETV: /* Table set with a variable key: inline array store for integer keys, string keys go to ->BC_TSETS_Z. */
4342 | // RA = src*8, RB = table*8, RC = key*8
4343 | decode_RB8a RB, INS
4344 | decode_RB8b RB
4345 | decode_RDtoRC8 RC, RD
4346 | daddu CARG2, BASE, RB
4347 | daddu CARG3, BASE, RC
4348 | ld RB, 0(CARG2)
4349 | ld TMP2, 0(CARG3)
4350 | daddu RA, BASE, RA
4351 | checktab RB, ->vmeta_tsetv
4352 | checkint TMP2, >5
4353 |. sextw RC, TMP2
4354 | lw TMP0, TAB:RB->asize
4355 | ld TMP1, TAB:RB->array
4356 | sltu AT, RC, TMP0 // Unsigned compare of the key against asize.
4357 | sll TMP2, RC, 3
4358 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
4359 |. daddu TMP1, TMP1, TMP2
4360 | ld TMP0, 0(TMP1)
4361 | lbu TMP3, TAB:RB->marked
4362 | beq TMP0, TISNIL, >3
4363 |. ld CRET1, 0(RA)
4364 |1:
4365 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4366 | bnez AT, >7
4367 |. sd CRET1, 0(TMP1)
4368 |2:
4369 | ins_next
4370 |
4371 |3: // Check for __newindex if previous value is nil.
4372 | ld TAB:TMP2, TAB:RB->metatable
4373 | beqz TAB:TMP2, <1 // No metatable: done.
4374 |. nop
4375 | lbu TMP2, TAB:TMP2->nomm
4376 | andi TMP2, TMP2, 1<<MM_newindex
4377 | bnez TMP2, <1 // 'no __newindex' flag set: done.
4378 |. nop
4379 | b ->vmeta_tsetv
4380 |. nop
4381 |
4382 |5: // Not an integer key.
4383 | gettp AT, TMP2
4384 | daddiu AT, AT, -LJ_TSTR
4385 | bnez AT, ->vmeta_tsetv
4386 |. nop
4387 | b ->BC_TSETS_Z // String key?
4388 |. cleartp STR:RC, TMP2
4389 |
4390 |7: // Possible table write barrier for the value. Skip valiswhite check.
4391 | barrierback TAB:RB, TMP3, TMP0, <2
4392 break;
4393 case BC_TSETS: /* Table set with a string constant key: inline hash chain lookup, lj_tab_newkey for missing keys. */
4394 | // RA = src*8, RB = table*8, RC = str_const*8 (~)
4395 | decode_RB8a RB, INS
4396 | decode_RB8b RB
4397 | daddu CARG2, BASE, RB
4398 | decode_RC8a RC, INS
4399 | ld TAB:RB, 0(CARG2)
4400 | decode_RC8b RC
4401 | dsubu CARG3, KBASE, RC
4402 | ld RC, -8(CARG3) // KBASE-8-str_const*8
4403 | daddu RA, BASE, RA
4404 | cleartp STR:RC
4405 | checktab TAB:RB, ->vmeta_tsets1
4406 |->BC_TSETS_Z:
4407 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
4408 | lw TMP0, TAB:RB->hmask
4409 | lw TMP1, STR:RC->sid
4410 | ld NODE:TMP2, TAB:RB->node
4411 | sb r0, TAB:RB->nomm // Clear metamethod cache.
4412 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
4413 | sll TMP0, TMP1, 5
4414 | sll TMP1, TMP1, 3
4415 | subu TMP1, TMP0, TMP1 // idx*32 - idx*8 = idx*24.
4416 | li TMP3, LJ_TSTR
4417 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4418 | settp STR:RC, TMP3 // Tagged key to look for.
4419 |.if FPU
4420 | ldc1 FTMP0, 0(RA) // Value to store is kept in FTMP0.
4421 |.else
4422 | ld CRET1, 0(RA) // Value to store is kept in CRET1.
4423 |.endif
4424 |1:
4425 | ld TMP0, NODE:TMP2->key
4426 | ld CARG2, NODE:TMP2->val
4427 | ld NODE:TMP1, NODE:TMP2->next
4428 | bne TMP0, RC, >5
4429 |. lbu TMP3, TAB:RB->marked
4430 | beq CARG2, TISNIL, >4 // Key found, but nil value?
4431 |. ld TAB:TMP0, TAB:RB->metatable
4432 |2:
4433 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4434 | bnez AT, >7
4435 |.if FPU
4436 |. sdc1 FTMP0, NODE:TMP2->val
4437 |.else
4438 |. sd CRET1, NODE:TMP2->val
4439 |.endif
4440 |3:
4441 | ins_next
4442 |
4443 |4: // Check for __newindex if previous value is nil.
4444 | beqz TAB:TMP0, <2 // No metatable: done.
4445 |. nop
4446 | lbu TMP0, TAB:TMP0->nomm
4447 | andi TMP0, TMP0, 1<<MM_newindex
4448 | bnez TMP0, <2 // 'no __newindex' flag set: done.
4449 |. nop
4450 | b ->vmeta_tsets
4451 |. nop
4452 |
4453 |5: // Follow hash chain.
4454 | bnez NODE:TMP1, <1
4455 |. move NODE:TMP2, NODE:TMP1
4456 | // End of hash chain: key not found, add a new one
4457 |
4458 | // But check for __newindex first.
4459 | ld TAB:TMP2, TAB:RB->metatable
4460 | beqz TAB:TMP2, >6 // No metatable: continue.
4461 |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
4462 | lbu TMP0, TAB:TMP2->nomm
4463 | andi TMP0, TMP0, 1<<MM_newindex
4464 | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4465 |6:
4466 | load_got lj_tab_newkey
4467 | sd RC, 0(CARG3) // Store the tagged key in tmptv and pass its address as the key argument.
4468 | sd BASE, L->base
4469 | move CARG2, TAB:RB
4470 | sd PC, SAVE_PC
4471 | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4472 |. move CARG1, L
4473 | // Returns TValue *.
4474 | ld BASE, L->base
4475 |.if FPU
4476 | b <3 // No 2nd write barrier needed.
4477 |. sdc1 FTMP0, 0(CRET1)
4478 |.else
4479 | ld CARG1, 0(RA)
4480 | b <3 // No 2nd write barrier needed.
4481 |. sd CARG1, 0(CRET1)
4482 |.endif
4483 |
4484 |7: // Possible table write barrier for the value. Skip valiswhite check.
4485 | barrierback TAB:RB, TMP3, TMP0, <3
4486 break;
4487 case BC_TSETB:
4488 | // RA = src*8, RB = table*8, RC = index*8
4489 | decode_RB8a RB, INS
4490 | decode_RB8b RB
4491 | daddu CARG2, BASE, RB
4492 | decode_RDtoRC8 RC, RD
4493 | ld TAB:RB, 0(CARG2)
4494 | daddu RA, BASE, RA
4495 | srl TMP0, RC, 3
4496 | checktab RB, ->vmeta_tsetb
4497 | lw TMP1, TAB:RB->asize
4498 | ld TMP2, TAB:RB->array
4499 | sltu AT, TMP0, TMP1
4500 | beqz AT, ->vmeta_tsetb
4501 |. daddu RC, TMP2, RC
4502 | ld TMP1, 0(RC)
4503 | lbu TMP3, TAB:RB->marked
4504 | beq TMP1, TISNIL, >5
4505 |1:
4506 |. ld CRET1, 0(RA)
4507 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4508 | bnez AT, >7
4509 |. sd CRET1, 0(RC)
4510 |2:
4511 | ins_next
4512 |
4513 |5: // Check for __newindex if previous value is nil.
4514 | ld TAB:TMP2, TAB:RB->metatable
4515 | beqz TAB:TMP2, <1 // No metatable: done.
4516 |. nop
4517 | lbu TMP1, TAB:TMP2->nomm
4518 | andi TMP1, TMP1, 1<<MM_newindex
4519 | bnez TMP1, <1 // 'no __newindex' flag set: done.
4520 |. nop
4521 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
4522 |. nop
4523 |
4524 |7: // Possible table write barrier for the value. Skip valiswhite check.
4525 | barrierback TAB:RB, TMP3, TMP0, <2
4526 break;
4527 case BC_TSETR:
4528 | // RA = dst*8, RB = table*8, RC = key*8
4529 | decode_RB8a RB, INS
4530 | decode_RB8b RB
4531 | decode_RDtoRC8 RC, RD
4532 | daddu CARG1, BASE, RB
4533 | daddu CARG3, BASE, RC
4534 | ld TAB:CARG2, 0(CARG1)
4535 | lw CARG3, LO(CARG3)
4536 | cleartp TAB:CARG2
4537 | lbu TMP3, TAB:CARG2->marked
4538 | lw TMP0, TAB:CARG2->asize
4539 | ld TMP1, TAB:CARG2->array
4540 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4541 | bnez AT, >7
4542 |. daddu RA, BASE, RA
4543 |2:
4544 | sltu AT, CARG3, TMP0
4545 | sll TMP2, CARG3, 3
4546 | beqz AT, ->vmeta_tsetr // In array part?
4547 |. daddu CRET1, TMP1, TMP2
4548 |->BC_TSETR_Z:
4549 | ld CARG1, 0(RA)
4550 | ins_next1
4551 | sd CARG1, 0(CRET1)
4552 | ins_next2
4553 |
4554 |7: // Possible table write barrier for the value. Skip valiswhite check.
4555 | barrierback TAB:CARG2, TMP3, CRET1, <2
4556 break;
4557
4558 case BC_TSETM:
4559 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
4560 | daddu RA, BASE, RA
4561 |1:
4562 | daddu TMP3, KBASE, RD
4563 | ld TAB:CARG2, -8(RA) // Guaranteed to be a table.
4564 | addiu TMP0, MULTRES, -8
4565 | lw TMP3, LO(TMP3) // Integer constant is in lo-word.
4566 | beqz TMP0, >4 // Nothing to copy?
4567 |. srl CARG3, TMP0, 3
4568 | cleartp CARG2
4569 | addu CARG3, CARG3, TMP3
4570 | lw TMP2, TAB:CARG2->asize
4571 | sll TMP1, TMP3, 3
4572 | lbu TMP3, TAB:CARG2->marked
4573 | ld CARG1, TAB:CARG2->array
4574 | sltu AT, TMP2, CARG3
4575 | bnez AT, >5
4576 |. daddu TMP2, RA, TMP0
4577 | daddu TMP1, TMP1, CARG1
4578 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4579 |3: // Copy result slots to table.
4580 | ld CRET1, 0(RA)
4581 | daddiu RA, RA, 8
4582 | sltu AT, RA, TMP2
4583 | sd CRET1, 0(TMP1)
4584 | bnez AT, <3
4585 |. daddiu TMP1, TMP1, 8
4586 | bnez TMP0, >7
4587 |. nop
4588 |4:
4589 | ins_next
4590 |
4591 |5: // Need to resize array part.
4592 | load_got lj_tab_reasize
4593 | sd BASE, L->base
4594 | sd PC, SAVE_PC
4595 | move BASE, RD
4596 | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
4597 |. move CARG1, L
4598 | // Must not reallocate the stack.
4599 | move RD, BASE
4600 | b <1
4601 |. ld BASE, L->base // Reload BASE for lack of a saved register.
4602 |
4603 |7: // Possible table write barrier for any value. Skip valiswhite check.
4604 | barrierback TAB:CARG2, TMP3, TMP0, <4
4605 break;
4606
4607 /* -- Calls and vararg handling ----------------------------------------- */
4608
4609 case BC_CALLM:
4610 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
4611 | decode_RDtoRC8 NARGS8:RC, RD
4612 | b ->BC_CALL_Z
4613 |. addu NARGS8:RC, NARGS8:RC, MULTRES
4614 break;
4615 case BC_CALL:
4616 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
4617 | decode_RDtoRC8 NARGS8:RC, RD
4618 |->BC_CALL_Z:
4619 | move TMP2, BASE
4620 | daddu BASE, BASE, RA
4621 | ld LFUNC:RB, 0(BASE)
4622 | daddiu BASE, BASE, 16
4623 | addiu NARGS8:RC, NARGS8:RC, -8
4624 | checkfunc RB, ->vmeta_call
4625 | ins_call
4626 break;
4627
4628 case BC_CALLMT:
4629 | // RA = base*8, (RB = 0,) RC = extra_nargs*8
4630 | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD.
4631 | // Fall through. Assumes BC_CALLT follows.
4632 break;
4633 case BC_CALLT:
4634 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
4635 | daddu RA, BASE, RA
4636 | ld RB, 0(RA)
4637 | move NARGS8:RC, RD
4638 | ld TMP1, FRAME_PC(BASE)
4639 | daddiu RA, RA, 16
4640 | addiu NARGS8:RC, NARGS8:RC, -8
4641 | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt
4642 |->BC_CALLT_Z:
4643 | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
4644 | lbu TMP3, LFUNC:CARG3->ffid
4645 | bnez TMP0, >7
4646 |. xori TMP2, TMP1, FRAME_VARG
4647 |1:
4648 | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
4649 | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function?
4650 | move TMP2, BASE
4651 | move RB, CARG3
4652 | beqz NARGS8:RC, >3
4653 |. move TMP3, NARGS8:RC
4654 |2:
4655 | ld CRET1, 0(RA)
4656 | daddiu RA, RA, 8
4657 | addiu TMP3, TMP3, -8
4658 | sd CRET1, 0(TMP2)
4659 | bnez TMP3, <2
4660 |. daddiu TMP2, TMP2, 8
4661 |3:
4662 | or TMP0, TMP0, AT
4663 | beqz TMP0, >5
4664 |. nop
4665 |4:
4666 | ins_callt
4667 |
4668 |5: // Tailcall to a fast function with a Lua frame below.
4669 | lw INS, -4(TMP1)
4670 | decode_RA8a RA, INS
4671 | decode_RA8b RA
4672 | dsubu TMP1, BASE, RA
4673 | ld TMP1, -32(TMP1)
4674 | cleartp LFUNC:TMP1
4675 | ld TMP1, LFUNC:TMP1->pc
4676 | b <4
4677 |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
4678 |
4679 |7: // Tailcall from a vararg function.
4680 | andi AT, TMP2, FRAME_TYPEP
4681 | bnez AT, <1 // Vararg frame below?
4682 |. dsubu TMP2, BASE, TMP2 // Relocate BASE down.
4683 | move BASE, TMP2
4684 | ld TMP1, FRAME_PC(TMP2)
4685 | b <1
4686 |. andi TMP0, TMP1, FRAME_TYPE
4687 break;
4688
4689 case BC_ITERC:
4690 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
4691 | move TMP2, BASE // Save old BASE for vmeta_call.
4692 | daddu BASE, BASE, RA
4693 | ld RB, -24(BASE)
4694 | ld CARG1, -16(BASE)
4695 | ld CARG2, -8(BASE)
4696 | li NARGS8:RC, 16 // Iterators get 2 arguments.
4697 | sd RB, 0(BASE) // Copy callable.
4698 | sd CARG1, 16(BASE) // Copy state.
4699 | sd CARG2, 24(BASE) // Copy control var.
4700 | daddiu BASE, BASE, 16
4701 | checkfunc RB, ->vmeta_call
4702 | ins_call
4703 break;
4704
4705 case BC_ITERN:
4706 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
4707 |.if JIT
4708 | // NYI: add hotloop, record BC_ITERN.
4709 |.endif
4710 | daddu RA, BASE, RA
4711 | ld TAB:RB, -16(RA)
4712 | lw RC, -8+LO(RA) // Get index from control var.
4713 | cleartp TAB:RB
4714 | daddiu PC, PC, 4
4715 | lw TMP0, TAB:RB->asize
4716 | ld TMP1, TAB:RB->array
4717 | dsll CARG3, TISNUM, 47
4718 |1: // Traverse array part.
4719 | sltu AT, RC, TMP0
4720 | beqz AT, >5 // Index points after array part?
4721 |. sll TMP3, RC, 3
4722 | daddu TMP3, TMP1, TMP3
4723 | ld CARG1, 0(TMP3)
4724 | lhu RD, -4+OFS_RD(PC)
4725 | or TMP2, RC, CARG3
4726 | beq CARG1, TISNIL, <1 // Skip holes in array part.
4727 |. addiu RC, RC, 1
4728 | sd TMP2, 0(RA)
4729 | sd CARG1, 8(RA)
4730 | or TMP0, RC, CARG3
4731 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4732 | decode_RD4b RD
4733 | daddu RD, RD, TMP3
4734 | sw TMP0, -8+LO(RA) // Update control var.
4735 | daddu PC, PC, RD
4736 |3:
4737 | ins_next
4738 |
4739 |5: // Traverse hash part.
4740 | lw TMP1, TAB:RB->hmask
4741 | subu RC, RC, TMP0
4742 | ld TMP2, TAB:RB->node
4743 |6:
4744 | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1.
4745 | bnez AT, <3
4746 |. sll TMP3, RC, 5
4747 | sll RB, RC, 3
4748 | subu TMP3, TMP3, RB
4749 | daddu NODE:TMP3, TMP3, TMP2
4750 | ld CARG1, 0(NODE:TMP3)
4751 | lhu RD, -4+OFS_RD(PC)
4752 | beq CARG1, TISNIL, <6 // Skip holes in hash part.
4753 |. addiu RC, RC, 1
4754 | ld CARG2, NODE:TMP3->key
4755 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4756 | sd CARG1, 8(RA)
4757 | addu RC, RC, TMP0
4758 | decode_RD4b RD
4759 | addu RD, RD, TMP3
4760 | sd CARG2, 0(RA)
4761 | daddu PC, PC, RD
4762 | b <3
4763 |. sw RC, -8+LO(RA) // Update control var.
4764 break;
4765
4766 case BC_ISNEXT:
4767 | // RA = base*8, RD = target (points to ITERN)
4768 | daddu RA, BASE, RA
4769 | srl TMP0, RD, 1
4770 | ld CFUNC:CARG1, -24(RA)
4771 | daddu TMP0, PC, TMP0
4772 | ld CARG2, -16(RA)
4773 | ld CARG3, -8(RA)
4774 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4775 | checkfunc CFUNC:CARG1, >5
4776 | gettp CARG2, CARG2
4777 | daddiu CARG2, CARG2, -LJ_TTAB
4778 | lbu TMP1, CFUNC:CARG1->ffid
4779 | daddiu CARG3, CARG3, -LJ_TNIL
4780 | or AT, CARG2, CARG3
4781 | daddiu TMP1, TMP1, -FF_next_N
4782 | or AT, AT, TMP1
4783 | bnez AT, >5
4784 |. lui TMP1, 0xfffe
4785 | daddu PC, TMP0, TMP2
4786 | ori TMP1, TMP1, 0x7fff
4787 | dsll TMP1, TMP1, 32
4788 | sd TMP1, -8(RA)
4789 |1:
4790 | ins_next
4791 |5: // Despecialize bytecode if any of the checks fail.
4792 | li TMP3, BC_JMP
4793 | li TMP1, BC_ITERC
4794 | sb TMP3, -4+OFS_OP(PC)
4795 | daddu PC, TMP0, TMP2
4796 | b <1
4797 |. sb TMP1, OFS_OP(PC)
4798 break;
4799
4800 case BC_VARG:
4801 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
4802 | ld TMP0, FRAME_PC(BASE)
4803 | decode_RDtoRC8 RC, RD
4804 | decode_RB8a RB, INS
4805 | daddu RC, BASE, RC
4806 | decode_RB8b RB
4807 | daddu RA, BASE, RA
4808 | daddiu RC, RC, FRAME_VARG
4809 | daddu TMP2, RA, RB
4810 | daddiu TMP3, BASE, -16 // TMP3 = vtop
4811 | dsubu RC, RC, TMP0 // RC = vbase
4812 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
4813 | beqz RB, >5 // Copy all varargs?
4814 |. dsubu TMP1, TMP3, RC
4815 | daddiu TMP2, TMP2, -16
4816 |1: // Copy vararg slots to destination slots.
4817 | ld CARG1, 0(RC)
4818 | sltu AT, RC, TMP3
4819 | daddiu RC, RC, 8
4820 |.if MIPSR6
4821 | selnez CARG1, CARG1, AT
4822 | seleqz AT, TISNIL, AT
4823 | or CARG1, CARG1, AT
4824 |.else
4825 | movz CARG1, TISNIL, AT
4826 |.endif
4827 | sd CARG1, 0(RA)
4828 | sltu AT, RA, TMP2
4829 | bnez AT, <1
4830 |. daddiu RA, RA, 8
4831 |3:
4832 | ins_next
4833 |
4834 |5: // Copy all varargs.
4835 | ld TMP0, L->maxstack
4836 | blez TMP1, <3 // No vararg slots?
4837 |. li MULTRES, 8 // MULTRES = (0+1)*8
4838 | daddu TMP2, RA, TMP1
4839 | sltu AT, TMP0, TMP2
4840 | bnez AT, >7
4841 |. daddiu MULTRES, TMP1, 8
4842 |6:
4843 | ld CRET1, 0(RC)
4844 | daddiu RC, RC, 8
4845 | sd CRET1, 0(RA)
4846 | sltu AT, RC, TMP3
4847 | bnez AT, <6 // More vararg slots?
4848 |. daddiu RA, RA, 8
4849 | b <3
4850 |. nop
4851 |
4852 |7: // Grow stack for varargs.
4853 | load_got lj_state_growstack
4854 | sd RA, L->top
4855 | dsubu RA, RA, BASE
4856 | sd BASE, L->base
4857 | dsubu BASE, RC, BASE // Need delta, because BASE may change.
4858 | sd PC, SAVE_PC
4859 | srl CARG2, TMP1, 3
4860 | call_intern lj_state_growstack // (lua_State *L, int n)
4861 |. move CARG1, L
4862 | move RC, BASE
4863 | ld BASE, L->base
4864 | daddu RA, BASE, RA
4865 | daddu RC, BASE, RC
4866 | b <6
4867 |. daddiu TMP3, BASE, -16
4868 break;
4869
4870 /* -- Returns ----------------------------------------------------------- */
4871
4872 case BC_RETM:
4873 | // RA = results*8, RD = extra_nresults*8
4874 | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
4875 | // Fall through. Assumes BC_RET follows.
4876 break;
4877
4878 case BC_RET:
4879 | // RA = results*8, RD = (nresults+1)*8
4880 | ld PC, FRAME_PC(BASE)
4881 | daddu RA, BASE, RA
4882 | move MULTRES, RD
4883 |1:
4884 | andi TMP0, PC, FRAME_TYPE
4885 | bnez TMP0, ->BC_RETV_Z
4886 |. xori TMP1, PC, FRAME_VARG
4887 |
4888 |->BC_RET_Z:
4889 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
4890 | lw INS, -4(PC)
4891 | daddiu TMP2, BASE, -16
4892 | daddiu RC, RD, -8
4893 | decode_RA8a TMP0, INS
4894 | decode_RB8a RB, INS
4895 | decode_RA8b TMP0
4896 | decode_RB8b RB
4897 | daddu TMP3, TMP2, RB
4898 | beqz RC, >3
4899 |. dsubu BASE, TMP2, TMP0
4900 |2:
4901 | ld CRET1, 0(RA)
4902 | daddiu RA, RA, 8
4903 | daddiu RC, RC, -8
4904 | sd CRET1, 0(TMP2)
4905 | bnez RC, <2
4906 |. daddiu TMP2, TMP2, 8
4907 |3:
4908 | daddiu TMP3, TMP3, -8
4909 |5:
4910 | sltu AT, TMP2, TMP3
4911 | bnez AT, >6
4912 |. ld LFUNC:TMP1, FRAME_FUNC(BASE)
4913 | ins_next1
4914 | cleartp LFUNC:TMP1
4915 | ld TMP1, LFUNC:TMP1->pc
4916 | ld KBASE, PC2PROTO(k)(TMP1)
4917 | ins_next2
4918 |
4919 |6: // Fill up results with nil.
4920 | sd TISNIL, 0(TMP2)
4921 | b <5
4922 |. daddiu TMP2, TMP2, 8
4923 |
4924 |->BC_RETV_Z: // Non-standard return case.
4925 | andi TMP2, TMP1, FRAME_TYPEP
4926 | bnez TMP2, ->vm_return
4927 |. nop
4928 | // Return from vararg function: relocate BASE down.
4929 | dsubu BASE, BASE, TMP1
4930 | b <1
4931 |. ld PC, FRAME_PC(BASE)
4932 break;
4933
4934 case BC_RET0: case BC_RET1:
4935 | // RA = results*8, RD = (nresults+1)*8
4936 | ld PC, FRAME_PC(BASE)
4937 | daddu RA, BASE, RA
4938 | move MULTRES, RD
4939 | andi TMP0, PC, FRAME_TYPE
4940 | bnez TMP0, ->BC_RETV_Z
4941 |. xori TMP1, PC, FRAME_VARG
4942 | lw INS, -4(PC)
4943 | daddiu TMP2, BASE, -16
4944 if (op == BC_RET1) {
4945 | ld CRET1, 0(RA)
4946 }
4947 | decode_RB8a RB, INS
4948 | decode_RA8a RA, INS
4949 | decode_RB8b RB
4950 | decode_RA8b RA
4951 | dsubu BASE, TMP2, RA
4952 if (op == BC_RET1) {
4953 | sd CRET1, 0(TMP2)
4954 }
4955 |5:
4956 | sltu AT, RD, RB
4957 | bnez AT, >6
4958 |. ld TMP1, FRAME_FUNC(BASE)
4959 | ins_next1
4960 | cleartp LFUNC:TMP1
4961 | ld TMP1, LFUNC:TMP1->pc
4962 | ld KBASE, PC2PROTO(k)(TMP1)
4963 | ins_next2
4964 |
4965 |6: // Fill up results with nil.
4966 | daddiu TMP2, TMP2, 8
4967 | daddiu RD, RD, 8
4968 | b <5
4969 if (op == BC_RET1) {
4970 |. sd TISNIL, 0(TMP2)
4971 } else {
4972 |. sd TISNIL, -8(TMP2)
4973 }
4974 break;
4975
4976 /* -- Loops and branches ------------------------------------------------ */
4977
4978 case BC_FORL:
4979 |.if JIT
4980 | hotloop
4981 |.endif
4982 | // Fall through. Assumes BC_IFORL follows.
4983 break;
4984
4985 case BC_JFORI:
4986 case BC_JFORL:
4987#if !LJ_HASJIT
4988 break;
4989#endif
4990 case BC_FORI:
4991 case BC_IFORL:
4992 | // RA = base*8, RD = target (after end of loop or start of loop)
4993 vk = (op == BC_IFORL || op == BC_JFORL);
4994 | daddu RA, BASE, RA
4995 | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type
4996 | gettp CARG3, CARG1
4997 if (op != BC_JFORL) {
4998 | srl RD, RD, 1
4999 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
5000 | daddu TMP2, RD, TMP2
5001 }
5002 if (!vk) {
5003 | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type
5004 | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type
5005 | gettp CARG4, CARG2
5006 | bne CARG3, TISNUM, >5
5007 |. gettp CRET2, CRET1
5008 | bne CARG4, TISNUM, ->vmeta_for
5009 |. sextw CARG3, CARG1
5010 | bne CRET2, TISNUM, ->vmeta_for
5011 |. sextw CARG2, CARG2
5012 | dext AT, CRET1, 31, 0
5013 | slt CRET1, CARG2, CARG3
5014 | slt TMP1, CARG3, CARG2
5015 |.if MIPSR6
5016 | selnez TMP1, TMP1, AT
5017 | seleqz CRET1, CRET1, AT
5018 | or CRET1, CRET1, TMP1
5019 |.else
5020 | movn CRET1, TMP1, AT
5021 |.endif
5022 } else {
5023 | bne CARG3, TISNUM, >5
5024 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type
5025 | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type
5026 | sextw TMP3, CARG1
5027 | sextw CARG2, CARG2
5028 | sextw CRET1, CRET1
5029 | addu CARG1, TMP3, CARG2
5030 | xor TMP0, CARG1, TMP3
5031 | xor TMP1, CARG1, CARG2
5032 | and TMP0, TMP0, TMP1
5033 | slt TMP1, CARG1, CRET1
5034 | slt CRET1, CRET1, CARG1
5035 | slt AT, CARG2, r0
5036 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
5037 |.if MIPSR6
5038 | selnez TMP1, TMP1, AT
5039 | seleqz CRET1, CRET1, AT
5040 | or CRET1, CRET1, TMP1
5041 |.else
5042 | movn CRET1, TMP1, AT
5043 |.endif
5044 | or CRET1, CRET1, TMP0
5045 | zextw CARG1, CARG1
5046 | settp CARG1, TISNUM
5047 }
5048 |1:
5049 if (op == BC_FORI) {
5050 |.if MIPSR6
5051 | selnez TMP2, TMP2, CRET1
5052 |.else
5053 | movz TMP2, r0, CRET1
5054 |.endif
5055 | daddu PC, PC, TMP2
5056 } else if (op == BC_JFORI) {
5057 | daddu PC, PC, TMP2
5058 | lhu RD, -4+OFS_RD(PC)
5059 } else if (op == BC_IFORL) {
5060 |.if MIPSR6
5061 | seleqz TMP2, TMP2, CRET1
5062 |.else
5063 | movn TMP2, r0, CRET1
5064 |.endif
5065 | daddu PC, PC, TMP2
5066 }
5067 if (vk) {
5068 | sd CARG1, FORL_IDX*8(RA)
5069 }
5070 | ins_next1
5071 | sd CARG1, FORL_EXT*8(RA)
5072 |2:
5073 if (op == BC_JFORI) {
5074 | beqz CRET1, =>BC_JLOOP
5075 |. decode_RD8b RD
5076 } else if (op == BC_JFORL) {
5077 | beqz CRET1, =>BC_JLOOP
5078 }
5079 | ins_next2
5080 |
5081 |5: // FP loop.
5082 |.if FPU
5083 if (!vk) {
5084 | ldc1 f0, FORL_IDX*8(RA)
5085 | ldc1 f2, FORL_STOP*8(RA)
5086 | sltiu TMP0, CARG3, LJ_TISNUM
5087 | sltiu TMP1, CARG4, LJ_TISNUM
5088 | sltiu AT, CRET2, LJ_TISNUM
5089 | ld TMP3, FORL_STEP*8(RA)
5090 | and TMP0, TMP0, TMP1
5091 | and AT, AT, TMP0
5092 | beqz AT, ->vmeta_for
5093 |. slt TMP3, TMP3, r0
5094 |.if MIPSR6
5095 | dmtc1 TMP3, FTMP2
5096 | cmp.lt.d FTMP0, f0, f2
5097 | cmp.lt.d FTMP1, f2, f0
5098 | sel.d FTMP2, FTMP1, FTMP0
5099 | b <1
5100 |. dmfc1 CRET1, FTMP2
5101 |.else
5102 | c.ole.d 0, f0, f2
5103 | c.ole.d 1, f2, f0
5104 | li CRET1, 1
5105 | movt CRET1, r0, 0
5106 | movt AT, r0, 1
5107 | b <1
5108 |. movn CRET1, AT, TMP3
5109 |.endif
5110 } else {
5111 | ldc1 f0, FORL_IDX*8(RA)
5112 | ldc1 f4, FORL_STEP*8(RA)
5113 | ldc1 f2, FORL_STOP*8(RA)
5114 | ld TMP3, FORL_STEP*8(RA)
5115 | add.d f0, f0, f4
5116 |.if MIPSR6
5117 | slt TMP3, TMP3, r0
5118 | dmtc1 TMP3, FTMP2
5119 | cmp.lt.d FTMP0, f0, f2
5120 | cmp.lt.d FTMP1, f2, f0
5121 | sel.d FTMP2, FTMP1, FTMP0
5122 | dmfc1 CRET1, FTMP2
5123 if (op == BC_IFORL) {
5124 | seleqz TMP2, TMP2, CRET1
5125 | daddu PC, PC, TMP2
5126 }
5127 |.else
5128 | c.ole.d 0, f0, f2
5129 | c.ole.d 1, f2, f0
5130 | slt TMP3, TMP3, r0
5131 | li CRET1, 1
5132 | li AT, 1
5133 | movt CRET1, r0, 0
5134 | movt AT, r0, 1
5135 | movn CRET1, AT, TMP3
5136 if (op == BC_IFORL) {
5137 | movn TMP2, r0, CRET1
5138 | daddu PC, PC, TMP2
5139 }
5140 |.endif
5141 | sdc1 f0, FORL_IDX*8(RA)
5142 | ins_next1
5143 | b <2
5144 |. sdc1 f0, FORL_EXT*8(RA)
5145 }
5146 |.else
5147 if (!vk) {
5148 | sltiu TMP0, CARG3, LJ_TISNUM
5149 | sltiu TMP1, CARG4, LJ_TISNUM
5150 | sltiu AT, CRET2, LJ_TISNUM
5151 | and TMP0, TMP0, TMP1
5152 | and AT, AT, TMP0
5153 | beqz AT, ->vmeta_for
5154 |. nop
5155 | bal ->vm_sfcmpolex
5156 |. lw TMP3, FORL_STEP*8+HI(RA)
5157 | b <1
5158 |. nop
5159 } else {
5160 | load_got __adddf3
5161 | call_extern
5162 |. sw TMP2, TMPD
5163 | ld CARG2, FORL_STOP*8(RA)
5164 | move CARG1, CRET1
5165 if ( op == BC_JFORL ) {
5166 | lhu RD, -4+OFS_RD(PC)
5167 | decode_RD8b RD
5168 }
5169 | bal ->vm_sfcmpolex
5170 |. lw TMP3, FORL_STEP*8+HI(RA)
5171 | b <1
5172 |. lw TMP2, TMPD
5173 }
5174 |.endif
5175 break;
5176
5177 case BC_ITERL:
5178 |.if JIT
5179 | hotloop
5180 |.endif
5181 | // Fall through. Assumes BC_IITERL follows.
5182 break;
5183
5184 case BC_JITERL:
5185#if !LJ_HASJIT
5186 break;
5187#endif
5188 case BC_IITERL:
5189 | // RA = base*8, RD = target
5190 | daddu RA, BASE, RA
5191 | ld TMP1, 0(RA)
5192 | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
5193 |. nop
5194 if (op == BC_JITERL) {
5195 | b =>BC_JLOOP
5196 |. sd TMP1, -8(RA)
5197 } else {
5198 | branch_RD // Otherwise save control var + branch.
5199 | sd TMP1, -8(RA)
5200 }
5201 |1:
5202 | ins_next
5203 break;
5204
5205 case BC_LOOP:
5206 | // RA = base*8, RD = target (loop extent)
5207 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5208 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5209 |.if JIT
5210 | hotloop
5211 |.endif
5212 | // Fall through. Assumes BC_ILOOP follows.
5213 break;
5214
5215 case BC_ILOOP:
5216 | // RA = base*8, RD = target (loop extent)
5217 | ins_next
5218 break;
5219
5220 case BC_JLOOP:
5221 |.if JIT
5222 | // RA = base*8 (ignored), RD = traceno*8
5223 | ld TMP1, DISPATCH_J(trace)(DISPATCH)
5224 | li AT, 0
5225 | daddu TMP1, TMP1, RD
5226 | // Traces on MIPS don't store the trace number, so use 0.
5227 | sd AT, DISPATCH_GL(vmstate)(DISPATCH)
5228 | ld TRACE:TMP2, 0(TMP1)
5229 | sd BASE, DISPATCH_GL(jit_base)(DISPATCH)
5230 | ld TMP2, TRACE:TMP2->mcode
5231 | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
5232 | jr TMP2
5233 |. daddiu JGL, DISPATCH, GG_DISP2G+32768
5234 |.endif
5235 break;
5236
5237 case BC_JMP:
5238 | // RA = base*8 (only used by trace recorder), RD = target
5239 | branch_RD
5240 | ins_next
5241 break;
5242
5243 /* -- Function headers -------------------------------------------------- */
5244
5245 case BC_FUNCF:
5246 |.if JIT
5247 | hotcall
5248 |.endif
5249 case BC_FUNCV: /* NYI: compiled vararg functions. */
5250 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
5251 break;
5252
5253 case BC_JFUNCF:
5254#if !LJ_HASJIT
5255 break;
5256#endif
5257 case BC_IFUNCF:
5258 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5259 | ld TMP2, L->maxstack
5260 | lbu TMP1, -4+PC2PROTO(numparams)(PC)
5261 | ld KBASE, -4+PC2PROTO(k)(PC)
5262 | sltu AT, TMP2, RA
5263 | bnez AT, ->vm_growstack_l
5264 |. sll TMP1, TMP1, 3
5265 if (op != BC_JFUNCF) {
5266 | ins_next1
5267 }
5268 |2:
5269 | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters.
5270 | bnez AT, >3
5271 |. daddu AT, BASE, NARGS8:RC
5272 if (op == BC_JFUNCF) {
5273 | decode_RD8a RD, INS
5274 | b =>BC_JLOOP
5275 |. decode_RD8b RD
5276 } else {
5277 | ins_next2
5278 }
5279 |
5280 |3: // Clear missing parameters.
5281 | sd TISNIL, 0(AT)
5282 | b <2
5283 |. addiu NARGS8:RC, NARGS8:RC, 8
5284 break;
5285
5286 case BC_JFUNCV:
5287#if !LJ_HASJIT
5288 break;
5289#endif
5290 | NYI // NYI: compiled vararg functions
5291 break; /* NYI: compiled vararg functions. */
5292
5293 case BC_IFUNCV:
5294 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5295 | li TMP0, LJ_TFUNC
5296 | daddu TMP1, BASE, RC
5297 | ld TMP2, L->maxstack
5298 | settp LFUNC:RB, TMP0
5299 | daddu TMP0, RA, RC
5300 | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
5301 | daddiu TMP3, RC, 16+FRAME_VARG
5302 | sltu AT, TMP0, TMP2
5303 | ld KBASE, -4+PC2PROTO(k)(PC)
5304 | beqz AT, ->vm_growstack_l
5305 |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
5306 | lbu TMP2, -4+PC2PROTO(numparams)(PC)
5307 | move RA, BASE
5308 | move RC, TMP1
5309 | ins_next1
5310 | beqz TMP2, >3
5311 |. daddiu BASE, TMP1, 16
5312 |1:
5313 | ld TMP0, 0(RA)
5314 | sltu AT, RA, RC // Less args than parameters?
5315 | move CARG1, TMP0
5316 |.if MIPSR6
5317 | selnez TMP0, TMP0, AT
5318 | seleqz TMP3, TISNIL, AT
5319 | or TMP0, TMP0, TMP3
5320 | seleqz TMP3, CARG1, AT
5321 | selnez CARG1, TISNIL, AT
5322 | or CARG1, CARG1, TMP3
5323 |.else
5324 | movz TMP0, TISNIL, AT // Clear missing parameters.
5325 | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC).
5326 |.endif
5327 | addiu TMP2, TMP2, -1
5328 | sd TMP0, 16(TMP1)
5329 | daddiu TMP1, TMP1, 8
5330 | sd CARG1, 0(RA)
5331 | bnez TMP2, <1
5332 |. daddiu RA, RA, 8
5333 |3:
5334 | ins_next2
5335 break;
5336
5337 case BC_FUNCC:
5338 case BC_FUNCCW:
5339 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
5340 if (op == BC_FUNCC) {
5341 | ld CFUNCADDR, CFUNC:RB->f
5342 } else {
5343 | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH)
5344 }
5345 | daddu TMP1, RA, NARGS8:RC
5346 | ld TMP2, L->maxstack
5347 | daddu RC, BASE, NARGS8:RC
5348 | sd BASE, L->base
5349 | sltu AT, TMP2, TMP1
5350 | sd RC, L->top
5351 | li_vmstate C
5352 if (op == BC_FUNCCW) {
5353 | ld CARG2, CFUNC:RB->f
5354 }
5355 | bnez AT, ->vm_growstack_c // Need to grow stack.
5356 |. move CARG1, L
5357 | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f])
5358 |. st_vmstate
5359 | // Returns nresults.
5360 | ld BASE, L->base
5361 | sll RD, CRET1, 3
5362 | ld TMP1, L->top
5363 | li_vmstate INTERP
5364 | ld PC, FRAME_PC(BASE) // Fetch PC of caller.
5365 | dsubu RA, TMP1, RD // RA = L->top - nresults*8
5366 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
5367 | b ->vm_returnc
5368 |. st_vmstate
5369 break;
5370
5371 /* ---------------------------------------------------------------------- */
5372
5373 default:
5374 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
5375 exit(2);
5376 break;
5377 }
5378}
5379/* Build the backend: calls build_subroutines(), then build_ins() once per opcode 0..BC__MAX-1. Returns BC__MAX. */
5380static int build_backend(BuildCtx *ctx)
5381{
5382 int op;
5383 /* Grow the DynASM PC label table to BC__MAX entries; handlers reference labels such as =>BC_JLOOP. */
5384 dasm_growpc(Dst, BC__MAX);
5385
5386 build_subroutines(ctx);
5387 /* NOTE(review): .code_op presumably selects the section for opcode handlers -- confirm against the .section declaration. */
5388 |.code_op
5389 for (op = 0; op < BC__MAX; op++)
5390 build_ins(ctx, (BCOp)op, op);
5391 /* The caller receives the number of opcodes that were iterated over. */
5392 return BC__MAX;
5393}
5394
5395/* Emit pseudo frame-info (.debug_frame: one CIE plus FDEs) for all assembler functions. Only BUILD_elfasm mode emits anything. */
5396static void emit_asm_debug(BuildCtx *ctx)
5397{
5398 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of vm_ffi_call within the generated code. */
5399 int i;
5400 switch (ctx->mode) {
5401 case BUILD_elfasm:
5402 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
5403 fprintf(ctx->fp, /* Common CIE shared by both FDEs below. */
5404 ".Lframe0:\n"
5405 "\t.4byte .LECIE0-.LSCIE0\n"
5406 ".LSCIE0:\n"
5407 "\t.4byte 0xffffffff\n"
5408 "\t.byte 0x1\n"
5409 "\t.string \"\"\n"
5410 "\t.uleb128 0x1\n"
5411 "\t.sleb128 -4\n"
5412 "\t.byte 31\n"
5413 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n"
5414 "\t.align 2\n"
5415 ".LECIE0:\n\n");
5416 fprintf(ctx->fp, /* FDE0: covers .Lbegin up to fcofs bytes, i.e. everything before vm_ffi_call. */
5417 ".LSFDE0:\n"
5418 "\t.4byte .LEFDE0-.LASFDE0\n"
5419 ".LASFDE0:\n"
5420 "\t.4byte .Lframe0\n"
5421 "\t.8byte .Lbegin\n"
5422 "\t.8byte %d\n"
5423 "\t.byte 0xe\n\t.uleb128 %d\n"
5424 "\t.byte 0x9f\n\t.sleb128 2*5\n"
5425 "\t.byte 0x9e\n\t.sleb128 2*6\n",
5426 fcofs, CFRAME_SIZE);
5427 for (i = 23; i >= 16; i--) /* One 0x80+reg save record (DW_CFA_offset encoding) per register r23..r16. */
5428 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i));
5429#if !LJ_SOFTFP
5430 for (i = 31; i >= 24; i--) /* Same for DWARF regs 32+i; presumably FPRs f31..f24 -- confirm against the MIPS DWARF register map. */
5431 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i));
5432#endif
5433 fprintf(ctx->fp,
5434 "\t.align 2\n"
5435 ".LEFDE0:\n\n");
5436#if LJ_HASFFI
5437 fprintf(ctx->fp, /* FDE1: covers lj_vm_ffi_call to the end of the code. Start address and range are 8 bytes wide, matching FDE0. */
5438 ".LSFDE1:\n"
5439 "\t.4byte .LEFDE1-.LASFDE1\n"
5440 ".LASFDE1:\n"
5441 "\t.4byte .Lframe0\n"
5442 "\t.8byte lj_vm_ffi_call\n"
5443 "\t.8byte %d\n"
5444 "\t.byte 0x9f\n\t.uleb128 2*1\n"
5445 "\t.byte 0x90\n\t.uleb128 2*2\n"
5446 "\t.byte 0xd\n\t.uleb128 0x10\n"
5447 "\t.align 2\n"
5448 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
5449#endif
5450#if !LJ_NO_UNWIND
5451 /* NYI */
5452#endif
5453 break;
5454 default: /* Other build modes get no frame info. */
5455 break;
5456 }
5457}
5458
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index e2d62e00..6aa00c5b 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
1|// Low-level VM code for PowerPC CPUs. 1|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4| 4|
@@ -18,7 +18,6 @@
18|// DynASM defines used by the PPC port: 18|// DynASM defines used by the PPC port:
19|// 19|//
20|// P64 64 bit pointers (only for GPR64 testing). 20|// P64 64 bit pointers (only for GPR64 testing).
21|// Note: a full PPC64 _LP64 port is not planned.
22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). 21|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
23|// Affects reg saves, stack layout, carry/overflow/dot flags etc. 22|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). 23|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -103,6 +102,18 @@
103|// Fixed register assignments for the interpreter. 102|// Fixed register assignments for the interpreter.
104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) 103|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
105| 104|
105|.macro .FPU, a, b
106|.if FPU
107| a, b
108|.endif
109|.endmacro
110|
111|.macro .FPU, a, b, c
112|.if FPU
113| a, b, c
114|.endif
115|.endmacro
116|
106|// The following must be C callee-save (but BASE is often refetched). 117|// The following must be C callee-save (but BASE is often refetched).
107|.define BASE, r14 // Base of current Lua stack frame. 118|.define BASE, r14 // Base of current Lua stack frame.
108|.define KBASE, r15 // Constants of current Lua function. 119|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +127,10 @@
116|.define TISNUM, r22 127|.define TISNUM, r22
117|.define TISNIL, r23 128|.define TISNIL, r23
118|.define ZERO, r24 129|.define ZERO, r24
130|.if FPU
119|.define TOBIT, f30 // 2^52 + 2^51. 131|.define TOBIT, f30 // 2^52 + 2^51.
120|.define TONUM, f31 // 2^52 + 2^51 + 2^31. 132|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
133|.endif
121| 134|
122|// The following temporaries are not saved across C calls, except for RA. 135|// The following temporaries are not saved across C calls, except for RA.
123|.define RA, r20 // Callee-save. 136|.define RA, r20 // Callee-save.
@@ -133,6 +146,7 @@
133| 146|
134|// Saved temporaries. 147|// Saved temporaries.
135|.define SAVE0, r21 148|.define SAVE0, r21
149|.define SAVE1, r25
136| 150|
137|// Calling conventions. 151|// Calling conventions.
138|.define CARG1, r3 152|.define CARG1, r3
@@ -141,8 +155,10 @@
141|.define CARG4, r6 // Overlaps TMP3. 155|.define CARG4, r6 // Overlaps TMP3.
142|.define CARG5, r7 // Overlaps INS. 156|.define CARG5, r7 // Overlaps INS.
143| 157|
158|.if FPU
144|.define FARG1, f1 159|.define FARG1, f1
145|.define FARG2, f2 160|.define FARG2, f2
161|.endif
146| 162|
147|.define CRET1, r3 163|.define CRET1, r3
148|.define CRET2, r4 164|.define CRET2, r4
@@ -213,10 +229,16 @@
213|.endif 229|.endif
214|.else 230|.else
215| 231|
232|.if FPU
216|.define SAVE_LR, 276(sp) 233|.define SAVE_LR, 276(sp)
217|.define CFRAME_SPACE, 272 // Delta for sp. 234|.define CFRAME_SPACE, 272 // Delta for sp.
218|// Back chain for sp: 272(sp) <-- sp entering interpreter 235|// Back chain for sp: 272(sp) <-- sp entering interpreter
219|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. 236|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
237|.else
238|.define SAVE_LR, 132(sp)
239|.define CFRAME_SPACE, 128 // Delta for sp.
240|// Back chain for sp: 128(sp) <-- sp entering interpreter
241|.endif
220|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. 242|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
221|.define SAVE_CR, 52(sp) // 32 bit CR save. 243|.define SAVE_CR, 52(sp) // 32 bit CR save.
222|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 244|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +248,25 @@
226|.define SAVE_PC, 32(sp) 248|.define SAVE_PC, 32(sp)
227|.define SAVE_MULTRES, 28(sp) 249|.define SAVE_MULTRES, 28(sp)
228|.define UNUSED1, 24(sp) 250|.define UNUSED1, 24(sp)
251|.if FPU
229|.define TMPD_LO, 20(sp) 252|.define TMPD_LO, 20(sp)
230|.define TMPD_HI, 16(sp) 253|.define TMPD_HI, 16(sp)
231|.define TONUM_LO, 12(sp) 254|.define TONUM_LO, 12(sp)
232|.define TONUM_HI, 8(sp) 255|.define TONUM_HI, 8(sp)
256|.else
257|.define SFSAVE_4, 20(sp)
258|.define SFSAVE_3, 16(sp)
259|.define SFSAVE_2, 12(sp)
260|.define SFSAVE_1, 8(sp)
261|.endif
233|// Next frame lr: 4(sp) 262|// Next frame lr: 4(sp)
234|// Back chain for sp: 0(sp) <-- sp while in interpreter 263|// Back chain for sp: 0(sp) <-- sp while in interpreter
235| 264|
265|.if FPU
236|.define TMPD_BLO, 23(sp) 266|.define TMPD_BLO, 23(sp)
237|.define TMPD, TMPD_HI 267|.define TMPD, TMPD_HI
238|.define TONUM_D, TONUM_HI 268|.define TONUM_D, TONUM_HI
269|.endif
239| 270|
240|.endif 271|.endif
241| 272|
@@ -245,7 +276,7 @@
245|.else 276|.else
246| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 277| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
247|.endif 278|.endif
248| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 279| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
249|.endmacro 280|.endmacro
250|.macro rest_, reg 281|.macro rest_, reg
251|.if GPR64 282|.if GPR64
@@ -253,7 +284,7 @@
253|.else 284|.else
254| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) 285| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
255|.endif 286|.endif
256| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 287| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
257|.endmacro 288|.endmacro
258| 289|
259|.macro saveregs 290|.macro saveregs
@@ -316,19 +347,14 @@
316|.type NODE, Node 347|.type NODE, Node
317|.type NARGS8, int 348|.type NARGS8, int
318|.type TRACE, GCtrace 349|.type TRACE, GCtrace
350|.type SBUF, SBuf
319| 351|
320|//----------------------------------------------------------------------- 352|//-----------------------------------------------------------------------
321| 353|
322|// These basic macros should really be part of DynASM.
323|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
324|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
325|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
326|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
327|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
328|
329|// Trap for not-yet-implemented parts. 354|// Trap for not-yet-implemented parts.
330|.macro NYI; tw 4, sp, sp; .endmacro 355|.macro NYI; tw 4, sp, sp; .endmacro
331| 356|
357|.if FPU
332|// int/FP conversions. 358|// int/FP conversions.
333|.macro tonum_i, freg, reg 359|.macro tonum_i, freg, reg
334| xoris reg, reg, 0x8000 360| xoris reg, reg, 0x8000
@@ -352,6 +378,7 @@
352|.macro toint, reg, freg 378|.macro toint, reg, freg
353| toint reg, freg, freg 379| toint reg, freg, freg
354|.endmacro 380|.endmacro
381|.endif
355| 382|
356|//----------------------------------------------------------------------- 383|//-----------------------------------------------------------------------
357| 384|
@@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx)
539 | beq >2 566 | beq >2
540 |1: 567 |1:
541 | addic. TMP1, TMP1, -8 568 | addic. TMP1, TMP1, -8
569 |.if FPU
542 | lfd f0, 0(RA) 570 | lfd f0, 0(RA)
571 |.else
572 | lwz CARG1, 0(RA)
573 | lwz CARG2, 4(RA)
574 |.endif
543 | addi RA, RA, 8 575 | addi RA, RA, 8
576 |.if FPU
544 | stfd f0, 0(BASE) 577 | stfd f0, 0(BASE)
578 |.else
579 | stw CARG1, 0(BASE)
580 | stw CARG2, 4(BASE)
581 |.endif
545 | addi BASE, BASE, 8 582 | addi BASE, BASE, 8
546 | bney <1 583 | bney <1
547 | 584 |
@@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx)
619 | .toc ld TOCREG, SAVE_TOC 656 | .toc ld TOCREG, SAVE_TOC
620 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 657 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
621 | lp BASE, L->base 658 | lp BASE, L->base
622 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 659 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
623 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 660 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
624 | li ZERO, 0 661 | li ZERO, 0
625 | stw TMP3, TMPD 662 | .FPU stw TMP3, TMPD
626 | li TMP1, LJ_TFALSE 663 | li TMP1, LJ_TFALSE
627 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 664 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
628 | li TISNIL, LJ_TNIL 665 | li TISNIL, LJ_TNIL
629 | li_vmstate INTERP 666 | li_vmstate INTERP
630 | lfs TOBIT, TMPD 667 | .FPU lfs TOBIT, TMPD
631 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 668 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
632 | la RA, -8(BASE) // Results start at BASE-8. 669 | la RA, -8(BASE) // Results start at BASE-8.
633 | stw TMP3, TMPD 670 | .FPU stw TMP3, TMPD
634 | addi DISPATCH, DISPATCH, GG_G2DISP 671 | addi DISPATCH, DISPATCH, GG_G2DISP
635 | stw TMP1, 0(RA) // Prepend false to error message. 672 | stw TMP1, 0(RA) // Prepend false to error message.
636 | li RD, 16 // 2 results: false + error message. 673 | li RD, 16 // 2 results: false + error message.
637 | st_vmstate 674 | st_vmstate
638 | lfs TONUM, TMPD 675 | .FPU lfs TONUM, TMPD
639 | b ->vm_returnc 676 | b ->vm_returnc
640 | 677 |
641 |//----------------------------------------------------------------------- 678 |//-----------------------------------------------------------------------
@@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx)
684 | stw CARG3, SAVE_NRES 721 | stw CARG3, SAVE_NRES
685 | cmplwi TMP1, 0 722 | cmplwi TMP1, 0
686 | stw CARG3, SAVE_ERRF 723 | stw CARG3, SAVE_ERRF
687 | stp TMP0, L->cframe
688 | stp CARG3, SAVE_CFRAME 724 | stp CARG3, SAVE_CFRAME
689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 725 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
726 | stp TMP0, L->cframe
690 | beq >3 727 | beq >3
691 | 728 |
692 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
693 | mr RA, BASE 731 | mr RA, BASE
694 | lp BASE, L->base 732 | lp BASE, L->base
695 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 733 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
696 | lp TMP1, L->top 734 | lp TMP1, L->top
697 | lwz PC, FRAME_PC(BASE) 735 | lwz PC, FRAME_PC(BASE)
698 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 736 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
699 | stb CARG3, L->status 737 | stb CARG3, L->status
700 | stw TMP3, TMPD 738 | .FPU stw TMP3, TMPD
701 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 739 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
702 | lfs TOBIT, TMPD 740 | .FPU lfs TOBIT, TMPD
703 | sub RD, TMP1, BASE 741 | sub RD, TMP1, BASE
704 | stw TMP3, TMPD 742 | .FPU stw TMP3, TMPD
705 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 743 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
706 | addi RD, RD, 8 744 | addi RD, RD, 8
707 | stw TMP0, TONUM_HI 745 | .FPU stw TMP0, TONUM_HI
708 | li_vmstate INTERP 746 | li_vmstate INTERP
709 | li ZERO, 0 747 | li ZERO, 0
710 | st_vmstate 748 | st_vmstate
711 | andix. TMP0, PC, FRAME_TYPE 749 | andix. TMP0, PC, FRAME_TYPE
712 | mr MULTRES, RD 750 | mr MULTRES, RD
713 | lfs TONUM, TMPD 751 | .FPU lfs TONUM, TMPD
714 | li TISNIL, LJ_TNIL 752 | li TISNIL, LJ_TNIL
715 | beq ->BC_RET_Z 753 | beq ->BC_RET_Z
716 | b ->vm_return 754 | b ->vm_return
@@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx)
729 | 767 |
730 |1: // Entry point for vm_pcall above (PC = ftype). 768 |1: // Entry point for vm_pcall above (PC = ftype).
731 | lp TMP1, L:CARG1->cframe 769 | lp TMP1, L:CARG1->cframe
732 | stw CARG3, SAVE_NRES
733 | mr L, CARG1 770 | mr L, CARG1
734 | stw CARG1, SAVE_L 771 | stw CARG3, SAVE_NRES
735 | mr BASE, CARG2
736 | stp sp, L->cframe // Add our C frame to cframe chain.
737 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 772 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
773 | stw CARG1, SAVE_L
774 | mr BASE, CARG2
775 | addi DISPATCH, DISPATCH, GG_G2DISP
738 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 776 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
739 | stp TMP1, SAVE_CFRAME 777 | stp TMP1, SAVE_CFRAME
740 | addi DISPATCH, DISPATCH, GG_G2DISP 778 | stp sp, L->cframe // Add our C frame to cframe chain.
741 | 779 |
742 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 780 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
781 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
743 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 782 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
744 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 783 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
745 | lp TMP1, L->top 784 | lp TMP1, L->top
746 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 785 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
747 | add PC, PC, BASE 786 | add PC, PC, BASE
748 | stw TMP3, TMPD 787 | .FPU stw TMP3, TMPD
749 | li ZERO, 0 788 | li ZERO, 0
750 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 789 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
751 | lfs TOBIT, TMPD 790 | .FPU lfs TOBIT, TMPD
752 | sub PC, PC, TMP2 // PC = frame delta + frame type 791 | sub PC, PC, TMP2 // PC = frame delta + frame type
753 | stw TMP3, TMPD 792 | .FPU stw TMP3, TMPD
754 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 793 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
755 | sub NARGS8:RC, TMP1, BASE 794 | sub NARGS8:RC, TMP1, BASE
756 | stw TMP0, TONUM_HI 795 | .FPU stw TMP0, TONUM_HI
757 | li_vmstate INTERP 796 | li_vmstate INTERP
758 | lfs TONUM, TMPD 797 | .FPU lfs TONUM, TMPD
759 | li TISNIL, LJ_TNIL 798 | li TISNIL, LJ_TNIL
760 | st_vmstate 799 | st_vmstate
761 | 800 |
@@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx)
776 | lwz TMP0, L:CARG1->stack 815 | lwz TMP0, L:CARG1->stack
777 | stw CARG1, SAVE_L 816 | stw CARG1, SAVE_L
778 | lp TMP1, L->top 817 | lp TMP1, L->top
818 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
779 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 819 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
780 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 820 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
781 | lp TMP1, L->cframe 821 | lp TMP1, L->cframe
782 | stp sp, L->cframe // Add our C frame to cframe chain. 822 | addi DISPATCH, DISPATCH, GG_G2DISP
783 | .toc lp CARG4, 0(CARG4) 823 | .toc lp CARG4, 0(CARG4)
784 | li TMP2, 0 824 | li TMP2, 0
785 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 825 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
786 | stw TMP2, SAVE_ERRF // No error function. 826 | stw TMP2, SAVE_ERRF // No error function.
787 | stp TMP1, SAVE_CFRAME 827 | stp TMP1, SAVE_CFRAME
828 | stp sp, L->cframe // Add our C frame to cframe chain.
829 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
788 | mtctr CARG4 830 | mtctr CARG4
789 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 831 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
790 |.if PPE 832 |.if PPE
@@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
793 |.else 835 |.else
794 | mr. BASE, CRET1 836 | mr. BASE, CRET1
795 |.endif 837 |.endif
796 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 838 | li PC, FRAME_CP
797 | li PC, FRAME_CP
798 | addi DISPATCH, DISPATCH, GG_G2DISP
799 | bne <3 // Else continue with the call. 839 | bne <3 // Else continue with the call.
800 | b ->vm_leave_cp // No base? Just remove C frame. 840 | b ->vm_leave_cp // No base? Just remove C frame.
801 | 841 |
@@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx)
842 | lwz INS, -4(PC) 882 | lwz INS, -4(PC)
843 | subi CARG2, RB, 16 883 | subi CARG2, RB, 16
844 | decode_RB8 SAVE0, INS 884 | decode_RB8 SAVE0, INS
885 |.if FPU
845 | lfd f0, 0(RA) 886 | lfd f0, 0(RA)
887 |.else
888 | lwz TMP2, 0(RA)
889 | lwz TMP3, 4(RA)
890 |.endif
846 | add TMP1, BASE, SAVE0 891 | add TMP1, BASE, SAVE0
847 | stp BASE, L->base 892 | stp BASE, L->base
848 | cmplw TMP1, CARG2 893 | cmplw TMP1, CARG2
849 | sub CARG3, CARG2, TMP1 894 | sub CARG3, CARG2, TMP1
850 | decode_RA8 RA, INS 895 | decode_RA8 RA, INS
896 |.if FPU
851 | stfd f0, 0(CARG2) 897 | stfd f0, 0(CARG2)
898 |.else
899 | stw TMP2, 0(CARG2)
900 | stw TMP3, 4(CARG2)
901 |.endif
852 | bney ->BC_CAT_Z 902 | bney ->BC_CAT_Z
903 |.if FPU
853 | stfdx f0, BASE, RA 904 | stfdx f0, BASE, RA
905 |.else
906 | stwux TMP2, RA, BASE
907 | stw TMP3, 4(RA)
908 |.endif
854 | b ->cont_nop 909 | b ->cont_nop
855 | 910 |
856 |//-- Table indexing metamethods ----------------------------------------- 911 |//-- Table indexing metamethods -----------------------------------------
@@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx)
903 | // Returns TValue * (finished) or NULL (metamethod). 958 | // Returns TValue * (finished) or NULL (metamethod).
904 | cmplwi CRET1, 0 959 | cmplwi CRET1, 0
905 | beq >3 960 | beq >3
961 |.if FPU
906 | lfd f0, 0(CRET1) 962 | lfd f0, 0(CRET1)
963 |.else
964 | lwz TMP0, 0(CRET1)
965 | lwz TMP1, 4(CRET1)
966 |.endif
907 | ins_next1 967 | ins_next1
968 |.if FPU
908 | stfdx f0, BASE, RA 969 | stfdx f0, BASE, RA
970 |.else
971 | stwux TMP0, RA, BASE
972 | stw TMP1, 4(RA)
973 |.endif
909 | ins_next2 974 | ins_next2
910 | 975 |
911 |3: // Call __index metamethod. 976 |3: // Call __index metamethod.
@@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx)
918 | li NARGS8:RC, 16 // 2 args for func(t, k). 983 | li NARGS8:RC, 16 // 2 args for func(t, k).
919 | b ->vm_call_dispatch_f 984 | b ->vm_call_dispatch_f
920 | 985 |
 986 |->vmeta_tgetr: // Out-of-line path: fetch via lj_tab_getinth.
 987 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
 988 | // Returns cTValue * or NULL.
 989 | cmplwi CRET1, 0
 990 | beq >1 // NULL result: store nil below.
 991 |.if FPU
 992 | lfd f14, 0(CRET1) // Load the 8 byte slot into f14.
 993 |.else
 994 | lwz SAVE0, 0(CRET1) // No FPU: load the slot as two 32 bit words.
 995 | lwz SAVE1, 4(CRET1)
 996 |.endif
 997 | b ->BC_TGETR_Z // Continue there with the value in f14 or SAVE0/SAVE1.
 998 |1:
 999 | stwx TISNIL, BASE, RA // Write TISNIL to the word at BASE+RA.
 1000 | b ->cont_nop
1001 |
921 |//----------------------------------------------------------------------- 1002 |//-----------------------------------------------------------------------
922 | 1003 |
923 |->vmeta_tsets1: 1004 |->vmeta_tsets1:
@@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx)
967 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 1048 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
968 | // Returns TValue * (finished) or NULL (metamethod). 1049 | // Returns TValue * (finished) or NULL (metamethod).
969 | cmplwi CRET1, 0 1050 | cmplwi CRET1, 0
1051 |.if FPU
970 | lfdx f0, BASE, RA 1052 | lfdx f0, BASE, RA
1053 |.else
1054 | lwzux TMP2, RA, BASE
1055 | lwz TMP3, 4(RA)
1056 |.endif
971 | beq >3 1057 | beq >3
972 | // NOBARRIER: lj_meta_tset ensures the table is not black. 1058 | // NOBARRIER: lj_meta_tset ensures the table is not black.
973 | ins_next1 1059 | ins_next1
1060 |.if FPU
974 | stfd f0, 0(CRET1) 1061 | stfd f0, 0(CRET1)
1062 |.else
1063 | stw TMP2, 0(CRET1)
1064 | stw TMP3, 4(CRET1)
1065 |.endif
975 | ins_next2 1066 | ins_next2
976 | 1067 |
977 |3: // Call __newindex metamethod. 1068 |3: // Call __newindex metamethod.
@@ -982,9 +1073,28 @@ static void build_subroutines(BuildCtx *ctx)
982 | add PC, TMP1, BASE 1073 | add PC, TMP1, BASE
983 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1074 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
984 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 1075 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
1076 |.if FPU
985 | stfd f0, 16(BASE) // Copy value to third argument. 1077 | stfd f0, 16(BASE) // Copy value to third argument.
1078 |.else
1079 | stw TMP2, 16(BASE)
1080 | stw TMP3, 20(BASE)
1081 |.endif
986 | b ->vm_call_dispatch_f 1082 | b ->vm_call_dispatch_f
987 | 1083 |
 1084 |->vmeta_tsetr: // Out-of-line path: store via lj_tab_setinth.
 1085 | stp BASE, L->base // Write BASE and PC back before the C call.
 1086 | mr CARG1, L
 1087 | stw PC, SAVE_PC
 1088 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
 1089 | // Returns TValue *.
 1090 |.if FPU
 1091 | stfd f14, 0(CRET1) // Store the value held in f14 to the returned slot.
 1092 |.else
 1093 | stw SAVE0, 0(CRET1) // No FPU: value is held in SAVE0/SAVE1.
 1094 | stw SAVE1, 4(CRET1)
 1095 |.endif
 1096 | b ->cont_nop
1097 |
988 |//-- Comparison metamethods --------------------------------------------- 1098 |//-- Comparison metamethods ---------------------------------------------
989 | 1099 |
990 |->vmeta_comp: 1100 |->vmeta_comp:
@@ -1021,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
1021 | 1131 |
1022 |->cont_ra: // RA = resultptr 1132 |->cont_ra: // RA = resultptr
1023 | lwz INS, -4(PC) 1133 | lwz INS, -4(PC)
1134 |.if FPU
1024 | lfd f0, 0(RA) 1135 | lfd f0, 0(RA)
1136 |.else
1137 | lwz CARG1, 0(RA)
1138 | lwz CARG2, 4(RA)
1139 |.endif
1025 | decode_RA8 TMP1, INS 1140 | decode_RA8 TMP1, INS
1141 |.if FPU
1026 | stfdx f0, BASE, TMP1 1142 | stfdx f0, BASE, TMP1
1143 |.else
1144 | stwux CARG1, TMP1, BASE
1145 | stw CARG2, 4(TMP1)
1146 |.endif
1027 | b ->cont_nop 1147 | b ->cont_nop
1028 | 1148 |
1029 |->cont_condt: // RA = resultptr 1149 |->cont_condt: // RA = resultptr
@@ -1063,6 +1183,16 @@ static void build_subroutines(BuildCtx *ctx)
1063 | b <3 1183 | b <3
1064 |.endif 1184 |.endif
1065 | 1185 |
 1186 |->vmeta_istype: // Calls lj_meta_istype(L, RA>>3, RD>>3).
 1187 | subi PC, PC, 4 // Step PC back by 4 bytes before saving it.
 1188 | stp BASE, L->base
 1189 | srwi CARG2, RA, 3 // Second C argument (BCReg ra) = RA >> 3.
 1190 | mr CARG1, L
 1191 | srwi CARG3, RD, 3 // Third C argument (BCReg tp) = RD >> 3.
 1192 | stw PC, SAVE_PC
 1193 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
 1194 | b ->cont_nop
1195 |
1066 |//-- Arithmetic metamethods --------------------------------------------- 1196 |//-- Arithmetic metamethods ---------------------------------------------
1067 | 1197 |
1068 |->vmeta_arith_nv: 1198 |->vmeta_arith_nv:
@@ -1219,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
1219 |.macro .ffunc_n, name 1349 |.macro .ffunc_n, name
1220 |->ff_ .. name: 1350 |->ff_ .. name:
1221 | cmplwi NARGS8:RC, 8 1351 | cmplwi NARGS8:RC, 8
1222 | lwz CARG3, 0(BASE) 1352 | lwz CARG1, 0(BASE)
1353 |.if FPU
1223 | lfd FARG1, 0(BASE) 1354 | lfd FARG1, 0(BASE)
1355 |.else
1356 | lwz CARG2, 4(BASE)
1357 |.endif
1224 | blt ->fff_fallback 1358 | blt ->fff_fallback
1225 | checknum CARG3; bge ->fff_fallback 1359 | checknum CARG1; bge ->fff_fallback
1226 |.endmacro 1360 |.endmacro
1227 | 1361 |
1228 |.macro .ffunc_nn, name 1362 |.macro .ffunc_nn, name
1229 |->ff_ .. name: 1363 |->ff_ .. name:
1230 | cmplwi NARGS8:RC, 16 1364 | cmplwi NARGS8:RC, 16
1231 | lwz CARG3, 0(BASE) 1365 | lwz CARG1, 0(BASE)
1366 |.if FPU
1232 | lfd FARG1, 0(BASE) 1367 | lfd FARG1, 0(BASE)
1233 | lwz CARG4, 8(BASE) 1368 | lwz CARG3, 8(BASE)
1234 | lfd FARG2, 8(BASE) 1369 | lfd FARG2, 8(BASE)
1370 |.else
1371 | lwz CARG2, 4(BASE)
1372 | lwz CARG3, 8(BASE)
1373 | lwz CARG4, 12(BASE)
1374 |.endif
1235 | blt ->fff_fallback 1375 | blt ->fff_fallback
1376 | checknum CARG1; bge ->fff_fallback
1236 | checknum CARG3; bge ->fff_fallback 1377 | checknum CARG3; bge ->fff_fallback
1237 | checknum CARG4; bge ->fff_fallback
1238 |.endmacro 1378 |.endmacro
1239 | 1379 |
1240 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 1380 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1255,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
1255 | bge cr1, ->fff_fallback 1395 | bge cr1, ->fff_fallback
1256 | stw CARG3, 0(RA) 1396 | stw CARG3, 0(RA)
1257 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1397 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1398 | addi TMP1, BASE, 8
1399 | add TMP2, RA, NARGS8:RC
1258 | stw CARG1, 4(RA) 1400 | stw CARG1, 4(RA)
1259 | beq ->fff_res // Done if exactly 1 argument. 1401 | beq ->fff_res // Done if exactly 1 argument.
1260 | li TMP1, 8
1261 | subi RC, RC, 8
1262 |1: 1402 |1:
1263 | cmplw TMP1, RC 1403 | cmplw TMP1, TMP2
1264 | lfdx f0, BASE, TMP1 1404 |.if FPU
1265 | stfdx f0, RA, TMP1 1405 | lfd f0, 0(TMP1)
1406 | stfd f0, -8(TMP1) // Destination is 8 bytes below the source slot.
1407 |.else
1408 | lwz CARG1, 0(TMP1)
1409 | lwz CARG2, 4(TMP1)
1410 | stw CARG1, -8(TMP1)
1411 | stw CARG2, -4(TMP1)
1412 |.endif
1266 | addi TMP1, TMP1, 8 1413 | addi TMP1, TMP1, 8
1267 | bney <1 1414 | bney <1
1268 | b ->fff_res 1415 | b ->fff_res
@@ -1277,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
1277 | orc TMP1, TMP2, TMP0 1424 | orc TMP1, TMP2, TMP0
1278 | addi TMP1, TMP1, ~LJ_TISNUM+1 1425 | addi TMP1, TMP1, ~LJ_TISNUM+1
1279 | slwi TMP1, TMP1, 3 1426 | slwi TMP1, TMP1, 3
1427 |.if FPU
1280 | la TMP2, CFUNC:RB->upvalue 1428 | la TMP2, CFUNC:RB->upvalue
1281 | lfdx FARG1, TMP2, TMP1 1429 | lfdx FARG1, TMP2, TMP1
1430 |.else
1431 | add TMP1, CFUNC:RB, TMP1
1432 | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
1433 | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
1434 |.endif
1282 | b ->fff_resn 1435 | b ->fff_resn
1283 | 1436 |
1284 |//-- Base library: getters and setters --------------------------------- 1437 |//-- Base library: getters and setters ---------------------------------
@@ -1294,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx)
1294 | beq ->fff_restv 1447 | beq ->fff_restv
1295 | lwz TMP0, TAB:CARG1->hmask 1448 | lwz TMP0, TAB:CARG1->hmask
1296 | li CARG3, LJ_TTAB // Use metatable as default result. 1449 | li CARG3, LJ_TTAB // Use metatable as default result.
1297 | lwz TMP1, STR:RC->hash 1450 | lwz TMP1, STR:RC->sid
1298 | lwz NODE:TMP2, TAB:CARG1->node 1451 | lwz NODE:TMP2, TAB:CARG1->node
1299 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1452 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
1300 | slwi TMP0, TMP1, 5 1453 | slwi TMP0, TMP1, 5
1301 | slwi TMP1, TMP1, 3 1454 | slwi TMP1, TMP1, 3
1302 | sub TMP1, TMP0, TMP1 1455 | sub TMP1, TMP0, TMP1
@@ -1356,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
1356 | mr CARG1, L 1509 | mr CARG1, L
1357 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1510 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1358 | // Returns cTValue *. 1511 | // Returns cTValue *.
1512 |.if FPU
1359 | lfd FARG1, 0(CRET1) 1513 | lfd FARG1, 0(CRET1)
1514 |.else
1515 | lwz CARG2, 4(CRET1)
1516 | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
1517 |.endif
1360 | b ->fff_resn 1518 | b ->fff_resn
1361 | 1519 |
1362 |//-- Base library: conversions ------------------------------------------ 1520 |//-- Base library: conversions ------------------------------------------
@@ -1365,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
1365 | // Only handles the number case inline (without a base argument). 1523 | // Only handles the number case inline (without a base argument).
1366 | cmplwi NARGS8:RC, 8 1524 | cmplwi NARGS8:RC, 8
1367 | lwz CARG1, 0(BASE) 1525 | lwz CARG1, 0(BASE)
1526 |.if FPU
1368 | lfd FARG1, 0(BASE) 1527 | lfd FARG1, 0(BASE)
1528 |.else
1529 | lwz CARG2, 4(BASE)
1530 |.endif
1369 | bne ->fff_fallback // Exactly one argument. 1531 | bne ->fff_fallback // Exactly one argument.
1370 | checknum CARG1; bgt ->fff_fallback 1532 | checknum CARG1; bgt ->fff_fallback
1371 | b ->fff_resn 1533 | b ->fff_resn
@@ -1387,9 +1549,9 @@ static void build_subroutines(BuildCtx *ctx)
1387 | mr CARG1, L 1549 | mr CARG1, L
1388 | mr CARG2, BASE 1550 | mr CARG2, BASE
1389 |.if DUALNUM 1551 |.if DUALNUM
1390 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1552 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1391 |.else 1553 |.else
1392 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1554 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1393 |.endif 1555 |.endif
1394 | // Returns GCstr *. 1556 | // Returns GCstr *.
1395 | li CARG3, LJ_TSTR 1557 | li CARG3, LJ_TSTR
@@ -1416,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx)
1416 | cmplwi CRET1, 0 1578 | cmplwi CRET1, 0
1417 | li CARG3, LJ_TNIL 1579 | li CARG3, LJ_TNIL
1418 | beq ->fff_restv // End of traversal: return nil. 1580 | beq ->fff_restv // End of traversal: return nil.
1419 | lfd f0, 8(BASE) // Copy key and value to results.
1420 | la RA, -8(BASE) 1581 | la RA, -8(BASE)
1582 |.if FPU
1583 | lfd f0, 8(BASE) // Copy key and value to results.
1421 | lfd f1, 16(BASE) 1584 | lfd f1, 16(BASE)
1422 | stfd f0, 0(RA) 1585 | stfd f0, 0(RA)
1423 | li RD, (2+1)*8
1424 | stfd f1, 8(RA) 1586 | stfd f1, 8(RA)
1587 |.else
1588 | lwz CARG1, 8(BASE)
1589 | lwz CARG2, 12(BASE)
1590 | lwz CARG3, 16(BASE)
1591 | lwz CARG4, 20(BASE)
1592 | stw CARG1, 0(RA)
1593 | stw CARG2, 4(RA)
1594 | stw CARG3, 8(RA)
1595 | stw CARG4, 12(RA)
1596 |.endif
1597 | li RD, (2+1)*8
1425 | b ->fff_res 1598 | b ->fff_res
1426 | 1599 |
1427 |.ffunc_1 pairs 1600 |.ffunc_1 pairs
@@ -1430,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx)
1430 | bne ->fff_fallback 1603 | bne ->fff_fallback
1431#if LJ_52 1604#if LJ_52
1432 | lwz TAB:TMP2, TAB:CARG1->metatable 1605 | lwz TAB:TMP2, TAB:CARG1->metatable
1606 |.if FPU
1433 | lfd f0, CFUNC:RB->upvalue[0] 1607 | lfd f0, CFUNC:RB->upvalue[0]
1608 |.else
1609 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1610 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1611 |.endif
1434 | cmplwi TAB:TMP2, 0 1612 | cmplwi TAB:TMP2, 0
1435 | la RA, -8(BASE) 1613 | la RA, -8(BASE)
1436 | bne ->fff_fallback 1614 | bne ->fff_fallback
1437#else 1615#else
1616 |.if FPU
1438 | lfd f0, CFUNC:RB->upvalue[0] 1617 | lfd f0, CFUNC:RB->upvalue[0]
1618 |.else
1619 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1620 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1621 |.endif
1439 | la RA, -8(BASE) 1622 | la RA, -8(BASE)
1440#endif 1623#endif
1441 | stw TISNIL, 8(BASE) 1624 | stw TISNIL, 8(BASE)
1442 | li RD, (3+1)*8 1625 | li RD, (3+1)*8
1626 |.if FPU
1443 | stfd f0, 0(RA) 1627 | stfd f0, 0(RA)
1628 |.else
1629 | stw TMP0, 0(RA)
1630 | stw TMP1, 4(RA)
1631 |.endif
1444 | b ->fff_res 1632 | b ->fff_res
1445 | 1633 |
1446 |.ffunc ipairs_aux 1634 |.ffunc ipairs_aux
@@ -1486,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx)
1486 | stfd FARG2, 0(RA) 1674 | stfd FARG2, 0(RA)
1487 |.endif 1675 |.endif
1488 | ble >2 // Not in array part? 1676 | ble >2 // Not in array part?
1677 |.if FPU
1489 | lwzx TMP2, TMP1, TMP3 1678 | lwzx TMP2, TMP1, TMP3
1490 | lfdx f0, TMP1, TMP3 1679 | lfdx f0, TMP1, TMP3
1680 |.else
1681 | lwzux TMP2, TMP1, TMP3
1682 | lwz TMP3, 4(TMP1)
1683 |.endif
1491 |1: 1684 |1:
1492 | checknil TMP2 1685 | checknil TMP2
1493 | li RD, (0+1)*8 1686 | li RD, (0+1)*8
1494 | beq ->fff_res // End of iteration, return 0 results. 1687 | beq ->fff_res // End of iteration, return 0 results.
1495 | li RD, (2+1)*8 1688 | li RD, (2+1)*8
1689 |.if FPU
1496 | stfd f0, 8(RA) 1690 | stfd f0, 8(RA)
1691 |.else
1692 | stw TMP2, 8(RA)
1693 | stw TMP3, 12(RA)
1694 |.endif
1497 | b ->fff_res 1695 | b ->fff_res
1498 |2: // Check for empty hash part first. Otherwise call C function. 1696 |2: // Check for empty hash part first. Otherwise call C function.
1499 | lwz TMP0, TAB:CARG1->hmask 1697 | lwz TMP0, TAB:CARG1->hmask
@@ -1507,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx)
1507 | li RD, (0+1)*8 1705 | li RD, (0+1)*8
1508 | beq ->fff_res 1706 | beq ->fff_res
1509 | lwz TMP2, 0(CRET1) 1707 | lwz TMP2, 0(CRET1)
1708 |.if FPU
1510 | lfd f0, 0(CRET1) 1709 | lfd f0, 0(CRET1)
1710 |.else
1711 | lwz TMP3, 4(CRET1)
1712 |.endif
1511 | b <1 1713 | b <1
1512 | 1714 |
1513 |.ffunc_1 ipairs 1715 |.ffunc_1 ipairs
@@ -1516,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx)
1516 | bne ->fff_fallback 1718 | bne ->fff_fallback
1517#if LJ_52 1719#if LJ_52
1518 | lwz TAB:TMP2, TAB:CARG1->metatable 1720 | lwz TAB:TMP2, TAB:CARG1->metatable
1721 |.if FPU
1519 | lfd f0, CFUNC:RB->upvalue[0] 1722 | lfd f0, CFUNC:RB->upvalue[0]
1723 |.else
1724 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1725 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1726 |.endif
1520 | cmplwi TAB:TMP2, 0 1727 | cmplwi TAB:TMP2, 0
1521 | la RA, -8(BASE) 1728 | la RA, -8(BASE)
1522 | bne ->fff_fallback 1729 | bne ->fff_fallback
1523#else 1730#else
1731 |.if FPU
1524 | lfd f0, CFUNC:RB->upvalue[0] 1732 | lfd f0, CFUNC:RB->upvalue[0]
1733 |.else
1734 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1735 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1736 |.endif
1525 | la RA, -8(BASE) 1737 | la RA, -8(BASE)
1526#endif 1738#endif
1527 |.if DUALNUM 1739 |.if DUALNUM
@@ -1531,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx)
1531 |.endif 1743 |.endif
1532 | stw ZERO, 12(BASE) 1744 | stw ZERO, 12(BASE)
1533 | li RD, (3+1)*8 1745 | li RD, (3+1)*8
1746 |.if FPU
1534 | stfd f0, 0(RA) 1747 | stfd f0, 0(RA)
1748 |.else
1749 | stw TMP0, 0(RA)
1750 | stw TMP1, 4(RA)
1751 |.endif
1535 | b ->fff_res 1752 | b ->fff_res
1536 | 1753 |
1537 |//-- Base library: catch errors ---------------------------------------- 1754 |//-- Base library: catch errors ----------------------------------------
@@ -1550,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx)
1550 | 1767 |
1551 |.ffunc xpcall 1768 |.ffunc xpcall
1552 | cmplwi NARGS8:RC, 16 1769 | cmplwi NARGS8:RC, 16
1553 | lwz CARG4, 8(BASE) 1770 | lwz CARG3, 8(BASE)
1771 |.if FPU
1554 | lfd FARG2, 8(BASE) 1772 | lfd FARG2, 8(BASE)
1555 | lfd FARG1, 0(BASE) 1773 | lfd FARG1, 0(BASE)
1774 |.else
1775 | lwz CARG1, 0(BASE)
1776 | lwz CARG2, 4(BASE)
1777 | lwz CARG4, 12(BASE)
1778 |.endif
1556 | blt ->fff_fallback 1779 | blt ->fff_fallback
1557 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1780 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1558 | mr TMP2, BASE 1781 | mr TMP2, BASE
1559 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1782 | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
1560 | la BASE, 16(BASE) 1783 | la BASE, 16(BASE)
1561 | // Remember active hook before pcall. 1784 | // Remember active hook before pcall.
1562 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1785 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1786 |.if FPU
1563 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1787 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1564 | subi NARGS8:RC, NARGS8:RC, 16
1565 | stfd FARG1, 8(TMP2) 1788 | stfd FARG1, 8(TMP2)
1789 |.else
1790 | stw CARG3, 0(TMP2)
1791 | stw CARG4, 4(TMP2)
1792 | stw CARG1, 8(TMP2)
1793 | stw CARG2, 12(TMP2)
1794 |.endif
1795 | subi NARGS8:RC, NARGS8:RC, 16
1566 | addi PC, TMP1, 16+FRAME_PCALL 1796 | addi PC, TMP1, 16+FRAME_PCALL
1567 | b ->vm_call_dispatch 1797 | b ->vm_call_dispatch
1568 | 1798 |
@@ -1605,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx)
1605 | stp BASE, L->top 1835 | stp BASE, L->top
1606 |2: // Move args to coroutine. 1836 |2: // Move args to coroutine.
1607 | cmpw TMP1, NARGS8:RC 1837 | cmpw TMP1, NARGS8:RC
1838 |.if FPU
1608 | lfdx f0, BASE, TMP1 1839 | lfdx f0, BASE, TMP1
1840 |.else
1841 | add CARG3, BASE, TMP1
1842 | lwz TMP2, 0(CARG3)
1843 | lwz TMP3, 4(CARG3)
1844 |.endif
1609 | beq >3 1845 | beq >3
1846 |.if FPU
1610 | stfdx f0, CARG2, TMP1 1847 | stfdx f0, CARG2, TMP1
1848 |.else
1849 | add CARG3, CARG2, TMP1
1850 | stw TMP2, 0(CARG3)
1851 | stw TMP3, 4(CARG3)
1852 |.endif
1611 | addi TMP1, TMP1, 8 1853 | addi TMP1, TMP1, 8
1612 | b <2 1854 | b <2
1613 |3: 1855 |3:
@@ -1622,6 +1864,7 @@ static void build_subroutines(BuildCtx *ctx)
1622 | lp TMP3, L:SAVE0->top 1864 | lp TMP3, L:SAVE0->top
1623 | li_vmstate INTERP 1865 | li_vmstate INTERP
1624 | lp BASE, L->base 1866 | lp BASE, L->base
1867 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
1625 | st_vmstate 1868 | st_vmstate
1626 | bgt >8 1869 | bgt >8
1627 | sub RD, TMP3, TMP2 1870 | sub RD, TMP3, TMP2
@@ -1637,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx)
1637 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1880 | stp TMP2, L:SAVE0->top // Clear coroutine stack.
1638 |5: // Move results from coroutine. 1881 |5: // Move results from coroutine.
1639 | cmplw TMP1, TMP3 1882 | cmplw TMP1, TMP3
1883 |.if FPU
1640 | lfdx f0, TMP2, TMP1 1884 | lfdx f0, TMP2, TMP1
1641 | stfdx f0, BASE, TMP1 1885 | stfdx f0, BASE, TMP1
1886 |.else
1887 | add CARG3, TMP2, TMP1
1888 | lwz CARG1, 0(CARG3)
1889 | lwz CARG2, 4(CARG3)
1890 | add CARG3, BASE, TMP1
1891 | stw CARG1, 0(CARG3)
1892 | stw CARG2, 4(CARG3)
1893 |.endif
1642 | addi TMP1, TMP1, 8 1894 | addi TMP1, TMP1, 8
1643 | bne <5 1895 | bne <5
1644 |6: 1896 |6:
@@ -1663,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx)
1663 | andix. TMP0, PC, FRAME_TYPE 1915 | andix. TMP0, PC, FRAME_TYPE
1664 | la TMP3, -8(TMP3) 1916 | la TMP3, -8(TMP3)
1665 | li TMP1, LJ_TFALSE 1917 | li TMP1, LJ_TFALSE
1918 |.if FPU
1666 | lfd f0, 0(TMP3) 1919 | lfd f0, 0(TMP3)
1920 |.else
1921 | lwz CARG1, 0(TMP3)
1922 | lwz CARG2, 4(TMP3)
1923 |.endif
1667 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1924 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
1668 | li RD, (2+1)*8 1925 | li RD, (2+1)*8
1669 | stw TMP1, -8(BASE) // Prepend false to results. 1926 | stw TMP1, -8(BASE) // Prepend false to results.
1670 | la RA, -8(BASE) 1927 | la RA, -8(BASE)
1928 |.if FPU
1671 | stfd f0, 0(BASE) // Copy error message. 1929 | stfd f0, 0(BASE) // Copy error message.
1930 |.else
1931 | stw CARG1, 0(BASE) // Copy error message.
1932 | stw CARG2, 4(BASE)
1933 |.endif
1672 | b <7 1934 | b <7
1673 |.else 1935 |.else
1674 | mr CARG1, L 1936 | mr CARG1, L
@@ -1847,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx)
1847 | lus CARG1, 0x8000 // -(2^31). 2109 | lus CARG1, 0x8000 // -(2^31).
1848 | beqy ->fff_resi 2110 | beqy ->fff_resi
1849 |5: 2111 |5:
2112 |.if FPU
1850 | lfd FARG1, 0(BASE) 2113 | lfd FARG1, 0(BASE)
2114 |.else
2115 | lwz CARG1, 0(BASE)
2116 | lwz CARG2, 4(BASE)
2117 |.endif
1851 | blex func 2118 | blex func
1852 | b ->fff_resn 2119 | b ->fff_resn
1853 |.endmacro 2120 |.endmacro
@@ -1871,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx)
1871 | 2138 |
1872 |.ffunc math_log 2139 |.ffunc math_log
1873 | cmplwi NARGS8:RC, 8 2140 | cmplwi NARGS8:RC, 8
1874 | lwz CARG3, 0(BASE) 2141 | lwz CARG1, 0(BASE)
1875 | lfd FARG1, 0(BASE)
1876 | bne ->fff_fallback // Need exactly 1 argument. 2142 | bne ->fff_fallback // Need exactly 1 argument.
1877 | checknum CARG3; bge ->fff_fallback 2143 | checknum CARG1; bge ->fff_fallback
2144 |.if FPU
2145 | lfd FARG1, 0(BASE)
2146 |.else
2147 | lwz CARG2, 4(BASE)
2148 |.endif
1878 | blex log 2149 | blex log
1879 | b ->fff_resn 2150 | b ->fff_resn
1880 | 2151 |
@@ -1893,26 +2164,27 @@ static void build_subroutines(BuildCtx *ctx)
1893 | math_extern2 atan2 2164 | math_extern2 atan2
1894 | math_extern2 fmod 2165 | math_extern2 fmod
1895 | 2166 |
1896 |->ff_math_deg:
1897 |.ffunc_n math_rad
1898 | lfd FARG2, CFUNC:RB->upvalue[0]
1899 | fmul FARG1, FARG1, FARG2
1900 | b ->fff_resn
1901 |
1902 |.if DUALNUM 2167 |.if DUALNUM
1903 |.ffunc math_ldexp 2168 |.ffunc math_ldexp
1904 | cmplwi NARGS8:RC, 16 2169 | cmplwi NARGS8:RC, 16
1905 | lwz CARG3, 0(BASE) 2170 | lwz TMP0, 0(BASE)
2171 |.if FPU
1906 | lfd FARG1, 0(BASE) 2172 | lfd FARG1, 0(BASE)
1907 | lwz CARG4, 8(BASE) 2173 |.else
2174 | lwz CARG1, 0(BASE)
2175 | lwz CARG2, 4(BASE)
2176 |.endif
2177 | lwz TMP1, 8(BASE)
1908 |.if GPR64 2178 |.if GPR64
1909 | lwz CARG2, 12(BASE) 2179 | lwz CARG2, 12(BASE)
1910 |.else 2180 |.elif FPU
1911 | lwz CARG1, 12(BASE) 2181 | lwz CARG1, 12(BASE)
2182 |.else
2183 | lwz CARG3, 12(BASE)
1912 |.endif 2184 |.endif
1913 | blt ->fff_fallback 2185 | blt ->fff_fallback
1914 | checknum CARG3; bge ->fff_fallback 2186 | checknum TMP0; bge ->fff_fallback
1915 | checknum CARG4; bne ->fff_fallback 2187 | checknum TMP1; bne ->fff_fallback
1916 |.else 2188 |.else
1917 |.ffunc_nn math_ldexp 2189 |.ffunc_nn math_ldexp
1918 |.if GPR64 2190 |.if GPR64
@@ -1927,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx)
1927 |.ffunc_n math_frexp 2199 |.ffunc_n math_frexp
1928 |.if GPR64 2200 |.if GPR64
1929 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 2201 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1930 |.else 2202 |.elif FPU
1931 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 2203 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
2204 |.else
2205 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1932 |.endif 2206 |.endif
1933 | lwz PC, FRAME_PC(BASE) 2207 | lwz PC, FRAME_PC(BASE)
1934 | blex frexp 2208 | blex frexp
@@ -1937,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx)
1937 |.if not DUALNUM 2211 |.if not DUALNUM
1938 | tonum_i FARG2, TMP1 2212 | tonum_i FARG2, TMP1
1939 |.endif 2213 |.endif
2214 |.if FPU
1940 | stfd FARG1, 0(RA) 2215 | stfd FARG1, 0(RA)
2216 |.else
2217 | stw CRET1, 0(RA)
2218 | stw CRET2, 4(RA)
2219 |.endif
1941 | li RD, (2+1)*8 2220 | li RD, (2+1)*8
1942 |.if DUALNUM 2221 |.if DUALNUM
1943 | stw TISNUM, 8(RA) 2222 | stw TISNUM, 8(RA)
@@ -1950,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx)
1950 |.ffunc_n math_modf 2229 |.ffunc_n math_modf
1951 |.if GPR64 2230 |.if GPR64
1952 | la CARG2, -8(BASE) 2231 | la CARG2, -8(BASE)
1953 |.else 2232 |.elif FPU
1954 | la CARG1, -8(BASE) 2233 | la CARG1, -8(BASE)
2234 |.else
2235 | la CARG3, -8(BASE)
1955 |.endif 2236 |.endif
1956 | lwz PC, FRAME_PC(BASE) 2237 | lwz PC, FRAME_PC(BASE)
1957 | blex modf 2238 | blex modf
1958 | la RA, -8(BASE) 2239 | la RA, -8(BASE)
2240 |.if FPU
1959 | stfd FARG1, 0(BASE) 2241 | stfd FARG1, 0(BASE)
2242 |.else
2243 | stw CRET1, 0(BASE)
2244 | stw CRET2, 4(BASE)
2245 |.endif
1960 | li RD, (2+1)*8 2246 | li RD, (2+1)*8
1961 | b ->fff_res 2247 | b ->fff_res
1962 | 2248 |
@@ -1964,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx)
1964 |.if DUALNUM 2250 |.if DUALNUM
1965 | .ffunc_1 name 2251 | .ffunc_1 name
1966 | checknum CARG3 2252 | checknum CARG3
1967 | addi TMP1, BASE, 8 2253 | addi SAVE0, BASE, 8
1968 | add TMP2, BASE, NARGS8:RC 2254 | add SAVE1, BASE, NARGS8:RC
1969 | bne >4 2255 | bne >4
1970 |1: // Handle integers. 2256 |1: // Handle integers.
1971 | lwz CARG4, 0(TMP1) 2257 | lwz CARG4, 0(SAVE0)
1972 | cmplw cr1, TMP1, TMP2 2258 | cmplw cr1, SAVE0, SAVE1
1973 | lwz CARG2, 4(TMP1) 2259 | lwz CARG2, 4(SAVE0)
1974 | bge cr1, ->fff_resi 2260 | bge cr1, ->fff_resi
1975 | checknum CARG4 2261 | checknum CARG4
1976 | xoris TMP0, CARG1, 0x8000 2262 | xoris TMP0, CARG1, 0x8000
@@ -1987,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx)
1987 |.if GPR64 2273 |.if GPR64
1988 | rldicl CARG1, CARG1, 0, 32 2274 | rldicl CARG1, CARG1, 0, 32
1989 |.endif 2275 |.endif
1990 | addi TMP1, TMP1, 8 2276 | addi SAVE0, SAVE0, 8
1991 | b <1 2277 | b <1
1992 |3: 2278 |3:
1993 | bge ->fff_fallback 2279 | bge ->fff_fallback
1994 | // Convert intermediate result to number and continue below. 2280 | // Convert intermediate result to number and continue below.
2281 |.if FPU
1995 | tonum_i FARG1, CARG1 2282 | tonum_i FARG1, CARG1
1996 | lfd FARG2, 0(TMP1) 2283 | lfd FARG2, 0(SAVE0)
2284 |.else
2285 | mr CARG2, CARG1
2286 | bl ->vm_sfi2d_1
2287 | lwz CARG3, 0(SAVE0)
2288 | lwz CARG4, 4(SAVE0)
2289 |.endif
1997 | b >6 2290 | b >6
1998 |4: 2291 |4:
2292 |.if FPU
1999 | lfd FARG1, 0(BASE) 2293 | lfd FARG1, 0(BASE)
2294 |.else
2295 | lwz CARG1, 0(BASE)
2296 | lwz CARG2, 4(BASE)
2297 |.endif
2000 | bge ->fff_fallback 2298 | bge ->fff_fallback
2001 |5: // Handle numbers. 2299 |5: // Handle numbers.
2002 | lwz CARG4, 0(TMP1) 2300 | lwz CARG3, 0(SAVE0)
2003 | cmplw cr1, TMP1, TMP2 2301 | cmplw cr1, SAVE0, SAVE1
2004 | lfd FARG2, 0(TMP1) 2302 |.if FPU
2303 | lfd FARG2, 0(SAVE0)
2304 |.else
2305 | lwz CARG4, 4(SAVE0)
2306 |.endif
2005 | bge cr1, ->fff_resn 2307 | bge cr1, ->fff_resn
2006 | checknum CARG4; bge >7 2308 | checknum CARG3; bge >7
2007 |6: 2309 |6:
2008 | fsub f0, FARG1, FARG2 2310 | addi SAVE0, SAVE0, 8
2009 | addi TMP1, TMP1, 8 2311 |.if FPU
2010 |.if ismax 2312 |.if ismax
2313 | fsub f0, FARG1, FARG2
2314 |.else
2315 | fsub f0, FARG2, FARG1
2316 |.endif
2011 | fsel FARG1, f0, FARG1, FARG2 2317 | fsel FARG1, f0, FARG1, FARG2
2012 |.else 2318 |.else
2013 | fsel FARG1, f0, FARG2, FARG1 2319 | stw CARG1, SFSAVE_1
2320 | stw CARG2, SFSAVE_2
2321 | stw CARG3, SFSAVE_3
2322 | stw CARG4, SFSAVE_4
2323 | blex __ledf2
2324 | cmpwi CRET1, 0
2325 |.if ismax
2326 | blt >8
2327 |.else
2328 | bge >8
2329 |.endif
2330 | lwz CARG1, SFSAVE_1
2331 | lwz CARG2, SFSAVE_2
2332 | b <5
2333 |8:
2334 | lwz CARG1, SFSAVE_3
2335 | lwz CARG2, SFSAVE_4
2014 |.endif 2336 |.endif
2015 | b <5 2337 | b <5
2016 |7: // Convert integer to number and continue above. 2338 |7: // Convert integer to number and continue above.
2017 | lwz CARG2, 4(TMP1) 2339 | lwz CARG3, 4(SAVE0)
2018 | bne ->fff_fallback 2340 | bne ->fff_fallback
2019 | tonum_i FARG2, CARG2 2341 |.if FPU
2342 | tonum_i FARG2, CARG3
2343 |.else
2344 | bl ->vm_sfi2d_2
2345 |.endif
2020 | b <6 2346 | b <6
2021 |.else 2347 |.else
2022 | .ffunc_n name 2348 | .ffunc_n name
@@ -2028,13 +2354,13 @@ static void build_subroutines(BuildCtx *ctx)
2028 | checknum CARG2 2354 | checknum CARG2
2029 | bge cr1, ->fff_resn 2355 | bge cr1, ->fff_resn
2030 | bge ->fff_fallback 2356 | bge ->fff_fallback
2031 | fsub f0, FARG1, FARG2
2032 | addi TMP1, TMP1, 8
2033 |.if ismax 2357 |.if ismax
2034 | fsel FARG1, f0, FARG1, FARG2 2358 | fsub f0, FARG1, FARG2
2035 |.else 2359 |.else
2036 | fsel FARG1, f0, FARG2, FARG1 2360 | fsub f0, FARG2, FARG1
2037 |.endif 2361 |.endif
2362 | addi TMP1, TMP1, 8
2363 | fsel FARG1, f0, FARG1, FARG2
2038 | b <1 2364 | b <1
2039 |.endif 2365 |.endif
2040 |.endmacro 2366 |.endmacro
@@ -2044,11 +2370,6 @@ static void build_subroutines(BuildCtx *ctx)
2044 | 2370 |
2045 |//-- String library ----------------------------------------------------- 2371 |//-- String library -----------------------------------------------------
2046 | 2372 |
2047 |.ffunc_1 string_len
2048 | checkstr CARG3; bne ->fff_fallback
2049 | lwz CRET1, STR:CARG1->len
2050 | b ->fff_resi
2051 |
2052 |.ffunc string_byte // Only handle the 1-arg case here. 2373 |.ffunc string_byte // Only handle the 1-arg case here.
2053 | cmplwi NARGS8:RC, 8 2374 | cmplwi NARGS8:RC, 8
2054 | lwz CARG3, 0(BASE) 2375 | lwz CARG3, 0(BASE)
@@ -2103,6 +2424,7 @@ static void build_subroutines(BuildCtx *ctx)
2103 | stp BASE, L->base 2424 | stp BASE, L->base
2104 | stw PC, SAVE_PC 2425 | stw PC, SAVE_PC
2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2426 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2427 |->fff_resstr:
2106 | // Returns GCstr *. 2428 | // Returns GCstr *.
2107 | lp BASE, L->base 2429 | lp BASE, L->base
2108 | li CARG3, LJ_TSTR 2430 | li CARG3, LJ_TSTR
@@ -2180,114 +2502,29 @@ static void build_subroutines(BuildCtx *ctx)
2180 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2502 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2181 | b <3 2503 | b <3
2182 | 2504 |
2183 |.ffunc string_rep // Only handle the 1-char case inline. 2505 |.macro ffstring_op, name
2184 | ffgccheck 2506 | .ffunc string_ .. name
2185 | cmplwi NARGS8:RC, 16
2186 | lwz TMP0, 0(BASE)
2187 | lwz STR:CARG1, 4(BASE)
2188 | lwz CARG4, 8(BASE)
2189 |.if DUALNUM
2190 | lwz CARG3, 12(BASE)
2191 |.else
2192 | lfd FARG2, 8(BASE)
2193 |.endif
2194 | bne ->fff_fallback // Exactly 2 arguments.
2195 | checkstr TMP0; bne ->fff_fallback
2196 |.if DUALNUM
2197 | checknum CARG4; bne ->fff_fallback
2198 |.else
2199 | checknum CARG4; bge ->fff_fallback
2200 | toint CARG3, FARG2
2201 |.endif
2202 | lwz TMP0, STR:CARG1->len
2203 | cmpwi CARG3, 0
2204 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2205 | ble >2 // Count <= 0? (or non-int)
2206 | cmplwi TMP0, 1
2207 | subi TMP2, CARG3, 1
2208 | blt >2 // Zero length string?
2209 | cmplw cr1, TMP1, CARG3
2210 | bne ->fff_fallback // Fallback for > 1-char strings.
2211 | lbz TMP0, STR:CARG1[1]
2212 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2213 | blt cr1, ->fff_fallback
2214 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2215 | cmplwi TMP2, 0
2216 | stbx TMP0, CARG2, TMP2
2217 | subi TMP2, TMP2, 1
2218 | bne <1
2219 | b ->fff_newstr
2220 |2: // Return empty string.
2221 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2222 | li CARG3, LJ_TSTR
2223 | b ->fff_restv
2224 |
2225 |.ffunc string_reverse
2226 | ffgccheck 2507 | ffgccheck
2227 | cmplwi NARGS8:RC, 8 2508 | cmplwi NARGS8:RC, 8
2228 | lwz CARG3, 0(BASE) 2509 | lwz CARG3, 0(BASE)
2229 | lwz STR:CARG1, 4(BASE) 2510 | lwz STR:CARG2, 4(BASE)
2230 | blt ->fff_fallback 2511 | blt ->fff_fallback
2231 | checkstr CARG3 2512 | checkstr CARG3
2232 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2513 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2233 | bne ->fff_fallback 2514 | bne ->fff_fallback
2234 | lwz CARG3, STR:CARG1->len 2515 | lwz TMP0, SBUF:CARG1->b
2235 | la CARG1, #STR(STR:CARG1) 2516 | stw L, SBUF:CARG1->L
2236 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2517 | stp BASE, L->base
2237 | li TMP2, 0 2518 | stw PC, SAVE_PC
2238 | cmplw TMP1, CARG3 2519 | stw TMP0, SBUF:CARG1->w
2239 | subi TMP3, CARG3, 1 2520 | bl extern lj_buf_putstr_ .. name
2240 | blt ->fff_fallback 2521 | bl extern lj_buf_tostr
2241 |1: // Reverse string copy. 2522 | b ->fff_resstr
2242 | cmpwi TMP3, 0
2243 | lbzx TMP1, CARG1, TMP2
2244 | blty ->fff_newstr
2245 | stbx TMP1, CARG2, TMP3
2246 | subi TMP3, TMP3, 1
2247 | addi TMP2, TMP2, 1
2248 | b <1
2249 |
2250 |.macro ffstring_case, name, lo
2251 | .ffunc name
2252 | ffgccheck
2253 | cmplwi NARGS8:RC, 8
2254 | lwz CARG3, 0(BASE)
2255 | lwz STR:CARG1, 4(BASE)
2256 | blt ->fff_fallback
2257 | checkstr CARG3
2258 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2259 | bne ->fff_fallback
2260 | lwz CARG3, STR:CARG1->len
2261 | la CARG1, #STR(STR:CARG1)
2262 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2263 | cmplw TMP1, CARG3
2264 | li TMP2, 0
2265 | blt ->fff_fallback
2266 |1: // ASCII case conversion.
2267 | cmplw TMP2, CARG3
2268 | lbzx TMP1, CARG1, TMP2
2269 | bgey ->fff_newstr
2270 | subi TMP0, TMP1, lo
2271 | xori TMP3, TMP1, 0x20
2272 | addic TMP0, TMP0, -26
2273 | subfe TMP3, TMP3, TMP3
2274 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2275 | xor TMP1, TMP1, TMP3
2276 | stbx TMP1, CARG2, TMP2
2277 | addi TMP2, TMP2, 1
2278 | b <1
2279 |.endmacro 2523 |.endmacro
2280 | 2524 |
2281 |ffstring_case string_lower, 65 2525 |ffstring_op reverse
2282 |ffstring_case string_upper, 97 2526 |ffstring_op lower
2283 | 2527 |ffstring_op upper
2284 |//-- Table library ------------------------------------------------------
2285 |
2286 |.ffunc_1 table_getn
2287 | checktab CARG3; bne ->fff_fallback
2288 | bl extern lj_tab_len // (GCtab *t)
2289 | // Returns uint32_t (but less than 2^31).
2290 | b ->fff_resi
2291 | 2528 |
2292 |//-- Bit library -------------------------------------------------------- 2529 |//-- Bit library --------------------------------------------------------
2293 | 2530 |
@@ -2305,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx)
2305 | 2542 |
2306 |.macro .ffunc_bit_op, name, ins 2543 |.macro .ffunc_bit_op, name, ins
2307 | .ffunc_bit name 2544 | .ffunc_bit name
2308 | addi TMP1, BASE, 8 2545 | addi SAVE0, BASE, 8
2309 | add TMP2, BASE, NARGS8:RC 2546 | add SAVE1, BASE, NARGS8:RC
2310 |1: 2547 |1:
2311 | lwz CARG4, 0(TMP1) 2548 | lwz CARG4, 0(SAVE0)
2312 | cmplw cr1, TMP1, TMP2 2549 | cmplw cr1, SAVE0, SAVE1
2313 |.if DUALNUM 2550 |.if DUALNUM
2314 | lwz CARG2, 4(TMP1) 2551 | lwz CARG2, 4(SAVE0)
2315 |.else 2552 |.else
2316 | lfd FARG1, 0(TMP1) 2553 | lfd FARG1, 0(SAVE0)
2317 |.endif 2554 |.endif
2318 | bgey cr1, ->fff_resi 2555 | bgey cr1, ->fff_resi
2319 | checknum CARG4 2556 | checknum CARG4
2320 |.if DUALNUM 2557 |.if DUALNUM
2558 |.if FPU
2321 | bnel ->fff_bitop_fb 2559 | bnel ->fff_bitop_fb
2322 |.else 2560 |.else
2561 | beq >3
2562 | stw CARG1, SFSAVE_1
2563 | bl ->fff_bitop_fb
2564 | mr CARG2, CARG1
2565 | lwz CARG1, SFSAVE_1
2566 |3:
2567 |.endif
2568 |.else
2323 | fadd FARG1, FARG1, TOBIT 2569 | fadd FARG1, FARG1, TOBIT
2324 | bge ->fff_fallback 2570 | bge ->fff_fallback
2325 | stfd FARG1, TMPD 2571 | stfd FARG1, TMPD
2326 | lwz CARG2, TMPD_LO 2572 | lwz CARG2, TMPD_LO
2327 |.endif 2573 |.endif
2328 | ins CARG1, CARG1, CARG2 2574 | ins CARG1, CARG1, CARG2
2329 | addi TMP1, TMP1, 8 2575 | addi SAVE0, SAVE0, 8
2330 | b <1 2576 | b <1
2331 |.endmacro 2577 |.endmacro
2332 | 2578 |
@@ -2348,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx)
2348 |.macro .ffunc_bit_sh, name, ins, shmod 2594 |.macro .ffunc_bit_sh, name, ins, shmod
2349 |.if DUALNUM 2595 |.if DUALNUM
2350 | .ffunc_2 bit_..name 2596 | .ffunc_2 bit_..name
2597 |.if FPU
2351 | checknum CARG3; bnel ->fff_tobit_fb 2598 | checknum CARG3; bnel ->fff_tobit_fb
2599 |.else
2600 | checknum CARG3; beq >1
2601 | bl ->fff_tobit_fb
2602 | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
2603 |1:
2604 |.endif
2352 | // Note: no inline conversion from number for 2nd argument! 2605 | // Note: no inline conversion from number for 2nd argument!
2353 | checknum CARG4; bne ->fff_fallback 2606 | checknum CARG4; bne ->fff_fallback
2354 |.else 2607 |.else
@@ -2385,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx)
2385 |->fff_resn: 2638 |->fff_resn:
2386 | lwz PC, FRAME_PC(BASE) 2639 | lwz PC, FRAME_PC(BASE)
2387 | la RA, -8(BASE) 2640 | la RA, -8(BASE)
2641 |.if FPU
2388 | stfd FARG1, -8(BASE) 2642 | stfd FARG1, -8(BASE)
2643 |.else
2644 | stw CARG1, -8(BASE)
2645 | stw CARG2, -4(BASE)
2646 |.endif
2389 | b ->fff_res1 2647 | b ->fff_res1
2390 | 2648 |
2391 |// Fallback FP number to bit conversion. 2649 |// Fallback FP number to bit conversion.
2392 |->fff_tobit_fb: 2650 |->fff_tobit_fb:
2393 |.if DUALNUM 2651 |.if DUALNUM
2652 |.if FPU
2394 | lfd FARG1, 0(BASE) 2653 | lfd FARG1, 0(BASE)
2395 | bgt ->fff_fallback 2654 | bgt ->fff_fallback
2396 | fadd FARG1, FARG1, TOBIT 2655 | fadd FARG1, FARG1, TOBIT
2397 | stfd FARG1, TMPD 2656 | stfd FARG1, TMPD
2398 | lwz CARG1, TMPD_LO 2657 | lwz CARG1, TMPD_LO
2399 | blr 2658 | blr
2659 |.else
2660 | bgt ->fff_fallback
2661 | mr CARG2, CARG1
2662 | mr CARG1, CARG3
2663 |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
2664 |->vm_tobit:
2665 | slwi TMP2, CARG1, 1
2666 | addis TMP2, TMP2, 0x0020
2667 | cmpwi TMP2, 0
2668 | bge >2
2669 | li TMP1, 0x3e0
2670 | srawi TMP2, TMP2, 21
2671 | not TMP1, TMP1
2672 | sub. TMP2, TMP1, TMP2
2673 | cmpwi cr7, CARG1, 0
2674 | blt >1
2675 | slwi TMP1, CARG1, 11
2676 | srwi TMP0, CARG2, 21
2677 | oris TMP1, TMP1, 0x8000
2678 | or TMP1, TMP1, TMP0
2679 | srw CARG1, TMP1, TMP2
2680 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2681 | neg CARG1, CARG1
2682 | blr
2683 |1:
2684 | addi TMP2, TMP2, 21
2685 | srw TMP1, CARG2, TMP2
2686 | slwi CARG2, CARG1, 12
2687 | subfic TMP2, TMP2, 20
2688 | slw TMP0, CARG2, TMP2
2689 | or CARG1, TMP1, TMP0
2690 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2691 | neg CARG1, CARG1
2692 | blr
2693 |2:
2694 | li CARG1, 0
2695 | blr
2696 |.endif
2400 |.endif 2697 |.endif
2401 |->fff_bitop_fb: 2698 |->fff_bitop_fb:
2402 |.if DUALNUM 2699 |.if DUALNUM
2403 | lfd FARG1, 0(TMP1) 2700 |.if FPU
2701 | lfd FARG1, 0(SAVE0)
2404 | bgt ->fff_fallback 2702 | bgt ->fff_fallback
2405 | fadd FARG1, FARG1, TOBIT 2703 | fadd FARG1, FARG1, TOBIT
2406 | stfd FARG1, TMPD 2704 | stfd FARG1, TMPD
2407 | lwz CARG2, TMPD_LO 2705 | lwz CARG2, TMPD_LO
2408 | blr 2706 | blr
2707 |.else
2708 | bgt ->fff_fallback
2709 | mr CARG1, CARG4
2710 | b ->vm_tobit
2711 |.endif
2409 |.endif 2712 |.endif
2410 | 2713 |
2411 |//----------------------------------------------------------------------- 2714 |//-----------------------------------------------------------------------
@@ -2589,15 +2892,88 @@ static void build_subroutines(BuildCtx *ctx)
2589 | mtctr CRET1 2892 | mtctr CRET1
2590 | bctr 2893 | bctr
2591 | 2894 |
2895 |->cont_stitch: // Trace stitching.
2896 |.if JIT
2897 | // RA = resultptr, RB = meta base
2898 | lwz INS, -4(PC)
2899 | lwz TRACE:TMP2, -20(RB) // Save previous trace.
2900 | addic. TMP1, MULTRES, -8
2901 | decode_RA8 RC, INS // Call base.
2902 | beq >2
2903 |1: // Move results down.
2904 |.if FPU
2905 | lfd f0, 0(RA)
2906 |.else
2907 | lwz CARG1, 0(RA)
2908 | lwz CARG2, 4(RA)
2909 |.endif
2910 | addic. TMP1, TMP1, -8
2911 | addi RA, RA, 8
2912 |.if FPU
2913 | stfdx f0, BASE, RC
2914 |.else
2915 | add CARG3, BASE, RC
2916 | stw CARG1, 0(CARG3)
2917 | stw CARG2, 4(CARG3)
2918 |.endif
2919 | addi RC, RC, 8
2920 | bne <1
2921 |2:
2922 | decode_RA8 RA, INS
2923 | decode_RB8 RB, INS
2924 | add RA, RA, RB
2925 |3:
2926 | cmplw RA, RC
2927 | bgt >9 // More results wanted?
2928 |
2929 | lhz TMP3, TRACE:TMP2->traceno
2930 | lhz RD, TRACE:TMP2->link
2931 | cmpw RD, TMP3
2932 | cmpwi cr1, RD, 0
2933 | beq ->cont_nop // Blacklisted.
2934 | slwi RD, RD, 3
2935 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2936 |
2937 | // Stitch a new trace to the previous trace.
2938 | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
2939 | stp L, DISPATCH_J(L)(DISPATCH)
2940 | stp BASE, L->base
2941 | addi CARG1, DISPATCH, GG_DISP2J
2942 | mr CARG2, PC
2943 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2944 | lp BASE, L->base
2945 | b ->cont_nop
2946 |
2947 |9:
2948 | stwx TISNIL, BASE, RC
2949 | addi RC, RC, 8
2950 | b <3
2951 |.endif
2952 |
2953 |->vm_profhook: // Dispatch target for profiler hook.
2954#if LJ_HASPROFILE
2955 | mr CARG1, L
2956 | stw MULTRES, SAVE_MULTRES
2957 | mr CARG2, PC
2958 | stp BASE, L->base
2959 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2960 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2961 | lp BASE, L->base
2962 | subi PC, PC, 4
2963 | b ->cont_nop
2964#endif
2965 |
2592 |//----------------------------------------------------------------------- 2966 |//-----------------------------------------------------------------------
2593 |//-- Trace exit handler ------------------------------------------------- 2967 |//-- Trace exit handler -------------------------------------------------
2594 |//----------------------------------------------------------------------- 2968 |//-----------------------------------------------------------------------
2595 | 2969 |
2596 |.macro savex_, a, b, c, d 2970 |.macro savex_, a, b, c, d
2971 |.if FPU
2597 | stfd f..a, 16+a*8(sp) 2972 | stfd f..a, 16+a*8(sp)
2598 | stfd f..b, 16+b*8(sp) 2973 | stfd f..b, 16+b*8(sp)
2599 | stfd f..c, 16+c*8(sp) 2974 | stfd f..c, 16+c*8(sp)
2600 | stfd f..d, 16+d*8(sp) 2975 | stfd f..d, 16+d*8(sp)
2976 |.endif
2601 |.endmacro 2977 |.endmacro
2602 | 2978 |
2603 |->vm_exit_handler: 2979 |->vm_exit_handler:
@@ -2623,16 +2999,16 @@ static void build_subroutines(BuildCtx *ctx)
2623 | savex_ 20,21,22,23 2999 | savex_ 20,21,22,23
2624 | lhz CARG4, 2(CARG3) // Load trace number. 3000 | lhz CARG4, 2(CARG3) // Load trace number.
2625 | savex_ 24,25,26,27 3001 | savex_ 24,25,26,27
2626 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 3002 | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
2627 | savex_ 28,29,30,31 3003 | savex_ 28,29,30,31
2628 | sub CARG3, TMP0, CARG3 // Compute exit number. 3004 | sub CARG3, TMP0, CARG3 // Compute exit number.
2629 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 3005 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2630 | srwi CARG3, CARG3, 2 3006 | srwi CARG3, CARG3, 2
2631 | stw L, DISPATCH_J(L)(DISPATCH) 3007 | stp L, DISPATCH_J(L)(DISPATCH)
2632 | subi CARG3, CARG3, 2 3008 | subi CARG3, CARG3, 2
2633 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
2634 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2635 | stp BASE, L->base 3009 | stp BASE, L->base
3010 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
3011 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
2636 | addi CARG1, DISPATCH, GG_DISP2J 3012 | addi CARG1, DISPATCH, GG_DISP2J
2637 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 3013 | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
2638 | addi CARG2, sp, 16 3014 | addi CARG2, sp, 16
@@ -2656,28 +3032,29 @@ static void build_subroutines(BuildCtx *ctx)
2656 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 3032 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
2657 | lwz L, SAVE_L 3033 | lwz L, SAVE_L
2658 | addi DISPATCH, JGL, -GG_DISP2G-32768 3034 | addi DISPATCH, JGL, -GG_DISP2G-32768
3035 | stp BASE, L->base
2659 |1: 3036 |1:
2660 | cmpwi CARG1, 0 3037 | cmpwi CARG1, 0
2661 | blt >3 // Check for error from exit. 3038 | blt >9 // Check for error from exit.
2662 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 3039 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2663 | slwi MULTRES, CARG1, 3 3040 | slwi MULTRES, CARG1, 3
2664 | li TMP2, 0 3041 | li TMP2, 0
2665 | stw MULTRES, SAVE_MULTRES 3042 | stw MULTRES, SAVE_MULTRES
2666 | lwz TMP1, LFUNC:TMP1->pc 3043 | lwz TMP1, LFUNC:RB->pc
2667 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 3044 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2668 | lwz KBASE, PC2PROTO(k)(TMP1) 3045 | lwz KBASE, PC2PROTO(k)(TMP1)
2669 | // Setup type comparison constants. 3046 | // Setup type comparison constants.
2670 | li TISNUM, LJ_TISNUM 3047 | li TISNUM, LJ_TISNUM
2671 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3048 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2672 | stw TMP3, TMPD 3049 | .FPU stw TMP3, TMPD
2673 | li ZERO, 0 3050 | li ZERO, 0
2674 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3051 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2675 | lfs TOBIT, TMPD 3052 | .FPU lfs TOBIT, TMPD
2676 | stw TMP3, TMPD 3053 | .FPU stw TMP3, TMPD
2677 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3054 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2678 | li TISNIL, LJ_TNIL 3055 | li TISNIL, LJ_TNIL
2679 | stw TMP0, TONUM_HI 3056 | .FPU stw TMP0, TONUM_HI
2680 | lfs TONUM, TMPD 3057 | .FPU lfs TONUM, TMPD
2681 | // Modified copy of ins_next which handles function header dispatch, too. 3058 | // Modified copy of ins_next which handles function header dispatch, too.
2682 | lwz INS, 0(PC) 3059 | lwz INS, 0(PC)
2683 | addi PC, PC, 4 3060 | addi PC, PC, 4
@@ -2694,20 +3071,63 @@ static void build_subroutines(BuildCtx *ctx)
2694 | decode_RC8 RC, INS 3071 | decode_RC8 RC, INS
2695 | bctr 3072 | bctr
2696 |2: 3073 |2:
3074 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
3075 | blt >3
3076 | // Check frame below fast function.
3077 | lwz TMP1, FRAME_PC(BASE)
3078 | andix. TMP0, TMP1, FRAME_TYPE
3079 | bney >3 // Trace stitching continuation?
3080 | // Otherwise set KBASE for Lua function below fast function.
3081 | lwz TMP2, -4(TMP1)
3082 | decode_RA8 TMP0, TMP2
3083 | sub TMP1, BASE, TMP0
3084 | lwz LFUNC:TMP2, -12(TMP1)
3085 | lwz TMP1, LFUNC:TMP2->pc
3086 | lwz KBASE, PC2PROTO(k)(TMP1)
3087 |3:
2697 | subi RC, MULTRES, 8 3088 | subi RC, MULTRES, 8
2698 | add RA, RA, BASE 3089 | add RA, RA, BASE
2699 | bctr 3090 | bctr
2700 | 3091 |
2701 |3: // Rethrow error from the right C frame. 3092 |9: // Rethrow error from the right C frame.
3093 | neg CARG2, CARG1
2702 | mr CARG1, L 3094 | mr CARG1, L
2703 | bl extern lj_err_run // (lua_State *L) 3095 | bl extern lj_err_trace // (lua_State *L, int errcode)
2704 |.endif 3096 |.endif
2705 | 3097 |
2706 |//----------------------------------------------------------------------- 3098 |//-----------------------------------------------------------------------
2707 |//-- Math helper functions ---------------------------------------------- 3099 |//-- Math helper functions ----------------------------------------------
2708 |//----------------------------------------------------------------------- 3100 |//-----------------------------------------------------------------------
2709 | 3101 |
2710 |// NYI: Use internal implementations of floor, ceil, trunc. 3102 |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
3103 |
3104 |.macro sfi2d, AHI, ALO
3105 |.if not FPU
3106 | mr. AHI, ALO
3107 | bclr 12, 2 // Handle zero first.
3108 | srawi TMP0, ALO, 31
3109 | xor TMP1, ALO, TMP0
3110 | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
3111 | cntlzw AHI, TMP1
3112 | andix. TMP0, TMP0, 0x800 // Mask sign bit.
3113 | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
3114 | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
3115 | slwi ALO, TMP1, 21
3116 | or AHI, AHI, TMP0 // Sign | Exponent.
3117 | srwi TMP1, TMP1, 11
3118 | slwi AHI, AHI, 20 // Align left.
3119 | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
3120 | blr
3121 |.endif
3122 |.endmacro
3123 |
3124 |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
3125 |->vm_sfi2d_1:
3126 | sfi2d CARG1, CARG2
3127 |
3128 |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
3129 |->vm_sfi2d_2:
3130 | sfi2d CARG3, CARG4
2711 | 3131 |
2712 |->vm_modi: 3132 |->vm_modi:
2713 | divwo. TMP0, CARG1, CARG2 3133 | divwo. TMP0, CARG1, CARG2
@@ -2775,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx)
2775 | addi DISPATCH, r12, GG_G2DISP 3195 | addi DISPATCH, r12, GG_G2DISP
2776 | stw r11, CTSTATE->cb.slot 3196 | stw r11, CTSTATE->cb.slot
2777 | stw r3, CTSTATE->cb.gpr[0] 3197 | stw r3, CTSTATE->cb.gpr[0]
2778 | stfd f1, CTSTATE->cb.fpr[0] 3198 | .FPU stfd f1, CTSTATE->cb.fpr[0]
2779 | stw r4, CTSTATE->cb.gpr[1] 3199 | stw r4, CTSTATE->cb.gpr[1]
2780 | stfd f2, CTSTATE->cb.fpr[1] 3200 | .FPU stfd f2, CTSTATE->cb.fpr[1]
2781 | stw r5, CTSTATE->cb.gpr[2] 3201 | stw r5, CTSTATE->cb.gpr[2]
2782 | stfd f3, CTSTATE->cb.fpr[2] 3202 | .FPU stfd f3, CTSTATE->cb.fpr[2]
2783 | stw r6, CTSTATE->cb.gpr[3] 3203 | stw r6, CTSTATE->cb.gpr[3]
2784 | stfd f4, CTSTATE->cb.fpr[3] 3204 | .FPU stfd f4, CTSTATE->cb.fpr[3]
2785 | stw r7, CTSTATE->cb.gpr[4] 3205 | stw r7, CTSTATE->cb.gpr[4]
2786 | stfd f5, CTSTATE->cb.fpr[4] 3206 | .FPU stfd f5, CTSTATE->cb.fpr[4]
2787 | stw r8, CTSTATE->cb.gpr[5] 3207 | stw r8, CTSTATE->cb.gpr[5]
2788 | stfd f6, CTSTATE->cb.fpr[5] 3208 | .FPU stfd f6, CTSTATE->cb.fpr[5]
2789 | stw r9, CTSTATE->cb.gpr[6] 3209 | stw r9, CTSTATE->cb.gpr[6]
2790 | stfd f7, CTSTATE->cb.fpr[6] 3210 | .FPU stfd f7, CTSTATE->cb.fpr[6]
2791 | stw r10, CTSTATE->cb.gpr[7] 3211 | stw r10, CTSTATE->cb.gpr[7]
2792 | stfd f8, CTSTATE->cb.fpr[7] 3212 | .FPU stfd f8, CTSTATE->cb.fpr[7]
2793 | addi TMP0, sp, CFRAME_SPACE+8 3213 | addi TMP0, sp, CFRAME_SPACE+8
2794 | stw TMP0, CTSTATE->cb.stack 3214 | stw TMP0, CTSTATE->cb.stack
2795 | mr CARG1, CTSTATE 3215 | mr CARG1, CTSTATE
@@ -2800,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx)
2800 | lp BASE, L:CRET1->base 3220 | lp BASE, L:CRET1->base
2801 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 3221 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2802 | lp RC, L:CRET1->top 3222 | lp RC, L:CRET1->top
2803 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3223 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2804 | li ZERO, 0 3224 | li ZERO, 0
2805 | mr L, CRET1 3225 | mr L, CRET1
2806 | stw TMP3, TMPD 3226 | .FPU stw TMP3, TMPD
2807 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3227 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2808 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3228 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2809 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3229 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2810 | stw TMP0, TONUM_HI 3230 | .FPU stw TMP0, TONUM_HI
2811 | li TISNIL, LJ_TNIL 3231 | li TISNIL, LJ_TNIL
2812 | li_vmstate INTERP 3232 | li_vmstate INTERP
2813 | lfs TOBIT, TMPD 3233 | .FPU lfs TOBIT, TMPD
2814 | stw TMP3, TMPD 3234 | .FPU stw TMP3, TMPD
2815 | sub RC, RC, BASE 3235 | sub RC, RC, BASE
2816 | st_vmstate 3236 | st_vmstate
2817 | lfs TONUM, TMPD 3237 | .FPU lfs TONUM, TMPD
2818 | ins_callt 3238 | ins_callt
2819 |.endif 3239 |.endif
2820 | 3240 |
@@ -2828,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx)
2828 | mr CARG2, RA 3248 | mr CARG2, RA
2829 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 3249 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2830 | lwz CRET1, CTSTATE->cb.gpr[0] 3250 | lwz CRET1, CTSTATE->cb.gpr[0]
2831 | lfd FARG1, CTSTATE->cb.fpr[0] 3251 | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
2832 | lwz CRET2, CTSTATE->cb.gpr[1] 3252 | lwz CRET2, CTSTATE->cb.gpr[1]
2833 | b ->vm_leave_unw 3253 | b ->vm_leave_unw
2834 |.endif 3254 |.endif
@@ -2862,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx)
2862 | bge <1 3282 | bge <1
2863 |2: 3283 |2:
2864 | bney cr1, >3 3284 | bney cr1, >3
2865 | lfd f1, CCSTATE->fpr[0] 3285 | .FPU lfd f1, CCSTATE->fpr[0]
2866 | lfd f2, CCSTATE->fpr[1] 3286 | .FPU lfd f2, CCSTATE->fpr[1]
2867 | lfd f3, CCSTATE->fpr[2] 3287 | .FPU lfd f3, CCSTATE->fpr[2]
2868 | lfd f4, CCSTATE->fpr[3] 3288 | .FPU lfd f4, CCSTATE->fpr[3]
2869 | lfd f5, CCSTATE->fpr[4] 3289 | .FPU lfd f5, CCSTATE->fpr[4]
2870 | lfd f6, CCSTATE->fpr[5] 3290 | .FPU lfd f6, CCSTATE->fpr[5]
2871 | lfd f7, CCSTATE->fpr[6] 3291 | .FPU lfd f7, CCSTATE->fpr[6]
2872 | lfd f8, CCSTATE->fpr[7] 3292 | .FPU lfd f8, CCSTATE->fpr[7]
2873 |3: 3293 |3:
2874 | lp TMP0, CCSTATE->func 3294 | lp TMP0, CCSTATE->func
2875 | lwz CARG2, CCSTATE->gpr[1] 3295 | lwz CARG2, CCSTATE->gpr[1]
@@ -2886,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx)
2886 | lwz TMP2, -4(r14) 3306 | lwz TMP2, -4(r14)
2887 | lwz TMP0, 4(r14) 3307 | lwz TMP0, 4(r14)
2888 | stw CARG1, CCSTATE:TMP1->gpr[0] 3308 | stw CARG1, CCSTATE:TMP1->gpr[0]
2889 | stfd FARG1, CCSTATE:TMP1->fpr[0] 3309 | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
2890 | stw CARG2, CCSTATE:TMP1->gpr[1] 3310 | stw CARG2, CCSTATE:TMP1->gpr[1]
2891 | mtlr TMP0 3311 | mtlr TMP0
2892 | stw CARG3, CCSTATE:TMP1->gpr[2] 3312 | stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2915,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2915 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3335 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2916 | // RA = src1*8, RD = src2*8, JMP with RD = target 3336 | // RA = src1*8, RD = src2*8, JMP with RD = target
2917 |.if DUALNUM 3337 |.if DUALNUM
2918 | lwzux TMP0, RA, BASE 3338 | lwzux CARG1, RA, BASE
2919 | addi PC, PC, 4 3339 | addi PC, PC, 4
2920 | lwz CARG2, 4(RA) 3340 | lwz CARG2, 4(RA)
2921 | lwzux TMP1, RD, BASE 3341 | lwzux CARG3, RD, BASE
2922 | lwz TMP2, -4(PC) 3342 | lwz TMP2, -4(PC)
2923 | checknum cr0, TMP0 3343 | checknum cr0, CARG1
2924 | lwz CARG3, 4(RD) 3344 | lwz CARG4, 4(RD)
2925 | decode_RD4 TMP2, TMP2 3345 | decode_RD4 TMP2, TMP2
2926 | checknum cr1, TMP1 3346 | checknum cr1, CARG3
2927 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3347 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
2928 | bne cr0, >7 3348 | bne cr0, >7
2929 | bne cr1, >8 3349 | bne cr1, >8
2930 | cmpw CARG2, CARG3 3350 | cmpw CARG2, CARG4
2931 if (op == BC_ISLT) { 3351 if (op == BC_ISLT) {
2932 | bge >2 3352 | bge >2
2933 } else if (op == BC_ISGE) { 3353 } else if (op == BC_ISGE) {
@@ -2938,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2938 | ble >2 3358 | ble >2
2939 } 3359 }
2940 |1: 3360 |1:
2941 | add PC, PC, TMP2 3361 | add PC, PC, SAVE0
2942 |2: 3362 |2:
2943 | ins_next 3363 | ins_next
2944 | 3364 |
2945 |7: // RA is not an integer. 3365 |7: // RA is not an integer.
2946 | bgt cr0, ->vmeta_comp 3366 | bgt cr0, ->vmeta_comp
2947 | // RA is a number. 3367 | // RA is a number.
2948 | lfd f0, 0(RA) 3368 | .FPU lfd f0, 0(RA)
2949 | bgt cr1, ->vmeta_comp 3369 | bgt cr1, ->vmeta_comp
2950 | blt cr1, >4 3370 | blt cr1, >4
2951 | // RA is a number, RD is an integer. 3371 | // RA is a number, RD is an integer.
2952 | tonum_i f1, CARG3 3372 |.if FPU
3373 | tonum_i f1, CARG4
3374 |.else
3375 | bl ->vm_sfi2d_2
3376 |.endif
2953 | b >5 3377 | b >5
2954 | 3378 |
2955 |8: // RA is an integer, RD is not an integer. 3379 |8: // RA is an integer, RD is not an integer.
2956 | bgt cr1, ->vmeta_comp 3380 | bgt cr1, ->vmeta_comp
2957 | // RA is an integer, RD is a number. 3381 | // RA is an integer, RD is a number.
3382 |.if FPU
2958 | tonum_i f0, CARG2 3383 | tonum_i f0, CARG2
3384 |.else
3385 | bl ->vm_sfi2d_1
3386 |.endif
2959 |4: 3387 |4:
2960 | lfd f1, 0(RD) 3388 | .FPU lfd f1, 0(RD)
2961 |5: 3389 |5:
3390 |.if FPU
2962 | fcmpu cr0, f0, f1 3391 | fcmpu cr0, f0, f1
3392 |.else
3393 | blex __ledf2
3394 | cmpwi CRET1, 0
3395 |.endif
2963 if (op == BC_ISLT) { 3396 if (op == BC_ISLT) {
2964 | bge <2 3397 | bge <2
2965 } else if (op == BC_ISGE) { 3398 } else if (op == BC_ISGE) {
@@ -3007,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3007 vk = op == BC_ISEQV; 3440 vk = op == BC_ISEQV;
3008 | // RA = src1*8, RD = src2*8, JMP with RD = target 3441 | // RA = src1*8, RD = src2*8, JMP with RD = target
3009 |.if DUALNUM 3442 |.if DUALNUM
3010 | lwzux TMP0, RA, BASE 3443 | lwzux CARG1, RA, BASE
3011 | addi PC, PC, 4 3444 | addi PC, PC, 4
3012 | lwz CARG2, 4(RA) 3445 | lwz CARG2, 4(RA)
3013 | lwzux TMP1, RD, BASE 3446 | lwzux CARG3, RD, BASE
3014 | checknum cr0, TMP0 3447 | checknum cr0, CARG1
3015 | lwz TMP2, -4(PC) 3448 | lwz SAVE0, -4(PC)
3016 | checknum cr1, TMP1 3449 | checknum cr1, CARG3
3017 | decode_RD4 TMP2, TMP2 3450 | decode_RD4 SAVE0, SAVE0
3018 | lwz CARG3, 4(RD) 3451 | lwz CARG4, 4(RD)
3019 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3452 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
3020 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3453 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3021 if (vk) { 3454 if (vk) {
3022 | ble cr7, ->BC_ISEQN_Z 3455 | ble cr7, ->BC_ISEQN_Z
3023 } else { 3456 } else {
3024 | ble cr7, ->BC_ISNEN_Z 3457 | ble cr7, ->BC_ISNEN_Z
3025 } 3458 }
3026 |.else 3459 |.else
3027 | lwzux TMP0, RA, BASE 3460 | lwzux CARG1, RA, BASE
3028 | lwz TMP2, 0(PC) 3461 | lwz SAVE0, 0(PC)
3029 | lfd f0, 0(RA) 3462 | lfd f0, 0(RA)
3030 | addi PC, PC, 4 3463 | addi PC, PC, 4
3031 | lwzux TMP1, RD, BASE 3464 | lwzux CARG3, RD, BASE
3032 | checknum cr0, TMP0 3465 | checknum cr0, CARG1
3033 | decode_RD4 TMP2, TMP2 3466 | decode_RD4 SAVE0, SAVE0
3034 | lfd f1, 0(RD) 3467 | lfd f1, 0(RD)
3035 | checknum cr1, TMP1 3468 | checknum cr1, CARG3
3036 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3469 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3037 | bge cr0, >5 3470 | bge cr0, >5
3038 | bge cr1, >5 3471 | bge cr1, >5
3039 | fcmpu cr0, f0, f1 3472 | fcmpu cr0, f0, f1
3040 if (vk) { 3473 if (vk) {
3041 | bne >1 3474 | bne >1
3042 | add PC, PC, TMP2 3475 | add PC, PC, SAVE0
3043 } else { 3476 } else {
3044 | beq >1 3477 | beq >1
3045 | add PC, PC, TMP2 3478 | add PC, PC, SAVE0
3046 } 3479 }
3047 |1: 3480 |1:
3048 | ins_next 3481 | ins_next
@@ -3050,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3050 |5: // Either or both types are not numbers. 3483 |5: // Either or both types are not numbers.
3051 |.if not DUALNUM 3484 |.if not DUALNUM
3052 | lwz CARG2, 4(RA) 3485 | lwz CARG2, 4(RA)
3053 | lwz CARG3, 4(RD) 3486 | lwz CARG4, 4(RD)
3054 |.endif 3487 |.endif
3055 |.if FFI 3488 |.if FFI
3056 | cmpwi cr7, TMP0, LJ_TCDATA 3489 | cmpwi cr7, CARG1, LJ_TCDATA
3057 | cmpwi cr5, TMP1, LJ_TCDATA 3490 | cmpwi cr5, CARG3, LJ_TCDATA
3058 |.endif 3491 |.endif
3059 | not TMP3, TMP0 3492 | not TMP2, CARG1
3060 | cmplw TMP0, TMP1 3493 | cmplw CARG1, CARG3
3061 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3494 | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
3062 |.if FFI 3495 |.if FFI
3063 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3496 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
3064 |.endif 3497 |.endif
3065 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3498 | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
3066 |.if FFI 3499 |.if FFI
3067 | beq cr7, ->vmeta_equal_cd 3500 | beq cr7, ->vmeta_equal_cd
3068 |.endif 3501 |.endif
3069 | cmplw cr5, CARG2, CARG3 3502 | cmplw cr5, CARG2, CARG4
3070 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3503 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
3071 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3504 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
3072 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3505 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
3073 | mr SAVE0, PC 3506 | mr SAVE1, PC
3074 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3507 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
3075 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3508 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
3076 if (vk) { 3509 if (vk) {
3077 | bne cr0, >6 3510 | bne cr0, >6
3078 | add PC, PC, TMP2 3511 | add PC, PC, SAVE0
3079 |6: 3512 |6:
3080 } else { 3513 } else {
3081 | beq cr0, >6 3514 | beq cr0, >6
3082 | add PC, PC, TMP2 3515 | add PC, PC, SAVE0
3083 |6: 3516 |6:
3084 } 3517 }
3085 |.if DUALNUM 3518 |.if DUALNUM
@@ -3094,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3094 | 3527 |
3095 | // Different tables or userdatas. Need to check __eq metamethod. 3528 | // Different tables or userdatas. Need to check __eq metamethod.
3096 | // Field metatable must be at same offset for GCtab and GCudata! 3529 | // Field metatable must be at same offset for GCtab and GCudata!
3530 | mr CARG3, CARG4
3097 | lwz TAB:TMP2, TAB:CARG2->metatable 3531 | lwz TAB:TMP2, TAB:CARG2->metatable
3098 | li CARG4, 1-vk // ne = 0 or 1. 3532 | li CARG4, 1-vk // ne = 0 or 1.
3099 | cmplwi TAB:TMP2, 0 3533 | cmplwi TAB:TMP2, 0
@@ -3101,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3101 | lbz TMP2, TAB:TMP2->nomm 3535 | lbz TMP2, TAB:TMP2->nomm
3102 | andix. TMP2, TMP2, 1<<MM_eq 3536 | andix. TMP2, TMP2, 1<<MM_eq
3103 | bne <1 // Or 'no __eq' flag set? 3537 | bne <1 // Or 'no __eq' flag set?
3104 | mr PC, SAVE0 // Restore old PC. 3538 | mr PC, SAVE1 // Restore old PC.
3105 | b ->vmeta_equal // Handle __eq metamethod. 3539 | b ->vmeta_equal // Handle __eq metamethod.
3106 break; 3540 break;
3107 3541
@@ -3142,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3142 vk = op == BC_ISEQN; 3576 vk = op == BC_ISEQN;
3143 | // RA = src*8, RD = num_const*8, JMP with RD = target 3577 | // RA = src*8, RD = num_const*8, JMP with RD = target
3144 |.if DUALNUM 3578 |.if DUALNUM
3145 | lwzux TMP0, RA, BASE 3579 | lwzux CARG1, RA, BASE
3146 | addi PC, PC, 4 3580 | addi PC, PC, 4
3147 | lwz CARG2, 4(RA) 3581 | lwz CARG2, 4(RA)
3148 | lwzux TMP1, RD, KBASE 3582 | lwzux CARG3, RD, KBASE
3149 | checknum cr0, TMP0 3583 | checknum cr0, CARG1
3150 | lwz TMP2, -4(PC) 3584 | lwz SAVE0, -4(PC)
3151 | checknum cr1, TMP1 3585 | checknum cr1, CARG3
3152 | decode_RD4 TMP2, TMP2 3586 | decode_RD4 SAVE0, SAVE0
3153 | lwz CARG3, 4(RD) 3587 | lwz CARG4, 4(RD)
3154 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3588 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3155 if (vk) { 3589 if (vk) {
3156 |->BC_ISEQN_Z: 3590 |->BC_ISEQN_Z:
3157 } else { 3591 } else {
@@ -3159,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3159 } 3593 }
3160 | bne cr0, >7 3594 | bne cr0, >7
3161 | bne cr1, >8 3595 | bne cr1, >8
3162 | cmpw CARG2, CARG3 3596 | cmpw CARG2, CARG4
3163 |4: 3597 |4:
3164 |.else 3598 |.else
3165 if (vk) { 3599 if (vk) {
@@ -3167,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3167 } else { 3601 } else {
3168 |->BC_ISNEN_Z: // Dummy label. 3602 |->BC_ISNEN_Z: // Dummy label.
3169 } 3603 }
3170 | lwzx TMP0, BASE, RA 3604 | lwzx CARG1, BASE, RA
3171 | addi PC, PC, 4 3605 | addi PC, PC, 4
3172 | lfdx f0, BASE, RA 3606 | lfdx f0, BASE, RA
3173 | lwz TMP2, -4(PC) 3607 | lwz SAVE0, -4(PC)
3174 | lfdx f1, KBASE, RD 3608 | lfdx f1, KBASE, RD
3175 | decode_RD4 TMP2, TMP2 3609 | decode_RD4 SAVE0, SAVE0
3176 | checknum TMP0 3610 | checknum CARG1
3177 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3611 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3178 | bge >3 3612 | bge >3
3179 | fcmpu cr0, f0, f1 3613 | fcmpu cr0, f0, f1
3180 |.endif 3614 |.endif
3181 if (vk) { 3615 if (vk) {
3182 | bne >1 3616 | bne >1
3183 | add PC, PC, TMP2 3617 | add PC, PC, SAVE0
3184 |1: 3618 |1:
3185 |.if not FFI 3619 |.if not FFI
3186 |3: 3620 |3:
@@ -3191,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3191 |.if not FFI 3625 |.if not FFI
3192 |3: 3626 |3:
3193 |.endif 3627 |.endif
3194 | add PC, PC, TMP2 3628 | add PC, PC, SAVE0
3195 |2: 3629 |2:
3196 } 3630 }
3197 | ins_next 3631 | ins_next
3198 |.if FFI 3632 |.if FFI
3199 |3: 3633 |3:
3200 | cmpwi TMP0, LJ_TCDATA 3634 | cmpwi CARG1, LJ_TCDATA
3201 | beq ->vmeta_equal_cd 3635 | beq ->vmeta_equal_cd
3202 | b <1 3636 | b <1
3203 |.endif 3637 |.endif
@@ -3205,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3205 |7: // RA is not an integer. 3639 |7: // RA is not an integer.
3206 | bge cr0, <3 3640 | bge cr0, <3
3207 | // RA is a number. 3641 | // RA is a number.
3208 | lfd f0, 0(RA) 3642 | .FPU lfd f0, 0(RA)
3209 | blt cr1, >1 3643 | blt cr1, >1
3210 | // RA is a number, RD is an integer. 3644 | // RA is a number, RD is an integer.
3211 | tonum_i f1, CARG3 3645 |.if FPU
3646 | tonum_i f1, CARG4
3647 |.else
3648 | bl ->vm_sfi2d_2
3649 |.endif
3212 | b >2 3650 | b >2
3213 | 3651 |
3214 |8: // RA is an integer, RD is a number. 3652 |8: // RA is an integer, RD is a number.
3653 |.if FPU
3215 | tonum_i f0, CARG2 3654 | tonum_i f0, CARG2
3655 |.else
3656 | bl ->vm_sfi2d_1
3657 |.endif
3216 |1: 3658 |1:
3217 | lfd f1, 0(RD) 3659 | .FPU lfd f1, 0(RD)
3218 |2: 3660 |2:
3661 |.if FPU
3219 | fcmpu cr0, f0, f1 3662 | fcmpu cr0, f0, f1
3663 |.else
3664 | blex __ledf2
3665 | cmpwi CRET1, 0
3666 |.endif
3220 | b <4 3667 | b <4
3221 |.endif 3668 |.endif
3222 break; 3669 break;
@@ -3271,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3271 | add PC, PC, TMP2 3718 | add PC, PC, TMP2
3272 } else { 3719 } else {
3273 | li TMP1, LJ_TFALSE 3720 | li TMP1, LJ_TFALSE
3721 |.if FPU
3274 | lfdx f0, BASE, RD 3722 | lfdx f0, BASE, RD
3723 |.else
3724 | lwzux CARG1, RD, BASE
3725 | lwz CARG2, 4(RD)
3726 |.endif
3275 | cmplw TMP0, TMP1 3727 | cmplw TMP0, TMP1
3276 if (op == BC_ISTC) { 3728 if (op == BC_ISTC) {
3277 | bge >1 3729 | bge >1
@@ -3280,20 +3732,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3280 } 3732 }
3281 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3733 | addis PC, PC, -(BCBIAS_J*4 >> 16)
3282 | decode_RD4 TMP2, INS 3734 | decode_RD4 TMP2, INS
3735 |.if FPU
3283 | stfdx f0, BASE, RA 3736 | stfdx f0, BASE, RA
3737 |.else
3738 | stwux CARG1, RA, BASE
3739 | stw CARG2, 4(RA)
3740 |.endif
3284 | add PC, PC, TMP2 3741 | add PC, PC, TMP2
3285 |1: 3742 |1:
3286 } 3743 }
3287 | ins_next 3744 | ins_next
3288 break; 3745 break;
3289 3746
3747 case BC_ISTYPE:
3748 | // RA = src*8, RD = -type*8
3749 | lwzx TMP0, BASE, RA
3750 | srwi TMP1, RD, 3
3751 | ins_next1
3752 |.if not PPE and not GPR64
3753 | add. TMP0, TMP0, TMP1
3754 |.else
3755 | neg TMP1, TMP1
3756 | cmpw TMP0, TMP1
3757 |.endif
3758 | bne ->vmeta_istype
3759 | ins_next2
3760 break;
3761 case BC_ISNUM:
3762 | // RA = src*8, RD = -(TISNUM-1)*8
3763 | lwzx TMP0, BASE, RA
3764 | ins_next1
3765 | checknum TMP0
3766 | bge ->vmeta_istype
3767 | ins_next2
3768 break;
3769
3290 /* -- Unary ops --------------------------------------------------------- */ 3770 /* -- Unary ops --------------------------------------------------------- */
3291 3771
3292 case BC_MOV: 3772 case BC_MOV:
3293 | // RA = dst*8, RD = src*8 3773 | // RA = dst*8, RD = src*8
3294 | ins_next1 3774 | ins_next1
3775 |.if FPU
3295 | lfdx f0, BASE, RD 3776 | lfdx f0, BASE, RD
3296 | stfdx f0, BASE, RA 3777 | stfdx f0, BASE, RA
3778 |.else
3779 | lwzux TMP0, RD, BASE
3780 | lwz TMP1, 4(RD)
3781 | stwux TMP0, RA, BASE
3782 | stw TMP1, 4(RA)
3783 |.endif
3297 | ins_next2 3784 | ins_next2
3298 break; 3785 break;
3299 case BC_NOT: 3786 case BC_NOT:
@@ -3395,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3395 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3882 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3396 ||switch (vk) { 3883 ||switch (vk) {
3397 ||case 0: 3884 ||case 0:
3398 | lwzx TMP1, BASE, RB 3885 | lwzx CARG1, BASE, RB
3399 | .if DUALNUM 3886 | .if DUALNUM
3400 | lwzx TMP2, KBASE, RC 3887 | lwzx CARG3, KBASE, RC
3401 | .endif 3888 | .endif
3889 | .if FPU
3402 | lfdx f14, BASE, RB 3890 | lfdx f14, BASE, RB
3403 | lfdx f15, KBASE, RC 3891 | lfdx f15, KBASE, RC
3892 | .else
3893 | add TMP1, BASE, RB
3894 | add TMP2, KBASE, RC
3895 | lwz CARG2, 4(TMP1)
3896 | lwz CARG4, 4(TMP2)
3897 | .endif
3404 | .if DUALNUM 3898 | .if DUALNUM
3405 | checknum cr0, TMP1 3899 | checknum cr0, CARG1
3406 | checknum cr1, TMP2 3900 | checknum cr1, CARG3
3407 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3901 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3408 | bge ->vmeta_arith_vn 3902 | bge ->vmeta_arith_vn
3409 | .else 3903 | .else
3410 | checknum TMP1; bge ->vmeta_arith_vn 3904 | checknum CARG1; bge ->vmeta_arith_vn
3411 | .endif 3905 | .endif
3412 || break; 3906 || break;
3413 ||case 1: 3907 ||case 1:
3414 | lwzx TMP1, BASE, RB 3908 | lwzx CARG1, BASE, RB
3415 | .if DUALNUM 3909 | .if DUALNUM
3416 | lwzx TMP2, KBASE, RC 3910 | lwzx CARG3, KBASE, RC
3417 | .endif 3911 | .endif
3912 | .if FPU
3418 | lfdx f15, BASE, RB 3913 | lfdx f15, BASE, RB
3419 | lfdx f14, KBASE, RC 3914 | lfdx f14, KBASE, RC
3915 | .else
3916 | add TMP1, BASE, RB
3917 | add TMP2, KBASE, RC
3918 | lwz CARG2, 4(TMP1)
3919 | lwz CARG4, 4(TMP2)
3920 | .endif
3420 | .if DUALNUM 3921 | .if DUALNUM
3421 | checknum cr0, TMP1 3922 | checknum cr0, CARG1
3422 | checknum cr1, TMP2 3923 | checknum cr1, CARG3
3423 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3924 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3424 | bge ->vmeta_arith_nv 3925 | bge ->vmeta_arith_nv
3425 | .else 3926 | .else
3426 | checknum TMP1; bge ->vmeta_arith_nv 3927 | checknum CARG1; bge ->vmeta_arith_nv
3427 | .endif 3928 | .endif
3428 || break; 3929 || break;
3429 ||default: 3930 ||default:
3430 | lwzx TMP1, BASE, RB 3931 | lwzx CARG1, BASE, RB
3431 | lwzx TMP2, BASE, RC 3932 | lwzx CARG3, BASE, RC
3933 | .if FPU
3432 | lfdx f14, BASE, RB 3934 | lfdx f14, BASE, RB
3433 | lfdx f15, BASE, RC 3935 | lfdx f15, BASE, RC
3434 | checknum cr0, TMP1 3936 | .else
3435 | checknum cr1, TMP2 3937 | add TMP1, BASE, RB
3938 | add TMP2, BASE, RC
3939 | lwz CARG2, 4(TMP1)
3940 | lwz CARG4, 4(TMP2)
3941 | .endif
3942 | checknum cr0, CARG1
3943 | checknum cr1, CARG3
3436 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3944 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3437 | bge ->vmeta_arith_vv 3945 | bge ->vmeta_arith_vv
3438 || break; 3946 || break;
@@ -3466,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3466 | fsub a, b, a // b - floor(b/c)*c 3974 | fsub a, b, a // b - floor(b/c)*c
3467 |.endmacro 3975 |.endmacro
3468 | 3976 |
3977 |.macro sfpmod
3978 |->BC_MODVN_Z:
3979 | stw CARG1, SFSAVE_1
3980 | stw CARG2, SFSAVE_2
3981 | mr SAVE0, CARG3
3982 | mr SAVE1, CARG4
3983 | blex __divdf3
3984 | blex floor
3985 | mr CARG3, SAVE0
3986 | mr CARG4, SAVE1
3987 | blex __muldf3
3988 | mr CARG3, CRET1
3989 | mr CARG4, CRET2
3990 | lwz CARG1, SFSAVE_1
3991 | lwz CARG2, SFSAVE_2
3992 | blex __subdf3
3993 |.endmacro
3994 |
3469 |.macro ins_arithfp, fpins 3995 |.macro ins_arithfp, fpins
3470 | ins_arithpre 3996 | ins_arithpre
3471 |.if "fpins" == "fpmod_" 3997 |.if "fpins" == "fpmod_"
3472 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3998 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3473 |.else 3999 |.elif FPU
3474 | fpins f0, f14, f15 4000 | fpins f0, f14, f15
3475 | ins_next1 4001 | ins_next1
3476 | stfdx f0, BASE, RA 4002 | stfdx f0, BASE, RA
3477 | ins_next2 4003 | ins_next2
4004 |.else
4005 | blex __divdf3 // Only soft-float div uses this macro.
4006 | ins_next1
4007 | stwux CRET1, RA, BASE
4008 | stw CRET2, 4(RA)
4009 | ins_next2
3478 |.endif 4010 |.endif
3479 |.endmacro 4011 |.endmacro
3480 | 4012 |
3481 |.macro ins_arithdn, intins, fpins 4013 |.macro ins_arithdn, intins, fpins, fpcall
3482 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 4014 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3483 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4015 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3484 ||switch (vk) { 4016 ||switch (vk) {
3485 ||case 0: 4017 ||case 0:
3486 | lwzux TMP1, RB, BASE 4018 | lwzux CARG1, RB, BASE
3487 | lwzux TMP2, RC, KBASE 4019 | lwzux CARG3, RC, KBASE
3488 | lwz CARG1, 4(RB) 4020 | lwz CARG2, 4(RB)
3489 | checknum cr0, TMP1 4021 | checknum cr0, CARG1
3490 | lwz CARG2, 4(RC) 4022 | lwz CARG4, 4(RC)
4023 | checknum cr1, CARG3
3491 || break; 4024 || break;
3492 ||case 1: 4025 ||case 1:
3493 | lwzux TMP1, RB, BASE 4026 | lwzux CARG3, RB, BASE
3494 | lwzux TMP2, RC, KBASE 4027 | lwzux CARG1, RC, KBASE
3495 | lwz CARG2, 4(RB) 4028 | lwz CARG4, 4(RB)
3496 | checknum cr0, TMP1 4029 | checknum cr0, CARG3
3497 | lwz CARG1, 4(RC) 4030 | lwz CARG2, 4(RC)
4031 | checknum cr1, CARG1
3498 || break; 4032 || break;
3499 ||default: 4033 ||default:
3500 | lwzux TMP1, RB, BASE 4034 | lwzux CARG1, RB, BASE
3501 | lwzux TMP2, RC, BASE 4035 | lwzux CARG3, RC, BASE
3502 | lwz CARG1, 4(RB) 4036 | lwz CARG2, 4(RB)
3503 | checknum cr0, TMP1 4037 | checknum cr0, CARG1
3504 | lwz CARG2, 4(RC) 4038 | lwz CARG4, 4(RC)
4039 | checknum cr1, CARG3
3505 || break; 4040 || break;
3506 ||} 4041 ||}
3507 | checknum cr1, TMP2
3508 | bne >5 4042 | bne >5
3509 | bne cr1, >5 4043 | bne cr1, >5
3510 | intins CARG1, CARG1, CARG2 4044 |.if "intins" == "intmod"
4045 | mr CARG1, CARG2
4046 | mr CARG2, CARG4
4047 |.endif
4048 | intins CARG1, CARG2, CARG4
3511 | bso >4 4049 | bso >4
3512 |1: 4050 |1:
3513 | ins_next1 4051 | ins_next1
@@ -3519,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3519 | checkov TMP0, <1 // Ignore unrelated overflow. 4057 | checkov TMP0, <1 // Ignore unrelated overflow.
3520 | ins_arithfallback b 4058 | ins_arithfallback b
3521 |5: // FP variant. 4059 |5: // FP variant.
4060 |.if FPU
3522 ||if (vk == 1) { 4061 ||if (vk == 1) {
3523 | lfd f15, 0(RB) 4062 | lfd f15, 0(RB)
3524 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3525 | lfd f14, 0(RC) 4063 | lfd f14, 0(RC)
3526 ||} else { 4064 ||} else {
3527 | lfd f14, 0(RB) 4065 | lfd f14, 0(RB)
3528 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3529 | lfd f15, 0(RC) 4066 | lfd f15, 0(RC)
3530 ||} 4067 ||}
4068 |.endif
4069 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3531 | ins_arithfallback bge 4070 | ins_arithfallback bge
3532 |.if "fpins" == "fpmod_" 4071 |.if "fpins" == "fpmod_"
3533 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4072 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3534 |.else 4073 |.else
4074 |.if FPU
3535 | fpins f0, f14, f15 4075 | fpins f0, f14, f15
3536 | ins_next1
3537 | stfdx f0, BASE, RA 4076 | stfdx f0, BASE, RA
4077 |.else
4078 |.if "fpcall" == "sfpmod"
4079 | sfpmod
4080 |.else
4081 | blex fpcall
4082 |.endif
4083 | stwux CRET1, RA, BASE
4084 | stw CRET2, 4(RA)
4085 |.endif
4086 | ins_next1
3538 | b <2 4087 | b <2
3539 |.endif 4088 |.endif
3540 |.endmacro 4089 |.endmacro
3541 | 4090 |
3542 |.macro ins_arith, intins, fpins 4091 |.macro ins_arith, intins, fpins, fpcall
3543 |.if DUALNUM 4092 |.if DUALNUM
3544 | ins_arithdn intins, fpins 4093 | ins_arithdn intins, fpins, fpcall
3545 |.else 4094 |.else
3546 | ins_arithfp fpins 4095 | ins_arithfp fpins
3547 |.endif 4096 |.endif
@@ -3556,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3556 | addo. TMP0, TMP0, TMP1 4105 | addo. TMP0, TMP0, TMP1
3557 | add y, a, b 4106 | add y, a, b
3558 |.endmacro 4107 |.endmacro
3559 | ins_arith addo32., fadd 4108 | ins_arith addo32., fadd, __adddf3
3560 |.else 4109 |.else
3561 | ins_arith addo., fadd 4110 | ins_arith addo., fadd, __adddf3
3562 |.endif 4111 |.endif
3563 break; 4112 break;
3564 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4113 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3570,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3570 | subo. TMP0, TMP0, TMP1 4119 | subo. TMP0, TMP0, TMP1
3571 | sub y, a, b 4120 | sub y, a, b
3572 |.endmacro 4121 |.endmacro
3573 | ins_arith subo32., fsub 4122 | ins_arith subo32., fsub, __subdf3
3574 |.else 4123 |.else
3575 | ins_arith subo., fsub 4124 | ins_arith subo., fsub, __subdf3
3576 |.endif 4125 |.endif
3577 break; 4126 break;
3578 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4127 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3579 | ins_arith mullwo., fmul 4128 | ins_arith mullwo., fmul, __muldf3
3580 break; 4129 break;
3581 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4130 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3582 | ins_arithfp fdiv 4131 | ins_arithfp fdiv
3583 break; 4132 break;
3584 case BC_MODVN: 4133 case BC_MODVN:
3585 | ins_arith intmod, fpmod 4134 | ins_arith intmod, fpmod, sfpmod
3586 break; 4135 break;
3587 case BC_MODNV: case BC_MODVV: 4136 case BC_MODNV: case BC_MODVV:
3588 | ins_arith intmod, fpmod_ 4137 | ins_arith intmod, fpmod_, sfpmod
3589 break; 4138 break;
3590 case BC_POW: 4139 case BC_POW:
3591 | // NYI: (partial) integer arithmetic. 4140 | // NYI: (partial) integer arithmetic.
3592 | lwzx TMP1, BASE, RB 4141 | lwzx CARG1, BASE, RB
4142 | lwzx CARG3, BASE, RC
4143 |.if FPU
3593 | lfdx FARG1, BASE, RB 4144 | lfdx FARG1, BASE, RB
3594 | lwzx TMP2, BASE, RC
3595 | lfdx FARG2, BASE, RC 4145 | lfdx FARG2, BASE, RC
3596 | checknum cr0, TMP1 4146 |.else
3597 | checknum cr1, TMP2 4147 | add TMP1, BASE, RB
4148 | add TMP2, BASE, RC
4149 | lwz CARG2, 4(TMP1)
4150 | lwz CARG4, 4(TMP2)
4151 |.endif
4152 | checknum cr0, CARG1
4153 | checknum cr1, CARG3
3598 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4154 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3599 | bge ->vmeta_arith_vv 4155 | bge ->vmeta_arith_vv
3600 | blex pow 4156 | blex pow
3601 | ins_next1 4157 | ins_next1
4158 |.if FPU
3602 | stfdx FARG1, BASE, RA 4159 | stfdx FARG1, BASE, RA
4160 |.else
4161 | stwux CARG1, RA, BASE
4162 | stw CARG2, 4(RA)
4163 |.endif
3603 | ins_next2 4164 | ins_next2
3604 break; 4165 break;
3605 4166
@@ -3619,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3619 | lp BASE, L->base 4180 | lp BASE, L->base
3620 | bne ->vmeta_binop 4181 | bne ->vmeta_binop
3621 | ins_next1 4182 | ins_next1
4183 |.if FPU
3622 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 4184 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
3623 | stfdx f0, BASE, RA 4185 | stfdx f0, BASE, RA
4186 |.else
4187 | lwzux TMP0, SAVE0, BASE
4188 | lwz TMP1, 4(SAVE0)
4189 | stwux TMP0, RA, BASE
4190 | stw TMP1, 4(RA)
4191 |.endif
3624 | ins_next2 4192 | ins_next2
3625 break; 4193 break;
3626 4194
@@ -3683,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3683 case BC_KNUM: 4251 case BC_KNUM:
3684 | // RA = dst*8, RD = num_const*8 4252 | // RA = dst*8, RD = num_const*8
3685 | ins_next1 4253 | ins_next1
4254 |.if FPU
3686 | lfdx f0, KBASE, RD 4255 | lfdx f0, KBASE, RD
3687 | stfdx f0, BASE, RA 4256 | stfdx f0, BASE, RA
4257 |.else
4258 | lwzux TMP0, RD, KBASE
4259 | lwz TMP1, 4(RD)
4260 | stwux TMP0, RA, BASE
4261 | stw TMP1, 4(RA)
4262 |.endif
3688 | ins_next2 4263 | ins_next2
3689 break; 4264 break;
3690 case BC_KPRI: 4265 case BC_KPRI:
@@ -3717,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3717 | lwzx UPVAL:RB, LFUNC:RB, RD 4292 | lwzx UPVAL:RB, LFUNC:RB, RD
3718 | ins_next1 4293 | ins_next1
3719 | lwz TMP1, UPVAL:RB->v 4294 | lwz TMP1, UPVAL:RB->v
4295 |.if FPU
3720 | lfd f0, 0(TMP1) 4296 | lfd f0, 0(TMP1)
3721 | stfdx f0, BASE, RA 4297 | stfdx f0, BASE, RA
4298 |.else
4299 | lwz TMP2, 0(TMP1)
4300 | lwz TMP3, 4(TMP1)
4301 | stwux TMP2, RA, BASE
4302 | stw TMP3, 4(RA)
4303 |.endif
3722 | ins_next2 4304 | ins_next2
3723 break; 4305 break;
3724 case BC_USETV: 4306 case BC_USETV:
@@ -3726,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3726 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4308 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3727 | srwi RA, RA, 1 4309 | srwi RA, RA, 1
3728 | addi RA, RA, offsetof(GCfuncL, uvptr) 4310 | addi RA, RA, offsetof(GCfuncL, uvptr)
4311 |.if FPU
3729 | lfdux f0, RD, BASE 4312 | lfdux f0, RD, BASE
4313 |.else
4314 | lwzux CARG1, RD, BASE
4315 | lwz CARG3, 4(RD)
4316 |.endif
3730 | lwzx UPVAL:RB, LFUNC:RB, RA 4317 | lwzx UPVAL:RB, LFUNC:RB, RA
3731 | lbz TMP3, UPVAL:RB->marked 4318 | lbz TMP3, UPVAL:RB->marked
3732 | lwz CARG2, UPVAL:RB->v 4319 | lwz CARG2, UPVAL:RB->v
3733 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 4320 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3734 | lbz TMP0, UPVAL:RB->closed 4321 | lbz TMP0, UPVAL:RB->closed
3735 | lwz TMP2, 0(RD) 4322 | lwz TMP2, 0(RD)
4323 |.if FPU
3736 | stfd f0, 0(CARG2) 4324 | stfd f0, 0(CARG2)
4325 |.else
4326 | stw CARG1, 0(CARG2)
4327 | stw CARG3, 4(CARG2)
4328 |.endif
3737 | cmplwi cr1, TMP0, 0 4329 | cmplwi cr1, TMP0, 0
3738 | lwz TMP1, 4(RD) 4330 | lwz TMP1, 4(RD)
3739 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4331 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3789,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3789 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4381 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3790 | srwi RA, RA, 1 4382 | srwi RA, RA, 1
3791 | addi RA, RA, offsetof(GCfuncL, uvptr) 4383 | addi RA, RA, offsetof(GCfuncL, uvptr)
4384 |.if FPU
3792 | lfdx f0, KBASE, RD 4385 | lfdx f0, KBASE, RD
4386 |.else
4387 | lwzux TMP2, RD, KBASE
4388 | lwz TMP3, 4(RD)
4389 |.endif
3793 | lwzx UPVAL:RB, LFUNC:RB, RA 4390 | lwzx UPVAL:RB, LFUNC:RB, RA
3794 | ins_next1 4391 | ins_next1
3795 | lwz TMP1, UPVAL:RB->v 4392 | lwz TMP1, UPVAL:RB->v
4393 |.if FPU
3796 | stfd f0, 0(TMP1) 4394 | stfd f0, 0(TMP1)
4395 |.else
4396 | stw TMP2, 0(TMP1)
4397 | stw TMP3, 4(TMP1)
4398 |.endif
3797 | ins_next2 4399 | ins_next2
3798 break; 4400 break;
3799 case BC_USETP: 4401 case BC_USETP:
@@ -3941,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3941 |.endif 4543 |.endif
3942 | ble ->vmeta_tgetv // Integer key and in array part? 4544 | ble ->vmeta_tgetv // Integer key and in array part?
3943 | lwzx TMP0, TMP1, TMP2 4545 | lwzx TMP0, TMP1, TMP2
4546 |.if FPU
3944 | lfdx f14, TMP1, TMP2 4547 | lfdx f14, TMP1, TMP2
4548 |.else
4549 | lwzux SAVE0, TMP1, TMP2
4550 | lwz SAVE1, 4(TMP1)
4551 |.endif
3945 | checknil TMP0; beq >2 4552 | checknil TMP0; beq >2
3946 |1: 4553 |1:
3947 | ins_next1 4554 | ins_next1
4555 |.if FPU
3948 | stfdx f14, BASE, RA 4556 | stfdx f14, BASE, RA
4557 |.else
4558 | stwux SAVE0, RA, BASE
4559 | stw SAVE1, 4(RA)
4560 |.endif
3949 | ins_next2 4561 | ins_next2
3950 | 4562 |
3951 |2: // Check for __index if table value is nil. 4563 |2: // Check for __index if table value is nil.
@@ -3976,9 +4588,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3976 |->BC_TGETS_Z: 4588 |->BC_TGETS_Z:
3977 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 4589 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
3978 | lwz TMP0, TAB:RB->hmask 4590 | lwz TMP0, TAB:RB->hmask
3979 | lwz TMP1, STR:RC->hash 4591 | lwz TMP1, STR:RC->sid
3980 | lwz NODE:TMP2, TAB:RB->node 4592 | lwz NODE:TMP2, TAB:RB->node
3981 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4593 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
3982 | slwi TMP0, TMP1, 5 4594 | slwi TMP0, TMP1, 5
3983 | slwi TMP1, TMP1, 3 4595 | slwi TMP1, TMP1, 3
3984 | sub TMP1, TMP0, TMP1 4596 | sub TMP1, TMP0, TMP1
@@ -4021,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4021 | lwz TMP1, TAB:RB->asize 4633 | lwz TMP1, TAB:RB->asize
4022 | lwz TMP2, TAB:RB->array 4634 | lwz TMP2, TAB:RB->array
4023 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4635 | cmplw TMP0, TMP1; bge ->vmeta_tgetb
4636 |.if FPU
4024 | lwzx TMP1, TMP2, RC 4637 | lwzx TMP1, TMP2, RC
4025 | lfdx f0, TMP2, RC 4638 | lfdx f0, TMP2, RC
4639 |.else
4640 | lwzux TMP1, TMP2, RC
4641 | lwz TMP3, 4(TMP2)
4642 |.endif
4026 | checknil TMP1; beq >5 4643 | checknil TMP1; beq >5
4027 |1: 4644 |1:
4028 | ins_next1 4645 | ins_next1
4646 |.if FPU
4029 | stfdx f0, BASE, RA 4647 | stfdx f0, BASE, RA
4648 |.else
4649 | stwux TMP1, RA, BASE
4650 | stw TMP3, 4(RA)
4651 |.endif
4030 | ins_next2 4652 | ins_next2
4031 | 4653 |
4032 |5: // Check for __index if table value is nil. 4654 |5: // Check for __index if table value is nil.
@@ -4038,6 +4660,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4038 | bne <1 // 'no __index' flag set: done. 4660 | bne <1 // 'no __index' flag set: done.
4039 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4661 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4040 break; 4662 break;
4663 case BC_TGETR:
4664 | // RA = dst*8, RB = table*8, RC = key*8
4665 | add RB, BASE, RB
4666 | lwz TAB:CARG1, 4(RB)
4667 |.if DUALNUM
4668 | add RC, BASE, RC
4669 | lwz TMP0, TAB:CARG1->asize
4670 | lwz CARG2, 4(RC)
4671 | lwz TMP1, TAB:CARG1->array
4672 |.else
4673 | lfdx f0, BASE, RC
4674 | lwz TMP0, TAB:CARG1->asize
4675 | toint CARG2, f0
4676 | lwz TMP1, TAB:CARG1->array
4677 |.endif
4678 | cmplw TMP0, CARG2
4679 | slwi TMP2, CARG2, 3
4680 | ble ->vmeta_tgetr // In array part?
4681 |.if FPU
4682 | lfdx f14, TMP1, TMP2
4683 |.else
4684 | lwzux SAVE0, TMP2, TMP1
4685 | lwz SAVE1, 4(TMP2)
4686 |.endif
4687 |->BC_TGETR_Z:
4688 | ins_next1
4689 |.if FPU
4690 | stfdx f14, BASE, RA
4691 |.else
4692 | stwux SAVE0, RA, BASE
4693 | stw SAVE1, 4(RA)
4694 |.endif
4695 | ins_next2
4696 break;
4041 4697
4042 case BC_TSETV: 4698 case BC_TSETV:
4043 | // RA = src*8, RB = table*8, RC = key*8 4699 | // RA = src*8, RB = table*8, RC = key*8
@@ -4076,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4076 | ble ->vmeta_tsetv // Integer key and in array part? 4732 | ble ->vmeta_tsetv // Integer key and in array part?
4077 | lwzx TMP2, TMP1, TMP0 4733 | lwzx TMP2, TMP1, TMP0
4078 | lbz TMP3, TAB:RB->marked 4734 | lbz TMP3, TAB:RB->marked
4735 |.if FPU
4079 | lfdx f14, BASE, RA 4736 | lfdx f14, BASE, RA
4737 |.else
4738 | add SAVE1, BASE, RA
4739 | lwz SAVE0, 0(SAVE1)
4740 | lwz SAVE1, 4(SAVE1)
4741 |.endif
4080 | checknil TMP2; beq >3 4742 | checknil TMP2; beq >3
4081 |1: 4743 |1:
4082 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4744 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4745 |.if FPU
4083 | stfdx f14, TMP1, TMP0 4746 | stfdx f14, TMP1, TMP0
4747 |.else
4748 | stwux SAVE0, TMP1, TMP0
4749 | stw SAVE1, 4(TMP1)
4750 |.endif
4084 | bne >7 4751 | bne >7
4085 |2: 4752 |2:
4086 | ins_next 4753 | ins_next
@@ -4117,11 +4784,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4117 |->BC_TSETS_Z: 4784 |->BC_TSETS_Z:
4118 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 4785 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
4119 | lwz TMP0, TAB:RB->hmask 4786 | lwz TMP0, TAB:RB->hmask
4120 | lwz TMP1, STR:RC->hash 4787 | lwz TMP1, STR:RC->sid
4121 | lwz NODE:TMP2, TAB:RB->node 4788 | lwz NODE:TMP2, TAB:RB->node
4122 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4789 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
4123 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4790 | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
4791 |.if FPU
4124 | lfdx f14, BASE, RA 4792 | lfdx f14, BASE, RA
4793 |.else
4794 | add CARG2, BASE, RA
4795 | lwz SAVE0, 0(CARG2)
4796 | lwz SAVE1, 4(CARG2)
4797 |.endif
4125 | slwi TMP0, TMP1, 5 4798 | slwi TMP0, TMP1, 5
4126 | slwi TMP1, TMP1, 3 4799 | slwi TMP1, TMP1, 3
4127 | sub TMP1, TMP0, TMP1 4800 | sub TMP1, TMP0, TMP1
@@ -4137,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4137 | checknil CARG2; beq >4 // Key found, but nil value? 4810 | checknil CARG2; beq >4 // Key found, but nil value?
4138 |2: 4811 |2:
4139 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4812 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4813 |.if FPU
4140 | stfd f14, NODE:TMP2->val 4814 | stfd f14, NODE:TMP2->val
4815 |.else
4816 | stw SAVE0, NODE:TMP2->val.u32.hi
4817 | stw SAVE1, NODE:TMP2->val.u32.lo
4818 |.endif
4141 | bne >7 4819 | bne >7
4142 |3: 4820 |3:
4143 | ins_next 4821 | ins_next
@@ -4176,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4176 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4854 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4177 | // Returns TValue *. 4855 | // Returns TValue *.
4178 | lp BASE, L->base 4856 | lp BASE, L->base
4857 |.if FPU
4179 | stfd f14, 0(CRET1) 4858 | stfd f14, 0(CRET1)
4859 |.else
4860 | stw SAVE0, 0(CRET1)
4861 | stw SAVE1, 4(CRET1)
4862 |.endif
4180 | b <3 // No 2nd write barrier needed. 4863 | b <3 // No 2nd write barrier needed.
4181 | 4864 |
4182 |7: // Possible table write barrier for the value. Skip valiswhite check. 4865 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4193,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4193 | lwz TMP2, TAB:RB->array 4876 | lwz TMP2, TAB:RB->array
4194 | lbz TMP3, TAB:RB->marked 4877 | lbz TMP3, TAB:RB->marked
4195 | cmplw TMP0, TMP1 4878 | cmplw TMP0, TMP1
4879 |.if FPU
4196 | lfdx f14, BASE, RA 4880 | lfdx f14, BASE, RA
4881 |.else
4882 | add CARG2, BASE, RA
4883 | lwz SAVE0, 0(CARG2)
4884 | lwz SAVE1, 4(CARG2)
4885 |.endif
4197 | bge ->vmeta_tsetb 4886 | bge ->vmeta_tsetb
4198 | lwzx TMP1, TMP2, RC 4887 | lwzx TMP1, TMP2, RC
4199 | checknil TMP1; beq >5 4888 | checknil TMP1; beq >5
4200 |1: 4889 |1:
4201 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4890 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4891 |.if FPU
4202 | stfdx f14, TMP2, RC 4892 | stfdx f14, TMP2, RC
4893 |.else
4894 | stwux SAVE0, RC, TMP2
4895 | stw SAVE1, 4(RC)
4896 |.endif
4203 | bne >7 4897 | bne >7
4204 |2: 4898 |2:
4205 | ins_next 4899 | ins_next
@@ -4217,6 +4911,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4217 | barrierback TAB:RB, TMP3, TMP0 4911 | barrierback TAB:RB, TMP3, TMP0
4218 | b <2 4912 | b <2
4219 break; 4913 break;
4914 case BC_TSETR:
4915 | // RA = dst*8, RB = table*8, RC = key*8
4916 | add RB, BASE, RB
4917 | lwz TAB:CARG2, 4(RB)
4918 |.if DUALNUM
4919 | add RC, BASE, RC
4920 | lbz TMP3, TAB:CARG2->marked
4921 | lwz TMP0, TAB:CARG2->asize
4922 | lwz CARG3, 4(RC)
4923 | lwz TMP1, TAB:CARG2->array
4924 |.else
4925 | lfdx f0, BASE, RC
4926 | lbz TMP3, TAB:CARG2->marked
4927 | lwz TMP0, TAB:CARG2->asize
4928 | toint CARG3, f0
4929 | lwz TMP1, TAB:CARG2->array
4930 |.endif
4931 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4932 | bne >7
4933 |2:
4934 | cmplw TMP0, CARG3
4935 | slwi TMP2, CARG3, 3
4936 |.if FPU
4937 | lfdx f14, BASE, RA
4938 |.else
4939 | lwzux SAVE0, RA, BASE
4940 | lwz SAVE1, 4(RA)
4941 |.endif
4942 | ble ->vmeta_tsetr // In array part?
4943 | ins_next1
4944 |.if FPU
4945 | stfdx f14, TMP1, TMP2
4946 |.else
4947 | stwux SAVE0, TMP1, TMP2
4948 | stw SAVE1, 4(TMP1)
4949 |.endif
4950 | ins_next2
4951 |
4952 |7: // Possible table write barrier for the value. Skip valiswhite check.
4953 | barrierback TAB:CARG2, TMP3, TMP2
4954 | b <2
4955 break;
4956
4220 4957
4221 case BC_TSETM: 4958 case BC_TSETM:
4222 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4959 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4239,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4239 | add TMP1, TMP1, TMP0 4976 | add TMP1, TMP1, TMP0
4240 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4977 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4241 |3: // Copy result slots to table. 4978 |3: // Copy result slots to table.
4979 |.if FPU
4242 | lfd f0, 0(RA) 4980 | lfd f0, 0(RA)
4981 |.else
4982 | lwz SAVE0, 0(RA)
4983 | lwz SAVE1, 4(RA)
4984 |.endif
4243 | addi RA, RA, 8 4985 | addi RA, RA, 8
4244 | cmpw cr1, RA, TMP2 4986 | cmpw cr1, RA, TMP2
4987 |.if FPU
4245 | stfd f0, 0(TMP1) 4988 | stfd f0, 0(TMP1)
4989 |.else
4990 | stw SAVE0, 0(TMP1)
4991 | stw SAVE1, 4(TMP1)
4992 |.endif
4246 | addi TMP1, TMP1, 8 4993 | addi TMP1, TMP1, 8
4247 | blt cr1, <3 4994 | blt cr1, <3
4248 | bne >7 4995 | bne >7
@@ -4309,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4309 | beq cr1, >3 5056 | beq cr1, >3
4310 |2: 5057 |2:
4311 | addi TMP3, TMP2, 8 5058 | addi TMP3, TMP2, 8
5059 |.if FPU
4312 | lfdx f0, RA, TMP2 5060 | lfdx f0, RA, TMP2
5061 |.else
5062 | add CARG3, RA, TMP2
5063 | lwz CARG1, 0(CARG3)
5064 | lwz CARG2, 4(CARG3)
5065 |.endif
4313 | cmplw cr1, TMP3, NARGS8:RC 5066 | cmplw cr1, TMP3, NARGS8:RC
5067 |.if FPU
4314 | stfdx f0, BASE, TMP2 5068 | stfdx f0, BASE, TMP2
5069 |.else
5070 | stwux CARG1, TMP2, BASE
5071 | stw CARG2, 4(TMP2)
5072 |.endif
4315 | mr TMP2, TMP3 5073 | mr TMP2, TMP3
4316 | bne cr1, <2 5074 | bne cr1, <2
4317 |3: 5075 |3:
@@ -4344,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4344 | add BASE, BASE, RA 5102 | add BASE, BASE, RA
4345 | lwz TMP1, -24(BASE) 5103 | lwz TMP1, -24(BASE)
4346 | lwz LFUNC:RB, -20(BASE) 5104 | lwz LFUNC:RB, -20(BASE)
5105 |.if FPU
4347 | lfd f1, -8(BASE) 5106 | lfd f1, -8(BASE)
4348 | lfd f0, -16(BASE) 5107 | lfd f0, -16(BASE)
5108 |.else
5109 | lwz CARG1, -8(BASE)
5110 | lwz CARG2, -4(BASE)
5111 | lwz CARG3, -16(BASE)
5112 | lwz CARG4, -12(BASE)
5113 |.endif
4349 | stw TMP1, 0(BASE) // Copy callable. 5114 | stw TMP1, 0(BASE) // Copy callable.
4350 | stw LFUNC:RB, 4(BASE) 5115 | stw LFUNC:RB, 4(BASE)
4351 | checkfunc TMP1 5116 | checkfunc TMP1
4352 | stfd f1, 16(BASE) // Copy control var.
4353 | li NARGS8:RC, 16 // Iterators get 2 arguments. 5117 | li NARGS8:RC, 16 // Iterators get 2 arguments.
5118 |.if FPU
5119 | stfd f1, 16(BASE) // Copy control var.
4354 | stfdu f0, 8(BASE) // Copy state. 5120 | stfdu f0, 8(BASE) // Copy state.
5121 |.else
5122 | stw CARG1, 16(BASE) // Copy control var.
5123 | stw CARG2, 20(BASE)
5124 | stwu CARG3, 8(BASE) // Copy state.
5125 | stw CARG4, 4(BASE)
5126 |.endif
4355 | bne ->vmeta_call 5127 | bne ->vmeta_call
4356 | ins_call 5128 | ins_call
4357 break; 5129 break;
@@ -4372,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4372 | slwi TMP3, RC, 3 5144 | slwi TMP3, RC, 3
4373 | bge >5 // Index points after array part? 5145 | bge >5 // Index points after array part?
4374 | lwzx TMP2, TMP1, TMP3 5146 | lwzx TMP2, TMP1, TMP3
5147 |.if FPU
4375 | lfdx f0, TMP1, TMP3 5148 | lfdx f0, TMP1, TMP3
5149 |.else
5150 | lwzux CARG1, TMP3, TMP1
5151 | lwz CARG2, 4(TMP3)
5152 |.endif
4376 | checknil TMP2 5153 | checknil TMP2
4377 | lwz INS, -4(PC) 5154 | lwz INS, -4(PC)
4378 | beq >4 5155 | beq >4
@@ -4384,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4384 |.endif 5161 |.endif
4385 | addi RC, RC, 1 5162 | addi RC, RC, 1
4386 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 5163 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
5164 |.if FPU
4387 | stfd f0, 8(RA) 5165 | stfd f0, 8(RA)
5166 |.else
5167 | stw CARG1, 8(RA)
5168 | stw CARG2, 12(RA)
5169 |.endif
4388 | decode_RD4 TMP1, INS 5170 | decode_RD4 TMP1, INS
4389 | stw RC, -4(RA) // Update control var. 5171 | stw RC, -4(RA) // Update control var.
4390 | add PC, TMP1, TMP3 5172 | add PC, TMP1, TMP3
@@ -4409,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4409 | slwi RB, RC, 3 5191 | slwi RB, RC, 3
4410 | sub TMP3, TMP3, RB 5192 | sub TMP3, TMP3, RB
4411 | lwzx RB, TMP2, TMP3 5193 | lwzx RB, TMP2, TMP3
5194 |.if FPU
4412 | lfdx f0, TMP2, TMP3 5195 | lfdx f0, TMP2, TMP3
5196 |.else
5197 | add CARG3, TMP2, TMP3
5198 | lwz CARG1, 0(CARG3)
5199 | lwz CARG2, 4(CARG3)
5200 |.endif
4413 | add NODE:TMP3, TMP2, TMP3 5201 | add NODE:TMP3, TMP2, TMP3
4414 | checknil RB 5202 | checknil RB
4415 | lwz INS, -4(PC) 5203 | lwz INS, -4(PC)
4416 | beq >7 5204 | beq >7
5205 |.if FPU
4417 | lfd f1, NODE:TMP3->key 5206 | lfd f1, NODE:TMP3->key
5207 |.else
5208 | lwz CARG3, NODE:TMP3->key.u32.hi
5209 | lwz CARG4, NODE:TMP3->key.u32.lo
5210 |.endif
4418 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 5211 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
5212 |.if FPU
4419 | stfd f0, 8(RA) 5213 | stfd f0, 8(RA)
5214 |.else
5215 | stw CARG1, 8(RA)
5216 | stw CARG2, 12(RA)
5217 |.endif
4420 | add RC, RC, TMP0 5218 | add RC, RC, TMP0
4421 | decode_RD4 TMP1, INS 5219 | decode_RD4 TMP1, INS
5220 |.if FPU
4422 | stfd f1, 0(RA) 5221 | stfd f1, 0(RA)
5222 |.else
5223 | stw CARG3, 0(RA)
5224 | stw CARG4, 4(RA)
5225 |.endif
4423 | addi RC, RC, 1 5226 | addi RC, RC, 1
4424 | add PC, TMP1, TMP2 5227 | add PC, TMP1, TMP2
4425 | stw RC, -4(RA) // Update control var. 5228 | stw RC, -4(RA) // Update control var.
@@ -4485,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4485 | subi TMP2, TMP2, 16 5288 | subi TMP2, TMP2, 16
4486 | ble >2 // No vararg slots? 5289 | ble >2 // No vararg slots?
4487 |1: // Copy vararg slots to destination slots. 5290 |1: // Copy vararg slots to destination slots.
5291 |.if FPU
4488 | lfd f0, 0(RC) 5292 | lfd f0, 0(RC)
5293 |.else
5294 | lwz CARG1, 0(RC)
5295 | lwz CARG2, 4(RC)
5296 |.endif
4489 | addi RC, RC, 8 5297 | addi RC, RC, 8
5298 |.if FPU
4490 | stfd f0, 0(RA) 5299 | stfd f0, 0(RA)
5300 |.else
5301 | stw CARG1, 0(RA)
5302 | stw CARG2, 4(RA)
5303 |.endif
4491 | cmplw RA, TMP2 5304 | cmplw RA, TMP2
4492 | cmplw cr1, RC, TMP3 5305 | cmplw cr1, RC, TMP3
4493 | bge >3 // All destination slots filled? 5306 | bge >3 // All destination slots filled?
@@ -4510,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4510 | addi MULTRES, TMP1, 8 5323 | addi MULTRES, TMP1, 8
4511 | bgt >7 5324 | bgt >7
4512 |6: 5325 |6:
5326 |.if FPU
4513 | lfd f0, 0(RC) 5327 | lfd f0, 0(RC)
5328 |.else
5329 | lwz CARG1, 0(RC)
5330 | lwz CARG2, 4(RC)
5331 |.endif
4514 | addi RC, RC, 8 5332 | addi RC, RC, 8
5333 |.if FPU
4515 | stfd f0, 0(RA) 5334 | stfd f0, 0(RA)
5335 |.else
5336 | stw CARG1, 0(RA)
5337 | stw CARG2, 4(RA)
5338 |.endif
4516 | cmplw RC, TMP3 5339 | cmplw RC, TMP3
4517 | addi RA, RA, 8 5340 | addi RA, RA, 8
4518 | blt <6 // More vararg slots? 5341 | blt <6 // More vararg slots?
@@ -4563,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4563 | li TMP1, 0 5386 | li TMP1, 0
4564 |2: 5387 |2:
4565 | addi TMP3, TMP1, 8 5388 | addi TMP3, TMP1, 8
5389 |.if FPU
4566 | lfdx f0, RA, TMP1 5390 | lfdx f0, RA, TMP1
5391 |.else
5392 | add CARG3, RA, TMP1
5393 | lwz CARG1, 0(CARG3)
5394 | lwz CARG2, 4(CARG3)
5395 |.endif
4567 | cmpw TMP3, RC 5396 | cmpw TMP3, RC
5397 |.if FPU
4568 | stfdx f0, TMP2, TMP1 5398 | stfdx f0, TMP2, TMP1
5399 |.else
5400 | add CARG3, TMP2, TMP1
5401 | stw CARG1, 0(CARG3)
5402 | stw CARG2, 4(CARG3)
5403 |.endif
4569 | beq >3 5404 | beq >3
4570 | addi TMP1, TMP3, 8 5405 | addi TMP1, TMP3, 8
5406 |.if FPU
4571 | lfdx f1, RA, TMP3 5407 | lfdx f1, RA, TMP3
5408 |.else
5409 | add CARG3, RA, TMP3
5410 | lwz CARG1, 0(CARG3)
5411 | lwz CARG2, 4(CARG3)
5412 |.endif
4572 | cmpw TMP1, RC 5413 | cmpw TMP1, RC
5414 |.if FPU
4573 | stfdx f1, TMP2, TMP3 5415 | stfdx f1, TMP2, TMP3
5416 |.else
5417 | add CARG3, TMP2, TMP3
5418 | stw CARG1, 0(CARG3)
5419 | stw CARG2, 4(CARG3)
5420 |.endif
4574 | bne <2 5421 | bne <2
4575 |3: 5422 |3:
4576 |5: 5423 |5:
@@ -4612,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4612 | subi TMP2, BASE, 8 5459 | subi TMP2, BASE, 8
4613 | decode_RB8 RB, INS 5460 | decode_RB8 RB, INS
4614 if (op == BC_RET1) { 5461 if (op == BC_RET1) {
5462 |.if FPU
4615 | lfd f0, 0(RA) 5463 | lfd f0, 0(RA)
4616 | stfd f0, 0(TMP2) 5464 | stfd f0, 0(TMP2)
5465 |.else
5466 | lwz CARG1, 0(RA)
5467 | lwz CARG2, 4(RA)
5468 | stw CARG1, 0(TMP2)
5469 | stw CARG2, 4(TMP2)
5470 |.endif
4617 } 5471 }
4618 |5: 5472 |5:
4619 | cmplw RB, RD 5473 | cmplw RB, RD
@@ -4674,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4674 |4: 5528 |4:
4675 | stw CARG1, FORL_IDX*8+4(RA) 5529 | stw CARG1, FORL_IDX*8+4(RA)
4676 } else { 5530 } else {
4677 | lwz TMP3, FORL_STEP*8(RA) 5531 | lwz SAVE0, FORL_STEP*8(RA)
4678 | lwz CARG3, FORL_STEP*8+4(RA) 5532 | lwz CARG3, FORL_STEP*8+4(RA)
4679 | lwz TMP2, FORL_STOP*8(RA) 5533 | lwz TMP2, FORL_STOP*8(RA)
4680 | lwz CARG2, FORL_STOP*8+4(RA) 5534 | lwz CARG2, FORL_STOP*8+4(RA)
4681 | cmplw cr7, TMP3, TISNUM 5535 | cmplw cr7, SAVE0, TISNUM
4682 | cmplw cr1, TMP2, TISNUM 5536 | cmplw cr1, TMP2, TISNUM
4683 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 5537 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4684 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 5538 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4721,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4721 if (vk) { 5575 if (vk) {
4722 |.if DUALNUM 5576 |.if DUALNUM
4723 |9: // FP loop. 5577 |9: // FP loop.
5578 |.if FPU
4724 | lfd f1, FORL_IDX*8(RA) 5579 | lfd f1, FORL_IDX*8(RA)
4725 |.else 5580 |.else
5581 | lwz CARG1, FORL_IDX*8(RA)
5582 | lwz CARG2, FORL_IDX*8+4(RA)
5583 |.endif
5584 |.else
4726 | lfdux f1, RA, BASE 5585 | lfdux f1, RA, BASE
4727 |.endif 5586 |.endif
5587 |.if FPU
4728 | lfd f3, FORL_STEP*8(RA) 5588 | lfd f3, FORL_STEP*8(RA)
4729 | lfd f2, FORL_STOP*8(RA) 5589 | lfd f2, FORL_STOP*8(RA)
4730 | lwz TMP3, FORL_STEP*8(RA)
4731 | fadd f1, f1, f3 5590 | fadd f1, f1, f3
4732 | stfd f1, FORL_IDX*8(RA) 5591 | stfd f1, FORL_IDX*8(RA)
5592 |.else
5593 | lwz CARG3, FORL_STEP*8(RA)
5594 | lwz CARG4, FORL_STEP*8+4(RA)
5595 | mr SAVE1, RD
5596 | blex __adddf3
5597 | mr RD, SAVE1
5598 | stw CRET1, FORL_IDX*8(RA)
5599 | stw CRET2, FORL_IDX*8+4(RA)
5600 | lwz CARG3, FORL_STOP*8(RA)
5601 | lwz CARG4, FORL_STOP*8+4(RA)
5602 |.endif
5603 | lwz SAVE0, FORL_STEP*8(RA)
4733 } else { 5604 } else {
4734 |.if DUALNUM 5605 |.if DUALNUM
4735 |9: // FP loop. 5606 |9: // FP loop.
4736 |.else 5607 |.else
4737 | lwzux TMP1, RA, BASE 5608 | lwzux TMP1, RA, BASE
4738 | lwz TMP3, FORL_STEP*8(RA) 5609 | lwz SAVE0, FORL_STEP*8(RA)
4739 | lwz TMP2, FORL_STOP*8(RA) 5610 | lwz TMP2, FORL_STOP*8(RA)
4740 | cmplw cr0, TMP1, TISNUM 5611 | cmplw cr0, TMP1, TISNUM
4741 | cmplw cr7, TMP3, TISNUM 5612 | cmplw cr7, SAVE0, TISNUM
4742 | cmplw cr1, TMP2, TISNUM 5613 | cmplw cr1, TMP2, TISNUM
4743 |.endif 5614 |.endif
5615 |.if FPU
4744 | lfd f1, FORL_IDX*8(RA) 5616 | lfd f1, FORL_IDX*8(RA)
5617 |.else
5618 | lwz CARG1, FORL_IDX*8(RA)
5619 | lwz CARG2, FORL_IDX*8+4(RA)
5620 |.endif
4745 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 5621 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
4746 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 5622 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
5623 |.if FPU
4747 | lfd f2, FORL_STOP*8(RA) 5624 | lfd f2, FORL_STOP*8(RA)
5625 |.else
5626 | lwz CARG3, FORL_STOP*8(RA)
5627 | lwz CARG4, FORL_STOP*8+4(RA)
5628 |.endif
4748 | bge ->vmeta_for 5629 | bge ->vmeta_for
4749 } 5630 }
4750 | cmpwi cr6, TMP3, 0 5631 | cmpwi cr6, SAVE0, 0
4751 if (op != BC_JFORL) { 5632 if (op != BC_JFORL) {
4752 | srwi RD, RD, 1 5633 | srwi RD, RD, 1
4753 } 5634 }
5635 |.if FPU
4754 | stfd f1, FORL_EXT*8(RA) 5636 | stfd f1, FORL_EXT*8(RA)
5637 |.else
5638 | stw CARG1, FORL_EXT*8(RA)
5639 | stw CARG2, FORL_EXT*8+4(RA)
5640 |.endif
4755 if (op != BC_JFORL) { 5641 if (op != BC_JFORL) {
4756 | add RD, PC, RD 5642 | add RD, PC, RD
4757 } 5643 }
5644 |.if FPU
4758 | fcmpu cr0, f1, f2 5645 | fcmpu cr0, f1, f2
5646 |.else
5647 | mr SAVE1, RD
5648 | blex __ledf2
5649 | cmpwi CRET1, 0
5650 | mr RD, SAVE1
5651 |.endif
4759 if (op == BC_JFORI) { 5652 if (op == BC_JFORI) {
4760 | addis PC, RD, -(BCBIAS_J*4 >> 16) 5653 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4761 } 5654 }
@@ -4858,8 +5751,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4858 | lp TMP2, TRACE:TMP2->mcode 5751 | lp TMP2, TRACE:TMP2->mcode
4859 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5752 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4860 | mtctr TMP2 5753 | mtctr TMP2
4861 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4862 | addi JGL, DISPATCH, GG_DISP2G+32768 5754 | addi JGL, DISPATCH, GG_DISP2G+32768
5755 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4863 | bctr 5756 | bctr
4864 |.endif 5757 |.endif
4865 break; 5758 break;
@@ -4994,6 +5887,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4994 | lp TMP1, L->top 5887 | lp TMP1, L->top
4995 | li_vmstate INTERP 5888 | li_vmstate INTERP
4996 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 5889 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
5890 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
4997 | sub RA, TMP1, RD // RA = L->top - nresults*8 5891 | sub RA, TMP1, RD // RA = L->top - nresults*8
4998 | st_vmstate 5892 | st_vmstate
4999 | b ->vm_returnc 5893 | b ->vm_returnc
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
deleted file mode 100644
index b55b8b3b..00000000
--- a/src/vm_ppcspe.dasc
+++ /dev/null
@@ -1,3691 +0,0 @@
1|// Low-level VM code for PowerPC/e500 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch ppc
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|// The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// Fixed register assignments for the interpreter.
19|// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr
20|
21|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r14 // Base of current Lua stack frame.
23|.define KBASE, r15 // Constants of current Lua function.
24|.define PC, r16 // Next PC.
25|.define DISPATCH, r17 // Opcode dispatch table.
26|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
28|
29|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save.
30|.define TISNUM, r22
31|.define TISSTR, r23
32|.define TISTAB, r24
33|.define TISFUNC, r25
34|.define TISNIL, r26
35|.define TOBIT, r27
36|.define ZERO, TOBIT // Zero in lo word.
37|
38|// The following temporaries are not saved across C calls, except for RA.
39|.define RA, r20 // Callee-save.
40|.define RB, r10
41|.define RC, r11
42|.define RD, r12
43|.define INS, r7 // Overlaps CARG5.
44|
45|.define TMP0, r0
46|.define TMP1, r8
47|.define TMP2, r9
48|.define TMP3, r6 // Overlaps CARG4.
49|
50|// Saved temporaries.
51|.define SAVE0, r21
52|
53|// Calling conventions.
54|.define CARG1, r3
55|.define CARG2, r4
56|.define CARG3, r5
57|.define CARG4, r6 // Overlaps TMP3.
58|.define CARG5, r7 // Overlaps INS.
59|
60|.define CRET1, r3
61|.define CRET2, r4
62|
63|// Stack layout while in interpreter. Must match with lj_frame.h.
64|.define SAVE_LR, 188(sp)
65|.define CFRAME_SPACE, 184 // Delta for sp.
66|// Back chain for sp: 184(sp) <-- sp entering interpreter
67|.define SAVE_r31, 176(sp) // 64 bit register saves.
68|.define SAVE_r30, 168(sp)
69|.define SAVE_r29, 160(sp)
70|.define SAVE_r28, 152(sp)
71|.define SAVE_r27, 144(sp)
72|.define SAVE_r26, 136(sp)
73|.define SAVE_r25, 128(sp)
74|.define SAVE_r24, 120(sp)
75|.define SAVE_r23, 112(sp)
76|.define SAVE_r22, 104(sp)
77|.define SAVE_r21, 96(sp)
78|.define SAVE_r20, 88(sp)
79|.define SAVE_r19, 80(sp)
80|.define SAVE_r18, 72(sp)
81|.define SAVE_r17, 64(sp)
82|.define SAVE_r16, 56(sp)
83|.define SAVE_r15, 48(sp)
84|.define SAVE_r14, 40(sp)
85|.define SAVE_CR, 36(sp)
86|.define UNUSED1, 32(sp)
87|.define SAVE_ERRF, 28(sp) // 32 bit C frame info.
88|.define SAVE_NRES, 24(sp)
89|.define SAVE_CFRAME, 20(sp)
90|.define SAVE_L, 16(sp)
91|.define SAVE_PC, 12(sp)
92|.define SAVE_MULTRES, 8(sp)
93|// Next frame lr: 4(sp)
94|// Back chain for sp: 0(sp) <-- sp while in interpreter
95|
96|.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro
97|.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro
98|
99|.macro saveregs
100| stwu sp, -CFRAME_SPACE(sp)
101| save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19
102| mflr r0; mfcr r12
103| save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25
104| stw r0, SAVE_LR; stw r12, SAVE_CR
105| save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31
106|.endmacro
107|
108|.macro restoreregs
109| lwz r0, SAVE_LR; lwz r12, SAVE_CR
110| rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19
111| mtlr r0; mtcrf 0x38, r12
112| rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25
113| rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31
114| addi sp, sp, CFRAME_SPACE
115|.endmacro
116|
117|// Type definitions. Some of these are only used for documentation.
118|.type L, lua_State, LREG
119|.type GL, global_State
120|.type TVALUE, TValue
121|.type GCOBJ, GCobj
122|.type STR, GCstr
123|.type TAB, GCtab
124|.type LFUNC, GCfuncL
125|.type CFUNC, GCfuncC
126|.type PROTO, GCproto
127|.type UPVAL, GCupval
128|.type NODE, Node
129|.type NARGS8, int
130|.type TRACE, GCtrace
131|
132|//-----------------------------------------------------------------------
133|
134|// These basic macros should really be part of DynASM.
135|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
136|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
137|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
138|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
139|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
140|
141|// Trap for not-yet-implemented parts.
142|.macro NYI; tw 4, sp, sp; .endmacro
143|
144|//-----------------------------------------------------------------------
145|
146|// Access to frame relative to BASE.
147|.define FRAME_PC, -8
148|.define FRAME_FUNC, -4
149|
150|// Instruction decode.
151|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
152|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
153|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
154|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
155|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
156|
157|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
158|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
159|
160|// Instruction fetch.
161|.macro ins_NEXT1
162| lwz INS, 0(PC)
163| addi PC, PC, 4
164|.endmacro
165|// Instruction decode+dispatch.
166|.macro ins_NEXT2
167| decode_OP4 TMP1, INS
168| decode_RB8 RB, INS
169| decode_RD8 RD, INS
170| lwzx TMP0, DISPATCH, TMP1
171| decode_RA8 RA, INS
172| decode_RC8 RC, INS
173| mtctr TMP0
174| bctr
175|.endmacro
176|.macro ins_NEXT
177| ins_NEXT1
178| ins_NEXT2
179|.endmacro
180|
181|// Instruction footer.
182|.if 1
183| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
184| .define ins_next, ins_NEXT
185| .define ins_next_, ins_NEXT
186| .define ins_next1, ins_NEXT1
187| .define ins_next2, ins_NEXT2
188|.else
189| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
190| // Affects only certain kinds of benchmarks (and only with -j off).
191| .macro ins_next
192| b ->ins_next
193| .endmacro
194| .macro ins_next1
195| .endmacro
196| .macro ins_next2
197| b ->ins_next
198| .endmacro
199| .macro ins_next_
200| ->ins_next:
201| ins_NEXT
202| .endmacro
203|.endif
204|
205|// Call decode and dispatch.
206|.macro ins_callt
207| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
208| lwz PC, LFUNC:RB->pc
209| lwz INS, 0(PC)
210| addi PC, PC, 4
211| decode_OP4 TMP1, INS
212| decode_RA8 RA, INS
213| lwzx TMP0, DISPATCH, TMP1
214| add RA, RA, BASE
215| mtctr TMP0
216| bctr
217|.endmacro
218|
219|.macro ins_call
220| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
221| stw PC, FRAME_PC(BASE)
222| ins_callt
223|.endmacro
224|
225|//-----------------------------------------------------------------------
226|
227|// Macros to test operand types.
228|.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro
229|.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro
230|.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro
231|.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro
232|.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro
233|.macro checkok, label; blt label; .endmacro
234|.macro checkfail, label; bge label; .endmacro
235|.macro checkanyfail, label; bns label; .endmacro
236|.macro checkallok, label; bso label; .endmacro
237|
238|.macro branch_RD
239| srwi TMP0, RD, 1
240| add PC, PC, TMP0
241| addis PC, PC, -(BCBIAS_J*4 >> 16)
242|.endmacro
243|
244|// Assumes DISPATCH is relative to GL.
245#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
246#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
247|
248#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
249|
250|.macro hotloop
251| NYI
252|.endmacro
253|
254|.macro hotcall
255| NYI
256|.endmacro
257|
258|// Set current VM state. Uses TMP0.
259|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
260|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
261|
262|// Move table write barrier back. Overwrites mark and tmp.
263|.macro barrierback, tab, mark, tmp
264| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
265| // Assumes LJ_GC_BLACK is 0x04.
266| rlwinm mark, mark, 0, 30, 28 // black2gray(tab)
267| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
268| stb mark, tab->marked
269| stw tmp, tab->gclist
270|.endmacro
271|
272|//-----------------------------------------------------------------------
273
274/* Generate subroutines used by opcodes and other parts of the VM. */
275/* The .code_sub section should be last to help static branch prediction. */
276static void build_subroutines(BuildCtx *ctx)
277{
278 |.code_sub
279 |
280 |//-----------------------------------------------------------------------
281 |//-- Return handling ----------------------------------------------------
282 |//-----------------------------------------------------------------------
283 |
284 |->vm_returnp:
285 | // See vm_return. Also: TMP2 = previous base.
286 | andi. TMP0, PC, FRAME_P
287 | evsplati TMP1, LJ_TTRUE
288 | beq ->cont_dispatch
289 |
290 | // Return from pcall or xpcall fast func.
291 | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
292 | mr BASE, TMP2 // Restore caller base.
293 | // Prepending may overwrite the pcall frame, so do it at the end.
294 | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
295 |
296 |->vm_returnc:
297 | addi RD, RD, 8 // RD = (nresults+1)*8.
298 | andi. TMP0, PC, FRAME_TYPE
299 | cmpwi cr1, RD, 0
300 | li CRET1, LUA_YIELD
301 | beq cr1, ->vm_unwind_c_eh
302 | mr MULTRES, RD
303 | beq ->BC_RET_Z // Handle regular return to Lua.
304 |
305 |->vm_return:
306 | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
307 | // TMP0 = PC & FRAME_TYPE
308 | cmpwi TMP0, FRAME_C
309 | rlwinm TMP2, PC, 0, 0, 28
310 | li_vmstate C
311 | sub TMP2, BASE, TMP2 // TMP2 = previous base.
312 | bne ->vm_returnp
313 |
314 | addic. TMP1, RD, -8
315 | stw TMP2, L->base
316 | lwz TMP2, SAVE_NRES
317 | subi BASE, BASE, 8
318 | st_vmstate
319 | slwi TMP2, TMP2, 3
320 | beq >2
321 |1:
322 | addic. TMP1, TMP1, -8
323 | evldd TMP0, 0(RA)
324 | addi RA, RA, 8
325 | evstdd TMP0, 0(BASE)
326 | addi BASE, BASE, 8
327 | bne <1
328 |
329 |2:
330 | cmpw TMP2, RD // More/less results wanted?
331 | bne >6
332 |3:
333 | stw BASE, L->top // Store new top.
334 |
335 |->vm_leave_cp:
336 | lwz TMP0, SAVE_CFRAME // Restore previous C frame.
337 | li CRET1, 0 // Ok return status for vm_pcall.
338 | stw TMP0, L->cframe
339 |
340 |->vm_leave_unw:
341 | restoreregs
342 | blr
343 |
344 |6:
345 | ble >7 // Less results wanted?
346 | // More results wanted. Check stack size and fill up results with nil.
347 | lwz TMP1, L->maxstack
348 | cmplw BASE, TMP1
349 | bge >8
350 | evstdd TISNIL, 0(BASE)
351 | addi RD, RD, 8
352 | addi BASE, BASE, 8
353 | b <2
354 |
355 |7: // Less results wanted.
356 | sub TMP0, RD, TMP2
357 | cmpwi TMP2, 0 // LUA_MULTRET+1 case?
358 | sub TMP0, BASE, TMP0 // Subtract the difference.
359 | iseleq BASE, BASE, TMP0 // Either keep top or shrink it.
360 | b <3
361 |
362 |8: // Corner case: need to grow stack for filling up results.
363 | // This can happen if:
364 | // - A C function grows the stack (a lot).
365 | // - The GC shrinks the stack in between.
366 | // - A return back from a lua_call() with (high) nresults adjustment.
367 | stw BASE, L->top // Save current top held in BASE (yes).
368 | mr SAVE0, RD
369 | mr CARG2, TMP2
370 | mr CARG1, L
371 | bl extern lj_state_growstack // (lua_State *L, int n)
372 | lwz TMP2, SAVE_NRES
373 | mr RD, SAVE0
374 | slwi TMP2, TMP2, 3
375 | lwz BASE, L->top // Need the (realloced) L->top in BASE.
376 | b <2
377 |
378 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
379 | // (void *cframe, int errcode)
380 | mr sp, CARG1
381 | mr CRET1, CARG2
382 |->vm_unwind_c_eh: // Landing pad for external unwinder.
383 | lwz L, SAVE_L
384 | li TMP0, ~LJ_VMST_C
385 | lwz GL:TMP1, L->glref
386 | stw TMP0, GL:TMP1->vmstate
387 | b ->vm_leave_unw
388 |
389 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
390 | // (void *cframe)
391 | rlwinm sp, CARG1, 0, 0, 29 // sp = cframe with the two low bits cleared.
392 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
393 | lwz L, SAVE_L
394 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
395 | evsplati TISFUNC, LJ_TFUNC
396 | lus TOBIT, 0x4338
397 | evsplati TISTAB, LJ_TTAB
398 | li TMP0, 0
399 | lwz BASE, L->base
400 | evmergelo TOBIT, TOBIT, TMP0 // Upper word = low word of TOBIT, lower word = 0.
401 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
402 | evsplati TISSTR, LJ_TSTR
403 | li TMP1, LJ_TFALSE
404 | evsplati TISNIL, LJ_TNIL
405 | li_vmstate INTERP
406 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
407 | la RA, -8(BASE) // Results start at BASE-8.
408 | addi DISPATCH, DISPATCH, GG_G2DISP
409 | stw TMP1, 0(RA) // Prepend false to error message.
410 | li RD, 16 // 2 results: false + error message.
411 | st_vmstate
412 | b ->vm_returnc
413 |
414 |//-----------------------------------------------------------------------
415 |//-- Grow stack for calls -----------------------------------------------
416 |//-----------------------------------------------------------------------
417 |
418 |->vm_growstack_c: // Grow stack for C function.
419 | li CARG2, LUA_MINSTACK // n = LUA_MINSTACK.
420 | b >2
421 |
422 |->vm_growstack_l: // Grow stack for Lua function.
423 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
424 | add RC, BASE, RC // RC = BASE + nargs*8 = top.
425 | sub RA, RA, BASE // RA = framesize*8.
426 | stw BASE, L->base
427 | addi PC, PC, 4 // Must point after first instruction.
428 | stw RC, L->top
429 | srwi CARG2, RA, 3 // n = framesize.
430 |2: // Shared tail for both entry points.
431 | // L->base = new base, L->top = top
432 | stw PC, SAVE_PC
433 | mr CARG1, L
434 | bl extern lj_state_growstack // (lua_State *L, int n)
435 | lwz BASE, L->base // Reload BASE and top from L after the call.
436 | lwz RC, L->top
437 | lwz LFUNC:RB, FRAME_FUNC(BASE)
438 | sub RC, RC, BASE // RC = top - BASE = nargs*8 again.
439 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
440 | ins_callt // Just retry the call.
441 |
442 |//-----------------------------------------------------------------------
443 |//-- Entry points into the assembler VM ---------------------------------
444 |//-----------------------------------------------------------------------
445 |
446 |->vm_resume: // Setup C frame and resume thread.
447 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
448 | saveregs
449 | mr L, CARG1
450 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
451 | mr BASE, CARG2
452 | lbz TMP1, L->status
453 | stw L, SAVE_L
454 | li PC, FRAME_CP
455 | addi TMP0, sp, CFRAME_RESUME // TMP0 = sp + CFRAME_RESUME.
456 | addi DISPATCH, DISPATCH, GG_G2DISP
457 | stw CARG3, SAVE_NRES // CARG3 (nres1) is 0 per the signature above.
458 | cmplwi TMP1, 0 // L->status == 0? Tested by the beq below.
459 | stw CARG3, SAVE_ERRF
460 | stw TMP0, L->cframe
461 | stw CARG3, SAVE_CFRAME
462 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
463 | beq >3 // Status is 0: continue at the shared entry point 3.
464 |
465 | // Resume after yield (like a return).
466 | mr RA, BASE // RA = the base argument passed in.
467 | lwz BASE, L->base
468 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
469 | lwz TMP1, L->top
470 | evsplati TISFUNC, LJ_TFUNC
471 | lus TOBIT, 0x4338
472 | evsplati TISTAB, LJ_TTAB
473 | lwz PC, FRAME_PC(BASE)
474 | li TMP2, 0
475 | evsplati TISSTR, LJ_TSTR
476 | sub RD, TMP1, BASE // RD = L->top - L->base.
477 | evmergelo TOBIT, TOBIT, TMP2 // Upper word = low word of TOBIT, lower word = 0.
478 | stb CARG3, L->status // Store CARG3 (0 per the signature) as the new status.
479 | andi. TMP0, PC, FRAME_TYPE // cr0.eq is set if the frame type bits are zero.
480 | li_vmstate INTERP
481 | addi RD, RD, 8 // RD += 8.
482 | evsplati TISNIL, LJ_TNIL
483 | mr MULTRES, RD
484 | st_vmstate
485 | beq ->BC_RET_Z // Frame type bits zero.
486 | b ->vm_return // Any other frame type.
487 |
488 |->vm_pcall: // Setup protected C frame and enter VM.
489 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
490 | saveregs
491 | li PC, FRAME_CP
492 | stw CARG4, SAVE_ERRF // Save ef.
493 | b >1
494 |
495 |->vm_call: // Setup C frame and enter VM.
496 | // (lua_State *L, TValue *base, int nres1)
497 | saveregs
498 | li PC, FRAME_C
499 |
500 |1: // Entry point for vm_pcall above (PC = ftype).
501 | lwz TMP1, L:CARG1->cframe // TMP1 = previous cframe.
502 | stw CARG3, SAVE_NRES
503 | mr L, CARG1
504 | stw CARG1, SAVE_L
505 | mr BASE, CARG2
506 | stw sp, L->cframe // Add our C frame to cframe chain.
507 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
508 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
509 | stw TMP1, SAVE_CFRAME // Remember the previous cframe.
510 | addi DISPATCH, DISPATCH, GG_G2DISP
511 |
512 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
513 | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call).
514 | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
515 | lwz TMP1, L->top
516 | evsplati TISFUNC, LJ_TFUNC
517 | add PC, PC, BASE
518 | evsplati TISTAB, LJ_TTAB
519 | lus TOBIT, 0x4338
520 | li TMP0, 0
521 | sub PC, PC, TMP2 // PC = frame delta + frame type
522 | evsplati TISSTR, LJ_TSTR
523 | sub NARGS8:RC, TMP1, BASE // nargs*8 = L->top - BASE.
524 | evmergelo TOBIT, TOBIT, TMP0 // Upper word = low word of TOBIT, lower word = 0.
525 | li_vmstate INTERP
526 | evsplati TISNIL, LJ_TNIL
527 | st_vmstate
528 |
529 |->vm_call_dispatch:
530 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
531 | li TMP0, -8
532 | evlddx LFUNC:RB, BASE, TMP0 // Load the 64-bit slot at BASE-8.
533 | checkfunc LFUNC:RB
534 | checkfail ->vmeta_call // Check failed: go to vmeta_call.
535 |
536 |->vm_call_dispatch_f:
537 | ins_call
538 | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
539 |
540 |->vm_cpcall: // Setup protected C frame, call C.
541 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
542 | saveregs
543 | mr L, CARG1
544 | lwz TMP0, L:CARG1->stack
545 | stw CARG1, SAVE_L
546 | lwz TMP1, L->top
547 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
548 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
549 | lwz TMP1, L->cframe // TMP1 = previous cframe.
550 | stw sp, L->cframe // Add our C frame to cframe chain.
551 | li TMP2, 0
552 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
553 | stw TMP2, SAVE_ERRF // No error function.
554 | stw TMP1, SAVE_CFRAME // Remember the previous cframe.
555 | mtctr CARG4 // Call target = cp.
556 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
557 | mr. BASE, CRET1 // BASE = returned value; also sets cr0 for the bne below.
558 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
559 | li PC, FRAME_CP
560 | addi DISPATCH, DISPATCH, GG_G2DISP
561 | bne <3 // Else continue with the call.
562 | b ->vm_leave_cp // No base? Just remove C frame.
563 |
564 |//-----------------------------------------------------------------------
565 |//-- Metamethod handling ------------------------------------------------
566 |//-----------------------------------------------------------------------
567 |
568 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
569 |// stack, so BASE doesn't need to be reloaded across these calls.
570 |
571 |//-- Continuation dispatch ----------------------------------------------
572 |
573 |->cont_dispatch: // Dispatch to the continuation stored below the meta base.
574 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
575 | lwz TMP0, -12(BASE) // Continuation.
576 | mr RB, BASE
577 | mr BASE, TMP2 // Restore caller BASE. NOTE(review): TMP2 is an input here too -- confirm callers set it.
578 | lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
579 | cmplwi TMP0, 0 // Continuation == 0?
580 | lwz PC, -16(RB) // Restore PC from [cont|PC].
581 | beq >1 // Zero continuation: handled at 1 below.
582 | subi TMP2, RD, 8 // TMP2 = nresults*8.
583 | lwz TMP1, LFUNC:TMP1->pc
584 | evstddx TISNIL, RA, TMP2 // Ensure one valid arg.
585 | lwz KBASE, PC2PROTO(k)(TMP1)
586 | // BASE = base, RA = resultptr, RB = meta base
587 | mtctr TMP0
588 | bctr // Jump to continuation.
589 |
590 |1: // Tail call from C function.
591 | subi TMP1, RB, 16
592 | sub RC, TMP1, BASE // RC = (meta base - 16) - BASE.
593 | b ->vm_call_tail
594 |
595 |->cont_cat: // RA = resultptr, RB = meta base
596 | lwz INS, -4(PC) // Reload the instruction word just before PC.
597 | subi CARG2, RB, 16 // CARG2 = meta base - 16.
598 | decode_RB8 SAVE0, INS
599 | evldd TMP0, 0(RA) // TMP0 = result value.
600 | add TMP1, BASE, SAVE0
601 | stw BASE, L->base
602 | cmplw TMP1, CARG2 // BASE+SAVE0 == CARG2? Tested by the bne below.
603 | sub CARG3, CARG2, TMP1
604 | decode_RA8 RA, INS
605 | evstdd TMP0, 0(CARG2) // Store the result at CARG2.
606 | bne ->BC_CAT_Z // Not equal: continue in BC_CAT_Z.
607 | evstddx TMP0, BASE, RA // Equal: store the result at BASE+RA.
608 | b ->cont_nop
609 |
610 |//-- Table indexing metamethods -----------------------------------------
611 |
612 |->vmeta_tgets1: // String key in RC; object operand decoded from INS.
613 | evmergelo STR:RC, TISSTR, STR:RC // Build a TValue: string tag in upper word, pointer in lower.
614 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
615 | decode_RB8 RB, INS
616 | evstdd STR:RC, 0(CARG3) // k = &tmptv, holding the string key.
617 | add CARG2, BASE, RB // o = BASE+RB.
618 | b >1
619 |
620 |->vmeta_tgets: // Table in RB and string key in RC; both passed via temporaries.
621 | evmergelo TAB:RB, TISTAB, TAB:RB // Build a TValue: table tag in upper word, pointer in lower.
622 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
623 | evmergelo STR:RC, TISSTR, STR:RC // Build a TValue: string tag in upper word, pointer in lower.
624 | evstdd TAB:RB, 0(CARG2) // o = &tmptv, holding the table.
625 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
626 | evstdd STR:RC, 0(CARG3) // k = &tmptv2, holding the string key.
627 | b >1
628 |
629 |->vmeta_tgetb: // TMP0 = index
630 | efdcfsi TMP0, TMP0 // Convert the signed integer index to a double.
631 | decode_RB8 RB, INS
632 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
633 | add CARG2, BASE, RB // o = BASE+RB.
634 | evstdd TMP0, 0(CARG3) // k = &tmptv, holding the number key.
635 | b >1
636 |
637 |->vmeta_tgetv: // Object and key operands are both decoded from INS.
638 | decode_RB8 RB, INS
639 | decode_RC8 RC, INS
640 | add CARG2, BASE, RB // o = BASE+RB.
641 | add CARG3, BASE, RC // k = BASE+RC.
642 |1: // Shared tail: CARG2 = o, CARG3 = k.
643 | stw BASE, L->base
644 | mr CARG1, L
645 | stw PC, SAVE_PC
646 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
647 | // Returns TValue * (finished) or NULL (metamethod).
648 | cmplwi CRET1, 0
649 | beq >3 // NULL: call the metamethod.
650 | evldd TMP0, 0(CRET1) // Finished: copy the returned TValue to BASE+RA.
651 | evstddx TMP0, BASE, RA
652 | ins_next
653 |
654 |3: // Call __index metamethod.
655 | // BASE = base, L->top = new base, stack = cont/func/t/k
656 | subfic TMP1, BASE, FRAME_CONT // TMP1 = FRAME_CONT - old base.
657 | lwz BASE, L->top // BASE = new base.
658 | stw PC, -16(BASE) // [cont|PC]
659 | add PC, TMP1, BASE // PC = (new base - old base) + FRAME_CONT.
660 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
661 | li NARGS8:RC, 16 // 2 args for func(t, k).
662 | b ->vm_call_dispatch_f
663 |
664 |//-----------------------------------------------------------------------
665 |
666 |->vmeta_tsets1: // String key in RC; object operand decoded from INS.
667 | evmergelo STR:RC, TISSTR, STR:RC // Build a TValue: string tag in upper word, pointer in lower.
668 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
669 | decode_RB8 RB, INS
670 | evstdd STR:RC, 0(CARG3) // k = &tmptv, holding the string key.
671 | add CARG2, BASE, RB // o = BASE+RB.
672 | b >1
673 |
674 |->vmeta_tsets: // Table in RB and string key in RC; both passed via temporaries.
675 | evmergelo TAB:RB, TISTAB, TAB:RB // Build a TValue: table tag in upper word, pointer in lower.
676 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
677 | evmergelo STR:RC, TISSTR, STR:RC // Build a TValue: string tag in upper word, pointer in lower.
678 | evstdd TAB:RB, 0(CARG2) // o = &tmptv, holding the table.
679 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
680 | evstdd STR:RC, 0(CARG3) // k = &tmptv2, holding the string key.
681 | b >1
682 |
683 |->vmeta_tsetb: // TMP0 = index
684 | efdcfsi TMP0, TMP0 // Convert the signed integer index to a double.
685 | decode_RB8 RB, INS
686 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
687 | add CARG2, BASE, RB // o = BASE+RB.
688 | evstdd TMP0, 0(CARG3) // k = &tmptv, holding the number key.
689 | b >1
690 |
691 |->vmeta_tsetv: // Object and key operands are both decoded from INS.
692 | decode_RB8 RB, INS
693 | decode_RC8 RC, INS
694 | add CARG2, BASE, RB // o = BASE+RB.
695 | add CARG3, BASE, RC // k = BASE+RC.
696 |1: // Shared tail: CARG2 = o, CARG3 = k.
697 | stw BASE, L->base
698 | mr CARG1, L
699 | stw PC, SAVE_PC
700 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
701 | // Returns TValue * (finished) or NULL (metamethod).
702 | cmplwi CRET1, 0
703 | evlddx TMP0, BASE, RA // TMP0 = value to store (slot BASE+RA); used on both paths.
704 | beq >3 // NULL: call the metamethod.
705 | // NOBARRIER: lj_meta_tset ensures the table is not black.
706 | evstdd TMP0, 0(CRET1) // Finished: store the value into the returned slot.
707 | ins_next
708 |
709 |3: // Call __newindex metamethod.
710 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
711 | subfic TMP1, BASE, FRAME_CONT // TMP1 = FRAME_CONT - old base.
712 | lwz BASE, L->top // BASE = new base.
713 | stw PC, -16(BASE) // [cont|PC]
714 | add PC, TMP1, BASE // PC = (new base - old base) + FRAME_CONT.
715 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
716 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
717 | evstdd TMP0, 16(BASE) // Copy value to third argument.
718 | b ->vm_call_dispatch_f
719 |
720 |//-- Comparison metamethods ---------------------------------------------
721 |
722 |->vmeta_comp: // Comparison fallback: call lj_meta_comp on slots BASE+RA and BASE+RD.
723 | mr CARG1, L
724 | subi PC, PC, 4 // Step PC back by one instruction word.
725 | add CARG2, BASE, RA // o1 = BASE+RA.
726 | stw PC, SAVE_PC
727 | add CARG3, BASE, RD // o2 = BASE+RD.
728 | stw BASE, L->base
729 | decode_OP1 CARG4, INS // op, decoded from INS.
730 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
731 | // Returns 0/1 or TValue * (metamethod).
732 |3: // Also entered from vmeta_equal with the same return convention.
733 | cmplwi CRET1, 1
734 | bgt ->vmeta_binop // > 1: it is a TValue *, call the metamethod.
735 |4: // cr0.lt set on entry: fall through; otherwise take the jump in the next instruction.
736 | lwz INS, 0(PC)
737 | addi PC, PC, 4
738 | decode_RD4 TMP2, INS
739 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
740 | add TMP2, TMP2, TMP3 // TMP2 = jump target.
741 | isellt PC, PC, TMP2 // cr0.lt: keep PC, else PC = jump target.
742 |->cont_nop: // Continuation that just resumes with the next instruction.
743 | ins_next
744 |
745 |->cont_ra: // RA = resultptr
746 | lwz INS, -4(PC) // Reload the instruction word just before PC.
747 | evldd TMP0, 0(RA) // TMP0 = result value.
748 | decode_RA8 TMP1, INS
749 | evstddx TMP0, BASE, TMP1 // Store the result at BASE + TMP1 (decoded from INS).
750 | b ->cont_nop
751 |
752 |->cont_condt: // RA = resultptr
753 | lwz TMP0, 0(RA) // First word of the result (compared as a type tag below).
754 | li TMP1, LJ_TTRUE
755 | cmplw TMP1, TMP0 // Branch if result is true.
756 | b <4 // Label 4 selects the branch target from cr0.lt.
757 |
758 |->cont_condf: // RA = resultptr
759 | lwz TMP0, 0(RA) // First word of the result (compared as a type tag below).
760 | li TMP1, LJ_TFALSE
761 | cmplw TMP0, TMP1 // Branch if result is false.
762 | b <4 // Label 4 selects the branch target from cr0.lt.
763 |
764 |->vmeta_equal: // Equality fallback: call lj_meta_equal, then share vmeta_comp's result handling.
765 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
766 | subi PC, PC, 4 // Step PC back by one instruction word.
767 | stw BASE, L->base
768 | mr CARG1, L
769 | stw PC, SAVE_PC
770 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
771 | // Returns 0/1 or TValue * (metamethod).
772 | b <3 // Continue at label 3 in vmeta_comp.
773 |
774 |//-- Arithmetic metamethods ---------------------------------------------
775 |
776 |->vmeta_arith_vn: // rb = BASE+RB, rc = KBASE+RC.
777 | add CARG3, BASE, RB
778 | add CARG4, KBASE, RC
779 | b >1
780 |
781 |->vmeta_arith_nv: // rb = KBASE+RC, rc = BASE+RB.
782 | add CARG3, KBASE, RC
783 | add CARG4, BASE, RB
784 | b >1
785 |
786 |->vmeta_unm: // Unary minus: both operand pointers are BASE+RD.
787 | add CARG3, BASE, RD
788 | mr CARG4, CARG3
789 | b >1
790 |
791 |->vmeta_arith_vv: // rb = BASE+RB, rc = BASE+RC.
792 | add CARG3, BASE, RB
793 | add CARG4, BASE, RC
794 |1: // Shared tail: CARG3 = rb, CARG4 = rc.
795 | add CARG2, BASE, RA // ra = BASE+RA.
796 | stw BASE, L->base
797 | mr CARG1, L
798 | stw PC, SAVE_PC
799 | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS.
800 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
801 | // Returns NULL (finished) or TValue * (metamethod).
802 | cmplwi CRET1, 0
803 | beq ->cont_nop // NULL: finished, continue with the next instruction.
804 |
805 | // Call metamethod for binary op.
806 |->vmeta_binop:
807 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
808 | sub TMP1, CRET1, BASE // TMP1 = new base - old base.
809 | stw PC, -16(CRET1) // [cont|PC]
810 | mr TMP2, BASE // TMP2 = old base, as vm_call_dispatch expects.
811 | addi PC, TMP1, FRAME_CONT // PC = frame delta + FRAME_CONT.
812 | mr BASE, CRET1
813 | li NARGS8:RC, 16 // 2 args for func(o1, o2).
814 | b ->vm_call_dispatch
815 |
816 |->vmeta_len: // Length fallback: call lj_meta_len on slot BASE+RD.
817#if LJ_52
818 | mr SAVE0, CARG1 // Keep the incoming CARG1 across the call (restored before BC_LEN_Z).
819#endif
820 | add CARG2, BASE, RD // o = BASE+RD.
821 | stw BASE, L->base
822 | mr CARG1, L
823 | stw PC, SAVE_PC
824 | bl extern lj_meta_len // (lua_State *L, TValue *o)
825 | // Returns NULL (retry) or TValue * (metamethod base).
826#if LJ_52
827 | cmplwi CRET1, 0
828 | bne ->vmeta_binop // Binop call for compatibility.
829 | mr CARG1, SAVE0 // NULL: restore CARG1 and retry in BC_LEN_Z.
830 | b ->BC_LEN_Z
831#else
832 | b ->vmeta_binop // Binop call for compatibility.
833#endif
834 |
835 |//-- Call metamethod ----------------------------------------------------
836 |
837 |->vmeta_call: // Resolve and call __call metamethod.
838 | // TMP2 = old base, BASE = new base, RC = nargs*8
839 | mr CARG1, L
840 | stw TMP2, L->base // This is the caller's base!
841 | subi CARG2, BASE, 8 // func = BASE-8.
842 | stw PC, SAVE_PC
843 | add CARG3, BASE, RC // top = BASE + nargs*8.
844 | mr SAVE0, NARGS8:RC // Keep nargs*8 across the call.
845 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
846 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
847 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
848 | ins_call
849 |
850 |->vmeta_callt: // Resolve __call for BC_CALLT.
851 | // BASE = old base, RA = new base, RC = nargs*8
852 | mr CARG1, L
853 | stw BASE, L->base
854 | subi CARG2, RA, 8 // func = RA-8.
855 | stw PC, SAVE_PC
856 | add CARG3, RA, RC // top = RA + nargs*8.
857 | mr SAVE0, NARGS8:RC // Keep nargs*8 across the call.
858 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
859 | lwz TMP1, FRAME_PC(BASE) // TMP1 = FRAME_PC of the old base, loaded for BC_CALLT_Z.
860 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
861 | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
862 | b ->BC_CALLT_Z
863 |
864 |//-- Argument coercion for 'for' statement ------------------------------
865 |
866 |->vmeta_for: // Call lj_meta_for on the loop slots, then re-dispatch the loop instruction.
867 | mr CARG1, L
868 | stw BASE, L->base
869 | mr CARG2, RA // base = RA.
870 | stw PC, SAVE_PC
871 | mr SAVE0, INS // Keep the instruction word across the call.
872 | bl extern lj_meta_for // (lua_State *L, TValue *base)
873 |.if JIT
874 | decode_OP1 TMP0, SAVE0 // Opcode is only needed to tell BC_JFORI from BC_FORI.
875 |.endif
876 | decode_RA8 RA, SAVE0 // Re-decode the operands from the saved instruction.
877 |.if JIT
878 | cmpwi TMP0, BC_JFORI
879 |.endif
880 | decode_RD8 RD, SAVE0
881 |.if JIT
882 | beq =>BC_JFORI
883 |.endif
884 | b =>BC_FORI
885 |
886 |//-----------------------------------------------------------------------
887 |//-- Fast functions -----------------------------------------------------
888 |//-----------------------------------------------------------------------
889 |
890 |.macro .ffunc, name // Define only the ->ff_<name> entry label.
891 |->ff_ .. name:
892 |.endmacro
893 |
894 |.macro .ffunc_1, name // Entry label; needs >= 1 argument, first arg loaded into CARG1.
895 |->ff_ .. name:
896 | cmplwi NARGS8:RC, 8
897 | evldd CARG1, 0(BASE)
898 | blt ->fff_fallback // Fewer than 1 argument.
899 |.endmacro
900 |
901 |.macro .ffunc_2, name // Entry label; needs >= 2 arguments, loaded into CARG1 and CARG2.
902 |->ff_ .. name:
903 | cmplwi NARGS8:RC, 16
904 | evldd CARG1, 0(BASE)
905 | evldd CARG2, 8(BASE)
906 | blt ->fff_fallback // Fewer than 2 arguments.
907 |.endmacro
908 |
909 |.macro .ffunc_n, name // Like .ffunc_1, plus a checknum test on CARG1.
910 | .ffunc_1 name
911 | checknum CARG1
912 | checkfail ->fff_fallback
913 |.endmacro
914 |
915 |.macro .ffunc_nn, name // Like .ffunc_2, plus a checknum test covering both arguments.
916 | .ffunc_2 name
917 | evmergehi TMP0, CARG1, CARG2 // TMP0 = upper words of CARG1 and CARG2.
918 | checknum TMP0
919 | checkanyfail ->fff_fallback
920 |.endmacro
921 |
922 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
923 |.macro ffgccheck
924 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
925 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
926 | cmplw TMP0, TMP1
927 | bgel ->fff_gcstep // Branch-and-link to fff_gcstep if gc.total >= gc.threshold.
928 |.endmacro
929 |
930 |//-- Base library: checks -----------------------------------------------
931 |
932 |.ffunc assert // Fast path: return all arguments if the first one passes the check below.
933 | cmplwi NARGS8:RC, 8 // cr0 stays live until the beq ->fff_res below.
934 | evldd TMP0, 0(BASE)
935 | blt ->fff_fallback // No arguments.
936 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
937 | la RA, -8(BASE) // Results start at BASE-8.
938 | evcmpltu cr1, TMP0, TMP1 // Unsigned compare of the first argument against the synthesized value.
939 | lwz PC, FRAME_PC(BASE)
940 | bge cr1, ->fff_fallback // Compare failed: let the fallback handle it.
941 | evstdd TMP0, 0(RA) // Copy the first argument to the first result.
942 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
943 | beq ->fff_res // Done if exactly 1 argument.
944 | li TMP1, 8 // TMP1 = byte offset of the next argument to copy.
945 | subi RC, RC, 8 // RC = byte offset of the last argument.
946 |1: // Copy the remaining arguments down by one slot.
947 | cmplw TMP1, RC // Last argument? Tested by the bne below.
948 | evlddx TMP0, BASE, TMP1
949 | evstddx TMP0, RA, TMP1
950 | addi TMP1, TMP1, 8
951 | bne <1
952 | b ->fff_res
953 |
954 |.ffunc type // Look up the result in the function's upvalue array, indexed by type tag.
955 | cmplwi NARGS8:RC, 8
956 | lwz CARG1, 0(BASE) // First word of the argument (used as its type tag).
957 | blt ->fff_fallback // No arguments.
958 | li TMP2, ~LJ_TNUMX
959 | cmplw CARG1, TISNUM
960 | not TMP1, CARG1 // TMP1 = ~tag.
961 | isellt TMP1, TMP2, TMP1 // Tag below TISNUM: index = ~LJ_TNUMX, else index = ~tag.
962 | slwi TMP1, TMP1, 3 // Scale the index by 8.
963 | la TMP2, CFUNC:RB->upvalue
964 | evlddx STR:CRET1, TMP2, TMP1 // Result = upvalue[index].
965 | b ->fff_restv
966 |
967 |//-- Base library: getters and setters ---------------------------------
968 |
969 |.ffunc_1 getmetatable // Return mt.__metatable if present and non-nil, else the metatable (or nil).
970 | checktab CARG1
971 | evmergehi TMP1, CARG1, CARG1 // TMP1 = upper word (type tag) of the argument, used at 6.
972 | checkfail >6 // Not a table.
973 |1: // Field metatable must be at same offset for GCtab and GCudata!
974 | lwz TAB:RB, TAB:CARG1->metatable
975 |2: // RB = metatable or 0.
976 | evmr CRET1, TISNIL // Default result: nil.
977 | cmplwi TAB:RB, 0
978 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
979 | beq ->fff_restv // No metatable: return nil.
980 | lwz TMP0, TAB:RB->hmask
981 | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result.
982 | lwz TMP1, STR:RC->hash
983 | lwz NODE:TMP2, TAB:RB->node
984 | evmergelo STR:RC, TISSTR, STR:RC // Build the string key TValue for the compare below.
985 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
986 | slwi TMP0, TMP1, 5
987 | slwi TMP1, TMP1, 3
988 | sub TMP1, TMP0, TMP1
989 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
990 |3: // Rearranged logic, because we expect _not_ to find the key.
991 | evldd TMP0, NODE:TMP2->key
992 | evldd TMP1, NODE:TMP2->val
993 | evcmpeq TMP0, STR:RC // Compare the node key against the string key.
994 | lwz NODE:TMP2, NODE:TMP2->next // Advance to the next node in the chain.
995 | checkallok >5 // Key matches.
996 | cmplwi NODE:TMP2, 0
997 | beq ->fff_restv // Not found, keep default result.
998 | b <3
999 |5: // Key found; TMP1 = its value.
1000 | checknil TMP1
1001 | checkok ->fff_restv // Ditto for nil value.
1002 | evmr CRET1, TMP1 // Return value of mt.__metatable.
1003 | b ->fff_restv
1004 |
1005 |6: // Not a table: userdata uses the object's metatable, other types a base metatable.
1006 | cmpwi TMP1, LJ_TUDATA
1007 | not TMP1, TMP1 // TMP1 = ~tag.
1008 | beq <1 // Userdata: load the metatable field at 1.
1009 | checknum CARG1
1010 | slwi TMP1, TMP1, 2 // Scale ~tag by 4 (word-sized table entries).
1011 | li TMP2, 4*~LJ_TNUMX
1012 | isellt TMP1, TMP2, TMP1 // cr0.lt set: use the LJ_TNUMX slot, else the slot for ~tag.
1013 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
1014 | lwzx TAB:RB, TMP2, TMP1 // RB = base metatable for this type.
1015 | b <2
1016 |
1017 |.ffunc_2 setmetatable // Fast path only; everything else goes to the fallback.
1018 | // Fast path: no mt for table yet and not clearing the mt.
1019 | evmergehi TMP0, TAB:CARG1, TAB:CARG2 // TMP0 = upper words (type tags) of both arguments.
1020 | checktab TMP0
1021 | checkanyfail ->fff_fallback // Both arguments must be tables.
1022 | lwz TAB:TMP1, TAB:CARG1->metatable
1023 | cmplwi TAB:TMP1, 0
1024 | lbz TMP3, TAB:CARG1->marked
1025 | bne ->fff_fallback // Table already has a metatable.
1026 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
1027 | stw TAB:CARG2, TAB:CARG1->metatable
1028 | beq ->fff_restv // Not black: no barrier needed.
1029 | barrierback TAB:CARG1, TMP3, TMP0
1030 | b ->fff_restv
1031 |
1032 |.ffunc rawget // Call lj_tab_get(L, t, key) and return the value it points to.
1033 | cmplwi NARGS8:RC, 16
1034 | evldd CARG2, 0(BASE)
1035 | blt ->fff_fallback // Fewer than 2 arguments.
1036 | checktab CARG2
1037 | la CARG3, 8(BASE) // key = address of the second argument.
1038 | checkfail ->fff_fallback // First argument is not a table.
1039 | mr CARG1, L
1040 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1041 | // Returns cTValue *.
1042 | evldd CRET1, 0(CRET1) // Load the TValue the result points to.
1043 | b ->fff_restv
1044 |
1045 |//-- Base library: conversions ------------------------------------------
1046 |
1047 |.ffunc tonumber // Return a single number argument unchanged; otherwise fall back.
1048 | // Only handles the number case inline (without a base argument).
1049 | cmplwi NARGS8:RC, 8
1050 | evldd CARG1, 0(BASE)
1051 | bne ->fff_fallback // Exactly one argument.
1052 | checknum CARG1
1053 | checkok ->fff_restv // Check passed: return CARG1 as is.
1054 | b ->fff_fallback
1055 |
1056 |.ffunc_1 tostring // Strings are returned as is; numbers are converted via lj_str_fromnum.
1057 | // Only handles the string or number case inline.
1058 | checkstr CARG1
1059 | // A __tostring method in the string base metatable is ignored.
1060 | checkok ->fff_restv // String key?
1061 | // Handle numbers inline, unless a number base metatable is present.
1062 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1063 | checknum CARG1
1064 | cmplwi cr1, TMP0, 0 // cr1.eq: no number base metatable.
1065 | stw BASE, L->base // Add frame since C call can throw.
1066 | crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq // cr0.eq = cr0.lt (from checknum) && cr1.eq.
1067 | stw PC, SAVE_PC // Redundant (but a defined value).
1068 | bne ->fff_fallback // Combined condition not met.
1069 | ffgccheck
1070 | mr CARG1, L
1071 | mr CARG2, BASE // np = address of the first argument.
1072 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
1073 | // Returns GCstr *.
1074 | evmergelo STR:CRET1, TISSTR, STR:CRET1 // Build a TValue: string tag in upper word, pointer in lower.
1075 | b ->fff_restv
1076 |
1077 |//-- Base library: iterators -------------------------------------------
1078 |
1079 |.ffunc next // Call lj_tab_next and return the key/value pair, or nil at the end.
1080 | cmplwi NARGS8:RC, 8
1081 | evldd CARG2, 0(BASE)
1082 | blt ->fff_fallback // No arguments.
1083 | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
1084 | checktab TAB:CARG2
1085 | lwz PC, FRAME_PC(BASE)
1086 | checkfail ->fff_fallback // First argument is not a table.
1087 | stw BASE, L->base // Add frame since C call can throw.
1088 | mr CARG1, L
1089 | stw BASE, L->top // Dummy frame length is ok.
1090 | la CARG3, 8(BASE) // key = address of the second argument slot.
1091 | stw PC, SAVE_PC
1092 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1093 | // Returns 0 at end of traversal.
1094 | cmplwi CRET1, 0
1095 | evmr CRET1, TISNIL // Preload nil for the end-of-traversal case.
1096 | beq ->fff_restv // End of traversal: return nil.
1097 | evldd TMP0, 8(BASE) // Copy key and value to results.
1098 | la RA, -8(BASE)
1099 | evldd TMP1, 16(BASE)
1100 | evstdd TMP0, 0(RA)
1101 | li RD, (2+1)*8 // 2 results.
1102 | evstdd TMP1, 8(RA)
1103 | b ->fff_res
1104 |
1105 |.ffunc_1 pairs // Return upvalue[0], the table and nil.
1106 | checktab TAB:CARG1
1107 | lwz PC, FRAME_PC(BASE)
1108 | checkfail ->fff_fallback // Argument is not a table.
1109#if LJ_52
1110 | lwz TAB:TMP2, TAB:CARG1->metatable
1111 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1112 | cmplwi TAB:TMP2, 0
1113 | la RA, -8(BASE)
1114 | bne ->fff_fallback // Table has a metatable: use the fallback.
1115#else
1116 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1117 | la RA, -8(BASE)
1118#endif
1119 | evstdd TISNIL, 8(BASE) // Third result: nil. The table stays in place at 0(BASE).
1120 | li RD, (3+1)*8 // 3 results.
1121 | evstdd CFUNC:TMP0, 0(RA) // First result: upvalue[0].
1122 | b ->fff_res
1123 |
1124 |.ffunc_2 ipairs_aux // Iterator step: return i+1 and t[i+1], or nothing if that value is nil.
1125 | checktab TAB:CARG1
1126 | lwz PC, FRAME_PC(BASE)
1127 | checkfail ->fff_fallback // First argument is not a table.
1128 | checknum CARG2
1129 | lus TMP3, 0x3ff0 // 0x3ff0 is the upper halfword of the double 1.0.
1130 | checkfail ->fff_fallback // Second argument is not a number.
1131 | efdctsi TMP2, CARG2 // TMP2 = index converted to a signed integer.
1132 | lwz TMP0, TAB:CARG1->asize
1133 | evmergelo TMP3, TMP3, ZERO // Upper word = low word of TMP3, lower word = low word of ZERO.
1134 | lwz TMP1, TAB:CARG1->array
1135 | efdadd CARG2, CARG2, TMP3 // Index (as double) += TMP3.
1136 | addi TMP2, TMP2, 1 // Integer index += 1.
1137 | la RA, -8(BASE)
1138 | cmplw TMP0, TMP2 // asize vs. new index; tested by the ble below.
1139 | slwi TMP3, TMP2, 3 // Byte offset into the array part.
1140 | evstdd CARG2, 0(RA) // First result: the new index.
1141 | ble >2 // Not in array part?
1142 | evlddx TMP1, TMP1, TMP3 // TMP1 = array[index].
1143 |1: // TMP1 = value at the new index.
1144 | checknil TMP1
1145 | li RD, (0+1)*8
1146 | checkok ->fff_res // End of iteration, return 0 results.
1147 | li RD, (2+1)*8 // 2 results: index and value.
1148 | evstdd TMP1, 8(RA)
1149 | b ->fff_res
1150 |2: // Check for empty hash part first. Otherwise call C function.
1151 | lwz TMP0, TAB:CARG1->hmask
1152 | cmplwi TMP0, 0
1153 | li RD, (0+1)*8
1154 | beq ->fff_res // hmask == 0: return 0 results.
1155 | mr CARG2, TMP2 // key = new integer index.
1156 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1157 | // Returns cTValue * or NULL.
1158 | cmplwi CRET1, 0
1159 | li RD, (0+1)*8
1160 | beq ->fff_res // NULL: return 0 results.
1161 | evldd TMP1, 0(CRET1)
1162 | b <1
1163 |
1164 |.ffunc_1 ipairs // Return upvalue[0], the table and an all-zero third value.
1165 | checktab TAB:CARG1
1166 | lwz PC, FRAME_PC(BASE)
1167 | checkfail ->fff_fallback // Argument is not a table.
1168#if LJ_52
1169 | lwz TAB:TMP2, TAB:CARG1->metatable
1170 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1171 | cmplwi TAB:TMP2, 0
1172 | la RA, -8(BASE)
1173 | bne ->fff_fallback // Table has a metatable: use the fallback.
1174#else
1175 | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
1176 | la RA, -8(BASE)
1177#endif
1178 | evsplati TMP1, 0 // Both words zero, i.e. the bit pattern of the double 0.0.
1179 | li RD, (3+1)*8 // 3 results.
1180 | evstdd TMP1, 8(BASE) // Third result. The table stays in place at 0(BASE).
1181 | evstdd CFUNC:TMP0, 0(RA) // First result: upvalue[0].
1182 | b ->fff_res
1183 |
1184 |//-- Base library: catch errors ----------------------------------------
1185 |
1186 |.ffunc pcall // Set up a FRAME_PCALL frame and dispatch the call to the first argument.
1187 | cmplwi NARGS8:RC, 8
1188 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1189 | blt ->fff_fallback // No arguments.
1190 | mr TMP2, BASE // TMP2 = old base, as vm_call_dispatch expects.
1191 | la BASE, 8(BASE) // New base is one slot up.
1192 | // Remember active hook before pcall.
1193 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 // TMP3 = the hook-active bit, moved to bit 0.
1194 | subi NARGS8:RC, NARGS8:RC, 8 // One argument less: the function itself.
1195 | addi PC, TMP3, 8+FRAME_PCALL // PC = frame delta (8) + FRAME_PCALL + hook bit.
1196 | b ->vm_call_dispatch
1197 |
1198 |.ffunc_2 xpcall // Like pcall, with the traceback function kept below the called function.
1199 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1200 | mr TMP2, BASE // TMP2 = old base, as vm_call_dispatch expects.
1201 | checkfunc CARG2 // Traceback must be a function.
1202 | checkfail ->fff_fallback
1203 | la BASE, 16(BASE) // New base is two slots up.
1204 | // Remember active hook before pcall.
1205 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 // TMP3 = the hook-active bit, moved to bit 0.
1206 | evstdd CARG2, 0(TMP2) // Swap function and traceback.
1207 | subi NARGS8:RC, NARGS8:RC, 16 // Two arguments less: function and traceback.
1208 | evstdd CARG1, 8(TMP2)
1209 | addi PC, TMP3, 16+FRAME_PCALL // PC = frame delta (16) + FRAME_PCALL + hook bit.
1210 | b ->vm_call_dispatch
1211 |
1212 |//-- Coroutine library --------------------------------------------------
1213 |
1214 |.macro coroutine_resume_wrap, resume // resume=1: coroutine.resume, resume=0: coroutine.wrap helper.
1215 |.if resume
1216 |.ffunc_1 coroutine_resume
1217 | evmergehi TMP0, L:CARG1, L:CARG1 // TMP0 = upper word (type tag) of the first argument.
1218 |.else
1219 |.ffunc coroutine_wrap_aux
1220 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr // The coroutine comes from upvalue[0].
1221 |.endif
1222 |.if resume
1223 | cmpwi TMP0, LJ_TTHREAD
1224 | bne ->fff_fallback // First argument is not a thread.
1225 |.endif
1226 | lbz TMP0, L:CARG1->status
1227 | lwz TMP1, L:CARG1->cframe
1228 | lwz CARG2, L:CARG1->top
1229 | cmplwi cr0, TMP0, LUA_YIELD
1230 | lwz TMP2, L:CARG1->base
1231 | cmplwi cr1, TMP1, 0
1232 | lwz TMP0, L:CARG1->maxstack
1233 | cmplw cr7, CARG2, TMP2
1234 | lwz PC, FRAME_PC(BASE)
1235 | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0
1236 | add TMP2, CARG2, NARGS8:RC // TMP2 = co->top + nargs*8.
1237 | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD
1238 | cmplw cr1, TMP2, TMP0 // New top vs. co->maxstack.
1239 | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
1240 | stw PC, SAVE_PC
1241 | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov
1242 | stw BASE, L->base
1243 | blt cr6, ->fff_fallback // Any of the conditions above: use the fallback.
1244 |1:
1245 |.if resume
1246 | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
1247 | subi NARGS8:RC, NARGS8:RC, 8
1248 | subi TMP2, TMP2, 8
1249 |.endif
1250 | stw TMP2, L:CARG1->top // co->top = old co->top + number of args moved.
1251 | li TMP1, 0
1252 | stw BASE, L->top
1253 |2: // Move args to coroutine.
1254 | cmpw TMP1, NARGS8:RC // All args copied?
1255 | evlddx TMP0, BASE, TMP1
1256 | beq >3
1257 | evstddx TMP0, CARG2, TMP1
1258 | addi TMP1, TMP1, 8
1259 | b <2
1260 |3: // CARG1 = coroutine, CARG2 = its old top (base argument for vm_resume).
1261 | li CARG3, 0
1262 | mr L:SAVE0, L:CARG1 // Keep the coroutine pointer across the call.
1263 | li CARG4, 0
1264 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1265 | // Returns thread status.
1266 |4: // CRET1 = status. Also re-entered from 9 with CRET1 = 0.
1267 | lwz TMP2, L:SAVE0->base
1268 | cmplwi CRET1, LUA_YIELD
1269 | lwz TMP3, L:SAVE0->top
1270 | li_vmstate INTERP
1271 | lwz BASE, L->base
1272 | st_vmstate
1273 | bgt >8 // Status > LUA_YIELD: error.
1274 | sub RD, TMP3, TMP2 // RD = co->top - co->base = nresults*8.
1275 | lwz TMP0, L->maxstack
1276 | cmplwi RD, 0
1277 | add TMP1, BASE, RD
1278 | beq >6 // No results?
1279 | cmplw TMP1, TMP0
1280 | li TMP1, 0
1281 | bgt >9 // Need to grow stack?
1282 |
1283 | subi TMP3, RD, 8 // TMP3 = byte offset of the last result.
1284 | stw TMP2, L:SAVE0->top // Clear coroutine stack.
1285 |5: // Move results from coroutine.
1286 | cmplw TMP1, TMP3 // Last result? Tested by the bne below.
1287 | evlddx TMP0, TMP2, TMP1
1288 | evstddx TMP0, BASE, TMP1
1289 | addi TMP1, TMP1, 8
1290 | bne <5
1291 |6: // Results are in place at BASE; RD = nresults*8.
1292 | andi. TMP0, PC, FRAME_TYPE // cr0.eq is set if the frame type bits are zero (tested at 7).
1293 |.if resume
1294 | li TMP1, LJ_TTRUE
1295 | la RA, -8(BASE)
1296 | stw TMP1, -8(BASE) // Prepend true to results.
1297 | addi RD, RD, 16
1298 |.else
1299 | mr RA, BASE
1300 | addi RD, RD, 8
1301 |.endif
1302 |7: // Return RA/RD to the caller; cr0 from the preceding andi. selects the path.
1303 | stw PC, SAVE_PC
1304 | mr MULTRES, RD
1305 | beq ->BC_RET_Z
1306 | b ->vm_return
1307 |
1308 |8: // Coroutine returned with error (at co->top-1).
1309 |.if resume
1310 | andi. TMP0, PC, FRAME_TYPE // Sets cr0 for the branch at 7.
1311 | la TMP3, -8(TMP3) // TMP3 = co->top - 8 (the error slot).
1312 | li TMP1, LJ_TFALSE
1313 | evldd TMP0, 0(TMP3)
1314 | stw TMP3, L:SAVE0->top // Remove error from coroutine stack.
1315 | li RD, (2+1)*8
1316 | stw TMP1, -8(BASE) // Prepend false to results.
1317 | la RA, -8(BASE)
1318 | evstdd TMP0, 0(BASE) // Copy error message.
1319 | b <7
1320 |.else
1321 | mr CARG1, L
1322 | mr CARG2, L:SAVE0
1323 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1324 |.endif // NOTE(review): no branch follows the call above; presumably it does not return -- confirm.
1325 |
1326 |9: // Handle stack expansion on return from yield.
1327 | mr CARG1, L
1328 | srwi CARG2, RD, 3 // n = number of results.
1329 | bl extern lj_state_growstack // (lua_State *L, int n)
1330 | li CRET1, 0 // Status 0 is never greater than LUA_YIELD, so 4 takes the non-error path.
1331 | b <4
1332 |.endmacro
1333 |
1334 | coroutine_resume_wrap 1 // coroutine.resume
1335 | coroutine_resume_wrap 0 // coroutine.wrap
1336 |
1337 |.ffunc coroutine_yield // Yield only if the current cframe has CFRAME_RESUME set; else fall back.
1338 | lwz TMP0, L->cframe
1339 | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8 = top.
1340 | stw BASE, L->base
1341 | andi. TMP0, TMP0, CFRAME_RESUME // Test the CFRAME_RESUME bits of L->cframe.
1342 | stw TMP1, L->top
1343 | li CRET1, LUA_YIELD // Return value and new status.
1344 | beq ->fff_fallback // CFRAME_RESUME not set.
1345 | stw ZERO, L->cframe // L->cframe = low word of ZERO.
1346 | stb CRET1, L->status // L->status = LUA_YIELD.
1347 | b ->vm_leave_unw
1348 |
1349 |//-- Math library -------------------------------------------------------
1350 |
1351 |.ffunc_n math_abs // Absolute value of the number argument.
1352 | efdabs CRET1, CARG1
1353 | // Fallthrough.
1354 |
1355 |->fff_restv: // Return a single TValue result.
1356 | // CRET1 = TValue result.
1357 | lwz PC, FRAME_PC(BASE)
1358 | la RA, -8(BASE) // Results start at BASE-8.
1359 | evstdd CRET1, 0(RA)
1360 |->fff_res1: // Return exactly one result already stored at RA.
1361 | // RA = results, PC = return.
1362 | li RD, (1+1)*8
1363 |->fff_res: // Return RD/8-1 results stored at RA.
1364 | // RA = results, RD = (nresults+1)*8, PC = return.
1365 | andi. TMP0, PC, FRAME_TYPE // Frame type bits of PC.
1366 | mr MULTRES, RD
1367 | bne ->vm_return // Non-zero frame type: use vm_return.
1368 | lwz INS, -4(PC) // Reload the instruction word just before PC.
1369 | decode_RB8 RB, INS
1370 |5: // RB (decoded from INS) is compared against RD.
1371 | cmplw RB, RD // More results expected?
1372 | decode_RA8 TMP0, INS
1373 | bgt >6
1374 | ins_next1
1375 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1376 | sub BASE, RA, TMP0
1377 | ins_next2
1378 |
1379 |6: // Fill up results with nil.
1380 | subi TMP1, RD, 8 // Byte offset of the first missing result.
1381 | addi RD, RD, 8 // One more result.
1382 | evstddx TISNIL, RA, TMP1
1383 | b <5
1384 |
1385 |.macro math_extern, func // ff_math_<func>: call the external func with one number argument.
1386 | .ffunc math_ .. func
1387 | cmplwi NARGS8:RC, 8
1388 | evldd CARG2, 0(BASE)
1389 | blt ->fff_fallback // No arguments.
1390 | checknum CARG2
1391 | evmergehi CARG1, CARG2, CARG2 // Split the double: CARG1 = upper word, CARG2 keeps the lower word.
1392 | checkfail ->fff_fallback // Argument is not a number.
1393 | bl extern func@plt
1394 | evmergelo CRET1, CRET1, CRET2 // Recombine the two result words into one 64-bit value.
1395 | b ->fff_restv
1396 |.endmacro
1397 |
1398 |.macro math_extern2, func // ff_math_<func>: call the external func with two number arguments.
1399 | .ffunc math_ .. func
1400 | cmplwi NARGS8:RC, 16
1401 | evldd CARG2, 0(BASE)
1402 | evldd CARG4, 8(BASE)
1403 | blt ->fff_fallback // Fewer than 2 arguments.
1404 | evmergehi CARG1, CARG4, CARG2 // Upper words of both args: used for the check and as the 1st arg's upper word.
1405 | checknum CARG1
1406 | evmergehi CARG3, CARG4, CARG4 // CARG3 = upper word of the second argument.
1407 | checkanyfail ->fff_fallback // Either argument is not a number.
1408 | bl extern func@plt
1409 | evmergelo CRET1, CRET1, CRET2 // Recombine the two result words into one 64-bit value.
1410 | b ->fff_restv
1411 |.endmacro
1412 |
1413 |.macro math_round, func // ff_math_<func>: call the internal ->vm_<func>_hilo helper.
1414 | .ffunc math_ .. func
1415 | cmplwi NARGS8:RC, 8
1416 | evldd CARG2, 0(BASE)
1417 | blt ->fff_fallback // No arguments.
1418 | checknum CARG2
1419 | evmergehi CARG1, CARG2, CARG2 // Split the double: CARG1 = upper word, CARG2 keeps the lower word.
1420 | checkfail ->fff_fallback // Argument is not a number.
1421 | lwz PC, FRAME_PC(BASE)
1422 | bl ->vm_..func.._hilo;
1423 | la RA, -8(BASE)
1424 | evstdd CRET2, 0(RA) // The 64-bit result is stored from CRET2 here.
1425 | b ->fff_res1
1426 |.endmacro
1427 |
1428 | math_round floor // Defines ff_math_floor.
1429 | math_round ceil // Defines ff_math_ceil.
1430 |
1431 | math_extern sqrt // Defines ff_math_sqrt.
1432 |
1433 |.ffunc math_log // Call the external log with exactly one number argument.
1434 | cmplwi NARGS8:RC, 8
1435 | evldd CARG2, 0(BASE)
1436 | bne ->fff_fallback // Need exactly 1 argument.
1437 | checknum CARG2
1438 | evmergehi CARG1, CARG2, CARG2 // Split the double: CARG1 = upper word, CARG2 keeps the lower word.
1439 | checkfail ->fff_fallback // Argument is not a number.
1440 | bl extern log@plt
1441 | evmergelo CRET1, CRET1, CRET2 // Recombine the two result words into one 64-bit value.
1442 | b ->fff_restv
1443 |
1444 | math_extern log10 // One-argument wrappers: each defines ff_math_<name> calling <name>.
1445 | math_extern exp
1446 | math_extern sin
1447 | math_extern cos
1448 | math_extern tan
1449 | math_extern asin
1450 | math_extern acos
1451 | math_extern atan
1452 | math_extern sinh
1453 | math_extern cosh
1454 | math_extern tanh
1455 | math_extern2 pow // Two-argument wrappers.
1456 | math_extern2 atan2
1457 | math_extern2 fmod
1458 |
1459 |->ff_math_deg: // Shares the body below with math_rad.
1460 |.ffunc_n math_rad
1461 | evldd CARG2, CFUNC:RB->upvalue[0] // Factor comes from the function's upvalue[0].
1462 | efdmul CRET1, CARG1, CARG2 // Result = argument * factor.
1463 | b ->fff_restv
1464 |
1465 |.ffunc math_ldexp // Call the external ldexp with a number and an integer converted from a number.
1466 | cmplwi NARGS8:RC, 16
1467 | evldd CARG2, 0(BASE)
1468 | evldd CARG4, 8(BASE)
1469 | blt ->fff_fallback // Fewer than 2 arguments.
1470 | evmergehi CARG1, CARG4, CARG2 // Upper words of both args: used for the check and as the 1st arg's upper word.
1471 | checknum CARG1
1472 | checkanyfail ->fff_fallback // Either argument is not a number.
1473 | efdctsi CARG3, CARG4 // Convert the second argument to a signed integer.
1474 | bl extern ldexp@plt
1475 | evmergelo CRET1, CRET1, CRET2 // Recombine the two result words into one 64-bit value.
1476 | b ->fff_restv
1477 |
1478 |.ffunc math_frexp // Call the external frexp; return its result and the exponent as a number.
1479 | cmplwi NARGS8:RC, 8
1480 | evldd CARG2, 0(BASE)
1481 | blt ->fff_fallback // No arguments.
1482 | checknum CARG2
1483 | evmergehi CARG1, CARG2, CARG2 // Split the double: CARG1 = upper word, CARG2 keeps the lower word.
1484 | checkfail ->fff_fallback // Argument is not a number.
1485 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) // Output pointer: tmptv receives the exponent.
1486 | lwz PC, FRAME_PC(BASE)
1487 | bl extern frexp@plt
1488 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) // TMP1 = exponent written by frexp.
1489 | evmergelo CRET1, CRET1, CRET2 // Recombine the two result words into one 64-bit value.
1490 | efdcfsi CRET2, TMP1 // Convert the exponent to a double.
1491 | la RA, -8(BASE)
1492 | evstdd CRET1, 0(RA) // First result.
1493 | li RD, (2+1)*8 // 2 results.
1494 | evstdd CRET2, 8(RA) // Second result: the exponent.
1495 | b ->fff_res
1496 |
1497 |.ffunc math_modf // Fast path for math_modf; produces two results.
1498 | cmplwi NARGS8:RC, 8
1499 | evldd CARG2, 0(BASE)
1500 | blt ->fff_fallback // Need at least 1 argument.
1501 | checknum CARG2
1502 | evmergehi CARG1, CARG2, CARG2
1503 | checkfail ->fff_fallback
1504 | la CARG3, -8(BASE) // Third C argument points at BASE-8, i.e. the 1st result slot.
1505 | lwz PC, FRAME_PC(BASE)
1506 | bl extern modf@plt
1507 | evmergelo CRET1, CRET1, CRET2
1508 | la RA, -8(BASE)
1509 | evstdd CRET1, 0(BASE) // Returned value becomes the 2nd result (BASE == RA+8).
1510 | li RD, (2+1)*8 // (nresults+1)*8 for two results.
1511 | b ->fff_res
1512 |
1513 |.macro math_minmax, name, cmpop // Emits a variadic min/max fast function; cmpop is the compare instruction to use.
1514 | .ffunc_1 name
1515 | checknum CARG1
1516 | li TMP1, 8 // Byte offset of the next argument to examine.
1517 | checkfail ->fff_fallback
1518 |1:
1519 | evlddx CARG2, BASE, TMP1 // Load the next argument (before the bounds check below).
1520 | cmplw cr1, TMP1, NARGS8:RC // Offset past the last argument?
1521 | checknum CARG2
1522 | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1.
1523 | checkfail ->fff_fallback
1524 | cmpop CARG2, CARG1
1525 | addi TMP1, TMP1, 8
1526 | crmove 4*cr0+lt, 4*cr0+gt // Copy the gt bit into lt; presumably so evsel picks both word halves -- TODO confirm.
1527 | evsel CARG1, CARG2, CARG1 // Keep the current best in CARG1 or replace it with the new argument.
1528 | b <1
1529 |.endmacro
1530 |
1531 | math_minmax math_min, efdtstlt
1532 | math_minmax math_max, efdtstgt
1533 |
1534 |//-- String library -----------------------------------------------------
1535 |
1536 |.ffunc_1 string_len // Fast path for string_len: returns the len field of a string argument.
1537 | checkstr STR:CARG1
1538 | checkfail ->fff_fallback
1539 | lwz TMP0, STR:CARG1->len
1540 | efdcfsi CRET1, TMP0 // Convert the integer length to a double.
1541 | b ->fff_restv
1542 |
1543 |.ffunc string_byte // Only handle the 1-arg case here.
1544 | cmplwi NARGS8:RC, 8
1545 | evldd STR:CARG1, 0(BASE)
1546 | bne ->fff_fallback // Need exactly 1 argument.
1547 | checkstr STR:CARG1
1548 | la RA, -8(BASE)
1549 | checkfail ->fff_fallback
1550 | lwz TMP0, STR:CARG1->len
1551 | li RD, (0+1)*8 // (nresults+1)*8 for zero results.
1552 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1553 | li TMP2, (1+1)*8 // (nresults+1)*8 for one result.
1554 | cmplwi TMP0, 0 // Empty string?
1555 | lwz PC, FRAME_PC(BASE)
1556 | efdcfsi CRET1, TMP1 // Byte value as a double.
1557 | iseleq RD, RD, TMP2 // Empty string: no results, otherwise one result.
1558 | evstdd CRET1, 0(RA)
1559 | b ->fff_res
1560 |
1561 |.ffunc string_char // Only handle the 1-arg case here.
1562 | ffgccheck
1563 | cmplwi NARGS8:RC, 8
1564 | evldd CARG1, 0(BASE)
1565 | bne ->fff_fallback // Exactly 1 argument.
1566 | checknum CARG1
1567 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) // The single char is stored in the global tmptv slot.
1568 | checkfail ->fff_fallback
1569 | efdctsiz TMP0, CARG1 // Convert the number to an integer, rounding toward zero.
1570 | li CARG3, 1 // String length 1.
1571 | cmplwi TMP0, 255
1572 | stb TMP0, 0(CARG2)
1573 | bgt ->fff_fallback // Above 255 in an unsigned compare, so negative values also fall back.
1574 |->fff_newstr: // Shared tail: CARG2 = char pointer, CARG3 = length.
1575 | mr CARG1, L
1576 | stw BASE, L->base
1577 | stw PC, SAVE_PC
1578 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1579 | // Returns GCstr *.
1580 | lwz BASE, L->base
1581 | evmergelo STR:CRET1, TISSTR, STR:CRET1 // Combine the lo word of TISSTR with the string pointer into one 64 bit value.
1582 | b ->fff_restv
1583 |
1584 |.ffunc string_sub // Fast path for string_sub with a string, a start and an optional end.
1585 | ffgccheck
1586 | cmplwi NARGS8:RC, 16
1587 | evldd CARG3, 16(BASE) // 3rd argument slot (only used if more than 2 arguments).
1588 | evldd STR:CARG1, 0(BASE)
1589 | blt ->fff_fallback // Need at least 2 arguments.
1590 | evldd CARG2, 8(BASE)
1591 | li TMP2, -1 // Default end = -1.
1592 | beq >1 // Exactly 2 arguments: keep the default end.
1593 | checknum CARG3
1594 | checkfail ->fff_fallback
1595 | efdctsiz TMP2, CARG3 // end as an integer.
1596 |1:
1597 | checknum CARG2
1598 | checkfail ->fff_fallback
1599 | checkstr STR:CARG1
1600 | efdctsiz TMP1, CARG2 // start as an integer.
1601 | checkfail ->fff_fallback
1602 | lwz TMP0, STR:CARG1->len
1603 | cmplw TMP0, TMP2 // len < end? (unsigned compare)
1604 | add TMP3, TMP2, TMP0 // end+len, used at label 5.
1605 | blt >5
1606 |2:
1607 | cmpwi TMP1, 0 // start <= 0?
1608 | add TMP3, TMP1, TMP0 // start+len, used at label 7.
1609 | ble >7
1610 |3:
1611 | sub. CARG3, TMP2, TMP1 // end - start, also sets cr0 for the isellt below.
1612 | addi CARG2, STR:CARG1, #STR-1
1613 | addi CARG3, CARG3, 1 // length = end - start + 1
1614 | add CARG2, CARG2, TMP1 // Pointer to the first char of the substring.
1615 | isellt CARG3, r0, CARG3 // Length 0 if end < start.
1616 | b ->fff_newstr
1617 |
1618 |5: // Negative end or overflow.
1619 | cmpw TMP0, TMP2
1620 | addi TMP3, TMP3, 1
1621 | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1
1622 | b <2
1623 |
1624 |7: // Negative start or underflow.
1625 | cmpwi cr1, TMP3, 0
1626 | iseleq TMP1, r0, TMP3
1627 | isel TMP1, r0, TMP1, 4*cr1+lt
1628 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
1629 | b <3
1630 |
1631 |.ffunc string_rep // Only handle the 1-char case inline.
1632 | ffgccheck
1633 | cmplwi NARGS8:RC, 16
1634 | evldd CARG1, 0(BASE)
1635 | evldd CARG2, 8(BASE)
1636 | bne ->fff_fallback // Exactly 2 arguments.
1637 | checknum CARG2
1638 | checkfail ->fff_fallback
1639 | checkstr STR:CARG1
1640 | efdctsiz CARG3, CARG2 // Repeat count as an integer; later passed on as the result length.
1641 | checkfail ->fff_fallback
1642 | lwz TMP0, STR:CARG1->len
1643 | cmpwi CARG3, 0
1644 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1645 | ble >2 // Count <= 0? (or non-int)
1646 | cmplwi TMP0, 1
1647 | subi TMP2, CARG3, 1 // Fill index, starts at count-1.
1648 | blt >2 // Zero length string?
1649 | cmplw cr1, TMP1, CARG3 // Temp buffer size vs. count, checked at the blt cr1 below.
1650 | bne ->fff_fallback // Fallback for > 1-char strings.
1651 | lbz TMP0, STR:CARG1[1]
1652 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1653 | blt cr1, ->fff_fallback // Temp buffer smaller than count.
1654 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1655 | cmplwi TMP2, 0
1656 | stbx TMP0, CARG2, TMP2
1657 | subi TMP2, TMP2, 1
1658 | bne <1
1659 | b ->fff_newstr
1660 |2: // Return empty string.
1661 | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH)
1662 | evmergelo CRET1, TISSTR, STR:CRET1
1663 | b ->fff_restv
1664 |
1665 |.ffunc string_reverse // Fast path for string_reverse; builds the result in the global temp buffer.
1666 | ffgccheck
1667 | cmplwi NARGS8:RC, 8
1668 | evldd CARG1, 0(BASE)
1669 | blt ->fff_fallback // Need at least 1 argument.
1670 | checkstr STR:CARG1
1671 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1672 | checkfail ->fff_fallback
1673 | lwz CARG3, STR:CARG1->len // Result length = source length.
1674 | la CARG1, #STR(STR:CARG1) // Address just past the string header.
1675 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1676 | li TMP2, 0 // Source index, counts up.
1677 | cmplw TMP1, CARG3
1678 | subi TMP3, CARG3, 1 // Destination index, counts down from len-1.
1679 | blt ->fff_fallback // Temp buffer smaller than the string.
1680 |1: // Reverse string copy.
1681 | cmpwi TMP3, 0
1682 | lbzx TMP1, CARG1, TMP2
1683 | blt ->fff_newstr // Destination index went negative: done.
1684 | stbx TMP1, CARG2, TMP3
1685 | subi TMP3, TMP3, 1
1686 | addi TMP2, TMP2, 1
1687 | b <1
1688 |
1689 |.macro ffstring_case, name, lo // Emits an ASCII case conversion fast function; lo is the first char code to convert.
1690 | .ffunc name
1691 | ffgccheck
1692 | cmplwi NARGS8:RC, 8
1693 | evldd CARG1, 0(BASE)
1694 | blt ->fff_fallback // Need at least 1 argument.
1695 | checkstr STR:CARG1
1696 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1697 | checkfail ->fff_fallback
1698 | lwz CARG3, STR:CARG1->len // Result length = source length.
1699 | la CARG1, #STR(STR:CARG1) // Address just past the string header.
1700 | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1701 | cmplw TMP1, CARG3
1702 | li TMP2, 0 // Index, counts up.
1703 | blt ->fff_fallback // Temp buffer smaller than the string.
1704 |1: // ASCII case conversion.
1705 | cmplw TMP2, CARG3
1706 | lbzx TMP1, CARG1, TMP2
1707 | bge ->fff_newstr // Index reached the length: done.
1708 | subi TMP0, TMP1, lo
1709 | xori TMP3, TMP1, 0x20 // Char with bit 0x20 flipped.
1710 | cmplwi TMP0, 26 // (char - lo) < 26 in an unsigned compare?
1711 | isellt TMP1, TMP3, TMP1 // In range: take the flipped char, else keep the original.
1712 | stbx TMP1, CARG2, TMP2
1713 | addi TMP2, TMP2, 1
1714 | b <1
1715 |.endmacro
1716 |
1717 |ffstring_case string_lower, 65
1718 |ffstring_case string_upper, 97
1719 |
1720 |//-- Table library ------------------------------------------------------
1721 |
1722 |.ffunc_1 table_getn // Fast path for table_getn: calls lj_tab_len on a table argument.
1723 | checktab CARG1
1724 | checkfail ->fff_fallback
1725 | bl extern lj_tab_len // (GCtab *t)
1726 | // Returns uint32_t (but less than 2^31).
1727 | efdcfsi CRET1, CRET1 // Convert the length to a double.
1728 | b ->fff_restv
1729 |
1730 |//-- Bit library --------------------------------------------------------
1731 |
1732 |.macro .ffunc_bit, name // Common prologue for the bit_* fast functions.
1733 | .ffunc_n bit_..name
1734 | efdadd CARG1, CARG1, TOBIT // Add TOBIT (defined outside this chunk); the code below then uses CARG1 as an integer.
1735 |.endmacro
1736 |
1737 |.ffunc_bit tobit
1738 |->fff_resbit: // Shared tail: return the integer in CARG1 as a number.
1739 | efdcfsi CRET1, CARG1 // Convert the signed integer to a double.
1740 | b ->fff_restv
1741 |
1742 |.macro .ffunc_bit_op, name, ins // Emits a variadic bit_<name> fast function folding all arguments with ins.
1743 | .ffunc_bit name
1744 | li TMP1, 8 // Byte offset of the next argument.
1745 |1:
1746 | evlddx CARG2, BASE, TMP1 // Load the next argument (before the bounds check below).
1747 | cmplw cr1, TMP1, NARGS8:RC // Offset past the last argument?
1748 | checknum CARG2
1749 | bge cr1, ->fff_resbit // All arguments processed: return the accumulated value in CARG1.
1750 | checkfail ->fff_fallback
1751 | efdadd CARG2, CARG2, TOBIT // Same TOBIT addition as in the .ffunc_bit prologue.
1752 | ins CARG1, CARG1, CARG2
1753 | addi TMP1, TMP1, 8
1754 | b <1
1755 |.endmacro
1756 |
1757 |.ffunc_bit_op band, and
1758 |.ffunc_bit_op bor, or
1759 |.ffunc_bit_op bxor, xor
1760 |
1761 |.ffunc_bit bswap // Byte swap of the 32 bit integer in CARG1.
1762 | rotlwi TMP0, CARG1, 8 // Rotate left by 8 ...
1763 | rlwimi TMP0, CARG1, 24, 0, 7 // ... then insert CARG1 rotated left by 24 into bits 0-7 ...
1764 | rlwimi TMP0, CARG1, 24, 16, 23 // ... and into bits 16-23.
1765 | efdcfsi CRET1, TMP0 // Convert the signed integer to a double.
1766 | b ->fff_restv
1767 |
1768 |.ffunc_bit bnot // Bitwise complement of the 32 bit integer in CARG1.
1769 | not TMP0, CARG1
1770 | efdcfsi CRET1, TMP0 // Convert the signed integer to a double.
1771 | b ->fff_restv
1772 |
1773 |.macro .ffunc_bit_sh, name, ins, shmod // Emits a two-argument shift/rotate fast function; shmod selects how the count is adjusted.
1774 | .ffunc_nn bit_..name
1775 | efdadd CARG2, CARG2, TOBIT // Same TOBIT addition for the shift count ...
1776 | efdadd CARG1, CARG1, TOBIT // ... and for the value.
1777 |.if shmod == 1
1778 | rlwinm CARG2, CARG2, 0, 27, 31 // Keep only the low 5 bits of the count.
1779 |.elif shmod == 2
1780 | neg CARG2, CARG2 // Negate the count (ror is emitted as rotlw with a negated count).
1781 |.endif
1782 | ins TMP0, CARG1, CARG2
1783 | efdcfsi CRET1, TMP0 // Convert the signed integer to a double.
1784 | b ->fff_restv
1785 |.endmacro
1786 |
1787 |.ffunc_bit_sh lshift, slw, 1
1788 |.ffunc_bit_sh rshift, srw, 1
1789 |.ffunc_bit_sh arshift, sraw, 1
1790 |.ffunc_bit_sh rol, rotlw, 0
1791 |.ffunc_bit_sh ror, rotlw, 2
1792 |
1793 |//-----------------------------------------------------------------------
1794 |
1795 |->fff_fallback: // Call fast function fallback handler.
1796 | // BASE = new base, RB = CFUNC, RC = nargs*8
1797 | lwz TMP3, CFUNC:RB->f // Function pointer of the fallback handler.
1798 | add TMP1, BASE, NARGS8:RC // New top = BASE + nargs*8.
1799 | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
1800 | addi TMP0, TMP1, 8*LUA_MINSTACK
1801 | lwz TMP2, L->maxstack
1802 | stw PC, SAVE_PC // Redundant (but a defined value).
1803 | cmplw TMP0, TMP2 // top + LUA_MINSTACK slots beyond maxstack?
1804 | stw BASE, L->base
1805 | stw TMP1, L->top
1806 | mr CARG1, L
1807 | bgt >5 // Need to grow stack.
1808 | mtctr TMP3
1809 | bctrl // (lua_State *L)
1810 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1811 | lwz BASE, L->base
1812 | cmpwi CRET1, 0
1813 | slwi RD, CRET1, 3 // RD = return value * 8.
1814 | la RA, -8(BASE)
1815 | bgt ->fff_res // Returned nresults+1?
1816 |1: // Returned 0 or -1: retry fast path.
1817 | lwz TMP0, L->top
1818 | lwz LFUNC:RB, FRAME_FUNC(BASE)
1819 | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 from top and base.
1820 | bne ->vm_call_tail // Returned -1?
1821 | ins_callt // Returned 0: retry fast path.
1822 |
1823 |// Reconstruct previous base for vmeta_call during tailcall.
1824 |->vm_call_tail:
1825 | andi. TMP0, PC, FRAME_TYPE
1826 | rlwinm TMP1, PC, 0, 0, 28 // PC with the low 3 bits cleared.
1827 | bne >3 // Frame type bits set: use that value as the delta.
1828 | lwz INS, -4(PC) // Otherwise fetch the instruction before PC ...
1829 | decode_RA8 TMP1, INS // ... and derive the delta from its RA operand.
1830 | addi TMP1, TMP1, 8
1831 |3:
1832 | sub TMP2, BASE, TMP1
1833 | b ->vm_call_dispatch // Resolve again for tailcall.
1834 |
1835 |5: // Grow stack for fallback handler.
1836 | li CARG2, LUA_MINSTACK
1837 | bl extern lj_state_growstack // (lua_State *L, int n)
1838 | lwz BASE, L->base
1839 | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry.
1840 | b <1
1841 |
1842 |->fff_gcstep: // Call GC step function.
1843 | // BASE = new base, RC = nargs*8
1844 | mflr SAVE0 // Keep the return address in SAVE0 across the C call.
1845 | stw BASE, L->base
1846 | add TMP0, BASE, NARGS8:RC // top = BASE + nargs*8.
1847 | stw PC, SAVE_PC // Redundant (but a defined value).
1848 | stw TMP0, L->top
1849 | mr CARG1, L
1850 | bl extern lj_gc_step // (lua_State *L)
1851 | lwz BASE, L->base
1852 | mtlr SAVE0 // Restore the return address.
1853 | lwz TMP0, L->top
1854 | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 from the reloaded top and base.
1855 | lwz CFUNC:RB, FRAME_FUNC(BASE)
1856 | blr
1857 |
1858 |//-----------------------------------------------------------------------
1859 |//-- Special dispatch targets -------------------------------------------
1860 |//-----------------------------------------------------------------------
1861 |
1862 |->vm_record: // Dispatch target for recording phase.
1863 |.if JIT
1864 | NYI
1865 |.endif
1866 |
1867 |->vm_rethook: // Dispatch target for return hooks.
1868 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1869 | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
1870 | beq >1 // Not active: continue at label 1 below, which calls lj_dispatch_ins.
1871 |5: // Re-dispatch to static ins.
1872 | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP4 TMP1, INS.
1873 | lwzx TMP0, DISPATCH, TMP1
1874 | mtctr TMP0
1875 | bctr
1876 |
1877 |->vm_inshook: // Dispatch target for instr/line hooks.
1878 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1879 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
1880 | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
1881 | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 // Rotate hookmask and keep bits 31 and 0; tested via cr1 below.
1882 | bne <5 // Already active: re-dispatch without calling the hook.
1883 |
1884 | cmpwi cr1, TMP0, 0
1885 | addic. TMP2, TMP2, -1 // Decrement hookcount; cr0 reflects the new value.
1886 | beq cr1, <5 // Neither of the two kept mask bits is set: re-dispatch.
1887 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
1888 | beq >1 // hookcount reached zero: call the hook dispatcher.
1889 | bge cr1, <5 // Sign bit of the rotated mask clear: re-dispatch.
1890 |1:
1891 | mr CARG1, L
1892 | stw MULTRES, SAVE_MULTRES
1893 | mr CARG2, PC
1894 | stw BASE, L->base
1895 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1896 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
1897 |3:
1898 | lwz BASE, L->base
1899 |4: // Re-dispatch to static ins.
1900 | lwz INS, -4(PC) // Re-fetch the instruction before PC and decode all operands again.
1901 | decode_OP4 TMP1, INS
1902 | decode_RB8 RB, INS
1903 | addi TMP1, TMP1, GG_DISP2STATIC
1904 | decode_RD8 RD, INS
1905 | lwzx TMP0, DISPATCH, TMP1
1906 | decode_RA8 RA, INS
1907 | decode_RC8 RC, INS
1908 | mtctr TMP0
1909 | bctr
1910 |
1911 |->cont_hook: // Continue from hook yield.
1912 | addi PC, PC, 4
1913 | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
1914 | b <4
1915 |
1916 |->vm_hotloop: // Hot loop counter underflow.
1917 |.if JIT
1918 | NYI
1919 |.endif
1920 |
1921 |->vm_callhook: // Dispatch target for call hooks.
1922 | mr CARG2, PC // Call hooks pass the unmodified PC.
1923 |.if JIT
1924 | b >1
1925 |.endif
1926 |
1927 |->vm_hotcall: // Hot call counter underflow.
1928 |.if JIT
1929 | ori CARG2, PC, 1 // Hot calls pass PC with the low bit set.
1930 |1:
1931 |.endif
1932 | add TMP0, BASE, RC // top = BASE + RC.
1933 | stw PC, SAVE_PC
1934 | mr CARG1, L
1935 | stw BASE, L->base
1936 | sub RA, RA, BASE // Keep RA as an offset from BASE across the call.
1937 | stw TMP0, L->top
1938 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
1939 | // Returns ASMFunction.
1940 | lwz BASE, L->base
1941 | lwz TMP0, L->top
1942 | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
1943 | sub NARGS8:RC, TMP0, BASE // Recompute nargs*8 from the reloaded top and base.
1944 | add RA, BASE, RA // Rebase RA on the reloaded BASE.
1945 | lwz LFUNC:RB, FRAME_FUNC(BASE)
1946 | mtctr CRET1 // Jump to the returned ASMFunction.
1947 | bctr
1948 |
1949 |//-----------------------------------------------------------------------
1950 |//-- Trace exit handler -------------------------------------------------
1951 |//-----------------------------------------------------------------------
1952 |
1953 |->vm_exit_handler: // Label only; a body (the NYI macro, defined outside this chunk) is emitted just for JIT builds.
1954 |.if JIT
1955 | NYI
1956 |.endif
1957 |->vm_exit_interp: // Same pattern: label plus a JIT-only NYI body.
1958 |.if JIT
1959 | NYI
1960 |.endif
1961 |
1962 |//-----------------------------------------------------------------------
1963 |//-- Math helper functions ----------------------------------------------
1964 |//-----------------------------------------------------------------------
1965 |
1966 |// FP value rounding. Called by math.floor/math.ceil fast functions
1967 |// and from JIT code.
1968 |//
1969 |// This can be inlined if the CPU has the frin/friz/frip/frim instructions.
1970 |// The alternative hard-float approaches have a deep dependency chain.
1971 |// The resulting latency is at least 3x-7x the double-precision FP latency
1972 |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles.
1973 |//
1974 |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy).
1975 |// However it relies on a fast way to transfer the FP value to GPRs
1976 |// (e500v2: 0cy for lo-word, 1cy for hi-word).
1977 |//
1978 |.macro vm_round, name, mode // Emits <name>_efd and <name>_hilo; mode 0 = floor, 2 = trunc, anything else = ceil.
1979 | // Used temporaries: TMP0, TMP1, TMP2, TMP3.
1980 |->name.._efd: // Input: CARG2, output: CRET2
1981 | evmergehi CARG1, CARG2, CARG2 // Copy the hi word into CARG1, then fall through to _hilo.
1982 |->name.._hilo:
1983 | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
1984 | rlwinm TMP2, CARG1, 12, 21, 31 // Extract the 11 bit biased exponent from the hi word.
1985 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
1986 | li TMP1, -1
1987 | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51?
1988 | subfic TMP0, TMP2, 52 // 52 - exp
1989 | bgt cr1, >1 // No (unsigned compare also catches exp < 0): handled at label 1.
1990 | lus TMP3, 0xfff0
1991 | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp)
1992 | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp
1993 |.if mode == 2 // trunc(x):
1994 | evmergelo TMP0, TMP1, TMP0
1995 | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask
1996 |.else
1997 | andc TMP2, CARG2, TMP0
1998 | andc TMP3, CARG1, TMP1
1999 | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask)
2000 | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31
2001 |.if mode == 0 // floor(x):
2002 | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0)
2003 |.else // ceil(x):
2004 | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0)
2005 |.endif
2006 | and CARG2, CARG2, TMP0 // lo &= lomask
2007 | and CARG1, CARG1, TMP1 // hi &= himask
2008 | subc TMP0, CARG2, TMP0 // lo-lomask, recording the carry for the sube below.
2009 | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask
2010 | sube TMP1, CARG1, TMP1
2011 | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry
2012 | evmergelo CRET2, TMP1, TMP0 // Pack hi and lo into the 64 bit result.
2013 |.endif
2014 | blr
2015 |1:
2016 | bgtlr // Already done if >=2^52, +-inf or nan.
2017 |.if mode == 2 // trunc(x):
2018 | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x)
2019 | li TMP0, 0
2020 | evmergelo CRET2, TMP1, TMP0
2021 |.else
2022 | rlwinm TMP2, CARG1, 0, 1, 31 // abs(hi): clear the sign bit.
2023 | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31
2024 | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo
2025 | lus TMP1, 0x3ff0 // Hi word of 1.0.
2026 |.if mode == 0 // floor(x):
2027 | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0)
2028 |.else // ceil(x):
2029 | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0)
2030 |.endif
2031 | li TMP0, 0
2032 | iseleq TMP1, r0, TMP1
2033 | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0)
2034 | evmergelo CRET2, CARG1, TMP0 // Pack hi and a zero lo word into the 64 bit result.
2035 |.endif
2036 | blr
2037 |.endmacro
2038 |
2039 |->vm_floor: // Wrapper around vm_floor_hilo for a value split across CARG1 and CARG2.
2040 | mflr CARG3 // Keep the return address in CARG3 across the nested call.
2041 | evmergelo CARG2, CARG1, CARG2 // Pack the lo words of CARG1 and CARG2 into one 64 bit value.
2042 | bl ->vm_floor_hilo
2043 | mtlr CARG3
2044 | evmergehi CRET1, CRET2, CRET2 // Copy the hi word of the result into CRET1.
2045 | blr
2046 |
2047 | vm_round vm_floor, 0
2048 | vm_round vm_ceil, 1
2049 |.if JIT
2050 | vm_round vm_trunc, 2
2051 |.else
2052 |->vm_trunc_efd: // Without JIT only the two labels are emitted, with no code of their own.
2053 |->vm_trunc_hilo:
2054 |.endif
2055 |
2056 |//-----------------------------------------------------------------------
2057 |//-- Miscellaneous functions --------------------------------------------
2058 |//-----------------------------------------------------------------------
2059 |
2060 |//-----------------------------------------------------------------------
2061 |//-- FFI helper functions -----------------------------------------------
2062 |//-----------------------------------------------------------------------
2063 |
2064 |->vm_ffi_call: // Label only; a body (the NYI macro, defined outside this chunk) is emitted just for FFI builds.
2065 |.if FFI
2066 | NYI
2067 |.endif
2068 |
2069 |//-----------------------------------------------------------------------
2070}
2071
2072/* Generate the code for a single instruction. */
2073static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2074{
2075 int vk = 0;
2076 |=>defop:
2077
2078 switch (op) {
2079
2080 /* -- Comparison ops ---------------------------------------------------- */
2081
2082 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2083
2084 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2085 | // RA = src1*8, RD = src2*8, JMP with RD = target
2086 | evlddx TMP0, BASE, RA
2087 | addi PC, PC, 4
2088 | evlddx TMP1, BASE, RD
2089 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2090 | lwz TMP2, -4(PC)
2091 | evmergehi RB, TMP0, TMP1
2092 | decode_RD4 TMP2, TMP2
2093 | checknum RB
2094 | add TMP2, TMP2, TMP3
2095 | checkanyfail ->vmeta_comp
2096 | efdcmplt TMP0, TMP1
2097 if (op == BC_ISLE || op == BC_ISGT) {
2098 | efdcmpeq cr1, TMP0, TMP1
2099 | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2100 }
2101 if (op == BC_ISLT || op == BC_ISLE) {
2102 | iselgt PC, TMP2, PC
2103 } else {
2104 | iselgt PC, PC, TMP2
2105 }
2106 | ins_next
2107 break;
2108
2109 case BC_ISEQV: case BC_ISNEV:
2110 vk = op == BC_ISEQV;
2111 | // RA = src1*8, RD = src2*8, JMP with RD = target
2112 | evlddx CARG2, BASE, RA
2113 | addi PC, PC, 4
2114 | evlddx CARG3, BASE, RD
2115 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2116 | lwz TMP2, -4(PC)
2117 | evmergehi RB, CARG2, CARG3
2118 | decode_RD4 TMP2, TMP2
2119 | checknum RB
2120 | add TMP2, TMP2, TMP3
2121 | checkanyfail >5
2122 | efdcmpeq CARG2, CARG3
2123 if (vk) {
2124 | iselgt PC, TMP2, PC
2125 } else {
2126 | iselgt PC, PC, TMP2
2127 }
2128 |1:
2129 | ins_next
2130 |
2131 |5: // Either or both types are not numbers.
2132 | evcmpeq CARG2, CARG3
2133 | not TMP3, RB
2134 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
2135 | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type.
2136 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
2137 | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive.
2138 | mr SAVE0, PC
2139 if (vk) {
2140 | isel PC, TMP2, PC, 4*cr7+gt
2141 } else {
2142 | isel TMP2, PC, TMP2, 4*cr7+gt
2143 }
2144 | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2.
2145 if (vk) {
2146 | isel PC, TMP2, PC, 4*cr0+so
2147 } else {
2148 | isel PC, PC, TMP2, 4*cr0+so
2149 }
2150 | blt cr7, <1 // Done if 1 or 2.
2151 | blt cr6, <1 // Done if not tab/ud.
2152 |
2153 | // Different tables or userdatas. Need to check __eq metamethod.
2154 | // Field metatable must be at same offset for GCtab and GCudata!
2155 | lwz TAB:TMP2, TAB:CARG2->metatable
2156 | li CARG4, 1-vk // ne = 0 or 1.
2157 | cmplwi TAB:TMP2, 0
2158 | beq <1 // No metatable?
2159 | lbz TMP2, TAB:TMP2->nomm
2160 | andi. TMP2, TMP2, 1<<MM_eq
2161 | bne <1 // Or 'no __eq' flag set?
2162 | mr PC, SAVE0 // Restore old PC.
2163 | b ->vmeta_equal // Handle __eq metamethod.
2164 break;
2165
2166 case BC_ISEQS: case BC_ISNES:
2167 vk = op == BC_ISEQS;
2168 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
2169 | evlddx TMP0, BASE, RA
2170 | srwi RD, RD, 1
2171 | lwz INS, 0(PC)
2172 | subfic RD, RD, -4
2173 | addi PC, PC, 4
2174 | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4
2175 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2176 | decode_RD4 TMP2, INS
2177 | evmergelo STR:TMP1, TISSTR, STR:TMP1
2178 | add TMP2, TMP2, TMP3
2179 | evcmpeq TMP0, STR:TMP1
2180 if (vk) {
2181 | isel PC, TMP2, PC, 4*cr0+so
2182 } else {
2183 | isel PC, PC, TMP2, 4*cr0+so
2184 }
2185 | ins_next
2186 break;
2187
2188 case BC_ISEQN: case BC_ISNEN:
2189 vk = op == BC_ISEQN;
2190 | // RA = src*8, RD = num_const*8, JMP with RD = target
2191 | evlddx TMP0, BASE, RA
2192 | addi PC, PC, 4
2193 | evlddx TMP1, KBASE, RD
2194 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2195 | lwz INS, -4(PC)
2196 | checknum TMP0
2197 | checkfail >5
2198 | efdcmpeq TMP0, TMP1
2199 |1:
2200 | decode_RD4 TMP2, INS
2201 | add TMP2, TMP2, TMP3
2202 if (vk) {
2203 | iselgt PC, TMP2, PC
2204 |5:
2205 } else {
2206 | iselgt PC, PC, TMP2
2207 }
2208 |3:
2209 | ins_next
2210 if (!vk) {
2211 |5:
2212 | decode_RD4 TMP2, INS
2213 | add PC, TMP2, TMP3
2214 | b <3
2215 }
2216 break;
2217
2218 case BC_ISEQP: case BC_ISNEP:
2219 vk = op == BC_ISEQP;
2220 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
2221 | lwzx TMP0, BASE, RA
2222 | srwi TMP1, RD, 3
2223 | lwz INS, 0(PC)
2224 | addi PC, PC, 4
2225 | not TMP1, TMP1
2226 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2227 | cmplw TMP0, TMP1
2228 | decode_RD4 TMP2, INS
2229 | add TMP2, TMP2, TMP3
2230 if (vk) {
2231 | iseleq PC, TMP2, PC
2232 } else {
2233 | iseleq PC, PC, TMP2
2234 }
2235 | ins_next
2236 break;
2237
2238 /* -- Unary test and copy ops ------------------------------------------- */
2239
2240 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2241 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
2242 | evlddx TMP0, BASE, RD
2243 | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
2244 | lwz INS, 0(PC)
2245 | evcmpltu TMP0, TMP1
2246 | addi PC, PC, 4
2247 if (op == BC_IST || op == BC_ISF) {
2248 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
2249 | decode_RD4 TMP2, INS
2250 | add TMP2, TMP2, TMP3
2251 if (op == BC_IST) {
2252 | isellt PC, TMP2, PC
2253 } else {
2254 | isellt PC, PC, TMP2
2255 }
2256 } else {
2257 if (op == BC_ISTC) {
2258 | checkfail >1
2259 } else {
2260 | checkok >1
2261 }
2262 | addis PC, PC, -(BCBIAS_J*4 >> 16)
2263 | decode_RD4 TMP2, INS
2264 | evstddx TMP0, BASE, RA
2265 | add PC, PC, TMP2
2266 |1:
2267 }
2268 | ins_next
2269 break;
2270
2271 /* -- Unary ops --------------------------------------------------------- */
2272
2273 case BC_MOV:
2274 | // RA = dst*8, RD = src*8
2275 | ins_next1
2276 | evlddx TMP0, BASE, RD
2277 | evstddx TMP0, BASE, RA
2278 | ins_next2
2279 break;
2280 case BC_NOT:
2281 | // RA = dst*8, RD = src*8
2282 | ins_next1
2283 | lwzx TMP0, BASE, RD
2284 | subfic TMP1, TMP0, LJ_TTRUE
2285 | adde TMP0, TMP0, TMP1
2286 | stwx TMP0, BASE, RA
2287 | ins_next2
2288 break;
2289 case BC_UNM:
2290 | // RA = dst*8, RD = src*8
2291 | evlddx TMP0, BASE, RD
2292 | checknum TMP0
2293 | checkfail ->vmeta_unm
2294 | efdneg TMP0, TMP0
2295 | ins_next1
2296 | evstddx TMP0, BASE, RA
2297 | ins_next2
2298 break;
2299 case BC_LEN:
2300 | // RA = dst*8, RD = src*8
2301 | evlddx CARG1, BASE, RD
2302 | checkstr CARG1
2303 | checkfail >2
2304 | lwz CRET1, STR:CARG1->len
2305 |1:
2306 | ins_next1
2307 | efdcfsi TMP0, CRET1
2308 | evstddx TMP0, BASE, RA
2309 | ins_next2
2310 |2:
2311 | checktab CARG1
2312 | checkfail ->vmeta_len
2313#if LJ_52
2314 | lwz TAB:TMP2, TAB:CARG1->metatable
2315 | cmplwi TAB:TMP2, 0
2316 | bne >9
2317 |3:
2318#endif
2319 |->BC_LEN_Z:
2320 | bl extern lj_tab_len // (GCtab *t)
2321 | // Returns uint32_t (but less than 2^31).
2322 | b <1
2323#if LJ_52
2324 |9:
2325 | lbz TMP0, TAB:TMP2->nomm
2326 | andi. TMP0, TMP0, 1<<MM_len
2327 | bne <3 // 'no __len' flag set: done.
2328 | b ->vmeta_len
2329#endif
2330 break;
2331
2332 /* -- Binary ops -------------------------------------------------------- */
2333
2334 |.macro ins_arithpre, t0, t1
2335 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2336 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2337 ||switch (vk) {
2338 ||case 0:
2339 | evlddx t0, BASE, RB
2340 | checknum t0
2341 | evlddx t1, KBASE, RC
2342 | checkfail ->vmeta_arith_vn
2343 || break;
2344 ||case 1:
2345 | evlddx t1, BASE, RB
2346 | checknum t1
2347 | evlddx t0, KBASE, RC
2348 | checkfail ->vmeta_arith_nv
2349 || break;
2350 ||default:
2351 | evlddx t0, BASE, RB
2352 | evlddx t1, BASE, RC
2353 | evmergehi TMP2, t0, t1
2354 | checknum TMP2
2355 | checkanyfail ->vmeta_arith_vv
2356 || break;
2357 ||}
2358 |.endmacro
2359 |
2360 |.macro ins_arith, ins
2361 | ins_arithpre TMP0, TMP1
2362 | ins_next1
2363 | ins TMP0, TMP0, TMP1
2364 | evstddx TMP0, BASE, RA
2365 | ins_next2
2366 |.endmacro
2367
2368 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2369 | ins_arith efdadd
2370 break;
2371 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2372 | ins_arith efdsub
2373 break;
2374 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2375 | ins_arith efdmul
2376 break;
2377 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2378 | ins_arith efddiv
2379 break;
2380 case BC_MODVN:
2381 | ins_arithpre RD, SAVE0
2382 |->BC_MODVN_Z:
2383 | efddiv CARG2, RD, SAVE0
2384 | bl ->vm_floor_efd // floor(b/c)
2385 | efdmul TMP0, CRET2, SAVE0
2386 | ins_next1
2387 | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c
2388 | evstddx TMP0, BASE, RA
2389 | ins_next2
2390 break;
2391 case BC_MODNV: case BC_MODVV:
2392 | ins_arithpre RD, SAVE0
2393 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2394 break;
2395 case BC_POW:
2396 | evlddx CARG2, BASE, RB
2397 | evlddx CARG4, BASE, RC
2398 | evmergehi CARG1, CARG4, CARG2
2399 | checknum CARG1
2400 | evmergehi CARG3, CARG4, CARG4
2401 | checkanyfail ->vmeta_arith_vv
2402 | bl extern pow@plt
2403 | evmergelo CRET2, CRET1, CRET2
2404 | evstddx CRET2, BASE, RA
2405 | ins_next
2406 break;
2407
2408 case BC_CAT:
2409 | // RA = dst*8, RB = src_start*8, RC = src_end*8
2410 | sub CARG3, RC, RB
2411 | stw BASE, L->base
2412 | add CARG2, BASE, RC
2413 | mr SAVE0, RB
2414 |->BC_CAT_Z:
2415 | stw PC, SAVE_PC
2416 | mr CARG1, L
2417 | srwi CARG3, CARG3, 3
2418 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2419 | // Returns NULL (finished) or TValue * (metamethod).
2420 | cmplwi CRET1, 0
2421 | lwz BASE, L->base
2422 | bne ->vmeta_binop
2423 | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA.
2424 | evstddx TMP0, BASE, RA
2425 | ins_next
2426 break;
2427
2428 /* -- Constant ops ------------------------------------------------------ */
2429
2430 case BC_KSTR:
2431 | // RA = dst*8, RD = str_const*8 (~)
2432 | ins_next1
2433 | srwi TMP1, RD, 1
2434 | subfic TMP1, TMP1, -4
2435 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
2436 | evmergelo TMP0, TISSTR, TMP0
2437 | evstddx TMP0, BASE, RA
2438 | ins_next2
2439 break;
2440 case BC_KCDATA:
2441 |.if FFI
2442 | // RA = dst*8, RD = cdata_const*8 (~)
2443 | ins_next1
2444 | srwi TMP1, RD, 1
2445 | subfic TMP1, TMP1, -4
2446 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
2447 | li TMP2, LJ_TCDATA
2448 | evmergelo TMP0, TMP2, TMP0
2449 | evstddx TMP0, BASE, RA
2450 | ins_next2
2451 |.endif
2452 break;
2453 case BC_KSHORT:
2454 | // RA = dst*8, RD = int16_literal*8
2455 | srwi TMP1, RD, 3
2456 | extsh TMP1, TMP1
2457 | ins_next1
2458 | efdcfsi TMP0, TMP1
2459 | evstddx TMP0, BASE, RA
2460 | ins_next2
2461 break;
2462 case BC_KNUM:
2463 | // RA = dst*8, RD = num_const*8
2464 | evlddx TMP0, KBASE, RD
2465 | ins_next1
2466 | evstddx TMP0, BASE, RA
2467 | ins_next2
2468 break;
2469 case BC_KPRI:
2470 | // RA = dst*8, RD = primitive_type*8 (~)
2471 | srwi TMP1, RD, 3
2472 | not TMP0, TMP1
2473 | ins_next1
2474 | stwx TMP0, BASE, RA
2475 | ins_next2
2476 break;
2477 case BC_KNIL:
2478 | // RA = base*8, RD = end*8
2479 | evstddx TISNIL, BASE, RA
2480 | addi RA, RA, 8
2481 |1:
2482 | evstddx TISNIL, BASE, RA
2483 | cmpw RA, RD
2484 | addi RA, RA, 8
2485 | blt <1
2486 | ins_next_
2487 break;
2488
2489 /* -- Upvalue and function ops ------------------------------------------ */
2490
2491 case BC_UGET:
2492 | // RA = dst*8, RD = uvnum*8
2493 | ins_next1
2494 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2495 | srwi RD, RD, 1
2496 | addi RD, RD, offsetof(GCfuncL, uvptr)
2497 | lwzx UPVAL:RB, LFUNC:RB, RD
2498 | lwz TMP1, UPVAL:RB->v
2499 | evldd TMP0, 0(TMP1)
2500 | evstddx TMP0, BASE, RA
2501 | ins_next2
2502 break;
2503 case BC_USETV:
2504 | // RA = uvnum*8, RD = src*8
2505 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2506 | srwi RA, RA, 1
2507 | addi RA, RA, offsetof(GCfuncL, uvptr)
2508 | evlddx TMP1, BASE, RD
2509 | lwzx UPVAL:RB, LFUNC:RB, RA
2510 | lbz TMP3, UPVAL:RB->marked
2511 | lwz CARG2, UPVAL:RB->v
2512 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2513 | lbz TMP0, UPVAL:RB->closed
2514 | evmergehi TMP2, TMP1, TMP1
2515 | evstdd TMP1, 0(CARG2)
2516 | cmplwi cr1, TMP0, 0
2517 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2518 | subi TMP2, TMP2, (LJ_TISNUM+1)
2519 | bne >2 // Upvalue is closed and black?
2520 |1:
2521 | ins_next
2522 |
2523 |2: // Check if new value is collectable.
2524 | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
2525 | bge <1 // tvisgcv(v)
2526 | lbz TMP3, GCOBJ:TMP1->gch.marked
2527 | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2528 | la CARG1, GG_DISP2G(DISPATCH)
2529 | // Crossed a write barrier. Move the barrier forward.
2530 | beq <1
2531 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2532 | b <1
2533 break;
2534 case BC_USETS:
2535 | // RA = uvnum*8, RD = str_const*8 (~)
2536 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2537 | srwi TMP1, RD, 1
2538 | srwi RA, RA, 1
2539 | subfic TMP1, TMP1, -4
2540 | addi RA, RA, offsetof(GCfuncL, uvptr)
2541 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
2542 | lwzx UPVAL:RB, LFUNC:RB, RA
2543 | evmergelo STR:TMP1, TISSTR, STR:TMP1
2544 | lbz TMP3, UPVAL:RB->marked
2545 | lwz CARG2, UPVAL:RB->v
2546 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2547 | lbz TMP3, STR:TMP1->marked
2548 | lbz TMP2, UPVAL:RB->closed
2549 | evstdd STR:TMP1, 0(CARG2)
2550 | bne >2
2551 |1:
2552 | ins_next
2553 |
2554 |2: // Check if string is white and ensure upvalue is closed.
2555 | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str)
2556 | cmplwi cr1, TMP2, 0
2557 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
2558 | la CARG1, GG_DISP2G(DISPATCH)
2559 | // Crossed a write barrier. Move the barrier forward.
2560 | beq <1
2561 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2562 | b <1
2563 break;
2564 case BC_USETN:
2565 | // RA = uvnum*8, RD = num_const*8
2566 | ins_next1
2567 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2568 | srwi RA, RA, 1
2569 | addi RA, RA, offsetof(GCfuncL, uvptr)
2570 | evlddx TMP0, KBASE, RD
2571 | lwzx UPVAL:RB, LFUNC:RB, RA
2572 | lwz TMP1, UPVAL:RB->v
2573 | evstdd TMP0, 0(TMP1)
2574 | ins_next2
2575 break;
2576 case BC_USETP:
2577 | // RA = uvnum*8, RD = primitive_type*8 (~)
2578 | ins_next1
2579 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2580 | srwi RA, RA, 1
2581 | addi RA, RA, offsetof(GCfuncL, uvptr)
2582 | srwi TMP0, RD, 3
2583 | lwzx UPVAL:RB, LFUNC:RB, RA
2584 | not TMP0, TMP0
2585 | lwz TMP1, UPVAL:RB->v
2586 | stw TMP0, 0(TMP1)
2587 | ins_next2
2588 break;
2589
2590 case BC_UCLO:
2591 | // RA = level*8, RD = target
2592 | lwz TMP1, L->openupval
2593 | branch_RD // Do this first since RD is not saved.
2594 | stw BASE, L->base
2595 | cmplwi TMP1, 0
2596 | mr CARG1, L
2597 | beq >1
2598 | add CARG2, BASE, RA
2599 | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
2600 | lwz BASE, L->base
2601 |1:
2602 | ins_next
2603 break;
2604
2605 case BC_FNEW:
2606 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
2607 | srwi TMP1, RD, 1
2608 | stw BASE, L->base
2609 | subfic TMP1, TMP1, -4
2610 | stw PC, SAVE_PC
2611 | lwzx CARG2, KBASE, TMP1 // KBASE-4-proto_const*4
2612 | mr CARG1, L
2613 | lwz CARG3, FRAME_FUNC(BASE)
2614 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2615 | bl extern lj_func_newL_gc
2616 | // Returns GCfuncL *.
2617 | lwz BASE, L->base
2618 | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1
2619 | evstddx LFUNC:CRET1, BASE, RA
2620 | ins_next
2621 break;
2622
2623 /* -- Table ops --------------------------------------------------------- */
2624
2625 case BC_TNEW:
2626 case BC_TDUP:
2627 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
2628 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
2629 | mr CARG1, L
2630 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
2631 | stw BASE, L->base
2632 | cmplw TMP0, TMP1
2633 | stw PC, SAVE_PC
2634 | bge >5
2635 |1:
2636 if (op == BC_TNEW) {
2637 | rlwinm CARG2, RD, 29, 21, 31
2638 | rlwinm CARG3, RD, 18, 27, 31
2639 | cmpwi CARG2, 0x7ff
2640 | li TMP1, 0x801
2641 | iseleq CARG2, TMP1, CARG2
2642 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2643 | // Returns Table *.
2644 } else {
2645 | srwi TMP1, RD, 1
2646 | subfic TMP1, TMP1, -4
2647 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4
2648 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
2649 | // Returns Table *.
2650 }
2651 | lwz BASE, L->base
2652 | evmergelo TAB:CRET1, TISTAB, TAB:CRET1
2653 | evstddx TAB:CRET1, BASE, RA
2654 | ins_next
2655 |5:
2656 | mr SAVE0, RD
2657 | bl extern lj_gc_step_fixtop // (lua_State *L)
2658 | mr RD, SAVE0
2659 | mr CARG1, L
2660 | b <1
2661 break;
2662
2663 case BC_GGET:
2664 | // RA = dst*8, RD = str_const*8 (~)
2665 case BC_GSET:
2666 | // RA = src*8, RD = str_const*8 (~)
2667 | lwz LFUNC:TMP2, FRAME_FUNC(BASE)
2668 | srwi TMP1, RD, 1
2669 | lwz TAB:RB, LFUNC:TMP2->env
2670 | subfic TMP1, TMP1, -4
2671 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2672 if (op == BC_GGET) {
2673 | b ->BC_TGETS_Z
2674 } else {
2675 | b ->BC_TSETS_Z
2676 }
2677 break;
2678
2679 case BC_TGETV:
2680 | // RA = dst*8, RB = table*8, RC = key*8
2681 | evlddx TAB:RB, BASE, RB
2682 | evlddx RC, BASE, RC
2683 | checktab TAB:RB
2684 | checkfail ->vmeta_tgetv
2685 | checknum RC
2686 | checkfail >5
2687 | // Convert number key to integer
2688 | efdctsi TMP2, RC
2689 | lwz TMP0, TAB:RB->asize
2690 | efdcfsi TMP1, TMP2
2691 | cmplw cr0, TMP0, TMP2
2692 | efdcmpeq cr1, RC, TMP1
2693 | lwz TMP1, TAB:RB->array
2694 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2695 | slwi TMP2, TMP2, 3
2696 | ble ->vmeta_tgetv // Integer key and in array part?
2697 | evlddx TMP1, TMP1, TMP2
2698 | checknil TMP1
2699 | checkok >2
2700 |1:
2701 | evstddx TMP1, BASE, RA
2702 | ins_next
2703 |
2704 |2: // Check for __index if table value is nil.
2705 | lwz TAB:TMP2, TAB:RB->metatable
2706 | cmplwi TAB:TMP2, 0
2707 | beq <1 // No metatable: done.
2708 | lbz TMP0, TAB:TMP2->nomm
2709 | andi. TMP0, TMP0, 1<<MM_index
2710 | bne <1 // 'no __index' flag set: done.
2711 | b ->vmeta_tgetv
2712 |
2713 |5:
2714 | checkstr STR:RC // String key?
2715 | checkok ->BC_TGETS_Z
2716 | b ->vmeta_tgetv
2717 break;
2718 case BC_TGETS:
2719 | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
2720 | evlddx TAB:RB, BASE, RB
2721 | srwi TMP1, RC, 1
2722 | checktab TAB:RB
2723 | subfic TMP1, TMP1, -4
2724 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2725 | checkfail ->vmeta_tgets1
2726 |->BC_TGETS_Z:
2727 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
2728 | lwz TMP0, TAB:RB->hmask
2729 | lwz TMP1, STR:RC->hash
2730 | lwz NODE:TMP2, TAB:RB->node
2731 | evmergelo STR:RC, TISSTR, STR:RC
2732 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2733 | slwi TMP0, TMP1, 5
2734 | slwi TMP1, TMP1, 3
2735 | sub TMP1, TMP0, TMP1
2736 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
2737 |1:
2738 | evldd TMP0, NODE:TMP2->key
2739 | evldd TMP1, NODE:TMP2->val
2740 | evcmpeq TMP0, STR:RC
2741 | checkanyfail >4
2742 | checknil TMP1
2743 | checkok >5 // Key found, but nil value?
2744 |3:
2745 | evstddx TMP1, BASE, RA
2746 | ins_next
2747 |
2748 |4: // Follow hash chain.
2749 | lwz NODE:TMP2, NODE:TMP2->next
2750 | cmplwi NODE:TMP2, 0
2751 | bne <1
2752 | // End of hash chain: key not found, nil result.
2753 | evmr TMP1, TISNIL
2754 |
2755 |5: // Check for __index if table value is nil.
2756 | lwz TAB:TMP2, TAB:RB->metatable
2757 | cmplwi TAB:TMP2, 0
2758 | beq <3 // No metatable: done.
2759 | lbz TMP0, TAB:TMP2->nomm
2760 | andi. TMP0, TMP0, 1<<MM_index
2761 | bne <3 // 'no __index' flag set: done.
2762 | b ->vmeta_tgets
2763 break;
2764 case BC_TGETB:
2765 | // RA = dst*8, RB = table*8, RC = index*8
2766 | evlddx TAB:RB, BASE, RB
2767 | srwi TMP0, RC, 3
2768 | checktab TAB:RB
2769 | checkfail ->vmeta_tgetb
2770 | lwz TMP1, TAB:RB->asize
2771 | lwz TMP2, TAB:RB->array
2772 | cmplw TMP0, TMP1
2773 | bge ->vmeta_tgetb
2774 | evlddx TMP1, TMP2, RC
2775 | checknil TMP1
2776 | checkok >5
2777 |1:
2778 | ins_next1
2779 | evstddx TMP1, BASE, RA
2780 | ins_next2
2781 |
2782 |5: // Check for __index if table value is nil.
2783 | lwz TAB:TMP2, TAB:RB->metatable
2784 | cmplwi TAB:TMP2, 0
2785 | beq <1 // No metatable: done.
2786 | lbz TMP2, TAB:TMP2->nomm
2787 | andi. TMP2, TMP2, 1<<MM_index
2788 | bne <1 // 'no __index' flag set: done.
2789 | b ->vmeta_tgetb // Caveat: preserve TMP0!
2790 break;
2791
2792 case BC_TSETV:
2793 | // RA = src*8, RB = table*8, RC = key*8
2794 | evlddx TAB:RB, BASE, RB
2795 | evlddx RC, BASE, RC
2796 | checktab TAB:RB
2797 | checkfail ->vmeta_tsetv
2798 | checknum RC
2799 | checkfail >5
2800 | // Convert number key to integer
2801 | efdctsi TMP2, RC
2802 | evlddx SAVE0, BASE, RA
2803 | lwz TMP0, TAB:RB->asize
2804 | efdcfsi TMP1, TMP2
2805 | cmplw cr0, TMP0, TMP2
2806 | efdcmpeq cr1, RC, TMP1
2807 | lwz TMP1, TAB:RB->array
2808 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
2809 | slwi TMP0, TMP2, 3
2810 | ble ->vmeta_tsetv // Integer key and in array part?
2811 | lbz TMP3, TAB:RB->marked
2812 | evlddx TMP2, TMP1, TMP0
2813 | checknil TMP2
2814 | checkok >3
2815 |1:
2816 | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
2817 | evstddx SAVE0, TMP1, TMP0
2818 | bne >7
2819 |2:
2820 | ins_next
2821 |
2822 |3: // Check for __newindex if previous value is nil.
2823 | lwz TAB:TMP2, TAB:RB->metatable
2824 | cmplwi TAB:TMP2, 0
2825 | beq <1 // No metatable: done.
2826 | lbz TMP2, TAB:TMP2->nomm
2827 | andi. TMP2, TMP2, 1<<MM_newindex
2828 | bne <1 // 'no __newindex' flag set: done.
2829 | b ->vmeta_tsetv
2830 |
2831 |5:
2832 | checkstr STR:RC // String key?
2833 | checkok ->BC_TSETS_Z
2834 | b ->vmeta_tsetv
2835 |
2836 |7: // Possible table write barrier for the value. Skip valiswhite check.
2837 | barrierback TAB:RB, TMP3, TMP0
2838 | b <2
2839 break;
2840 case BC_TSETS:
2841 | // RA = src*8, RB = table*8, RC = str_const*8 (~)
2842 | evlddx TAB:RB, BASE, RB
2843 | srwi TMP1, RC, 1
2844 | checktab TAB:RB
2845 | subfic TMP1, TMP1, -4
2846 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
2847 | checkfail ->vmeta_tsets1
2848 |->BC_TSETS_Z:
2849 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
2850 | lwz TMP0, TAB:RB->hmask
2851 | lwz TMP1, STR:RC->hash
2852 | lwz NODE:TMP2, TAB:RB->node
2853 | evmergelo STR:RC, TISSTR, STR:RC
2854 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
2855 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
2856 | evlddx SAVE0, BASE, RA
2857 | slwi TMP0, TMP1, 5
2858 | slwi TMP1, TMP1, 3
2859 | sub TMP1, TMP0, TMP1
2860 | lbz TMP3, TAB:RB->marked
2861 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
2862 |1:
2863 | evldd TMP0, NODE:TMP2->key
2864 | evldd TMP1, NODE:TMP2->val
2865 | evcmpeq TMP0, STR:RC
2866 | checkanyfail >5
2867 | checknil TMP1
2868 | checkok >4 // Key found, but nil value?
2869 |2:
2870 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2871 | evstdd SAVE0, NODE:TMP2->val
2872 | bne >7
2873 |3:
2874 | ins_next
2875 |
2876 |4: // Check for __newindex if previous value is nil.
2877 | lwz TAB:TMP1, TAB:RB->metatable
2878 | cmplwi TAB:TMP1, 0
2879 | beq <2 // No metatable: done.
2880 | lbz TMP0, TAB:TMP1->nomm
2881 | andi. TMP0, TMP0, 1<<MM_newindex
2882 | bne <2 // 'no __newindex' flag set: done.
2883 | b ->vmeta_tsets
2884 |
2885 |5: // Follow hash chain.
2886 | lwz NODE:TMP2, NODE:TMP2->next
2887 | cmplwi NODE:TMP2, 0
2888 | bne <1
2889 | // End of hash chain: key not found, add a new one.
2890 |
2891 | // But check for __newindex first.
2892 | lwz TAB:TMP1, TAB:RB->metatable
2893 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
2894 | stw PC, SAVE_PC
2895 | mr CARG1, L
2896 | cmplwi TAB:TMP1, 0
2897 | stw BASE, L->base
2898 | beq >6 // No metatable: continue.
2899 | lbz TMP0, TAB:TMP1->nomm
2900 | andi. TMP0, TMP0, 1<<MM_newindex
2901 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
2902 |6:
2903 | mr CARG2, TAB:RB
2904 | evstdd STR:RC, 0(CARG3)
2905 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
2906 | // Returns TValue *.
2907 | lwz BASE, L->base
2908 | evstdd SAVE0, 0(CRET1)
2909 | b <3 // No 2nd write barrier needed.
2910 |
2911 |7: // Possible table write barrier for the value. Skip valiswhite check.
2912 | barrierback TAB:RB, TMP3, TMP0
2913 | b <3
2914 break;
2915 case BC_TSETB:
2916 | // RA = src*8, RB = table*8, RC = index*8
2917 | evlddx TAB:RB, BASE, RB
2918 | srwi TMP0, RC, 3
2919 | checktab TAB:RB
2920 | checkfail ->vmeta_tsetb
2921 | lwz TMP1, TAB:RB->asize
2922 | lwz TMP2, TAB:RB->array
2923 | lbz TMP3, TAB:RB->marked
2924 | cmplw TMP0, TMP1
2925 | evlddx SAVE0, BASE, RA
2926 | bge ->vmeta_tsetb
2927 | evlddx TMP1, TMP2, RC
2928 | checknil TMP1
2929 | checkok >5
2930 |1:
2931 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2932 | evstddx SAVE0, TMP2, RC
2933 | bne >7
2934 |2:
2935 | ins_next
2936 |
2937 |5: // Check for __newindex if previous value is nil.
2938 | lwz TAB:TMP1, TAB:RB->metatable
2939 | cmplwi TAB:TMP1, 0
2940 | beq <1 // No metatable: done.
2941 | lbz TMP1, TAB:TMP1->nomm
2942 | andi. TMP1, TMP1, 1<<MM_newindex
2943 | bne <1 // 'no __newindex' flag set: done.
2944 | b ->vmeta_tsetb // Caveat: preserve TMP0!
2945 |
2946 |7: // Possible table write barrier for the value. Skip valiswhite check.
2947 | barrierback TAB:RB, TMP3, TMP0
2948 | b <2
2949 break;
2950
2951 case BC_TSETM:
2952 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
2953 | add RA, BASE, RA
2954 |1:
2955 | add TMP3, KBASE, RD
2956 | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
2957 | addic. TMP0, MULTRES, -8
2958 | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
2959 | srwi CARG3, TMP0, 3
2960 | beq >4 // Nothing to copy?
2961 | add CARG3, CARG3, TMP3
2962 | lwz TMP2, TAB:CARG2->asize
2963 | slwi TMP1, TMP3, 3
2964 | lbz TMP3, TAB:CARG2->marked
2965 | cmplw CARG3, TMP2
2966 | add TMP2, RA, TMP0
2967 | lwz TMP0, TAB:CARG2->array
2968 | bgt >5
2969 | add TMP1, TMP1, TMP0
2970 | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
2971 |3: // Copy result slots to table.
2972 | evldd TMP0, 0(RA)
2973 | addi RA, RA, 8
2974 | cmpw cr1, RA, TMP2
2975 | evstdd TMP0, 0(TMP1)
2976 | addi TMP1, TMP1, 8
2977 | blt cr1, <3
2978 | bne >7
2979 |4:
2980 | ins_next
2981 |
2982 |5: // Need to resize array part.
2983 | stw BASE, L->base
2984 | mr CARG1, L
2985 | stw PC, SAVE_PC
2986 | mr SAVE0, RD
2987 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
2988 | // Must not reallocate the stack.
2989 | mr RD, SAVE0
2990 | b <1
2991 |
2992 |7: // Possible table write barrier for any value. Skip valiswhite check.
2993 | barrierback TAB:CARG2, TMP3, TMP0
2994 | b <4
2995 break;
2996
2997 /* -- Calls and vararg handling ----------------------------------------- */
2998
2999 case BC_CALLM:
3000 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
3001 | add NARGS8:RC, NARGS8:RC, MULTRES
3002 | // Fall through. Assumes BC_CALL follows.
3003 break;
3004 case BC_CALL:
3005 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
3006 | evlddx LFUNC:RB, BASE, RA
3007 | mr TMP2, BASE
3008 | add BASE, BASE, RA
3009 | subi NARGS8:RC, NARGS8:RC, 8
3010 | checkfunc LFUNC:RB
3011 | addi BASE, BASE, 8
3012 | checkfail ->vmeta_call
3013 | ins_call
3014 break;
3015
3016 case BC_CALLMT:
3017 | // RA = base*8, (RB = 0,) RC = extra_nargs*8
3018 | add NARGS8:RC, NARGS8:RC, MULTRES
3019 | // Fall through. Assumes BC_CALLT follows.
3020 break;
3021 case BC_CALLT:
3022 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
3023 | evlddx LFUNC:RB, BASE, RA
3024 | add RA, BASE, RA
3025 | lwz TMP1, FRAME_PC(BASE)
3026 | subi NARGS8:RC, NARGS8:RC, 8
3027 | checkfunc LFUNC:RB
3028 | addi RA, RA, 8
3029 | checkfail ->vmeta_callt
3030 |->BC_CALLT_Z:
3031 | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand.
3032 | lbz TMP3, LFUNC:RB->ffid
3033 | xori TMP2, TMP1, FRAME_VARG
3034 | cmplwi cr1, NARGS8:RC, 0
3035 | bne >7
3036 |1:
3037 | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
3038 | li TMP2, 0
3039 | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function?
3040 | beq cr1, >3
3041 |2:
3042 | addi TMP3, TMP2, 8
3043 | evlddx TMP0, RA, TMP2
3044 | cmplw cr1, TMP3, NARGS8:RC
3045 | evstddx TMP0, BASE, TMP2
3046 | mr TMP2, TMP3
3047 | bne cr1, <2
3048 |3:
3049 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt
3050 | beq >5
3051 |4:
3052 | ins_callt
3053 |
3054 |5: // Tailcall to a fast function with a Lua frame below.
3055 | lwz INS, -4(TMP1)
3056 | decode_RA8 RA, INS
3057 | sub TMP1, BASE, RA
3058 | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)
3059 | lwz TMP1, LFUNC:TMP1->pc
3060 | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
3061 | b <4
3062 |
3063 |7: // Tailcall from a vararg function.
3064 | andi. TMP0, TMP2, FRAME_TYPEP
3065 | bne <1 // Vararg frame below?
3066 | sub BASE, BASE, TMP2 // Relocate BASE down.
3067 | lwz TMP1, FRAME_PC(BASE)
3068 | andi. TMP0, TMP1, FRAME_TYPE
3069 | b <1
3070 break;
3071
3072 case BC_ITERC:
3073 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
3074 | subi RA, RA, 24 // evldd doesn't support neg. offsets.
3075 | mr TMP2, BASE
3076 | evlddx LFUNC:RB, BASE, RA
3077 | add BASE, BASE, RA
3078 | evldd TMP0, 8(BASE)
3079 | evldd TMP1, 16(BASE)
3080 | evstdd LFUNC:RB, 24(BASE) // Copy callable.
3081 | checkfunc LFUNC:RB
3082 | evstdd TMP0, 32(BASE) // Copy state.
3083 | li NARGS8:RC, 16 // Iterators get 2 arguments.
3084 | evstdd TMP1, 40(BASE) // Copy control var.
3085 | addi BASE, BASE, 32
3086 | checkfail ->vmeta_call
3087 | ins_call
3088 break;
3089
3090 case BC_ITERN:
3091 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
3092 |.if JIT
3093 | // NYI: add hotloop, record BC_ITERN.
3094 |.endif
3095 | add RA, BASE, RA
3096 | lwz TAB:RB, -12(RA)
3097 | lwz RC, -4(RA) // Get index from control var.
3098 | lwz TMP0, TAB:RB->asize
3099 | lwz TMP1, TAB:RB->array
3100 | addi PC, PC, 4
3101 |1: // Traverse array part.
3102 | cmplw RC, TMP0
3103 | slwi TMP3, RC, 3
3104 | bge >5 // Index points after array part?
3105 | evlddx TMP2, TMP1, TMP3
3106 | checknil TMP2
3107 | lwz INS, -4(PC)
3108 | checkok >4
3109 | efdcfsi TMP0, RC
3110 | addi RC, RC, 1
3111 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
3112 | evstdd TMP2, 8(RA)
3113 | decode_RD4 TMP1, INS
3114 | stw RC, -4(RA) // Update control var.
3115 | add PC, TMP1, TMP3
3116 | evstdd TMP0, 0(RA)
3117 |3:
3118 | ins_next
3119 |
3120 |4: // Skip holes in array part.
3121 | addi RC, RC, 1
3122 | b <1
3123 |
3124 |5: // Traverse hash part.
3125 | lwz TMP1, TAB:RB->hmask
3126 | sub RC, RC, TMP0
3127 | lwz TMP2, TAB:RB->node
3128 |6:
3129 | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1.
3130 | slwi TMP3, RC, 5
3131 | bgt <3
3132 | slwi RB, RC, 3
3133 | sub TMP3, TMP3, RB
3134 | evlddx RB, TMP2, TMP3
3135 | add NODE:TMP3, TMP2, TMP3
3136 | checknil RB
3137 | lwz INS, -4(PC)
3138 | checkok >7
3139 | evldd TMP3, NODE:TMP3->key
3140 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
3141 | evstdd RB, 8(RA)
3142 | add RC, RC, TMP0
3143 | decode_RD4 TMP1, INS
3144 | evstdd TMP3, 0(RA)
3145 | addi RC, RC, 1
3146 | add PC, TMP1, TMP2
3147 | stw RC, -4(RA) // Update control var.
3148 | b <3
3149 |
3150 |7: // Skip holes in hash part.
3151 | addi RC, RC, 1
3152 | b <6
3153 break;
3154
3155 case BC_ISNEXT:
3156 | // RA = base*8, RD = target (points to ITERN)
3157 | add RA, BASE, RA
3158 | li TMP2, -24
3159 | evlddx CFUNC:TMP1, RA, TMP2
3160 | lwz TMP2, -16(RA)
3161 | lwz TMP3, -8(RA)
3162 | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1
3163 | cmpwi cr0, TMP2, LJ_TTAB
3164 | cmpwi cr1, TMP0, LJ_TFUNC
3165 | cmpwi cr6, TMP3, LJ_TNIL
3166 | bne cr1, >5
3167 | lbz TMP1, CFUNC:TMP1->ffid
3168 | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq
3169 | cmpwi cr7, TMP1, FF_next_N
3170 | srwi TMP0, RD, 1
3171 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
3172 | add TMP3, PC, TMP0
3173 | bne cr0, >5
3174 | lus TMP1, 0xfffe
3175 | ori TMP1, TMP1, 0x7fff
3176 | stw ZERO, -4(RA) // Initialize control var.
3177 | stw TMP1, -8(RA)
3178 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
3179 |1:
3180 | ins_next
3181 |5: // Despecialize bytecode if any of the checks fail.
3182 | li TMP0, BC_JMP
3183 | li TMP1, BC_ITERC
3184 | stb TMP0, -1(PC)
3185 | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
3186 | stb TMP1, 3(PC)
3187 | b <1
3188 break;
3189
3190 case BC_VARG:
3191 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
3192 | lwz TMP0, FRAME_PC(BASE)
3193 | add RC, BASE, RC
3194 | add RA, BASE, RA
3195 | addi RC, RC, FRAME_VARG
3196 | add TMP2, RA, RB
3197 | subi TMP3, BASE, 8 // TMP3 = vtop
3198 | sub RC, RC, TMP0 // RC = vbase
3199 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
3200 | cmplwi cr1, RB, 0
3201 | sub. TMP1, TMP3, RC
3202 | beq cr1, >5 // Copy all varargs?
3203 | subi TMP2, TMP2, 16
3204 | ble >2 // No vararg slots?
3205 |1: // Copy vararg slots to destination slots.
3206 | evldd TMP0, 0(RC)
3207 | addi RC, RC, 8
3208 | evstdd TMP0, 0(RA)
3209 | cmplw RA, TMP2
3210 | cmplw cr1, RC, TMP3
3211 | bge >3 // All destination slots filled?
3212 | addi RA, RA, 8
3213 | blt cr1, <1 // More vararg slots?
3214 |2: // Fill up remainder with nil.
3215 | evstdd TISNIL, 0(RA)
3216 | cmplw RA, TMP2
3217 | addi RA, RA, 8
3218 | blt <2
3219 |3:
3220 | ins_next
3221 |
3222 |5: // Copy all varargs.
3223 | lwz TMP0, L->maxstack
3224 | li MULTRES, 8 // MULTRES = (0+1)*8
3225 | ble <3 // No vararg slots?
3226 | add TMP2, RA, TMP1
3227 | cmplw TMP2, TMP0
3228 | addi MULTRES, TMP1, 8
3229 | bgt >7
3230 |6:
3231 | evldd TMP0, 0(RC)
3232 | addi RC, RC, 8
3233 | evstdd TMP0, 0(RA)
3234 | cmplw RC, TMP3
3235 | addi RA, RA, 8
3236 | blt <6 // More vararg slots?
3237 | b <3
3238 |
3239 |7: // Grow stack for varargs.
3240 | mr CARG1, L
3241 | stw RA, L->top
3242 | sub SAVE0, RC, BASE // Need delta, because BASE may change.
3243 | stw BASE, L->base
3244 | sub RA, RA, BASE
3245 | stw PC, SAVE_PC
3246 | srwi CARG2, TMP1, 3
3247 | bl extern lj_state_growstack // (lua_State *L, int n)
3248 | lwz BASE, L->base
3249 | add RA, BASE, RA
3250 | add RC, BASE, SAVE0
3251 | subi TMP3, BASE, 8
3252 | b <6
3253 break;
3254
3255 /* -- Returns ----------------------------------------------------------- */
3256
3257 case BC_RETM:
3258 | // RA = results*8, RD = extra_nresults*8
3259 | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
3260 | // Fall through. Assumes BC_RET follows.
3261 break;
3262
3263 case BC_RET:
3264 | // RA = results*8, RD = (nresults+1)*8
3265 | lwz PC, FRAME_PC(BASE)
3266 | add RA, BASE, RA
3267 | mr MULTRES, RD
3268 |1:
3269 | andi. TMP0, PC, FRAME_TYPE
3270 | xori TMP1, PC, FRAME_VARG
3271 | bne ->BC_RETV_Z
3272 |
3273 |->BC_RET_Z:
3274 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
3275 | lwz INS, -4(PC)
3276 | cmpwi RD, 8
3277 | subi TMP2, BASE, 8
3278 | subi RC, RD, 8
3279 | decode_RB8 RB, INS
3280 | beq >3
3281 | li TMP1, 0
3282 |2:
3283 | addi TMP3, TMP1, 8
3284 | evlddx TMP0, RA, TMP1
3285 | cmpw TMP3, RC
3286 | evstddx TMP0, TMP2, TMP1
3287 | beq >3
3288 | addi TMP1, TMP3, 8
3289 | evlddx TMP0, RA, TMP3
3290 | cmpw TMP1, RC
3291 | evstddx TMP0, TMP2, TMP3
3292 | bne <2
3293 |3:
3294 |5:
3295 | cmplw RB, RD
3296 | decode_RA8 RA, INS
3297 | bgt >6
3298 | sub BASE, TMP2, RA
3299 | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
3300 | ins_next1
3301 | lwz TMP1, LFUNC:TMP1->pc
3302 | lwz KBASE, PC2PROTO(k)(TMP1)
3303 | ins_next2
3304 |
3305 |6: // Fill up results with nil.
3306 | subi TMP1, RD, 8
3307 | addi RD, RD, 8
3308 | evstddx TISNIL, TMP2, TMP1
3309 | b <5
3310 |
3311 |->BC_RETV_Z: // Non-standard return case.
3312 | andi. TMP2, TMP1, FRAME_TYPEP
3313 | bne ->vm_return
3314 | // Return from vararg function: relocate BASE down.
3315 | sub BASE, BASE, TMP1
3316 | lwz PC, FRAME_PC(BASE)
3317 | b <1
3318 break;
3319
3320 case BC_RET0: case BC_RET1:
3321 | // RA = results*8, RD = (nresults+1)*8
3322 | lwz PC, FRAME_PC(BASE)
3323 | add RA, BASE, RA
3324 | mr MULTRES, RD
3325 | andi. TMP0, PC, FRAME_TYPE
3326 | xori TMP1, PC, FRAME_VARG
3327 | bne ->BC_RETV_Z
3328 |
3329 | lwz INS, -4(PC)
3330 | subi TMP2, BASE, 8
3331 | decode_RB8 RB, INS
3332 if (op == BC_RET1) {
3333 | evldd TMP0, 0(RA)
3334 | evstdd TMP0, 0(TMP2)
3335 }
3336 |5:
3337 | cmplw RB, RD
3338 | decode_RA8 RA, INS
3339 | bgt >6
3340 | sub BASE, TMP2, RA
3341 | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
3342 | ins_next1
3343 | lwz TMP1, LFUNC:TMP1->pc
3344 | lwz KBASE, PC2PROTO(k)(TMP1)
3345 | ins_next2
3346 |
3347 |6: // Fill up results with nil.
3348 | subi TMP1, RD, 8
3349 | addi RD, RD, 8
3350 | evstddx TISNIL, TMP2, TMP1
3351 | b <5
3352 break;
3353
3354 /* -- Loops and branches ------------------------------------------------ */
3355
3356 case BC_FORL:
3357 |.if JIT
3358 | hotloop
3359 |.endif
3360 | // Fall through. Assumes BC_IFORL follows.
3361 break;
3362
3363 case BC_JFORI:
3364 case BC_JFORL:
3365#if !LJ_HASJIT
3366 break;
3367#endif
3368 case BC_FORI:
3369 case BC_IFORL:
3370 | // RA = base*8, RD = target (after end of loop or start of loop)
3371 vk = (op == BC_IFORL || op == BC_JFORL);
3372 | add RA, BASE, RA
3373 | evldd TMP1, FORL_IDX*8(RA)
3374 | evldd TMP3, FORL_STEP*8(RA)
3375 | evldd TMP2, FORL_STOP*8(RA)
3376 if (!vk) {
3377 | evcmpgtu cr0, TMP1, TISNUM
3378 | evcmpgtu cr7, TMP3, TISNUM
3379 | evcmpgtu cr1, TMP2, TISNUM
3380 | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
3381 | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3382 | blt ->vmeta_for
3383 }
3384 if (vk) {
3385 | efdadd TMP1, TMP1, TMP3
3386 | evstdd TMP1, FORL_IDX*8(RA)
3387 }
3388 | evcmpgts TMP3, TISNIL
3389 | evstdd TMP1, FORL_EXT*8(RA)
3390 | bge >2
3391 | efdcmpgt TMP1, TMP2
3392 |1:
3393 if (op != BC_JFORL) {
3394 | srwi RD, RD, 1
3395 | add RD, PC, RD
3396 if (op == BC_JFORI) {
3397 | addis PC, RD, -(BCBIAS_J*4 >> 16)
3398 } else {
3399 | addis RD, RD, -(BCBIAS_J*4 >> 16)
3400 }
3401 }
3402 if (op == BC_FORI) {
3403 | iselgt PC, RD, PC
3404 } else if (op == BC_IFORL) {
3405 | iselgt PC, PC, RD
3406 } else {
3407 | ble =>BC_JLOOP
3408 }
3409 | ins_next
3410 |2:
3411 | efdcmpgt TMP2, TMP1
3412 | b <1
3413 break;
3414
3415 case BC_ITERL:
3416 |.if JIT
3417 | hotloop
3418 |.endif
3419 | // Fall through. Assumes BC_IITERL follows.
3420 break;
3421
3422 case BC_JITERL:
3423#if !LJ_HASJIT
3424 break;
3425#endif
3426 case BC_IITERL:
3427 | // RA = base*8, RD = target
3428 | evlddx TMP1, BASE, RA
3429 | subi RA, RA, 8
3430 | checknil TMP1
3431 | checkok >1 // Stop if iterator returned nil.
3432 if (op == BC_JITERL) {
3433 | NYI
3434 } else {
3435 | branch_RD // Otherwise save control var + branch.
3436 | evstddx TMP1, BASE, RA
3437 }
3438 |1:
3439 | ins_next
3440 break;
3441
3442 case BC_LOOP:
3443 | // RA = base*8, RD = target (loop extent)
3444 | // Note: RA/RD is only used by trace recorder to determine scope/extent
3445 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
3446 |.if JIT
3447 | hotloop
3448 |.endif
3449 | // Fall through. Assumes BC_ILOOP follows.
3450 break;
3451
3452 case BC_ILOOP:
3453 | // RA = base*8, RD = target (loop extent)
3454 | ins_next
3455 break;
3456
3457 case BC_JLOOP:
3458 |.if JIT
3459 | NYI
3460 |.endif
3461 break;
3462
3463 case BC_JMP:
3464 | // RA = base*8 (only used by trace recorder), RD = target
3465 | branch_RD
3466 | ins_next
3467 break;
3468
3469 /* -- Function headers -------------------------------------------------- */
3470
3471 case BC_FUNCF:
3472 |.if JIT
3473 | hotcall
3474 |.endif
3475 case BC_FUNCV: /* NYI: compiled vararg functions. */
3476 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3477 break;
3478
3479 case BC_JFUNCF:
3480#if !LJ_HASJIT
3481 break;
3482#endif
3483 case BC_IFUNCF:
3484 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
3485 | lwz TMP2, L->maxstack
3486 | lbz TMP1, -4+PC2PROTO(numparams)(PC)
3487 | lwz KBASE, -4+PC2PROTO(k)(PC)
3488 | cmplw RA, TMP2
3489 | slwi TMP1, TMP1, 3
3490 | bgt ->vm_growstack_l
3491 | ins_next1
3492 |2:
3493 | cmplw NARGS8:RC, TMP1 // Check for missing parameters.
3494 | ble >3
3495 if (op == BC_JFUNCF) {
3496 | NYI
3497 } else {
3498 | ins_next2
3499 }
3500 |
3501 |3: // Clear missing parameters.
3502 | evstddx TISNIL, BASE, NARGS8:RC
3503 | addi NARGS8:RC, NARGS8:RC, 8
3504 | b <2
3505 break;
3506
3507 case BC_JFUNCV:
3508#if !LJ_HASJIT
3509 break;
3510#endif
3511 | NYI // NYI: compiled vararg functions
3512 break; /* NYI: compiled vararg functions. */
3513
3514 case BC_IFUNCV:
3515 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
3516 | lwz TMP2, L->maxstack
3517 | add TMP1, BASE, RC
3518 | add TMP0, RA, RC
3519 | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
3520 | addi TMP3, RC, 8+FRAME_VARG
3521 | lwz KBASE, -4+PC2PROTO(k)(PC)
3522 | cmplw TMP0, TMP2
3523 | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
3524 | bge ->vm_growstack_l
3525 | lbz TMP2, -4+PC2PROTO(numparams)(PC)
3526 | mr RA, BASE
3527 | mr RC, TMP1
3528 | ins_next1
3529 | cmpwi TMP2, 0
3530 | addi BASE, TMP1, 8
3531 | beq >3
3532 |1:
3533 | cmplw RA, RC // Less args than parameters?
3534 | evldd TMP0, 0(RA)
3535 | bge >4
3536 | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
3537 | addi RA, RA, 8
3538 |2:
3539 | addic. TMP2, TMP2, -1
3540 | evstdd TMP0, 8(TMP1)
3541 | addi TMP1, TMP1, 8
3542 | bne <1
3543 |3:
3544 | ins_next2
3545 |
3546 |4: // Clear missing parameters.
3547 | evmr TMP0, TISNIL
3548 | b <2
3549 break;
3550
3551 case BC_FUNCC:
3552 case BC_FUNCCW:
3553 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
3554 if (op == BC_FUNCC) {
3555 | lwz TMP3, CFUNC:RB->f
3556 } else {
3557 | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH)
3558 }
3559 | add TMP1, RA, NARGS8:RC
3560 | lwz TMP2, L->maxstack
3561 | add RC, BASE, NARGS8:RC
3562 | stw BASE, L->base
3563 | cmplw TMP1, TMP2
3564 | stw RC, L->top
3565 | li_vmstate C
3566 | mtctr TMP3
3567 if (op == BC_FUNCCW) {
3568 | lwz CARG2, CFUNC:RB->f
3569 }
3570 | mr CARG1, L
3571 | bgt ->vm_growstack_c // Need to grow stack.
3572 | st_vmstate
3573 | bctrl // (lua_State *L [, lua_CFunction f])
3574 | // Returns nresults.
3575 | lwz TMP1, L->top
3576 | slwi RD, CRET1, 3
3577 | lwz BASE, L->base
3578 | li_vmstate INTERP
3579 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
3580 | sub RA, TMP1, RD // RA = L->top - nresults*8
3581 | st_vmstate
3582 | b ->vm_returnc
3583 break;
3584
3585 /* ---------------------------------------------------------------------- */
3586
3587 default:
3588 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3589 exit(2);
3590 break;
3591 }
3592}
3593
/*
** Build the complete backend: first the shared subroutines, then one code
** fragment per bytecode opcode (0 .. BC__MAX-1) via build_ins().
** Returns BC__MAX.
*/
3594static int build_backend(BuildCtx *ctx)
3595{
3596 int op;
3597
3598 dasm_growpc(Dst, BC__MAX); /* NOTE(review): presumably reserves one DynASM pc label per opcode -- confirm. */
3599
3600 build_subroutines(ctx);
3601
3602 |.code_op
3603 for (op = 0; op < BC__MAX; op++)
3604 build_ins(ctx, (BCOp)op, op); /* The opcode is passed twice: as BCOp and as plain int. */
3605
3606 return BC__MAX;
3607}
3608
3609/* Emit pseudo frame-info for all assembler functions.
**
** Only BUILD_elfasm mode writes anything; every other mode hits the empty
** default case. The output is assembler text written to ctx->fp and
** consists of two unwind tables with the same register rules:
**   - a .debug_frame section: CIE .Lframe0 plus one FDE,
**   - an .eh_frame section: CIE .Lframe1 (augmentation "zPR", personality
**     lj_err_unwind_dwarf) plus one FDE.
** Each FDE starts at .Lbegin and covers ctx->codesz bytes.
** The opcode names in the comments below follow the DWARF call frame
** instruction encoding; the meaning of the target-specific register
** numbers is hedged where this file does not establish it.
*/
3610static void emit_asm_debug(BuildCtx *ctx)
3611{
3612 int i;
3613 switch (ctx->mode) {
3614 case BUILD_elfasm:
3615 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
3616 fprintf(ctx->fp,
3617 ".Lframe0:\n"
3618 "\t.long .LECIE0-.LSCIE0\n" /* CIE length */
3619 ".LSCIE0:\n"
3620 "\t.long 0xffffffff\n" /* CIE id used in .debug_frame */
3621 "\t.byte 0x1\n" /* CIE version */
3622 "\t.string \"\"\n" /* empty augmentation string */
3623 "\t.uleb128 0x1\n" /* code alignment factor */
3624 "\t.sleb128 -4\n" /* data alignment factor */
3625 "\t.byte 65\n" /* return address column; NOTE(review): presumably LR -- confirm */
3626 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa: register 1, offset 0 */
3627 "\t.align 2\n"
3628 ".LECIE0:\n\n");
3629 fprintf(ctx->fp,
3630 ".LSFDE0:\n"
3631 "\t.long .LEFDE0-.LASFDE0\n" /* FDE length */
3632 ".LASFDE0:\n"
3633 "\t.long .Lframe0\n" /* reference to the CIE above */
3634 "\t.long .Lbegin\n" /* start address covered by this FDE */
3635 "\t.long %d\n" /* covered size, filled with ctx->codesz */
3636 "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset, filled with CFRAME_SIZE */
3637 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" /* DW_CFA_offset_extended_sf: column 65, factored offset -1 */
3638 "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", /* DW_CFA_offset_extended: column 70; NOTE(review): presumably CR -- confirm */
3639 (int)ctx->codesz, CFRAME_SIZE); /* arguments for the two %d above, in order */
3640 for (i = 14; i <= 31; i++) /* one pair of save rules per register r14..r31 */
3641 fprintf(ctx->fp,
3642 "\t.byte %d\n\t.uleb128 %d\n" /* DW_CFA_offset (0x80+reg) with factored offset */
3643 "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", /* DW_CFA_offset_extended for column 1200+i */
3644 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i)); /* NOTE(review): 1200+i presumably names the SPE upper half of GPR i -- confirm */
3645 fprintf(ctx->fp,
3646 "\t.align 2\n"
3647 ".LEFDE0:\n\n");
3648 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
3649 fprintf(ctx->fp,
3650 ".Lframe1:\n"
3651 "\t.long .LECIE1-.LSCIE1\n" /* CIE length */
3652 ".LSCIE1:\n"
3653 "\t.long 0\n" /* CIE id used in .eh_frame */
3654 "\t.byte 0x1\n" /* CIE version */
3655 "\t.string \"zPR\"\n" /* augmentation: data present, personality, FDE pointer encoding */
3656 "\t.uleb128 0x1\n" /* code alignment factor */
3657 "\t.sleb128 -4\n" /* data alignment factor */
3658 "\t.byte 65\n" /* return address column, same as in the .debug_frame CIE */
3659 "\t.uleb128 6\n" /* augmentation length */
3660 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3661 "\t.long lj_err_unwind_dwarf-.\n" /* pc-relative reference to the personality routine */
3662 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3663 "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa: register 1, offset 0 */
3664 "\t.align 2\n"
3665 ".LECIE1:\n\n");
3666 fprintf(ctx->fp,
3667 ".LSFDE1:\n"
3668 "\t.long .LEFDE1-.LASFDE1\n" /* FDE length */
3669 ".LASFDE1:\n"
3670 "\t.long .LASFDE1-.Lframe1\n" /* offset back to the CIE above */
3671 "\t.long .Lbegin-.\n" /* pc-relative start address covered by this FDE */
3672 "\t.long %d\n" /* covered size, filled with ctx->codesz */
3673 "\t.uleb128 0\n" /* augmentation length */
3674 "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset, filled with CFRAME_SIZE */
3675 "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" /* same rules as in the .debug_frame FDE */
3676 "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n",
3677 (int)ctx->codesz, CFRAME_SIZE); /* arguments for the two %d above, in order */
3678 for (i = 14; i <= 31; i++) /* same per-register rules as in the .debug_frame FDE */
3679 fprintf(ctx->fp,
3680 "\t.byte %d\n\t.uleb128 %d\n"
3681 "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
3682 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
3683 fprintf(ctx->fp,
3684 "\t.align 2\n"
3685 ".LEFDE1:\n\n");
3686 break;
3687 default: /* all other build modes: emit no frame info */
3688 break;
3689 }
3690}
3691
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..76ce071d
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4909 @@
1|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|.if WIN
16|.define X64WIN, 1 // Windows/x64 calling conventions.
17|.endif
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, rdx // Not C callee-save, refetched anyway.
22|.if X64WIN
23|.define KBASE, rdi // Must be C callee-save.
24|.define PC, rsi // Must be C callee-save.
25|.define DISPATCH, rbx // Must be C callee-save.
26|.define KBASEd, edi
27|.define PCd, esi
28|.define DISPATCHd, ebx
29|.else
30|.define KBASE, r15 // Must be C callee-save.
31|.define PC, rbx // Must be C callee-save.
32|.define DISPATCH, r14 // Must be C callee-save.
33|.define KBASEd, r15d
34|.define PCd, ebx
35|.define DISPATCHd, r14d
36|.endif
37|
38|.define RA, rcx
39|.define RAd, ecx
40|.define RAH, ch
41|.define RAL, cl
42|.define RB, rbp // Must be rbp (C callee-save).
43|.define RBd, ebp
44|.define RC, rax // Must be rax.
45|.define RCd, eax
46|.define RCW, ax
47|.define RCH, ah
48|.define RCL, al
49|.define OP, RBd
50|.define RD, RC
51|.define RDd, RCd
52|.define RDW, RCW
53|.define RDL, RCL
54|.define TMPR, r10
55|.define TMPRd, r10d
56|.define ITYPE, r11
57|.define ITYPEd, r11d
58|
59|.if X64WIN
60|.define CARG1, rcx // x64/WIN64 C call arguments.
61|.define CARG2, rdx
62|.define CARG3, r8
63|.define CARG4, r9
64|.define CARG1d, ecx
65|.define CARG2d, edx
66|.define CARG3d, r8d
67|.define CARG4d, r9d
68|.else
69|.define CARG1, rdi // x64/POSIX C call arguments.
70|.define CARG2, rsi
71|.define CARG3, rdx
72|.define CARG4, rcx
73|.define CARG5, r8
74|.define CARG6, r9
75|.define CARG1d, edi
76|.define CARG2d, esi
77|.define CARG3d, edx
78|.define CARG4d, ecx
79|.define CARG5d, r8d
80|.define CARG6d, r9d
81|.endif
82|
83|// Type definitions. Some of these are only used for documentation.
84|.type L, lua_State
85|.type GL, global_State
86|.type TVALUE, TValue
87|.type GCOBJ, GCobj
88|.type STR, GCstr
89|.type TAB, GCtab
90|.type LFUNC, GCfuncL
91|.type CFUNC, GCfuncC
92|.type PROTO, GCproto
93|.type UPVAL, GCupval
94|.type NODE, Node
95|.type NARGS, int
96|.type TRACE, GCtrace
97|.type SBUF, SBuf
98|
99|// Stack layout while in interpreter. Must match with lj_frame.h.
100|//-----------------------------------------------------------------------
101|.if X64WIN // x64/Windows stack layout
102|
103|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104|.macro saveregs_
105| push rdi; push rsi; push rbx
106| sub rsp, CFRAME_SPACE
107|.endmacro
108|.macro saveregs
109| push rbp; saveregs_
110|.endmacro
111|.macro restoreregs
112| add rsp, CFRAME_SPACE
113| pop rbx; pop rsi; pop rdi; pop rbp
114|.endmacro
115|
116|.define SAVE_CFRAME, aword [rsp+aword*13]
117|.define SAVE_PC, aword [rsp+aword*12]
118|.define SAVE_L, aword [rsp+aword*11]
119|.define SAVE_ERRF, dword [rsp+dword*21]
120|.define SAVE_NRES, dword [rsp+dword*20]
121|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123|.define SAVE_R4, aword [rsp+aword*8]
124|.define SAVE_R3, aword [rsp+aword*7]
125|.define SAVE_R2, aword [rsp+aword*6]
126|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127|.define ARG5, aword [rsp+aword*4]
128|.define CSAVE_4, aword [rsp+aword*3]
129|.define CSAVE_3, aword [rsp+aword*2]
130|.define CSAVE_2, aword [rsp+aword*1]
131|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133|
134|.define ARG5d, dword [rsp+dword*8]
135|.define TMP1, ARG5 // TMP1 overlaps ARG5
136|.define TMP1d, ARG5d
137|.define TMP1hi, dword [rsp+dword*9]
138|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139|
140|//-----------------------------------------------------------------------
141|.else // x64/POSIX stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs_
145| push rbx; push r15; push r14
146|.if NO_UNWIND
147| push r13; push r12
148|.endif
149| sub rsp, CFRAME_SPACE
150|.endmacro
151|.macro saveregs
152| push rbp; saveregs_
153|.endmacro
154|.macro restoreregs
155| add rsp, CFRAME_SPACE
156|.if NO_UNWIND
157| pop r12; pop r13
158|.endif
159| pop r14; pop r15; pop rbx; pop rbp
160|.endmacro
161|
162|//----- 16 byte aligned,
163|.if NO_UNWIND
164|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165|.define SAVE_R4, aword [rsp+aword*10]
166|.define SAVE_R3, aword [rsp+aword*9]
167|.define SAVE_R2, aword [rsp+aword*8]
168|.define SAVE_R1, aword [rsp+aword*7]
169|.define SAVE_RU2, aword [rsp+aword*6]
170|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
171|.else
172|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173|.define SAVE_R4, aword [rsp+aword*8]
174|.define SAVE_R3, aword [rsp+aword*7]
175|.define SAVE_R2, aword [rsp+aword*6]
176|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
177|.endif
178|.define SAVE_CFRAME, aword [rsp+aword*4]
179|.define SAVE_PC, aword [rsp+aword*3]
180|.define SAVE_L, aword [rsp+aword*2]
181|.define SAVE_ERRF, dword [rsp+dword*3]
182|.define SAVE_NRES, dword [rsp+dword*2]
183|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184|//----- 16 byte aligned
185|
186|.define TMP1d, dword [rsp]
187|.define TMP1hi, dword [rsp+dword*1]
188|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
189|
190|.endif
191|
192|//-----------------------------------------------------------------------
193|
194|// Instruction headers.
195|.macro ins_A; .endmacro
196|.macro ins_AD; .endmacro
197|.macro ins_AJ; .endmacro
198|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
199|.macro ins_AB_; movzx RBd, RCH; .endmacro
200|.macro ins_A_C; movzx RCd, RCL; .endmacro
201|.macro ins_AND; not RD; .endmacro
202|
203|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
204|.macro ins_NEXT
205| mov RCd, [PC] // Fetch the 32 bit instruction word at PC.
206| movzx RAd, RCH // RA = byte 1 of the instruction word.
207| movzx OP, RCL // OP = byte 0 of the instruction word.
208| add PC, 4 // PC now points after the fetched instruction.
209| shr RCd, 16 // RC (alias RD) = upper 16 bits of the instruction word.
210| jmp aword [DISPATCH+OP*8] // Indirect jump via the table at DISPATCH.
211|.endmacro
212|
213|// Instruction footer.
214|.if 1 // Constant condition: the replicated dispatch below is always used.
215| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
216| .define ins_next, ins_NEXT
217| .define ins_next_, ins_NEXT
218|.else
219| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220| // Affects only certain kinds of benchmarks (and only with -j off).
221| // Around 10%-30% slower on Core2, a lot slower on P4.
222| .macro ins_next
223| jmp ->ins_next
224| .endmacro
225| .macro ins_next_
226| ->ins_next:
227| ins_NEXT
228| .endmacro
229|.endif
230|
231|// Call decode and dispatch.
232|.macro ins_callt
233| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234| mov PC, LFUNC:RB->pc // PC = pc field of the function in RB.
235| mov RAd, [PC] // Fetch the 32 bit instruction word at PC.
236| movzx OP, RAL // OP = byte 0. OP aliases RBd, so RB (LFUNC) is gone now.
237| movzx RAd, RAH // RA = byte 1 of the instruction word.
238| add PC, 4 // PC now points after the fetched instruction.
239| jmp aword [DISPATCH+OP*8] // Indirect jump via the table at DISPATCH.
240|.endmacro
241|
242|.macro ins_call
243| // BASE = new base, RB = LFUNC, RD = nargs+1
244| mov [BASE-8], PC // Store PC at [BASE-8], as ins_callt expects.
245| ins_callt
246|.endmacro
247|
248|//-----------------------------------------------------------------------
249|
250|// Macros to clear or set tags. The tag lives in the upper 17 bits (47..63).
251|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro
252|.macro settp, reg, tp // reg |= tp<<47. Overwrites ITYPE.
253| mov64 ITYPE, ((uint64_t)tp<<47)
254| or reg, ITYPE
255|.endmacro
256|.macro settp, dst, reg, tp // dst = reg | tp<<47. reg itself is not modified.
257| mov64 dst, ((uint64_t)tp<<47)
258| or dst, reg
259|.endmacro
260|.macro setint, reg // Tag reg in place with LJ_TISNUM. Overwrites ITYPE.
261| settp reg, LJ_TISNUM
262|.endmacro
263|.macro setint, dst, reg // dst = reg tagged with LJ_TISNUM.
264| settp dst, reg, LJ_TISNUM
265|.endmacro
266|
267|// Macros to test operand types. All of them overwrite ITYPE with the tag.
268|.macro checktp_nc, reg, tp, target // Jump to target if tag != tp. reg is kept as is.
269| mov ITYPE, reg
270| sar ITYPE, 47 // ITYPE = sign-extended upper 17 bits of reg.
271| cmp ITYPEd, tp
272| jne target
273|.endmacro
274|.macro checktp, reg, tp, target // Like checktp_nc, but also clears the tag in reg.
275| mov ITYPE, reg
276| cleartp reg // Flags from this are overwritten by sar/cmp below.
277| sar ITYPE, 47
278| cmp ITYPEd, tp
279| jne target
280|.endmacro
281|.macro checktptp, src, tp, target // Same instruction sequence as checktp_nc.
282| mov ITYPE, src
283| sar ITYPE, 47
284| cmp ITYPEd, tp
285| jne target
286|.endmacro
287|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
288|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
289|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
290|
291|.macro checknumx, reg, target, jump // Compare tag with LJ_TISNUM, branch using 'jump'.
292| mov ITYPE, reg
293| sar ITYPE, 47
294| cmp ITYPEd, LJ_TISNUM
295| jump target
296|.endmacro
297|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
298|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
299|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro
300|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
301|.macro checknumber, src, target; checknumx src, target, ja; .endmacro
302|
303|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro
304|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro
305|
306|// These operands must be used with movzx.
307|.define PC_OP, byte [PC-4]
308|.define PC_RA, byte [PC-3]
309|.define PC_RB, byte [PC-1]
310|.define PC_RC, byte [PC-2]
311|.define PC_RD, word [PC-2]
312|
313|.macro branchPC, reg
314| lea PC, [PC+reg*4-BCBIAS_J*4]
315|.endmacro
316|
317|// Assumes DISPATCH is relative to GL.
318#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
319#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
320|
321#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
322|
323|// Decrement hashed hotcount and trigger trace recorder when it underflows.
324|.macro hotloop, reg
325| mov reg, PCd // Hash input: low 32 bits of PC.
326| shr reg, 1
327| and reg, HOTCOUNT_PCMASK // reg = offset of the 16 bit counter.
328| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
329| jb ->vm_hotloop // Taken on borrow, i.e. counter was < HOTCOUNT_LOOP.
330|.endmacro
331|
332|.macro hotcall, reg
333| mov reg, PCd // Hash input: low 32 bits of PC.
334| shr reg, 1
335| and reg, HOTCOUNT_PCMASK // reg = offset of the 16 bit counter.
336| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
337| jb ->vm_hotcall // Taken on borrow, i.e. counter was < HOTCOUNT_CALL.
338|.endmacro
339|
340|// Set current VM state.
341|.macro set_vmstate, st
342| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
343|.endmacro
344|
345|.macro fpop1; fstp st1; .endmacro
346|
347|// Synthesize SSE FP constants.
348|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
349| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
350|.endmacro
351|
352|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
353| mov64 tmp, U64x(val,00000000); movd reg, tmp
354|.endmacro
355|
356|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357| sseconst_hi reg, tmp, 80000000
358|.endmacro
359|.macro sseconst_1, reg, tmp // Synthesize 1.0.
360| sseconst_hi reg, tmp, 3ff00000
361|.endmacro
362|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
363| sseconst_hi reg, tmp, bff00000
364|.endmacro
365|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
366| sseconst_hi reg, tmp, 43300000
367|.endmacro
368|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
369| sseconst_hi reg, tmp, 43380000
370|.endmacro
371|
372|// Move table write barrier back. Overwrites reg.
373|.macro barrierback, tab, reg
374| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
375| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current list head.
376| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head.
377| mov tab->gclist, reg // Link tab to the previous head.
378|.endmacro
379|
380|//-----------------------------------------------------------------------
381
382/* Generate subroutines used by opcodes and other parts of the VM. */
383/* The .code_sub section should be last to help static branch prediction. */
384static void build_subroutines(BuildCtx *ctx)
385{
386 |.code_sub
387 |
388 |//-----------------------------------------------------------------------
389 |//-- Return handling ----------------------------------------------------
390 |//-----------------------------------------------------------------------
391 |
392 |->vm_returnp:
393 | test PCd, FRAME_P
394 | jz ->cont_dispatch
395 |
396 | // Return from pcall or xpcall fast func.
397 | and PC, -8
398 | sub BASE, PC // Restore caller base.
399 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
400 | mov PC, [BASE-8] // Fetch PC of previous frame.
401 | // Prepending may overwrite the pcall frame, so do it at the end.
402 | mov_true ITYPE
403 | mov aword [BASE+RA], ITYPE // Prepend true to results.
404 |
405 |->vm_returnc:
406 | add RDd, 1 // RD = nresults+1
407 | jz ->vm_unwind_yield
408 | mov MULTRES, RDd
409 | test PC, FRAME_TYPE
410 | jz ->BC_RET_Z // Handle regular return to Lua.
411 |
412 |->vm_return:
413 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
414 | xor PC, FRAME_C
415 | test PCd, FRAME_TYPE
416 | jnz ->vm_returnp
417 |
418 | // Return to C.
419 | set_vmstate C
420 | and PC, -8
421 | sub PC, BASE
422 | neg PC // Previous base = BASE - delta.
423 |
424 | sub RDd, 1
425 | jz >2
426 |1: // Move results down.
427 | mov RB, [BASE+RA]
428 | mov [BASE-16], RB
429 | add BASE, 8
430 | sub RDd, 1
431 | jnz <1
432 |2:
433 | mov L:RB, SAVE_L
434 | mov L:RB->base, PC
435 |3:
436 | mov RDd, MULTRES
437 | mov RAd, SAVE_NRES // RA = wanted nresults+1
438 |4:
439 | cmp RAd, RDd
440 | jne >6 // More/less results wanted?
441 |5:
442 | sub BASE, 16
443 | mov L:RB->top, BASE
444 |
445 |->vm_leave_cp:
446 | mov RA, SAVE_CFRAME // Restore previous C frame.
447 | mov L:RB->cframe, RA
448 | xor eax, eax // Ok return status for vm_pcall.
449 |
450 |->vm_leave_unw:
451 | restoreregs
452 | ret
453 |
454 |6:
455 | jb >7 // Less results wanted?
456 | // More results wanted. Check stack size and fill up results with nil.
457 | cmp BASE, L:RB->maxstack
458 | ja >8
459 | mov aword [BASE-16], LJ_TNIL
460 | add BASE, 8
461 | add RDd, 1
462 | jmp <4
463 |
464 |7: // Less results wanted.
465 | test RAd, RAd
466 | jz <5 // But check for LUA_MULTRET+1.
467 | sub RA, RD // Negative result!
468 | lea BASE, [BASE+RA*8] // Correct top.
469 | jmp <5
470 |
471 |8: // Corner case: need to grow stack for filling up results.
472 | // This can happen if:
473 | // - A C function grows the stack (a lot).
474 | // - The GC shrinks the stack in between.
475 | // - A return back from a lua_call() with (high) nresults adjustment.
476 | mov L:RB->top, BASE // Save current top held in BASE (yes).
477 | mov MULTRES, RDd // Need to fill only remainder with nil.
478 | mov CARG2d, RAd
479 | mov CARG1, L:RB
480 | call extern lj_state_growstack // (lua_State *L, int n)
481 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
482 | jmp <3
483 |
484 |->vm_unwind_yield:
485 | mov al, LUA_YIELD
486 | jmp ->vm_unwind_c_eh
487 |
488 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
489 | // (void *cframe, int errcode)
490 | mov eax, CARG2d // Error return status for vm_pcall.
491 | mov rsp, CARG1
492 |->vm_unwind_c_eh: // Landing pad for external unwinder.
493 | mov L:RB, SAVE_L
494 | mov GL:RB, L:RB->glref
495 | mov dword GL:RB->vmstate, ~LJ_VMST_C
496 | jmp ->vm_leave_unw
497 |
498 |->vm_unwind_rethrow:
499 |.if not X64WIN
500 | mov CARG1, SAVE_L
501 | mov CARG2d, eax
502 | restoreregs
503 | jmp extern lj_err_throw // (lua_State *L, int errcode)
504 |.endif
505 |
506 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
507 | // (void *cframe)
508 | and CARG1, CFRAME_RAWMASK
509 | mov rsp, CARG1
510 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
511 | mov L:RB, SAVE_L
512 | mov RDd, 1+1 // Really 1+2 results, incr. later.
513 | mov BASE, L:RB->base
514 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
515 | add DISPATCH, GG_G2DISP
516 | mov PC, [BASE-8] // Fetch PC of previous frame.
517 | mov_false RA
518 | mov RB, [BASE]
519 | mov [BASE-16], RA // Prepend false to error message.
520 | mov [BASE-8], RB
521 | mov RA, -16 // Results start at BASE+RA = BASE-16.
522 | set_vmstate INTERP
523 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
524 |
525 |//-----------------------------------------------------------------------
526 |//-- Grow stack for calls -----------------------------------------------
527 |//-----------------------------------------------------------------------
528 |
529 |->vm_growstack_c: // Grow stack for C function.
530 | mov CARG2d, LUA_MINSTACK
531 | jmp >2
532 |
533 |->vm_growstack_v: // Grow stack for vararg Lua function.
534 | sub RD, 16 // LJ_FR2
535 | jmp >1
536 |
537 |->vm_growstack_f: // Grow stack for fixarg Lua function.
538 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
539 | lea RD, [BASE+NARGS:RD*8-8]
540 |1:
541 | movzx RAd, byte [PC-4+PC2PROTO(framesize)]
542 | add PC, 4 // Must point after first instruction.
543 | mov L:RB->base, BASE
544 | mov L:RB->top, RD
545 | mov SAVE_PC, PC
546 | mov CARG2, RA
547 |2:
548 | // RB = L, L->base = new base, L->top = top
549 | mov CARG1, L:RB
550 | call extern lj_state_growstack // (lua_State *L, int n)
551 | mov BASE, L:RB->base
552 | mov RD, L:RB->top
553 | mov LFUNC:RB, [BASE-16]
554 | cleartp LFUNC:RB
555 | sub RD, BASE
556 | shr RDd, 3
557 | add NARGS:RDd, 1
558 | // BASE = new base, RB = LFUNC, RD = nargs+1
559 | ins_callt // Just retry the call.
560 |
561 |//-----------------------------------------------------------------------
562 |//-- Entry points into the assembler VM ---------------------------------
563 |//-----------------------------------------------------------------------
564 |
565 |->vm_resume: // Setup C frame and resume thread.
566 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
567 | saveregs
568 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
569 | mov SAVE_L, CARG1
570 | mov RA, CARG2
571 | mov PCd, FRAME_CP
572 | xor RDd, RDd // RD = 0, used as the zero source for the stores below.
573 | lea KBASE, [esp+CFRAME_RESUME] // NOTE(review): esp here, rsp elsewhere -- confirm intended.
574 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
575 | add DISPATCH, GG_G2DISP
576 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
577 | mov SAVE_CFRAME, RD
578 | mov SAVE_NRES, RDd
579 | mov SAVE_ERRF, RDd
580 | mov L:RB->cframe, KBASE
581 | cmp byte L:RB->status, RDL // RDL is 0 here: is L->status zero?
582 | je >2 // Initial resume (like a call).
583 |
584 | // Resume after yield (like a return).
585 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
586 | set_vmstate INTERP
587 | mov byte L:RB->status, RDL // Reset L->status to 0.
588 | mov BASE, L:RB->base
589 | mov RD, L:RB->top
590 | sub RD, RA
591 | shr RDd, 3 // Byte delta -> number of 8 byte slots.
592 | add RDd, 1 // RD = nresults+1
593 | sub RA, BASE // RA = resultofs
594 | mov PC, [BASE-8]
595 | mov MULTRES, RDd
596 | test PCd, FRAME_TYPE
597 | jz ->BC_RET_Z
598 | jmp ->vm_return
599 |
600 |->vm_pcall: // Setup protected C frame and enter VM.
601 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
602 | saveregs
603 | mov PCd, FRAME_CP
604 | mov SAVE_ERRF, CARG4d
605 | jmp >1
606 |
607 |->vm_call: // Setup C frame and enter VM.
608 | // (lua_State *L, TValue *base, int nres1)
609 | saveregs
610 | mov PCd, FRAME_C
611 |
612 |1: // Entry point for vm_pcall above (PC = ftype).
613 | mov SAVE_NRES, CARG3d
614 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
615 | mov SAVE_L, CARG1
616 | mov RA, CARG2
617 |
618 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
619 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
620 | mov SAVE_CFRAME, KBASE
621 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
622 | add DISPATCH, GG_G2DISP
623 | mov L:RB->cframe, rsp
624 |
625 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
626 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
627 | set_vmstate INTERP
628 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
629 | add PC, RA
630 | sub PC, BASE // PC = frame delta + frame type
631 |
632 | mov RD, L:RB->top
633 | sub RD, RA
634 | shr NARGS:RDd, 3
635 | add NARGS:RDd, 1 // RD = nargs+1
636 |
637 |->vm_call_dispatch:
638 | mov LFUNC:RB, [RA-16]
639 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
640 |
641 |->vm_call_dispatch_f:
642 | mov BASE, RA
643 | ins_call
644 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
645 |
646 |->vm_cpcall: // Setup protected C frame, call C.
647 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
648 | saveregs
649 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
650 | mov SAVE_L, CARG1
651 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
652 |
653 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
654 | sub KBASE, L:RB->top
655 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
656 | mov SAVE_ERRF, 0 // No error function.
657 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
658 | add DISPATCH, GG_G2DISP
659 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
660 |
661 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
662 | mov SAVE_CFRAME, KBASE
663 | mov L:RB->cframe, rsp
664 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
665 |
666 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
667 | // TValue * (new base) or NULL returned in rax (RC).
668 | test RC, RC
669 | jz ->vm_leave_cp // No base? Just remove C frame.
670 | mov RA, RC
671 | mov PCd, FRAME_CP
672 | jmp <2 // Else continue with the call.
673 |
674 |//-----------------------------------------------------------------------
675 |//-- Metamethod handling ------------------------------------------------
676 |//-----------------------------------------------------------------------
677 |
678 |//-- Continuation dispatch ----------------------------------------------
679 |
680 |->cont_dispatch:
681 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
682 | add RA, BASE
683 | and PC, -8
684 | mov RB, BASE
685 | sub BASE, PC // Restore caller BASE.
686 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
687 | mov RC, RA // ... in [RC]
688 | mov PC, [RB-24] // Restore PC from [cont|PC].
689 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
690 |.if FFI
691 | cmp RA, 1
692 | jbe >1
693 |.endif
694 | mov LFUNC:KBASE, [BASE-16]
695 | cleartp LFUNC:KBASE
696 | mov KBASE, LFUNC:KBASE->pc
697 | mov KBASE, [KBASE+PC2PROTO(k)]
698 | // BASE = base, RC = result, RB = meta base
699 | jmp RA // Jump to continuation.
700 |
701 |.if FFI
702 |1:
703 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
704 | // cont = 0: Tail call from C function.
705 | sub RB, BASE
706 | shr RBd, 3
707 | lea RDd, [RBd-3]
708 | jmp ->vm_call_tail
709 |.endif
710 |
711 |->cont_cat: // BASE = base, RC = result, RB = mbase
712 | movzx RAd, PC_RB
713 | sub RB, 32
714 | lea RA, [BASE+RA*8]
715 | sub RA, RB
716 | je ->cont_ra
717 | neg RA
718 | shr RAd, 3
719 |.if X64WIN
720 | mov CARG3d, RAd
721 | mov L:CARG1, SAVE_L
722 | mov L:CARG1->base, BASE
723 | mov RC, [RC]
724 | mov [RB], RC
725 | mov CARG2, RB
726 |.else
727 | mov L:CARG1, SAVE_L
728 | mov L:CARG1->base, BASE
729 | mov CARG3d, RAd
730 | mov RA, [RC]
731 | mov [RB], RA
732 | mov CARG2, RB
733 |.endif
734 | jmp ->BC_CAT_Z
735 |
736 |//-- Table indexing metamethods -----------------------------------------
737 |
738 |->vmeta_tgets:
739 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
740 | mov TMP1, STR:RC // Store the tagged string key in TMP1.
741 | lea RC, TMP1 // RC = address of TMP1, passed as TValue *k below.
742 | cmp PC_OP, BC_GGET
743 | jne >1
744 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
745 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
746 | mov [RB], TAB:RA
747 | jmp >2
748 |
749 |->vmeta_tgetb:
750 | movzx RCd, PC_RC
751 |.if DUALNUM
752 | setint RC
753 | mov TMP1, RC
754 |.else
755 | cvtsi2sd xmm0, RCd
756 | movsd TMP1, xmm0
757 |.endif
758 | lea RC, TMP1 // RC = address of TMP1, passed as TValue *k below.
759 | jmp >1
760 |
761 |->vmeta_tgetv:
762 | movzx RCd, PC_RC // Reload TValue *k from RC.
763 | lea RC, [BASE+RC*8]
764 |1:
765 | movzx RBd, PC_RB // Reload TValue *t from RB.
766 | lea RB, [BASE+RB*8]
767 |2:
768 | mov L:CARG1, SAVE_L
769 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
770 | mov CARG2, RB
771 | mov CARG3, RC
772 | mov L:RB, L:CARG1 // Keep L in RB across the call.
773 | mov SAVE_PC, PC
774 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
775 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
776 | mov BASE, L:RB->base
777 | test RC, RC
778 | jz >3
779 |->cont_ra: // BASE = base, RC = result
780 | movzx RAd, PC_RA
781 | mov RB, [RC]
782 | mov [BASE+RA*8], RB // Copy the result to slot RA.
783 | ins_next
784 |
785 |3: // Call __index metamethod.
786 | // BASE = base, L->top = new base, stack = cont/func/t/k
787 | mov RA, L:RB->top
788 | mov [RA-24], PC // [cont|PC]
789 | lea PC, [RA+FRAME_CONT]
790 | sub PC, BASE
791 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
792 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
793 | cleartp LFUNC:RB
794 | jmp ->vm_call_dispatch_f
795 |
796 |->vmeta_tgetr:
797 | mov CARG1, TAB:RB
798 | mov RB, BASE // Save BASE.
799 | mov CARG2d, RCd // Caveat: CARG2 == BASE
800 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
801 | // cTValue * or NULL returned in rax (RC).
802 | movzx RAd, PC_RA
803 | mov BASE, RB // Restore BASE.
804 | test RC, RC
805 | jnz ->BC_TGETR_Z
806 | mov ITYPE, LJ_TNIL // NULL result: continue with nil in ITYPE.
807 | jmp ->BC_TGETR2_Z
808 |
809 |//-----------------------------------------------------------------------
810 |
811 |->vmeta_tsets:
812 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
813 | mov TMP1, STR:RC // Store the tagged string key in TMP1.
814 | lea RC, TMP1 // RC = address of TMP1, passed as TValue *k below.
815 | cmp PC_OP, BC_GSET
816 | jne >1
817 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
818 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
819 | mov [RB], TAB:RA
820 | jmp >2
821 |
822 |->vmeta_tsetb:
823 | movzx RCd, PC_RC
824 |.if DUALNUM
825 | setint RC
826 | mov TMP1, RC
827 |.else
828 | cvtsi2sd xmm0, RCd
829 | movsd TMP1, xmm0
830 |.endif
831 | lea RC, TMP1 // RC = address of TMP1, passed as TValue *k below.
832 | jmp >1
833 |
834 |->vmeta_tsetv:
835 | movzx RCd, PC_RC // Reload TValue *k from RC.
836 | lea RC, [BASE+RC*8]
837 |1:
838 | movzx RBd, PC_RB // Reload TValue *t from RB.
839 | lea RB, [BASE+RB*8]
840 |2:
841 | mov L:CARG1, SAVE_L
842 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
843 | mov CARG2, RB
844 | mov CARG3, RC
845 | mov L:RB, L:CARG1 // Keep L in RB across the call.
846 | mov SAVE_PC, PC
847 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
848 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
849 | mov BASE, L:RB->base
850 | test RC, RC
851 | jz >3
852 | // NOBARRIER: lj_meta_tset ensures the table is not black.
853 | movzx RAd, PC_RA
854 | mov RB, [BASE+RA*8]
855 | mov [RC], RB // Store the value from slot RA to the returned TValue *.
856 |->cont_nop: // BASE = base, (RC = result)
857 | ins_next
858 |
859 |3: // Call __newindex metamethod.
860 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
861 | mov RA, L:RB->top
862 | mov [RA-24], PC // [cont|PC]
863 | movzx RCd, PC_RA
864 | // Copy value to third argument.
865 | mov RB, [BASE+RC*8]
866 | mov [RA+16], RB
867 | lea PC, [RA+FRAME_CONT]
868 | sub PC, BASE
869 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
870 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
871 | cleartp LFUNC:RB
872 | jmp ->vm_call_dispatch_f
873 |
874 |->vmeta_tsetr:
875 |.if X64WIN
876 | mov L:CARG1, SAVE_L
877 | mov CARG3d, RCd
878 | mov L:CARG1->base, BASE
879 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE.
880 |.else
881 | mov L:CARG1, SAVE_L
882 | mov CARG2, TAB:RB
883 | mov L:CARG1->base, BASE
884 | mov RB, BASE // Save BASE.
885 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
886 |.endif
887 | mov SAVE_PC, PC
888 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
889 | // TValue * returned in rax (RC).
890 | movzx RAd, PC_RA
891 | mov BASE, RB // Restore BASE.
892 | jmp ->BC_TSETR_Z
893 |
894 |//-- Comparison metamethods ---------------------------------------------
895 |
896 |->vmeta_comp:
897 | movzx RDd, PC_RD
898 | movzx RAd, PC_RA
899 | mov L:RB, SAVE_L
900 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
901 |.if X64WIN
902 | lea CARG3, [BASE+RD*8]
903 | lea CARG2, [BASE+RA*8]
904 |.else
905 | lea CARG2, [BASE+RA*8]
906 | lea CARG3, [BASE+RD*8]
907 |.endif
908 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
909 | movzx CARG4d, PC_OP
910 | mov SAVE_PC, PC
911 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
912 | // 0/1 or TValue * (metamethod) returned in rax (RC).
913 |3:
914 | mov BASE, L:RB->base
915 | cmp RC, 1
916 | ja ->vmeta_binop // RC > 1: it is a TValue *, call the metamethod.
917 |4:
918 | lea PC, [PC+4] // lea instead of add: keeps the flags of the last cmp.
919 | jb >6 // Below: skip the branch.
920 |5:
921 | movzx RDd, PC_RD // RD = 16 bit operand of the instruction at PC-4.
922 | branchPC RD
923 |6:
924 | ins_next
925 |
926 |->cont_condt: // BASE = base, RC = result
927 | add PC, 4
928 | mov ITYPE, [RC]
929 | sar ITYPE, 47
930 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
931 | jb <5
932 | jmp <6
933 |
934 |->cont_condf: // BASE = base, RC = result
935 | mov ITYPE, [RC]
936 | sar ITYPE, 47
937 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
938 | jmp <4 // Label 4 advances PC and tests the flags set here.
939 |
940 |->vmeta_equal:
941 | cleartp TAB:RD
942 | sub PC, 4
943 |.if X64WIN
944 | mov CARG3, RD
945 | mov CARG4d, RBd
946 | mov L:RB, SAVE_L
947 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
948 | mov CARG2, RA
949 | mov CARG1, L:RB // Caveat: CARG1 == RA.
950 |.else
951 | mov CARG2, RA
952 | mov CARG4d, RBd // Caveat: CARG4 == RA.
953 | mov L:RB, SAVE_L
954 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
955 | mov CARG3, RD
956 | mov CARG1, L:RB
957 |.endif
958 | mov SAVE_PC, PC
959 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
960 | // 0/1 or TValue * (metamethod) returned in rax (RC).
961 | jmp <3
962 |
963 |->vmeta_equal_cd:
964 |.if FFI
965 | sub PC, 4
966 | mov L:RB, SAVE_L
967 | mov L:RB->base, BASE
968 | mov CARG1, L:RB
969 | mov CARG2d, dword [PC-4]
970 | mov SAVE_PC, PC
971 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
972 | // 0/1 or TValue * (metamethod) returned in rax (RC).
973 | jmp <3
974 |.endif
975 |
976 |->vmeta_istype:
977 | mov L:RB, SAVE_L
978 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
979 | mov CARG2d, RAd
980 | mov CARG3d, RDd
981 | mov L:CARG1, L:RB
982 | mov SAVE_PC, PC
983 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
984 | mov BASE, L:RB->base
985 | jmp <6
986 |
987 |//-- Arithmetic metamethods ---------------------------------------------
988 |
989 |->vmeta_arith_vno:
990 |.if DUALNUM
991 | movzx RBd, PC_RB
992 | movzx RCd, PC_RC
993 |.endif
994 |->vmeta_arith_vn:
995 | lea RC, [KBASE+RC*8] // RC = address of entry RC relative to KBASE.
996 | jmp >1
997 |
998 |->vmeta_arith_nvo:
999 |.if DUALNUM
1000 | movzx RBd, PC_RB
1001 | movzx RCd, PC_RC
1002 |.endif
1003 |->vmeta_arith_nv:
1004 | lea TMPR, [KBASE+RC*8]
1005 | lea RC, [BASE+RB*8]
1006 | mov RB, TMPR // Operands swapped: RB = KBASE entry, RC = stack slot.
1007 | jmp >2
1008 |
1009 |->vmeta_unm:
1010 | lea RC, [BASE+RD*8]
1011 | mov RB, RC // Both operand pointers refer to the same slot.
1012 | jmp >2
1013 |
1014 |->vmeta_arith_vvo:
1015 |.if DUALNUM
1016 | movzx RBd, PC_RB
1017 | movzx RCd, PC_RC
1018 |.endif
1019 |->vmeta_arith_vv:
1020 | lea RC, [BASE+RC*8]
1021 |1: // RB = slot index, RC = TValue *
1022 | lea RB, [BASE+RB*8]
1023 |2: // RA = slot index, RB/RC = TValue *
1024 | lea RA, [BASE+RA*8]
1025 |.if X64WIN
1026 | mov CARG3, RB
1027 | mov CARG4, RC
1028 | movzx RCd, PC_OP
1029 | mov ARG5d, RCd // Fifth argument goes to the ARG5 stack slot.
1030 | mov L:RB, SAVE_L
1031 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1032 | mov CARG2, RA
1033 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1034 |.else
1035 | movzx CARG5d, PC_OP
1036 | mov CARG2, RA
1037 | mov CARG4, RC // Caveat: CARG4 == RA.
1038 | mov L:CARG1, SAVE_L
1039 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1040 | mov CARG3, RB
1041 | mov L:RB, L:CARG1
1042 |.endif
1043 | mov SAVE_PC, PC
1044 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1045 | // NULL (finished) or TValue * (metamethod) returned in rax (RC).
1046 | mov BASE, L:RB->base
1047 | test RC, RC
1048 | jz ->cont_nop
1049 |
1050 | // Call metamethod for binary op.
1051 |->vmeta_binop:
1052 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1053 | mov RA, RC
1054 | sub RC, BASE // RC = byte delta from base to new base.
1055 | mov [RA-24], PC // [cont|PC]
1056 | lea PC, [RC+FRAME_CONT] // PC = frame delta + FRAME_CONT.
1057 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1058 | jmp ->vm_call_dispatch
1059 |
1060 |->vmeta_len:
1061 | movzx RDd, PC_RD
1062 | mov L:RB, SAVE_L
1063 | mov L:RB->base, BASE
1064 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1065 | mov L:CARG1, L:RB
1066 | mov SAVE_PC, PC
1067 | call extern lj_meta_len // (lua_State *L, TValue *o)
1068 | // NULL (retry) or TValue * (metamethod) returned in rax (RC).
1069 | mov BASE, L:RB->base
1070#if LJ_52
1071 | test RC, RC
1072 | jne ->vmeta_binop // Binop call for compatibility.
1073 | movzx RDd, PC_RD // NULL result: reload the operand and retry via BC_LEN_Z.
1074 | mov TAB:CARG1, [BASE+RD*8]
1075 | cleartp TAB:CARG1
1076 | jmp ->BC_LEN_Z
1077#else
1078 | jmp ->vmeta_binop // Binop call for compatibility.
1079#endif
1080 |
1081 |//-- Call metamethod ----------------------------------------------------
1082 |
1083 |->vmeta_call_ra: // Entry where RA is still a slot index: convert it to the new base address first.
1084 | lea RA, [BASE+RA*8+16]
1085 |->vmeta_call: // Resolve and call __call metamethod.
1086 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1088 | mov RB, RA
1089 |.if X64WIN
1090 | mov L:TMPR, SAVE_L
1091 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092 | lea CARG2, [RA-16] // func argument = slot at new base - 16.
1093 | lea CARG3, [RA+NARGS:RD*8-8] // top argument = new base + (nargs+1)*8 - 8.
1094 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1095 |.else
1096 | mov L:CARG1, SAVE_L
1097 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098 | lea CARG2, [RA-16] // func argument = slot at new base - 16.
1099 | lea CARG3, [RA+NARGS:RD*8-8] // top argument = new base + (nargs+1)*8 - 8.
1100 |.endif
1101 | mov SAVE_PC, PC
1102 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1103 | mov RA, RB // Restore the new base saved in RB before the call.
1104 | mov L:RB, SAVE_L
1105 | mov BASE, L:RB->base
1106 | mov NARGS:RDd, TMP1d
1107 | mov LFUNC:RB, [RA-16] // Reload the function slot after lj_meta_call.
1108 | add NARGS:RDd, 1 // One more argument than before the call. NOTE(review): presumably the original callee object; confirm in lj_meta_call.
1109 | // This is fragile. L->base must not move, KBASE must always be defined.
1110 | cmp KBASE, BASE // Continue with CALLT if flag set.
1111 | je ->BC_CALLT_Z
1112 | cleartp LFUNC:RB
1113 | mov BASE, RA
1114 | ins_call // Otherwise call resolved metamethod.
1115 |
1116 |//-- Argument coercion for 'for' statement ------------------------------
1117 |
1118 |->vmeta_for: // Calls lj_meta_for on the slots at RA, then re-dispatches the instruction at PC-4.
1119 | mov L:RB, SAVE_L
1120 | mov L:RB->base, BASE
1121 | mov CARG2, RA // Caveat: CARG2 == BASE
1122 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1123 | mov SAVE_PC, PC
1124 | call extern lj_meta_for // (lua_State *L, TValue *base)
1125 | mov BASE, L:RB->base
1126 | mov RCd, [PC-4] // Reload the 32-bit instruction word just before PC.
1127 | movzx RAd, RCH // Second byte of the word goes to RA.
1128 | movzx OP, RCL // Low byte of the word goes to OP.
1129 | shr RCd, 16 // Upper 16 bits remain in RC.
1130 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1131 |
1132 |//-----------------------------------------------------------------------
1133 |//-- Fast functions -----------------------------------------------------
1134 |//-----------------------------------------------------------------------
1135 |
1136 |.macro .ffunc, name
1137 |->ff_ .. name: // Plain entry label ff_<name>; no argument-count check.
1138 |.endmacro
1139 |
1140 |.macro .ffunc_1, name
1141 |->ff_ .. name:
1142 | cmp NARGS:RDd, 1+1; jb ->fff_fallback // Fall back when NARGS:RD is below 1+1. Flags from this cmp stay live for the macro's user.
1143 |.endmacro
1144 |
1145 |.macro .ffunc_2, name
1146 |->ff_ .. name:
1147 | cmp NARGS:RDd, 2+1; jb ->fff_fallback // Fall back when NARGS:RD is below 2+1.
1148 |.endmacro
1149 |
1150 |.macro .ffunc_n, name, op
1151 | .ffunc_1 name
1152 | checknumtp [BASE], ->fff_fallback
1153 | op xmm0, qword [BASE] // Apply the caller-supplied SSE op to the first argument, result in xmm0.
1154 |.endmacro
1155 |
1156 |.macro .ffunc_n, name
1157 | .ffunc_n name, movsd // One-parameter form: just load the first argument into xmm0.
1158 |.endmacro
1159 |
1160 |.macro .ffunc_nn, name
1161 | .ffunc_2 name
1162 | checknumtp [BASE], ->fff_fallback
1163 | checknumtp [BASE+8], ->fff_fallback
1164 | movsd xmm0, qword [BASE] // First argument in xmm0.
1165 | movsd xmm1, qword [BASE+8] // Second argument in xmm1.
1166 |.endmacro
1167 |
1168 |// Inlined GC threshold check. Caveat: uses label 1.
1169 |.macro ffgccheck
1170 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] // Clobbers RB.
1171 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1172 | jb >1 // gc.total below gc.threshold: skip the step.
1173 | call ->fff_gcstep
1174 |1:
1175 |.endmacro
1176 |
1177 |//-- Base library: checks -----------------------------------------------
1178 |
1179 |.ffunc_1 assert
1180 | mov ITYPE, [BASE]
1181 | mov RB, ITYPE
1182 | sar ITYPE, 47 // Upper bits of the slot are compared as the type tag below.
1183 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback // NOTE(review): presumably "argument is not truthy"; confirm LJ_TISTRUECOND semantics.
1184 | mov PC, [BASE-8]
1185 | mov MULTRES, RDd // Remember nargs+1; restored into RD at label 2.
1186 | mov RB, [BASE]
1187 | mov [BASE-16], RB // First argument becomes the first result.
1188 | sub RDd, 2 // RD = number of arguments after the first.
1189 | jz >2
1190 | mov RA, BASE
1191 |1: // Copy each remaining argument down by 16 bytes.
1192 | add RA, 8
1193 | mov RB, [RA]
1194 | mov [RA-16], RB
1195 | sub RDd, 1
1196 | jnz <1
1197 |2:
1198 | mov RDd, MULTRES
1199 | jmp ->fff_res_ // Enters after the MULTRES store in fff_res, which was already done above.
1200 |
1201 |.ffunc_1 type
1202 | mov RC, [BASE]
1203 | sar RC, 47 // Keep only the upper (tag) bits of the first argument.
1204 | mov RBd, LJ_TISNUM
1205 | cmp RCd, RBd
1206 | cmovb RCd, RBd // Tags below LJ_TISNUM (unsigned) are replaced by LJ_TISNUM.
1207 | not RCd // Inverted tag is used as the upvalue index below.
1208 |2:
1209 | mov CFUNC:RB, [BASE-16] // The called function object lives at BASE-16.
1210 | cleartp CFUNC:RB
1211 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] // Load upvalue[RC] of the C function.
1212 | mov PC, [BASE-8]
1213 | settp STR:RC, LJ_TSTR
1214 | mov [BASE-16], STR:RC // Result: the string taken from the upvalue.
1215 | jmp ->fff_res1
1216 |
1217 |//-- Base library: getters and setters ---------------------------------
1218 |
1219 |.ffunc_1 getmetatable
1220 | mov TAB:RB, [BASE]
1221 | mov PC, [BASE-8]
1222 | checktab TAB:RB, >6 // Not a table: handled at label 6.
1223 |1: // Field metatable must be at same offset for GCtab and GCudata!
1224 | mov TAB:RB, TAB:RB->metatable
1225 |2:
1226 | test TAB:RB, TAB:RB
1227 | mov aword [BASE-16], LJ_TNIL // Default result is nil (mov leaves the flags from test intact).
1228 | jz ->fff_res1 // No metatable: return nil.
1229 | settp TAB:RC, TAB:RB, LJ_TTAB
1230 | mov [BASE-16], TAB:RC // Store metatable as default result.
1231 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] // Key string for the MM_metatable lookup.
1232 | mov RAd, TAB:RB->hmask
1233 | and RAd, STR:RC->sid // Hash slot index = hmask & key->sid.
1234 | settp STR:RC, LJ_TSTR
1235 | imul RAd, #NODE
1236 | add NODE:RA, TAB:RB->node // RA = address of the first node in the chain.
1237 |3: // Rearranged logic, because we expect _not_ to find the key.
1238 | cmp NODE:RA->key, STR:RC
1239 | je >5
1240 |4:
1241 | mov NODE:RA, NODE:RA->next
1242 | test NODE:RA, NODE:RA
1243 | jnz <3
1244 | jmp ->fff_res1 // Not found, keep default result.
1245 |5:
1246 | mov RB, NODE:RA->val
1247 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1248 | mov [BASE-16], RB // Return value of mt.__metatable.
1249 | jmp ->fff_res1
1250 |
1251 |6: // Non-table argument. NOTE(review): relies on ITYPE holding the type tag after checktab; confirm in the macro.
1252 | cmp ITYPEd, LJ_TUDATA; je <1
1253 | cmp ITYPEd, LJ_TISNUM; ja >7
1254 | mov ITYPEd, LJ_TISNUM // Tags not above LJ_TISNUM (unsigned) all use the LJ_TISNUM entry.
1255 |7:
1256 | not ITYPEd
1257 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] // Base metatable, indexed by the inverted tag.
1258 | jmp <2
1259 |
1260 |.ffunc_2 setmetatable
1261 | mov TAB:RB, [BASE]
1262 | mov TAB:TMPR, TAB:RB // Keep the first-argument slot value as loaded; it is stored back as the result below.
1263 | checktab TAB:RB, ->fff_fallback
1264 | // Fast path: no mt for table yet and not clearing the mt.
1265 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1266 | mov TAB:RA, [BASE+8]
1267 | checktab TAB:RA, ->fff_fallback // Second argument must be a table too (a nil mt takes the fallback).
1268 | mov TAB:RB->metatable, TAB:RA
1269 | mov PC, [BASE-8]
1270 | mov [BASE-16], TAB:TMPR // Return original table.
1271 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1272 | jz >1
1273 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1274 | barrierback TAB:RB, RC
1275 |1:
1276 | jmp ->fff_res1
1277 |
1278 |.ffunc_2 rawget
1279 |.if X64WIN
1280 | mov TAB:RA, [BASE]
1281 | checktab TAB:RA, ->fff_fallback
1282 | mov RB, BASE // Save BASE.
1283 | lea CARG3, [BASE+8] // key argument = address of the second argument slot.
1284 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1285 | mov CARG1, SAVE_L
1286 |.else
1287 | mov TAB:CARG2, [BASE]
1288 | checktab TAB:CARG2, ->fff_fallback
1289 | mov RB, BASE // Save BASE.
1290 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1291 | mov CARG1, SAVE_L
1292 |.endif
1293 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1294 | // cTValue * returned in eax (RD).
1295 | mov BASE, RB // Restore BASE.
1296 | // Copy table slot.
1297 | mov RB, [RD] // The returned pointer is dereferenced without a NULL check.
1298 | mov PC, [BASE-8]
1299 | mov [BASE-16], RB
1300 | jmp ->fff_res1
1301 |
1302 |//-- Base library: conversions ------------------------------------------
1303 |
1304 |.ffunc tonumber
1305 | // Only handles the number case inline (without a base argument).
1306 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1307 | mov RB, [BASE]
1308 | checknumber RB, ->fff_fallback // Anything that fails the number check goes to the fallback.
1309 | mov PC, [BASE-8]
1310 | mov [BASE-16], RB // Result is the argument itself, unchanged.
1311 | jmp ->fff_res1
1312 |
1313 |.ffunc_1 tostring
1314 | // Only handles the string or number case inline.
1315 | mov PC, [BASE-8]
1316 | mov STR:RB, [BASE]
1317 | checktp_nc STR:RB, LJ_TSTR, >3 // Not a string: try the number path at label 3.
1318 | // A __tostring method in the string base metatable is ignored.
1319 |2:
1320 | mov [BASE-16], STR:RB // Result: the tagged string value in RB.
1321 | jmp ->fff_res1
1322 |3: // Handle numbers inline, unless a number base metatable is present.
1323 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1324 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1325 | jne ->fff_fallback
1326 | ffgccheck // Caveat: uses label 1.
1327 | mov L:RB, SAVE_L
1328 | mov L:RB->base, BASE // Add frame since C call can throw.
1329 | mov SAVE_PC, PC // Redundant (but a defined value).
1330 |.if not X64WIN
1331 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1332 |.endif
1333 | mov L:CARG1, L:RB
1334 |.if DUALNUM
1335 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1336 |.else
1337 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1338 |.endif
1339 | // GCstr returned in eax (RD).
1340 | mov BASE, L:RB->base
1341 | settp STR:RB, RD, LJ_TSTR // Tag the returned string into RB for the store at label 2.
1342 | jmp <2
1343 |
1344 |//-- Base library: iterators -------------------------------------------
1345 |
1346 |.ffunc_1 next
1347 | je >2 // Missing 2nd arg?
1348 |1:
1349 |.if X64WIN
1350 | mov RA, [BASE]
1351 | checktab RA, ->fff_fallback
1352 |.else
1353 | mov CARG2, [BASE]
1354 | checktab CARG2, ->fff_fallback
1355 |.endif
1356 | mov L:RB, SAVE_L
1357 | mov L:RB->base, BASE // Add frame since C call can throw.
1358 | mov L:RB->top, BASE // Dummy frame length is ok.
1359 | mov PC, [BASE-8] // Must be read here: [BASE-8] is overwritten with the value result below.
1360 |.if X64WIN
1361 | lea CARG3, [BASE+8]
1362 | mov CARG2, RA // Caveat: CARG2 == BASE.
1363 | mov CARG1, L:RB
1364 |.else
1365 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1366 | mov CARG1, L:RB
1367 |.endif
1368 | mov SAVE_PC, PC // Needed for ITERN fallback.
1369 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1370 | // Flag returned in eax (RD).
1371 | mov BASE, L:RB->base
1372 | test RDd, RDd; jz >3 // End of traversal?
1373 | // Copy key and value to results.
1374 | mov RB, [BASE+8] // Key is read back from the key slot passed to lj_tab_next.
1375 | mov RD, [BASE+16] // Value is read from the slot after it.
1376 | mov [BASE-16], RB
1377 | mov [BASE-8], RD
1378 |->fff_res2: // Shared exit: return two results.
1379 | mov RDd, 1+2
1380 | jmp ->fff_res
1381 |2: // Set missing 2nd arg to nil.
1382 | mov aword [BASE+8], LJ_TNIL
1383 | jmp <1
1384 |3: // End of traversal: return nil.
1385 | mov aword [BASE-16], LJ_TNIL
1386 | jmp ->fff_res1
1387 |
1388 |.ffunc_1 pairs
1389 | mov TAB:RB, [BASE]
1390 | mov TMPR, TAB:RB // Keep the first-argument slot value as loaded; it becomes the second result.
1391 | checktab TAB:RB, ->fff_fallback
1392#if LJ_52
1393 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1394#endif
1395 | mov CFUNC:RD, [BASE-16]
1396 | cleartp CFUNC:RD
1397 | mov CFUNC:RD, CFUNC:RD->upvalue[0] // First result comes from upvalue[0] of this fast function.
1398 | settp CFUNC:RD, LJ_TFUNC
1399 | mov PC, [BASE-8] // Must be read before [BASE-8] is overwritten below.
1400 | mov [BASE-16], CFUNC:RD
1401 | mov [BASE-8], TMPR
1402 | mov aword [BASE], LJ_TNIL // Third result: nil.
1403 | mov RDd, 1+3
1404 | jmp ->fff_res
1405 |
1406 |.ffunc_2 ipairs_aux
1407 | mov TAB:RB, [BASE]
1408 | checktab TAB:RB, ->fff_fallback
1409 |.if DUALNUM
1410 | mov RA, [BASE+8]
1411 | checkint RA, ->fff_fallback
1412 |.else
1413 | checknumtp [BASE+8], ->fff_fallback
1414 | movsd xmm0, qword [BASE+8]
1415 |.endif
1416 | mov PC, [BASE-8] // Must be read before [BASE-8] receives the value result at label 1.
1417 |.if DUALNUM
1418 | add RAd, 1 // Next index = index + 1.
1419 | setint ITYPE, RA
1420 | mov [BASE-16], ITYPE // First result: the incremented index.
1421 |.else
1422 | sseconst_1 xmm1, TMPR
1423 | addsd xmm0, xmm1 // Next index = index + 1.
1424 | cvttsd2si RAd, xmm0 // Truncated integer copy of the index for the array lookup.
1425 | movsd qword [BASE-16], xmm0 // First result: the incremented index.
1426 |.endif
1427 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1428 | mov RD, TAB:RB->array
1429 | lea RD, [RD+RA*8]
1430 |1: // RD points at the candidate slot (array slot or lj_tab_getinth result).
1431 | cmp aword [RD], LJ_TNIL; je ->fff_res0
1432 | // Copy array slot.
1433 | mov RB, [RD]
1434 | mov [BASE-8], RB
1435 | jmp ->fff_res2
1436 |2: // Check for empty hash part first. Otherwise call C function.
1437 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1438 |.if X64WIN
1439 | mov TMPR, BASE
1440 | mov CARG2d, RAd
1441 | mov CARG1, TAB:RB
1442 | mov RB, TMPR // BASE is kept in RB across the call.
1443 |.else
1444 | mov CARG1, TAB:RB
1445 | mov RB, BASE // Save BASE.
1446 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1447 |.endif
1448 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1449 | // cTValue * or NULL returned in eax (RD).
1450 | mov BASE, RB
1451 | test RD, RD
1452 | jnz <1
1453 |->fff_res0: // Shared exit: return no results.
1454 | mov RDd, 1+0
1455 | jmp ->fff_res
1456 |
1457 |.ffunc_1 ipairs
1458 | mov TAB:RB, [BASE]
1459 | mov TMPR, TAB:RB // Keep the first-argument slot value as loaded; it becomes the second result.
1460 | checktab TAB:RB, ->fff_fallback
1461#if LJ_52
1462 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1463#endif
1464 | mov CFUNC:RD, [BASE-16]
1465 | cleartp CFUNC:RD
1466 | mov CFUNC:RD, CFUNC:RD->upvalue[0] // First result comes from upvalue[0] of this fast function.
1467 | settp CFUNC:RD, LJ_TFUNC
1468 | mov PC, [BASE-8] // Must be read before [BASE-8] is overwritten below.
1469 | mov [BASE-16], CFUNC:RD
1470 | mov [BASE-8], TMPR
1471 |.if DUALNUM
1472 | mov64 RD, ((uint64_t)LJ_TISNUM<<47) // LJ_TISNUM tag with an all-zero payload.
1473 | mov [BASE], RD
1474 |.else
1475 | mov qword [BASE], 0 // All-zero bits, i.e. the double 0.0.
1476 |.endif
1477 | mov RDd, 1+3
1478 | jmp ->fff_res
1479 |
1480 |//-- Base library: catch errors ----------------------------------------
1481 |
1482 |.ffunc_1 pcall
1483 | lea RA, [BASE+16] // New base for the protected call.
1484 | sub NARGS:RDd, 1 // The called function itself no longer counts as an argument.
1485 | mov PCd, 16+FRAME_PCALL
1486 |1: // Shared with xpcall, which jumps here with its own RA/RD/PC values.
1487 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1488 | shr RB, HOOK_ACTIVE_SHIFT
1489 | and RB, 1
1490 | add PC, RB // Remember active hook before pcall.
1491 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1492 | mov KBASE, RD // KBASE is used as the loop counter here.
1493 |2: // Move each slot up by 8 bytes, highest slot first.
1494 | mov RB, [RA+KBASE*8-24]
1495 | mov [RA+KBASE*8-16], RB
1496 | sub KBASE, 1
1497 | ja <2 // Loop while the counter is still above zero.
1498 | jmp ->vm_call_dispatch
1499 |
1500 |.ffunc_2 xpcall
1501 | mov LFUNC:RA, [BASE+8]
1502 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback // Second argument must be a function.
1503 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1504 | mov [BASE], LFUNC:RA
1505 | mov [BASE+8], LFUNC:RB
1506 | lea RA, [BASE+24] // New base is one slot further than in pcall.
1507 | sub NARGS:RDd, 2 // Neither of the two leading arguments counts as a call argument.
1508 | mov PCd, 24+FRAME_PCALL
1509 | jmp <1 // Continue in the shared pcall tail (label 1 above).
1510 |
1511 |//-- Coroutine library --------------------------------------------------
1512 |
1513 |.macro coroutine_resume_wrap, resume
1514 |.if resume
1515 |.ffunc_1 coroutine_resume
1516 | mov L:RB, [BASE] // Coroutine is the first argument (type is checked further down).
1517 | cleartp L:RB
1518 |.else
1519 |.ffunc coroutine_wrap_aux
1520 | mov CFUNC:RB, [BASE-16]
1521 | cleartp CFUNC:RB
1522 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine is taken from upvalue[0] of the wrapper function.
1523 | cleartp L:RB
1524 |.endif
1525 | mov PC, [BASE-8]
1526 | mov SAVE_PC, PC
1527 | mov TMP1, L:RB // TMP1 keeps the coroutine pointer across the calls below.
1528 |.if resume
1529 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1530 |.endif
1531 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1532 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1533 | mov RA, L:RB->top // mov does not touch flags: the je below still tests the status compare.
1534 | je >1 // Status != LUA_YIELD (i.e. 0)?
1535 | cmp RA, L:RB->base // Check for presence of initial func.
1536 | je ->fff_fallback
1537 | mov PC, [RA-8] // Move initial function up.
1538 | mov [RA], PC
1539 | add RA, 8
1540 |1:
1541 |.if resume
1542 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1543 |.else
1544 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1545 |.endif
1546 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1547 | mov L:RB->top, PC // New top of the coroutine stack after the arguments.
1548 |
1549 | mov L:RB, SAVE_L // From here on L:RB is the current (calling) state again.
1550 | mov L:RB->base, BASE
1551 |.if resume
1552 | add BASE, 8 // Keep resumed thread in stack for GC.
1553 |.endif
1554 | mov L:RB->top, BASE
1555 |.if resume
1556 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1557 |.else
1558 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1559 |.endif
1560 | sub RB, PC // Relative to PC.
1561 |
1562 | cmp PC, RA
1563 | je >3 // Nothing to move.
1564 |2: // Move args to coroutine.
1565 | mov RC, [PC+RB]
1566 | mov [PC-8], RC
1567 | sub PC, 8
1568 | cmp PC, RA
1569 | jne <2
1570 |3:
1571 | mov CARG2, RA
1572 | mov CARG1, TMP1
1573 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1574 |
1575 | mov L:RB, SAVE_L
1576 | mov L:PC, TMP1 // PC is reused to hold the coroutine pointer.
1577 | mov BASE, L:RB->base
1578 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1579 | set_vmstate INTERP
1580 |
1581 | cmp eax, LUA_YIELD
1582 | ja >8 // Status above LUA_YIELD: error path at label 8.
1583 |4:
1584 | mov RA, L:PC->base
1585 | mov KBASE, L:PC->top
1586 | mov L:PC->top, RA // Clear coroutine stack.
1587 | mov PC, KBASE
1588 | sub PC, RA // PC = size of the results in bytes.
1589 | je >6 // No results?
1590 | lea RD, [BASE+PC]
1591 | shr PCd, 3 // Bytes -> number of result slots.
1592 | cmp RD, L:RB->maxstack
1593 | ja >9 // Need to grow stack?
1594 |
1595 | mov RB, BASE
1596 | sub RB, RA // RB = offset from the coroutine slot to the destination slot.
1597 |5: // Move results from coroutine.
1598 | mov RD, [RA]
1599 | mov [RA+RB], RD
1600 | add RA, 8
1601 | cmp RA, KBASE
1602 | jne <5
1603 |6:
1604 |.if resume
1605 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1606 | mov_true ITYPE // Prepend true to results.
1607 | mov [BASE-8], ITYPE
1608 |.else
1609 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1610 |.endif
1611 |7:
1612 | mov PC, SAVE_PC
1613 | mov MULTRES, RDd
1614 |.if resume
1615 | mov RA, -8 // Results start at BASE-8 (the prepended true/false).
1616 |.else
1617 | xor RAd, RAd // Results start at BASE.
1618 |.endif
1619 | test PCd, FRAME_TYPE
1620 | jz ->BC_RET_Z
1621 | jmp ->vm_return
1622 |
1623 |8: // Coroutine returned with error (at co->top-1).
1624 |.if resume
1625 | mov_false ITYPE // Prepend false to results.
1626 | mov [BASE-8], ITYPE
1627 | mov RA, L:PC->top
1628 | sub RA, 8
1629 | mov L:PC->top, RA // Clear error from coroutine stack.
1630 | // Copy error message.
1631 | mov RD, [RA]
1632 | mov [BASE], RD
1633 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1634 | jmp <7
1635 |.else
1636 | mov CARG2, L:PC
1637 | mov CARG1, L:RB
1638 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1639 | // Error function does not return.
1640 |.endif
1641 |
1642 |9: // Handle stack expansion on return from yield.
1643 | mov L:RA, TMP1
1644 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1645 | mov CARG2, PC // n = number of result slots computed above.
1646 | mov CARG1, L:RB
1647 | call extern lj_state_growstack // (lua_State *L, int n)
1648 | mov L:PC, TMP1
1649 | mov BASE, L:RB->base
1650 | jmp <4 // Retry the stack move.
1651 |.endmacro
1652 |
1653 | coroutine_resume_wrap 1 // coroutine.resume
1654 | coroutine_resume_wrap 0 // coroutine.wrap
1655 |
1656 |.ffunc coroutine_yield
1657 | mov L:RB, SAVE_L
1658 | test aword L:RB->cframe, CFRAME_RESUME
1659 | jz ->fff_fallback // CFRAME_RESUME bit not set in cframe: let the fallback handle it.
1660 | mov L:RB->base, BASE
1661 | lea RD, [BASE+NARGS:RD*8-8]
1662 | mov L:RB->top, RD // top = BASE + nargs*8.
1663 | xor RDd, RDd
1664 | mov aword L:RB->cframe, RD // Clear L->cframe.
1665 | mov al, LUA_YIELD // RD was just zeroed, so this leaves LUA_YIELD in the return register.
1666 | mov byte L:RB->status, al
1667 | jmp ->vm_leave_unw
1668 |
1669 |//-- Math library -------------------------------------------------------
1670 |
1671 | .ffunc_1 math_abs
1672 | mov RB, [BASE]
1673 |.if DUALNUM
1674 | checkint RB, >3
1675 | cmp RBd, 0; jns ->fff_resi // Non-negative integer: return it as is.
1676 | neg RBd; js >2 // Still negative after negation: handled at label 2.
1677 |->fff_resbit:
1678 |->fff_resi: // Shared exit: return the 32-bit integer in RBd.
1679 | setint RB
1680 |->fff_resRB: // Shared exit: return the tagged value in RB.
1681 | mov PC, [BASE-8]
1682 | mov [BASE-16], RB
1683 | jmp ->fff_res1
1684 |2:
1685 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1686 | jmp ->fff_resRB
1687 |3:
1688 | ja ->fff_fallback
1689 |.else
1690 | checknum RB, ->fff_fallback
1691 |.endif
1692 | shl RB, 1 // Shift left then right by one bit: clears the top (sign) bit.
1693 | shr RB, 1
1694 | mov PC, [BASE-8]
1695 | mov [BASE-16], RB
1696 | jmp ->fff_res1
1697 |
1698 |.ffunc_n math_sqrt, sqrtsd
1699 |->fff_resxmm0: // Shared exit: return the double in xmm0 as the single result.
1700 | mov PC, [BASE-8]
1701 | movsd qword [BASE-16], xmm0
1702 | // fallthrough
1703 |
1704 |->fff_res1: // Shared exit: one result already stored at [BASE-16], PC already loaded.
1705 | mov RDd, 1+1
1706 |->fff_res: // Shared exit: RD = nresults+1.
1707 | mov MULTRES, RDd
1708 |->fff_res_: // Entry for callers that have already set MULTRES.
1709 | test PCd, FRAME_TYPE
1710 | jnz >7
1711 |5:
1712 | cmp PC_RB, RDL // More results expected?
1713 | ja >6
1714 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1715 | movzx RAd, PC_RA
1716 | neg RA
1717 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1718 | ins_next
1719 |
1720 |6: // Fill up results with nil.
1721 | mov aword [BASE+RD*8-24], LJ_TNIL
1722 | add RD, 1
1723 | jmp <5
1724 |
1725 |7: // Non-standard return case.
1726 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1727 | jmp ->vm_return
1728 |
1729 |.macro math_round, func
1730 | .ffunc math_ .. func
1731 |.if DUALNUM
1732 | mov RB, [BASE]
1733 | checknumx RB, ->fff_resRB, je
1734 | ja ->fff_fallback
1735 |.else
1736 | checknumtp [BASE], ->fff_fallback
1737 |.endif
1738 | movsd xmm0, qword [BASE]
1739 | call ->vm_ .. func .. _sse // Calls vm_<func>_sse with the argument in xmm0; the result is read from xmm0.
1740 |.if DUALNUM
1741 | cvttsd2si RBd, xmm0
1742 | cmp RBd, 0x80000000 // cvttsd2si yields 0x80000000 when the value does not fit in int32.
1743 | jne ->fff_resi
1744 | cvtsi2sd xmm1, RBd // Convert back to tell a genuine -2^31 from an out-of-range value.
1745 | ucomisd xmm0, xmm1
1746 | jp ->fff_resxmm0 // Unordered compare (NaN): return the double.
1747 | je ->fff_resi
1748 |.endif
1749 | jmp ->fff_resxmm0
1750 |.endmacro
1751 |
1752 | math_round floor
1753 | math_round ceil
1754 |
1755 |.ffunc math_log
1756 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1757 | checknumtp [BASE], ->fff_fallback
1758 | movsd xmm0, qword [BASE]
1759 | mov RB, BASE // Keep BASE in RB across the C call.
1760 | call extern log
1761 | mov BASE, RB
1762 | jmp ->fff_resxmm0
1763 |
1764 |.macro math_extern, func
1765 | .ffunc_n math_ .. func
1766 | mov RB, BASE // Keep BASE in RB across the C call.
1767 | call extern func // One-argument C function: argument and result in xmm0.
1768 | mov BASE, RB
1769 | jmp ->fff_resxmm0
1770 |.endmacro
1771 |
1772 |.macro math_extern2, func
1773 | .ffunc_nn math_ .. func
1774 | mov RB, BASE // Keep BASE in RB across the C call.
1775 | call extern func // Two-argument C function: arguments in xmm0/xmm1, result in xmm0.
1776 | mov BASE, RB
1777 | jmp ->fff_resxmm0
1778 |.endmacro
1779 |
1780 | math_extern log10
1781 | math_extern exp
1782 | math_extern sin
1783 | math_extern cos
1784 | math_extern tan
1785 | math_extern asin
1786 | math_extern acos
1787 | math_extern atan
1788 | math_extern sinh
1789 | math_extern cosh
1790 | math_extern tanh
1791 | math_extern2 pow
1792 | math_extern2 atan2
1793 | math_extern2 fmod
1794 |
1795 |.ffunc_2 math_ldexp
1796 | checknumtp [BASE], ->fff_fallback
1797 | checknumtp [BASE+8], ->fff_fallback
1798 | fld qword [BASE+8] // Push the second argument (ends up in st1).
1799 | fld qword [BASE] // Push the first argument (st0).
1800 | fscale // st0 = st0 * 2^trunc(st1).
1801 | fpop1 // NOTE(review): presumably drops st1 and keeps the result in st0; confirm against the macro.
1802 | mov PC, [BASE-8]
1803 | fstp qword [BASE-16] // Store the result and pop it from the x87 stack.
1804 | jmp ->fff_res1
1805 |
1806 |.ffunc_n math_frexp
1807 | mov RB, BASE // Keep BASE in RB across the C call.
1808 |.if X64WIN
1809 | lea CARG2, TMP1 // Caveat: CARG2 == BASE
1810 |.else
1811 | lea CARG1, TMP1 // Address of TMP1 is passed as the integer-pointer argument.
1812 |.endif
1813 | call extern frexp
1814 | mov BASE, RB
1815 | mov RBd, TMP1d // Read back the integer written through the pointer.
1816 | mov PC, [BASE-8] // Must be read before [BASE-8] receives the second result.
1817 | movsd qword [BASE-16], xmm0 // First result: the double returned in xmm0.
1818 |.if DUALNUM
1819 | setint RB
1820 | mov [BASE-8], RB // Second result as an integer.
1821 |.else
1822 | cvtsi2sd xmm1, RBd
1823 | movsd qword [BASE-8], xmm1 // Second result converted to a double.
1824 |.endif
1825 | mov RDd, 1+2
1826 | jmp ->fff_res
1827 |
1828 |.ffunc_n math_modf
1829 | mov RB, BASE // Keep BASE in RB across the C call.
1830 |.if X64WIN
1831 | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
1832 |.else
1833 | lea CARG1, [BASE-16] // modf writes through this pointer, i.e. directly into the first result slot.
1834 |.endif
1835 | call extern modf
1836 | mov BASE, RB
1837 | mov PC, [BASE-8] // Must be read before [BASE-8] receives the second result.
1838 | movsd qword [BASE-8], xmm0 // Second result: the value returned in xmm0.
1839 | mov RDd, 1+2
1840 | jmp ->fff_res
1841 |
1842 |.macro math_minmax, name, cmovop, sseop
1843 | .ffunc_1 name
1844 | mov RAd, 2 // RA counts arguments in the same units as RD (nargs+1); start at the second argument.
1845 |.if DUALNUM
1846 | mov RB, [BASE]
1847 | checkint RB, >4
1848 |1: // Handle integers.
1849 | cmp RAd, RDd; jae ->fff_resRB // All arguments consumed: return the integer in RB.
1850 | mov TMPR, [BASE+RA*8-8]
1851 | checkint TMPR, >3
1852 | cmp RBd, TMPRd
1853 | cmovop RB, TMPR // Conditionally replace the running result with this argument.
1854 | add RAd, 1
1855 | jmp <1
1856 |3:
1857 | ja ->fff_fallback
1858 | // Convert intermediate result to number and continue below.
1859 | cvtsi2sd xmm0, RBd
1860 | jmp >6
1861 |4:
1862 | ja ->fff_fallback
1863 |.else
1864 | checknumtp [BASE], ->fff_fallback
1865 |.endif
1866 |
1867 | movsd xmm0, qword [BASE]
1868 |5: // Handle numbers or integers.
1869 | cmp RAd, RDd; jae ->fff_resxmm0 // All arguments consumed: return the double in xmm0.
1870 |.if DUALNUM
1871 | mov RB, [BASE+RA*8-8]
1872 | checknumx RB, >6, jb
1873 | ja ->fff_fallback
1874 | cvtsi2sd xmm1, RBd // Integer argument: convert it instead of loading it as a double.
1875 | jmp >7
1876 |.else
1877 | checknumtp [BASE+RA*8-8], ->fff_fallback
1878 |.endif
1879 |6:
1880 | movsd xmm1, qword [BASE+RA*8-8]
1881 |7:
1882 | sseop xmm0, xmm1 // Fold this argument into the running result.
1883 | add RAd, 1
1884 | jmp <5
1885 |.endmacro
1886 |
1887 | math_minmax math_min, cmovg, minsd
1888 | math_minmax math_max, cmovl, maxsd
1889 |
1890 |//-- String library -----------------------------------------------------
1891 |
1892 |.ffunc string_byte // Only handle the 1-arg case here.
1893 | cmp NARGS:RDd, 1+1; jne ->fff_fallback
1894 | mov STR:RB, [BASE]
1895 | checkstr STR:RB, ->fff_fallback
1896 | mov PC, [BASE-8]
1897 | cmp dword STR:RB->len, 1
1898 | jb ->fff_res0 // Return no results for empty string.
1899 | movzx RBd, byte STR:RB[1] // NOTE(review): presumably the first data byte right after the string header; confirm the STR type map.
1900 |.if DUALNUM
1901 | jmp ->fff_resi
1902 |.else
1903 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1904 |.endif
1905 |
1906 |.ffunc string_char // Only handle the 1-arg case here.
1907 | ffgccheck
1908 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1909 |.if DUALNUM
1910 | mov RB, [BASE]
1911 | checkint RB, ->fff_fallback
1912 |.else
1913 | checknumtp [BASE], ->fff_fallback
1914 | cvttsd2si RBd, qword [BASE]
1915 |.endif
1916 | cmp RBd, 255; ja ->fff_fallback // Unsigned compare: anything outside 0..255 takes the fallback.
1917 | mov TMP1d, RBd // The character value is written to the stack slot TMP1.
1918 | mov TMPRd, 1 // String length = 1.
1919 | lea RD, TMP1 // Points to stack. Little-endian.
1920 |->fff_newstr: // Shared tail: create a string from RD (pointer) and TMPRd (length).
1921 | mov L:RB, SAVE_L
1922 | mov L:RB->base, BASE
1923 | mov CARG3d, TMPRd // Zero-extended to size_t.
1924 | mov CARG2, RD
1925 | mov CARG1, L:RB
1926 | mov SAVE_PC, PC
1927 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1928 |->fff_resstr: // Shared tail: return the string in RD; expects L in RB.
1929 | // GCstr * returned in eax (RD).
1930 | mov BASE, L:RB->base
1931 | mov PC, [BASE-8]
1932 | settp STR:RD, LJ_TSTR
1933 | mov [BASE-16], STR:RD
1934 | jmp ->fff_res1
1935 |
1936 |.ffunc string_sub
1937 | ffgccheck
1938 | mov TMPRd, -1 // Default end = -1; mov leaves flags alone, so the cmp below still drives jb/jna.
1939 | cmp NARGS:RDd, 1+2; jb ->fff_fallback
1940 | jna >1 // Exactly two arguments: keep the default end.
1941 |.if DUALNUM
1942 | mov TMPR, [BASE+16]
1943 | checkint TMPR, ->fff_fallback
1944 |.else
1945 | checknumtp [BASE+16], ->fff_fallback
1946 | cvttsd2si TMPRd, qword [BASE+16]
1947 |.endif
1948 |1:
1949 | mov STR:RB, [BASE]
1950 | checkstr STR:RB, ->fff_fallback
1951 |.if DUALNUM
1952 | mov ITYPE, [BASE+8]
1953 | mov RAd, ITYPEd // Must clear hiword for lea below.
1954 | sar ITYPE, 47
1955 | cmp ITYPEd, LJ_TISNUM
1956 | jne ->fff_fallback
1957 |.else
1958 | checknumtp [BASE+8], ->fff_fallback
1959 | cvttsd2si RAd, qword [BASE+8]
1960 |.endif
1961 | mov RCd, STR:RB->len
1962 | cmp RCd, TMPRd // len < end? (unsigned compare)
1963 | jb >5
1964 |2:
1965 | test RAd, RAd // start <= 0?
1966 | jle >7
1967 |3:
1968 | sub TMPRd, RAd // start > end?
1969 | jl ->fff_emptystr
1970 | lea RD, [STR:RB+RAd+#STR-1] // RD = string object + start + #STR - 1 (source pointer for fff_newstr).
1971 | add TMPRd, 1 // Length = end - start + 1.
1972 |4:
1973 | jmp ->fff_newstr
1974 |
1975 |5: // Negative end or overflow.
1976 | jl >6
1977 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1978 | jmp <2
1979 |6: // Overflow.
1980 | mov TMPRd, RCd // end = len
1981 | jmp <2
1982 |
1983 |7: // Negative start or underflow.
1984 | je >8
1985 | add RAd, RCd // start = start+(len+1)
1986 | add RAd, 1
1987 | jg <3 // start > 0?
1988 |8: // Underflow.
1989 | mov RAd, 1 // start = 1
1990 | jmp <3
1991 |
1992 |->fff_emptystr: // Range underflow.
1993 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1994 | jmp <4
1995 |
1996 |.macro ffstring_op, name
1997 | .ffunc_1 string_ .. name
1998 | ffgccheck
1999 |.if X64WIN
2000 | mov STR:TMPR, [BASE]
2001 | checkstr STR:TMPR, ->fff_fallback
2002 |.else
2003 | mov STR:CARG2, [BASE]
2004 | checkstr STR:CARG2, ->fff_fallback
2005 |.endif
2006 | mov L:RB, SAVE_L // L stays in RB; fff_resstr reads L:RB->base.
2007 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // First argument: the global tmpbuf.
2008 | mov L:RB->base, BASE
2009 |.if X64WIN
2010 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
2011 |.endif
2012 | mov RC, SBUF:CARG1->b
2013 | mov SBUF:CARG1->L, L:RB
2014 | mov SBUF:CARG1->w, RC // Set the buffer's w field to its b field.
2015 | mov SAVE_PC, PC
2016 | call extern lj_buf_putstr_ .. name
2017 | mov CARG1, rax // The value returned in rax is passed on as the argument of lj_buf_tostr.
2018 | call extern lj_buf_tostr
2019 | jmp ->fff_resstr
2020 |.endmacro
2021 |
2022 |ffstring_op reverse
2023 |ffstring_op lower
2024 |ffstring_op upper
2025 |
2026 |//-- Bit library --------------------------------------------------------
2027 |
2028 |.macro .ffunc_bit, name, kind, fdef
2029 | fdef name
2030 |.if kind == 2
2031 | sseconst_tobit xmm1, RB // kind 2 loads the constant first, before RB receives the argument.
2032 |.endif
2033 |.if DUALNUM
2034 | mov RB, [BASE]
2035 | checkint RB, >1
2036 |.if kind > 0
2037 | jmp >2 // Integer argument: skip the conversion, RBd already holds the value.
2038 |.else
2039 | jmp ->fff_resbit // kind 0 with an integer argument: return it directly.
2040 |.endif
2041 |1:
2042 | ja ->fff_fallback
2043 | movd xmm0, RB
2044 |.else
2045 | checknumtp [BASE], ->fff_fallback
2046 | movsd xmm0, qword [BASE]
2047 |.endif
2048 |.if kind < 2
2049 | sseconst_tobit xmm1, RB
2050 |.endif
2051 | addsd xmm0, xmm1 // Add the sseconst_tobit constant ...
2052 | movd RBd, xmm0 // ... and take the low 32 bits of the sum as the integer result.
2053 |2:
2054 |.endmacro
2055 |
2056 |.macro .ffunc_bit, name, kind
2057 | .ffunc_bit name, kind, .ffunc_1 // Two-parameter form defaults the entry macro to .ffunc_1.
2058 |.endmacro
2059 |
2060 |.ffunc_bit bit_tobit, 0
2061 | jmp ->fff_resbit
2062 |
2063 |.macro .ffunc_bit_op, name, ins
2064 | .ffunc_bit name, 2
2065 | mov TMPRd, NARGS:RDd // Save for fallback.
2066 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument; the loop walks down towards BASE.
2067 |1:
2068 | cmp RD, BASE
2069 | jbe ->fff_resbit // Only the first argument is left: done, result is in RBd.
2070 |.if DUALNUM
2071 | mov RA, [RD]
2072 | checkint RA, >2
2073 | ins RBd, RAd // Fold an integer argument into the accumulator.
2074 | sub RD, 8
2075 | jmp <1
2076 |2:
2077 | ja ->fff_fallback_bit_op
2078 | movd xmm0, RA
2079 |.else
2080 | checknumtp [RD], ->fff_fallback_bit_op
2081 | movsd xmm0, qword [RD]
2082 |.endif
2083 | addsd xmm0, xmm1 // xmm1 still holds the constant loaded by .ffunc_bit (kind 2).
2084 | movd RAd, xmm0
2085 | ins RBd, RAd // Fold the converted argument into the accumulator.
2086 | sub RD, 8
2087 | jmp <1
2088 |.endmacro
2089 |
2090 |.ffunc_bit_op bit_band, and
2091 |.ffunc_bit_op bit_bor, or
2092 |.ffunc_bit_op bit_bxor, xor
2093 |
2094 |.ffunc_bit bit_bswap, 1
2095 | bswap RBd // Reverse the byte order of the 32-bit value.
2096 | jmp ->fff_resbit
2097 |
2098 |.ffunc_bit bit_bnot, 1
2099 | not RBd // Bitwise complement of the 32-bit value.
2100 |.if DUALNUM
2101 | jmp ->fff_resbit
2102 |.else
2103 |->fff_resbit: // Non-DUALNUM definition of the shared exit: return RBd converted to a double.
2104 | cvtsi2sd xmm0, RBd
2105 | jmp ->fff_resxmm0
2106 |.endif
2107 |
2108 |->fff_fallback_bit_op: // Fallback entry for .ffunc_bit_op, which uses RD as a pointer while looping.
2109 | mov NARGS:RDd, TMPRd // Restore for fallback
2110 | jmp ->fff_fallback
2111 |
2112 |.macro .ffunc_bit_sh, name, ins
2113 |.if DUALNUM
2114 | .ffunc_bit name, 1, .ffunc_2
2115 | // Note: no inline conversion from number for 2nd argument!
2116 | mov RA, [BASE+8]
2117 | checkint RA, ->fff_fallback
2118 |.else
2119 | .ffunc_nn name
2120 | sseconst_tobit xmm2, RB
2121 | addsd xmm0, xmm2 // Convert both arguments the same way: add the constant ...
2122 | addsd xmm1, xmm2
2123 | movd RBd, xmm0 // ... and take the low 32 bits. RB = value to shift.
2124 | movd RAd, xmm1 // RA = shift count.
2125 |.endif
2126 | ins RBd, cl // Assumes RA is ecx.
2127 | jmp ->fff_resbit
2128 |.endmacro
2129 |
2130 |.ffunc_bit_sh bit_lshift, shl
2131 |.ffunc_bit_sh bit_rshift, shr
2132 |.ffunc_bit_sh bit_arshift, sar
2133 |.ffunc_bit_sh bit_rol, rol
2134 |.ffunc_bit_sh bit_ror, ror
2135 |
2136 |//-----------------------------------------------------------------------
2137 |
2138 |->fff_fallback_2:
2139 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2140 | jmp ->fff_fallback
2141 |->fff_fallback_1:
2142 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2143 |->fff_fallback: // Call fast function fallback handler.
2144 | // BASE = new base, RD = nargs+1
2145 | mov L:RB, SAVE_L
2146 | mov PC, [BASE-8] // Fallback may overwrite PC.
2147 | mov SAVE_PC, PC // Redundant (but a defined value).
2148 | mov L:RB->base, BASE
2149 | lea RD, [BASE+NARGS:RD*8-8]
2150 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2151 | mov L:RB->top, RD // top = BASE + nargs*8.
2152 | mov CFUNC:RD, [BASE-16] // The called function object lives at BASE-16.
2153 | cleartp CFUNC:RD
2154 | cmp RA, L:RB->maxstack
2155 | ja >5 // Need to grow stack.
2156 | mov CARG1, L:RB
2157 | call aword CFUNC:RD->f // (lua_State *L)
2158 | mov BASE, L:RB->base
2159 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2160 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2161 |1:
2162 | mov RA, L:RB->top
2163 | sub RA, BASE
2164 | shr RAd, 3 // (top - base) / 8 = number of arguments.
2165 | test RDd, RDd
2166 | lea NARGS:RDd, [RAd+1] // lea keeps the flags from the test above for the jne below.
2167 | mov LFUNC:RB, [BASE-16]
2168 | jne ->vm_call_tail // Returned -1?
2169 | cleartp LFUNC:RB
2170 | ins_callt // Returned 0: retry fast path.
2171 |
2172 |// Reconstruct previous base for vmeta_call during tailcall.
2173 |->vm_call_tail:
2174 | mov RA, BASE
2175 | test PCd, FRAME_TYPE
2176 | jnz >3
2177 | movzx RBd, PC_RA
2178 | neg RB
2179 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2180 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2181 |3:
2182 | mov RB, PC
2183 | and RB, -8 // Clear the low three bits of PC to get the frame delta in bytes.
2184 | sub BASE, RB
2185 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2186 |
2187 |5: // Grow stack for fallback handler.
2188 | mov CARG2d, LUA_MINSTACK
2189 | mov CARG1, L:RB
2190 | call extern lj_state_growstack // (lua_State *L, int n)
2191 | mov BASE, L:RB->base
2192 | xor RDd, RDd // Simulate a return 0.
2193 | jmp <1 // Dumb retry (goes through ff first).
2194 |
2195 |->fff_gcstep: // Call GC step function.
2196 | // BASE = new base, RD = nargs+1
2197 | pop RB // Must keep stack at same level.
2198 | mov TMP1, RB // Save return address
2199 | mov L:RB, SAVE_L
2200 | mov SAVE_PC, PC // Redundant (but a defined value).
2201 | mov L:RB->base, BASE
2202 | lea RD, [BASE+NARGS:RD*8-8]
2203 | mov CARG1, L:RB
2204 | mov L:RB->top, RD // top = BASE + nargs*8.
2205 | call extern lj_gc_step // (lua_State *L)
2206 | mov BASE, L:RB->base // Reload BASE from L after the call.
2207 | mov RD, L:RB->top
2208 | sub RD, BASE
2209 | shr RDd, 3
2210 | add NARGS:RDd, 1 // Recompute RD = nargs+1 from top - base.
2211 | mov RB, TMP1
2212 | push RB // Restore return address.
2213 | ret
2214 |
2215 |//-----------------------------------------------------------------------
2216 |//-- Special dispatch targets -------------------------------------------
2217 |//-----------------------------------------------------------------------
2218 |
2219 |->vm_record: // Dispatch target for recording phase.
2220 |.if JIT
2221 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2222 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2223 | jnz >5
2224 | // Decrement the hookcount for consistency, but always do the call.
2225 | test RDL, HOOK_ACTIVE
2226 | jnz >1
2227 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2228 | jz >1
2229 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2230 | jmp >1
2231 |.endif
2232 |
2233 |->vm_rethook: // Dispatch target for return hooks.
2234 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2235 | test RDL, HOOK_ACTIVE // Hook already active?
2236 | jnz >5
2237 | jmp >1
2238 |
2239 |->vm_inshook: // Dispatch target for instr/line hooks.
2240 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2241 | test RDL, HOOK_ACTIVE // Hook already active?
2242 | jnz >5
2243 |
2244 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2245 | jz >5
2246 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2247 | jz >1
2248 | test RDL, LUA_MASKLINE
2249 | jz >5
2250 |1:
2251 | mov L:RB, SAVE_L
2252 | mov L:RB->base, BASE
2253 | mov CARG2, PC // Caveat: CARG2 == BASE
2254 | mov CARG1, L:RB
2255 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2256 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2257 |3:
2258 | mov BASE, L:RB->base
2259 |4:
2260 | movzx RAd, PC_RA
2261 |5:
2262 | movzx OP, PC_OP
2263 | movzx RDd, PC_RD
2264 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2265 |
2266 |->cont_hook: // Continue from hook yield.
2267 | add PC, 4
2268 | mov RA, [RB-40]
2269 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2270 | jmp <4
2271 |
2272 |->vm_hotloop: // Hot loop counter underflow.
2273 |.if JIT
2274 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2275 | cleartp LFUNC:RB
2276 | mov RB, LFUNC:RB->pc
2277 | movzx RDd, byte [RB+PC2PROTO(framesize)]
2278 | lea RD, [BASE+RD*8]
2279 | mov L:RB, SAVE_L
2280 | mov L:RB->base, BASE
2281 | mov L:RB->top, RD
2282 | mov CARG2, PC
2283 | lea CARG1, [DISPATCH+GG_DISP2J]
2284 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2285 | mov SAVE_PC, PC
2286 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2287 | jmp <3
2288 |.endif
2289 |
2290 |->vm_callhook: // Dispatch target for call hooks.
2291 | mov SAVE_PC, PC
2292 |.if JIT
2293 | jmp >1
2294 |.endif
2295 |
2296 |->vm_hotcall: // Hot call counter underflow.
2297 |.if JIT
2298 | mov SAVE_PC, PC
2299 | or PC, 1 // Marker for hot call.
2300 |1:
2301 |.endif
2302 | lea RD, [BASE+NARGS:RD*8-8]
2303 | mov L:RB, SAVE_L
2304 | mov L:RB->base, BASE
2305 | mov L:RB->top, RD
2306 | mov CARG2, PC
2307 | mov CARG1, L:RB
2308 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2309 | // ASMFunction returned in eax/rax (RD).
2310 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2311 |.if JIT
2312 | and PC, -2
2313 |.endif
2314 | mov BASE, L:RB->base
2315 | mov RA, RD
2316 | mov RD, L:RB->top
2317 | sub RD, BASE
2318 | mov RB, RA
2319 | movzx RAd, PC_RA
2320 | shr RDd, 3
2321 | add NARGS:RDd, 1
2322 | jmp RB
2323 |
2324 |->cont_stitch: // Trace stitching.
2325 |.if JIT
2326 | // BASE = base, RC = result, RB = mbase
2327 | mov TRACE:ITYPE, [RB-40] // Save previous trace.
2328 | cleartp TRACE:ITYPE
2329 | mov TMPRd, MULTRES
2330 | movzx RAd, PC_RA
2331 | lea RA, [BASE+RA*8] // Call base.
2332 | sub TMPRd, 1
2333 | jz >2
2334 |1: // Move results down.
2335 | mov RB, [RC]
2336 | mov [RA], RB
2337 | add RC, 8
2338 | add RA, 8
2339 | sub TMPRd, 1
2340 | jnz <1
2341 |2:
2342 | movzx RCd, PC_RA
2343 | movzx RBd, PC_RB
2344 | add RC, RB
2345 | lea RC, [BASE+RC*8-8]
2346 |3:
2347 | cmp RC, RA
2348 | ja >9 // More results wanted?
2349 |
2350 | test TRACE:ITYPE, TRACE:ITYPE
2351 | jz ->cont_nop
2352 | movzx RBd, word TRACE:ITYPE->traceno
2353 | movzx RDd, word TRACE:ITYPE->link
2354 | cmp RDd, RBd
2355 | je ->cont_nop // Blacklisted.
2356 | test RDd, RDd
2357 | jne =>BC_JLOOP // Jump to stitched trace.
2358 |
2359 | // Stitch a new trace to the previous trace.
2360 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2361 | mov L:RB, SAVE_L
2362 | mov L:RB->base, BASE
2363 | mov CARG2, PC
2364 | lea CARG1, [DISPATCH+GG_DISP2J]
2365 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2366 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2367 | mov BASE, L:RB->base
2368 | jmp ->cont_nop
2369 |
2370 |9: // Fill up results with nil.
2371 | mov aword [RA], LJ_TNIL
2372 | add RA, 8
2373 | jmp <3
2374 |.endif
2375 |
2376 |->vm_profhook: // Dispatch target for profiler hook.
2377#if LJ_HASPROFILE
2378 | mov L:RB, SAVE_L
2379 | mov L:RB->base, BASE
2380 | mov CARG2, PC // Caveat: CARG2 == BASE
2381 | mov CARG1, L:RB
2382 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2383 | mov BASE, L:RB->base
2384 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2385 | sub PC, 4
2386 | jmp ->cont_nop
2387#endif
2388 |
2389 |//-----------------------------------------------------------------------
2390 |//-- Trace exit handler -------------------------------------------------
2391 |//-----------------------------------------------------------------------
2392 |
2393 |// Called from an exit stub with the exit number on the stack.
2394 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2395 |->vm_exit_handler:
2396 |.if JIT
2397 | push r13; push r12
2398 | push r11; push r10; push r9; push r8
2399 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
2400 | push rbx; push rdx; push rcx; push rax
2401 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2402 | mov RCH, byte [rbp-16]
2403 | mov [rbp-8], r15; mov [rbp-16], r14
2404 | // DISPATCH is preserved on-trace in LJ_GC64 mode.
2405 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2406 | set_vmstate EXIT
2407 | mov [DISPATCH+DISPATCH_J(exitno)], RCd
2408 | mov [DISPATCH+DISPATCH_J(parent)], RAd
2409 |.if X64WIN
2410 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2411 |.else
2412 | sub rsp, 16*8 // Room for SSE regs.
2413 |.endif
2414 | add rbp, -128
2415 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2416 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2417 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2418 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2419 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2420 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2421 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2422 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2423 | // Caveat: RB is rbp.
2424 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2425 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2426 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2427 | mov L:RB->base, BASE
2428 |.if X64WIN
2429 | lea CARG2, [rsp+4*8]
2430 |.else
2431 | mov CARG2, rsp
2432 |.endif
2433 | lea CARG1, [DISPATCH+GG_DISP2J]
2434 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2435 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2436 | // MULTRES or negated error code returned in eax (RD).
2437 | mov RA, L:RB->cframe
2438 | and RA, CFRAME_RAWMASK
2439 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2440 | mov BASE, L:RB->base
2441 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2442 | jmp >1
2443 |.endif
2444 |->vm_exit_interp:
2445 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2446 |.if JIT
2447 | // Restore additional callee-save registers only used in compiled code.
2448 |.if X64WIN
2449 | lea RA, [rsp+10*16+4*8]
2450 |1:
2451 | movdqa xmm15, [RA-10*16]
2452 | movdqa xmm14, [RA-9*16]
2453 | movdqa xmm13, [RA-8*16]
2454 | movdqa xmm12, [RA-7*16]
2455 | movdqa xmm11, [RA-6*16]
2456 | movdqa xmm10, [RA-5*16]
2457 | movdqa xmm9, [RA-4*16]
2458 | movdqa xmm8, [RA-3*16]
2459 | movdqa xmm7, [RA-2*16]
2460 | mov rsp, RA // Reposition stack to C frame.
2461 | movdqa xmm6, [RA-1*16]
2462 | mov r15, CSAVE_1
2463 | mov r14, CSAVE_2
2464 | mov r13, CSAVE_3
2465 | mov r12, CSAVE_4
2466 |.else
2467 | lea RA, [rsp+16]
2468 |1:
2469 | mov r13, [RA-8]
2470 | mov r12, [RA]
2471 | mov rsp, RA // Reposition stack to C frame.
2472 |.endif
2473 | test RDd, RDd; js >9 // Check for error from exit.
2474 | mov L:RB, SAVE_L
2475 | mov MULTRES, RDd
2476 | mov LFUNC:KBASE, [BASE-16]
2477 | cleartp LFUNC:KBASE
2478 | mov KBASE, LFUNC:KBASE->pc
2479 | mov KBASE, [KBASE+PC2PROTO(k)]
2480 | mov L:RB->base, BASE
2481 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2482 | set_vmstate INTERP
2483 | // Modified copy of ins_next which handles function header dispatch, too.
2484 | mov RCd, [PC]
2485 | movzx RAd, RCH
2486 | movzx OP, RCL
2487 | add PC, 4
2488 | shr RCd, 16
2489 | cmp OP, BC_FUNCF // Function header?
2490 | jb >3
2491 | cmp OP, BC_FUNCC+2 // Fast function?
2492 | jae >4
2493 |2:
2494 | mov RCd, MULTRES // RC/RD holds nres+1.
2495 |3:
2496 | jmp aword [DISPATCH+OP*8]
2497 |
2498 |4: // Check frame below fast function.
2499 | mov RC, [BASE-8]
2500 | test RCd, FRAME_TYPE
2501 | jnz <2 // Trace stitching continuation?
2502 | // Otherwise set KBASE for Lua function below fast function.
2503 | movzx RCd, byte [RC-3]
2504 | neg RC
2505 | mov LFUNC:KBASE, [BASE+RC*8-32]
2506 | cleartp LFUNC:KBASE
2507 | mov KBASE, LFUNC:KBASE->pc
2508 | mov KBASE, [KBASE+PC2PROTO(k)]
2509 | jmp <2
2510 |
2511 |9: // Rethrow error from the right C frame.
2512 | mov CARG2d, RDd
2513 | mov CARG1, L:RB
2514 | neg CARG2d
2515 | call extern lj_err_trace // (lua_State *L, int errcode)
2516 |.endif
2517 |
2518 |//-----------------------------------------------------------------------
2519 |//-- Math helper functions ----------------------------------------------
2520 |//-----------------------------------------------------------------------
2521 |
2522 |// FP value rounding. Called by math.floor/math.ceil fast functions
2523 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2524 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. cond is not referenced in the body.
2525 |->name:
2526 |->name .. _sse: // Second label for the same entry point.
2527 | sseconst_abs xmm2, RD
2528 | sseconst_2p52 xmm3, RD
2529 | movaps xmm1, xmm0
2530 | andpd xmm1, xmm2 // |x|
2531 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2532 | jbe >1 // Return x unchanged in that case.
2533 | andnpd xmm2, xmm0 // Isolate sign bit.
2534 |.if mode == 2 // trunc(x)?
2535 | movaps xmm0, xmm1 // Keep |x| for the comparison below.
2536 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2537 | subsd xmm1, xmm3
2538 | sseconst_1 xmm3, RD
2539 | cmpsd xmm0, xmm1, 1 // |x| < result?
2540 | andpd xmm0, xmm3 // Compare mask selects the constant or zero.
2541 | subsd xmm1, xmm0 // If yes, subtract 1 (xmm3 was loaded by sseconst_1).
2542 | orpd xmm1, xmm2 // Merge sign bit back in.
2543 |.else
2544 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2545 | subsd xmm1, xmm3
2546 | orpd xmm1, xmm2 // Merge sign bit back in.
2547 | .if mode == 1 // ceil(x)?
2548 | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2549 | cmpsd xmm0, xmm1, 6 // x > result?
2550 | .else // floor(x)?
2551 | sseconst_1 xmm2, RD
2552 | cmpsd xmm0, xmm1, 1 // x < result?
2553 | .endif
2554 | andpd xmm0, xmm2 // Compare mask selects the constant or zero.
2555 | subsd xmm1, xmm0 // If yes, subtract +-1.
2556 |.endif
2557 | movaps xmm0, xmm1 // Result goes back to xmm0.
2558 |1:
2559 | ret
2560 |.endmacro
2561 |
2562 | vm_round vm_floor, 0, 1 // Emits vm_floor and vm_floor_sse.
2563 | vm_round vm_ceil, 1, JIT // Emits vm_ceil and vm_ceil_sse.
2564 | vm_round vm_trunc, 2, JIT // Emits vm_trunc and vm_trunc_sse.
2565 |
2566 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2567 |->vm_mod:
2568 |// Args in xmm0/xmm1, return value in xmm0.
2569 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2570 | movaps xmm5, xmm0 // Save x.
2571 | divsd xmm0, xmm1 // xmm0 = x/y
2572 | sseconst_abs xmm2, RD
2573 | sseconst_2p52 xmm3, RD
2574 | movaps xmm4, xmm0
2575 | andpd xmm4, xmm2 // |x/y|
2576 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2577 | jbe >1 // Use x/y as-is in that case.
2578 | andnpd xmm2, xmm0 // Isolate sign bit.
2579 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2580 | subsd xmm4, xmm3
2581 | orpd xmm4, xmm2 // Merge sign bit back in.
2582 | sseconst_1 xmm2, RD
2583 | cmpsd xmm0, xmm4, 1 // x/y < result?
2584 | andpd xmm0, xmm2 // Compare mask selects the constant or zero.
2585 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2586 | movaps xmm0, xmm5 // Restore x.
2587 | mulsd xmm1, xmm4 // y * floor(x/y)
2588 | subsd xmm0, xmm1 // x - y*floor(x/y)
2589 | ret
2590 |1:
2591 | mulsd xmm1, xmm0 // y * (x/y), quotient not rounded.
2592 | movaps xmm0, xmm5 // Restore x.
2593 | subsd xmm0, xmm1 // x - y*(x/y)
2594 | ret
2595 |
2596 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2597 |->vm_powi_sse:
2598 | cmp eax, 1; jle >6 // i<=1?
2599 | // Now 1 < (unsigned)i <= 0x80000000.
2600 |1: // Handle leading zeros.
2601 | test eax, 1; jnz >2 // Lowest bit set: stop pre-squaring.
2602 | mulsd xmm0, xmm0 // x = x*x
2603 | shr eax, 1 // i >>= 1
2604 | jmp <1
2605 |2:
2606 | shr eax, 1; jz >5 // Drop the set bit; done if no bits remain.
2607 | movaps xmm1, xmm0 // xmm1 accumulates the product.
2608 |3: // Handle trailing bits.
2609 | mulsd xmm0, xmm0 // Square for the next bit.
2610 | shr eax, 1; jz >4 // Last bit shifted out: do the final multiply.
2611 | jnc <3 // Bit was 0: only square.
2612 | mulsd xmm1, xmm0 // Bit was 1: fold the current square into the product.
2613 | jmp <3
2614 |4:
2615 | mulsd xmm0, xmm1 // Result = last square * accumulated product.
2616 |5:
2617 | ret
2618 |6: // Flags are still those of cmp eax, 1.
2619 | je <5 // x^1 ==> x
2620 | jb >7 // x^0 ==> 1
2621 | neg eax // Remaining case i < 0: compute x^(-i) first.
2622 | call <1
2623 | sseconst_1 xmm1, RD
2624 | divsd xmm1, xmm0 // xmm1 = constant / x^(-i)
2625 | movaps xmm0, xmm1
2626 | ret
2627 |7:
2628 | sseconst_1 xmm0, RD
2629 | ret
2630 |
2631 |//-----------------------------------------------------------------------
2632 |//-- Miscellaneous functions --------------------------------------------
2633 |//-----------------------------------------------------------------------
2634 |
2635 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2636 |->vm_cpuid:
2637 | mov eax, CARG1d // eax = f, the CPUID function number.
2638 | .if X64WIN; push rsi; mov rsi, CARG2; .endif // res is addressed via rsi below. NOTE(review): non-Windows path presumably already has CARG2 in rsi - confirm.
2639 | push rbx // cpuid overwrites ebx; rbx is restored before returning.
2640 | xor ecx, ecx // ecx = 0 as cpuid input.
2641 | cpuid
2642 | mov [rsi], eax // res[0] = eax
2643 | mov [rsi+4], ebx // res[1] = ebx
2644 | mov [rsi+8], ecx // res[2] = ecx
2645 | mov [rsi+12], edx // res[3] = edx
2646 | pop rbx
2647 | .if X64WIN; pop rsi; .endif
2648 | ret // eax is left as written by cpuid.
2649 |
2650 |//-----------------------------------------------------------------------
2651 |//-- Assertions ---------------------------------------------------------
2652 |//-----------------------------------------------------------------------
2653 |
2654 |->assert_bad_for_arg_type:
2655#ifdef LUA_USE_ASSERT
2656 | int3
2657#endif
2658 | int3
2659 |
2660 |//-----------------------------------------------------------------------
2661 |//-- FFI helper functions -----------------------------------------------
2662 |//-----------------------------------------------------------------------
2663 |
2664 |// Handler for callback functions. Callback slot number in ah/al.
2665 |->vm_ffi_callback:
2666 |.if FFI
2667 |.type CTSTATE, CTState, PC
2668 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2669 | lea DISPATCH, [ebp+GG_G2DISP]
2670 | mov CTSTATE, GL:ebp->ctype_state
2671 | movzx eax, ax
2672 | mov CTSTATE->cb.slot, eax
2673 | mov CTSTATE->cb.gpr[0], CARG1
2674 | mov CTSTATE->cb.gpr[1], CARG2
2675 | mov CTSTATE->cb.gpr[2], CARG3
2676 | mov CTSTATE->cb.gpr[3], CARG4
2677 | movsd qword CTSTATE->cb.fpr[0], xmm0
2678 | movsd qword CTSTATE->cb.fpr[1], xmm1
2679 | movsd qword CTSTATE->cb.fpr[2], xmm2
2680 | movsd qword CTSTATE->cb.fpr[3], xmm3
2681 |.if X64WIN
2682 | lea rax, [rsp+CFRAME_SIZE+4*8]
2683 |.else
2684 | lea rax, [rsp+CFRAME_SIZE]
2685 | mov CTSTATE->cb.gpr[4], CARG5
2686 | mov CTSTATE->cb.gpr[5], CARG6
2687 | movsd qword CTSTATE->cb.fpr[4], xmm4
2688 | movsd qword CTSTATE->cb.fpr[5], xmm5
2689 | movsd qword CTSTATE->cb.fpr[6], xmm6
2690 | movsd qword CTSTATE->cb.fpr[7], xmm7
2691 |.endif
2692 | mov CTSTATE->cb.stack, rax
2693 | mov CARG2, rsp
2694 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2695 | mov CARG1, CTSTATE
2696 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2697 | // lua_State * returned in eax (RD).
2698 | set_vmstate INTERP
2699 | mov BASE, L:RD->base
2700 | mov RD, L:RD->top
2701 | sub RD, BASE
2702 | mov LFUNC:RB, [BASE-16]
2703 | cleartp LFUNC:RB
2704 | shr RD, 3
2705 | add RD, 1
2706 | ins_callt
2707 |.endif
2708 |
2709 |->cont_ffi_callback: // Return from FFI callback.
2710 |.if FFI
2711 | mov L:RA, SAVE_L
2712 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
2713 | mov aword CTSTATE->L, L:RA
2714 | mov L:RA->base, BASE
2715 | mov L:RA->top, RB
2716 | mov CARG1, CTSTATE
2717 | mov CARG2, RC
2718 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2719 | mov rax, CTSTATE->cb.gpr[0]
2720 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2721 | jmp ->vm_leave_unw
2722 |.endif
2723 |
2724 |->vm_ffi_call: // Call C function via FFI.
2725 | // Caveat: needs special frame unwinding, see below.
2726 |.if FFI
2727 | .type CCSTATE, CCallState, rbx
2728 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame setup; rbx (saved at [rbp-8]) now holds the CCallState pointer.
2729 |
2730 | // Readjust stack.
2731 | mov eax, CCSTATE->spadj
2732 | sub rsp, rax // Reserve spadj bytes of stack.
2733 |
2734 | // Copy stack slots.
2735 | movzx ecx, byte CCSTATE->nsp
2736 | sub ecx, 1
2737 | js >2 // nsp == 0: nothing to copy.
2738 |1:
2739 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
2740 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax // Slots land CCALL_SPS_EXTRA slots above rsp.
2741 | sub ecx, 1
2742 | jns <1 // Copies indices nsp-1 down to 0.
2743 |2:
2744 |
2745 | movzx eax, byte CCSTATE->nfpr
2746 | mov CARG1, CCSTATE->gpr[0]
2747 | mov CARG2, CCSTATE->gpr[1]
2748 | mov CARG3, CCSTATE->gpr[2]
2749 | mov CARG4, CCSTATE->gpr[3]
2750 |.if not X64WIN
2751 | mov CARG5, CCSTATE->gpr[4]
2752 | mov CARG6, CCSTATE->gpr[5]
2753 |.endif
2754 | test eax, eax; jz >5 // nfpr == 0: skip loading FP argument registers.
2755 | movaps xmm0, CCSTATE->fpr[0]
2756 | movaps xmm1, CCSTATE->fpr[1]
2757 | movaps xmm2, CCSTATE->fpr[2]
2758 | movaps xmm3, CCSTATE->fpr[3]
2759 |.if not X64WIN
2760 | cmp eax, 4; jbe >5 // nfpr <= 4: xmm4-xmm7 are not loaded.
2761 | movaps xmm4, CCSTATE->fpr[4]
2762 | movaps xmm5, CCSTATE->fpr[5]
2763 | movaps xmm6, CCSTATE->fpr[6]
2764 | movaps xmm7, CCSTATE->fpr[7]
2765 |.endif
2766 |5:
2767 |
2768 | call aword CCSTATE->func // eax still holds nfpr here. NOTE(review): presumably doubles as the vararg vector-register count on non-Windows ABIs - confirm.
2769 |
2770 | mov CCSTATE->gpr[0], rax // Store return registers back into the call state.
2771 | movaps CCSTATE->fpr[0], xmm0
2772 |.if not X64WIN
2773 | mov CCSTATE->gpr[1], rdx
2774 | movaps CCSTATE->fpr[1], xmm1
2775 |.endif
2776 |
2777 | mov rbx, [rbp-8]; leave; ret // Reload rbx from its prologue save slot, then tear down the frame.
2778 |.endif
2779 |// Note: vm_ffi_call must be the last function in this object file!
2780 |
2781 |//-----------------------------------------------------------------------
2782}
2783
2784/* Generate the code for a single instruction. */
2785static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2786{
2787 int vk = 0;
2788 |// Note: aligning all instructions does not pay off.
2789 |=>defop:
2790
2791 switch (op) {
2792
2793 /* -- Comparison ops ---------------------------------------------------- */
2794
2795 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2796
2797 |.macro jmp_comp, lt, ge, le, gt, target
2798 ||switch (op) {
2799 ||case BC_ISLT:
2800 | lt target
2801 ||break;
2802 ||case BC_ISGE:
2803 | ge target
2804 ||break;
2805 ||case BC_ISLE:
2806 | le target
2807 ||break;
2808 ||case BC_ISGT:
2809 | gt target
2810 ||break;
2811 ||default: break; /* Shut up GCC. */
2812 ||}
2813 |.endmacro
2814
2815 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2816 | // RA = src1, RD = src2, JMP with RD = target
2817 | ins_AD
2818 | mov ITYPE, [BASE+RA*8]
2819 | mov RB, [BASE+RD*8]
2820 | mov RA, ITYPE
2821 | mov RD, RB
2822 | sar ITYPE, 47
2823 | sar RB, 47
2824 |.if DUALNUM
2825 | cmp ITYPEd, LJ_TISNUM; jne >7
2826 | cmp RBd, LJ_TISNUM; jne >8
2827 | add PC, 4
2828 | cmp RAd, RDd
2829 | jmp_comp jge, jl, jg, jle, >9
2830 |6:
2831 | movzx RDd, PC_RD
2832 | branchPC RD
2833 |9:
2834 | ins_next
2835 |
2836 |7: // RA is not an integer.
2837 | ja ->vmeta_comp
2838 | // RA is a number.
2839 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2840 | // RA is a number, RD is an integer.
2841 | cvtsi2sd xmm0, RDd
2842 | jmp >2
2843 |
2844 |8: // RA is an integer, RD is not an integer.
2845 | ja ->vmeta_comp
2846 | // RA is an integer, RD is a number.
2847 | cvtsi2sd xmm1, RAd
2848 | movd xmm0, RD
2849 | jmp >3
2850 |.else
2851 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2852 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2853 |.endif
2854 |1:
2855 | movd xmm0, RD
2856 |2:
2857 | movd xmm1, RA
2858 |3:
2859 | add PC, 4
2860 | ucomisd xmm0, xmm1
2861 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2862 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2863 |.if DUALNUM
2864 | jmp_comp jbe, ja, jb, jae, <9
2865 | jmp <6
2866 |.else
2867 | jmp_comp jbe, ja, jb, jae, >1
2868 | movzx RDd, PC_RD
2869 | branchPC RD
2870 |1:
2871 | ins_next
2872 |.endif
2873 break;
2874
2875 case BC_ISEQV: case BC_ISNEV:
2876 vk = op == BC_ISEQV;
2877 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2878 | mov RB, [BASE+RD*8]
2879 | mov ITYPE, [BASE+RA*8]
2880 | add PC, 4
2881 | mov RD, RB
2882 | mov RA, ITYPE
2883 | sar RB, 47
2884 | sar ITYPE, 47
2885 |.if DUALNUM
2886 | cmp RBd, LJ_TISNUM; jne >7
2887 | cmp ITYPEd, LJ_TISNUM; jne >8
2888 | cmp RDd, RAd
2889 if (vk) {
2890 | jne >9
2891 } else {
2892 | je >9
2893 }
2894 | movzx RDd, PC_RD
2895 | branchPC RD
2896 |9:
2897 | ins_next
2898 |
2899 |7: // RD is not an integer.
2900 | ja >5
2901 | // RD is a number.
2902 | movd xmm1, RD
2903 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2904 | // RD is a number, RA is an integer.
2905 | cvtsi2sd xmm0, RAd
2906 | jmp >2
2907 |
2908 |8: // RD is an integer, RA is not an integer.
2909 | ja >5
2910 | // RD is an integer, RA is a number.
2911 | cvtsi2sd xmm1, RDd
2912 | jmp >1
2913 |
2914 |.else
2915 | cmp RBd, LJ_TISNUM; jae >5
2916 | cmp ITYPEd, LJ_TISNUM; jae >5
2917 | movd xmm1, RD
2918 |.endif
2919 |1:
2920 | movd xmm0, RA
2921 |2:
2922 | ucomisd xmm0, xmm1
2923 |4:
2924 iseqne_fp:
2925 if (vk) {
2926 | jp >2 // Unordered means not equal.
2927 | jne >2
2928 } else {
2929 | jp >2 // Unordered means not equal.
2930 | je >1
2931 }
2932 iseqne_end:
2933 if (vk) {
2934 |1: // EQ: Branch to the target.
2935 | movzx RDd, PC_RD
2936 | branchPC RD
2937 |2: // NE: Fallthrough to next instruction.
2938 |.if not FFI
2939 |3:
2940 |.endif
2941 } else {
2942 |.if not FFI
2943 |3:
2944 |.endif
2945 |2: // NE: Branch to the target.
2946 | movzx RDd, PC_RD
2947 | branchPC RD
2948 |1: // EQ: Fallthrough to next instruction.
2949 }
2950 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2951 op == BC_ISEQN || op == BC_ISNEN)) {
2952 | jmp <9
2953 } else {
2954 | ins_next
2955 }
2956 |
2957 if (op == BC_ISEQV || op == BC_ISNEV) {
2958 |5: // Either or both types are not numbers.
2959 |.if FFI
2960 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2961 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2962 |.endif
2963 | cmp RA, RD
2964 | je <1 // Same GCobjs or pvalues?
2965 | cmp RBd, ITYPEd
2966 | jne <2 // Not the same type?
2967 | cmp RBd, LJ_TISTABUD
2968 | ja <2 // Different objects and not table/ud?
2969 |
2970 | // Different tables or userdatas. Need to check __eq metamethod.
2971 | // Field metatable must be at same offset for GCtab and GCudata!
2972 | cleartp TAB:RA
2973 | mov TAB:RB, TAB:RA->metatable
2974 | test TAB:RB, TAB:RB
2975 | jz <2 // No metatable?
2976 | test byte TAB:RB->nomm, 1<<MM_eq
2977 | jnz <2 // Or 'no __eq' flag set?
2978 if (vk) {
2979 | xor RBd, RBd // ne = 0
2980 } else {
2981 | mov RBd, 1 // ne = 1
2982 }
2983 | jmp ->vmeta_equal // Handle __eq metamethod.
2984 } else {
2985 |.if FFI
2986 |3:
2987 | cmp ITYPEd, LJ_TCDATA
2988 if (LJ_DUALNUM && vk) {
2989 | jne <9
2990 } else {
2991 | jne <2
2992 }
2993 | jmp ->vmeta_equal_cd
2994 |.endif
2995 }
2996 break;
2997 case BC_ISEQS: case BC_ISNES:
2998 vk = op == BC_ISEQS;
2999 | ins_AND // RA = src, RD = str const, JMP with RD = target
3000 | mov RB, [BASE+RA*8]
3001 | add PC, 4
3002 | checkstr RB, >3
3003 | cmp RB, [KBASE+RD*8]
3004 iseqne_test:
3005 if (vk) {
3006 | jne >2
3007 } else {
3008 | je >1
3009 }
3010 goto iseqne_end;
3011 case BC_ISEQN: case BC_ISNEN:
3012 vk = op == BC_ISEQN;
3013 | ins_AD // RA = src, RD = num const, JMP with RD = target
3014 | mov RB, [BASE+RA*8]
3015 | add PC, 4
3016 |.if DUALNUM
3017 | checkint RB, >7
3018 | mov RD, [KBASE+RD*8]
3019 | checkint RD, >8
3020 | cmp RBd, RDd
3021 if (vk) {
3022 | jne >9
3023 } else {
3024 | je >9
3025 }
3026 | movzx RDd, PC_RD
3027 | branchPC RD
3028 |9:
3029 | ins_next
3030 |
3031 |7: // RA is not an integer.
3032 | ja >3
3033 | // RA is a number.
3034 | mov RD, [KBASE+RD*8]
3035 | checkint RD, >1
3036 | // RA is a number, RD is an integer.
3037 | cvtsi2sd xmm0, RDd
3038 | jmp >2
3039 |
3040 |8: // RA is an integer, RD is a number.
3041 | cvtsi2sd xmm0, RBd
3042 | movd xmm1, RD
3043 | ucomisd xmm0, xmm1
3044 | jmp >4
3045 |1:
3046 | movd xmm0, RD
3047 |.else
3048 | checknum RB, >3
3049 |1:
3050 | movsd xmm0, qword [KBASE+RD*8]
3051 |.endif
3052 |2:
3053 | ucomisd xmm0, qword [BASE+RA*8]
3054 |4:
3055 goto iseqne_fp;
3056 case BC_ISEQP: case BC_ISNEP:
3057 vk = op == BC_ISEQP;
3058 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3059 | mov RB, [BASE+RA*8]
3060 | sar RB, 47
3061 | add PC, 4
3062 | cmp RBd, RDd
3063 if (!LJ_HASFFI) goto iseqne_test;
3064 if (vk) {
3065 | jne >3
3066 | movzx RDd, PC_RD
3067 | branchPC RD
3068 |2:
3069 | ins_next
3070 |3:
3071 | cmp RBd, LJ_TCDATA; jne <2
3072 | jmp ->vmeta_equal_cd
3073 } else {
3074 | je >2
3075 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3076 | movzx RDd, PC_RD
3077 | branchPC RD
3078 |2:
3079 | ins_next
3080 }
3081 break;
3082
3083 /* -- Unary test and copy ops ------------------------------------------- */
3084
3085 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3086 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3087 | mov ITYPE, [BASE+RD*8] // Load the source slot.
3088 | add PC, 4 // Advance PC by one 4-byte instruction word.
3089 if (op == BC_ISTC || op == BC_ISFC) { /* Copying variants only. */
3090 | mov RB, ITYPE // Keep the full value for the store below.
3091 }
3092 | sar ITYPE, 47 // Shift the value down by 47 bits for the type check.
3093 | cmp ITYPEd, LJ_TISTRUECOND
3094 if (op == BC_IST || op == BC_ISTC) {
3095 | jae >1 // IST/ISTC: no branch when ITYPE >= LJ_TISTRUECOND (unsigned).
3096 } else {
3097 | jb >1 // ISF/ISFC: no branch when ITYPE < LJ_TISTRUECOND (unsigned).
3098 }
3099 if (op == BC_ISTC || op == BC_ISFC) { /* Copying variants only. */
3100 | mov [BASE+RA*8], RB // The copy to dst happens only when the branch is taken.
3101 }
3102 | movzx RDd, PC_RD
3103 | branchPC RD
3104 |1: // Fallthrough to the next instruction.
3105 | ins_next
3106 break;
3107
3108 case BC_ISTYPE:
3109 | ins_AD // RA = src, RD = -type
3110 | mov RB, [BASE+RA*8]
3111 | sar RB, 47
3112 | add RBd, RDd
3113 | jne ->vmeta_istype
3114 | ins_next
3115 break;
3116 case BC_ISNUM:
3117 | ins_AD // RA = src, RD = -(TISNUM-1)
3118 | checknumtp [BASE+RA*8], ->vmeta_istype
3119 | ins_next
3120 break;
3121
3122 /* -- Unary ops --------------------------------------------------------- */
3123
3124 case BC_MOV:
3125 | ins_AD // RA = dst, RD = src
3126 | mov RB, [BASE+RD*8]
3127 | mov [BASE+RA*8], RB
3128 | ins_next_
3129 break;
3130 case BC_NOT:
3131 | ins_AD // RA = dst, RD = src
3132 | mov RB, [BASE+RD*8]
3133 | sar RB, 47 // Shift the value down by 47 bits for the type check.
3134 | mov RCd, 2
3135 | cmp RB, LJ_TISTRUECOND // Carry is set when RB is below LJ_TISTRUECOND (unsigned).
3136 | sbb RCd, 0 // RC = 2 - carry, i.e. 1 or 2.
3137 | shl RC, 47
3138 | not RC // Result is ~(RC << 47), the same shl/not encoding BC_KPRI uses. NOTE(review): presumably 1 -> false, 2 -> true - confirm.
3139 | mov [BASE+RA*8], RC
3140 | ins_next
3141 break;
3142 case BC_UNM:
3143 | ins_AD // RA = dst, RD = src
3144 | mov RB, [BASE+RD*8]
3145 |.if DUALNUM
3146 | checkint RB, >5
3147 | neg RBd
3148 | jo >4
3149 | setint RB
3150 |9:
3151 | mov [BASE+RA*8], RB
3152 | ins_next
3153 |4:
3154 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3155 | jmp <9
3156 |5:
3157 | ja ->vmeta_unm
3158 |.else
3159 | checknum RB, ->vmeta_unm
3160 |.endif
3161 | mov64 RD, U64x(80000000,00000000)
3162 | xor RB, RD
3163 |.if DUALNUM
3164 | jmp <9
3165 |.else
3166 | mov [BASE+RA*8], RB
3167 | ins_next
3168 |.endif
3169 break;
3170 case BC_LEN:
3171 | ins_AD // RA = dst, RD = src
3172 | mov RD, [BASE+RD*8]
3173 | checkstr RD, >2
3174 |.if DUALNUM
3175 | mov RDd, dword STR:RD->len
3176 |1:
3177 | setint RD
3178 | mov [BASE+RA*8], RD
3179 |.else
3180 | xorps xmm0, xmm0
3181 | cvtsi2sd xmm0, dword STR:RD->len
3182 |1:
3183 | movsd qword [BASE+RA*8], xmm0
3184 |.endif
3185 | ins_next
3186 |2:
3187 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
3188 | mov TAB:CARG1, TAB:RD
3189#if LJ_52
3190 | mov TAB:RB, TAB:RD->metatable
3191 | cmp TAB:RB, 0
3192 | jnz >9
3193 |3:
3194#endif
3195 |->BC_LEN_Z:
3196 | mov RB, BASE // Save BASE.
3197 | call extern lj_tab_len // (GCtab *t)
3198 | // Length of table returned in eax (RD).
3199 |.if DUALNUM
3200 | // Nothing to do.
3201 |.else
3202 | cvtsi2sd xmm0, RDd
3203 |.endif
3204 | mov BASE, RB // Restore BASE.
3205 | movzx RAd, PC_RA
3206 | jmp <1
3207#if LJ_52
3208 |9: // Check for __len.
3209 | test byte TAB:RB->nomm, 1<<MM_len
3210 | jnz <3
3211 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3212#endif
3213 break;
3214
3215 /* -- Binary ops -------------------------------------------------------- */
3216
3217 |.macro ins_arithpre, sseins, ssereg
3218 | ins_ABC
3219 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3220 ||switch (vk) {
3221 ||case 0:
3222 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3223 | .if DUALNUM
3224 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
3225 | .endif
3226 | movsd xmm0, qword [BASE+RB*8]
3227 | sseins ssereg, qword [KBASE+RC*8]
3228 || break;
3229 ||case 1:
3230 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3231 | .if DUALNUM
3232 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3233 | .endif
3234 | movsd xmm0, qword [KBASE+RC*8]
3235 | sseins ssereg, qword [BASE+RB*8]
3236 || break;
3237 ||default:
3238 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3239 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3240 | movsd xmm0, qword [BASE+RB*8]
3241 | sseins ssereg, qword [BASE+RC*8]
3242 || break;
3243 ||}
3244 |.endmacro
3245 |
3246 |.macro ins_arithdn, intins
3247 | ins_ABC
3248 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3249 ||switch (vk) {
3250 ||case 0:
3251 | mov RB, [BASE+RB*8]
3252 | mov RC, [KBASE+RC*8]
3253 | checkint RB, ->vmeta_arith_vno
3254 | checkint RC, ->vmeta_arith_vno
3255 | intins RBd, RCd; jo ->vmeta_arith_vno
3256 || break;
3257 ||case 1:
3258 | mov RB, [BASE+RB*8]
3259 | mov RC, [KBASE+RC*8]
3260 | checkint RB, ->vmeta_arith_nvo
3261 | checkint RC, ->vmeta_arith_nvo
3262 | intins RCd, RBd; jo ->vmeta_arith_nvo
3263 || break;
3264 ||default:
3265 | mov RB, [BASE+RB*8]
3266 | mov RC, [BASE+RC*8]
3267 | checkint RB, ->vmeta_arith_vvo
3268 | checkint RC, ->vmeta_arith_vvo
3269 | intins RBd, RCd; jo ->vmeta_arith_vvo
3270 || break;
3271 ||}
3272 ||if (vk == 1) {
3273 | setint RC
3274 | mov [BASE+RA*8], RC
3275 ||} else {
3276 | setint RB
3277 | mov [BASE+RA*8], RB
3278 ||}
3279 | ins_next
3280 |.endmacro
3281 |
3282 |.macro ins_arithpost
3283 | movsd qword [BASE+RA*8], xmm0
3284 |.endmacro
3285 |
3286 |.macro ins_arith, sseins
3287 | ins_arithpre sseins, xmm0
3288 | ins_arithpost
3289 | ins_next
3290 |.endmacro
3291 |
3292 |.macro ins_arith, intins, sseins
3293 |.if DUALNUM
3294 | ins_arithdn intins
3295 |.else
3296 | ins_arith, sseins
3297 |.endif
3298 |.endmacro
3299
3300 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3301 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3302 | ins_arith add, addsd
3303 break;
3304 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3305 | ins_arith sub, subsd
3306 break;
3307 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3308 | ins_arith imul, mulsd
3309 break;
3310 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3311 | ins_arith divsd
3312 break;
3313 case BC_MODVN:
3314 | ins_arithpre movsd, xmm1
3315 |->BC_MODVN_Z:
3316 | call ->vm_mod
3317 | ins_arithpost
3318 | ins_next
3319 break;
3320 case BC_MODNV: case BC_MODVV:
3321 | ins_arithpre movsd, xmm1
3322 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3323 break;
3324 case BC_POW:
3325 | ins_arithpre movsd, xmm1
3326 | mov RB, BASE
3327 | call extern pow
3328 | movzx RAd, PC_RA
3329 | mov BASE, RB
3330 | ins_arithpost
3331 | ins_next
3332 break;
3333
3334 case BC_CAT:
3335 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3336 | mov L:CARG1, SAVE_L
3337 | mov L:CARG1->base, BASE
3338 | lea CARG2, [BASE+RC*8] // top = address of slot RC.
3339 | mov CARG3d, RCd
3340 | sub CARG3d, RBd // left = RC - RB.
3341 |->BC_CAT_Z:
3342 | mov L:RB, L:CARG1 // Keep L in RB; it is read again after the call.
3343 | mov SAVE_PC, PC
3344 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3345 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3346 | mov BASE, L:RB->base // Reload BASE from L->base.
3347 | test RC, RC
3348 | jnz ->vmeta_binop // Non-NULL result: continue in vmeta_binop.
3349 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3350 | movzx RAd, PC_RA
3351 | mov RC, [BASE+RB*8]
3352 | mov [BASE+RA*8], RC
3353 | ins_next
3354 break;
3355
3356 /* -- Constant ops ------------------------------------------------------ */
3357
3358 case BC_KSTR:
3359 | ins_AND // RA = dst, RD = str const (~)
3360 | mov RD, [KBASE+RD*8] // Load the constant at KBASE[RD].
3361 | settp RD, LJ_TSTR // Apply the LJ_TSTR tag.
3362 | mov [BASE+RA*8], RD // Store to Stk[RA].
3363 | ins_next
3364 break;
3365 case BC_KCDATA:
3366 |.if FFI // Without FFI no code is emitted for this opcode.
3367 | ins_AND // RA = dst, RD = cdata const (~)
3368 | mov RD, [KBASE+RD*8] // Load the constant at KBASE[RD].
3369 | settp RD, LJ_TCDATA // Apply the LJ_TCDATA tag.
3370 | mov [BASE+RA*8], RD
3371 | ins_next
3372 |.endif
3373 break;
3374 case BC_KSHORT:
3375 | ins_AD // RA = dst, RD = signed int16 literal
3376 |.if DUALNUM
3377 | movsx RDd, RDW // Sign-extend literal.
3378 | setint RD // Stored as an integer in DUALNUM builds.
3379 | mov [BASE+RA*8], RD
3380 |.else
3381 | movsx RDd, RDW // Sign-extend literal.
3382 | cvtsi2sd xmm0, RDd // Convert to double.
3383 | movsd qword [BASE+RA*8], xmm0
3384 |.endif
3385 | ins_next
3386 break;
3387 case BC_KNUM:
3388 | ins_AD // RA = dst, RD = num const
3389 | movsd xmm0, qword [KBASE+RD*8] // Load the constant at KBASE[RD].
3390 | movsd qword [BASE+RA*8], xmm0 // Store to Stk[RA].
3391 | ins_next
3392 break;
3393 case BC_KPRI:
3394 | ins_AD // RA = dst, RD = primitive type (~)
3395 | shl RD, 47
3396 | not RD // RD = ~(RD << 47); this is the value stored.
3397 | mov [BASE+RA*8], RD
3398 | ins_next
3399 break;
3400 case BC_KNIL:
3401 | ins_AD // RA = dst_start, RD = dst_end
3402 | lea RA, [BASE+RA*8+8] // RA = address of slot dst_start+1.
3403 | lea RD, [BASE+RD*8] // RD = address of slot dst_end.
3404 | mov RB, LJ_TNIL
3405 | mov [RA-8], RB // Sets minimum 2 slots.
3406 |1:
3407 | mov [RA], RB
3408 | add RA, 8
3409 | cmp RA, RD
3410 | jbe <1 // Continue while RA <= RD (dst_end is included).
3411 | ins_next
3412 break;
3413
3414 /* -- Upvalue and function ops ------------------------------------------ */
3415
3416 case BC_UGET:
3417 | ins_AD // RA = dst, RD = upvalue #
3418 | mov LFUNC:RB, [BASE-16] // Tagged function value of this frame.
3419 | cleartp LFUNC:RB
3420 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] // uvptr[RD]
3421 | mov RB, UPVAL:RB->v // Pointer to the upvalue's value slot.
3422 | mov RD, [RB]
3423 | mov [BASE+RA*8], RD // Store to Stk[RA].
3424 | ins_next
3425 break;
3426 case BC_USETV:
3427#define TV2MARKOFS \
3428 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3429 | ins_AD // RA = upvalue #, RD = src
3430 | mov LFUNC:RB, [BASE-16]
3431 | cleartp LFUNC:RB
3432 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3433 | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by 'jz >1' below; the movs in between do not modify them.
3434 | mov RB, UPVAL:RB->v
3435 | mov RA, [BASE+RD*8]
3436 | mov [RB], RA // Store the new value through the upvalue pointer.
3437 | jz >1 // closed == 0: skip the barrier check.
3438 | // Check barrier for closed upvalue.
3439 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3440 | jnz >2
3441 |1:
3442 | ins_next
3443 |
3444 |2: // Upvalue is black. Check if new value is collectable and white.
3445 | mov RD, RA
3446 | sar RD, 47
3447 | sub RDd, LJ_TISGCV
3448 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3449 | jbe <1
3450 | cleartp GCOBJ:RA
3451 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3452 | jz <1
3453 | // Crossed a write barrier. Move the barrier forward.
3454 |.if not X64WIN
3455 | mov CARG2, RB
3456 | mov RB, BASE // Save BASE.
3457 |.else
3458 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3459 |.endif
3460 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3461 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3462 | mov BASE, RB // Restore BASE.
3463 | jmp <1
3464 break;
3465#undef TV2MARKOFS
3466 case BC_USETS:
3467 | ins_AND // RA = upvalue #, RD = str const (~)
3468 | mov LFUNC:RB, [BASE-16]
3469 | cleartp LFUNC:RB
3470 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3471 | mov STR:RA, [KBASE+RD*8] // Untagged string pointer; kept in RA for the barrier check.
3472 | mov RD, UPVAL:RB->v
3473 | settp STR:ITYPE, STR:RA, LJ_TSTR // ITYPE = tagged copy of RA.
3474 | mov [RD], STR:ITYPE // Store the tagged string through the upvalue pointer.
3475 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3476 | jnz >2
3477 |1:
3478 | ins_next
3479 |
3480 |2: // Check if string is white and ensure upvalue is closed.
3481 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3482 | jz <1
3483 | cmp byte UPVAL:RB->closed, 0
3484 | jz <1 // closed == 0: no barrier call.
3485 | // Crossed a write barrier. Move the barrier forward.
3486 | mov RB, BASE // Save BASE (CARG2 == BASE).
3487 | mov CARG2, RD
3488 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3489 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3490 | mov BASE, RB // Restore BASE.
3491 | jmp <1
3492 break;
3493 case BC_USETN:
3494 | ins_AD // RA = upvalue #, RD = num const
3495 | mov LFUNC:RB, [BASE-16]
3496 | cleartp LFUNC:RB
3497 | movsd xmm0, qword [KBASE+RD*8] // Load the constant at KBASE[RD].
3498 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3499 | mov RA, UPVAL:RB->v
3500 | movsd qword [RA], xmm0 // Store through the upvalue pointer; no barrier code follows.
3501 | ins_next
3502 break;
3503 case BC_USETP:
3504 | ins_AD // RA = upvalue #, RD = primitive type (~)
3505 | mov LFUNC:RB, [BASE-16]
3506 | cleartp LFUNC:RB
3507 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3508 | shl RD, 47
3509 | not RD // RD = ~(RD << 47), same encoding step as in BC_KPRI.
3510 | mov RA, UPVAL:RB->v
3511 | mov [RA], RD // Store through the upvalue pointer.
3512 | ins_next
3513 break;
3514 case BC_UCLO:
3515 | ins_AD // RA = level, RD = target
3516 | branchPC RD // Do this first to free RD.
3517 | mov L:RB, SAVE_L
3518 | cmp aword L:RB->openupval, 0
3519 | je >1 // L->openupval is zero: skip the call.
3520 | mov L:RB->base, BASE
3521 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3522 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3523 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3524 | mov BASE, L:RB->base // Reload BASE from L->base.
3525 |1:
3526 | ins_next
3527 break;
3528
3529 case BC_FNEW:
3530 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3531 | mov L:RB, SAVE_L
3532 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3533 | mov CARG3, [BASE-16] // parent = function of the current frame.
3534 | cleartp CARG3
3535 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3536 | mov CARG1, L:RB
3537 | mov SAVE_PC, PC
3538 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3539 | call extern lj_func_newL_gc
3540 | // GCfuncL * returned in eax (RC).
3541 | mov BASE, L:RB->base // Reload BASE from L->base.
3542 | movzx RAd, PC_RA // Restore RA.
3543 | settp LFUNC:RC, LJ_TFUNC
3544 | mov [BASE+RA*8], LFUNC:RC // Store the tagged function to Stk[RA].
3545 | ins_next
3546 break;
3547
3548 /* -- Table ops --------------------------------------------------------- */
3549
3550 case BC_TNEW:
3551 | ins_AD // RA = dst, RD = hbits|asize
3552 | mov L:RB, SAVE_L
3553 | mov L:RB->base, BASE
3554 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3555 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3556 | mov SAVE_PC, PC
3557 | jae >5 // gc.total >= gc.threshold: call lj_gc_step_fixtop first.
3558 |1:
3559 | mov CARG3d, RDd
3560 | and RDd, 0x7ff // Low 11 bits: asize.
3561 | shr CARG3d, 11 // Remaining upper bits: hbits.
3562 | cmp RDd, 0x7ff
3563 | je >3
3564 |2:
3565 | mov L:CARG1, L:RB
3566 | mov CARG2d, RDd
3567 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3568 | // Table * returned in eax (RC).
3569 | mov BASE, L:RB->base
3570 | movzx RAd, PC_RA // Restore RA.
3571 | settp TAB:RC, LJ_TTAB
3572 | mov [BASE+RA*8], TAB:RC // Store the tagged table to Stk[RA].
3573 | ins_next
3574 |3: // Turn 0x7ff into 0x801.
3575 | mov RDd, 0x801
3576 | jmp <2
3577 |5:
3578 | mov L:CARG1, L:RB
3579 | call extern lj_gc_step_fixtop // (lua_State *L)
3580 | movzx RDd, PC_RD // Restore RD.
3581 | jmp <1
3582 break;
3583 case BC_TDUP:
3584 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3585 | mov L:RB, SAVE_L
3586 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3587 | mov SAVE_PC, PC
3588 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3589 | mov L:RB->base, BASE
3590 | jae >3 // gc.total >= gc.threshold: call lj_gc_step_fixtop first.
3591 |2:
3592 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3593 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3594 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3595 | // Table * returned in eax (RC).
3596 | mov BASE, L:RB->base
3597 | movzx RAd, PC_RA // Restore RA.
3598 | settp TAB:RC, LJ_TTAB
3599 | mov [BASE+RA*8], TAB:RC // Store the tagged table to Stk[RA].
3600 | ins_next
3601 |3:
3602 | mov L:CARG1, L:RB
3603 | call extern lj_gc_step_fixtop // (lua_State *L)
3604 | movzx RDd, PC_RD // Need to reload RD.
3605 | not RD // presumably repeats the (~) inversion that ins_AND applied -- confirm against the ins_AND macro.
3606 | jmp <2
3607 break;
3608
3609 case BC_GGET:
3610 | ins_AND // RA = dst, RD = str const (~)
3611 | mov LFUNC:RB, [BASE-16]
3612 | cleartp LFUNC:RB
3613 | mov TAB:RB, LFUNC:RB->env // Table = env field of the current function.
3614 | mov STR:RC, [KBASE+RD*8]
3615 | jmp ->BC_TGETS_Z // Continue in BC_TGETS with RB = table, RC = string.
3616 break;
3617 case BC_GSET:
3618 | ins_AND // RA = src, RD = str const (~)
3619 | mov LFUNC:RB, [BASE-16]
3620 | cleartp LFUNC:RB
3621 | mov TAB:RB, LFUNC:RB->env // Table = env field of the current function.
3622 | mov STR:RC, [KBASE+RD*8]
3623 | jmp ->BC_TSETS_Z // Continue in BC_TSETS with RB = table, RC = string.
3624 break;
3625
3626 case BC_TGETV:
3627 | ins_ABC // RA = dst, RB = table, RC = key
3628 | mov TAB:RB, [BASE+RB*8]
3629 | mov RC, [BASE+RC*8]
3630 | checktab TAB:RB, ->vmeta_tgetv
3631 |
3632 | // Integer key?
3633 |.if DUALNUM
3634 | checkint RC, >5
3635 |.else
3636 | // Convert number to int and back and compare.
3637 | checknum RC, >5
3638 | movd xmm0, RC
3639 | cvttsd2si RCd, xmm0
3640 | cvtsi2sd xmm1, RCd
3641 | ucomisd xmm0, xmm1
3642 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3643 |.endif
3644 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3645 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3646 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3647 | add RC, TAB:RB->array
3648 | // Get array slot.
3649 | mov ITYPE, [RC]
3650 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3651 | je >2
3652 |1:
3653 | mov [BASE+RA*8], ITYPE
3654 | ins_next
3655 |
3656 |2: // Check for __index if table value is nil.
3657 | mov TAB:TMPR, TAB:RB->metatable
3658 | test TAB:TMPR, TAB:TMPR
3659 | jz <1 // No metatable: store the nil.
3660 | test byte TAB:TMPR->nomm, 1<<MM_index
3661 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3662 | jmp <1
3663 |
3664 |5: // String key?
3665 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3666 | cleartp STR:RC
3667 | jmp ->BC_TGETS_Z // Continue in BC_TGETS with RB = table, RC = string.
3668 break;
3669 case BC_TGETS:
3670 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3671 | mov TAB:RB, [BASE+RB*8]
3672 | not RC
3673 | mov STR:RC, [KBASE+RC*8]
3674 | checktab TAB:RB, ->vmeta_tgets
3675 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3676 | mov TMPRd, TAB:RB->hmask
3677 | and TMPRd, STR:RC->sid // Slot index = hmask & sid.
3678 | imul TMPRd, #NODE // Scale by the size of a Node.
3679 | add NODE:TMPR, TAB:RB->node
3680 | settp ITYPE, STR:RC, LJ_TSTR // Tagged key to compare against node keys.
3681 |1:
3682 | cmp NODE:TMPR->key, ITYPE
3683 | jne >4 // Key mismatch: follow the chain.
3684 | // Get node value.
3685 | mov ITYPE, NODE:TMPR->val
3686 | cmp ITYPE, LJ_TNIL
3687 | je >5 // Key found, but nil value?
3688 |2:
3689 | mov [BASE+RA*8], ITYPE
3690 | ins_next
3691 |
3692 |4: // Follow hash chain.
3693 | mov NODE:TMPR, NODE:TMPR->next
3694 | test NODE:TMPR, NODE:TMPR
3695 | jnz <1
3696 | // End of hash chain: key not found, nil result.
3697 | mov ITYPE, LJ_TNIL
3698 |
3699 |5: // Check for __index if table value is nil.
3700 | mov TAB:TMPR, TAB:RB->metatable
3701 | test TAB:TMPR, TAB:TMPR
3702 | jz <2 // No metatable: done.
3703 | test byte TAB:TMPR->nomm, 1<<MM_index
3704 | jnz <2 // 'no __index' flag set: done.
3705 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3706 break;
3707 case BC_TGETB:
3708 | ins_ABC // RA = dst, RB = table, RC = byte literal
3709 | mov TAB:RB, [BASE+RB*8]
3710 | checktab TAB:RB, ->vmeta_tgetb
3711 | cmp RCd, TAB:RB->asize
3712 | jae ->vmeta_tgetb // Not in array part? Use fallback.
3713 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3714 | add RC, TAB:RB->array
3715 | // Get array slot.
3716 | mov ITYPE, [RC]
3717 | cmp ITYPE, LJ_TNIL
3718 | je >2
3719 |1:
3720 | mov [BASE+RA*8], ITYPE
3721 | ins_next
3722 |
3723 |2: // Check for __index if table value is nil.
3724 | mov TAB:TMPR, TAB:RB->metatable
3725 | test TAB:TMPR, TAB:TMPR
3726 | jz <1 // No metatable: store the nil.
3727 | test byte TAB:TMPR->nomm, 1<<MM_index
3728 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3729 | jmp <1
3730 break;
3731 case BC_TGETR:
3732 | ins_ABC // RA = dst, RB = table, RC = key
3733 | mov TAB:RB, [BASE+RB*8]
3734 | cleartp TAB:RB // Untagged without a checktab: no type check is emitted here.
3735 |.if DUALNUM
3736 | mov RCd, dword [BASE+RC*8]
3737 |.else
3738 | cvttsd2si RCd, qword [BASE+RC*8]
3739 |.endif
3740 | cmp RCd, TAB:RB->asize
3741 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3742 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3743 | add RC, TAB:RB->array
3744 | // Get array slot.
3745 |->BC_TGETR_Z:
3746 | mov ITYPE, [RC]
3747 |->BC_TGETR2_Z:
3748 | mov [BASE+RA*8], ITYPE // No nil/metatable check on this path.
3749 | ins_next
3750 break;
3751
3752 case BC_TSETV:
3753 | ins_ABC // RA = src, RB = table, RC = key
3754 | mov TAB:RB, [BASE+RB*8]
3755 | mov RC, [BASE+RC*8]
3756 | checktab TAB:RB, ->vmeta_tsetv
3757 |
3758 | // Integer key?
3759 |.if DUALNUM
3760 | checkint RC, >5
3761 |.else
3762 | // Convert number to int and back and compare.
3763 | checknum RC, >5
3764 | movd xmm0, RC
3765 | cvttsd2si RCd, xmm0
3766 | cvtsi2sd xmm1, RCd
3767 | ucomisd xmm0, xmm1
3768 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3769 |.endif
3770 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3771 | jae ->vmeta_tsetv // Not in array part? Use fallback.
3772 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3773 | add RC, TAB:RB->array
3774 | cmp aword [RC], LJ_TNIL
3775 | je >3 // Previous value is nil?
3776 |1:
3777 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3778 | jnz >7
3779 |2: // Set array slot.
3780 | mov RB, [BASE+RA*8] // RB is reused for the value from here on.
3781 | mov [RC], RB
3782 | ins_next
3783 |
3784 |3: // Check for __newindex if previous value is nil.
3785 | mov TAB:TMPR, TAB:RB->metatable
3786 | test TAB:TMPR, TAB:TMPR
3787 | jz <1 // No metatable: proceed with the store.
3788 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3789 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3790 | jmp <1
3791 |
3792 |5: // String key?
3793 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3794 | cleartp STR:RC
3795 | jmp ->BC_TSETS_Z // Continue in BC_TSETS with RB = table, RC = string.
3796 |
3797 |7: // Possible table write barrier for the value. Skip valiswhite check.
3798 | barrierback TAB:RB, TMPR
3799 | jmp <2
3800 break;
3801 case BC_TSETS:
3802 | ins_ABC // RA = src, RB = table, RC = str const (~)
3803 | mov TAB:RB, [BASE+RB*8]
3804 | not RC
3805 | mov STR:RC, [KBASE+RC*8]
3806 | checktab TAB:RB, ->vmeta_tsets
3807 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3808 | mov TMPRd, TAB:RB->hmask
3809 | and TMPRd, STR:RC->sid // Slot index = hmask & sid.
3810 | imul TMPRd, #NODE // Scale by the size of a Node.
3811 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3812 | add NODE:TMPR, TAB:RB->node
3813 | settp ITYPE, STR:RC, LJ_TSTR // Tagged key to compare against node keys.
3814 |1:
3815 | cmp NODE:TMPR->key, ITYPE
3816 | jne >5 // Key mismatch: follow the chain.
3817 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3818 | cmp aword [TMPR], LJ_TNIL
3819 | je >4 // Previous value is nil?
3820 |2:
3821 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3822 | jnz >7
3823 |3: // Set node value.
3824 | mov ITYPE, [BASE+RA*8]
3825 | mov [TMPR], ITYPE
3826 | ins_next
3827 |
3828 |4: // Check for __newindex if previous value is nil.
3829 | mov TAB:ITYPE, TAB:RB->metatable
3830 | test TAB:ITYPE, TAB:ITYPE
3831 | jz <2 // No metatable: proceed with the store.
3832 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3833 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3834 | jmp <2
3835 |
3836 |5: // Follow hash chain.
3837 | mov NODE:TMPR, NODE:TMPR->next
3838 | test NODE:TMPR, NODE:TMPR
3839 | jnz <1
3840 | // End of hash chain: key not found, add a new one.
3841 |
3842 | // But check for __newindex first.
3843 | mov TAB:TMPR, TAB:RB->metatable
3844 | test TAB:TMPR, TAB:TMPR
3845 | jz >6 // No metatable: continue.
3846 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3847 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3848 |6:
3849 | mov TMP1, ITYPE // Spill the tagged key so its address can be passed as TValue *k.
3850 | mov L:CARG1, SAVE_L
3851 | mov L:CARG1->base, BASE
3852 | lea CARG3, TMP1
3853 | mov CARG2, TAB:RB
3854 | mov SAVE_PC, PC
3855 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3856 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3857 | mov L:CARG1, SAVE_L
3858 | mov BASE, L:CARG1->base
3859 | mov TMPR, rax // The returned TValue * becomes the store target.
3860 | movzx RAd, PC_RA // Restore RA.
3861 | jmp <2 // Must check write barrier for value.
3862 |
3863 |7: // Possible table write barrier for the value. Skip valiswhite check.
3864 | barrierback TAB:RB, ITYPE
3865 | jmp <3
3866 break;
3867 case BC_TSETB:
3868 | ins_ABC // RA = src, RB = table, RC = byte literal
3869 | mov TAB:RB, [BASE+RB*8]
3870 | checktab TAB:RB, ->vmeta_tsetb
3871 | cmp RCd, TAB:RB->asize
3872 | jae ->vmeta_tsetb // Not in array part? Use fallback.
3873 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3874 | add RC, TAB:RB->array
3875 | cmp aword [RC], LJ_TNIL
3876 | je >3 // Previous value is nil?
3877 |1:
3878 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3879 | jnz >7
3880 |2: // Set array slot.
3881 | mov ITYPE, [BASE+RA*8]
3882 | mov [RC], ITYPE
3883 | ins_next
3884 |
3885 |3: // Check for __newindex if previous value is nil.
3886 | mov TAB:TMPR, TAB:RB->metatable
3887 | test TAB:TMPR, TAB:TMPR
3888 | jz <1 // No metatable: proceed with the store.
3889 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3890 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3891 | jmp <1
3892 |
3893 |7: // Possible table write barrier for the value. Skip valiswhite check.
3894 | barrierback TAB:RB, TMPR
3895 | jmp <2
3896 break;
3897 case BC_TSETR:
3898 | ins_ABC // RA = src, RB = table, RC = key
3899 | mov TAB:RB, [BASE+RB*8]
3900 | cleartp TAB:RB // Untagged without a checktab: no type check is emitted here.
3901 |.if DUALNUM
3902 | mov RC, [BASE+RC*8]
3903 |.else
3904 | cvttsd2si RCd, qword [BASE+RC*8]
3905 |.endif
3906 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3907 | jnz >7
3908 |2:
3909 | cmp RCd, TAB:RB->asize // Only the low 32 bits of RC are compared.
3910 | jae ->vmeta_tsetr // Not in array part? Use fallback.
3911 | shl RCd, 3 // Index * 8 = byte offset into the array part.
3912 | add RC, TAB:RB->array
3913 | // Set array slot.
3914 |->BC_TSETR_Z:
3915 | mov ITYPE, [BASE+RA*8]
3916 | mov [RC], ITYPE // No nil/metatable check on this path.
3917 | ins_next
3918 |
3919 |7: // Possible table write barrier for the value. Skip valiswhite check.
3920 | barrierback TAB:RB, TMPR
3921 | jmp <2
3922 break;
3923
3924 case BC_TSETM:
3925 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3926 |1:
3927 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3928 | lea RA, [BASE+RA*8]
3929 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3930 | cleartp TAB:RB
3931 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3932 | jnz >7
3933 |2:
3934 | mov RDd, MULTRES
3935 | sub RDd, 1 // RD = MULTRES-1 = number of values to copy.
3936 | jz >4 // Nothing to copy?
3937 | add RDd, TMPRd // Compute needed size.
3938 | cmp RDd, TAB:RB->asize
3939 | ja >5 // Doesn't fit into array part?
3940 | sub RDd, TMPRd // Back to the plain copy count.
3941 | shl TMPRd, 3 // Start index * 8 = byte offset into the array part.
3942 | add TMPR, TAB:RB->array
3943 |3: // Copy result slots to table.
3944 | mov RB, [RA]
3945 | add RA, 8
3946 | mov [TMPR], RB
3947 | add TMPR, 8
3948 | sub RDd, 1
3949 | jnz <3
3950 |4:
3951 | ins_next
3952 |
3953 |5: // Need to resize array part.
3954 | mov L:CARG1, SAVE_L
3955 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3956 | mov CARG2, TAB:RB
3957 | mov CARG3d, RDd // nasize = needed size computed above.
3958 | mov L:RB, L:CARG1
3959 | mov SAVE_PC, PC
3960 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3961 | mov BASE, L:RB->base
3962 | movzx RAd, PC_RA // Restore RA.
3963 | movzx RDd, PC_RD // Restore RD.
3964 | jmp <1 // Retry.
3965 |
3966 |7: // Possible table write barrier for any value. Skip valiswhite check.
3967 | barrierback TAB:RB, RD
3968 | jmp <2
3969 break;
3970
3971 /* -- Calls and vararg handling ----------------------------------------- */
3972
3973 case BC_CALL: case BC_CALLM:
3974 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3975 if (op == BC_CALLM) { /* CALLM only: add MULTRES to the argument count. */
3976 | add NARGS:RDd, MULTRES
3977 }
3978 | mov LFUNC:RB, [BASE+RA*8]
3979 | checkfunc LFUNC:RB, ->vmeta_call_ra
3980 | lea BASE, [BASE+RA*8+16] // New BASE: 16 bytes (two slots) above the function slot.
3981 | ins_call
3982 break;
3983
3984 case BC_CALLMT:
3985 | ins_AD // RA = base, RD = extra_nargs
3986 | add NARGS:RDd, MULTRES
3987 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3988 break; /* Ends this generator case only; the emitted code is meant to run on into BC_CALLT. */
3989 case BC_CALLT:
3990 | ins_AD // RA = base, RD = nargs+1
3991 | lea RA, [BASE+RA*8+16]
3992 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3993 | mov LFUNC:RB, [RA-16]
3994 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
3995 |->BC_CALLT_Z:
3996 | mov PC, [BASE-8]
3997 | test PCd, FRAME_TYPE
3998 | jnz >7
3999 |1:
4000 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
4001 | mov MULTRES, NARGS:RDd // Stash nargs+1; RD is used as the loop counter.
4002 | sub NARGS:RDd, 1
4003 | jz >3 // No arguments to move.
4004 |2: // Move args down.
4005 | mov RB, [RA]
4006 | add RA, 8
4007 | mov [KBASE], RB
4008 | add KBASE, 8
4009 | sub NARGS:RDd, 1
4010 | jnz <2
4011 |
4012 | mov LFUNC:RB, [BASE-16] // RB was clobbered by the copy loop.
4013 |3:
4014 | cleartp LFUNC:RB
4015 | mov NARGS:RDd, MULTRES // Restore nargs+1.
4016 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4017 | ja >5
4018 |4:
4019 | ins_callt
4020 |
4021 |5: // Tailcall to a fast function.
4022 | test PCd, FRAME_TYPE // Lua frame below?
4023 | jnz <4
4024 | movzx RAd, PC_RA
4025 | neg RA
4026 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4027 | cleartp LFUNC:KBASE
4028 | mov KBASE, LFUNC:KBASE->pc
4029 | mov KBASE, [KBASE+PC2PROTO(k)]
4030 | jmp <4
4031 |
4032 |7: // Tailcall from a vararg function.
4033 | sub PC, FRAME_VARG
4034 | test PCd, FRAME_TYPEP
4035 | jnz >8 // Vararg frame below?
4036 | sub BASE, PC // Need to relocate BASE/KBASE down.
4037 | mov KBASE, BASE
4038 | mov PC, [BASE-8]
4039 | jmp <1
4040 |8:
4041 | add PCd, FRAME_VARG // Undo the subtraction above.
4042 | jmp <1
4043 break;
4044
4045 case BC_ITERC:
4046 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4047 | lea RA, [BASE+RA*8+16] // fb = base+2
4048 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4049 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4050 | mov [RA], RB
4051 | mov [RA+8], RC
4052 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5]
4053 | mov [RA-16], LFUNC:RB
4054 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4055 | checkfunc LFUNC:RB, ->vmeta_call
4056 | mov BASE, RA // New BASE = fb.
4057 | ins_call
4058 break;
4059
4060 case BC_ITERN:
4061 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4062 |.if JIT
4063 | // NYI: add hotloop, record BC_ITERN.
4064 |.endif
4065 | mov TAB:RB, [BASE+RA*8-16]
4066 | cleartp TAB:RB
4067 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4068 | mov TMPRd, TAB:RB->asize
4069 | add PC, 4 // Advance PC by one 4-byte instruction (past the ITERL referenced below).
4070 | mov ITYPE, TAB:RB->array
4071 |1: // Traverse array part.
4072 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4073 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4074 |.if not DUALNUM
4075 | cvtsi2sd xmm0, RCd
4076 |.endif
4077 | // Copy array slot to returned value.
4078 | mov RB, [ITYPE+RC*8]
4079 | mov [BASE+RA*8+8], RB
4080 | // Return array index as a numeric key.
4081 |.if DUALNUM
4082 | setint ITYPE, RC
4083 | mov [BASE+RA*8], ITYPE
4084 |.else
4085 | movsd qword [BASE+RA*8], xmm0
4086 |.endif
4087 | add RCd, 1
4088 | mov [BASE+RA*8-8], RCd // Update control var.
4089 |2:
4090 | movzx RDd, PC_RD // Get target from ITERL.
4091 | branchPC RD
4092 |3:
4093 | ins_next
4094 |
4095 |4: // Skip holes in array part.
4096 | add RCd, 1
4097 | jmp <1
4098 |
4099 |5: // Traverse hash part.
4100 | sub RCd, TMPRd // Hash index = control index - asize.
4101 |6:
4102 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4103 | imul ITYPEd, RCd, #NODE // Scale by the size of a Node.
4104 | add NODE:ITYPE, TAB:RB->node
4105 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4106 | lea TMPRd, [RCd+TMPRd+1] // Next control index = hash index + asize + 1.
4107 | // Copy key and value from hash slot.
4108 | mov RB, NODE:ITYPE->key
4109 | mov RC, NODE:ITYPE->val
4110 | mov [BASE+RA*8], RB
4111 | mov [BASE+RA*8+8], RC
4112 | mov [BASE+RA*8-8], TMPRd // Update control var.
4113 | jmp <2
4114 |
4115 |7: // Skip holes in hash part.
4116 | add RCd, 1
4117 | jmp <6
4118 break;
4119
4120 case BC_ISNEXT:
4121 | ins_AD // RA = base, RD = target (points to ITERN)
4122 | mov CFUNC:RB, [BASE+RA*8-24]
4123 | checkfunc CFUNC:RB, >5
4124 | checktptp [BASE+RA*8-16], LJ_TTAB, >5
4125 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
4126 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4127 | branchPC RD
4128 | mov64 TMPR, U64x(fffe7fff, 00000000) // Low 32 bits are zero: BC_ITERN reads them as the start index.
4129 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4130 |1:
4131 | ins_next
4132 |5: // Despecialize bytecode if any of the checks fail.
4133 | mov PC_OP, BC_JMP // Rewrite this instruction's opcode to BC_JMP.
4134 | branchPC RD
4135 | mov byte [PC], BC_ITERC // Rewrite the opcode byte at the branch target to BC_ITERC.
4136 | jmp <1
4137 break;
4138
4139 case BC_VARG:
4140 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4141 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4142 | lea RA, [BASE+RA*8]
4143 | sub TMPR, [BASE-8]
4144 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4145 | test RB, RB
4146 | jz >5 // Copy all varargs?
4147 | lea RB, [RA+RB*8-8] // RB = end of the destination range (RA + (nresults)*8).
4148 | cmp TMPR, BASE // No vararg slots?
4149 | jnb >2
4150 |1: // Copy vararg slots to destination slots.
4151 | mov RC, [TMPR-16]
4152 | add TMPR, 8
4153 | mov [RA], RC
4154 | add RA, 8
4155 | cmp RA, RB // All destination slots filled?
4156 | jnb >3
4157 | cmp TMPR, BASE // No more vararg slots?
4158 | jb <1
4159 |2: // Fill up remainder with nil.
4160 | mov aword [RA], LJ_TNIL
4161 | add RA, 8
4162 | cmp RA, RB
4163 | jb <2
4164 |3:
4165 | ins_next
4166 |
4167 |5: // Copy all varargs.
4168 | mov MULTRES, 1 // MULTRES = 0+1
4169 | mov RC, BASE
4170 | sub RC, TMPR // RC = byte size of the vararg area.
4171 | jbe <3 // No vararg slots?
4172 | mov RBd, RCd
4173 | shr RBd, 3 // Bytes / 8 = number of slots.
4174 | add RBd, 1
4175 | mov MULTRES, RBd // MULTRES = #varargs+1
4176 | mov L:RB, SAVE_L
4177 | add RC, RA // RC = end address of the destination range.
4178 | cmp RC, L:RB->maxstack
4179 | ja >7 // Need to grow stack?
4180 |6: // Copy all vararg slots.
4181 | mov RC, [TMPR-16]
4182 | add TMPR, 8
4183 | mov [RA], RC
4184 | add RA, 8
4185 | cmp TMPR, BASE // No more vararg slots?
4186 | jb <6
4187 | jmp <3
4188 |
4189 |7: // Grow stack for varargs.
4190 | mov L:RB->base, BASE
4191 | mov L:RB->top, RA
4192 | mov SAVE_PC, PC
4193 | sub TMPR, BASE // Need delta, because BASE may change.
4194 | mov TMP1hi, TMPRd // Keep the delta across the call.
4195 | mov CARG2d, MULTRES
4196 | sub CARG2d, 1 // n = #varargs.
4197 | mov CARG1, L:RB
4198 | call extern lj_state_growstack // (lua_State *L, int n)
4199 | mov BASE, L:RB->base
4200 | movsxd TMPR, TMP1hi // Sign-extend the saved delta.
4201 | mov RA, L:RB->top
4202 | add TMPR, BASE // Rebuild TMPR relative to the reloaded BASE.
4203 | jmp <6
4204 break;
4205
4206 /* -- Returns ----------------------------------------------------------- */
4207
4208 case BC_RETM:
4209 | ins_AD // RA = results, RD = extra_nresults
4210 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4211 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4212 break; /* Ends this generator case only; the emitted code is meant to run on into BC_RET. */
4213
4214 case BC_RET: case BC_RET0: case BC_RET1:
4215 | ins_AD // RA = results, RD = nresults+1
4216 if (op != BC_RET0) { /* RA becomes a byte offset; the BC_RET0 code never reads it. */
4217 | shl RAd, 3
4218 }
4219 |1:
4220 | mov PC, [BASE-8]
4221 | mov MULTRES, RDd // Save nresults+1.
4222 | test PCd, FRAME_TYPE // Check frame type marker.
4223 | jnz >7 // Not returning to a fixarg Lua func?
4224 switch (op) { /* Emit the result-copy code specific to each return variant. */
4225 case BC_RET:
4226 |->BC_RET_Z:
4227 | mov KBASE, BASE // Use KBASE for result move.
4228 | sub RDd, 1
4229 | jz >3
4230 |2: // Move results down.
4231 | mov RB, [KBASE+RA]
4232 | mov [KBASE-16], RB
4233 | add KBASE, 8
4234 | sub RDd, 1
4235 | jnz <2
4236 |3:
4237 | mov RDd, MULTRES // Note: MULTRES may be >255.
4238 | movzx RBd, PC_RB // So cannot compare with RDL!
4239 |5:
4240 | cmp RBd, RDd // More results expected?
4241 | ja >6
4242 break;
4243 case BC_RET1:
4244 | mov RB, [BASE+RA] // Single result: copy it straight to [BASE-16].
4245 | mov [BASE-16], RB
4246 /* fallthrough */
4247 case BC_RET0:
4248 |5:
4249 | cmp PC_RB, RDL // More results expected?
4250 | ja >6
4251 default:
4252 break;
4253 }
4254 | movzx RAd, PC_RA
4255 | neg RA
4256 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4257 | mov LFUNC:KBASE, [BASE-16]
4258 | cleartp LFUNC:KBASE
4259 | mov KBASE, LFUNC:KBASE->pc
4260 | mov KBASE, [KBASE+PC2PROTO(k)]
4261 | ins_next
4262 |
4263 |6: // Fill up results with nil.
4264 if (op == BC_RET) {
4265 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4266 | add KBASE, 8
4267 } else {
4268 | mov aword [BASE+RD*8-24], LJ_TNIL
4269 }
4270 | add RD, 1
4271 | jmp <5
4272 |
4273 |7: // Non-standard return case.
4274 | lea RB, [PC-FRAME_VARG]
4275 | test RBd, FRAME_TYPEP
4276 | jnz ->vm_return
4277 | // Return from vararg function: relocate BASE down and RA up.
4278 | sub BASE, RB
4279 if (op != BC_RET0) { /* RA is unused for BC_RET0, so it needs no adjustment. */
4280 | add RA, RB
4281 }
4282 | jmp <1
4283 break;
4284
4285 /* -- Loops and branches ------------------------------------------------ */
4286
4287 |.define FOR_IDX, [RA]
4288 |.define FOR_STOP, [RA+8]
4289 |.define FOR_STEP, [RA+16]
4290 |.define FOR_EXT, [RA+24]
4291
4292 case BC_FORL:
4293 |.if JIT
4294 | hotloop RBd
4295 |.endif
4296 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4297 break; /* Ends this generator case only; the emitted code is meant to run on into BC_IFORL. */
4298
4299 case BC_JFORI:
4300 case BC_JFORL:
4301#if !LJ_HASJIT
4302 break;
4303#endif
4304 case BC_FORI:
4305 case BC_IFORL:
4306 vk = (op == BC_IFORL || op == BC_JFORL); /* vk: set for the *FORL variants, which add the step to the index before comparing. */
4307 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4308 | lea RA, [BASE+RA*8]
4309 if (LJ_DUALNUM) {
4310 | mov RB, FOR_IDX
4311 | checkint RB, >9
4312 | mov TMPR, FOR_STOP
4313 if (!vk) {
4314 | checkint TMPR, ->vmeta_for
4315 | mov ITYPE, FOR_STEP
4316 | test ITYPEd, ITYPEd; js >5
4317 | sar ITYPE, 47;
4318 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4319 } else {
4320#ifdef LUA_USE_ASSERT
4321 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4322 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4323#endif
4324 | mov ITYPE, FOR_STEP
4325 | test ITYPEd, ITYPEd; js >5
4326 | add RBd, ITYPEd; jo >1 // idx += step; on signed overflow go to label 1.
4327 | setint RB
4328 | mov FOR_IDX, RB
4329 }
4330 | cmp RBd, TMPRd // Compare idx with stop (positive step).
4331 | mov FOR_EXT, RB // Copy idx to the FOR_EXT slot; mov leaves the flags intact.
4332 if (op == BC_FORI) {
4333 | jle >7
4334 |1:
4335 |6:
4336 | branchPC RD
4337 } else if (op == BC_JFORI) {
4338 | branchPC RD
4339 | movzx RDd, PC_RD
4340 | jle =>BC_JLOOP
4341 |1:
4342 |6:
4343 } else if (op == BC_IFORL) {
4344 | jg >7
4345 |6:
4346 | branchPC RD
4347 |1:
4348 } else {
4349 | jle =>BC_JLOOP
4350 |1:
4351 |6:
4352 }
4353 |7:
4354 | ins_next
4355 |
4356 |5: // Invert check for negative step.
4357 if (!vk) {
4358 | sar ITYPE, 47;
4359 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4360 } else {
4361 | add RBd, ITYPEd; jo <1
4362 | setint RB
4363 | mov FOR_IDX, RB
4364 }
4365 | cmp RBd, TMPRd // Compare idx with stop (negative step).
4366 | mov FOR_EXT, RB
4367 if (op == BC_FORI) {
4368 | jge <7
4369 } else if (op == BC_JFORI) {
4370 | branchPC RD
4371 | movzx RDd, PC_RD
4372 | jge =>BC_JLOOP
4373 } else if (op == BC_IFORL) {
4374 | jl <7
4375 } else {
4376 | jge =>BC_JLOOP
4377 }
4378 | jmp <6
4379 |9: // Fallback to FP variant.
4380 if (!vk) {
4381 | jae ->vmeta_for
4382 }
4383 } else if (!vk) {
4384 | checknumtp FOR_IDX, ->vmeta_for
4385 }
4386 if (!vk) {
4387 | checknumtp FOR_STOP, ->vmeta_for
4388 } else {
4389#ifdef LUA_USE_ASSERT
4390 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4391 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4392#endif
4393 }
4394 | mov RB, FOR_STEP
4395 if (!vk) {
4396 | checknum RB, ->vmeta_for
4397 }
4398 | movsd xmm0, qword FOR_IDX
4399 | movsd xmm1, qword FOR_STOP
4400 if (vk) {
4401 | addsd xmm0, qword FOR_STEP // idx += step.
4402 | movsd qword FOR_IDX, xmm0
4403 | test RB, RB; js >3 // Sign bit of the step selects the inverted comparison.
4404 } else {
4405 | jl >3 // NOTE(review): consumes flags left by the preceding 'checknum RB' expansion -- confirm against that macro.
4406 }
4407 | ucomisd xmm1, xmm0 // Positive step: compare stop with idx.
4408 |1:
4409 | movsd qword FOR_EXT, xmm0 // Copy idx to the FOR_EXT slot; movsd leaves the flags intact.
4410 if (op == BC_FORI) {
4411 |.if DUALNUM
4412 | jnb <7
4413 |.else
4414 | jnb >2
4415 | branchPC RD
4416 |.endif
4417 } else if (op == BC_JFORI) {
4418 | branchPC RD
4419 | movzx RDd, PC_RD
4420 | jnb =>BC_JLOOP
4421 } else if (op == BC_IFORL) {
4422 |.if DUALNUM
4423 | jb <7
4424 |.else
4425 | jb >2
4426 | branchPC RD
4427 |.endif
4428 } else {
4429 | jnb =>BC_JLOOP
4430 }
4431 |.if DUALNUM
4432 | jmp <6
4433 |.else
4434 |2:
4435 | ins_next
4436 |.endif
4437 |
4438 |3: // Invert comparison if step is negative.
4439 | ucomisd xmm0, xmm1
4440 | jmp <1
4441 break;
4442
4443 case BC_ITERL:
4444 |.if JIT
4445 | hotloop RBd
4446 |.endif
4447 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4448 break; /* Ends this generator case only; the emitted code is meant to run on into BC_IITERL. */
4449
4450 case BC_JITERL:
4451#if !LJ_HASJIT
4452 break;
4453#endif
4454 case BC_IITERL:
4455 | ins_AJ // RA = base, RD = target
4456 | lea RA, [BASE+RA*8]
4457 | mov RB, [RA]
4458 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4459 if (op == BC_JITERL) {
4460 | mov [RA-8], RB // Save control var, then continue at BC_JLOOP.
4461 | jmp =>BC_JLOOP
4462 } else {
4463 | branchPC RD // Otherwise save control var + branch.
4464 | mov [RA-8], RB
4465 }
4466 |1:
4467 | ins_next
4468 break;
4469
4470 case BC_LOOP:
4471 | ins_A // RA = base, RD = target (loop extent)
4472 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4473 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4474 |.if JIT
4475 | hotloop RBd
4476 |.endif
4477 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4478 break; /* Ends this generator case only; the emitted code is meant to run on into BC_ILOOP. */
4479
4480 case BC_ILOOP: /* Same as BC_LOOP, but without the hotloop hook. */
4481 | ins_A // RA = base, RD = target (loop extent)
4482 | ins_next
4483 break;
4484
4485 case BC_JLOOP:
4486 |.if JIT // Without JIT no code is emitted for this opcode.
4487 | ins_AD // RA = base (ignored), RD = traceno
4488 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4489 | mov TRACE:RD, [RA+RD*8] // Look up the trace by number.
4490 | mov RD, TRACE:RD->mcode
4491 | mov L:RB, SAVE_L
4492 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4493 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4494 | // Save additional callee-save registers only used in compiled code.
4495 |.if X64WIN
4496 | mov CSAVE_4, r12
4497 | mov CSAVE_3, r13
4498 | mov CSAVE_2, r14
4499 | mov CSAVE_1, r15
4500 | mov RA, rsp // Keep the old rsp: the xmm save slots below are addressed from it.
4501 | sub rsp, 10*16+4*8
4502 | movdqa [RA-1*16], xmm6
4503 | movdqa [RA-2*16], xmm7
4504 | movdqa [RA-3*16], xmm8
4505 | movdqa [RA-4*16], xmm9
4506 | movdqa [RA-5*16], xmm10
4507 | movdqa [RA-6*16], xmm11
4508 | movdqa [RA-7*16], xmm12
4509 | movdqa [RA-8*16], xmm13
4510 | movdqa [RA-9*16], xmm14
4511 | movdqa [RA-10*16], xmm15
4512 |.else
4513 | sub rsp, 16
4514 | mov [rsp+16], r12
4515 | mov [rsp+8], r13
4516 |.endif
4517 | jmp RD // Jump to the trace's mcode address.
4518 |.endif
4519 break;
4520
4521 case BC_JMP:
4522 | ins_AJ // RA = unused, RD = target
4523 | branchPC RD
4524 | ins_next
4525 break;
4526
4527 /* -- Function headers -------------------------------------------------- */
4528
4529 /*
4530 ** Reminder: A function may be called with func/args above L->maxstack,
4531 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4532 ** too. This means all FUNC* ops (including fast functions) must check
4533 ** for stack overflow _before_ adding more slots!
4534 */
4535
4536 case BC_FUNCF:
4537 |.if JIT
4538 | hotcall RBd
4539 |.endif
4540 case BC_FUNCV: /* NYI: compiled vararg functions. */
4541 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4542 break;
4543
4544 case BC_JFUNCF:
4545#if !LJ_HASJIT
4546 break;
4547#endif
4548 case BC_IFUNCF:
4549 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4550 | mov KBASE, [PC-4+PC2PROTO(k)]
4551 | mov L:RB, SAVE_L
4552 | lea RA, [BASE+RA*8] // Top of frame.
4553 | cmp RA, L:RB->maxstack
4554 | ja ->vm_growstack_f
4555 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4556 | cmp NARGS:RDd, RAd // Check for missing parameters.
4557 | jbe >3
4558 |2:
4559 if (op == BC_JFUNCF) {
4560 | movzx RDd, PC_RD
4561 | jmp =>BC_JLOOP
4562 } else {
4563 | ins_next
4564 }
4565 |
4566 |3: // Clear missing parameters.
4567 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4568 | add NARGS:RDd, 1
4569 | cmp NARGS:RDd, RAd
4570 | jbe <3
4571 | jmp <2
4572 break;
4573
4574 case BC_JFUNCV:
4575#if !LJ_HASJIT
4576 break;
4577#endif
4578 | int3 // NYI: compiled vararg functions
4579 break; /* NYI: compiled vararg functions. */
4580
4581 case BC_IFUNCV:
4582 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4583 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4584 | lea RD, [BASE+NARGS:RD*8+8]
4585 | mov LFUNC:KBASE, [BASE-16]
4586 | mov [RD-8], RB // Store delta + FRAME_VARG.
4587 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4588 | mov L:RB, SAVE_L
4589 | lea RA, [RD+RA*8]
4590 | cmp RA, L:RB->maxstack
4591 | ja ->vm_growstack_v // Need to grow stack.
4592 | mov RA, BASE
4593 | mov BASE, RD
4594 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4595 | test RBd, RBd
4596 | jz >2
4597 | add RA, 8
4598 |1: // Copy fixarg slots up to new frame.
4599 | add RA, 8
4600 | cmp RA, BASE
4601 | jnb >3 // Less args than parameters?
4602 | mov KBASE, [RA-16]
4603 | mov [RD], KBASE
4604 | add RD, 8
4605 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4606 | sub RBd, 1
4607 | jnz <1
4608 |2:
4609 if (op == BC_JFUNCV) {
4610 | movzx RDd, PC_RD
4611 | jmp =>BC_JLOOP
4612 } else {
4613 | mov KBASE, [PC-4+PC2PROTO(k)]
4614 | ins_next
4615 }
4616 |
4617 |3: // Clear missing parameters.
4618 | mov aword [RD], LJ_TNIL
4619 | add RD, 8
4620 | sub RBd, 1
4621 | jnz <3
4622 | jmp <2
4623 break;
4624
4625 case BC_FUNCC:
4626 case BC_FUNCCW:
4627 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4628 | mov CFUNC:RB, [BASE-16]
4629 | cleartp CFUNC:RB
4630 | mov KBASE, CFUNC:RB->f
4631 | mov L:RB, SAVE_L
4632 | lea RD, [BASE+NARGS:RD*8-8]
4633 | mov L:RB->base, BASE
4634 | lea RA, [RD+8*LUA_MINSTACK]
4635 | cmp RA, L:RB->maxstack
4636 | mov L:RB->top, RD
4637 if (op == BC_FUNCC) {
4638 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4639 } else {
4640 | mov CARG2, KBASE
4641 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4642 }
4643 | ja ->vm_growstack_c // Need to grow stack.
4644 | set_vmstate C
4645 if (op == BC_FUNCC) {
4646 | call KBASE // (lua_State *L)
4647 } else {
4648 | // (lua_State *L, lua_CFunction f)
4649 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4650 }
4651 | // nresults returned in eax (RD).
4652 | mov BASE, L:RB->base
4653 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4654 | set_vmstate INTERP
4655 | lea RA, [BASE+RD*8]
4656 | neg RA
4657 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4658 | mov PC, [BASE-8] // Fetch PC of caller.
4659 | jmp ->vm_returnc
4660 break;
4661
4662 /* ---------------------------------------------------------------------- */
4663
4664 default:
4665 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4666 exit(2);
4667 break;
4668 }
4669}
4670
/*
** Build the whole backend: first the subroutines, then one handler per
** bytecode op by calling build_ins() for every op in 0 .. BC__MAX-1.
** Returns BC__MAX.
*/
4671static int build_backend(BuildCtx *ctx)
4672{
4673 int op;
     /* NOTE(review): presumably reserves BC__MAX dynamic PC labels, one per
     ** opcode (handlers jump to labels such as =>BC_JLOOP) -- confirm
     ** against the DynASM API.
     */
4674 dasm_growpc(Dst, BC__MAX);
4675 build_subroutines(ctx);
     /* Everything emitted by build_ins() below is placed under .code_op. */
4676 |.code_op
     /* The op value is passed twice: once cast to BCOp and once as plain int. */
4677 for (op = 0; op < BC__MAX; op++)
4678 build_ins(ctx, (BCOp)op, op);
4679 return BC__MAX;
4680}
4681
4682/* Emit pseudo frame-info for all assembler functions. */
/*
** Writes hand-built call frame information to ctx->fp as assembler text.
** What is written depends on ctx->mode:
** - BUILD_elfasm: a .debug_frame section and, unless LJ_NO_UNWIND is set,
**   an additional .eh_frame section.
** - BUILD_machasm (only compiled in when LJ_NO_UNWIND is not set): an
**   __eh_frame section with one FDE per non-empty symbol in ctx->sym.
** - Any other mode: nothing is written.
** With LJ_HASFFI, lj_vm_ffi_call gets its own entry with an rbp-based
** frame description instead of the CFRAME_SIZE-based one.
*/
4683static void emit_asm_debug(BuildCtx *ctx)
4684{
     /* Byte offset of the vm_ffi_call global from the start of ctx->code.
     ** Used as the length of the first range (starting at .Lbegin); the
     ** lj_vm_ffi_call range is then codesz - fcofs bytes, i.e. it extends
     ** to the end of the generated code.
     */
4685 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4686 switch (ctx->mode) {
4687 case BUILD_elfasm:
4688 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
     /* .debug_frame CIE (.Lframe0); both FDEs below refer to it. */
4689 fprintf(ctx->fp,
4690 ".Lframe0:\n"
4691 "\t.long .LECIE0-.LSCIE0\n"
4692 ".LSCIE0:\n"
4693 "\t.long 0xffffffff\n"
4694 "\t.byte 0x1\n"
4695 "\t.string \"\"\n"
4696 "\t.uleb128 0x1\n"
4697 "\t.sleb128 -8\n"
4698 "\t.byte 0x10\n"
4699 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4700 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4701 "\t.align 8\n"
4702 ".LECIE0:\n\n");
     /* FDE for the range starting at .Lbegin: fcofs bytes long, with
     ** def_cfa_offset set to CFRAME_SIZE. The r13/r12 rules are only
     ** part of the output when LJ_NO_UNWIND is set.
     */
4703 fprintf(ctx->fp,
4704 ".LSFDE0:\n"
4705 "\t.long .LEFDE0-.LASFDE0\n"
4706 ".LASFDE0:\n"
4707 "\t.long .Lframe0\n"
4708 "\t.quad .Lbegin\n"
4709 "\t.quad %d\n"
4710 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4711 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4712 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4713 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4714 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4715#if LJ_NO_UNWIND
4716 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4717 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4718#endif
4719 "\t.align 8\n"
4720 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4721#if LJ_HASFFI
     /* FDE for lj_vm_ffi_call: covers the remaining codesz - fcofs bytes. */
4722 fprintf(ctx->fp,
4723 ".LSFDE1:\n"
4724 "\t.long .LEFDE1-.LASFDE1\n"
4725 ".LASFDE1:\n"
4726 "\t.long .Lframe0\n"
4727 "\t.quad lj_vm_ffi_call\n"
4728 "\t.quad %d\n"
4729 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4730 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4731 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4732 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4733 "\t.align 8\n"
4734 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4735#endif
4736#if !LJ_NO_UNWIND
     /* The .eh_frame section type is @unwind on Solaris, @progbits elsewhere. */
4737#if LJ_TARGET_SOLARIS
4738 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4739#else
4740 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4741#endif
     /* .eh_frame CIE (.Lframe1); it carries a pc-relative reference to
     ** lj_err_unwind_dwarf.
     */
4742 fprintf(ctx->fp,
4743 ".Lframe1:\n"
4744 "\t.long .LECIE1-.LSCIE1\n"
4745 ".LSCIE1:\n"
4746 "\t.long 0\n"
4747 "\t.byte 0x1\n"
4748 "\t.string \"zPR\"\n"
4749 "\t.uleb128 0x1\n"
4750 "\t.sleb128 -8\n"
4751 "\t.byte 0x10\n"
4752 "\t.uleb128 6\n" /* augmentation length */
4753 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4754 "\t.long lj_err_unwind_dwarf-.\n"
4755 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4756 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4757 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4758 "\t.align 8\n"
4759 ".LECIE1:\n\n");
     /* .eh_frame FDE for the range starting at .Lbegin (fcofs bytes,
     ** def_cfa_offset CFRAME_SIZE), attached to .Lframe1.
     */
4760 fprintf(ctx->fp,
4761 ".LSFDE2:\n"
4762 "\t.long .LEFDE2-.LASFDE2\n"
4763 ".LASFDE2:\n"
4764 "\t.long .LASFDE2-.Lframe1\n"
4765 "\t.long .Lbegin-.\n"
4766 "\t.long %d\n"
4767 "\t.uleb128 0\n" /* augmentation length */
4768 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4769 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4770 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4771 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4772 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4773 "\t.align 8\n"
4774 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4775#if LJ_HASFFI
     /* Separate CIE (.Lframe2) for lj_vm_ffi_call. Unlike .Lframe1 it
     ** contains no reference to lj_err_unwind_dwarf.
     */
4776 fprintf(ctx->fp,
4777 ".Lframe2:\n"
4778 "\t.long .LECIE2-.LSCIE2\n"
4779 ".LSCIE2:\n"
4780 "\t.long 0\n"
4781 "\t.byte 0x1\n"
4782 "\t.string \"zR\"\n"
4783 "\t.uleb128 0x1\n"
4784 "\t.sleb128 -8\n"
4785 "\t.byte 0x10\n"
4786 "\t.uleb128 1\n" /* augmentation length */
4787 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4788 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4789 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4790 "\t.align 8\n"
4791 ".LECIE2:\n\n");
     /* .eh_frame FDE for lj_vm_ffi_call (codesz - fcofs bytes), using .Lframe2. */
4792 fprintf(ctx->fp,
4793 ".LSFDE3:\n"
4794 "\t.long .LEFDE3-.LASFDE3\n"
4795 ".LASFDE3:\n"
4796 "\t.long .LASFDE3-.Lframe2\n"
4797 "\t.long lj_vm_ffi_call-.\n"
4798 "\t.long %d\n"
4799 "\t.uleb128 0\n" /* augmentation length */
4800 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4801 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4802 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4803 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4804 "\t.align 8\n"
4805 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4806#endif
4807#endif
4808 break;
4809#if !LJ_NO_UNWIND
4810 /* Mental note: never let Apple design an assembler.
4811 ** Or a linker. Or a plastic case. But I digress.
4812 */
4813 case BUILD_machasm: {
4814#if LJ_HASFFI
     /* Size of _lj_vm_ffi_call as found in the symbol loop; 0 = not seen. */
4815 int fcsize = 0;
4816#endif
4817 int i;
4818 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
     /* CIE (EH_frame1) shared by all per-symbol FDEs emitted in the loop.
     ** Here the fields are written with plain .byte directives where the
     ** ELF variant uses .uleb128/.sleb128.
     */
4819 fprintf(ctx->fp,
4820 "EH_frame1:\n"
4821 "\t.set L$set$x,LECIEX-LSCIEX\n"
4822 "\t.long L$set$x\n"
4823 "LSCIEX:\n"
4824 "\t.long 0\n"
4825 "\t.byte 0x1\n"
4826 "\t.ascii \"zPR\\0\"\n"
4827 "\t.byte 0x1\n"
4828 "\t.byte 128-8\n" /* 0x78: one-byte sleb128 encoding of -8 */
4829 "\t.byte 0x10\n"
4830 "\t.byte 6\n" /* augmentation length */
4831 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4832 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4833 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4834 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4835 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4836 "\t.align 3\n"
4837 "LECIEX:\n\n");
     /* One FDE per symbol. The symbol index i is embedded in every label
     ** (LSFDE<i>, L$set$<i>, ...) to keep them unique.
     */
4838 for (i = 0; i < ctx->nsym; i++) {
4839 const char *name = ctx->sym[i].name;
     /* Size = distance to the next symbol's offset.
     ** NOTE(review): this reads sym[nsym] on the last iteration --
     ** presumably an end-of-code sentinel entry; confirm where sym is built.
     */
4840 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
     /* Zero-sized symbols get no FDE. */
4841 if (size == 0) continue;
4842#if LJ_HASFFI
     /* _lj_vm_ffi_call is not emitted here; only its size is recorded so
     ** it can be described with its own CIE/FDE after the loop.
     */
4843 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4844#endif
4845 fprintf(ctx->fp,
4846 "%s.eh:\n"
4847 "LSFDE%d:\n"
4848 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4849 "\t.long L$set$%d\n"
4850 "LASFDE%d:\n"
4851 "\t.long LASFDE%d-EH_frame1\n"
4852 "\t.long %s-.\n"
4853 "\t.long %d\n"
4854 "\t.byte 0\n" /* augmentation length */
4855 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4856 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4857 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4858 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4859 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4860 "\t.align 3\n"
4861 "LEFDE%d:\n\n",
4862 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4863 }
4864#if LJ_HASFFI
     /* Only emitted if the loop above actually found _lj_vm_ffi_call. */
4865 if (fcsize) {
     /* Second CIE (EH_frame2), without the _lj_err_unwind_dwarf reference. */
4866 fprintf(ctx->fp,
4867 "EH_frame2:\n"
4868 "\t.set L$set$y,LECIEY-LSCIEY\n"
4869 "\t.long L$set$y\n"
4870 "LSCIEY:\n"
4871 "\t.long 0\n"
4872 "\t.byte 0x1\n"
4873 "\t.ascii \"zR\\0\"\n"
4874 "\t.byte 0x1\n"
4875 "\t.byte 128-8\n"
4876 "\t.byte 0x10\n"
4877 "\t.byte 1\n" /* augmentation length */
4878 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4879 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4880 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4881 "\t.align 3\n"
4882 "LECIEY:\n\n");
     /* FDE for _lj_vm_ffi_call, fcsize bytes long, attached to EH_frame2. */
4883 fprintf(ctx->fp,
4884 "_lj_vm_ffi_call.eh:\n"
4885 "LSFDEY:\n"
4886 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4887 "\t.long L$set$yy\n"
4888 "LASFDEY:\n"
4889 "\t.long LASFDEY-EH_frame2\n"
4890 "\t.long _lj_vm_ffi_call-.\n"
4891 "\t.long %d\n"
4892 "\t.byte 0\n" /* augmentation length */
4893 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4894 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4895 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4896 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4897 "\t.align 3\n"
4898 "LEFDEY:\n\n", fcsize);
4899 }
4900#endif
4901 fprintf(ctx->fp, ".subsections_via_symbols\n");
4902 }
4903 break;
4904#endif
4905 default: /* Difficult for other modes. */
4906 break;
4907 }
4908}
4909
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index bfa4946b..81b899fa 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,24 +115,74 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
122|.if not X64 // x86 stack layout. 122|.if not X64 // x86 stack layout.
123| 123|
124|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 124|.if WIN
125|
126|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
125|.macro saveregs_ 127|.macro saveregs_
126| push edi; push esi; push ebx 128| push edi; push esi; push ebx
129| push extern lj_err_unwind_win
130| fs; push dword [0]
131| fs; mov [0], esp
127| sub esp, CFRAME_SPACE 132| sub esp, CFRAME_SPACE
128|.endmacro 133|.endmacro
129|.macro saveregs 134|.macro restoreregs
130| push ebp; saveregs_ 135| add esp, CFRAME_SPACE
136| fs; pop dword [0]
137| pop edi // Short for esp += 4.
138| pop ebx; pop esi; pop edi; pop ebp
139|.endmacro
140|
141|.else
142|
143|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
144|.macro saveregs_
145| push edi; push esi; push ebx
146| sub esp, CFRAME_SPACE
131|.endmacro 147|.endmacro
132|.macro restoreregs 148|.macro restoreregs
133| add esp, CFRAME_SPACE 149| add esp, CFRAME_SPACE
134| pop ebx; pop esi; pop edi; pop ebp 150| pop ebx; pop esi; pop edi; pop ebp
135|.endmacro 151|.endmacro
136| 152|
153|.endif
154|
155|.macro saveregs
156| push ebp; saveregs_
157|.endmacro
158|
159|.if WIN
160|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
161|.define SAVE_NRES, aword [esp+aword*18]
162|.define SAVE_CFRAME, aword [esp+aword*17]
163|.define SAVE_L, aword [esp+aword*16]
164|//----- 16 byte aligned, ^^^ arguments from C caller
165|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
166|.define SAVE_R4, aword [esp+aword*14]
167|.define SAVE_R3, aword [esp+aword*13]
168|.define SAVE_R2, aword [esp+aword*12]
169|//----- 16 byte aligned
170|.define SAVE_R1, aword [esp+aword*11]
171|.define SEH_FUNC, aword [esp+aword*10]
172|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
173|.define UNUSED2, aword [esp+aword*8]
174|//----- 16 byte aligned
175|.define UNUSED1, aword [esp+aword*7]
176|.define SAVE_PC, aword [esp+aword*6]
177|.define TMP2, aword [esp+aword*5]
178|.define TMP1, aword [esp+aword*4]
179|//----- 16 byte aligned
180|.define ARG4, aword [esp+aword*3]
181|.define ARG3, aword [esp+aword*2]
182|.define ARG2, aword [esp+aword*1]
183|.define ARG1, aword [esp] //<-- esp while in interpreter.
184|//----- 16 byte aligned, ^^^ arguments for C callee
185|.else
137|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 186|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
138|.define SAVE_NRES, aword [esp+aword*14] 187|.define SAVE_NRES, aword [esp+aword*14]
139|.define SAVE_CFRAME, aword [esp+aword*13] 188|.define SAVE_CFRAME, aword [esp+aword*13]
@@ -154,6 +203,7 @@
154|.define ARG2, aword [esp+aword*1] 203|.define ARG2, aword [esp+aword*1]
155|.define ARG1, aword [esp] //<-- esp while in interpreter. 204|.define ARG1, aword [esp] //<-- esp while in interpreter.
156|//----- 16 byte aligned, ^^^ arguments for C callee 205|//----- 16 byte aligned, ^^^ arguments for C callee
206|.endif
157| 207|
158|// FPARGx overlaps ARGx and ARG(x+1) on x86. 208|// FPARGx overlaps ARGx and ARG(x+1) on x86.
159|.define FPARG3, qword [esp+qword*1] 209|.define FPARG3, qword [esp+qword*1]
@@ -389,7 +439,6 @@
389| fpop 439| fpop
390|.endmacro 440|.endmacro
391| 441|
392|.macro fdup; fld st0; .endmacro
393|.macro fpop1; fstp st1; .endmacro 442|.macro fpop1; fstp st1; .endmacro
394| 443|
395|// Synthesize SSE FP constants. 444|// Synthesize SSE FP constants.
@@ -555,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx)
555 |.else 604 |.else
556 | mov eax, FCARG2 // Error return status for vm_pcall. 605 | mov eax, FCARG2 // Error return status for vm_pcall.
557 | mov esp, FCARG1 606 | mov esp, FCARG1
607 |.if WIN
608 | lea FCARG1, SEH_NEXT
609 | fs; mov [0], FCARG1
610 |.endif
558 |.endif 611 |.endif
559 |->vm_unwind_c_eh: // Landing pad for external unwinder. 612 |->vm_unwind_c_eh: // Landing pad for external unwinder.
560 | mov L:RB, SAVE_L 613 | mov L:RB, SAVE_L
@@ -578,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx)
578 |.else 631 |.else
579 | and FCARG1, CFRAME_RAWMASK 632 | and FCARG1, CFRAME_RAWMASK
580 | mov esp, FCARG1 633 | mov esp, FCARG1
634 |.if WIN
635 | lea FCARG1, SEH_NEXT
636 | fs; mov [0], FCARG1
637 |.endif
581 |.endif 638 |.endif
582 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 639 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
583 | mov L:RB, SAVE_L 640 | mov L:RB, SAVE_L
@@ -591,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx)
591 | set_vmstate INTERP 648 | set_vmstate INTERP
592 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 649 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
593 | 650 |
651 |.if WIN and not X64
652 |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
653 | // (void *cframe, void *excptrec, void *unwinder, int errcode)
654 | mov [esp], FCARG1 // Return value for RtlUnwind.
655 | push FCARG2 // Exception record for RtlUnwind.
656 | push 0 // Ignored by RtlUnwind.
657 | push dword [FCARG1+CFRAME_OFS_SEH]
658 | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
659 | mov FCARG1, eax
660 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
661 | ret // Jump to unwinder.
662 |.endif
663 |
594 |//----------------------------------------------------------------------- 664 |//-----------------------------------------------------------------------
595 |//-- Grow stack for calls ----------------------------------------------- 665 |//-- Grow stack for calls -----------------------------------------------
596 |//----------------------------------------------------------------------- 666 |//-----------------------------------------------------------------------
@@ -646,17 +716,18 @@ static void build_subroutines(BuildCtx *ctx)
646 | lea KBASEa, [esp+CFRAME_RESUME] 716 | lea KBASEa, [esp+CFRAME_RESUME]
647 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 717 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
648 | add DISPATCH, GG_G2DISP 718 | add DISPATCH, GG_G2DISP
649 | mov L:RB->cframe, KBASEa
650 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 719 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
651 | mov SAVE_CFRAME, RDa 720 | mov SAVE_CFRAME, RDa
652 |.if X64 721 |.if X64
653 | mov SAVE_NRES, RD 722 | mov SAVE_NRES, RD
654 | mov SAVE_ERRF, RD 723 | mov SAVE_ERRF, RD
655 |.endif 724 |.endif
725 | mov L:RB->cframe, KBASEa
656 | cmp byte L:RB->status, RDL 726 | cmp byte L:RB->status, RDL
657 | je >3 // Initial resume (like a call). 727 | je >2 // Initial resume (like a call).
658 | 728 |
659 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
660 | set_vmstate INTERP 731 | set_vmstate INTERP
661 | mov byte L:RB->status, RDL 732 | mov byte L:RB->status, RDL
662 | mov BASE, L:RB->base 733 | mov BASE, L:RB->base
@@ -696,20 +767,19 @@ static void build_subroutines(BuildCtx *ctx)
696 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 767 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
697 |.endif 768 |.endif
698 | 769 |
770 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
699 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 771 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
700 | mov SAVE_CFRAME, KBASEa 772 | mov SAVE_CFRAME, KBASEa
701 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 773 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
774 | add DISPATCH, GG_G2DISP
702 |.if X64 775 |.if X64
703 | mov L:RB->cframe, rsp 776 | mov L:RB->cframe, rsp
704 |.else 777 |.else
705 | mov L:RB->cframe, esp 778 | mov L:RB->cframe, esp
706 |.endif 779 |.endif
707 | 780 |
708 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 781 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
709 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 782 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
710 | add DISPATCH, GG_G2DISP
711 |
712 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
713 | set_vmstate INTERP 783 | set_vmstate INTERP
714 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 784 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
715 | add PC, RA 785 | add PC, RA
@@ -747,14 +817,17 @@ static void build_subroutines(BuildCtx *ctx)
747 | 817 |
748 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 818 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
749 | sub KBASE, L:RB->top 819 | sub KBASE, L:RB->top
820 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
750 | mov SAVE_ERRF, 0 // No error function. 821 | mov SAVE_ERRF, 0 // No error function.
751 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 822 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
823 | add DISPATCH, GG_G2DISP
752 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 824 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
753 | 825 |
754 |.if X64 826 |.if X64
755 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 827 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
756 | mov SAVE_CFRAME, KBASEa 828 | mov SAVE_CFRAME, KBASEa
757 | mov L:RB->cframe, rsp 829 | mov L:RB->cframe, rsp
830 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
758 | 831 |
759 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 832 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
760 |.else 833 |.else
@@ -765,6 +838,7 @@ static void build_subroutines(BuildCtx *ctx)
765 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 838 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
766 | mov SAVE_CFRAME, KBASE 839 | mov SAVE_CFRAME, KBASE
767 | mov L:RB->cframe, esp 840 | mov L:RB->cframe, esp
841 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
768 | 842 |
769 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 843 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
770 |.endif 844 |.endif
@@ -872,13 +946,9 @@ static void build_subroutines(BuildCtx *ctx)
872 |.if DUALNUM 946 |.if DUALNUM
873 | mov TMP2, LJ_TISNUM 947 | mov TMP2, LJ_TISNUM
874 | mov TMP1, RC 948 | mov TMP1, RC
875 |.elif SSE 949 |.else
876 | cvtsi2sd xmm0, RC 950 | cvtsi2sd xmm0, RC
877 | movsd TMPQ, xmm0 951 | movsd TMPQ, xmm0
878 |.else
879 | mov ARG4, RC
880 | fild ARG4
881 | fstp TMPQ
882 |.endif 952 |.endif
883 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 953 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
884 | jmp >1 954 | jmp >1
@@ -932,6 +1002,19 @@ static void build_subroutines(BuildCtx *ctx)
932 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 1002 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
933 | jmp ->vm_call_dispatch_f 1003 | jmp ->vm_call_dispatch_f
934 | 1004 |
1005 |->vmeta_tgetr:
1006 | mov FCARG1, TAB:RB
1007 | mov RB, BASE // Save BASE.
1008 | mov FCARG2, RC // Caveat: FCARG2 == BASE
1009 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1010 | // cTValue * or NULL returned in eax (RC).
1011 | movzx RA, PC_RA
1012 | mov BASE, RB // Restore BASE.
1013 | test RC, RC
1014 | jnz ->BC_TGETR_Z
1015 | mov dword [BASE+RA*8+4], LJ_TNIL
1016 | jmp ->BC_TGETR2_Z
1017 |
935 |//----------------------------------------------------------------------- 1018 |//-----------------------------------------------------------------------
936 | 1019 |
937 |->vmeta_tsets: 1020 |->vmeta_tsets:
@@ -951,13 +1034,9 @@ static void build_subroutines(BuildCtx *ctx)
951 |.if DUALNUM 1034 |.if DUALNUM
952 | mov TMP2, LJ_TISNUM 1035 | mov TMP2, LJ_TISNUM
953 | mov TMP1, RC 1036 | mov TMP1, RC
954 |.elif SSE 1037 |.else
955 | cvtsi2sd xmm0, RC 1038 | cvtsi2sd xmm0, RC
956 | movsd TMPQ, xmm0 1039 | movsd TMPQ, xmm0
957 |.else
958 | mov ARG4, RC
959 | fild ARG4
960 | fstp TMPQ
961 |.endif 1040 |.endif
962 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 1041 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
963 | jmp >1 1042 | jmp >1
@@ -1023,6 +1102,33 @@ static void build_subroutines(BuildCtx *ctx)
1023 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1102 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1024 | jmp ->vm_call_dispatch_f 1103 | jmp ->vm_call_dispatch_f
1025 | 1104 |
1105 |->vmeta_tsetr:
1106 |.if X64WIN
1107 | mov L:CARG1d, SAVE_L
1108 | mov CARG3d, RC
1109 | mov L:CARG1d->base, BASE
1110 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1111 |.elif X64
1112 | mov L:CARG1d, SAVE_L
1113 | mov CARG2d, TAB:RB
1114 | mov L:CARG1d->base, BASE
1115 | mov RB, BASE // Save BASE.
1116 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1117 |.else
1118 | mov L:RA, SAVE_L
1119 | mov ARG2, TAB:RB
1120 | mov RB, BASE // Save BASE.
1121 | mov ARG3, RC
1122 | mov ARG1, L:RA
1123 | mov L:RA->base, BASE
1124 |.endif
1125 | mov SAVE_PC, PC
1126 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1127 | // TValue * returned in eax (RC).
1128 | movzx RA, PC_RA
1129 | mov BASE, RB // Restore BASE.
1130 | jmp ->BC_TSETR_Z
1131 |
1026 |//-- Comparison metamethods --------------------------------------------- 1132 |//-- Comparison metamethods ---------------------------------------------
1027 | 1133 |
1028 |->vmeta_comp: 1134 |->vmeta_comp:
@@ -1117,6 +1223,26 @@ static void build_subroutines(BuildCtx *ctx)
1117 | jmp <3 1223 | jmp <3
1118 |.endif 1224 |.endif
1119 | 1225 |
1226 |->vmeta_istype:
1227 |.if X64
1228 | mov L:RB, SAVE_L
1229 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1230 | mov CARG2d, RA
1231 | movzx CARG3d, PC_RD
1232 | mov L:CARG1d, L:RB
1233 |.else
1234 | movzx RD, PC_RD
1235 | mov ARG2, RA
1236 | mov L:RB, SAVE_L
1237 | mov ARG3, RD
1238 | mov ARG1, L:RB
1239 | mov L:RB->base, BASE
1240 |.endif
1241 | mov SAVE_PC, PC
1242 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1243 | mov BASE, L:RB->base
1244 | jmp <6
1245 |
1120 |//-- Arithmetic metamethods --------------------------------------------- 1246 |//-- Arithmetic metamethods ---------------------------------------------
1121 | 1247 |
1122 |->vmeta_arith_vno: 1248 |->vmeta_arith_vno:
@@ -1293,19 +1419,6 @@ static void build_subroutines(BuildCtx *ctx)
1293 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1419 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1294 |.endmacro 1420 |.endmacro
1295 | 1421 |
1296 |.macro .ffunc_n, name
1297 | .ffunc_1 name
1298 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1299 | fld qword [BASE]
1300 |.endmacro
1301 |
1302 |.macro .ffunc_n, name, op
1303 | .ffunc_1 name
1304 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1305 | op
1306 | fld qword [BASE]
1307 |.endmacro
1308 |
1309 |.macro .ffunc_nsse, name, op 1422 |.macro .ffunc_nsse, name, op
1310 | .ffunc_1 name 1423 | .ffunc_1 name
1311 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1424 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1316,14 +1429,6 @@ static void build_subroutines(BuildCtx *ctx)
1316 | .ffunc_nsse name, movsd 1429 | .ffunc_nsse name, movsd
1317 |.endmacro 1430 |.endmacro
1318 | 1431 |
1319 |.macro .ffunc_nn, name
1320 | .ffunc_2 name
1321 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1322 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1323 | fld qword [BASE]
1324 | fld qword [BASE+8]
1325 |.endmacro
1326 |
1327 |.macro .ffunc_nnsse, name 1432 |.macro .ffunc_nnsse, name
1328 | .ffunc_2 name 1433 | .ffunc_2 name
1329 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1434 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1421,7 +1526,7 @@ static void build_subroutines(BuildCtx *ctx)
1421 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. 1526 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1422 | mov [BASE-8], TAB:RB 1527 | mov [BASE-8], TAB:RB
1423 | mov RA, TAB:RB->hmask 1528 | mov RA, TAB:RB->hmask
1424 | and RA, STR:RC->hash 1529 | and RA, STR:RC->sid
1425 | imul RA, #NODE 1530 | imul RA, #NODE
1426 | add NODE:RA, TAB:RB->node 1531 | add NODE:RA, TAB:RB->node
1427 |3: // Rearranged logic, because we expect _not_ to find the key. 1532 |3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1529,11 +1634,7 @@ static void build_subroutines(BuildCtx *ctx)
1529 |.else 1634 |.else
1530 | jae ->fff_fallback 1635 | jae ->fff_fallback
1531 |.endif 1636 |.endif
1532 |.if SSE
1533 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1637 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1534 |.else
1535 | fld qword [BASE]; jmp ->fff_resn
1536 |.endif
1537 | 1638 |
1538 |.ffunc_1 tostring 1639 |.ffunc_1 tostring
1539 | // Only handles the string or number case inline. 1640 | // Only handles the string or number case inline.
@@ -1558,9 +1659,9 @@ static void build_subroutines(BuildCtx *ctx)
1558 |.endif 1659 |.endif
1559 | mov L:FCARG1, L:RB 1660 | mov L:FCARG1, L:RB
1560 |.if DUALNUM 1661 |.if DUALNUM
1561 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1662 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1562 |.else 1663 |.else
1563 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1664 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1564 |.endif 1665 |.endif
1565 | // GCstr returned in eax (RD). 1666 | // GCstr returned in eax (RD).
1566 | mov BASE, L:RB->base 1667 | mov BASE, L:RB->base
@@ -1651,19 +1752,12 @@ static void build_subroutines(BuildCtx *ctx)
1651 | add RD, 1 1752 | add RD, 1
1652 | mov dword [BASE-4], LJ_TISNUM 1753 | mov dword [BASE-4], LJ_TISNUM
1653 | mov dword [BASE-8], RD 1754 | mov dword [BASE-8], RD
1654 |.elif SSE 1755 |.else
1655 | movsd xmm0, qword [BASE+8] 1756 | movsd xmm0, qword [BASE+8]
1656 | sseconst_1 xmm1, RBa 1757 | sseconst_1 xmm1, RBa
1657 | addsd xmm0, xmm1 1758 | addsd xmm0, xmm1
1658 | cvtsd2si RD, xmm0 1759 | cvttsd2si RD, xmm0
1659 | movsd qword [BASE-8], xmm0 1760 | movsd qword [BASE-8], xmm0
1660 |.else
1661 | fld qword [BASE+8]
1662 | fld1
1663 | faddp st1
1664 | fist ARG1
1665 | fstp qword [BASE-8]
1666 | mov RD, ARG1
1667 |.endif 1761 |.endif
1668 | mov TAB:RB, [BASE] 1762 | mov TAB:RB, [BASE]
1669 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1763 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1710,12 +1804,9 @@ static void build_subroutines(BuildCtx *ctx)
1710 |.if DUALNUM 1804 |.if DUALNUM
1711 | mov dword [BASE+12], LJ_TISNUM 1805 | mov dword [BASE+12], LJ_TISNUM
1712 | mov dword [BASE+8], 0 1806 | mov dword [BASE+8], 0
1713 |.elif SSE 1807 |.else
1714 | xorps xmm0, xmm0 1808 | xorps xmm0, xmm0
1715 | movsd qword [BASE+8], xmm0 1809 | movsd qword [BASE+8], xmm0
1716 |.else
1717 | fldz
1718 | fstp qword [BASE+8]
1719 |.endif 1810 |.endif
1720 | mov RD, 1+3 1811 | mov RD, 1+3
1721 | jmp ->fff_res 1812 | jmp ->fff_res
@@ -1822,7 +1913,6 @@ static void build_subroutines(BuildCtx *ctx)
1822 | mov ARG3, RA 1913 | mov ARG3, RA
1823 |.endif 1914 |.endif
1824 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1915 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1825 | set_vmstate INTERP
1826 | 1916 |
1827 | mov L:RB, SAVE_L 1917 | mov L:RB, SAVE_L
1828 |.if X64 1918 |.if X64
@@ -1831,6 +1921,9 @@ static void build_subroutines(BuildCtx *ctx)
1831 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1921 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1832 |.endif 1922 |.endif
1833 | mov BASE, L:RB->base 1923 | mov BASE, L:RB->base
1924 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1925 | set_vmstate INTERP
1926 |
1834 | cmp eax, LUA_YIELD 1927 | cmp eax, LUA_YIELD
1835 | ja >8 1928 | ja >8
1836 |4: 1929 |4:
@@ -1945,12 +2038,10 @@ static void build_subroutines(BuildCtx *ctx)
1945 |->fff_resi: // Dummy. 2038 |->fff_resi: // Dummy.
1946 |.endif 2039 |.endif
1947 | 2040 |
1948 |.if SSE
1949 |->fff_resn: 2041 |->fff_resn:
1950 | mov PC, [BASE-4] 2042 | mov PC, [BASE-4]
1951 | fstp qword [BASE-8] 2043 | fstp qword [BASE-8]
1952 | jmp ->fff_res1 2044 | jmp ->fff_res1
1953 |.endif
1954 | 2045 |
1955 | .ffunc_1 math_abs 2046 | .ffunc_1 math_abs
1956 |.if DUALNUM 2047 |.if DUALNUM
@@ -1974,8 +2065,6 @@ static void build_subroutines(BuildCtx *ctx)
1974 |.else 2065 |.else
1975 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2066 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1976 |.endif 2067 |.endif
1977 |
1978 |.if SSE
1979 | movsd xmm0, qword [BASE] 2068 | movsd xmm0, qword [BASE]
1980 | sseconst_abs xmm1, RDa 2069 | sseconst_abs xmm1, RDa
1981 | andps xmm0, xmm1 2070 | andps xmm0, xmm1
@@ -1983,15 +2072,6 @@ static void build_subroutines(BuildCtx *ctx)
1983 | mov PC, [BASE-4] 2072 | mov PC, [BASE-4]
1984 | movsd qword [BASE-8], xmm0 2073 | movsd qword [BASE-8], xmm0
1985 | // fallthrough 2074 | // fallthrough
1986 |.else
1987 | fld qword [BASE]
1988 | fabs
1989 | // fallthrough
1990 |->fff_resxmm0: // Dummy.
1991 |->fff_resn:
1992 | mov PC, [BASE-4]
1993 | fstp qword [BASE-8]
1994 |.endif
1995 | 2075 |
1996 |->fff_res1: 2076 |->fff_res1:
1997 | mov RD, 1+1 2077 | mov RD, 1+1
@@ -2018,6 +2098,12 @@ static void build_subroutines(BuildCtx *ctx)
2018 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2098 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
2019 | jmp ->vm_return 2099 | jmp ->vm_return
2020 | 2100 |
2101 |.if X64
2102 |.define fff_resfp, fff_resxmm0
2103 |.else
2104 |.define fff_resfp, fff_resn
2105 |.endif
2106 |
2021 |.macro math_round, func 2107 |.macro math_round, func
2022 | .ffunc math_ .. func 2108 | .ffunc math_ .. func
2023 |.if DUALNUM 2109 |.if DUALNUM
@@ -2028,107 +2114,75 @@ static void build_subroutines(BuildCtx *ctx)
2028 |.else 2114 |.else
2029 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2115 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2030 |.endif 2116 |.endif
2031 |.if SSE
2032 | movsd xmm0, qword [BASE] 2117 | movsd xmm0, qword [BASE]
2033 | call ->vm_ .. func 2118 | call ->vm_ .. func .. _sse
2034 | .if DUALNUM 2119 |.if DUALNUM
2035 | cvtsd2si RB, xmm0 2120 | cvttsd2si RB, xmm0
2036 | cmp RB, 0x80000000 2121 | cmp RB, 0x80000000
2037 | jne ->fff_resi 2122 | jne ->fff_resi
2038 | cvtsi2sd xmm1, RB 2123 | cvtsi2sd xmm1, RB
2039 | ucomisd xmm0, xmm1 2124 | ucomisd xmm0, xmm1
2040 | jp ->fff_resxmm0 2125 | jp ->fff_resxmm0
2041 | je ->fff_resi 2126 | je ->fff_resi
2042 | .endif
2043 | jmp ->fff_resxmm0
2044 |.else
2045 | fld qword [BASE]
2046 | call ->vm_ .. func
2047 | .if DUALNUM
2048 | fist ARG1
2049 | mov RB, ARG1
2050 | cmp RB, 0x80000000; jne >2
2051 | fdup
2052 | fild ARG1
2053 | fcomparepp
2054 | jp ->fff_resn
2055 | jne ->fff_resn
2056 |2:
2057 | fpop
2058 | jmp ->fff_resi
2059 | .else
2060 | jmp ->fff_resn
2061 | .endif
2062 |.endif 2127 |.endif
2128 | jmp ->fff_resxmm0
2063 |.endmacro 2129 |.endmacro
2064 | 2130 |
2065 | math_round floor 2131 | math_round floor
2066 | math_round ceil 2132 | math_round ceil
2067 | 2133 |
2068 |.if SSE
2069 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2134 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2070 |.else
2071 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2072 |.endif
2073 | 2135 |
2074 |.ffunc math_log 2136 |.ffunc math_log
2075 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2137 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2076 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2138 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2077 | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn 2139 | movsd xmm0, qword [BASE]
2078 | 2140 |.if not X64
2079 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2141 | movsd FPARG1, xmm0
2080 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2142 |.endif
2081 | 2143 | mov RB, BASE
2082 |.ffunc_n math_sin; fsin; jmp ->fff_resn 2144 | call extern log
2083 |.ffunc_n math_cos; fcos; jmp ->fff_resn 2145 | mov BASE, RB
2084 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn 2146 | jmp ->fff_resfp
2085 |
2086 |.ffunc_n math_asin
2087 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
2088 | jmp ->fff_resn
2089 |.ffunc_n math_acos
2090 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
2091 | jmp ->fff_resn
2092 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2093 | 2147 |
2094 |.macro math_extern, func 2148 |.macro math_extern, func
2095 |.if SSE
2096 | .ffunc_nsse math_ .. func 2149 | .ffunc_nsse math_ .. func
2097 | .if not X64 2150 |.if not X64
2098 | movsd FPARG1, xmm0 2151 | movsd FPARG1, xmm0
2099 | .endif
2100 |.else
2101 | .ffunc_n math_ .. func
2102 | fstp FPARG1
2103 |.endif 2152 |.endif
2104 | mov RB, BASE 2153 | mov RB, BASE
2105 | call extern lj_vm_ .. func 2154 | call extern func
2106 | mov BASE, RB 2155 | mov BASE, RB
2107 | .if X64 2156 | jmp ->fff_resfp
2108 | jmp ->fff_resxmm0 2157 |.endmacro
2109 | .else 2158 |
2110 | jmp ->fff_resn 2159 |.macro math_extern2, func
2111 | .endif 2160 | .ffunc_nnsse math_ .. func
2161 |.if not X64
2162 | movsd FPARG1, xmm0
2163 | movsd FPARG3, xmm1
2164 |.endif
2165 | mov RB, BASE
2166 | call extern func
2167 | mov BASE, RB
2168 | jmp ->fff_resfp
2112 |.endmacro 2169 |.endmacro
2113 | 2170 |
2171 | math_extern log10
2172 | math_extern exp
2173 | math_extern sin
2174 | math_extern cos
2175 | math_extern tan
2176 | math_extern asin
2177 | math_extern acos
2178 | math_extern atan
2114 | math_extern sinh 2179 | math_extern sinh
2115 | math_extern cosh 2180 | math_extern cosh
2116 | math_extern tanh 2181 | math_extern tanh
2182 | math_extern2 pow
2183 | math_extern2 atan2
2184 | math_extern2 fmod
2117 | 2185 |
2118 |->ff_math_deg:
2119 |.if SSE
2120 |.ffunc_nsse math_rad
2121 | mov CFUNC:RB, [BASE-8]
2122 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2123 | jmp ->fff_resxmm0
2124 |.else
2125 |.ffunc_n math_rad
2126 | mov CFUNC:RB, [BASE-8]
2127 | fmul qword CFUNC:RB->upvalue[0]
2128 | jmp ->fff_resn
2129 |.endif
2130 |
2131 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2132 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2186 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2133 | 2187 |
2134 |.ffunc_1 math_frexp 2188 |.ffunc_1 math_frexp
@@ -2143,65 +2197,34 @@ static void build_subroutines(BuildCtx *ctx)
2143 | cmp RB, 0x00200000; jb >4 2197 | cmp RB, 0x00200000; jb >4
2144 |1: 2198 |1:
2145 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2199 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2146 |.if SSE
2147 | cvtsi2sd xmm0, RB 2200 | cvtsi2sd xmm0, RB
2148 |.else
2149 | mov TMP1, RB; fild TMP1
2150 |.endif
2151 | mov RB, [BASE-4] 2201 | mov RB, [BASE-4]
2152 | and RB, 0x800fffff // Mask off exponent. 2202 | and RB, 0x800fffff // Mask off exponent.
2153 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2203 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2154 | mov [BASE-4], RB 2204 | mov [BASE-4], RB
2155 |2: 2205 |2:
2156 |.if SSE
2157 | movsd qword [BASE], xmm0 2206 | movsd qword [BASE], xmm0
2158 |.else
2159 | fstp qword [BASE]
2160 |.endif
2161 | mov RD, 1+2 2207 | mov RD, 1+2
2162 | jmp ->fff_res 2208 | jmp ->fff_res
2163 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2209 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2164 |.if SSE
2165 | xorps xmm0, xmm0; jmp <2 2210 | xorps xmm0, xmm0; jmp <2
2166 |.else
2167 | fldz; jmp <2
2168 |.endif
2169 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2211 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2170 |.if SSE
2171 | movsd xmm0, qword [BASE] 2212 | movsd xmm0, qword [BASE]
2172 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2213 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2173 | mulsd xmm0, xmm1 2214 | mulsd xmm0, xmm1
2174 | movsd qword [BASE-8], xmm0 2215 | movsd qword [BASE-8], xmm0
2175 |.else
2176 | fld qword [BASE]
2177 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2178 | fstp qword [BASE-8]
2179 |.endif
2180 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2216 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2181 | 2217 |
2182 |.if SSE
2183 |.ffunc_nsse math_modf 2218 |.ffunc_nsse math_modf
2184 |.else
2185 |.ffunc_n math_modf
2186 |.endif
2187 | mov RB, [BASE+4] 2219 | mov RB, [BASE+4]
2188 | mov PC, [BASE-4] 2220 | mov PC, [BASE-4]
2189 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2221 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2190 |.if SSE
2191 | movaps xmm4, xmm0 2222 | movaps xmm4, xmm0
2192 | call ->vm_trunc 2223 | call ->vm_trunc_sse
2193 | subsd xmm4, xmm0 2224 | subsd xmm4, xmm0
2194 |1: 2225 |1:
2195 | movsd qword [BASE-8], xmm0 2226 | movsd qword [BASE-8], xmm0
2196 | movsd qword [BASE], xmm4 2227 | movsd qword [BASE], xmm4
2197 |.else
2198 | fdup
2199 | call ->vm_trunc
2200 | fsub st1, st0
2201 |1:
2202 | fstp qword [BASE-8]
2203 | fstp qword [BASE]
2204 |.endif
2205 | mov RC, [BASE-4]; mov RB, [BASE+4] 2228 | mov RC, [BASE-4]; mov RB, [BASE+4]
2206 | xor RC, RB; js >3 // Need to adjust sign? 2229 | xor RC, RB; js >3 // Need to adjust sign?
2207 |2: 2230 |2:
@@ -2211,25 +2234,10 @@ static void build_subroutines(BuildCtx *ctx)
2211 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2234 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2212 | jmp <2 2235 | jmp <2
2213 |4: 2236 |4:
2214 |.if SSE
2215 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2237 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2216 |.else
2217 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2218 |.endif
2219 |
2220 |.ffunc_nnr math_fmod
2221 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2222 | fpop1
2223 | jmp ->fff_resn
2224 | 2238 |
2225 |.if SSE 2239 |.macro math_minmax, name, cmovop, sseop
2226 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 2240 | .ffunc_1 name
2227 |.else
2228 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2229 |.endif
2230 |
2231 |.macro math_minmax, name, cmovop, fcmovop, sseop
2232 | .ffunc name
2233 | mov RA, 2 2241 | mov RA, 2
2234 | cmp dword [BASE+4], LJ_TISNUM 2242 | cmp dword [BASE+4], LJ_TISNUM
2235 |.if DUALNUM 2243 |.if DUALNUM
@@ -2245,12 +2253,7 @@ static void build_subroutines(BuildCtx *ctx)
2245 |3: 2253 |3:
2246 | ja ->fff_fallback 2254 | ja ->fff_fallback
2247 | // Convert intermediate result to number and continue below. 2255 | // Convert intermediate result to number and continue below.
2248 |.if SSE
2249 | cvtsi2sd xmm0, RB 2256 | cvtsi2sd xmm0, RB
2250 |.else
2251 | mov TMP1, RB
2252 | fild TMP1
2253 |.endif
2254 | jmp >6 2257 | jmp >6
2255 |4: 2258 |4:
2256 | ja ->fff_fallback 2259 | ja ->fff_fallback
@@ -2258,7 +2261,6 @@ static void build_subroutines(BuildCtx *ctx)
2258 | jae ->fff_fallback 2261 | jae ->fff_fallback
2259 |.endif 2262 |.endif
2260 | 2263 |
2261 |.if SSE
2262 | movsd xmm0, qword [BASE] 2264 | movsd xmm0, qword [BASE]
2263 |5: // Handle numbers or integers. 2265 |5: // Handle numbers or integers.
2264 | cmp RA, RD; jae ->fff_resxmm0 2266 | cmp RA, RD; jae ->fff_resxmm0
@@ -2277,48 +2279,13 @@ static void build_subroutines(BuildCtx *ctx)
2277 | sseop xmm0, xmm1 2279 | sseop xmm0, xmm1
2278 | add RA, 1 2280 | add RA, 1
2279 | jmp <5 2281 | jmp <5
2280 |.else
2281 | fld qword [BASE]
2282 |5: // Handle numbers or integers.
2283 | cmp RA, RD; jae ->fff_resn
2284 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2285 |.if DUALNUM
2286 | jb >6
2287 | ja >9
2288 | fild dword [BASE+RA*8-8]
2289 | jmp >7
2290 |.else
2291 | jae >9
2292 |.endif
2293 |6:
2294 | fld qword [BASE+RA*8-8]
2295 |7:
2296 | fucomi st1; fcmovop st1; fpop1
2297 | add RA, 1
2298 | jmp <5
2299 |.endif
2300 |.endmacro 2282 |.endmacro
2301 | 2283 |
2302 | math_minmax math_min, cmovg, fcmovnbe, minsd 2284 | math_minmax math_min, cmovg, minsd
2303 | math_minmax math_max, cmovl, fcmovbe, maxsd 2285 | math_minmax math_max, cmovl, maxsd
2304 |.if not SSE
2305 |9:
2306 | fpop; jmp ->fff_fallback
2307 |.endif
2308 | 2286 |
2309 |//-- String library ----------------------------------------------------- 2287 |//-- String library -----------------------------------------------------
2310 | 2288 |
2311 |.ffunc_1 string_len
2312 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2313 | mov STR:RB, [BASE]
2314 |.if DUALNUM
2315 | mov RB, dword STR:RB->len; jmp ->fff_resi
2316 |.elif SSE
2317 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2318 |.else
2319 | fild dword STR:RB->len; jmp ->fff_resn
2320 |.endif
2321 |
2322 |.ffunc string_byte // Only handle the 1-arg case here. 2289 |.ffunc string_byte // Only handle the 1-arg case here.
2323 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2290 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2324 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2291 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2329,10 +2296,8 @@ static void build_subroutines(BuildCtx *ctx)
2329 | movzx RB, byte STR:RB[1] 2296 | movzx RB, byte STR:RB[1]
2330 |.if DUALNUM 2297 |.if DUALNUM
2331 | jmp ->fff_resi 2298 | jmp ->fff_resi
2332 |.elif SSE
2333 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2334 |.else 2299 |.else
2335 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2300 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2336 |.endif 2301 |.endif
2337 | 2302 |
2338 |.ffunc string_char // Only handle the 1-arg case here. 2303 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2344,16 +2309,11 @@ static void build_subroutines(BuildCtx *ctx)
2344 | mov RB, dword [BASE] 2309 | mov RB, dword [BASE]
2345 | cmp RB, 255; ja ->fff_fallback 2310 | cmp RB, 255; ja ->fff_fallback
2346 | mov TMP2, RB 2311 | mov TMP2, RB
2347 |.elif SSE 2312 |.else
2348 | jae ->fff_fallback 2313 | jae ->fff_fallback
2349 | cvttsd2si RB, qword [BASE] 2314 | cvttsd2si RB, qword [BASE]
2350 | cmp RB, 255; ja ->fff_fallback 2315 | cmp RB, 255; ja ->fff_fallback
2351 | mov TMP2, RB 2316 | mov TMP2, RB
2352 |.else
2353 | jae ->fff_fallback
2354 | fld qword [BASE]
2355 | fistp TMP2
2356 | cmp TMP2, 255; ja ->fff_fallback
2357 |.endif 2317 |.endif
2358 |.if X64 2318 |.if X64
2359 | mov TMP3, 1 2319 | mov TMP3, 1
@@ -2374,6 +2334,7 @@ static void build_subroutines(BuildCtx *ctx)
2374 |.endif 2334 |.endif
2375 | mov SAVE_PC, PC 2335 | mov SAVE_PC, PC
2376 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2336 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2337 |->fff_resstr:
2377 | // GCstr * returned in eax (RD). 2338 | // GCstr * returned in eax (RD).
2378 | mov BASE, L:RB->base 2339 | mov BASE, L:RB->base
2379 | mov PC, [BASE-4] 2340 | mov PC, [BASE-4]
@@ -2391,14 +2352,10 @@ static void build_subroutines(BuildCtx *ctx)
2391 | jne ->fff_fallback 2352 | jne ->fff_fallback
2392 | mov RB, dword [BASE+16] 2353 | mov RB, dword [BASE+16]
2393 | mov TMP2, RB 2354 | mov TMP2, RB
2394 |.elif SSE 2355 |.else
2395 | jae ->fff_fallback 2356 | jae ->fff_fallback
2396 | cvttsd2si RB, qword [BASE+16] 2357 | cvttsd2si RB, qword [BASE+16]
2397 | mov TMP2, RB 2358 | mov TMP2, RB
2398 |.else
2399 | jae ->fff_fallback
2400 | fld qword [BASE+16]
2401 | fistp TMP2
2402 |.endif 2359 |.endif
2403 |1: 2360 |1:
2404 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2361 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2413,12 +2370,8 @@ static void build_subroutines(BuildCtx *ctx)
2413 | mov RB, STR:RB->len 2370 | mov RB, STR:RB->len
2414 |.if DUALNUM 2371 |.if DUALNUM
2415 | mov RA, dword [BASE+8] 2372 | mov RA, dword [BASE+8]
2416 |.elif SSE
2417 | cvttsd2si RA, qword [BASE+8]
2418 |.else 2373 |.else
2419 | fld qword [BASE+8] 2374 | cvttsd2si RA, qword [BASE+8]
2420 | fistp ARG3
2421 | mov RA, ARG3
2422 |.endif 2375 |.endif
2423 | mov RC, TMP2 2376 | mov RC, TMP2
2424 | cmp RB, RC // len < end? (unsigned compare) 2377 | cmp RB, RC // len < end? (unsigned compare)
@@ -2462,136 +2415,34 @@ static void build_subroutines(BuildCtx *ctx)
2462 | xor RC, RC // Zero length. Any ptr in RB is ok. 2415 | xor RC, RC // Zero length. Any ptr in RB is ok.
2463 | jmp <4 2416 | jmp <4
2464 | 2417 |
2465 |.ffunc string_rep // Only handle the 1-char case inline. 2418 |.macro ffstring_op, name
2466 | ffgccheck 2419 | .ffunc_1 string_ .. name
2467 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2468 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2469 | cmp dword [BASE+12], LJ_TISNUM
2470 | mov STR:RB, [BASE]
2471 |.if DUALNUM
2472 | jne ->fff_fallback
2473 | mov RC, dword [BASE+8]
2474 |.elif SSE
2475 | jae ->fff_fallback
2476 | cvttsd2si RC, qword [BASE+8]
2477 |.else
2478 | jae ->fff_fallback
2479 | fld qword [BASE+8]
2480 | fistp TMP2
2481 | mov RC, TMP2
2482 |.endif
2483 | test RC, RC
2484 | jle ->fff_emptystr // Count <= 0? (or non-int)
2485 | cmp dword STR:RB->len, 1
2486 | jb ->fff_emptystr // Zero length string?
2487 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2488 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2489 | movzx RA, byte STR:RB[1]
2490 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2491 |.if X64
2492 | mov TMP3, RC
2493 |.else
2494 | mov ARG3, RC
2495 |.endif
2496 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2497 | mov [RB], RAL
2498 | add RB, 1
2499 | sub RC, 1
2500 | jnz <1
2501 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2502 | jmp ->fff_newstr
2503 |
2504 |.ffunc_1 string_reverse
2505 | ffgccheck 2420 | ffgccheck
2506 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2421 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2507 | mov STR:RB, [BASE] 2422 | mov L:RB, SAVE_L
2508 | mov RC, STR:RB->len 2423 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2509 | test RC, RC 2424 | mov L:RB->base, BASE
2510 | jz ->fff_emptystr // Zero length string? 2425 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2511 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2426 | mov RCa, SBUF:FCARG1->b
2512 | add RB, #STR 2427 | mov SBUF:FCARG1->L, L:RB
2513 | mov TMP2, PC // Need another temp register. 2428 | mov SBUF:FCARG1->w, RCa
2514 |.if X64 2429 | mov SAVE_PC, PC
2515 | mov TMP3, RC 2430 | call extern lj_buf_putstr_ .. name .. @8
2516 |.else 2431 | mov FCARG1, eax
2517 | mov ARG3, RC 2432 | call extern lj_buf_tostr@4
2518 |.endif 2433 | jmp ->fff_resstr
2519 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2520 |1:
2521 | movzx RA, byte [RB]
2522 | add RB, 1
2523 | sub RC, 1
2524 | mov [PC+RC], RAL
2525 | jnz <1
2526 | mov RD, PC
2527 | mov PC, TMP2
2528 | jmp ->fff_newstr
2529 |
2530 |.macro ffstring_case, name, lo, hi
2531 | .ffunc_1 name
2532 | ffgccheck
2533 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2534 | mov STR:RB, [BASE]
2535 | mov RC, STR:RB->len
2536 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2537 | add RB, #STR
2538 | mov TMP2, PC // Need another temp register.
2539 |.if X64
2540 | mov TMP3, RC
2541 |.else
2542 | mov ARG3, RC
2543 |.endif
2544 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2545 | jmp >3
2546 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2547 | movzx RA, byte [RB+RC]
2548 | cmp RA, lo
2549 | jb >2
2550 | cmp RA, hi
2551 | ja >2
2552 | xor RA, 0x20
2553 |2:
2554 | mov [PC+RC], RAL
2555 |3:
2556 | sub RC, 1
2557 | jns <1
2558 | mov RD, PC
2559 | mov PC, TMP2
2560 | jmp ->fff_newstr
2561 |.endmacro 2434 |.endmacro
2562 | 2435 |
2563 |ffstring_case string_lower, 0x41, 0x5a 2436 |ffstring_op reverse
2564 |ffstring_case string_upper, 0x61, 0x7a 2437 |ffstring_op lower
2565 | 2438 |ffstring_op upper
2566 |//-- Table library ------------------------------------------------------
2567 |
2568 |.ffunc_1 table_getn
2569 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2570 | mov RB, BASE // Save BASE.
2571 | mov TAB:FCARG1, [BASE]
2572 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2573 | // Length of table returned in eax (RD).
2574 | mov BASE, RB // Restore BASE.
2575 |.if DUALNUM
2576 | mov RB, RD; jmp ->fff_resi
2577 |.elif SSE
2578 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2579 |.else
2580 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2581 |.endif
2582 | 2439 |
2583 |//-- Bit library -------------------------------------------------------- 2440 |//-- Bit library --------------------------------------------------------
2584 | 2441 |
2585 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2586 |
2587 |.macro .ffunc_bit, name, kind, fdef 2442 |.macro .ffunc_bit, name, kind, fdef
2588 | fdef name 2443 | fdef name
2589 |.if kind == 2 2444 |.if kind == 2
2590 |.if SSE
2591 | sseconst_tobit xmm1, RBa 2445 | sseconst_tobit xmm1, RBa
2592 |.else
2593 | mov TMP1, TOBIT_BIAS
2594 |.endif
2595 |.endif 2446 |.endif
2596 | cmp dword [BASE+4], LJ_TISNUM 2447 | cmp dword [BASE+4], LJ_TISNUM
2597 |.if DUALNUM 2448 |.if DUALNUM
@@ -2607,24 +2458,12 @@ static void build_subroutines(BuildCtx *ctx)
2607 |.else 2458 |.else
2608 | jae ->fff_fallback 2459 | jae ->fff_fallback
2609 |.endif 2460 |.endif
2610 |.if SSE
2611 | movsd xmm0, qword [BASE] 2461 | movsd xmm0, qword [BASE]
2612 |.if kind < 2 2462 |.if kind < 2
2613 | sseconst_tobit xmm1, RBa 2463 | sseconst_tobit xmm1, RBa
2614 |.endif 2464 |.endif
2615 | addsd xmm0, xmm1 2465 | addsd xmm0, xmm1
2616 | movd RB, xmm0 2466 | movd RB, xmm0
2617 |.else
2618 | fld qword [BASE]
2619 |.if kind < 2
2620 | mov TMP1, TOBIT_BIAS
2621 |.endif
2622 | fadd TMP1
2623 | fstp FPARG1
2624 |.if kind > 0
2625 | mov RB, ARG1
2626 |.endif
2627 |.endif
2628 |2: 2467 |2:
2629 |.endmacro 2468 |.endmacro
2630 | 2469 |
@@ -2633,15 +2472,7 @@ static void build_subroutines(BuildCtx *ctx)
2633 |.endmacro 2472 |.endmacro
2634 | 2473 |
2635 |.ffunc_bit bit_tobit, 0 2474 |.ffunc_bit bit_tobit, 0
2636 |.if DUALNUM or SSE
2637 |.if not SSE
2638 | mov RB, ARG1
2639 |.endif
2640 | jmp ->fff_resbit 2475 | jmp ->fff_resbit
2641 |.else
2642 | fild ARG1
2643 | jmp ->fff_resn
2644 |.endif
2645 | 2476 |
2646 |.macro .ffunc_bit_op, name, ins 2477 |.macro .ffunc_bit_op, name, ins
2647 | .ffunc_bit name, 2 2478 | .ffunc_bit name, 2
@@ -2661,17 +2492,10 @@ static void build_subroutines(BuildCtx *ctx)
2661 |.else 2492 |.else
2662 | jae ->fff_fallback_bit_op 2493 | jae ->fff_fallback_bit_op
2663 |.endif 2494 |.endif
2664 |.if SSE
2665 | movsd xmm0, qword [RD] 2495 | movsd xmm0, qword [RD]
2666 | addsd xmm0, xmm1 2496 | addsd xmm0, xmm1
2667 | movd RA, xmm0 2497 | movd RA, xmm0
2668 | ins RB, RA 2498 | ins RB, RA
2669 |.else
2670 | fld qword [RD]
2671 | fadd TMP1
2672 | fstp FPARG1
2673 | ins RB, ARG1
2674 |.endif
2675 | sub RD, 8 2499 | sub RD, 8
2676 | jmp <1 2500 | jmp <1
2677 |.endmacro 2501 |.endmacro
@@ -2688,15 +2512,10 @@ static void build_subroutines(BuildCtx *ctx)
2688 | not RB 2512 | not RB
2689 |.if DUALNUM 2513 |.if DUALNUM
2690 | jmp ->fff_resbit 2514 | jmp ->fff_resbit
2691 |.elif SSE 2515 |.else
2692 |->fff_resbit: 2516 |->fff_resbit:
2693 | cvtsi2sd xmm0, RB 2517 | cvtsi2sd xmm0, RB
2694 | jmp ->fff_resxmm0 2518 | jmp ->fff_resxmm0
2695 |.else
2696 |->fff_resbit:
2697 | mov ARG1, RB
2698 | fild ARG1
2699 | jmp ->fff_resn
2700 |.endif 2519 |.endif
2701 | 2520 |
2702 |->fff_fallback_bit_op: 2521 |->fff_fallback_bit_op:
@@ -2709,22 +2528,13 @@ static void build_subroutines(BuildCtx *ctx)
2709 | // Note: no inline conversion from number for 2nd argument! 2528 | // Note: no inline conversion from number for 2nd argument!
2710 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2529 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2711 | mov RA, dword [BASE+8] 2530 | mov RA, dword [BASE+8]
2712 |.elif SSE 2531 |.else
2713 | .ffunc_nnsse name 2532 | .ffunc_nnsse name
2714 | sseconst_tobit xmm2, RBa 2533 | sseconst_tobit xmm2, RBa
2715 | addsd xmm0, xmm2 2534 | addsd xmm0, xmm2
2716 | addsd xmm1, xmm2 2535 | addsd xmm1, xmm2
2717 | movd RB, xmm0 2536 | movd RB, xmm0
2718 | movd RA, xmm1 2537 | movd RA, xmm1
2719 |.else
2720 | .ffunc_nn name
2721 | mov TMP1, TOBIT_BIAS
2722 | fadd TMP1
2723 | fstp FPARG3
2724 | fadd TMP1
2725 | fstp FPARG1
2726 | mov RA, ARG3
2727 | mov RB, ARG1
2728 |.endif 2538 |.endif
2729 | ins RB, cl // Assumes RA is ecx. 2539 | ins RB, cl // Assumes RA is ecx.
2730 | jmp ->fff_resbit 2540 | jmp ->fff_resbit
@@ -2858,7 +2668,7 @@ static void build_subroutines(BuildCtx *ctx)
2858 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2668 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2859 | mov FCARG1, L:RB 2669 | mov FCARG1, L:RB
2860 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2670 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2861 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2671 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2862 |3: 2672 |3:
2863 | mov BASE, L:RB->base 2673 | mov BASE, L:RB->base
2864 |4: 2674 |4:
@@ -2929,6 +2739,79 @@ static void build_subroutines(BuildCtx *ctx)
2929 | add NARGS:RD, 1 2739 | add NARGS:RD, 1
2930 | jmp RBa 2740 | jmp RBa
2931 | 2741 |
2742 |->cont_stitch: // Trace stitching.
2743 |.if JIT
2744 | // BASE = base, RC = result, RB = mbase
2745 | mov TRACE:RA, [RB-24] // Save previous trace.
2746 | mov TMP1, TRACE:RA
2747 | mov TMP3, DISPATCH // Need one more register.
2748 | mov DISPATCH, MULTRES
2749 | movzx RA, PC_RA
2750 | lea RA, [BASE+RA*8] // Call base.
2751 | sub DISPATCH, 1
2752 | jz >2
2753 |1: // Move results down.
2754 |.if X64
2755 | mov RBa, [RC]
2756 | mov [RA], RBa
2757 |.else
2758 | mov RB, [RC]
2759 | mov [RA], RB
2760 | mov RB, [RC+4]
2761 | mov [RA+4], RB
2762 |.endif
2763 | add RC, 8
2764 | add RA, 8
2765 | sub DISPATCH, 1
2766 | jnz <1
2767 |2:
2768 | movzx RC, PC_RA
2769 | movzx RB, PC_RB
2770 | add RC, RB
2771 | lea RC, [BASE+RC*8-8]
2772 |3:
2773 | cmp RC, RA
2774 | ja >9 // More results wanted?
2775 |
2776 | mov DISPATCH, TMP3
2777 | mov TRACE:RD, TMP1 // Get previous trace.
2778 | movzx RB, word TRACE:RD->traceno
2779 | movzx RD, word TRACE:RD->link
2780 | cmp RD, RB
2781 | je ->cont_nop // Blacklisted.
2782 | test RD, RD
2783 | jne =>BC_JLOOP // Jump to stitched trace.
2784 |
2785 | // Stitch a new trace to the previous trace.
2786 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2787 | mov L:RB, SAVE_L
2788 | mov L:RB->base, BASE
2789 | mov FCARG2, PC
2790 | lea FCARG1, [DISPATCH+GG_DISP2J]
2791 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2792 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2793 | mov BASE, L:RB->base
2794 | jmp ->cont_nop
2795 |
2796 |9: // Fill up results with nil.
2797 | mov dword [RA+4], LJ_TNIL
2798 | add RA, 8
2799 | jmp <3
2800 |.endif
2801 |
2802 |->vm_profhook: // Dispatch target for profiler hook.
2803#if LJ_HASPROFILE
2804 | mov L:RB, SAVE_L
2805 | mov L:RB->base, BASE
2806 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2807 | mov FCARG1, L:RB
2808 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2809 | mov BASE, L:RB->base
2810 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2811 | sub PC, 4
2812 | jmp ->cont_nop
2813#endif
2814 |
2932 |//----------------------------------------------------------------------- 2815 |//-----------------------------------------------------------------------
2933 |//-- Trace exit handler ------------------------------------------------- 2816 |//-- Trace exit handler -------------------------------------------------
2934 |//----------------------------------------------------------------------- 2817 |//-----------------------------------------------------------------------
@@ -2981,10 +2864,9 @@ static void build_subroutines(BuildCtx *ctx)
2981 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2864 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2982 |.endif 2865 |.endif
2983 | // Caveat: RB is ebp. 2866 | // Caveat: RB is ebp.
2984 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2867 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2985 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2868 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2986 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2869 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2987 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2988 | mov L:RB->base, BASE 2870 | mov L:RB->base, BASE
2989 |.if X64WIN 2871 |.if X64WIN
2990 | lea CARG2, [rsp+4*8] 2872 | lea CARG2, [rsp+4*8]
@@ -2994,6 +2876,7 @@ static void build_subroutines(BuildCtx *ctx)
2994 | lea FCARG2, [esp+16] 2876 | lea FCARG2, [esp+16]
2995 |.endif 2877 |.endif
2996 | lea FCARG1, [DISPATCH+GG_DISP2J] 2878 | lea FCARG1, [DISPATCH+GG_DISP2J]
2879 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2997 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2880 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2998 | // MULTRES or negated error code returned in eax (RD). 2881 | // MULTRES or negated error code returned in eax (RD).
2999 | mov RAa, L:RB->cframe 2882 | mov RAa, L:RB->cframe
@@ -3040,12 +2923,14 @@ static void build_subroutines(BuildCtx *ctx)
3040 | mov r13, TMPa 2923 | mov r13, TMPa
3041 | mov r12, TMPQ 2924 | mov r12, TMPQ
3042 |.endif 2925 |.endif
3043 | test RD, RD; js >3 // Check for error from exit. 2926 | test RD, RD; js >9 // Check for error from exit.
2927 | mov L:RB, SAVE_L
3044 | mov MULTRES, RD 2928 | mov MULTRES, RD
3045 | mov LFUNC:KBASE, [BASE-8] 2929 | mov LFUNC:KBASE, [BASE-8]
3046 | mov KBASE, LFUNC:KBASE->pc 2930 | mov KBASE, LFUNC:KBASE->pc
3047 | mov KBASE, [KBASE+PC2PROTO(k)] 2931 | mov KBASE, [KBASE+PC2PROTO(k)]
3048 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2932 | mov L:RB->base, BASE
2933 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3049 | set_vmstate INTERP 2934 | set_vmstate INTERP
3050 | // Modified copy of ins_next which handles function header dispatch, too. 2935 | // Modified copy of ins_next which handles function header dispatch, too.
3051 | mov RC, [PC] 2936 | mov RC, [PC]
@@ -3054,18 +2939,35 @@ static void build_subroutines(BuildCtx *ctx)
3054 | add PC, 4 2939 | add PC, 4
3055 | shr RC, 16 2940 | shr RC, 16
3056 | cmp OP, BC_FUNCF // Function header? 2941 | cmp OP, BC_FUNCF // Function header?
3057 | jb >2 2942 | jb >3
3058 | mov RC, MULTRES // RC/RD holds nres+1. 2943 | cmp OP, BC_FUNCC+2 // Fast function?
2944 | jae >4
3059 |2: 2945 |2:
2946 | mov RC, MULTRES // RC/RD holds nres+1.
2947 |3:
3060 |.if X64 2948 |.if X64
3061 | jmp aword [DISPATCH+OP*8] 2949 | jmp aword [DISPATCH+OP*8]
3062 |.else 2950 |.else
3063 | jmp aword [DISPATCH+OP*4] 2951 | jmp aword [DISPATCH+OP*4]
3064 |.endif 2952 |.endif
3065 | 2953 |
3066 |3: // Rethrow error from the right C frame. 2954 |4: // Check frame below fast function.
2955 | mov RC, [BASE-4]
2956 | test RC, FRAME_TYPE
2957 | jnz <2 // Trace stitching continuation?
2958 | // Otherwise set KBASE for Lua function below fast function.
2959 | movzx RC, byte [RC-3]
2960 | not RCa
2961 | mov LFUNC:KBASE, [BASE+RC*8-8]
2962 | mov KBASE, LFUNC:KBASE->pc
2963 | mov KBASE, [KBASE+PC2PROTO(k)]
2964 | jmp <2
2965 |
2966 |9: // Rethrow error from the right C frame.
2967 | mov FCARG2, RD
3067 | mov FCARG1, L:RB 2968 | mov FCARG1, L:RB
3068 | call extern lj_err_run@4 // (lua_State *L) 2969 | neg FCARG2
2970 | call extern lj_err_trace@8 // (lua_State *L, int errcode)
3069 |.endif 2971 |.endif
3070 | 2972 |
3071 |//----------------------------------------------------------------------- 2973 |//-----------------------------------------------------------------------
@@ -3073,27 +2975,18 @@ static void build_subroutines(BuildCtx *ctx)
3073 |//----------------------------------------------------------------------- 2975 |//-----------------------------------------------------------------------
3074 | 2976 |
3075 |// FP value rounding. Called by math.floor/math.ceil fast functions 2977 |// FP value rounding. Called by math.floor/math.ceil fast functions
3076 |// and from JIT code. 2978 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3077 | 2979 |.macro vm_round, name, mode, cond
3078 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2980 |->name:
3079 |.macro vm_round_x87, mode1, mode2 2981 |.if not X64 and cond
3080 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 2982 | movsd xmm0, qword [esp+4]
3081 | mov [esp+8], eax 2983 | call ->name .. _sse
3082 | mov ax, mode1 2984 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
3083 | or ax, [esp+4] 2985 | fld qword [esp+4]
3084 |.if mode2 ~= 0xffff
3085 | and ax, mode2
3086 |.endif
3087 | mov [esp+6], ax
3088 | fldcw word [esp+6]
3089 | frndint
3090 | fldcw word [esp+4]
3091 | mov eax, [esp+8]
3092 | ret 2986 | ret
3093 |.endmacro 2987 |.endif
3094 | 2988 |
3095 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2989 |->name .. _sse:
3096 |.macro vm_round_sse, mode
3097 | sseconst_abs xmm2, RDa 2990 | sseconst_abs xmm2, RDa
3098 | sseconst_2p52 xmm3, RDa 2991 | sseconst_2p52 xmm3, RDa
3099 | movaps xmm1, xmm0 2992 | movaps xmm1, xmm0
@@ -3129,22 +3022,12 @@ static void build_subroutines(BuildCtx *ctx)
3129 | ret 3022 | ret
3130 |.endmacro 3023 |.endmacro
3131 | 3024 |
3132 |.macro vm_round, name, ssemode, mode1, mode2 3025 | vm_round vm_floor, 0, 1
3133 |->name: 3026 | vm_round vm_ceil, 1, JIT
3134 |.if not SSE 3027 | vm_round vm_trunc, 2, JIT
3135 | vm_round_x87 mode1, mode2
3136 |.endif
3137 |->name .. _sse:
3138 | vm_round_sse ssemode
3139 |.endmacro
3140 |
3141 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3142 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3143 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3144 | 3028 |
3145 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3029 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3146 |->vm_mod: 3030 |->vm_mod:
3147 |.if SSE
3148 |// Args in xmm0/xmm1, return value in xmm0. 3031 |// Args in xmm0/xmm1, return value in xmm0.
3149 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3032 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3150 | movaps xmm5, xmm0 3033 | movaps xmm5, xmm0
@@ -3172,172 +3055,6 @@ static void build_subroutines(BuildCtx *ctx)
3172 | movaps xmm0, xmm5 3055 | movaps xmm0, xmm5
3173 | subsd xmm0, xmm1 3056 | subsd xmm0, xmm1
3174 | ret 3057 | ret
3175 |.else
3176 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3177 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3178 | fld st1
3179 | fdiv st1
3180 | fnstcw word [esp+4]
3181 | mov ax, 0x0400
3182 | or ax, [esp+4]
3183 | and ax, 0xf7ff
3184 | mov [esp+6], ax
3185 | fldcw word [esp+6]
3186 | frndint
3187 | fldcw word [esp+4]
3188 | fmulp st1
3189 | fsubp st1
3190 | ret
3191 |.endif
3192 |
3193 |// FP log2(x). Called by math.log(x, base).
3194 |->vm_log2:
3195 |.if X64WIN
3196 | movsd qword [rsp+8], xmm0 // Use scratch area.
3197 | fld1
3198 | fld qword [rsp+8]
3199 | fyl2x
3200 | fstp qword [rsp+8]
3201 | movsd xmm0, qword [rsp+8]
3202 |.elif X64
3203 | movsd qword [rsp-8], xmm0 // Use red zone.
3204 | fld1
3205 | fld qword [rsp-8]
3206 | fyl2x
3207 | fstp qword [rsp-8]
3208 | movsd xmm0, qword [rsp-8]
3209 |.else
3210 | fld1
3211 | fld qword [esp+4]
3212 | fyl2x
3213 |.endif
3214 | ret
3215 |
3216 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3217 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3218 |// Caveat: needs 3 slots on x87 stack!
3219 |->vm_exp_x87:
3220 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3221 |->vm_exp2_x87:
3222 | .if X64WIN
3223 | .define expscratch, dword [rsp+8] // Use scratch area.
3224 | .elif X64
3225 | .define expscratch, dword [rsp-8] // Use red zone.
3226 | .else
3227 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3228 | .endif
3229 | fst expscratch // Caveat: overwrites ARG1.
3230 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3231 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3232 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3233 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3234 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3235 |1:
3236 | ret
3237 |2:
3238 | fpop; fldz; ret
3239 |
3240 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3241 |// and vm_arith.
3242 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3243 |// Caveat: needs 3 slots on x87 stack!
3244 |->vm_pow:
3245 |.if not SSE
3246 | fist dword [esp+4] // Store/reload int before comparison.
3247 | fild dword [esp+4] // Integral exponent used in vm_powi.
3248 | fucomip st1
3249 | jnz >8 // Branch for FP exponents.
3250 | jp >9 // Branch for NaN exponent.
3251 | fpop // Pop y and fallthrough to vm_powi.
3252 |
3253 |// FP/int power function x^i. Arg1/ret on x87 stack.
3254 |// Arg2 (int) on C stack. RC (eax) modified.
3255 |// Caveat: needs 2 slots on x87 stack!
3256 | mov eax, [esp+4]
3257 | cmp eax, 1; jle >6 // i<=1?
3258 | // Now 1 < (unsigned)i <= 0x80000000.
3259 |1: // Handle leading zeros.
3260 | test eax, 1; jnz >2
3261 | fmul st0
3262 | shr eax, 1
3263 | jmp <1
3264 |2:
3265 | shr eax, 1; jz >5
3266 | fdup
3267 |3: // Handle trailing bits.
3268 | fmul st0
3269 | shr eax, 1; jz >4
3270 | jnc <3
3271 | fmul st1, st0
3272 | jmp <3
3273 |4:
3274 | fmulp st1
3275 |5:
3276 | ret
3277 |6:
3278 | je <5 // x^1 ==> x
3279 | jb >7
3280 | fld1; fdivrp st1
3281 | neg eax
3282 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3283 | jmp <1 // x^-i ==> (1/x)^i
3284 |7:
3285 | fpop; fld1 // x^0 ==> 1
3286 | ret
3287 |
3288 |8: // FP/FP power function x^y.
3289 | fst dword [esp+4]
3290 | fxch
3291 | fst dword [esp+8]
3292 | mov eax, [esp+4]; shl eax, 1
3293 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3294 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3295 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3296 | fyl2x
3297 | jmp ->vm_exp2raw
3298 |
3299 |9: // Handle x^NaN.
3300 | fld1
3301 | fucomip st2
3302 | je >1 // 1^NaN ==> 1
3303 | fxch // x^NaN ==> NaN
3304 |1:
3305 | fpop
3306 | ret
3307 |
3308 |2: // Handle x^+-Inf.
3309 | fabs
3310 | fld1
3311 | fucomip st1
3312 | je >3 // +-1^+-Inf ==> 1
3313 | fpop; fabs; fldz; mov eax, 0; setc al
3314 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3315 | fxch
3316 |3:
3317 | fpop1; fabs
3318 | ret
3319 |
3320 |4: // Handle +-0^y or +-Inf^y.
3321 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3322 | fpop; fpop
3323 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3324 | fldz // y < 0, +-Inf^y ==> 0
3325 | ret
3326 |5:
3327 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3328 | fld dword [esp+4]
3329 | ret
3330 |.endif
3331 |
3332 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3333 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3334 |->vm_pow_sse:
3335 | cvtsd2si eax, xmm1
3336 | cvtsi2sd xmm2, eax
3337 | ucomisd xmm1, xmm2
3338 | jnz >8 // Branch for FP exponents.
3339 | jp >9 // Branch for NaN exponent.
3340 | // Fallthrough to vm_powi_sse.
3341 | 3058 |
3342 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3059 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3343 |->vm_powi_sse: 3060 |->vm_powi_sse:
@@ -3374,287 +3091,6 @@ static void build_subroutines(BuildCtx *ctx)
3374 | sseconst_1 xmm0, RDa 3091 | sseconst_1 xmm0, RDa
3375 | ret 3092 | ret
3376 | 3093 |
3377 |8: // FP/FP power function x^y.
3378 |.if X64
3379 | movd rax, xmm1; shl rax, 1
3380 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3381 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3382 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3383 | .if X64WIN
3384 | movsd qword [rsp+16], xmm1 // Use scratch area.
3385 | movsd qword [rsp+8], xmm0
3386 | fld qword [rsp+16]
3387 | fld qword [rsp+8]
3388 | .else
3389 | movsd qword [rsp-16], xmm1 // Use red zone.
3390 | movsd qword [rsp-8], xmm0
3391 | fld qword [rsp-16]
3392 | fld qword [rsp-8]
3393 | .endif
3394 |.else
3395 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3396 | movsd qword [esp+4], xmm0
3397 | cmp dword [esp+12], 0; jne >1
3398 | mov eax, [esp+16]; shl eax, 1
3399 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3400 |1:
3401 | cmp dword [esp+4], 0; jne >1
3402 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3403 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3404 |1:
3405 | fld qword [esp+12]
3406 | fld qword [esp+4]
3407 |.endif
3408 | fyl2x // y*log2(x)
3409 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3410 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3411 |.if X64WIN
3412 | fstp qword [rsp+8] // Use scratch area.
3413 | movsd xmm0, qword [rsp+8]
3414 |.elif X64
3415 | fstp qword [rsp-8] // Use red zone.
3416 | movsd xmm0, qword [rsp-8]
3417 |.else
3418 | fstp qword [esp+4] // Needs 8 byte scratch area.
3419 | movsd xmm0, qword [esp+4]
3420 |.endif
3421 | ret
3422 |
3423 |9: // Handle x^NaN.
3424 | sseconst_1 xmm2, RDa
3425 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3426 | movaps xmm0, xmm1 // x^NaN ==> NaN
3427 |1:
3428 | ret
3429 |
3430 |2: // Handle x^+-Inf.
3431 | sseconst_abs xmm2, RDa
3432 | andpd xmm0, xmm2 // |x|
3433 | sseconst_1 xmm2, RDa
3434 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3435 | movmskpd eax, xmm1
3436 | xorps xmm0, xmm0
3437 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3438 |3:
3439 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3440 | ret
3441 |
3442 |4: // Handle +-0^y.
3443 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3444 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3445 | ret
3446 |
3447 |5: // Handle +-Inf^y.
3448 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3449 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3450 | ret
3451 |
3452 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3453 |// Computes fpm(x) for extended math functions. ORDER FPM.
3454 |->vm_foldfpm:
3455 |.if JIT
3456 |.if X64
3457 | .if X64WIN
3458 | .define fpmop, CARG2d
3459 | .else
3460 | .define fpmop, CARG1d
3461 | .endif
3462 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3463 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3464 | sqrtsd xmm0, xmm0; ret
3465 |2:
3466 | .if X64WIN
3467 | movsd qword [rsp+8], xmm0 // Use scratch area.
3468 | fld qword [rsp+8]
3469 | .else
3470 | movsd qword [rsp-8], xmm0 // Use red zone.
3471 | fld qword [rsp-8]
3472 | .endif
3473 | cmp fpmop, 5; ja >2
3474 | .if X64WIN; pop rax; .endif
3475 | je >1
3476 | call ->vm_exp_x87
3477 | .if X64WIN; push rax; .endif
3478 | jmp >7
3479 |1:
3480 | call ->vm_exp2_x87
3481 | .if X64WIN; push rax; .endif
3482 | jmp >7
3483 |2: ; cmp fpmop, 7; je >1; ja >2
3484 | fldln2; fxch; fyl2x; jmp >7
3485 |1: ; fld1; fxch; fyl2x; jmp >7
3486 |2: ; cmp fpmop, 9; je >1; ja >2
3487 | fldlg2; fxch; fyl2x; jmp >7
3488 |1: ; fsin; jmp >7
3489 |2: ; cmp fpmop, 11; je >1; ja >9
3490 | fcos; jmp >7
3491 |1: ; fptan; fpop
3492 |7:
3493 | .if X64WIN
3494 | fstp qword [rsp+8] // Use scratch area.
3495 | movsd xmm0, qword [rsp+8]
3496 | .else
3497 | fstp qword [rsp-8] // Use red zone.
3498 | movsd xmm0, qword [rsp-8]
3499 | .endif
3500 | ret
3501 |.else // x86 calling convention.
3502 | .define fpmop, eax
3503 |.if SSE
3504 | mov fpmop, [esp+12]
3505 | movsd xmm0, qword [esp+4]
3506 | cmp fpmop, 1; je >1; ja >2
3507 | call ->vm_floor; jmp >7
3508 |1: ; call ->vm_ceil; jmp >7
3509 |2: ; cmp fpmop, 3; je >1; ja >2
3510 | call ->vm_trunc; jmp >7
3511 |1:
3512 | sqrtsd xmm0, xmm0
3513 |7:
3514 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3515 | fld qword [esp+4]
3516 | ret
3517 |2: ; fld qword [esp+4]
3518 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3519 |2: ; cmp fpmop, 7; je >1; ja >2
3520 | fldln2; fxch; fyl2x; ret
3521 |1: ; fld1; fxch; fyl2x; ret
3522 |2: ; cmp fpmop, 9; je >1; ja >2
3523 | fldlg2; fxch; fyl2x; ret
3524 |1: ; fsin; ret
3525 |2: ; cmp fpmop, 11; je >1; ja >9
3526 | fcos; ret
3527 |1: ; fptan; fpop; ret
3528 |.else
3529 | mov fpmop, [esp+12]
3530 | fld qword [esp+4]
3531 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3532 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3533 | fsqrt; ret
3534 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3535 | cmp fpmop, 7; je >1; ja >2
3536 | fldln2; fxch; fyl2x; ret
3537 |1: ; fld1; fxch; fyl2x; ret
3538 |2: ; cmp fpmop, 9; je >1; ja >2
3539 | fldlg2; fxch; fyl2x; ret
3540 |1: ; fsin; ret
3541 |2: ; cmp fpmop, 11; je >1; ja >9
3542 | fcos; ret
3543 |1: ; fptan; fpop; ret
3544 |.endif
3545 |.endif
3546 |9: ; int3 // Bad fpm.
3547 |.endif
3548 |
3549 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3550 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3551 |// and basic math functions. ORDER ARITH
3552 |->vm_foldarith:
3553 |.if X64
3554 |
3555 | .if X64WIN
3556 | .define foldop, CARG3d
3557 | .else
3558 | .define foldop, CARG1d
3559 | .endif
3560 | cmp foldop, 1; je >1; ja >2
3561 | addsd xmm0, xmm1; ret
3562 |1: ; subsd xmm0, xmm1; ret
3563 |2: ; cmp foldop, 3; je >1; ja >2
3564 | mulsd xmm0, xmm1; ret
3565 |1: ; divsd xmm0, xmm1; ret
3566 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3567 | cmp foldop, 7; je >1; ja >2
3568 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3569 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3570 |2: ; cmp foldop, 9; ja >2
3571 |.if X64WIN
3572 | movsd qword [rsp+8], xmm0 // Use scratch area.
3573 | movsd qword [rsp+16], xmm1
3574 | fld qword [rsp+8]
3575 | fld qword [rsp+16]
3576 |.else
3577 | movsd qword [rsp-8], xmm0 // Use red zone.
3578 | movsd qword [rsp-16], xmm1
3579 | fld qword [rsp-8]
3580 | fld qword [rsp-16]
3581 |.endif
3582 | je >1
3583 | fpatan
3584 |7:
3585 |.if X64WIN
3586 | fstp qword [rsp+8] // Use scratch area.
3587 | movsd xmm0, qword [rsp+8]
3588 |.else
3589 | fstp qword [rsp-8] // Use red zone.
3590 | movsd xmm0, qword [rsp-8]
3591 |.endif
3592 | ret
3593 |1: ; fxch; fscale; fpop1; jmp <7
3594 |2: ; cmp foldop, 11; je >1; ja >9
3595 | minsd xmm0, xmm1; ret
3596 |1: ; maxsd xmm0, xmm1; ret
3597 |9: ; int3 // Bad op.
3598 |
3599 |.elif SSE // x86 calling convention with SSE ops.
3600 |
3601 | .define foldop, eax
3602 | mov foldop, [esp+20]
3603 | movsd xmm0, qword [esp+4]
3604 | movsd xmm1, qword [esp+12]
3605 | cmp foldop, 1; je >1; ja >2
3606 | addsd xmm0, xmm1
3607 |7:
3608 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3609 | fld qword [esp+4]
3610 | ret
3611 |1: ; subsd xmm0, xmm1; jmp <7
3612 |2: ; cmp foldop, 3; je >1; ja >2
3613 | mulsd xmm0, xmm1; jmp <7
3614 |1: ; divsd xmm0, xmm1; jmp <7
3615 |2: ; cmp foldop, 5
3616 | je >1; ja >2
3617 | call ->vm_mod; jmp <7
3618 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3619 |2: ; cmp foldop, 7; je >1; ja >2
3620 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3621 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3622 |2: ; cmp foldop, 9; ja >2
3623 | fld qword [esp+4] // Reload from stack
3624 | fld qword [esp+12]
3625 | je >1
3626 | fpatan; ret
3627 |1: ; fxch; fscale; fpop1; ret
3628 |2: ; cmp foldop, 11; je >1; ja >9
3629 | minsd xmm0, xmm1; jmp <7
3630 |1: ; maxsd xmm0, xmm1; jmp <7
3631 |9: ; int3 // Bad op.
3632 |
3633 |.else // x86 calling convention with x87 ops.
3634 |
3635 | mov eax, [esp+20]
3636 | fld qword [esp+4]
3637 | fld qword [esp+12]
3638 | cmp eax, 1; je >1; ja >2
3639 | faddp st1; ret
3640 |1: ; fsubp st1; ret
3641 |2: ; cmp eax, 3; je >1; ja >2
3642 | fmulp st1; ret
3643 |1: ; fdivp st1; ret
3644 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3645 | cmp eax, 7; je >1; ja >2
3646 | fpop; fchs; ret
3647 |1: ; fpop; fabs; ret
3648 |2: ; cmp eax, 9; je >1; ja >2
3649 | fpatan; ret
3650 |1: ; fxch; fscale; fpop1; ret
3651 |2: ; cmp eax, 11; je >1; ja >9
3652 | fucomi st1; fcmovnbe st1; fpop1; ret
3653 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3654 |9: ; int3 // Bad op.
3655 |
3656 |.endif
3657 |
3658 |//----------------------------------------------------------------------- 3094 |//-----------------------------------------------------------------------
3659 |//-- Miscellaneous functions -------------------------------------------- 3095 |//-- Miscellaneous functions --------------------------------------------
3660 |//----------------------------------------------------------------------- 3096 |//-----------------------------------------------------------------------
@@ -3665,6 +3101,7 @@ static void build_subroutines(BuildCtx *ctx)
3665 | mov eax, CARG1d 3101 | mov eax, CARG1d
3666 | .if X64WIN; push rsi; mov rsi, CARG2; .endif 3102 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3667 | push rbx 3103 | push rbx
3104 | xor ecx, ecx
3668 | cpuid 3105 | cpuid
3669 | mov [rsi], eax 3106 | mov [rsi], eax
3670 | mov [rsi+4], ebx 3107 | mov [rsi+4], ebx
@@ -3688,6 +3125,7 @@ static void build_subroutines(BuildCtx *ctx)
3688 | mov eax, [esp+4] // Argument 1 is function number. 3125 | mov eax, [esp+4] // Argument 1 is function number.
3689 | push edi 3126 | push edi
3690 | push ebx 3127 | push ebx
3128 | xor ecx, ecx
3691 | cpuid 3129 | cpuid
3692 | mov edi, [esp+16] // Argument 2 is result area. 3130 | mov edi, [esp+16] // Argument 2 is result area.
3693 | mov [edi], eax 3131 | mov [edi], eax
@@ -3965,19 +3403,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3965 | // RA is a number. 3403 | // RA is a number.
3966 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3404 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3967 | // RA is a number, RD is an integer. 3405 | // RA is a number, RD is an integer.
3968 |.if SSE
3969 | cvtsi2sd xmm0, dword [BASE+RD*8] 3406 | cvtsi2sd xmm0, dword [BASE+RD*8]
3970 | jmp >2 3407 | jmp >2
3971 |.else
3972 | fld qword [BASE+RA*8]
3973 | fild dword [BASE+RD*8]
3974 | jmp >3
3975 |.endif
3976 | 3408 |
3977 |8: // RA is an integer, RD is not an integer. 3409 |8: // RA is an integer, RD is not an integer.
3978 | ja ->vmeta_comp 3410 | ja ->vmeta_comp
3979 | // RA is an integer, RD is a number. 3411 | // RA is an integer, RD is a number.
3980 |.if SSE
3981 | cvtsi2sd xmm1, dword [BASE+RA*8] 3412 | cvtsi2sd xmm1, dword [BASE+RA*8]
3982 | movsd xmm0, qword [BASE+RD*8] 3413 | movsd xmm0, qword [BASE+RD*8]
3983 | add PC, 4 3414 | add PC, 4
@@ -3985,29 +3416,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3985 | jmp_comp jbe, ja, jb, jae, <9 3416 | jmp_comp jbe, ja, jb, jae, <9
3986 | jmp <6 3417 | jmp <6
3987 |.else 3418 |.else
3988 | fild dword [BASE+RA*8]
3989 | jmp >2
3990 |.endif
3991 |.else
3992 | checknum RA, ->vmeta_comp 3419 | checknum RA, ->vmeta_comp
3993 | checknum RD, ->vmeta_comp 3420 | checknum RD, ->vmeta_comp
3994 |.endif 3421 |.endif
3995 |.if SSE
3996 |1: 3422 |1:
3997 | movsd xmm0, qword [BASE+RD*8] 3423 | movsd xmm0, qword [BASE+RD*8]
3998 |2: 3424 |2:
3999 | add PC, 4 3425 | add PC, 4
4000 | ucomisd xmm0, qword [BASE+RA*8] 3426 | ucomisd xmm0, qword [BASE+RA*8]
4001 |3: 3427 |3:
4002 |.else
4003 |1:
4004 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4005 |2:
4006 | fld qword [BASE+RD*8]
4007 |3:
4008 | add PC, 4
4009 | fcomparepp
4010 |.endif
4011 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3428 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4012 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3429 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4013 |.if DUALNUM 3430 |.if DUALNUM
@@ -4047,43 +3464,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4047 | // RD is a number. 3464 | // RD is a number.
4048 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3465 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4049 | // RD is a number, RA is an integer. 3466 | // RD is a number, RA is an integer.
4050 |.if SSE
4051 | cvtsi2sd xmm0, dword [BASE+RA*8] 3467 | cvtsi2sd xmm0, dword [BASE+RA*8]
4052 |.else
4053 | fild dword [BASE+RA*8]
4054 |.endif
4055 | jmp >2 3468 | jmp >2
4056 | 3469 |
4057 |8: // RD is an integer, RA is not an integer. 3470 |8: // RD is an integer, RA is not an integer.
4058 | ja >5 3471 | ja >5
4059 | // RD is an integer, RA is a number. 3472 | // RD is an integer, RA is a number.
4060 |.if SSE
4061 | cvtsi2sd xmm0, dword [BASE+RD*8] 3473 | cvtsi2sd xmm0, dword [BASE+RD*8]
4062 | ucomisd xmm0, qword [BASE+RA*8] 3474 | ucomisd xmm0, qword [BASE+RA*8]
4063 |.else
4064 | fild dword [BASE+RD*8]
4065 | fld qword [BASE+RA*8]
4066 |.endif
4067 | jmp >4 3475 | jmp >4
4068 | 3476 |
4069 |.else 3477 |.else
4070 | cmp RB, LJ_TISNUM; jae >5 3478 | cmp RB, LJ_TISNUM; jae >5
4071 | checknum RA, >5 3479 | checknum RA, >5
4072 |.endif 3480 |.endif
4073 |.if SSE
4074 |1: 3481 |1:
4075 | movsd xmm0, qword [BASE+RA*8] 3482 | movsd xmm0, qword [BASE+RA*8]
4076 |2: 3483 |2:
4077 | ucomisd xmm0, qword [BASE+RD*8] 3484 | ucomisd xmm0, qword [BASE+RD*8]
4078 |4: 3485 |4:
4079 |.else
4080 |1:
4081 | fld qword [BASE+RA*8]
4082 |2:
4083 | fld qword [BASE+RD*8]
4084 |4:
4085 | fcomparepp
4086 |.endif
4087 iseqne_fp: 3486 iseqne_fp:
4088 if (vk) { 3487 if (vk) {
4089 | jp >2 // Unordered means not equal. 3488 | jp >2 // Unordered means not equal.
@@ -4206,39 +3605,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4206 | // RA is a number. 3605 | // RA is a number.
4207 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3606 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4208 | // RA is a number, RD is an integer. 3607 | // RA is a number, RD is an integer.
4209 |.if SSE
4210 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3608 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4211 |.else
4212 | fild dword [KBASE+RD*8]
4213 |.endif
4214 | jmp >2 3609 | jmp >2
4215 | 3610 |
4216 |8: // RA is an integer, RD is a number. 3611 |8: // RA is an integer, RD is a number.
4217 |.if SSE
4218 | cvtsi2sd xmm0, dword [BASE+RA*8] 3612 | cvtsi2sd xmm0, dword [BASE+RA*8]
4219 | ucomisd xmm0, qword [KBASE+RD*8] 3613 | ucomisd xmm0, qword [KBASE+RD*8]
4220 |.else
4221 | fild dword [BASE+RA*8]
4222 | fld qword [KBASE+RD*8]
4223 |.endif
4224 | jmp >4 3614 | jmp >4
4225 |.else 3615 |.else
4226 | cmp RB, LJ_TISNUM; jae >3 3616 | cmp RB, LJ_TISNUM; jae >3
4227 |.endif 3617 |.endif
4228 |.if SSE
4229 |1: 3618 |1:
4230 | movsd xmm0, qword [KBASE+RD*8] 3619 | movsd xmm0, qword [KBASE+RD*8]
4231 |2: 3620 |2:
4232 | ucomisd xmm0, qword [BASE+RA*8] 3621 | ucomisd xmm0, qword [BASE+RA*8]
4233 |4: 3622 |4:
4234 |.else
4235 |1:
4236 | fld qword [KBASE+RD*8]
4237 |2:
4238 | fld qword [BASE+RA*8]
4239 |4:
4240 | fcomparepp
4241 |.endif
4242 goto iseqne_fp; 3623 goto iseqne_fp;
4243 case BC_ISEQP: case BC_ISNEP: 3624 case BC_ISEQP: case BC_ISNEP:
4244 vk = op == BC_ISEQP; 3625 vk = op == BC_ISEQP;
@@ -4289,6 +3670,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4289 | ins_next 3670 | ins_next
4290 break; 3671 break;
4291 3672
3673 case BC_ISTYPE:
3674 | ins_AD // RA = src, RD = -type
3675 | add RD, [BASE+RA*8+4]
3676 | jne ->vmeta_istype
3677 | ins_next
3678 break;
3679 case BC_ISNUM:
3680 | ins_AD // RA = src, RD = -(TISNUM-1)
3681 | checknum RA, ->vmeta_istype
3682 | ins_next
3683 break;
3684
4292 /* -- Unary ops --------------------------------------------------------- */ 3685 /* -- Unary ops --------------------------------------------------------- */
4293 3686
4294 case BC_MOV: 3687 case BC_MOV:
@@ -4332,16 +3725,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4332 |.else 3725 |.else
4333 | checknum RD, ->vmeta_unm 3726 | checknum RD, ->vmeta_unm
4334 |.endif 3727 |.endif
4335 |.if SSE
4336 | movsd xmm0, qword [BASE+RD*8] 3728 | movsd xmm0, qword [BASE+RD*8]
4337 | sseconst_sign xmm1, RDa 3729 | sseconst_sign xmm1, RDa
4338 | xorps xmm0, xmm1 3730 | xorps xmm0, xmm1
4339 | movsd qword [BASE+RA*8], xmm0 3731 | movsd qword [BASE+RA*8], xmm0
4340 |.else
4341 | fld qword [BASE+RD*8]
4342 | fchs
4343 | fstp qword [BASE+RA*8]
4344 |.endif
4345 |.if DUALNUM 3732 |.if DUALNUM
4346 | jmp <9 3733 | jmp <9
4347 |.else 3734 |.else
@@ -4357,15 +3744,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4357 |1: 3744 |1:
4358 | mov dword [BASE+RA*8+4], LJ_TISNUM 3745 | mov dword [BASE+RA*8+4], LJ_TISNUM
4359 | mov dword [BASE+RA*8], RD 3746 | mov dword [BASE+RA*8], RD
4360 |.elif SSE 3747 |.else
4361 | xorps xmm0, xmm0 3748 | xorps xmm0, xmm0
4362 | cvtsi2sd xmm0, dword STR:RD->len 3749 | cvtsi2sd xmm0, dword STR:RD->len
4363 |1: 3750 |1:
4364 | movsd qword [BASE+RA*8], xmm0 3751 | movsd qword [BASE+RA*8], xmm0
4365 |.else
4366 | fild dword STR:RD->len
4367 |1:
4368 | fstp qword [BASE+RA*8]
4369 |.endif 3752 |.endif
4370 | ins_next 3753 | ins_next
4371 |2: 3754 |2:
@@ -4383,11 +3766,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4383 | // Length of table returned in eax (RD). 3766 | // Length of table returned in eax (RD).
4384 |.if DUALNUM 3767 |.if DUALNUM
4385 | // Nothing to do. 3768 | // Nothing to do.
4386 |.elif SSE
4387 | cvtsi2sd xmm0, RD
4388 |.else 3769 |.else
4389 | mov ARG1, RD 3770 | cvtsi2sd xmm0, RD
4390 | fild ARG1
4391 |.endif 3771 |.endif
4392 | mov BASE, RB // Restore BASE. 3772 | mov BASE, RB // Restore BASE.
4393 | movzx RA, PC_RA 3773 | movzx RA, PC_RA
@@ -4402,7 +3782,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4402 3782
4403 /* -- Binary ops -------------------------------------------------------- */ 3783 /* -- Binary ops -------------------------------------------------------- */
4404 3784
4405 |.macro ins_arithpre, x87ins, sseins, ssereg 3785 |.macro ins_arithpre, sseins, ssereg
4406 | ins_ABC 3786 | ins_ABC
4407 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3787 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4408 ||switch (vk) { 3788 ||switch (vk) {
@@ -4411,37 +3791,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4411 | .if DUALNUM 3791 | .if DUALNUM
4412 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3792 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4413 | .endif 3793 | .endif
4414 | .if SSE 3794 | movsd xmm0, qword [BASE+RB*8]
4415 | movsd xmm0, qword [BASE+RB*8] 3795 | sseins ssereg, qword [KBASE+RC*8]
4416 | sseins ssereg, qword [KBASE+RC*8]
4417 | .else
4418 | fld qword [BASE+RB*8]
4419 | x87ins qword [KBASE+RC*8]
4420 | .endif
4421 || break; 3796 || break;
4422 ||case 1: 3797 ||case 1:
4423 | checknum RB, ->vmeta_arith_nv 3798 | checknum RB, ->vmeta_arith_nv
4424 | .if DUALNUM 3799 | .if DUALNUM
4425 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3800 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4426 | .endif 3801 | .endif
4427 | .if SSE 3802 | movsd xmm0, qword [KBASE+RC*8]
4428 | movsd xmm0, qword [KBASE+RC*8] 3803 | sseins ssereg, qword [BASE+RB*8]
4429 | sseins ssereg, qword [BASE+RB*8]
4430 | .else
4431 | fld qword [KBASE+RC*8]
4432 | x87ins qword [BASE+RB*8]
4433 | .endif
4434 || break; 3804 || break;
4435 ||default: 3805 ||default:
4436 | checknum RB, ->vmeta_arith_vv 3806 | checknum RB, ->vmeta_arith_vv
4437 | checknum RC, ->vmeta_arith_vv 3807 | checknum RC, ->vmeta_arith_vv
4438 | .if SSE 3808 | movsd xmm0, qword [BASE+RB*8]
4439 | movsd xmm0, qword [BASE+RB*8] 3809 | sseins ssereg, qword [BASE+RC*8]
4440 | sseins ssereg, qword [BASE+RC*8]
4441 | .else
4442 | fld qword [BASE+RB*8]
4443 | x87ins qword [BASE+RC*8]
4444 | .endif
4445 || break; 3810 || break;
4446 ||} 3811 ||}
4447 |.endmacro 3812 |.endmacro
@@ -4479,55 +3844,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4479 |.endmacro 3844 |.endmacro
4480 | 3845 |
4481 |.macro ins_arithpost 3846 |.macro ins_arithpost
4482 |.if SSE
4483 | movsd qword [BASE+RA*8], xmm0 3847 | movsd qword [BASE+RA*8], xmm0
4484 |.else
4485 | fstp qword [BASE+RA*8]
4486 |.endif
4487 |.endmacro 3848 |.endmacro
4488 | 3849 |
4489 |.macro ins_arith, x87ins, sseins 3850 |.macro ins_arith, sseins
4490 | ins_arithpre x87ins, sseins, xmm0 3851 | ins_arithpre sseins, xmm0
4491 | ins_arithpost 3852 | ins_arithpost
4492 | ins_next 3853 | ins_next
4493 |.endmacro 3854 |.endmacro
4494 | 3855 |
4495 |.macro ins_arith, intins, x87ins, sseins 3856 |.macro ins_arith, intins, sseins
4496 |.if DUALNUM 3857 |.if DUALNUM
4497 | ins_arithdn intins 3858 | ins_arithdn intins
4498 |.else 3859 |.else
4499 | ins_arith, x87ins, sseins 3860 | ins_arith, sseins
4500 |.endif 3861 |.endif
4501 |.endmacro 3862 |.endmacro
4502 3863
4503 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3864 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4504 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3865 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4505 | ins_arith add, fadd, addsd 3866 | ins_arith add, addsd
4506 break; 3867 break;
4507 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3868 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4508 | ins_arith sub, fsub, subsd 3869 | ins_arith sub, subsd
4509 break; 3870 break;
4510 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3871 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4511 | ins_arith imul, fmul, mulsd 3872 | ins_arith imul, mulsd
4512 break; 3873 break;
4513 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3874 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4514 | ins_arith fdiv, divsd 3875 | ins_arith divsd
4515 break; 3876 break;
4516 case BC_MODVN: 3877 case BC_MODVN:
4517 | ins_arithpre fld, movsd, xmm1 3878 | ins_arithpre movsd, xmm1
4518 |->BC_MODVN_Z: 3879 |->BC_MODVN_Z:
4519 | call ->vm_mod 3880 | call ->vm_mod
4520 | ins_arithpost 3881 | ins_arithpost
4521 | ins_next 3882 | ins_next
4522 break; 3883 break;
4523 case BC_MODNV: case BC_MODVV: 3884 case BC_MODNV: case BC_MODVV:
4524 | ins_arithpre fld, movsd, xmm1 3885 | ins_arithpre movsd, xmm1
4525 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3886 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4526 break; 3887 break;
4527 case BC_POW: 3888 case BC_POW:
4528 | ins_arithpre fld, movsd, xmm1 3889 | ins_arithpre movsd, xmm1
4529 | call ->vm_pow 3890 | mov RB, BASE
3891 |.if not X64
3892 | movsd FPARG1, xmm0
3893 | movsd FPARG3, xmm1
3894 |.endif
3895 | call extern pow
3896 | movzx RA, PC_RA
3897 | mov BASE, RB
3898 |.if X64
4530 | ins_arithpost 3899 | ins_arithpost
3900 |.else
3901 | fstp qword [BASE+RA*8]
3902 |.endif
4531 | ins_next 3903 | ins_next
4532 break; 3904 break;
4533 3905
@@ -4595,25 +3967,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4595 | movsx RD, RDW 3967 | movsx RD, RDW
4596 | mov dword [BASE+RA*8+4], LJ_TISNUM 3968 | mov dword [BASE+RA*8+4], LJ_TISNUM
4597 | mov dword [BASE+RA*8], RD 3969 | mov dword [BASE+RA*8], RD
4598 |.elif SSE 3970 |.else
4599 | movsx RD, RDW // Sign-extend literal. 3971 | movsx RD, RDW // Sign-extend literal.
4600 | cvtsi2sd xmm0, RD 3972 | cvtsi2sd xmm0, RD
4601 | movsd qword [BASE+RA*8], xmm0 3973 | movsd qword [BASE+RA*8], xmm0
4602 |.else
4603 | fild PC_RD // Refetch signed RD from instruction.
4604 | fstp qword [BASE+RA*8]
4605 |.endif 3974 |.endif
4606 | ins_next 3975 | ins_next
4607 break; 3976 break;
4608 case BC_KNUM: 3977 case BC_KNUM:
4609 | ins_AD // RA = dst, RD = num const 3978 | ins_AD // RA = dst, RD = num const
4610 |.if SSE
4611 | movsd xmm0, qword [KBASE+RD*8] 3979 | movsd xmm0, qword [KBASE+RD*8]
4612 | movsd qword [BASE+RA*8], xmm0 3980 | movsd qword [BASE+RA*8], xmm0
4613 |.else
4614 | fld qword [KBASE+RD*8]
4615 | fstp qword [BASE+RA*8]
4616 |.endif
4617 | ins_next 3981 | ins_next
4618 break; 3982 break;
4619 case BC_KPRI: 3983 case BC_KPRI:
@@ -4720,18 +4084,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4720 case BC_USETN: 4084 case BC_USETN:
4721 | ins_AD // RA = upvalue #, RD = num const 4085 | ins_AD // RA = upvalue #, RD = num const
4722 | mov LFUNC:RB, [BASE-8] 4086 | mov LFUNC:RB, [BASE-8]
4723 |.if SSE
4724 | movsd xmm0, qword [KBASE+RD*8] 4087 | movsd xmm0, qword [KBASE+RD*8]
4725 |.else
4726 | fld qword [KBASE+RD*8]
4727 |.endif
4728 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4088 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4729 | mov RA, UPVAL:RB->v 4089 | mov RA, UPVAL:RB->v
4730 |.if SSE
4731 | movsd qword [RA], xmm0 4090 | movsd qword [RA], xmm0
4732 |.else
4733 | fstp qword [RA]
4734 |.endif
4735 | ins_next 4091 | ins_next
4736 break; 4092 break;
4737 case BC_USETP: 4093 case BC_USETP:
@@ -4885,18 +4241,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4885 |.else 4241 |.else
4886 | // Convert number to int and back and compare. 4242 | // Convert number to int and back and compare.
4887 | checknum RC, >5 4243 | checknum RC, >5
4888 |.if SSE
4889 | movsd xmm0, qword [BASE+RC*8] 4244 | movsd xmm0, qword [BASE+RC*8]
4890 | cvtsd2si RC, xmm0 4245 | cvttsd2si RC, xmm0
4891 | cvtsi2sd xmm1, RC 4246 | cvtsi2sd xmm1, RC
4892 | ucomisd xmm0, xmm1 4247 | ucomisd xmm0, xmm1
4893 |.else
4894 | fld qword [BASE+RC*8]
4895 | fist ARG1
4896 | fild ARG1
4897 | fcomparepp
4898 | mov RC, ARG1
4899 |.endif
4900 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4248 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4901 |.endif 4249 |.endif
4902 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4250 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4942,7 +4290,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4942 | mov TAB:RB, [BASE+RB*8] 4290 | mov TAB:RB, [BASE+RB*8]
4943 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4291 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4944 | mov RA, TAB:RB->hmask 4292 | mov RA, TAB:RB->hmask
4945 | and RA, STR:RC->hash 4293 | and RA, STR:RC->sid
4946 | imul RA, #NODE 4294 | imul RA, #NODE
4947 | add NODE:RA, TAB:RB->node 4295 | add NODE:RA, TAB:RB->node
4948 |1: 4296 |1:
@@ -5020,6 +4368,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5020 | mov dword [BASE+RA*8+4], LJ_TNIL 4368 | mov dword [BASE+RA*8+4], LJ_TNIL
5021 | jmp <1 4369 | jmp <1
5022 break; 4370 break;
4371 case BC_TGETR:
4372 | ins_ABC // RA = dst, RB = table, RC = key
4373 | mov TAB:RB, [BASE+RB*8]
4374 |.if DUALNUM
4375 | mov RC, dword [BASE+RC*8]
4376 |.else
4377 | cvttsd2si RC, qword [BASE+RC*8]
4378 |.endif
4379 | cmp RC, TAB:RB->asize
4380 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4381 | shl RC, 3
4382 | add RC, TAB:RB->array
4383 | // Get array slot.
4384 |->BC_TGETR_Z:
4385 |.if X64
4386 | mov RBa, [RC]
4387 | mov [BASE+RA*8], RBa
4388 |.else
4389 | mov RB, [RC]
4390 | mov RC, [RC+4]
4391 | mov [BASE+RA*8], RB
4392 | mov [BASE+RA*8+4], RC
4393 |.endif
4394 |->BC_TGETR2_Z:
4395 | ins_next
4396 break;
5023 4397
5024 case BC_TSETV: 4398 case BC_TSETV:
5025 | ins_ABC // RA = src, RB = table, RC = key 4399 | ins_ABC // RA = src, RB = table, RC = key
@@ -5033,18 +4407,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5033 |.else 4407 |.else
5034 | // Convert number to int and back and compare. 4408 | // Convert number to int and back and compare.
5035 | checknum RC, >5 4409 | checknum RC, >5
5036 |.if SSE
5037 | movsd xmm0, qword [BASE+RC*8] 4410 | movsd xmm0, qword [BASE+RC*8]
5038 | cvtsd2si RC, xmm0 4411 | cvttsd2si RC, xmm0
5039 | cvtsi2sd xmm1, RC 4412 | cvtsi2sd xmm1, RC
5040 | ucomisd xmm0, xmm1 4413 | ucomisd xmm0, xmm1
5041 |.else
5042 | fld qword [BASE+RC*8]
5043 | fist ARG1
5044 | fild ARG1
5045 | fcomparepp
5046 | mov RC, ARG1
5047 |.endif
5048 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4414 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5049 |.endif 4415 |.endif
5050 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4416 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5095,7 +4461,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5095 | mov TAB:RB, [BASE+RB*8] 4461 | mov TAB:RB, [BASE+RB*8]
5096 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. 4462 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
5097 | mov RA, TAB:RB->hmask 4463 | mov RA, TAB:RB->hmask
5098 | and RA, STR:RC->hash 4464 | and RA, STR:RC->sid
5099 | imul RA, #NODE 4465 | imul RA, #NODE
5100 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. 4466 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
5101 | add NODE:RA, TAB:RB->node 4467 | add NODE:RA, TAB:RB->node
@@ -5214,6 +4580,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5214 | movzx RA, PC_RA // Restore RA. 4580 | movzx RA, PC_RA // Restore RA.
5215 | jmp <2 4581 | jmp <2
5216 break; 4582 break;
4583 case BC_TSETR:
4584 | ins_ABC // RA = src, RB = table, RC = key
4585 | mov TAB:RB, [BASE+RB*8]
4586 |.if DUALNUM
4587 | mov RC, dword [BASE+RC*8]
4588 |.else
4589 | cvttsd2si RC, qword [BASE+RC*8]
4590 |.endif
4591 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4592 | jnz >7
4593 |2:
4594 | cmp RC, TAB:RB->asize
4595 | jae ->vmeta_tsetr
4596 | shl RC, 3
4597 | add RC, TAB:RB->array
4598 | // Set array slot.
4599 |->BC_TSETR_Z:
4600 |.if X64
4601 | mov RBa, [BASE+RA*8]
4602 | mov [RC], RBa
4603 |.else
4604 | mov RB, [BASE+RA*8+4]
4605 | mov RA, [BASE+RA*8]
4606 | mov [RC+4], RB
4607 | mov [RC], RA
4608 |.endif
4609 | ins_next
4610 |
4611 |7: // Possible table write barrier for the value. Skip valiswhite check.
4612 | barrierback TAB:RB, RA
4613 | movzx RA, PC_RA // Restore RA.
4614 | jmp <2
4615 break;
5217 4616
5218 case BC_TSETM: 4617 case BC_TSETM:
5219 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4618 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5407,10 +4806,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5407 |.if DUALNUM 4806 |.if DUALNUM
5408 | mov dword [BASE+RA*8+4], LJ_TISNUM 4807 | mov dword [BASE+RA*8+4], LJ_TISNUM
5409 | mov dword [BASE+RA*8], RC 4808 | mov dword [BASE+RA*8], RC
5410 |.elif SSE
5411 | cvtsi2sd xmm0, RC
5412 |.else 4809 |.else
5413 | fild dword [BASE+RA*8-8] 4810 | cvtsi2sd xmm0, RC
5414 |.endif 4811 |.endif
5415 | // Copy array slot to returned value. 4812 | // Copy array slot to returned value.
5416 |.if X64 4813 |.if X64
@@ -5426,10 +4823,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5426 | // Return array index as a numeric key. 4823 | // Return array index as a numeric key.
5427 |.if DUALNUM 4824 |.if DUALNUM
5428 | // See above. 4825 | // See above.
5429 |.elif SSE
5430 | movsd qword [BASE+RA*8], xmm0
5431 |.else 4826 |.else
5432 | fstp qword [BASE+RA*8] 4827 | movsd qword [BASE+RA*8], xmm0
5433 |.endif 4828 |.endif
5434 | mov [BASE+RA*8-8], RC // Update control var. 4829 | mov [BASE+RA*8-8], RC // Update control var.
5435 |2: 4830 |2:
@@ -5442,9 +4837,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5442 | 4837 |
5443 |4: // Skip holes in array part. 4838 |4: // Skip holes in array part.
5444 | add RC, 1 4839 | add RC, 1
5445 |.if not (DUALNUM or SSE)
5446 | mov [BASE+RA*8-8], RC
5447 |.endif
5448 | jmp <1 4840 | jmp <1
5449 | 4841 |
5450 |5: // Traverse hash part. 4842 |5: // Traverse hash part.
@@ -5778,7 +5170,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5778 if (!vk) { 5170 if (!vk) {
5779 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5171 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5780 } 5172 }
5781 |.if SSE
5782 | movsd xmm0, qword FOR_IDX 5173 | movsd xmm0, qword FOR_IDX
5783 | movsd xmm1, qword FOR_STOP 5174 | movsd xmm1, qword FOR_STOP
5784 if (vk) { 5175 if (vk) {
@@ -5791,22 +5182,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5791 | ucomisd xmm1, xmm0 5182 | ucomisd xmm1, xmm0
5792 |1: 5183 |1:
5793 | movsd qword FOR_EXT, xmm0 5184 | movsd qword FOR_EXT, xmm0
5794 |.else
5795 | fld qword FOR_STOP
5796 | fld qword FOR_IDX
5797 if (vk) {
5798 | fadd qword FOR_STEP // nidx = idx + step
5799 | fst qword FOR_IDX
5800 | fst qword FOR_EXT
5801 | test RB, RB; js >1
5802 } else {
5803 | fst qword FOR_EXT
5804 | jl >1
5805 }
5806 | fxch // Swap lim/(n)idx if step non-negative.
5807 |1:
5808 | fcomparepp
5809 |.endif
5810 if (op == BC_FORI) { 5185 if (op == BC_FORI) {
5811 |.if DUALNUM 5186 |.if DUALNUM
5812 | jnb <7 5187 | jnb <7
@@ -5834,11 +5209,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5834 |2: 5209 |2:
5835 | ins_next 5210 | ins_next
5836 |.endif 5211 |.endif
5837 |.if SSE 5212 |
5838 |3: // Invert comparison if step is negative. 5213 |3: // Invert comparison if step is negative.
5839 | ucomisd xmm0, xmm1 5214 | ucomisd xmm0, xmm1
5840 | jmp <1 5215 | jmp <1
5841 |.endif
5842 break; 5216 break;
5843 5217
5844 case BC_ITERL: 5218 case BC_ITERL:
@@ -5876,7 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5876 | ins_A // RA = base, RD = target (loop extent) 5250 | ins_A // RA = base, RD = target (loop extent)
5877 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5251 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5878 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 5252 | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
5879 |.if JIT 5253 |.if JIT
5880 | hotloop RB 5254 | hotloop RB
5881 |.endif 5255 |.endif
5882 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5256 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5895,7 +5269,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5895 | mov RDa, TRACE:RD->mcode 5269 | mov RDa, TRACE:RD->mcode
5896 | mov L:RB, SAVE_L 5270 | mov L:RB, SAVE_L
5897 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5271 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5898 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5272 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5899 | // Save additional callee-save registers only used in compiled code. 5273 | // Save additional callee-save registers only used in compiled code.
5900 |.if X64WIN 5274 |.if X64WIN
5901 | mov TMPQ, r12 5275 | mov TMPQ, r12
@@ -6062,9 +5436,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6062 | // (lua_State *L, lua_CFunction f) 5436 | // (lua_State *L, lua_CFunction f)
6063 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5437 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6064 } 5438 }
6065 | set_vmstate INTERP
6066 | // nresults returned in eax (RD). 5439 | // nresults returned in eax (RD).
6067 | mov BASE, L:RB->base 5440 | mov BASE, L:RB->base
5441 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5442 | set_vmstate INTERP
6068 | lea RA, [BASE+RD*8] 5443 | lea RA, [BASE+RD*8]
6069 | neg RA 5444 | neg RA
6070 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5445 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6177,7 +5552,7 @@ static void emit_asm_debug(BuildCtx *ctx)
6177 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); 5552 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
6178#endif 5553#endif
6179#if !LJ_NO_UNWIND 5554#if !LJ_NO_UNWIND
6180#if (defined(__sun__) && defined(__svr4__)) 5555#if LJ_TARGET_SOLARIS
6181#if LJ_64 5556#if LJ_64
6182 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); 5557 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
6183#else 5558#else
@@ -6384,15 +5759,21 @@ static void emit_asm_debug(BuildCtx *ctx)
6384 "LEFDEY:\n\n", fcsize); 5759 "LEFDEY:\n\n", fcsize);
6385 } 5760 }
6386#endif 5761#endif
6387#if LJ_64 5762#if !LJ_64
6388 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6389#else
6390 fprintf(ctx->fp, 5763 fprintf(ctx->fp,
6391 "\t.non_lazy_symbol_pointer\n" 5764 "\t.non_lazy_symbol_pointer\n"
6392 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5765 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6393 ".indirect_symbol _lj_err_unwind_dwarf\n" 5766 ".indirect_symbol _lj_err_unwind_dwarf\n"
6394 ".long 0\n"); 5767 ".long 0\n\n");
5768 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5769 {
5770 const char *const *xn;
5771 for (xn = ctx->extnames; *xn; xn++)
5772 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5773 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5774 }
6395#endif 5775#endif
5776 fprintf(ctx->fp, ".subsections_via_symbols\n");
6396 } 5777 }
6397 break; 5778 break;
6398#endif 5779#endif
diff --git a/src/xb1build.bat b/src/xb1build.bat
new file mode 100644
index 00000000..2eb68171
--- /dev/null
+++ b/src/xb1build.bat
@@ -0,0 +1,101 @@
1@rem Script to build LuaJIT with the Xbox One SDK.
2@rem Donated to the public domain.
3@rem
4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5@rem Then cd to this directory and run this script.
6
7@if not defined INCLUDE goto :FAIL
8@if not defined DurangoXDK goto :FAIL
9
10@setlocal
11@echo ---- Host compiler ----
12@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
13@set LJLINK=link /nologo
14@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
18
19%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD
21%LJLINK% /out:minilua.exe minilua.obj
22@if errorlevel 1 goto :BAD
23if exist minilua.exe.manifest^
24 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
25
26@rem Error out for 64 bit host compiler
27@minilua
28@if not errorlevel 8 goto :FAIL
29
30@set DASMFLAGS=-D WIN -D FFI -D P64
31minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc
32@if errorlevel 1 goto :BAD
33
34%LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c
35@if errorlevel 1 goto :BAD
36%LJLINK% /out:buildvm.exe buildvm*.obj
37@if errorlevel 1 goto :BAD
38if exist buildvm.exe.manifest^
39 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
40
41buildvm -m peobj -o lj_vm.obj
42@if errorlevel 1 goto :BAD
43buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
44@if errorlevel 1 goto :BAD
45buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
46@if errorlevel 1 goto :BAD
47buildvm -m libdef -o lj_libdef.h %ALL_LIB%
48@if errorlevel 1 goto :BAD
49buildvm -m recdef -o lj_recdef.h %ALL_LIB%
50@if errorlevel 1 goto :BAD
51buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
52@if errorlevel 1 goto :BAD
53buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
54@if errorlevel 1 goto :BAD
55
56@echo ---- Cross compiler ----
57
58@set CWD=%cd%
59@call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK
60@cd /D "%CWD%"
61@shift
62
63@set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO
64@set LJLIB="lib" /nologo
65
66@if "%1"=="debug" (
67 @shift
68 @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od
69 @set LJLINK=%LJLINK% /debug
70) else (
71 @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG
72)
73
74@if "%1"=="amalg" goto :AMALG
75%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
76@if errorlevel 1 goto :BAD
77%LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj
78@if errorlevel 1 goto :BAD
79@goto :NOAMALG
80:AMALG
81%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
82@if errorlevel 1 goto :BAD
83%LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj
84@if errorlevel 1 goto :BAD
85:NOAMALG
86
87@del *.obj *.manifest minilua.exe buildvm.exe
88@echo.
89@echo === Successfully built LuaJIT for Xbox One ===
90
91@goto :END
92:BAD
93@echo.
94@echo *******************************************************
95@echo *** Build FAILED -- Please check the error messages ***
96@echo *******************************************************
97@goto :END
98:FAIL
99@echo To run this script you must open a "Visual Studio .NET Command Prompt"
100@echo (64 bit host compiler). The Xbox One SDK must be installed, too.
101:END
diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat
index 240ec878..37322d03 100644
--- a/src/xedkbuild.bat
+++ b/src/xedkbuild.bat
@@ -14,7 +14,7 @@
14@set LJMT=mt /nologo 14@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm 15@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua 16@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
18 18
19%LJCOMPILE% host\minilua.c 19%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD 20@if errorlevel 1 goto :BAD