author     Mike Pall <mike>   2009-12-08 19:46:35 +0100
committer  Mike Pall <mike>   2009-12-08 19:46:35 +0100
commit     55b16959717084884fd4a0cbae6d19e3786c20c7 (patch)
tree       c8a07a43c13679751ed25a9d06796e9e7b2134a6
download   luajit-2.0.0-beta1.tar.gz
           luajit-2.0.0-beta1.tar.bz2
           luajit-2.0.0-beta1.zip
RELEASE LuaJIT-2.0.0-beta1 (tag: v2.0.0-beta1)
-rw-r--r--.gitignore11
-rw-r--r--Makefile84
-rw-r--r--README16
-rw-r--r--doc/api.html203
-rw-r--r--doc/bluequad-print.css166
-rw-r--r--doc/bluequad.css303
-rw-r--r--doc/changes.html281
-rw-r--r--doc/contact.html84
-rw-r--r--doc/faq.html141
-rw-r--r--doc/img/contact.pngbin0 -> 1340 bytes
-rw-r--r--doc/install.html216
-rw-r--r--doc/luajit.html120
-rw-r--r--doc/running.html233
-rw-r--r--doc/status.html235
-rw-r--r--dynasm/dasm_proto.h69
-rw-r--r--dynasm/dasm_x86.h467
-rw-r--r--dynasm/dasm_x86.lua1799
-rw-r--r--dynasm/dynasm.lua1070
-rw-r--r--etc/strict.lua41
-rw-r--r--lib/.gitignore1
-rw-r--r--lib/bc.lua182
-rw-r--r--lib/dis_x64.lua19
-rw-r--r--lib/dis_x86.lua824
-rw-r--r--lib/dump.lua567
-rw-r--r--lib/v.lua156
-rw-r--r--src/.gitignore8
-rw-r--r--src/Makefile326
-rw-r--r--src/Makefile.dep139
-rw-r--r--src/buildvm.c438
-rw-r--r--src/buildvm.h106
-rw-r--r--src/buildvm_asm.c220
-rw-r--r--src/buildvm_fold.c206
-rw-r--r--src/buildvm_lib.c365
-rw-r--r--src/buildvm_peobj.c303
-rw-r--r--src/buildvm_x86.dasc3592
-rw-r--r--src/lauxlib.h159
-rw-r--r--src/lib_aux.c438
-rw-r--r--src/lib_base.c560
-rw-r--r--src/lib_bit.c74
-rw-r--r--src/lib_debug.c366
-rw-r--r--src/lib_init.c37
-rw-r--r--src/lib_io.c538
-rw-r--r--src/lib_jit.c589
-rw-r--r--src/lib_math.c188
-rw-r--r--src/lib_os.c249
-rw-r--r--src/lib_package.c508
-rw-r--r--src/lib_string.c790
-rw-r--r--src/lib_table.c276
-rw-r--r--src/lj.supp6
-rw-r--r--src/lj_alloc.c1232
-rw-r--r--src/lj_alloc.h17
-rw-r--r--src/lj_api.c1046
-rw-r--r--src/lj_arch.h88
-rw-r--r--src/lj_asm.c3324
-rw-r--r--src/lj_asm.h17
-rw-r--r--src/lj_bc.c17
-rw-r--r--src/lj_bc.h235
-rw-r--r--src/lj_ctype.c44
-rw-r--r--src/lj_ctype.h40
-rw-r--r--src/lj_def.h226
-rw-r--r--src/lj_dispatch.c284
-rw-r--r--src/lj_dispatch.h64
-rw-r--r--src/lj_err.c763
-rw-r--r--src/lj_err.h40
-rw-r--r--src/lj_errmsg.h134
-rw-r--r--src/lj_ff.h18
-rw-r--r--src/lj_frame.h84
-rw-r--r--src/lj_func.c185
-rw-r--r--src/lj_func.h25
-rw-r--r--src/lj_gc.c800
-rw-r--r--src/lj_gc.h102
-rw-r--r--src/lj_gdbjit.c739
-rw-r--r--src/lj_gdbjit.h22
-rw-r--r--src/lj_ir.c461
-rw-r--r--src/lj_ir.h429
-rw-r--r--src/lj_iropt.h128
-rw-r--r--src/lj_jit.h279
-rw-r--r--src/lj_lex.c393
-rw-r--r--src/lj_lex.h63
-rw-r--r--src/lj_lib.c216
-rw-r--r--src/lj_lib.h84
-rw-r--r--src/lj_mcode.c260
-rw-r--r--src/lj_mcode.h23
-rw-r--r--src/lj_meta.c358
-rw-r--r--src/lj_meta.h33
-rw-r--r--src/lj_obj.c41
-rw-r--r--src/lj_obj.h676
-rw-r--r--src/lj_opt_dce.c79
-rw-r--r--src/lj_opt_fold.c1415
-rw-r--r--src/lj_opt_loop.c358
-rw-r--r--src/lj_opt_mem.c550
-rw-r--r--src/lj_opt_narrow.c430
-rw-r--r--src/lj_parse.c2198
-rw-r--r--src/lj_parse.h15
-rw-r--r--src/lj_record.c2136
-rw-r--r--src/lj_record.h17
-rw-r--r--src/lj_snap.c286
-rw-r--r--src/lj_snap.h19
-rw-r--r--src/lj_state.c255
-rw-r--r--src/lj_state.h31
-rw-r--r--src/lj_str.c301
-rw-r--r--src/lj_str.h45
-rw-r--r--src/lj_tab.c618
-rw-r--r--src/lj_tab.h41
-rw-r--r--src/lj_target.h132
-rw-r--r--src/lj_target_x86.h257
-rw-r--r--src/lj_trace.c591
-rw-r--r--src/lj_trace.h52
-rw-r--r--src/lj_traceerr.h59
-rw-r--r--src/lj_udata.c33
-rw-r--r--src/lj_udata.h14
-rw-r--r--src/lj_vm.h66
-rw-r--r--src/lj_vmevent.c56
-rw-r--r--src/lj_vmevent.h49
-rw-r--r--src/ljamalg.c70
-rw-r--r--src/lua.h388
-rw-r--r--src/lua.hpp9
-rw-r--r--src/luaconf.h133
-rw-r--r--src/luajit.c519
-rw-r--r--src/luajit.h68
-rw-r--r--src/lualib.h41
-rw-r--r--src/msvcbuild.bat53
122 files changed, 42143 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..1a07bf75
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
1*.[oa]
2*.so
3*.obj
4*.lib
5*.exp
6*.dll
7*.exe
8*.manifest
9*.dmp
10*.swp
11.tags
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..67347041
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,84 @@
1##############################################################################
2# LuaJIT top level Makefile for installation. Requires GNU Make.
3#
4# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
5# Note: src/Makefile has many more configurable options.
6#
7# ##### This Makefile is NOT useful for installation on Windows! #####
8# For MSVC, please follow the instructions given in src/msvcbuild.bat.
9# For MinGW and Cygwin, cd to src and run make with the Makefile there.
10# NYI: add wininstall.bat
11#
12# Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
13##############################################################################
14
15BASEVER= 2.0.0
16VERSION= 2.0.0-beta1
17
18##############################################################################
19#
20# Change the installation path as needed and modify src/luaconf.h accordingly.
21# Note: PREFIX must be an absolute path!
22#
23PREFIX= /usr/local
24##############################################################################
25
26INSTALL_BIN= $(PREFIX)/bin
27INSTALL_NAME= luajit-$(VERSION)
28INSTALL_T= $(INSTALL_BIN)/$(INSTALL_NAME)
29INSTALL_TSYM= $(INSTALL_BIN)/luajit
30INSTALL_INC= $(PREFIX)/include/luajit-$(BASEVER)
31INSTALL_JITLIB= $(PREFIX)/share/luajit-$(VERSION)/jit
32
33MKDIR= mkdir -p
34SYMLINK= ln -f -s
35INSTALL_X= install -m 0755
36INSTALL_F= install -m 0644
37
38FILES_T= luajit
39FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
40FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua vmdef.lua
41
42##############################################################################
43
44INSTALL_DEP= src/luajit
45
46all $(INSTALL_DEP):
47 @echo "==== Building LuaJIT $(VERSION) ===="
48 $(MAKE) -C src
49 @echo "==== Successfully built LuaJIT $(VERSION) ===="
50
51install: $(INSTALL_DEP)
52 @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ===="
53 $(MKDIR) $(INSTALL_BIN) $(INSTALL_INC) $(INSTALL_JITLIB)
54 cd src && $(INSTALL_X) $(FILES_T) $(INSTALL_T)
55 cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
56 cd lib && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
57 @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
58 @echo ""
59 @echo "Note: the beta releases deliberately do NOT install a symlink for luajit"
60 @echo "You can do this now by running this command (with sudo):"
61 @echo ""
62 @echo " $(SYMLINK) $(INSTALL_NAME) $(INSTALL_TSYM)"
63 @echo ""
64
65##############################################################################
66
67amalg:
68 @echo "Building LuaJIT $(VERSION)"
69 $(MAKE) -C src amalg
70
71clean:
72 $(MAKE) -C src clean
73
74cleaner:
75 $(MAKE) -C src cleaner
76
77distclean:
78 $(MAKE) -C src distclean
79
80SUB_TARGETS= amalg clean cleaner distclean
81
82.PHONY: all install $(SUB_TARGETS)
83
84##############################################################################
diff --git a/README b/README
new file mode 100644
index 00000000..43caf78e
--- /dev/null
+++ b/README
@@ -0,0 +1,16 @@
1README for LuaJIT 2.0.0-beta1
2-----------------------------
3
4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
5
6Project Homepage: http://luajit.org/
7
8LuaJIT is Copyright (C) 2005-2009 Mike Pall.
9LuaJIT is free software, released under the MIT/X license.
10See full Copyright Notice in src/luajit.h
11
12Documentation for LuaJIT is available in HTML format.
13Please point your favorite browser to:
14
15 doc/luajit.html
16
diff --git a/doc/api.html b/doc/api.html
new file mode 100644
index 00000000..79788d95
--- /dev/null
+++ b/doc/api.html
@@ -0,0 +1,203 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>API Extensions</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11</head>
12<body>
13<div id="site">
14<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
15</div>
16<div id="head">
17<h1>API Extensions</h1>
18</div>
19<div id="nav">
20<ul><li>
21<a href="luajit.html">LuaJIT</a>
22<ul><li>
23<a href="install.html">Installation</a>
24</li><li>
25<a href="running.html">Running</a>
26</li><li>
27<a class="current" href="api.html">API Extensions</a>
28</li></ul>
29</li><li>
30<a href="status.html">Status</a>
31<ul><li>
32<a href="changes.html">Changes</a>
33</li></ul>
34</li><li>
35<a href="faq.html">FAQ</a>
36</li><li>
37<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
38</li></ul>
39</div>
40<div id="main">
41<p>
42LuaJIT is fully upwards-compatible with Lua 5.1. It supports all
43<a href="http://www.lua.org/manual/5.1/manual.html#5"><span class="ext">&raquo;</span>&nbsp;standard Lua
44library functions</a> and the full set of
45<a href="http://www.lua.org/manual/5.1/manual.html#3"><span class="ext">&raquo;</span>&nbsp;Lua/C API
46functions</a>.
47</p>
48<p>
49LuaJIT is also fully ABI-compatible with Lua 5.1 at the linker/dynamic
50loader level. This means you can compile a C&nbsp;module against the
51standard Lua headers and load the same shared library from either Lua
52or LuaJIT.
53</p>
54
55<h2 id="bit"><tt>bit.*</tt> &mdash; Bitwise Operations</h2>
56<p>
57LuaJIT supports all bitwise operations as defined by
58<a href="http://bitop.luajit.org"><span class="ext">&raquo;</span>&nbsp;Lua BitOp</a>:
59</p>
60<pre class="code">
61bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor
62bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
63</pre>
64<p>
65This module is a LuaJIT built-in &mdash; you don't need to download or
66install Lua BitOp. The Lua BitOp site has full documentation for all
67<a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
68</p>
69<p>
70Please make sure to <tt>require</tt> the module before using any of
71its functions:
72</p>
73<pre class="code">
74local bit = require("bit")
75</pre>
76<p>
77An already installed Lua BitOp module is ignored by LuaJIT.
78This way you can use bit operations from both Lua and LuaJIT on a
79shared installation.
80</p>
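<p>
For illustration, a small usage sketch; only functions from the list above
are used and the expected results are shown in comments:
</p>
<pre class="code">
local bit = require("bit")
print(bit.band(0xff, 0x3c))      --> 60
print(bit.bor(1, 2, 4))          --> 7
print(bit.lshift(1, 8))          --> 256
print(bit.tohex(bit.bxor(0x12345678, 0xffffffff)))  --> edcba987
</pre>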
81
82<h2 id="jit"><tt>jit.*</tt> &mdash; JIT compiler control</h2>
83<p>
84The functions in this built-in module control the behavior
85of the JIT compiler engine.
86</p>
87
88<h3 id="jit_onoff"><tt>jit.on()<br>
89jit.off()</tt></h3>
90<p>
91Turns the whole JIT compiler on (default) or off.
92</p>
93<p>
94These functions are typically used with the command line options
95<tt>-j on</tt> or <tt>-j off</tt>.
96</p>
97
98<h3 id="jit_flush"><tt>jit.flush()</tt></h3>
99<p>
100Flushes the whole cache of compiled code.
101</p>
102
103<h3 id="jit_flush_tr"><tt>jit.flush(tr)</tt></h3>
104<p>
105Flushes the code for the specified root trace and all of its
106side traces from the cache.
107</p>
108
109<h3 id="jit_onoff_func"><tt>jit.on(func|true [,true|false])<br>
110jit.off(func|true [,true|false])<br>
111jit.flush(func|true [,true|false])</tt></h3>
112<p>
113<tt>jit.on</tt> enables JIT compilation for a Lua function (this is
114the default).
115</p>
116<p>
117<tt>jit.off</tt> disables JIT compilation for a Lua function and
118flushes any already compiled code from the code cache.
119</p>
120<p>
121<tt>jit.flush</tt> flushes the code, but doesn't affect the
122enable/disable status.
123</p>
124<p>
125The current function, i.e. the Lua function calling this library
126function, can also be specified by passing <tt>true</tt> as the first
127argument.
128</p>
129<p>
130If the second argument is <tt>true</tt>, JIT compilation is also
131enabled, disabled or flushed recursively for all subfunctions of a
132function. With <tt>false</tt> only the subfunctions are affected.
133</p>
134<p>
135The <tt>jit.on</tt> and <tt>jit.off</tt> functions only set a flag
136which is checked when the function is about to be compiled. They do
137not trigger immediate compilation.
138</p>
139<p>
140Typical usage is <tt>jit.off(true, true)</tt> in the main chunk
141of a module to turn off JIT compilation for the whole module for
142debugging purposes.
143</p>
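<p>
For illustration, a minimal sketch of that idiom (the module name and its
contents are made up):
</p>
<pre class="code">
-- mymodule.lua
jit.off(true, true)  -- keep this module interpreted, e.g. while debugging it

local M = {}
function M.step(x)
  return x + 1
end
return M
</pre>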
144
145<h3 id="jit_version"><tt>jit.version</tt></h3>
146<p>
147Contains the LuaJIT version string.
148</p>
149
150<h3 id="jit_version_num"><tt>jit.version_num</tt></h3>
151<p>
152Contains the version number of the LuaJIT core. Version xx.yy.zz
153is represented by the decimal number xxyyzz.
154</p>
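<p>
For example, version 2.0.0 corresponds to the number 20000, so a script
could check it like this (an illustrative sketch):
</p>
<pre class="code">
if jit and jit.version_num >= 20000 then
  print("running on " .. jit.version)
end
</pre>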
155
156<h3 id="jit_arch"><tt>jit.arch</tt></h3>
157<p>
158Contains the target architecture name (CPU and optional ABI).
159</p>
160
161<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
162<p>
163This module provides the backend for the <tt>-O</tt> command line
164option.
165</p>
166<p>
167You can also use it programmatically, e.g.:
168</p>
169<pre class="code">
170jit.opt.start(2) -- same as -O2
171jit.opt.start("-dce")
172jit.opt.start("hotloop=10", "hotexit=2")
173</pre>
174<p>
175Unlike in LuaJIT 1.x, the module is built-in and
176<b>optimization is turned on by default!</b>
177It's no longer necessary to run <tt>require("jit.opt").start()</tt>,
178which was one of the ways to enable optimization.
179</p>
180
181<h2 id="jit_util"><tt>jit.util.*</tt> &mdash; JIT compiler introspection</h2>
182<p>
183This module holds functions to introspect the bytecode, generated
184traces, the IR and the generated machine code. The functionality
185provided by this module is still in flux and therefore undocumented.
186</p>
187<p>
188The debug modules <tt>-jbc</tt>, <tt>-jv</tt> and <tt>-jdump</tt> make
189extensive use of these functions. Please check out their source code,
190if you want to know more.
191</p>
192<br class="flush">
193</div>
194<div id="foot">
195<hr class="hide">
196Copyright &copy; 2005-2009 Mike Pall
197<span class="noprint">
198&middot;
199<a href="contact.html">Contact</a>
200</span>
201</div>
202</body>
203</html>
diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css
new file mode 100644
index 00000000..00a6b154
--- /dev/null
+++ b/doc/bluequad-print.css
@@ -0,0 +1,166 @@
1/* Copyright (C) 2004-2009 Mike Pall.
2 *
3 * You are welcome to use the general ideas of this design for your own sites.
4 * But please do not steal the stylesheet, the layout or the color scheme.
5 */
6body {
7 font-family: serif;
8 font-size: 11pt;
9 margin: 0 3em;
10 padding: 0;
11 border: none;
12}
13a:link, a:visited, a:hover, a:active {
14 text-decoration: none;
15 background: transparent;
16 color: #0000ff;
17}
18h1, h2, h3 {
19 font-family: sans-serif;
20 font-weight: bold;
21 text-align: left;
22 margin: 0.5em 0;
23 padding: 0;
24}
25h1 {
26 font-size: 200%;
27}
28h2 {
29 font-size: 150%;
30}
31h3 {
32 font-size: 125%;
33}
34p {
35 margin: 0 0 0.5em 0;
36 padding: 0;
37}
38ul, ol {
39 margin: 0.5em 0;
40 padding: 0 0 0 2em;
41}
42ul {
43 list-style: outside square;
44}
45ol {
46 list-style: outside decimal;
47}
48li {
49 margin: 0;
50 padding: 0;
51}
52dl {
53 margin: 1em 0;
54 padding: 1em;
55 border: 1px solid black;
56}
57dt {
58 font-weight: bold;
59 margin: 0;
60 padding: 0;
61}
62dt sup {
63 float: right;
64 margin-left: 1em;
65}
66dd {
67 margin: 0.5em 0 0 2em;
68 padding: 0;
69}
70table {
71 table-layout: fixed;
72 width: 100%;
73 margin: 1em 0;
74 padding: 0;
75 border: 1px solid black;
76 border-spacing: 0;
77 border-collapse: collapse;
78}
79tr {
80 margin: 0;
81 padding: 0;
82 border: none;
83}
84td {
85 text-align: left;
86 margin: 0;
87 padding: 0.2em 0.5em;
88 border-top: 1px solid black;
89 border-bottom: 1px solid black;
90}
91tr.separate td {
92 border-top: double;
93}
94tt, pre, code, kbd, samp {
95 font-family: monospace;
96 font-size: 75%;
97}
98kbd {
99 font-weight: bolder;
100}
101blockquote, pre {
102 margin: 1em 2em;
103 padding: 0;
104}
105img {
106 border: none;
107 vertical-align: baseline;
108 margin: 0;
109 padding: 0;
110}
111img.left {
112 float: left;
113 margin: 0.5em 1em 0.5em 0;
114}
115img.right {
116 float: right;
117 margin: 0.5em 0 0.5em 1em;
118}
119.flush {
120 clear: both;
121 visibility: hidden;
122}
123.hide, .noprint, #nav {
124 display: none !important;
125}
126.pagebreak {
127 page-break-before: always;
128}
129#site {
130 text-align: right;
131 font-family: sans-serif;
132 font-weight: bold;
133 margin: 0 1em;
134 border-bottom: 1pt solid black;
135}
136#site a {
137 font-size: 1.2em;
138}
139#site a:link, #site a:visited {
140 text-decoration: none;
141 font-weight: bold;
142 background: transparent;
143 color: #ffffff;
144}
145#logo {
146 color: #ff8000;
147}
148#head {
149 clear: both;
150 margin: 0 1em;
151}
152#main {
153 line-height: 1.3;
154 text-align: justify;
155 margin: 1em;
156}
157#foot {
158 clear: both;
159 font-size: 80%;
160 text-align: center;
161 margin: 0 1.25em;
162 padding: 0.5em 0 0 0;
163 border-top: 1pt solid black;
164 page-break-before: avoid;
165 page-break-after: avoid;
166}
diff --git a/doc/bluequad.css b/doc/bluequad.css
new file mode 100644
index 00000000..7e52102f
--- /dev/null
+++ b/doc/bluequad.css
@@ -0,0 +1,303 @@
1/* Copyright (C) 2004-2009 Mike Pall.
2 *
3 * You are welcome to use the general ideas of this design for your own sites.
4 * But please do not steal the stylesheet, the layout or the color scheme.
5 */
6/* colorscheme:
7 *
8 * site | head #4162bf/white | #6078bf/#e6ecff
9 * ------+------ ----------------+-------------------
10 * nav | main #bfcfff | #e6ecff/black
11 *
12 * nav: hiback loback #c5d5ff #b9c9f9
13 * hiborder loborder #e6ecff #97a7d7
14 * link hover #2142bf #ff0000
15 *
16 * link: link visited hover #2142bf #8122bf #ff0000
17 *
18 * main: boxback boxborder #f0f4ff #bfcfff
19 */
20body {
21 font-family: Verdana, Arial, Helvetica, sans-serif;
22 font-size: 10pt;
23 margin: 0;
24 padding: 0;
25 border: none;
26 background: #e0e0e0;
27 color: #000000;
28}
29a:link {
30 text-decoration: none;
31 background: transparent;
32 color: #2142bf;
33}
34a:visited {
35 text-decoration: none;
36 background: transparent;
37 color: #8122bf;
38}
39a:hover, a:active {
40 text-decoration: underline;
41 background: transparent;
42 color: #ff0000;
43}
44h1, h2, h3 {
45 font-weight: bold;
46 text-align: left;
47 margin: 0.5em 0;
48 padding: 0;
49 background: transparent;
50}
51h1 {
52 font-size: 200%;
53 line-height: 3em; /* really 6em relative to body, match #site span */
54 margin: 0;
55}
56h2 {
57 font-size: 150%;
58 color: #606060;
59}
60h3 {
61 font-size: 125%;
62 color: #404040;
63}
64p {
65 max-width: 600px;
66 margin: 0 0 0.5em 0;
67 padding: 0;
68}
69b {
70 color: #404040;
71}
72ul, ol {
73 max-width: 600px;
74 margin: 0.5em 0;
75 padding: 0 0 0 2em;
76}
77ul {
78 list-style: outside square;
79}
80ol {
81 list-style: outside decimal;
82}
83li {
84 margin: 0;
85 padding: 0;
86}
87dl {
88 max-width: 600px;
89 margin: 1em 0;
90 padding: 1em;
91 border: 1px solid #bfcfff;
92 background: #f0f4ff;
93}
94dt {
95 font-weight: bold;
96 margin: 0;
97 padding: 0;
98}
99dt sup {
100 float: right;
101 margin-left: 1em;
102 color: #808080;
103}
104dt a:visited {
105 text-decoration: none;
106 color: #2142bf;
107}
108dt a:hover, dt a:active {
109 text-decoration: none;
110 color: #ff0000;
111}
112dd {
113 margin: 0.5em 0 0 2em;
114 padding: 0;
115}
116div.tablewrap { /* for IE *sigh* */
117 max-width: 600px;
118}
119table {
120 table-layout: fixed;
121 border-spacing: 0;
122 border-collapse: collapse;
123 max-width: 600px;
124 width: 100%;
125 margin: 1em 0;
126 padding: 0;
127 border: 1px solid #bfcfff;
128}
129tr {
130 margin: 0;
131 padding: 0;
132 border: none;
133}
134tr.odd {
135 background: #f0f4ff;
136}
137tr.separate td {
138 border-top: 1px solid #bfcfff;
139}
140td {
141 text-align: left;
142 margin: 0;
143 padding: 0.2em 0.5em;
144 border: none;
145}
146tt, code, kbd, samp {
147 font-family: Courier New, Courier, monospace;
148 line-height: 1.2;
149 font-size: 110%;
150}
151kbd {
152 font-weight: bolder;
153}
154blockquote, pre {
155 max-width: 600px;
156 margin: 1em 2em;
157 padding: 0;
158}
159pre {
160 line-height: 1.1;
161}
162pre.code {
163 line-height: 1.4;
164 margin: 0.5em 0 1em 0.5em;
165 padding: 0.5em 1em;
166 border: 1px solid #bfcfff;
167 background: #f0f4ff;
168}
169img {
170 border: none;
171 vertical-align: baseline;
172 margin: 0;
173 padding: 0;
174}
175img.left {
176 float: left;
177 margin: 0.5em 1em 0.5em 0;
178}
179img.right {
180 float: right;
181 margin: 0.5em 0 0.5em 1em;
182}
183.indent {
184 padding-left: 1em;
185}
186.flush {
187 clear: both;
188 visibility: hidden;
189}
190.hide, .noscreen {
191 display: none !important;
192}
193.ext {
194 color: #ff8000;
195}
196#site {
197 clear: both;
198 float: left;
199 width: 13em;
200 text-align: center;
201 font-weight: bold;
202 margin: 0;
203 padding: 0;
204 background: transparent;
205 color: #ffffff;
206}
207#site a {
208 font-size: 200%;
209}
210#site a:link, #site a:visited {
211 text-decoration: none;
212 font-weight: bold;
213 background: transparent;
214 color: #ffffff;
215}
216#site span {
217 line-height: 3em; /* really 6em relative to body, match h1 */
218}
219#logo {
220 color: #ffb380;
221}
222#head {
223 margin: 0;
224 padding: 0 0 0 2em;
225 border-left: solid 13em #4162bf;
226 border-right: solid 3em #6078bf;
227 background: #6078bf;
228 color: #e6ecff;
229}
230#nav {
231 clear: both;
232 float: left;
233 overflow: hidden;
234 text-align: left;
235 line-height: 1.5;
236 width: 13em;
237 padding-top: 1em;
238 background: transparent;
239}
240#nav ul {
241 list-style: none outside;
242 margin: 0;
243 padding: 0;
244}
245#nav li {
246 margin: 0;
247 padding: 0;
248}
249#nav a {
250 display: block;
251 text-decoration: none;
252 font-weight: bold;
253 margin: 0;
254 padding: 2px 1em;
255 border-top: 1px solid transparent;
256 border-bottom: 1px solid transparent;
257 background: transparent;
258 color: #2142bf;
259}
260#nav a:hover, #nav a:active {
261 text-decoration: none;
262 border-top: 1px solid #97a7d7;
263 border-bottom: 1px solid #e6ecff;
264 background: #b9c9f9;
265 color: #ff0000;
266}
267#nav a.current, #nav a.current:hover, #nav a.current:active {
268 border-top: 1px solid #e6ecff;
269 border-bottom: 1px solid #97a7d7;
270 background: #c5d5ff;
271 color: #2142bf;
272}
273#nav ul ul a {
274 padding: 0 1em 0 2em;
275}
276#main {
277 line-height: 1.5;
278 text-align: left;
279 margin: 0;
280 padding: 1em 2em;
281 border-left: solid 13em #bfcfff;
282 border-right: solid 3em #e6ecff;
283 background: #e6ecff;
284}
285#foot {
286 clear: both;
287 font-size: 80%;
288 text-align: center;
289 margin: 0;
290 padding: 0.5em;
291 background: #6078bf;
292 color: #ffffff;
293}
294#foot a:link, #foot a:visited {
295 text-decoration: underline;
296 background: transparent;
297 color: #ffffff;
298}
299#foot a:hover, #foot a:active {
300 text-decoration: underline;
301 background: transparent;
302 color: #bfcfff;
303}
diff --git a/doc/changes.html b/doc/changes.html
new file mode 100644
index 00000000..6c34b8be
--- /dev/null
+++ b/doc/changes.html
@@ -0,0 +1,281 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>LuaJIT Change History</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11<style type="text/css">
12div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
13</style>
14</head>
15<body>
16<div id="site">
17<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
18</div>
19<div id="head">
20<h1>LuaJIT Change History</h1>
21</div>
22<div id="nav">
23<ul><li>
24<a href="luajit.html">LuaJIT</a>
25<ul><li>
26<a href="install.html">Installation</a>
27</li><li>
28<a href="running.html">Running</a>
29</li><li>
30<a href="api.html">API Extensions</a>
31</li></ul>
32</li><li>
33<a href="status.html">Status</a>
34<ul><li>
35<a class="current" href="changes.html">Changes</a>
36</li></ul>
37</li><li>
38<a href="faq.html">FAQ</a>
39</li><li>
40<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
41</li></ul>
42</div>
43<div id="main">
44<p>
45This is a list of changes between the released versions of LuaJIT.<br>
46The current <span style="color: #c00000;">development version</span> is <strong>LuaJIT&nbsp;2.0.0-beta1</strong>.<br>
47The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;1.1.5</strong>.
48</p>
49<p>
50Please check the
51<a href="http://luajit.org/luajit_changes.html"><span class="ext">&raquo;</span>&nbsp;Online Change History</a>
52to see whether newer versions are available.
53</p>
54
55<div class="major" style="background: #ffd0d0;">
56<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 &mdash; 2009-10-31</h2>
57<ul>
58<li>This is the first public release of LuaJIT 2.0.</li>
59<li>The whole VM has been rewritten from the ground up, so there's
60no point in listing differences over earlier versions.</li>
61</ul>
62</div>
63
64<div class="major" style="background: #d0d0ff;">
65<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
66<ul>
67<li>Merged with Lua 5.1.4. Fixes all
68<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
69</ul>
70
71<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
72<ul>
73<li>Merged with Lua 5.1.3. Fixes all
74<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
75<li>Fixed possible (but unlikely) stack corruption while compiling
76<tt>k^x</tt> expressions.</li>
77<li>Fixed DynASM template for cmpss instruction.</li>
78</ul>
79
80<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
81<ul>
82<li>Merged with Lua 5.1.2. Fixes all
83<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
84<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
85<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
86<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
87<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
88<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
89<li>The loadable debug modules now handle redirection to stdout
90(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
91</ul>
92
93<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
94<ul>
95<li>Fix MSVC inline assembly: use only local variables with
96<tt>lua_number2int()</tt>.</li>
97<li>Fix "attempt to call a thread value" bug on Mac OS X:
98make values of consts used as lightuserdata keys unique
99to avoid joining by the compiler/linker.</li>
100</ul>
101
102<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
103<ul>
104<li>Merged with Lua 5.1.1. Fixes all
105<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
106<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
107<li>Minor changes to DynASM: faster preprocessing, smaller encoding
108for some immediates.</li>
109</ul>
110<p>
111This release is in sync with Coco 1.1.1 (see the
112<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
113</p>
114
115<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
116<ul>
117<li>Merged with Lua 5.1 (final).</li>
118
119<li>New JIT call frame setup:
120<ul>
121<li>The C stack is kept 16 byte aligned (faster).
122Mandatory for Mac OS X on Intel, too.</li>
123<li>Faster calling conventions for internal C helper functions.</li>
124<li>Better instruction scheduling for function prologue, OP_CALL and
125OP_RETURN.</li>
126</ul></li>
127
128<li>Miscellaneous optimizations:
129<ul>
130<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
131forwarding stalls.</li>
132<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
133and FP to integer conversions.</li>
134<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
135<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
136With better accuracy than the C variant, too.</li>
137<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
138use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
139</ul></li>
140
141<li>Changes in the optimizer:
142<ul>
143<li>Improved hinting for table keys derived from table values
144(<tt>t1[t2[x]]</tt>).</li>
145<li>Lookup hinting now works with arbitrary object types and
146supports index chains, too.</li>
147<li>Generate type hints for arithmetic and comparison operators,
148OP_LEN, OP_CONCAT and OP_FORPREP.</li>
149<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
150<li>Complete rewrite of <tt>jit.opt_inline</tt> module
151(ex <tt>jit.opt_lib</tt>).</li>
152</ul></li>
153
154<li>Use adaptive deoptimization:
155<ul>
156<li>If runtime verification of a contract fails, the affected
157instruction is recompiled and patched on-the-fly.
158Regular programs will trigger deoptimization only occasionally.</li>
159<li>This avoids generating code for uncommon fallback cases
160most of the time. Generated code is up to 30% smaller compared to
161LuaJIT&nbsp;1.0.3.</li>
162<li>Deoptimization is used for many opcodes and contracts:
163<ul>
164<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
165<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
166<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
167<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
168OP_FORPREP: operand type and range mismatches.</li>
169</ul></li>
170<li>Complete redesign of the debug and traceback info
171(bytecode &harr; mcode) to support deoptimization.
172Much more flexible and needs only 50% of the space.</li>
173<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
174<tt>jit.dump</tt> handle deoptimization.</li>
175</ul></li>
176
177<li>Inlined many popular library functions
178(for commonly used arguments only):
179<ul>
180<li>Most <tt>math.*</tt> functions (the 18 most used ones)
181[2x-10x faster].</li>
182<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
183[2x-10x faster].</li>
184<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
185[3x-5x faster].</li>
186<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
187[3x-5x faster].</li>
188<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
189[8x-15x faster].</li>
190</ul></li>
191
192<li>Changes in the core and loadable modules and the stand-alone executable:
193<ul>
194<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
195and <tt>jit.arch</tt>.</li>
196<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
197<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
198<li>New x86 disassembler module written in pure Lua. No dependency
199on ndisasm anymore. Flexible API, very compact (500 lines)
200and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
201<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
202on a separate line.</li>
203</ul></li>
204
205<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
206<li>Miscellaneous doc changes. Added a section about
207<a href="luajit_install.html#embedding">embedding LuaJIT</a>.</li>
208</ul>
209<p>
210This release is in sync with Coco 1.1.0 (see the
211<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
212</p>
213</div>
214
215<div class="major" style="background: #ffffd0;">
216<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
217<ul>
218<li>Even more docs.</li>
219<li>Unified closure checks in <tt>jit.*</tt>.</li>
220<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
221<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
222<li>Merged with Lua 5.1 alpha (including early bugfixes).</li>
223</ul>
224<p>
225This is the first public release of LuaJIT.
226</p>
227
228<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
229<ul>
230<li>Add support for flushing the Valgrind translation cache <br>
231(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
232<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
233variant for POSIX systems.</li>
234<li>Reorganized the C&nbsp;function signature handling in
235<tt>jit.opt_lib</tt>.</li>
236<li>Changed to index-based hints for inlining C&nbsp;functions.
237Still no support in the backend for inlining.</li>
238<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
239<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
240<li>Misc. changes to the Makefiles.</li>
241<li>Lots of new docs.</li>
242<li>Complete doc reorg.</li>
243</ul>
244<p>
245Not released because Lua 5.1 alpha came out today.
246</p>
247
248<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
249<ul>
250<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
251<li>Fix result handling for C &ndash;> JIT calls.</li>
252<li>Detect CPU feature bits.</li>
253<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
254<li>Add fallback instructions for FP compares.</li>
255<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
256<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
257(David Burgess).</li>
258<li>Misc. doc updates.</li>
259</ul>
260<p>
261Interim non-public release.
262Special thanks to Adam D. Moss for reporting most of the bugs.
263</p>
264
265<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
266<p>
267This is the initial non-public release of LuaJIT.
268</p>
269</div>
270<br class="flush">
271</div>
272<div id="foot">
273<hr class="hide">
274Copyright &copy; 2005-2009 Mike Pall
275<span class="noprint">
276&middot;
277<a href="contact.html">Contact</a>
278</span>
279</div>
280</body>
281</html>
diff --git a/doc/contact.html b/doc/contact.html
new file mode 100644
index 00000000..36d5a825
--- /dev/null
+++ b/doc/contact.html
@@ -0,0 +1,84 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>Contact</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11</head>
12<body>
13<div id="site">
14<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
15</div>
16<div id="head">
17<h1>Contact</h1>
18</div>
19<div id="nav">
20<ul><li>
21<a href="luajit.html">LuaJIT</a>
22<ul><li>
23<a href="install.html">Installation</a>
24</li><li>
25<a href="running.html">Running</a>
26</li><li>
27<a href="api.html">API Extensions</a>
28</li></ul>
29</li><li>
30<a href="status.html">Status</a>
31<ul><li>
32<a href="changes.html">Changes</a>
33</li></ul>
34</li><li>
35<a href="faq.html">FAQ</a>
36</li><li>
37<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
38</li></ul>
39</div>
40<div id="main">
41<p>
42Please send general questions to the
43<a href="http://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>.
44You can also send any questions you have directly to me:
45</p>
46
47<script type="text/javascript">
48<!--
49var xS="@-: .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZa<b>cdefghijklmnopqrstuvwxyz"
50function xD(s)
51{var len=s.length;var r="";for(var i=0;i<len;i++)
52{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)
53c=xS.charAt(66-n);r+=c;}
54document.write("<"+"p>"+r+"<"+"/p>\n");}
55//-->
56</script>
57<script type="text/javascript">
58<!--
59xD("ewYKA7vu-EIwslx7 K9A.t41C")
60//--></script>
61<noscript>
62<p><img src="img/contact.png" alt="Contact info in image" width="170" height="13">
63</p>
64</noscript>
65
66<h2>Copyright</h2>
67<p>
68All documentation is
69Copyright &copy; 2005-2009 Mike Pall.
70</p>
71
72
73<br class="flush">
74</div>
75<div id="foot">
76<hr class="hide">
77Copyright &copy; 2005-2009 Mike Pall
78<span class="noprint">
79&middot;
80<a href="contact.html">Contact</a>
81</span>
82</div>
83</body>
84</html>
diff --git a/doc/faq.html b/doc/faq.html
new file mode 100644
index 00000000..6f62e1eb
--- /dev/null
+++ b/doc/faq.html
@@ -0,0 +1,141 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>Frequently Asked Questions (FAQ)</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11<style type="text/css">
12dd { margin-left: 1.5em; }
13</style>
14</head>
15<body>
16<div id="site">
17<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
18</div>
19<div id="head">
20<h1>Frequently Asked Questions (FAQ)</h1>
21</div>
22<div id="nav">
23<ul><li>
24<a href="luajit.html">LuaJIT</a>
25<ul><li>
26<a href="install.html">Installation</a>
27</li><li>
28<a href="running.html">Running</a>
29</li><li>
30<a href="api.html">API Extensions</a>
31</li></ul>
32</li><li>
33<a href="status.html">Status</a>
34<ul><li>
35<a href="changes.html">Changes</a>
36</li></ul>
37</li><li>
38<a class="current" href="faq.html">FAQ</a>
39</li><li>
40<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
41</li></ul>
42</div>
43<div id="main">
44<dl>
45<dt>Q: Where can I learn more about Lua and LuaJIT?</dt>
46<dd>
47<ul style="padding: 0;">
48<li>The <a href="http://lua.org"><span class="ext">&raquo;</span>&nbsp;main Lua.org site</a> has complete
49<a href="http://www.lua.org/docs.html"><span class="ext">&raquo;</span>&nbsp;documentation</a> of the language
50and links to books and papers about Lua.</li>
51<li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">&raquo;</span>&nbsp;Lua Wiki</a>
52has information about diverse topics.</li>
53<li>The primary source of information for the latest developments surrounding
54Lua is the <a href="http://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>.
55You can check out the <a href="http://lua-users.org/lists/lua-l/"><span class="ext">&raquo;</span>&nbsp;mailing
56list archive</a> or
57<a href="http://bazar2.conectiva.com.br/mailman/listinfo/lua"><span class="ext">&raquo;</span>&nbsp;subscribe</a>
58to the list (you need to be subscribed before posting).<br>
59This is also the place where announcements and discussions about LuaJIT
60take place.</li>
61</ul>
62</dl>
63
64<dl>
65<dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt>
66<dd>
67I'm planning to write more documentation about the internals of LuaJIT.
68In the meantime, please use the following Google Scholar searches
69to find relevant papers:<br>
70Search for: <a href="http://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">&raquo;</span>&nbsp;Trace Compiler</a><br>
71Search for: <a href="http://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">&raquo;</span>&nbsp;JIT Compiler</a><br>
72Search for: <a href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">&raquo;</span>&nbsp;Dynamic Language Optimizations</a><br>
73Search for: <a href="http://scholar.google.com/scholar?q=SSA+Form"><span class="ext">&raquo;</span>&nbsp;SSA Form</a><br>
74Search for: <a href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">&raquo;</span>&nbsp;Linear Scan Register Allocation</a><br>
75And, you know, reading the source is of course the only way to enlightenment. :-)
76</dd>
77</dl>
78
79<dl>
80<dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br>
81Q: My vararg functions fail after switching to LuaJIT!</dt>
82<dd>LuaJIT is compatible with the Lua 5.1 language standard. It doesn't
83support the implicit <tt>arg</tt> parameter for old-style vararg
84functions from Lua 5.0.<br>Please convert your code to the
85<a href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">&raquo;</span>&nbsp;Lua 5.1
86vararg syntax</a>, as shown in the short sketch below.</dd>
87</dl>
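<p>
For illustration, a minimal before/after sketch of such a conversion:
</p>
<pre class="code">
-- Lua 5.0 style, relies on the implicit 'arg' table (not supported):
function f(...)
  return arg[1], arg.n
end

-- Lua 5.1 style, works with both Lua 5.1 and LuaJIT:
function f(...)
  return (...), select("#", ...)
end
</pre>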
88
89<dl>
90<dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt>
91<dd>The interrupt signal handler sets a Lua debug hook. But this is
92currently ignored by compiled code (this will eventually be fixed). If
93your program is running in a tight loop and never falls back to the
94interpreter, the debug hook never runs and can't throw the
95"interrupted!" error.<br> In the meantime you have to press Ctrl-C
96twice to get stop your program. That's similar to when it's stuck
97running inside a C function under the Lua interpreter.</dd>
98</dl>
99
100<dl>
101<dt>Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?</dt>
102<dd>Because it's a completely redesigned VM and has very little code
103in common with Lua anymore. Also, if the patch introduces changes to
104the Lua semantics, this would need to be reflected everywhere in the
105VM, from the interpreter up to all stages of the compiler.<br> Please
106use only standard Lua language constructs. For many common needs you
107can use source transformations or use wrapper or proxy functions.
108The compiler will happily optimize away such indirections.</dd>
109</dl>
110
111<dl>
112<dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt>
113<dd>Because it's a compiler &mdash; it needs to generate native
114machine code. This means the code generator must be ported to each
115architecture. And the fast interpreter is written in assembler and
116must be ported, too. This is quite an undertaking.<br> Currently only
117x86 CPUs are supported. x64 support is in the works. Other
118architectures will follow with sufficient demand and/or
119sponsoring.</dd>
120</dl>
121
122<dl>
123<dt>Q: When will feature X be added? When will the next version be released?</dt>
124<dd>When it's ready.<br>
125C'mon, it's open source &mdash; I'm doing it on my own time and you're
126getting it for free. You can either contribute a patch or sponsor
127the development of certain features, if they are important to you.
128</dd>
129</dl>
130<br class="flush">
131</div>
132<div id="foot">
133<hr class="hide">
134Copyright &copy; 2005-2009 Mike Pall
135<span class="noprint">
136&middot;
137<a href="contact.html">Contact</a>
138</span>
139</div>
140</body>
141</html>
diff --git a/doc/img/contact.png b/doc/img/contact.png
new file mode 100644
index 00000000..9c73dc59
--- /dev/null
+++ b/doc/img/contact.png
Binary files differ
diff --git a/doc/install.html b/doc/install.html
new file mode 100644
index 00000000..b7211d21
--- /dev/null
+++ b/doc/install.html
@@ -0,0 +1,216 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>Installation</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11</head>
12<body>
13<div id="site">
14<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
15</div>
16<div id="head">
17<h1>Installation</h1>
18</div>
19<div id="nav">
20<ul><li>
21<a href="luajit.html">LuaJIT</a>
22<ul><li>
23<a class="current" href="install.html">Installation</a>
24</li><li>
25<a href="running.html">Running</a>
26</li><li>
27<a href="api.html">API Extensions</a>
28</li></ul>
29</li><li>
30<a href="status.html">Status</a>
31<ul><li>
32<a href="changes.html">Changes</a>
33</li></ul>
34</li><li>
35<a href="faq.html">FAQ</a>
36</li><li>
37<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
38</li></ul>
39</div>
40<div id="main">
41<p>
42LuaJIT is only distributed as a source package. This page explains
43how to build and install LuaJIT with different operating systems
44and C&nbsp;compilers.
45</p>
46<p>
47For the impatient (on POSIX systems):
48</p>
49<pre class="code">
50make &amp;&amp; sudo make install
51</pre>
52<p>
53LuaJIT currently builds out of the box on all popular x86 systems
54(Linux, Windows, OSX etc.). It builds and runs fine as a 32&nbsp;bit
55application under x64-based systems, too.
56</p>
57
58<h2>Configuring LuaJIT</h2>
59<p>
60The standard configuration should work fine for most installations.
61Usually there is no need to tweak the settings, except when you want to
62install to a non-standard path. The following files hold all
63user-configurable settings:
64</p>
65<ul>
66<li><tt>src/luaconf.h</tt> sets some configuration variables, in
67particular the default paths for loading modules.</li>
68<li><tt>Makefile</tt> has settings for installing LuaJIT (POSIX
69only).</li>
70<li><tt>src/Makefile</tt> has settings for compiling LuaJIT under POSIX,
71MinGW and Cygwin.</li>
72<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
73MSVC.</li>
74</ul>
75<p>
76Please read the instructions given in these files, before changing
77any settings.
78</p>
79
80<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
81<h3>Prerequisites</h3>
82<p>
83Depending on your distribution, you may need to install a package for
84GCC (GCC 3.4 or later required), the development headers and/or a
85complete SDK.
86</p>
87<p>
88E.g. on a current Debian/Ubuntu, install <tt>libc6-dev</tt>
89with the package manager. Currently LuaJIT only builds as a 32&nbsp;bit
90application, so you actually need to install <tt>libc6-dev-i386</tt>
91when building on an x64 OS.
92</p>
93<p>
94Download the current source package (pick the .tar.gz), if you haven't
95already done so. Move it to a directory of your choice, open a
96terminal window and change to this directory. Now unpack the archive
97and change to the newly created directory:
98</p>
99<pre class="code">
100tar zxf LuaJIT-2.0.0-beta1.tar.gz
101cd LuaJIT-2.0.0-beta1
102</pre>
103<h3>Building LuaJIT</h3>
104<p>
105The supplied Makefiles try to auto-detect the settings needed for your
106operating system and your compiler. They need to be run with GNU Make,
107which is probably the default on your system, anyway. Simply run:
108</p>
109<pre class="code">
110make
111</pre>
112<h3>Installing LuaJIT</h3>
113<p>
114The top-level Makefile installs LuaJIT by default under
115<tt>/usr/local</tt>, i.e. the executable ends up in
116<tt>/usr/local/bin</tt> and so on. You need to have root privileges
117to write to this path. So, assuming sudo is installed on your system,
118run the following command and enter your sudo password:
119</p>
120<pre class="code">
121sudo make install
122</pre>
123<p>
124Otherwise specify the directory prefix as an absolute path, e.g.:
125</p>
126<pre class="code">
127sudo make install PREFIX=/opt/lj2
128</pre>
129<p>
130But note that the installation prefix and the prefix for the module paths
131(configured in <tt>src/luaconf.h</tt>) must match.
132</p>
133<p style="color: #c00000;">
134Note: to avoid overwriting a previous version, the beta test releases
135only install the LuaJIT executable under the versioned name (i.e.
136<tt>luajit-2.0.0-beta1</tt>). You probably want to create a symlink
137for convenience, with a command like this:
138</p>
139<pre class="code" style="color: #c00000;">
140sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit
141</pre>
142
143<h2 id="windows">Windows Systems</h2>
144<h3>Prerequisites</h3>
145<p>
146Either install one of the open source SDKs
147(<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or
148<a href="http://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>) which come with modified
149versions of GCC plus the required development headers.
150</p>
151<p>
152Or install Microsoft's Visual C++ (MSVC) &mdash; the freely downloadable
153<a href="http://www.microsoft.com/Express/VC/"><span class="ext">&raquo;</span>&nbsp;Express Edition</a>
154works just fine.
155</p>
156<p>
157Next, download the source package and unpack it using an archive manager
158(e.g. the Windows Explorer) to a directory of your choice.
159</p>
160<h3>Building with MSVC</h3>
161<p>
162Open a "Visual Studio .NET Command Prompt" and <tt>cd</tt> to the
163directory where you've unpacked the sources. Then run this command:
164</p>
165<pre class="code">
166cd src
167msvcbuild
168</pre>
169<p>
170Then follow the installation instructions below.
171</p>
172<h3>Building with MinGW or Cygwin</h3>
173<p>
174Open a command prompt window and make sure the MinGW or Cygwin programs
175are in your path. Then <tt>cd</tt> to the directory where
176you've unpacked the sources and run this command for MinGW:
177</p>
178<pre class="code">
179cd src
180mingw32-make
181</pre>
182<p>
183Or this command for Cygwin:
184</p>
185<pre class="code">
186cd src
187make
188</pre>
189<p>
190Then follow the installation instructions below.
191</p>
192<h3>Installing LuaJIT</h3>
193<p>
194Copy <tt>luajit.exe</tt> and <tt>lua51.dll</tt>
195to a newly created directory (any location is ok). Add <tt>lua</tt>
196and <tt>lua\jit</tt> directories below it and copy all Lua files
197from the <tt>lib</tt> directory of the distribution to the latter directory.
198</p>
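<p>
For illustration, assuming you created <tt>C:\LuaJIT</tt> (any other
directory name works just as well), the result would look like this:
</p>
<pre class="code">
C:\LuaJIT\luajit.exe
C:\LuaJIT\lua51.dll
C:\LuaJIT\lua\jit\bc.lua
C:\LuaJIT\lua\jit\v.lua
C:\LuaJIT\lua\jit\dump.lua
C:\LuaJIT\lua\jit\dis_x86.lua
C:\LuaJIT\lua\jit\dis_x64.lua
C:\LuaJIT\lua\jit\vmdef.lua
</pre>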
199<p>
200There are no hardcoded
201absolute path names &mdash; all modules are loaded relative to the
202directory where <tt>luajit.exe</tt> is installed
203(see <tt>src/luaconf.h</tt>).
204</p>
205<br class="flush">
206</div>
207<div id="foot">
208<hr class="hide">
209Copyright &copy; 2005-2009 Mike Pall
210<span class="noprint">
211&middot;
212<a href="contact.html">Contact</a>
213</span>
214</div>
215</body>
216</html>
diff --git a/doc/luajit.html b/doc/luajit.html
new file mode 100644
index 00000000..9b16ea37
--- /dev/null
+++ b/doc/luajit.html
@@ -0,0 +1,120 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>LuaJIT</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11</head>
12<body>
13<div id="site">
14<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
15</div>
16<div id="head">
17<h1>LuaJIT</h1>
18</div>
19<div id="nav">
20<ul><li>
21<a class="current" href="luajit.html">LuaJIT</a>
22<ul><li>
23<a href="install.html">Installation</a>
24</li><li>
25<a href="running.html">Running</a>
26</li><li>
27<a href="api.html">API Extensions</a>
28</li></ul>
29</li><li>
30<a href="status.html">Status</a>
31<ul><li>
32<a href="changes.html">Changes</a>
33</li></ul>
34</li><li>
35<a href="faq.html">FAQ</a>
36</li><li>
37<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
38</li></ul>
39</div>
40<div id="main">
41<p>
42LuaJIT is a <b>Just-In-Time Compiler</b> for the Lua<sup>*</sup>
43programming language.
44</p>
45<p>
46LuaJIT is Copyright &copy; 2005-2009 Mike Pall.
47LuaJIT is open source software, released under the
48<a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT/X license</a>.
49</p>
50<p class="indent" style="color: #606060;">
51* Lua is a powerful, dynamic and light-weight programming language
52designed for extending applications. Lua is also frequently used as a
53general-purpose, stand-alone language. More information about
54Lua can be found at: <a href="http://www.lua.org/"><span class="ext">&raquo;</span>&nbsp;http://www.lua.org/</a>
55</p>
56<h2>Compatibility</h2>
57<p>
58LuaJIT implements the full set of language features defined by Lua 5.1.
59The virtual machine (VM) is <b>API- and ABI-compatible</b> with the
60standard Lua interpreter and can be deployed as a drop-in replacement.
61</p>
62<p>
63LuaJIT offers more performance, at the expense of portability. It
64currently runs on all popular operating systems based on <b>x86 CPUs</b>
65(Linux, Windows, OSX etc.). It will be ported to x64 CPUs and other
66platforms in the future, based on user demand and sponsoring.
67</p>
68
69<h2>Overview</h2>
70<p>
71LuaJIT has been successfully used as a <b>scripting middleware</b> in
72games, 3D modellers, numerical simulations, trading platforms and many
73other specialty applications. It combines high flexibility with high
74performance and an unmatched <b>low memory footprint</b>: less than
75<b>120K</b> for the VM plus less than <b>80K</b> for the JIT compiler.
76</p>
77<p>
78LuaJIT has been in continuous development since 2005. It's widely
79considered to be <b>one of the fastest dynamic language
80implementations</b>. It has outperformed other dynamic languages on many
81cross-language benchmarks since its first release &mdash; often by a
82substantial margin. Only now, in 2009, other dynamic language VMs are
83starting to catch up with the performance of LuaJIT 1.x &hellip;
84</p>
85<p>
862009 also marks the first release of the long-awaited <b>LuaJIT 2.0</b>.
87The whole VM has been rewritten from the ground up and relentlessly
88optimized for performance. It combines a high-speed interpreter,
89written in assembler, with a state-of-the-art JIT compiler.
90</p>
91<p>
92An innovative <b>trace compiler</b> is integrated with advanced,
93SSA-based optimizations and a highly tuned code generation backend. This
94allows a substantial reduction of the overhead associated with dynamic
95language features. It's destined to break into the performance range
96traditionally reserved for offline, static language compilers.
97</p>
98
99<h2>More ...</h2>
100<p>
101Click on the LuaJIT sub-topics in the navigation bar to learn more
102about LuaJIT.
103</p>
104<p>
105Click on the Logo in the upper left corner to visit
106the LuaJIT project page on the web. All other links to online
107resources are marked with a '<span class="ext">&raquo;</span>'.
108</p>
109<br class="flush">
110</div>
111<div id="foot">
112<hr class="hide">
113Copyright &copy; 2005-2009 Mike Pall
114<span class="noprint">
115&middot;
116<a href="contact.html">Contact</a>
117</span>
118</div>
119</body>
120</html>
diff --git a/doc/running.html b/doc/running.html
new file mode 100644
index 00000000..db69578c
--- /dev/null
+++ b/doc/running.html
@@ -0,0 +1,233 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>Running LuaJIT</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11<style type="text/css">
12table.opt {
13 line-height: 1.2;
14}
15tr.opthead td {
16 font-weight: bold;
17}
18td.flag_name {
19 width: 4em;
20}
21td.flag_level {
22 width: 2em;
23 text-align: center;
24}
25td.param_name {
26 width: 6em;
27}
28td.param_default {
29 width: 4em;
30 text-align: right;
31}
32</style>
33</head>
34<body>
35<div id="site">
36<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
37</div>
38<div id="head">
39<h1>Running LuaJIT</h1>
40</div>
41<div id="nav">
42<ul><li>
43<a href="luajit.html">LuaJIT</a>
44<ul><li>
45<a href="install.html">Installation</a>
46</li><li>
47<a class="current" href="running.html">Running</a>
48</li><li>
49<a href="api.html">API Extensions</a>
50</li></ul>
51</li><li>
52<a href="status.html">Status</a>
53<ul><li>
54<a href="changes.html">Changes</a>
55</li></ul>
56</li><li>
57<a href="faq.html">FAQ</a>
58</li><li>
59<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
60</li></ul>
61</div>
62<div id="main">
63<p>
64LuaJIT has only a single stand-alone executable, called <tt>luajit</tt> on
65POSIX systems or <tt>luajit.exe</tt> on Windows. It can be used to run simple
66Lua statements or whole Lua applications from the command line. It has an
67interactive mode, too.
68</p>
69<p class="indent" style="color: #c00000;">
70Note: the beta test releases only install under the versioned name on
71POSIX systems (to avoid overwriting a previous version). You either need
72to type <tt>luajit-2.0.0-beta1</tt> to start it or create a symlink
73with a command like this:
74</p>
75<pre class="code" style="color: #c00000;">
76sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit
77</pre>
78<p>
79Unlike previous versions, <b>optimization is turned on by default</b> in
80LuaJIT 2.0!<br>It's no longer necessary to use <tt>luajit&nbsp;-O</tt>.
81</p>
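<p>
A few typical invocations, using the same basic options as the stock
<tt>lua</tt> executable (the script name below is just a placeholder):
</p>
<pre class="code">
luajit myscript.lua        # Run a Lua application.
luajit -e 'print(2^0.5)'   # Run a one-line statement.
luajit -i                  # Start the interactive mode.
</pre>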
82
83<h2 id="options">Command Line Options</h2>
84<p>
85The <tt>luajit</tt> stand-alone executable is just a slightly modified
86version of the regular <tt>lua</tt> stand-alone executable.
87It supports the same basic options, too. <tt>luajit&nbsp;-h</tt>
88prints a short list of the available options. Please have a look at the
89<a href="http://www.lua.org/manual/5.1/manual.html#6"><span class="ext">&raquo;</span>&nbsp;Lua manual</a>
90for details.
91</p>
92<p>
93Two additional options control the behavior of LuaJIT:
94</p>
95
96<h3 id="opt_j"><tt>-j cmd[=arg[,arg...]]</tt></h3>
97<p>
98This option performs a LuaJIT control command or activates one of the
99loadable extension modules. The command is first looked up in the
100<tt>jit.*</tt> library. If no matching function is found, a module
101named <tt>jit.&lt;cmd&gt;</tt> is loaded and the <tt>start()</tt>
102function of the module is called with the specified arguments (if
103any). The space between <tt>-j</tt> and <tt>cmd</tt> is optional.
104</p>
105<p>
106Here are the available LuaJIT control commands:
107</p>
108<ul>
109<li id="j_on"><tt>-jon</tt> &mdash; Turns the JIT compiler on (default).</li>
110<li id="j_off"><tt>-joff</tt> &mdash; Turns the JIT compiler off (only use the interpreter).</li>
111<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
112<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
113<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
114</ul>
115<p>
116The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
117written in Lua. They are mainly used for debugging the JIT compiler
118itself. For a description of their options and output format, please
119read the comment block at the start of their source.
120They can be found in the <tt>lib</tt> directory of the source
121distribution or installed under the <tt>jit</tt> directory. By default
122this is <tt>/usr/local/share/luajit-2.0.0-beta1/jit</tt> on POSIX
123systems.
124</p>
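<p>
For example (the script name is again just a placeholder), the first command
below runs a script while printing the verbose compiler progress described
above, and the second one runs it with the JIT compiler turned off:
</p>
<pre class="code">
luajit -jv myscript.lua
luajit -joff myscript.lua
</pre>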
125
126<h3 id="opt_O"><tt>-O[level]</tt><br>
127<tt>-O[+]flag</tt> <tt>-O-flag</tt><br>
128<tt>-Oparam=value</tt></h3>
129<p>
130This option allows fine-tuned control of the optimizations used by
131the JIT compiler. This is mainly intended for debugging LuaJIT itself.
132Please note that the JIT compiler is extremely fast (we are talking
133about the microsecond to millisecond range). Disabling optimizations
134doesn't have any visible impact on its overhead, but usually generates
135code that runs slower.
136</p>
137<p>
138The first form sets an optimization level &mdash; this enables a
139specific mix of optimization flags. <tt>-O0</tt> turns off all
140optimizations and higher numbers enable more optimizations. Omitting
141the level (i.e. just <tt>-O</tt>) sets the default optimization level,
142which is <tt>-O3</tt> in the current version.
143</p>
144<p>
145The second form adds or removes individual optimization flags.
146The third form sets a parameter for the VM or the JIT compiler
147to a specific value.
148</p>
149<p>
150You can either use this option multiple times (like <tt>-Ocse
151-O-dce -Ohotloop=10</tt>) or separate several settings with a comma
152(like <tt>-O+cse,-dce,hotloop=10</tt>). The settings are applied from
153left to right and later settings override earlier ones. You can freely
154mix the three forms, but note that setting an optimization level
155overrides all earlier flags.
156</p>
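<p>
A command line combining all three forms might look like this: the default
level <tt>-O3</tt> is selected first, then one flag is removed and one
parameter is lowered (the script name is just a placeholder):
</p>
<pre class="code">
luajit -O3 -O-fuse -Ohotloop=10 myscript.lua
</pre>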
157<p>
158Here are the available flags and at what optimization levels they
159are enabled:
160</p>
161<table class="opt">
162<tr class="opthead">
163<td class="flag_name">Flag</td>
164<td class="flag_level">-O1</td>
165<td class="flag_level">-O2</td>
166<td class="flag_level">-O3</td>
167<td class="flag_desc">&nbsp;</td>
168</tr>
169<tr class="odd separate">
170<td class="flag_name">fold</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Constant Folding, Simplifications and Reassociation</td></tr>
171<tr class="even">
172<td class="flag_name">cse</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Common-Subexpression Elimination</td></tr>
173<tr class="odd">
174<td class="flag_name">dce</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Dead-Code Elimination</td></tr>
175<tr class="even">
176<td class="flag_name">narrow</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Narrowing of numbers to integers</td></tr>
177<tr class="odd">
178<td class="flag_name">loop</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Loop Optimizations (code hoisting)</td></tr>
179<tr class="even">
180<td class="flag_name">fwd</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Load Forwarding (L2L) and Store Forwarding (S2L)</td></tr>
181<tr class="odd">
182<td class="flag_name">dse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Dead-Store Elimination</td></tr>
183<tr class="even">
184<td class="flag_name">fuse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Fusion of operands into instructions</td></tr>
185</table>
186<p>
187Here are the parameters and their default settings:
188</p>
189<table class="opt">
190<tr class="opthead">
191<td class="param_name">Parameter</td>
192<td class="param_default">Default</td>
193<td class="param_desc">&nbsp;</td>
194</tr>
195<tr class="odd separate">
196<td class="param_name">maxtrace</td><td class="param_default">1000</td><td class="param_desc">Max. number of traces in the cache</td></tr>
197<tr class="even">
198<td class="param_name">maxrecord</td><td class="param_default">2000</td><td class="param_desc">Max. number of recorded IR instructions</td></tr>
199<tr class="odd">
200<td class="param_name">maxirconst</td><td class="param_default">500</td><td class="param_desc">Max. number of IR constants of a trace</td></tr>
201<tr class="even">
202<td class="param_name">maxside</td><td class="param_default">100</td><td class="param_desc">Max. number of side traces of a root trace</td></tr>
203<tr class="odd">
204<td class="param_name">maxsnap</td><td class="param_default">100</td><td class="param_desc">Max. number of snapshots for a trace</td></tr>
205<tr class="even separate">
206<td class="param_name">hotloop</td><td class="param_default">57</td><td class="param_desc">Number of iterations to detect a hot loop</td></tr>
207<tr class="odd">
208<td class="param_name">hotexit</td><td class="param_default">10</td><td class="param_desc">Number of taken exits to start a side trace</td></tr>
209<tr class="even">
210<td class="param_name">tryside</td><td class="param_default">4</td><td class="param_desc">Number of attempts to compile a side trace</td></tr>
211<tr class="odd separate">
212<td class="param_name">instunroll</td><td class="param_default">4</td><td class="param_desc">Max. unroll factor for instable loops</td></tr>
213<tr class="even">
214<td class="param_name">loopunroll</td><td class="param_default">7</td><td class="param_desc">Max. unroll factor for loop ops in side traces</td></tr>
215<tr class="odd">
216<td class="param_name">callunroll</td><td class="param_default">3</td><td class="param_desc">Max. unroll factor for pseudo-recursive calls</td></tr>
217<tr class="even separate">
218<td class="param_name">sizemcode</td><td class="param_default">32</td><td class="param_desc">Size of each machine code area in KBytes (Windows: 64K)</td></tr>
219<tr class="odd">
220<td class="param_name">maxmcode</td><td class="param_default">512</td><td class="param_desc">Max. total size of all machine code areas in KBytes</td></tr>
221</table>
222<br class="flush">
223</div>
224<div id="foot">
225<hr class="hide">
226Copyright &copy; 2005-2009 Mike Pall
227<span class="noprint">
228&middot;
229<a href="contact.html">Contact</a>
230</span>
231</div>
232</body>
233</html>
diff --git a/doc/status.html b/doc/status.html
new file mode 100644
index 00000000..23c14c76
--- /dev/null
+++ b/doc/status.html
@@ -0,0 +1,235 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>Status &amp; Roadmap</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Author" content="Mike Pall">
7<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11<style type="text/css">
12ul li { padding-bottom: 0.3em; }
13</style>
14</head>
15<body>
16<div id="site">
17<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
18</div>
19<div id="head">
20<h1>Status &amp; Roadmap</h1>
21</div>
22<div id="nav">
23<ul><li>
24<a href="luajit.html">LuaJIT</a>
25<ul><li>
26<a href="install.html">Installation</a>
27</li><li>
28<a href="running.html">Running</a>
29</li><li>
30<a href="api.html">API Extensions</a>
31</li></ul>
32</li><li>
33<a class="current" href="status.html">Status</a>
34<ul><li>
35<a href="changes.html">Changes</a>
36</li></ul>
37</li><li>
38<a href="faq.html">FAQ</a>
39</li><li>
40<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
41</li></ul>
42</div>
43<div id="main">
44<p>
45The <span style="color: #0000c0;">LuaJIT 1.x</span> series represents
46the current <span style="color: #0000c0;">stable branch</span>. As of
47this writing there have been no open bugs for about a year. So, if
48you need a rock-solid VM, you are encouraged to fetch the latest
49release of LuaJIT 1.x from the <a href="http://luajit.org/download.html"><span class="ext">&raquo;</span>&nbsp;Download</a>
50page.
51</p>
52<p>
53<span style="color: #c00000;">LuaJIT 2.0</span> is the currently active
54<span style="color: #c00000;">development branch</span>.
55It has <b>Beta Test</b> status and is still undergoing
56substantial changes. It's expected to mature quickly over the next few
57months. You should definitely start to evaluate it for new projects
58right now. But deploying it in production environments is not yet
59recommended.
60</p>
61
62<h2>Current Status</h2>
63<p>
64This is a list of the things you should know about the LuaJIT 2.0 beta test:
65</p>
66<ul>
67<li>
68The JIT compiler can only generate code for CPUs with <b>SSE2</b> at the
69moment. I.e. you need at least a P4, Core 2/i5/i7 or K8/K10 to use it. I
70plan to fix this during the beta phase and add support for emitting x87
71instructions to the backend.
72</li>
73<li>
74Obviously there will be many <b>bugs</b> in a VM which has been
75rewritten from the ground up. Please report your findings together with
76the circumstances needed to reproduce the bug. If possible, reduce the
77problem down to a simple test case.<br>
78There is no formal bug tracker at the moment. The best place for
79discussion is the
80<a href="http://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>. Of course
81you may also send your bug report directly to me, especially when it
82contains lengthy debug output. Please check the
83<a href="contact.html">Contact</a> page for details.
84</li>
85<li>
86The VM is complete in the sense that it <b>should</b> run all Lua code
87just fine. It's considered a serious bug if the VM crashes or produces
88unexpected results &mdash; please report it. There are only very few
89known incompatibilities with standard Lua:
90<ul>
91<li>
92The Lua <b>debug API</b> is missing a couple of features (call/return
93hooks) and shows slightly different behavior (no per-coroutine hooks).
94</li>
95<li>
96Most other issues you're likely to find (e.g. with the existing test
97suites) are differences in the <b>implementation-defined</b> behavior.
98These either have a good reason (like early tail call resolving which
99may cause differences in error reporting), are arbitrary design choices
100or are due to quirks in the VM. The latter cases may get fixed if a
101demonstrable need is shown.
102</li>
103</ul>
104</li>
105<li>
106The <b>JIT compiler</b> is not complete (yet) and falls back to the
107interpreter in some cases. All of this works transparently, so unless
108you use <tt>-jv</tt>, you'll probably never notice (the interpreter is quite
109fast, too). Here are the known issues:
110<ul>
111<li>
112Many known issues cause a <b>NYI</b> (not yet implemented) trace abort
113message. E.g. for calls to vararg functions or many string library
114functions. Reporting these is only mildly useful, except if you have good
115example code that shows the problem. Obviously, reports accompanied with
116a patch to fix the issue are more than welcome. But please check back
117with me, before writing major improvements, to avoid duplication of
118effort.
119</li>
120<li>
121<b>Recursion</b> is not traced yet. Often no trace will be generated at
122all, or some unroll limit will catch it and abort the trace.
123</li>
124<li>
125The trace compiler currently does not back off specialization for
126function call dispatch. It should really fall back to specializing on
127the prototype, not the closure identity. This can lead to the so-called
128"trace explosion" problem with <b>closure-heavy programming</b>. The
129trace linking heuristics prevent this, but in the worst case this
130means the code always falls back to the interpreter.
131</li>
132<li>
133<b>Trace management</b> needs more tuning: better blacklisting of aborted
134traces, less drastic countermeasures against trace explosion and better
135heuristics in general.
136</li>
137<li>
138Some checks are missing in the JIT-compiled code for obscure situations
139with <b>open upvalues aliasing</b> one of the SSA slots later on (or
140vice versa). Bonus points if you can find a real-world test case for
141this.
142</li>
143</ul>
144</li>
145</ul>
146
147<h2>Roadmap</h2>
148<p>
149Rather than stating exact release dates (I'm well known for making
150spectacularly wrong guesses), this roadmap lists the general project
151plan, sorted by priority, as well as ideas for the future:
152</p>
153<ul>
154<li>
155The main goal right now is to stabilize LuaJIT 2.0 and get it out of
156beta test. <b>Correctness</b> has priority over completeness. This
157implies the first stable release will certainly NOT compile every
158library function call and will fall back to the interpreter from time
159to time. This is perfectly ok, since it still executes all Lua code,
160just not at the highest possible speed.
161</li>
162<li>
163The next step is to get it to compile more library functions and handle
164more cases where the compiler currently bails out. This doesn't mean it
165will compile every corner case. It's much more important that it
166performs well in a majority of use cases. Every compiler has to make
167these trade-offs &mdash; <b>completeness</b> just cannot be the
168overriding goal for a low-footprint, low-overhead JIT compiler.
169</li>
170<li>
171More <b>optimizations</b> will be added in parallel to the last step on
172an as-needed basis. Array-bounds-check (ABC) removal, sinking of stores
173to aggregates and sinking of allocations are high on the list. Faster
174handling of NEWREF and better alias analysis are desirable, too. More
175complex optimizations with less pay-off, such as value-range-propagation
176(VRP), will have to wait.
177</li>
178<li>
179LuaJIT 2.0 has been designed with <b>portability</b> in mind.
180Nonetheless, it compiles to native code and needs to be adapted to each
181architecture. Porting the compiler backend is probably the easier task,
182but a key element of its design is the fast interpreter, written in
183machine-specific assembler.<br>
184The code base and the internal structures are already prepared for
185easier porting to 64 bit architectures. The most likely next target is a
186port to <b>x64</b>, but this will have to wait until the x86 port
187stabilizes. Other ports will follow &mdash; companies which are
188interested in sponsoring a port to a particular architecture, please
189<a href="contact.html">contact me</a>.
190</li>
191<li>
192There are some planned <b>structural improvements</b> to the compiler,
193like compressed snapshot maps or generic handling of calls to helper
194methods. These are of lesser importance, unless other developments
195elevate their priority.
196</li>
197<li>
198<b>Documentation</b> about the <b>internals</b> of LuaJIT is still sorely
199missing. Although the source code is included and is IMHO well
200commented, many basic design decisions are in need of an explanation.
201The rather untraditional compiler architecture and the many highly
202optimized data structures are a barrier for outside participation in
203the development. Alas, as I've repeatedly stated, I'm better at
204writing code than papers and I'm not in need of any academic merits.
205Someday I will find the time for it. :-)
206</li>
207<li>
208Producing good code for unbiased branches is a key problem for trace
209compilers. This is the main cause for "trace explosion".
210<b>Hyperblock scheduling</b> promises to solve this nicely at the
211price of a major redesign of the compiler. This would also pave the
212way for emitting predicated instructions, which is a prerequisite
213for efficient <b>vectorization</b>.
214</li>
215<li>
216Currently Lua is missing a standard library for access to <b>structured
217binary data</b> and <b>arrays/buffers</b> holding low-level data types.
218Allowing calls to arbitrary C functions (<b>FFI</b>) would obviate the
219need to write manual bindings. A variety of extension modules is floating
220around, with different scope and capabilities. Alas, none of them has been
221designed with a JIT compiler in mind.
222</li>
223</ul>
224<br class="flush">
225</div>
226<div id="foot">
227<hr class="hide">
228Copyright &copy; 2005-2009 Mike Pall
229<span class="noprint">
230&middot;
231<a href="contact.html">Contact</a>
232</span>
233</div>
234</body>
235</html>
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
new file mode 100644
index 00000000..94d9a9e2
--- /dev/null
+++ b/dynasm/dasm_proto.h
@@ -0,0 +1,69 @@
1/*
2** DynASM encoding engine prototypes.
3** Copyright (C) 2005-2009 Mike Pall. All rights reserved.
4** Released under the MIT/X license. See dynasm.lua for full copyright notice.
5*/
6
7#ifndef _DASM_PROTO_H
8#define _DASM_PROTO_H
9
10#include <stddef.h>
11#include <stdarg.h>
12
13#define DASM_IDENT "DynASM 1.2.1"
14#define DASM_VERSION 10201 /* 1.2.1 */
15
16#ifndef Dst_DECL
17#define Dst_DECL dasm_State *Dst
18#endif
19
20#ifndef Dst_GET
21#define Dst_GET (Dst)
22#endif
23
24#ifndef DASM_FDEF
25#define DASM_FDEF extern
26#endif
27
28
29/* Internal DynASM encoder state. */
30typedef struct dasm_State dasm_State;
31
32/* Action list type. */
33typedef const unsigned char *dasm_ActList;
34
35
36/* Initialize and free DynASM state. */
37DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
38DASM_FDEF void dasm_free(Dst_DECL);
39
40/* Setup global array. Must be called before dasm_setup(). */
41DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);
42
43/* Grow PC label array. Can be called after dasm_setup(), too. */
44DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);
45
46/* Setup encoder. */
47DASM_FDEF void dasm_setup(Dst_DECL, dasm_ActList actionlist);
48
49/* Feed encoder with actions. Calls are generated by pre-processor. */
50DASM_FDEF void dasm_put(Dst_DECL, int start, ...);
51
52/* Link sections and return the resulting size. */
53DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);
54
55/* Encode sections into buffer. */
56DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);
57
58/* Get PC label offset. */
59DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);
60
61#ifdef DASM_CHECKS
62/* Optional sanity checker to call between isolated encoding steps. */
63DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
64#else
65#define dasm_checkstep(a, b) 0
66#endif
67
68
69#endif /* _DASM_PROTO_H */
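The prototypes above make up the complete embedding API: dasm_put() stores
actions in pass 1, dasm_link() resolves labels and computes the code size in
pass 2, and dasm_encode() writes the machine code in pass 3. The following
stand-alone sketch (not part of the distribution) drives the x86 engine from
dasm_x86.h below through all three passes. The action list is normally
generated by dynasm.lua from a .dasc file; here a trivial hand-written list
(one literal opcode byte, 0xc3, i.e. a bare "ret", followed by DASM_STOP)
stands in for it. The Dst_DECL/Dst_REF and DASM_M_GROW/DASM_M_FREE definitions
are embedder-supplied conventions chosen for this example, and the
executable-memory handling assumes a POSIX system.

/* dasm_min.c -- minimal DynASM embedding sketch (illustrative only). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

/* Embedder-supplied conventions (assumptions for this sketch): the engine
** accesses its state through Dst_REF and allocates via DASM_M_GROW/FREE. */
#define Dst_DECL  dasm_State **Dst
#define Dst_REF   (*Dst)
#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { size_t _sz = (sz), _need = (need); \
    if (_sz < _need) { \
      if (_sz < 16) _sz = 16; \
      while (_sz < _need) _sz += _sz; \
      (p) = (t *)realloc((p), _sz); (sz) = _sz; \
    } \
  } while (0)
#define DASM_M_FREE(ctx, p, sz) free(p)

#include "dasm_proto.h"
#include "dasm_x86.h"

int main(void)
{
  /* Normally generated by dynasm.lua. Bytes below 233 (DASM_DISP) are
  ** emitted literally; 0xc3 is "ret", 255 is DASM_STOP. */
  static const unsigned char actions[2] = { 0xc3, 255 };
  void *globals[8];
  dasm_State *state;
  size_t sz;
  void *mem;

  dasm_init(&state, 1);                     /* One code section. */
  dasm_setupglobal(&state, globals, 8);     /* Global label array. */
  dasm_setup(&state, actions);              /* Bind the action list. */
  dasm_put(&state, 0);                      /* Pass 1: store actions. */
  dasm_link(&state, &sz);                   /* Pass 2: link, compute size. */
  mem = mmap(NULL, sz, PROT_READ|PROT_WRITE,
             MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
  dasm_encode(&state, mem);                 /* Pass 3: emit machine code. */
  dasm_free(&state);
  mprotect(mem, sz, PROT_READ|PROT_EXEC);   /* Make the code executable. */
  ((void (*)(void))mem)();                  /* Call the generated "ret". */
  printf("encoded %u byte(s) of machine code\n", (unsigned)sz);
  munmap(mem, sz);
  return 0;
}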
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
new file mode 100644
index 00000000..dab33e5a
--- /dev/null
+++ b/dynasm/dasm_x86.h
@@ -0,0 +1,467 @@
1/*
2** DynASM x86 encoding engine.
3** Copyright (C) 2005-2009 Mike Pall. All rights reserved.
4** Released under the MIT/X license. See dynasm.lua for full copyright notice.
5*/
6
7#include <stddef.h>
8#include <stdarg.h>
9#include <string.h>
10#include <stdlib.h>
11
12#define DASM_ARCH "x86"
13
14#ifndef DASM_EXTERN
15#define DASM_EXTERN(a,b,c,d) 0
16#endif
17
18/* Action definitions. DASM_STOP must be 255. */
19enum {
20 DASM_DISP = 233,
21 DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
22 DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
23 DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
24 DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
25};
26
27/* Maximum number of section buffer positions for a single dasm_put() call. */
28#define DASM_MAXSECPOS 25
29
30/* DynASM encoder status codes. Action list offset or number are or'ed in. */
31#define DASM_S_OK 0x00000000
32#define DASM_S_NOMEM 0x01000000
33#define DASM_S_PHASE 0x02000000
34#define DASM_S_MATCH_SEC 0x03000000
35#define DASM_S_RANGE_I 0x11000000
36#define DASM_S_RANGE_SEC 0x12000000
37#define DASM_S_RANGE_LG 0x13000000
38#define DASM_S_RANGE_PC 0x14000000
39#define DASM_S_RANGE_VREG 0x15000000
40#define DASM_S_UNDEF_L 0x21000000
41#define DASM_S_UNDEF_PC 0x22000000
42
43/* Macros to convert positions (8 bit section + 24 bit index). */
44#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
45#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
46#define DASM_SEC2POS(sec) ((sec)<<24)
47#define DASM_POS2SEC(pos) ((pos)>>24)
48#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
49
50/* Per-section structure. */
51typedef struct dasm_Section {
52 int *rbuf; /* Biased buffer pointer (negative section bias). */
53 int *buf; /* True buffer pointer. */
54 size_t bsize; /* Buffer size in bytes. */
55 int pos; /* Biased buffer position. */
56 int epos; /* End of biased buffer position - max single put. */
57 int ofs; /* Byte offset into section. */
58} dasm_Section;
59
60/* Core structure holding the DynASM encoding state. */
61struct dasm_State {
62 size_t psize; /* Allocated size of this structure. */
63 dasm_ActList actionlist; /* Current actionlist pointer. */
64 int *lglabels; /* Local/global chain/pos ptrs. */
65 size_t lgsize;
66 int *pclabels; /* PC label chains/pos ptrs. */
67 size_t pcsize;
68 void **globals; /* Array of globals (bias -10). */
69 dasm_Section *section; /* Pointer to active section. */
70 size_t codesize; /* Total size of all code sections. */
71 int maxsection; /* 0 <= sectionidx < maxsection. */
72 int status; /* Status code. */
73 dasm_Section sections[1]; /* All sections. Alloc-extended. */
74};
75
76/* The size of the core structure depends on the max. number of sections. */
77#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
78
79
80/* Initialize DynASM state. */
81void dasm_init(Dst_DECL, int maxsection)
82{
83 dasm_State *D;
84 size_t psz = 0;
85 int i;
86 Dst_REF = NULL;
87 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
88 D = Dst_REF;
89 D->psize = psz;
90 D->lglabels = NULL;
91 D->lgsize = 0;
92 D->pclabels = NULL;
93 D->pcsize = 0;
94 D->globals = NULL;
95 D->maxsection = maxsection;
96 for (i = 0; i < maxsection; i++) {
97 D->sections[i].buf = NULL; /* Need this for pass3. */
98 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
99 D->sections[i].bsize = 0;
100 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
101 }
102}
103
104/* Free DynASM state. */
105void dasm_free(Dst_DECL)
106{
107 dasm_State *D = Dst_REF;
108 int i;
109 for (i = 0; i < D->maxsection; i++)
110 if (D->sections[i].buf)
111 DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
112 if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
113 if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
114 DASM_M_FREE(Dst, D, D->psize);
115}
116
117/* Setup global label array. Must be called before dasm_setup(). */
118void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
119{
120 dasm_State *D = Dst_REF;
121 D->globals = gl - 10; /* Negative bias to compensate for locals. */
122 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
123}
124
125/* Grow PC label array. Can be called after dasm_setup(), too. */
126void dasm_growpc(Dst_DECL, unsigned int maxpc)
127{
128 dasm_State *D = Dst_REF;
129 size_t osz = D->pcsize;
130 DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
131 memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
132}
133
134/* Setup encoder. */
135void dasm_setup(Dst_DECL, dasm_ActList actionlist)
136{
137 dasm_State *D = Dst_REF;
138 int i;
139 D->actionlist = actionlist;
140 D->status = DASM_S_OK;
141 D->section = &D->sections[0];
142 memset((void *)D->lglabels, 0, D->lgsize);
143 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
144 for (i = 0; i < D->maxsection; i++) {
145 D->sections[i].pos = DASM_SEC2POS(i);
146 D->sections[i].ofs = 0;
147 }
148}
149
150
151#ifdef DASM_CHECKS
152#define CK(x, st) \
153 do { if (!(x)) { \
154 D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
155#define CKPL(kind, st) \
156 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
157 D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
158#else
159#define CK(x, st) ((void)0)
160#define CKPL(kind, st) ((void)0)
161#endif
162
163/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
164void dasm_put(Dst_DECL, int start, ...)
165{
166 va_list ap;
167 dasm_State *D = Dst_REF;
168 dasm_ActList p = D->actionlist + start;
169 dasm_Section *sec = D->section;
170 int pos = sec->pos, ofs = sec->ofs, mrm = 4;
171 int *b;
172
173 if (pos >= sec->epos) {
174 DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
175 sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
176 sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
177 sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
178 }
179
180 b = sec->rbuf;
181 b[pos++] = start;
182
183 va_start(ap, start);
184 while (1) {
185 int action = *p++;
186 if (action < DASM_DISP) {
187 ofs++;
188 } else if (action <= DASM_REL_A) {
189 int n = va_arg(ap, int);
190 b[pos++] = n;
191 switch (action) {
192 case DASM_DISP:
193 if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
194 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
195 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
196 case DASM_IMM_D: ofs += 4; break;
197 case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
198 case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
199 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
200 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
201 case DASM_SPACE: p++; ofs += n; break;
202 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
203 case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
204 if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
205 }
206 mrm = 4;
207 } else {
208 int *pl, n;
209 switch (action) {
210 case DASM_REL_LG:
211 case DASM_IMM_LG:
212 n = *p++; pl = D->lglabels + n;
213 if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
214 pl -= 246; n = *pl;
215 if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
216 goto linkrel;
217 case DASM_REL_PC:
218 case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
219 putrel:
220 n = *pl;
221 if (n < 0) { /* Label exists. Get label pos and store it. */
222 b[pos] = -n;
223 } else {
224 linkrel:
225 b[pos] = n; /* Else link to rel chain, anchored at label. */
226 *pl = pos;
227 }
228 pos++;
229 ofs += 4; /* Maximum offset needed. */
230 if (action == DASM_REL_LG || action == DASM_REL_PC)
231 b[pos++] = ofs; /* Store pass1 offset estimate. */
232 break;
233 case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
234 case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
235 putlabel:
236 n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
237 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
238 *pl = -pos; /* Label exists now. */
239 b[pos++] = ofs; /* Store pass1 offset estimate. */
240 break;
241 case DASM_ALIGN:
242 ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */
243 b[pos++] = ofs; /* Store pass1 offset estimate. */
244 break;
245 case DASM_EXTERN: p += 2; ofs += 4; break;
246 case DASM_ESC: p++; ofs++; break;
247 case DASM_MARK: mrm = p[-2]; break;
248 case DASM_SECTION:
249 n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n];
250 case DASM_STOP: goto stop;
251 }
252 }
253 }
254stop:
255 va_end(ap);
256 sec->pos = pos;
257 sec->ofs = ofs;
258}
259#undef CK
260
261/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */
262int dasm_link(Dst_DECL, size_t *szp)
263{
264 dasm_State *D = Dst_REF;
265 int secnum;
266 int ofs = 0;
267
268#ifdef DASM_CHECKS
269 *szp = 0;
270 if (D->status != DASM_S_OK) return D->status;
271 {
272 int pc;
273 for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
274 if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
275 }
276#endif
277
278 { /* Handle globals not defined in this translation unit. */
279 int idx;
280 for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
281 int n = D->lglabels[idx];
282 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
283 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
284 }
285 }
286
287 /* Combine all code sections. No support for data sections (yet). */
288 for (secnum = 0; secnum < D->maxsection; secnum++) {
289 dasm_Section *sec = D->sections + secnum;
290 int *b = sec->rbuf;
291 int pos = DASM_SEC2POS(secnum);
292 int lastpos = sec->pos;
293
294 while (pos != lastpos) {
295 dasm_ActList p = D->actionlist + b[pos++];
296 while (1) {
297 int op, action = *p++;
298 switch (action) {
299 case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
300 case DASM_REL_PC: op = p[-2]; rel_pc: {
301 int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
302 if (shrink) { /* Shrinkable branch opcode? */
303 int lofs, lpos = b[pos];
304 if (lpos < 0) goto noshrink; /* Ext global? */
305 lofs = *DASM_POS2PTR(D, lpos);
306 if (lpos > pos) { /* Fwd label: add cumulative section offsets. */
307 int i;
308 for (i = secnum; i < DASM_POS2SEC(lpos); i++)
309 lofs += D->sections[i].ofs;
310 } else {
311 lofs -= ofs; /* Bkwd label: unfix offset. */
312 }
313 lofs -= b[pos+1]; /* Short branch ok? */
314 if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */
315 else { noshrink: shrink = 0; } /* No, cannot shrink op. */
316 }
317 b[pos+1] = shrink;
318 pos += 2;
319 break;
320 }
321 case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
322 case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
323 case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
324 case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
325 case DASM_LABEL_LG: p++;
326 case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
327 case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
328 case DASM_EXTERN: p += 2; break;
329 case DASM_ESC: p++; break;
330 case DASM_MARK: break;
331 case DASM_SECTION: case DASM_STOP: goto stop;
332 }
333 }
334 stop: (void)0;
335 }
336 ofs += sec->ofs; /* Next section starts right after current section. */
337 }
338
339 D->codesize = ofs; /* Total size of all code sections */
340 *szp = ofs;
341 return DASM_S_OK;
342}
343
344#define dasmb(x) *cp++ = (unsigned char)(x)
345#ifndef DASM_ALIGNED_WRITES
346#define dasmw(x) \
347 do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
348#define dasmd(x) \
349 do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
350#else
351#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
352#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
353#endif
354
355/* Pass 3: Encode sections. */
356int dasm_encode(Dst_DECL, void *buffer)
357{
358 dasm_State *D = Dst_REF;
359 unsigned char *base = (unsigned char *)buffer;
360 unsigned char *cp = base;
361 int secnum;
362
363 /* Encode all code sections. No support for data sections (yet). */
364 for (secnum = 0; secnum < D->maxsection; secnum++) {
365 dasm_Section *sec = D->sections + secnum;
366 int *b = sec->buf;
367 int *endb = sec->rbuf + sec->pos;
368
369 while (b != endb) {
370 dasm_ActList p = D->actionlist + *b++;
371 unsigned char *mark = NULL;
372 while (1) {
373 int action = *p++;
374 int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0;
375 switch (action) {
376 case DASM_DISP: if (!mark) mark = cp; {
377 unsigned char *mm = mark;
378 if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL;
379 if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7;
380 if (mrm != 5) { mm[-1] -= 0x80; break; } }
381 if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
382 }
383 case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
384 case DASM_IMM_DB: if (((n+128)&-256) == 0) {
385 db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
386 } else mark = NULL;
387 case DASM_IMM_D: wd: dasmd(n); break;
388 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
389 case DASM_IMM_W: dasmw(n); break;
390 case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
391 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
392 b++; n = (int)(ptrdiff_t)D->globals[-n];
393 case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
394 case DASM_REL_PC: rel_pc: {
395 int shrink = *b++;
396 int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
397 n = *pb - ((int)(cp-base) + 4-shrink);
398 if (shrink == 0) goto wd;
399 if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb;
400 goto wb;
401 }
402 case DASM_IMM_LG:
403 p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
404 case DASM_IMM_PC: {
405 int *pb = DASM_POS2PTR(D, n);
406 n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
407 goto wd;
408 }
409 case DASM_LABEL_LG: {
410 int idx = *p++;
411 if (idx >= 10)
412 D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
413 break;
414 }
415 case DASM_LABEL_PC: case DASM_SETLABEL: break;
416 case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; }
417 case DASM_ALIGN:
418 n = *p++;
419 while (((cp-base) & n)) *cp++ = 0x90; /* nop */
420 break;
421 case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
422 case DASM_MARK: mark = cp; break;
423 case DASM_ESC: action = *p++;
424 default: *cp++ = action; break;
425 case DASM_SECTION: case DASM_STOP: goto stop;
426 }
427 }
428 stop: (void)0;
429 }
430 }
431
432 if (base + D->codesize != cp) /* Check for phase errors. */
433 return DASM_S_PHASE;
434 return DASM_S_OK;
435}
436
437/* Get PC label offset. */
438int dasm_getpclabel(Dst_DECL, unsigned int pc)
439{
440 dasm_State *D = Dst_REF;
441 if (pc*sizeof(int) < D->pcsize) {
442 int pos = D->pclabels[pc];
443 if (pos < 0) return *DASM_POS2PTR(D, -pos);
444 if (pos > 0) return -1; /* Undefined. */
445 }
446 return -2; /* Unused or out of range. */
447}
448
449#ifdef DASM_CHECKS
450/* Optional sanity checker to call between isolated encoding steps. */
451int dasm_checkstep(Dst_DECL, int secmatch)
452{
453 dasm_State *D = Dst_REF;
454 if (D->status == DASM_S_OK) {
455 int i;
456 for (i = 1; i <= 9; i++) {
457 if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; }
458 D->lglabels[i] = 0;
459 }
460 }
461 if (D->status == DASM_S_OK && secmatch >= 0 &&
462 D->section != &D->sections[secmatch])
463 D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
464 return D->status;
465}
466#endif
467
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
new file mode 100644
index 00000000..82210806
--- /dev/null
+++ b/dynasm/dasm_x86.lua
@@ -0,0 +1,1799 @@
1------------------------------------------------------------------------------
2-- DynASM x86 module.
3--
4-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7
8-- Module information:
9local _info = {
10 arch = "x86",
11 description = "DynASM x86 (i386) module",
12 version = "1.2.1",
13 vernum = 10201,
14 release = "2009-04-16",
15 author = "Mike Pall",
16 license = "MIT",
17}
18
19-- Exported glue functions for the arch-specific module.
20local _M = { _info = _info }
21
22-- Cache library functions.
23local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
24local assert, unpack = assert, unpack
25local _s = string
26local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
27local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
28local concat, sort = table.concat, table.sort
29local char, unpack = string.char, unpack
30
31-- Inherited tables and callbacks.
32local g_opt, g_arch
33local wline, werror, wfatal, wwarn
34
35-- Action name list.
36-- CHECK: Keep this in sync with the C code!
37local action_names = {
38 -- int arg, 1 buffer pos:
39 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
40 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
41 "VREG", "SPACE",
42 -- ptrdiff_t arg, 1 buffer pos (address): !x64
43 "SETLABEL", "REL_A",
44 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
45 "REL_LG", "REL_PC",
46 -- action arg (1 byte) or int arg, 1 buffer pos (link):
47 "IMM_LG", "IMM_PC",
48 -- action arg (1 byte) or int arg, 1 buffer pos (offset):
49 "LABEL_LG", "LABEL_PC",
50 -- action arg (1 byte), 1 buffer pos (offset):
51 "ALIGN",
52 -- action args (2 bytes), no buffer pos.
53 "EXTERN",
54 -- action arg (1 byte), no buffer pos.
55 "ESC",
56 -- no action arg, no buffer pos.
57 "MARK",
58 -- action arg (1 byte), no buffer pos, terminal action:
59 "SECTION",
60 -- no args, no buffer pos, terminal action:
61 "STOP"
62}
63
64-- Maximum number of section buffer positions for dasm_put().
65-- CHECK: Keep this in sync with the C code!
66local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
67
68-- Action name -> action number (dynamically generated below).
69local map_action = {}
70-- First action number. Everything below does not need to be escaped.
71local actfirst = 256-#action_names
72
73-- Action list buffer and string (only used to remove dupes).
74local actlist = {}
75local actstr = ""
76
77-- Argument list for next dasm_put(). Start with offset 0 into action list.
78local actargs = { 0 }
79
80-- Current number of section buffer positions for dasm_put().
81local secpos = 1
82
83------------------------------------------------------------------------------
84
85-- Compute action numbers for action names.
86for n,name in ipairs(action_names) do
87 local num = actfirst + n - 1
88 map_action[name] = num
89end
90
91-- Dump action names and numbers.
92local function dumpactions(out)
93 out:write("DynASM encoding engine action codes:\n")
94 for n,name in ipairs(action_names) do
95 local num = map_action[name]
96 out:write(format(" %-10s %02X %d\n", name, num, num))
97 end
98 out:write("\n")
99end
100
101-- Write action list buffer as a huge static C array.
102local function writeactions(out, name)
103 local nn = #actlist
104 local last = actlist[nn] or 255
105 actlist[nn] = nil -- Remove last byte.
106 if nn == 0 then nn = 1 end
107 out:write("static const unsigned char ", name, "[", nn, "] = {\n")
108 local s = " "
109 for n,b in ipairs(actlist) do
110 s = s..b..","
111 if #s >= 75 then
112 assert(out:write(s, "\n"))
113 s = " "
114 end
115 end
116 out:write(s, last, "\n};\n\n") -- Add last byte back.
117end
118
119------------------------------------------------------------------------------
120
121-- Add byte to action list.
122local function wputxb(n)
123 assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
124 actlist[#actlist+1] = n
125end
126
127-- Add action to list with optional arg. Advance buffer pos, too.
128local function waction(action, a, num)
129 wputxb(assert(map_action[action], "bad action name `"..action.."'"))
130 if a then actargs[#actargs+1] = a end
131 if a or num then secpos = secpos + (num or 1) end
132end
133
134-- Add call to embedded DynASM C code.
135local function wcall(func, args)
136 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
137end
138
139-- Delete duplicate action list chunks. A tad slow, but so what.
140local function dedupechunk(offset)
141 local al, as = actlist, actstr
142 local chunk = char(unpack(al, offset+1, #al))
143 local orig = find(as, chunk, 1, true)
144 if orig then
145 actargs[1] = orig-1 -- Replace with original offset.
146 for i=offset+1,#al do al[i] = nil end -- Kill dupe.
147 else
148 actstr = as..chunk
149 end
150end
151
152-- Flush action list (intervening C code or buffer pos overflow).
153local function wflush(term)
154 local offset = actargs[1]
155 if #actlist == offset then return end -- Nothing to flush.
156 if not term then waction("STOP") end -- Terminate action list.
157 dedupechunk(offset)
158 wcall("put", actargs) -- Add call to dasm_put().
159 actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
160 secpos = 1 -- The actionlist offset occupies a buffer position, too.
161end
162
163-- Put escaped byte.
164local function wputb(n)
165 if n >= actfirst then waction("ESC") end -- Need to escape byte.
166 wputxb(n)
167end
168
169------------------------------------------------------------------------------
170
171-- Global label name -> global label number. With auto assignment on 1st use.
172local next_global = 10
173local map_global = setmetatable({}, { __index = function(t, name)
174 if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
175 local n = next_global
176 if n > 246 then werror("too many global labels") end
177 next_global = n + 1
178 t[name] = n
179 return n
180end})
181
182-- Dump global labels.
183local function dumpglobals(out, lvl)
184 local t = {}
185 for name, n in pairs(map_global) do t[n] = name end
186 out:write("Global labels:\n")
187 for i=10,next_global-1 do
188 out:write(format(" %s\n", t[i]))
189 end
190 out:write("\n")
191end
192
193-- Write global label enum.
194local function writeglobals(out, prefix)
195 local t = {}
196 for name, n in pairs(map_global) do t[n] = name end
197 out:write("enum {\n")
198 for i=10,next_global-1 do
199 out:write(" ", prefix, t[i], ",\n")
200 end
201 out:write(" ", prefix, "_MAX\n};\n")
202end
203
204-- Write global label names.
205local function writeglobalnames(out, name)
206 local t = {}
207 for name, n in pairs(map_global) do t[n] = name end
208 out:write("static const char *const ", name, "[] = {\n")
209 for i=10,next_global-1 do
210 out:write(" \"", t[i], "\",\n")
211 end
212 out:write(" (const char *)0\n};\n")
213end
214
215------------------------------------------------------------------------------
216
217-- Extern label name -> extern label number. With auto assignment on 1st use.
218local next_extern = -1
219local map_extern = setmetatable({}, { __index = function(t, name)
220 -- No restrictions on the name for now.
221 local n = next_extern
222 if n < -256 then werror("too many extern labels") end
223 next_extern = n - 1
224 t[name] = n
225 return n
226end})
227
228-- Dump extern labels.
229local function dumpexterns(out, lvl)
230 local t = {}
231 for name, n in pairs(map_extern) do t[-n] = name end
232 out:write("Extern labels:\n")
233 for i=1,-next_extern-1 do
234 out:write(format(" %s\n", t[i]))
235 end
236 out:write("\n")
237end
238
239-- Write extern label names.
240local function writeexternnames(out, name)
241 local t = {}
242 for name, n in pairs(map_extern) do t[-n] = name end
243 out:write("static const char *const ", name, "[] = {\n")
244 for i=1,-next_extern-1 do
245 out:write(" \"", t[i], "\",\n")
246 end
247 out:write(" (const char *)0\n};\n")
248end
249
250------------------------------------------------------------------------------
251
252-- Arch-specific maps.
253local map_archdef = {} -- Ext. register name -> int. name.
254local map_reg_rev = {} -- Int. register name -> ext. name.
255local map_reg_num = {} -- Int. register name -> register number.
256local map_reg_opsize = {} -- Int. register name -> operand size.
257local map_reg_valid_base = {} -- Int. register name -> valid base register?
258local map_reg_valid_index = {} -- Int. register name -> valid index register?
259local reg_list = {} -- Canonical list of int. register names.
260
261local map_type = {} -- Type name -> { ctype, reg }
262local ctypenum = 0 -- Type number (for _PTx macros).
263
264local addrsize = "d" -- Size for address operands. !x64
265
266-- Helper function to fill register maps.
267local function mkrmap(sz, cl, names)
268 local cname = format("@%s", sz)
269 reg_list[#reg_list+1] = cname
270 map_archdef[cl] = cname
271 map_reg_rev[cname] = cl
272 map_reg_num[cname] = -1
273 map_reg_opsize[cname] = sz
274 if sz == addrsize then
275 map_reg_valid_base[cname] = true
276 map_reg_valid_index[cname] = true
277 end
278 for n,name in ipairs(names) do
279 local iname = format("@%s%x", sz, n-1)
280 reg_list[#reg_list+1] = iname
281 map_archdef[name] = iname
282 map_reg_rev[iname] = name
283 map_reg_num[iname] = n-1
284 map_reg_opsize[iname] = sz
285 if sz == addrsize then
286 map_reg_valid_base[iname] = true
287 map_reg_valid_index[iname] = true
288 end
289 end
290 reg_list[#reg_list+1] = ""
291end
292
293-- Integer registers (dword, word and byte sized).
294mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
295map_reg_valid_index[map_archdef.esp] = false
296mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
297mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
298map_archdef["Ra"] = "@"..addrsize
299
300-- FP registers (internally tword sized, but use "f" as operand size).
301mkrmap("f", "Rf", {"st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"})
302
303-- SSE registers (oword sized, but qword and dword accessible).
304mkrmap("o", "xmm", {"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7"})
305
306-- Operand size prefixes to codes.
307local map_opsize = {
308 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
309 aword = addrsize,
310}
311
312-- Operand size code to number.
313local map_opsizenum = {
314 b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
315}
316
317-- Operand size code to name.
318local map_opsizename = {
319 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
320 f = "fpword",
321}
322
323-- Valid index register scale factors.
324local map_xsc = {
325 ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
326}
327
328-- Condition codes.
329local map_cc = {
330 o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
331 s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
332 c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
333 pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
334}
335
336
337-- Reverse defines for registers.
338function _M.revdef(s)
339 return gsub(s, "@%w+", map_reg_rev)
340end
341
342-- Dump register names and numbers
343local function dumpregs(out)
344 out:write("Register names, sizes and internal numbers:\n")
345 for _,reg in ipairs(reg_list) do
346 if reg == "" then
347 out:write("\n")
348 else
349 local name = map_reg_rev[reg]
350 local num = map_reg_num[reg]
351 local opsize = map_opsizename[map_reg_opsize[reg]]
352 out:write(format(" %-5s %-8s %s\n", name, opsize,
353 num < 0 and "(variable)" or num))
354 end
355 end
356end
357
358------------------------------------------------------------------------------
359
360-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
361local function wputlabel(aprefix, imm, num)
362 if type(imm) == "number" then
363 if imm < 0 then
364 waction("EXTERN")
365 wputxb(aprefix == "IMM_" and 0 or 1)
366 imm = -imm-1
367 else
368 waction(aprefix.."LG", nil, num);
369 end
370 wputxb(imm)
371 else
372 waction(aprefix.."PC", imm, num)
373 end
374end
375
376-- Put signed byte or arg.
377local function wputsbarg(n)
378 if type(n) == "number" then
379 if n < -128 or n > 127 then
380 werror("signed immediate byte out of range")
381 end
382 if n < 0 then n = n + 256 end
383 wputb(n)
384 else waction("IMM_S", n) end
385end
386
387-- Put unsigned byte or arg.
388local function wputbarg(n)
389 if type(n) == "number" then
390 if n < 0 or n > 255 then
391 werror("unsigned immediate byte out of range")
392 end
393 wputb(n)
394 else waction("IMM_B", n) end
395end
396
397-- Put unsigned word or arg.
398local function wputwarg(n)
399 if type(n) == "number" then
400 if n < 0 or n > 65535 then
401 werror("unsigned immediate word out of range")
402 end
403 local r = n%256; n = (n-r)/256; wputb(r); wputb(n);
404 else waction("IMM_W", n) end
405end
406
407-- Put signed or unsigned dword or arg.
408local function wputdarg(n)
409 local tn = type(n)
410 if tn == "number" then
411 if n < 0 then n = n + 4294967296 end
412 local r = n%256; n = (n-r)/256; wputb(r);
413 r = n%256; n = (n-r)/256; wputb(r);
414 r = n%256; n = (n-r)/256; wputb(r); wputb(n);
415 elseif tn == "table" then
416 wputlabel("IMM_", n[1], 1)
417 else
418 waction("IMM_D", n)
419 end
420end
421
422-- Put operand-size dependent number or arg (defaults to dword).
423local function wputszarg(sz, n)
424 if not sz or sz == "d" then wputdarg(n)
425 elseif sz == "w" then wputwarg(n)
426 elseif sz == "b" then wputbarg(n)
427 elseif sz == "s" then wputsbarg(n)
428 else werror("bad operand size") end
429end
430
431-- Put multi-byte opcode with operand-size dependent modifications.
432local function wputop(sz, op)
433 local r
434 if sz == "w" then wputb(102) end
435 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
436 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
437 if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end
438 if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end
439 if op >= 256 then r = op % 256 wputb((op-r) / 256) op = r end
440 if sz == "b" then op = op - 1 end
441 wputb(op)
442end
443
444-- Put ModRM or SIB formatted byte.
445local function wputmodrm(m, s, rm, vs, vrm)
446 assert(m < 4 and s < 8 and rm < 8, "bad modrm operands")
447 wputb(64*m + 8*s + rm)
448end
449
450-- Put ModRM/SIB plus optional displacement.
451local function wputmrmsib(t, imark, s, vsreg)
452 local vreg, vxreg
453 local reg, xreg = t.reg, t.xreg
454 if reg and reg < 0 then reg = 0; vreg = t.vreg end
455 if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
456 if s < 0 then s = 0 end
457
458 -- Register mode.
459 if sub(t.mode, 1, 1) == "r" then
460 wputmodrm(3, s, reg)
461 if vsreg then waction("VREG", vsreg); wputxb(2) end
462 if vreg then waction("VREG", vreg); wputxb(0) end
463 return
464 end
465
466 local disp = t.disp
467 local tdisp = type(disp)
468 -- No base register?
469 if not reg then
470 if xreg then
471 -- Indexed mode with index register only.
472 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
473 wputmodrm(0, s, 4)
474 if imark then waction("MARK") end
475 if vsreg then waction("VREG", vsreg); wputxb(2) end
476 wputmodrm(t.xsc, xreg, 5)
477 if vxreg then waction("VREG", vxreg); wputxb(3) end
478 else
479 -- Pure displacement.
480 wputmodrm(0, s, 5) -- [disp] -> (0, s, ebp)
481 if imark then waction("MARK") end
482 if vsreg then waction("VREG", vsreg); wputxb(2) end
483 end
484 wputdarg(disp)
485 return
486 end
487
488 local m
489 if tdisp == "number" then -- Check displacement size at assembly time.
490 if disp == 0 and reg ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
491 if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
492 elseif disp >= -128 and disp <= 127 then m = 1
493 else m = 2 end
494 elseif tdisp == "table" then
495 m = 2
496 end
497
498 -- Index register present or esp as base register: need SIB encoding.
499 if xreg or reg == 4 then
500 wputmodrm(m or 2, s, 4) -- ModRM.
501 if m == nil or imark then waction("MARK") end
502 if vsreg then waction("VREG", vsreg); wputxb(2) end
503 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
504 if vxreg then waction("VREG", vxreg); wputxb(3) end
505 if vreg then waction("VREG", vreg); wputxb(1) end
506 else
507 wputmodrm(m or 2, s, reg) -- ModRM.
508 if (imark and (m == 1 or m == 2)) or
509 (m == nil and (vsreg or vreg)) then waction("MARK") end
510 if vsreg then waction("VREG", vsreg); wputxb(2) end
511 if vreg then waction("VREG", vreg); wputxb(1) end
512 end
513
514 -- Put displacement.
515 if m == 1 then wputsbarg(disp)
516 elseif m == 2 then wputdarg(disp)
517 elseif m == nil then waction("DISP", disp) end
518end
519
520------------------------------------------------------------------------------
521
522-- Return human-readable operand mode string.
523local function opmodestr(op, args)
524 local m = {}
525 for i=1,#args do
526 local a = args[i]
527 m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
528 end
529 return op.." "..concat(m, ",")
530end
531
532-- Convert number to valid integer or nil.
533local function toint(expr)
534 local n = tonumber(expr)
535 if n then
536 if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
537 werror("bad integer number `"..expr.."'")
538 end
539 return n
540 end
541end
542
543-- Parse immediate expression.
544local function immexpr(expr)
545 -- &expr (pointer)
546 if sub(expr, 1, 1) == "&" then
547 return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
548 end
549
550 local prefix = sub(expr, 1, 2)
551 -- =>expr (pc label reference)
552 if prefix == "=>" then
553 return "iJ", sub(expr, 3)
554 end
555 -- ->name (global label reference)
556 if prefix == "->" then
557 return "iJ", map_global[sub(expr, 3)]
558 end
559
560 -- [<>][1-9] (local label reference)
561 local dir, lnum = match(expr, "^([<>])([1-9])$")
562 if dir then -- Fwd: 247-255, Bkwd: 1-9.
563 return "iJ", lnum + (dir == ">" and 246 or 0)
564 end
565
566 local extname = match(expr, "^extern%s+(%S+)$")
567 if extname then
568 return "iJ", map_extern[extname]
569 end
570
571 -- expr (interpreted as immediate)
572 return "iI", expr
573end
574
575-- Parse displacement expression: +-num, +-expr, +-opsize*num
576local function dispexpr(expr)
577 local disp = expr == "" and 0 or toint(expr)
578 if disp then return disp end
579 local c, dispt = match(expr, "^([+-])%s*(.+)$")
580 if c == "+" then
581 expr = dispt
582 elseif not c then
583 werror("bad displacement expression `"..expr.."'")
584 end
585 local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
586 local ops, imm = map_opsize[opsize], toint(tailops)
587 if ops and imm then
588 if c == "-" then imm = -imm end
589 return imm*map_opsizenum[ops]
590 end
591 local mode, iexpr = immexpr(dispt)
592 if mode == "iJ" then
593 if c == "-" then werror("cannot invert label reference") end
594 return { iexpr }
595 end
596 return expr -- Need to return original signed expression.
597end
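-- Examples (sketch; assumes the usual map_opsize/map_opsizenum entries,
-- e.g. "dword" -> "d" -> 4 bytes):
--   dispexpr("")           -> 0
--   dispexpr("+8")         -> 8
--   dispexpr("+dword*2")   -> 8                         -- 2 elements of 4 bytes
--   dispexpr("+->handler") -> { map_global["handler"] } -- label reference
--   dispexpr("+n*4")       -> "n*4"                     -- left to the C compiler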
598
599-- Parse register or type expression.
600local function rtexpr(expr)
601 if not expr then return end
602 local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
603 local tp = map_type[tname or expr]
604 if tp then
605 local reg = ovreg or tp.reg
606 local rnum = map_reg_num[reg]
607 if not rnum then
608 werror("type `"..(tname or expr).."' needs a register override")
609 end
610 if not map_reg_valid_base[reg] then
611 werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
612 end
613 return reg, rnum, tp
614 end
615 return expr, map_reg_num[expr]
616end
617
618-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
619local function parseoperand(param)
620 local t = {}
621
622 local expr = param
623 local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
624 if opsize then
625 t.opsize = map_opsize[opsize]
626 if t.opsize then expr = tailops end
627 end
628
629 local br = match(expr, "^%[%s*(.-)%s*%]$")
630 repeat
631 if br then
632 t.mode = "xm"
633
634 -- [disp]
635 t.disp = toint(br)
636 if t.disp then
637 t.mode = "xmO"
638 break
639 end
640
641 -- [reg...]
642 local tp
643 local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
644 reg, t.reg, tp = rtexpr(reg)
645 if not t.reg then
646 -- [expr]
647 t.mode = "xmO"
648 t.disp = dispexpr("+"..br)
649 break
650 end
651
652 if t.reg == -1 then
653 t.vreg, tailr = match(tailr, "^(%b())(.*)$")
654 if not t.vreg then werror("bad variable register expression") end
655 end
656
657 -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
658 local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
659 if xsc then
660 if not map_reg_valid_index[reg] then
661 werror("bad index register `"..map_reg_rev[reg].."'")
662 end
663 t.xsc = map_xsc[xsc]
664 t.xreg = t.reg
665 t.vxreg = t.vreg
666 t.reg = nil
667 t.vreg = nil
668 t.disp = dispexpr(tailsc)
669 break
670 end
671 if not map_reg_valid_base[reg] then
672 werror("bad base register `"..map_reg_rev[reg].."'")
673 end
674
675 -- [reg] or [reg+-disp]
676 t.disp = toint(tailr) or (tailr == "" and 0)
677 if t.disp then break end
678
679 -- [reg+xreg...]
680 local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
681 xreg, t.xreg, tp = rtexpr(xreg)
682 if not t.xreg then
683 -- [reg+-expr]
684 t.disp = dispexpr(tailr)
685 break
686 end
687 if not map_reg_valid_index[xreg] then
688 werror("bad index register `"..map_reg_rev[xreg].."'")
689 end
690
691 if t.xreg == -1 then
692 t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
693 if not t.vxreg then werror("bad variable register expression") end
694 end
695
696 -- [reg+xreg*xsc...]
697 local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
698 if xsc then
699 t.xsc = map_xsc[xsc]
700 tailx = tailsc
701 end
702
703 -- [...] or [...+-disp] or [...+-expr]
704 t.disp = dispexpr(tailx)
705 else
706 -- imm or opsize*imm
707 local imm = toint(expr)
708 if not imm and sub(expr, 1, 1) == "*" and t.opsize then
709 imm = toint(sub(expr, 2))
710 if imm then
711 imm = imm * map_opsizenum[t.opsize]
712 t.opsize = nil
713 end
714 end
715 if imm then
716 if t.opsize then werror("bad operand size override") end
717 local m = "i"
718 if imm == 1 then m = m.."1" end
719 if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
720 if imm >= -128 and imm <= 127 then m = m.."S" end
721 t.imm = imm
722 t.mode = m
723 break
724 end
725
726 local tp
727 local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
728 reg, t.reg, tp = rtexpr(reg)
729 if t.reg then
730 if t.reg == -1 then
731 t.vreg, tailr = match(tailr, "^(%b())(.*)$")
732 if not t.vreg then werror("bad variable register expression") end
733 end
734 -- reg
735 if tailr == "" then
736 if t.opsize then werror("bad operand size override") end
737 t.opsize = map_reg_opsize[reg]
738 if t.opsize == "f" then
739 t.mode = t.reg == 0 and "fF" or "f"
740 else
741 if reg == "@w4" then wwarn("bad idea, try again with `esp'") end
742 t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
743 end
744 break
745 end
746
747 -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
748 if not tp then werror("bad operand `"..param.."'") end
749 t.mode = "xm"
750 t.disp = format(tp.ctypefmt, tailr)
751 else
752 t.mode, t.imm = immexpr(expr)
753 if sub(t.mode, -1) == "J" then
754 if t.opsize and t.opsize ~= addrsize then
755 werror("bad operand size override")
756 end
757 t.opsize = addrsize
758 end
759 end
760 end
761 until true
762 return t
763end
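-- Example results (sketch; register and size values assume the
-- map_reg_num/map_opsize/map_xsc tables defined earlier):
--   parseoperand("eax")                 -> { mode="rmR", opsize="d", reg=0 }
--   parseoperand("42")                  -> { mode="iS", imm=42 }
--   parseoperand("dword [eax+ecx*4+8]") -> { mode="xm", opsize="d",
--                                            reg=0, xreg=1, xsc=2, disp=8 }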
764
765------------------------------------------------------------------------------
766-- x86 Template String Description
767-- ===============================
768--
769-- Each template string is a list of [match:]pattern pairs,
770-- separated by "|". The first match wins. No match means a
771-- bad or unsupported combination of operand modes or sizes.
772--
773-- The match part and the ":" is omitted if the operation has
774-- no operands. Otherwise the first N characters are matched
775-- against the mode strings of each of the N operands.
776--
777-- The mode string for each operand type is (see parseoperand()):
778-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
779-- FP register: "f", +"F" for st0
780-- Index operand: "xm", +"O" for [disp] (pure offset)
781-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
782-- +"I" for arg, +"P" for pointer
783-- Any: +"J" for valid jump targets
784--
785-- So a match character "m" (mixed) matches both an integer register
786-- and an index operand (to be encoded with the ModRM/SIB scheme).
787-- But "r" matches only a register and "x" only an index operand
788-- (e.g. for FP memory access operations).
789--
790-- The operand size match string starts right after the mode match
791-- characters and ends before the ":". "dwb" is assumed, if empty.
792-- The effective data size of the operation is matched against this list.
793--
794-- If only the regular "b", "w", "d", "q", "t" operand sizes are
795-- present, then all operands must be the same size. Unspecified sizes
796-- are ignored, but at least one operand must have a size or the pattern
797-- won't match (use the "byte", "word", "dword", "qword", "tword"
798-- operand size overrides. E.g.: mov dword [eax], 1).
799--
800-- If the list has a "1" or "2" prefix, the operand size is taken
801-- from the respective operand and any other operand sizes are ignored.
802-- If the list contains only ".", all operand sizes are ignored.
803-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
804-- are compared to the match.
805--
806-- E.g. "rrdw" matches for either two dword registers or two word
807-- registers. "Fx2dq" matches an st0 operand plus an index operand
808-- pointing to a dword (float) or qword (double).
809--
810-- Every character after the ":" is part of the pattern string:
811-- Hex chars are accumulated to form the opcode (left to right).
812-- "n" disables the standard opcode mods
813-- (otherwise: -1 for "b", o16 prefix for "w")
814-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
815-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
816-- The spare 3 bits are either filled with the last hex digit or
817-- the result from a previous "r"/"R". The opcode is restored.
818--
819-- All of the following characters force a flush of the opcode:
820-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
821-- "S" stores a signed 8 bit immediate from the last operand.
822-- "U" stores an unsigned 8 bit immediate from the last operand.
823-- "W" stores an unsigned 16 bit immediate from the last operand.
824-- "i" stores an operand sized immediate from the last operand.
825-- "I" dito, but generates an action code to optionally modify
826-- the opcode (+2) for a signed 8 bit immediate.
827-- "J" generates one of the REL action codes from the last operand.
828--
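-- A worked example (sketch): for "mov [ebx], eax" the operands parse to modes
-- "xm" and "rmR", so the "mr:89Rm" pair of mov_2 below matches ("m" accepts an
-- index operand, "r" a register). The empty size match defaults to "dwb" and
-- the common size is "d". The pattern then accumulates opcode 89 (MOV r/m,r),
-- "R" merges the 2nd operand's register number and "m" encodes ModRM/SIB from
-- the 1st operand, yielding the bytes 89 03.
--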
829------------------------------------------------------------------------------
830
831-- Template strings for x86 instructions. Ordered by first opcode byte.
832-- Unimplemented opcodes (deliberate omissions) are marked with *.
833local map_op = {
834 -- 00-05: add...
835 -- 06: *push es
836 -- 07: *pop es
837 -- 08-0D: or...
838 -- 0E: *push cs
839 -- 0F: two byte opcode prefix
840 -- 10-15: adc...
841 -- 16: *push ss
842 -- 17: *pop ss
843 -- 18-1D: sbb...
844 -- 1E: *push ds
845 -- 1F: *pop ds
846 -- 20-25: and...
847 es_0 = "26",
848 -- 27: *daa
849 -- 28-2D: sub...
850 cs_0 = "2E",
851 -- 2F: *das
852 -- 30-35: xor...
853 ss_0 = "36",
854 -- 37: *aaa
855 -- 38-3D: cmp...
856 ds_0 = "3E",
857 -- 3F: *aas
858 inc_1 = "rdw:40r|m:FF0m",
859 dec_1 = "rdw:48r|m:FF1m",
860 push_1 = "rdw:50r|mdw:FF6m|S.:6AS|ib:n6Ai|i.:68i",
861 pop_1 = "rdw:58r|mdw:8F0m",
862 -- 60: *pusha, *pushad, *pushaw
863 -- 61: *popa, *popad, *popaw
864 -- 62: *bound rdw,x
865 -- 63: *arpl mw,rw
866 fs_0 = "64",
867 gs_0 = "65",
868 o16_0 = "66",
869 a16_0 = "67",
870 -- 68: push idw
871 -- 69: imul rdw,mdw,idw
872 -- 6A: push ib
873 -- 6B: imul rdw,mdw,S
874 -- 6C: *insb
875 -- 6D: *insd, *insw
876 -- 6E: *outsb
877 -- 6F: *outsd, *outsw
878 -- 70-7F: jcc lb
879 -- 80: add... mb,i
880 -- 81: add... mdw,i
881 -- 82: *undefined
882 -- 83: add... mdw,S
883 test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
884 -- 86: xchg rb,mb
885 -- 87: xchg rdw,mdw
886 -- 88: mov mb,r
887 -- 89: mov mdw,r
888 -- 8A: mov r,mb
889 -- 8B: mov r,mdw
890 -- 8C: *mov mdw,seg
891 lea_2 = "rxd:8DrM",
892 -- 8E: *mov seg,mdw
893 -- 8F: pop mdw
894 nop_0 = "90",
895 xchg_2 = "Rrdw:90R|rRdw:90r|rm:87rM|mr:87Rm",
896 cbw_0 = "6698",
897 cwde_0 = "98",
898 cwd_0 = "6699",
899 cdq_0 = "99",
900 -- 9A: *call iw:idw
901 wait_0 = "9B",
902 fwait_0 = "9B",
903 pushf_0 = "9C",
904 pushfw_0 = "669C",
905 pushfd_0 = "9C",
906 popf_0 = "9D",
907 popfw_0 = "669D",
908 popfd_0 = "9D",
909 sahf_0 = "9E",
910 lahf_0 = "9F",
911 mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
912 movsb_0 = "A4",
913 movsw_0 = "66A5",
914 movsd_0 = "A5",
915 cmpsb_0 = "A6",
916 cmpsw_0 = "66A7",
917 cmpsd_0 = "A7",
918 -- A8: test Rb,i
919 -- A9: test Rdw,i
920 stosb_0 = "AA",
921 stosw_0 = "66AB",
922 stosd_0 = "AB",
923 lodsb_0 = "AC",
924 lodsw_0 = "66AD",
925 lodsd_0 = "AD",
926 scasb_0 = "AE",
927 scasw_0 = "66AF",
928 scasd_0 = "AF",
929 -- B0-B7: mov rb,i
930 -- B8-BF: mov rdw,i
931 -- C0: rol... mb,i
932 -- C1: rol... mdw,i
933 ret_1 = "i.:nC2W",
934 ret_0 = "C3",
935 -- C4: *les rdw,mq
936 -- C5: *lds rdw,mq
937 -- C6: mov mb,i
938 -- C7: mov mdw,i
939 -- C8: *enter iw,ib
940 leave_0 = "C9",
941 -- CA: *retf iw
942 -- CB: *retf
943 int3_0 = "CC",
944 int_1 = "i.:nCDU",
945 into_0 = "CE",
946 -- CF: *iret
947 -- D0: rol... mb,1
948 -- D1: rol... mdw,1
949 -- D2: rol... mb,cl
950  -- D3: rol... mdw,cl
951 -- D4: *aam ib
952 -- D5: *aad ib
953 -- D6: *salc
954 -- D7: *xlat
955 -- D8-DF: floating point ops
956 -- E0: *loopne
957 -- E1: *loope
958 -- E2: *loop
959 -- E3: *jcxz, *jecxz
960 -- E4: *in Rb,ib
961 -- E5: *in Rdw,ib
962 -- E6: *out ib,Rb
963 -- E7: *out ib,Rdw
964 call_1 = "md:FF2m|J.:E8J",
965 jmp_1 = "md:FF4m|J.:E9J", -- short: EB
966 -- EA: *jmp iw:idw
967 -- EB: jmp ib
968 -- EC: *in Rb,dx
969 -- ED: *in Rdw,dx
970 -- EE: *out dx,Rb
971 -- EF: *out dx,Rdw
972 -- F0: *lock
973 int1_0 = "F1",
974 repne_0 = "F2",
975 repnz_0 = "F2",
976 rep_0 = "F3",
977 repe_0 = "F3",
978 repz_0 = "F3",
979 -- F4: *hlt
980 cmc_0 = "F5",
981 -- F6: test... mb,i; div... mb
982 -- F7: test... mdw,i; div... mdw
983 clc_0 = "F8",
984 stc_0 = "F9",
985 -- FA: *cli
986 cld_0 = "FC",
987 std_0 = "FD",
988 -- FE: inc... mb
989 -- FF: inc... mdw
990
991 -- misc ops
992 not_1 = "m:F72m",
993 neg_1 = "m:F73m",
994 mul_1 = "m:F74m",
995 imul_1 = "m:F75m",
996 div_1 = "m:F76m",
997 idiv_1 = "m:F77m",
998
999 imul_2 = "rmdw:0FAFrM|rIdw:69rmI|rSdw:6BrmS|ridw:69rmi",
1000 imul_3 = "rmIdw:69rMI|rmSdw:6BrMS|rmidw:69rMi",
1001
1002 movzx_2 = "rm/db:0FB6rM|rm/wb:0FB6rM|rm/dw:0FB7rM",
1003 movsx_2 = "rm/db:0FBErM|rm/wb:0FBErM|rm/dw:0FBFrM",
1004
1005 bswap_1 = "rd:0FC8r",
1006 bsf_2 = "rmdw:0FBCrM",
1007 bsr_2 = "rmdw:0FBDrM",
1008 bt_2 = "mrdw:0FA3Rm|midw:0FBA4mU",
1009 btc_2 = "mrdw:0FBBRm|midw:0FBA7mU",
1010 btr_2 = "mrdw:0FB3Rm|midw:0FBA6mU",
1011 bts_2 = "mrdw:0FABRm|midw:0FBA5mU",
1012
1013 rdtsc_0 = "0F31", -- P1+
1014 cpuid_0 = "0FA2", -- P1+
1015
1016 -- floating point ops
1017 fst_1 = "ff:DDD0r|xd:D92m|xq:DD2m",
1018 fstp_1 = "ff:DDD8r|xd:D93m|xq:DD3m|xt:DB7m",
1019 fld_1 = "ff:D9C0r|xd:D90m|xq:DD0m|xt:DB5m",
1020
1021 fpop_0 = "DDD8", -- Alias for fstp st0.
1022
1023 fist_1 = "xw:nDF2m|xd:DB2m",
1024 fistp_1 = "xw:nDF3m|xd:DB3m|xq:DF7m",
1025 fild_1 = "xw:nDF0m|xd:DB0m|xq:DF5m",
1026
1027 fxch_0 = "D9C9",
1028 fxch_1 = "ff:D9C8r",
1029 fxch_2 = "fFf:D9C8r|Fff:D9C8R",
1030
1031 fucom_1 = "ff:DDE0r",
1032 fucom_2 = "Fff:DDE0R",
1033 fucomp_1 = "ff:DDE8r",
1034 fucomp_2 = "Fff:DDE8R",
1035 fucomi_1 = "ff:DBE8r", -- P6+
1036 fucomi_2 = "Fff:DBE8R", -- P6+
1037 fucomip_1 = "ff:DFE8r", -- P6+
1038 fucomip_2 = "Fff:DFE8R", -- P6+
1039 fcomi_1 = "ff:DBF0r", -- P6+
1040 fcomi_2 = "Fff:DBF0R", -- P6+
1041 fcomip_1 = "ff:DFF0r", -- P6+
1042 fcomip_2 = "Fff:DFF0R", -- P6+
1043 fucompp_0 = "DAE9",
1044 fcompp_0 = "DED9",
1045
1046 fldcw_1 = "xw:nD95m",
1047 fstcw_1 = "xw:n9BD97m",
1048 fnstcw_1 = "xw:nD97m",
1049 fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
1050 fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
1051 fclex_0 = "9BDBE2",
1052 fnclex_0 = "DBE2",
1053
1054 fnop_0 = "D9D0",
1055 -- D9D1-D9DF: unassigned
1056
1057 fchs_0 = "D9E0",
1058 fabs_0 = "D9E1",
1059 -- D9E2: unassigned
1060 -- D9E3: unassigned
1061 ftst_0 = "D9E4",
1062 fxam_0 = "D9E5",
1063 -- D9E6: unassigned
1064 -- D9E7: unassigned
1065 fld1_0 = "D9E8",
1066 fldl2t_0 = "D9E9",
1067 fldl2e_0 = "D9EA",
1068 fldpi_0 = "D9EB",
1069 fldlg2_0 = "D9EC",
1070 fldln2_0 = "D9ED",
1071 fldz_0 = "D9EE",
1072 -- D9EF: unassigned
1073
1074 f2xm1_0 = "D9F0",
1075 fyl2x_0 = "D9F1",
1076 fptan_0 = "D9F2",
1077 fpatan_0 = "D9F3",
1078 fxtract_0 = "D9F4",
1079 fprem1_0 = "D9F5",
1080 fdecstp_0 = "D9F6",
1081 fincstp_0 = "D9F7",
1082 fprem_0 = "D9F8",
1083 fyl2xp1_0 = "D9F9",
1084 fsqrt_0 = "D9FA",
1085 fsincos_0 = "D9FB",
1086 frndint_0 = "D9FC",
1087 fscale_0 = "D9FD",
1088 fsin_0 = "D9FE",
1089 fcos_0 = "D9FF",
1090
1091 -- SSE, SSE2
1092 andnpd_2 = "rmo:660F55rM",
1093 andnps_2 = "rmo:0F55rM",
1094 andpd_2 = "rmo:660F54rM",
1095 andps_2 = "rmo:0F54rM",
1096 clflush_1 = "x.:0FAE7m",
1097 cmppd_3 = "rmio:660FC2rMU",
1098 cmpps_3 = "rmio:0FC2rMU",
1099 cmpsd_3 = "rmio:F20FC2rMU",
1100 cmpss_3 = "rmio:F30FC2rMU",
1101 comisd_2 = "rmo:660F2FrM",
1102 comiss_2 = "rmo:0F2FrM",
1103 cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
1104 cvtdq2ps_2 = "rmo:0F5BrM",
1105 cvtpd2dq_2 = "rmo:F20FE6rM",
1106 cvtpd2ps_2 = "rmo:660F5ArM",
1107 cvtpi2pd_2 = "rx/oq:660F2ArM",
1108 cvtpi2ps_2 = "rx/oq:0F2ArM",
1109 cvtps2dq_2 = "rmo:660F5BrM",
1110 cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
1111 cvtsd2si_2 = "rr/do:F20F2DrM|rx/dq:",
1112 cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
1113 cvtsi2sd_2 = "rm/od:F20F2ArM",
1114 cvtsi2ss_2 = "rm/od:F30F2ArM",
1115 cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
1116  cvtss2si_2 = "rr/do:F30F2DrM|rx/dd:",
1117 cvttpd2dq_2 = "rmo:660FE6rM",
1118 cvttps2dq_2 = "rmo:F30F5BrM",
1119 cvttsd2si_2 = "rr/do:F20F2CrM|rx/dq:",
1120 cvttss2si_2 = "rr/do:F30F2CrM|rx/dd:",
1121 ldmxcsr_1 = "xd:0FAE2m",
1122 lfence_0 = "0FAEE8",
1123 maskmovdqu_2 = "rro:660FF7rM",
1124 mfence_0 = "0FAEF0",
1125 movapd_2 = "rmo:660F28rM|mro:660F29Rm",
1126 movaps_2 = "rmo:0F28rM|mro:0F29Rm",
1127 movd_2 = "rm/od:660F6ErM|mr/do:660F7ERm",
1128 movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
1129 movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
1130 movhlps_2 = "rro:0F12rM",
1131 movhpd_2 = "rx/oq:660F16rM|xr/qo:660F17Rm",
1132 movhps_2 = "rx/oq:0F16rM|xr/qo:0F17Rm",
1133 movlhps_2 = "rro:0F16rM",
1134 movlpd_2 = "rx/oq:660F12rM|xr/qo:660F13Rm",
1135 movlps_2 = "rx/oq:0F12rM|xr/qo:0F13Rm",
1136 movmskpd_2 = "rr/do:660F50rM",
1137 movmskps_2 = "rr/do:0F50rM",
1138 movntdq_2 = "xro:660FE7Rm",
1139 movnti_2 = "xrd:0FC3Rm",
1140 movntpd_2 = "xro:660F2BRm",
1141 movntps_2 = "xro:0F2BRm",
1142 movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:660FD6Rm",
1143 movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:F20F11Rm",
1144 movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
1145 movupd_2 = "rmo:660F10rM|mro:660F11Rm",
1146 movups_2 = "rmo:0F10rM|mro:0F11Rm",
1147 orpd_2 = "rmo:660F56rM",
1148 orps_2 = "rmo:0F56rM",
1149 packssdw_2 = "rmo:660F6BrM",
1150 packsswb_2 = "rmo:660F63rM",
1151 packuswb_2 = "rmo:660F67rM",
1152 paddb_2 = "rmo:660FFCrM",
1153 paddd_2 = "rmo:660FFErM",
1154 paddq_2 = "rmo:660FD4rM",
1155 paddsb_2 = "rmo:660FECrM",
1156 paddsw_2 = "rmo:660FEDrM",
1157 paddusb_2 = "rmo:660FDCrM",
1158 paddusw_2 = "rmo:660FDDrM",
1159 paddw_2 = "rmo:660FFDrM",
1160 pand_2 = "rmo:660FDBrM",
1161 pandn_2 = "rmo:660FDFrM",
1162 pause_0 = "F390",
1163 pavgb_2 = "rmo:660FE0rM",
1164 pavgw_2 = "rmo:660FE3rM",
1165 pcmpeqb_2 = "rmo:660F74rM",
1166 pcmpeqd_2 = "rmo:660F76rM",
1167 pcmpeqw_2 = "rmo:660F75rM",
1168 pcmpgtb_2 = "rmo:660F64rM",
1169 pcmpgtd_2 = "rmo:660F66rM",
1170 pcmpgtw_2 = "rmo:660F65rM",
1171 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
1172 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1173 pmaddwd_2 = "rmo:660FF5rM",
1174 pmaxsw_2 = "rmo:660FEErM",
1175 pmaxub_2 = "rmo:660FDErM",
1176 pminsw_2 = "rmo:660FEArM",
1177 pminub_2 = "rmo:660FDArM",
1178 pmovmskb_2 = "rr/do:660FD7rM",
1179 pmulhuw_2 = "rmo:660FE4rM",
1180 pmulhw_2 = "rmo:660FE5rM",
1181 pmullw_2 = "rmo:660FD5rM",
1182 pmuludq_2 = "rmo:660FF4rM",
1183 por_2 = "rmo:660FEBrM",
1184 prefetchnta_1 = "xb:n0F180m",
1185 prefetcht0_1 = "xb:n0F181m",
1186 prefetcht1_1 = "xb:n0F182m",
1187 prefetcht2_1 = "xb:n0F183m",
1188 psadbw_2 = "rmo:660FF6rM",
1189 pshufd_3 = "rmio:660F70rMU",
1190 pshufhw_3 = "rmio:F30F70rMU",
1191 pshuflw_3 = "rmio:F20F70rMU",
1192 pslld_2 = "rmo:660FF2rM|rio:660F726mU",
1193 pslldq_2 = "rio:660F737mU",
1194 psllq_2 = "rmo:660FF3rM|rio:660F736mU",
1195 psllw_2 = "rmo:660FF1rM|rio:660F716mU",
1196 psrad_2 = "rmo:660FE2rM|rio:660F724mU",
1197 psraw_2 = "rmo:660FE1rM|rio:660F714mU",
1198 psrld_2 = "rmo:660FD2rM|rio:660F722mU",
1199 psrldq_2 = "rio:660F733mU",
1200 psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1201 psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1202 psubb_2 = "rmo:660FF8rM",
1203 psubd_2 = "rmo:660FFArM",
1204 psubq_2 = "rmo:660FFBrM",
1205 psubsb_2 = "rmo:660FE8rM",
1206 psubsw_2 = "rmo:660FE9rM",
1207 psubusb_2 = "rmo:660FD8rM",
1208 psubusw_2 = "rmo:660FD9rM",
1209 psubw_2 = "rmo:660FF9rM",
1210 punpckhbw_2 = "rmo:660F68rM",
1211 punpckhdq_2 = "rmo:660F6ArM",
1212 punpckhqdq_2 = "rmo:660F6DrM",
1213 punpckhwd_2 = "rmo:660F69rM",
1214 punpcklbw_2 = "rmo:660F60rM",
1215 punpckldq_2 = "rmo:660F62rM",
1216 punpcklqdq_2 = "rmo:660F6CrM",
1217 punpcklwd_2 = "rmo:660F61rM",
1218 pxor_2 = "rmo:660FEFrM",
1219 rcpps_2 = "rmo:0F53rM",
1220 rcpss_2 = "rmo:F30F53rM",
1221 rsqrtps_2 = "rmo:0F52rM",
1222 rsqrtss_2 = "rmo:F30F52rM",
1223 sfence_0 = "0FAEF8",
1224 shufpd_3 = "rmio:660FC6rMU",
1225 shufps_3 = "rmio:0FC6rMU",
1226 stmxcsr_1 = "xd:0FAE3m",
1227 ucomisd_2 = "rmo:660F2ErM",
1228 ucomiss_2 = "rmo:0F2ErM",
1229 unpckhpd_2 = "rmo:660F15rM",
1230 unpckhps_2 = "rmo:0F15rM",
1231 unpcklpd_2 = "rmo:660F14rM",
1232 unpcklps_2 = "rmo:0F14rM",
1233 xorpd_2 = "rmo:660F57rM",
1234 xorps_2 = "rmo:0F57rM",
1235
1236 -- SSE3 ops
1237 fisttp_1 = "xw:nDF1m|xd:DB1m|xq:DD1m",
1238 addsubpd_2 = "rmo:660FD0rM",
1239 addsubps_2 = "rmo:F20FD0rM",
1240 haddpd_2 = "rmo:660F7CrM",
1241 haddps_2 = "rmo:F20F7CrM",
1242 hsubpd_2 = "rmo:660F7DrM",
1243 hsubps_2 = "rmo:F20F7DrM",
1244 lddqu_2 = "rxo:F20FF0rM",
1245 movddup_2 = "rmo:F20F12rM",
1246 movshdup_2 = "rmo:F30F16rM",
1247 movsldup_2 = "rmo:F30F12rM",
1248
1249 -- SSSE3 ops
1250 pabsb_2 = "rmo:660F381CrM",
1251 pabsd_2 = "rmo:660F381ErM",
1252 pabsw_2 = "rmo:660F381DrM",
1253 palignr_3 = "rmio:660F3A0FrMU",
1254 phaddd_2 = "rmo:660F3802rM",
1255 phaddsw_2 = "rmo:660F3803rM",
1256 phaddw_2 = "rmo:660F3801rM",
1257 phsubd_2 = "rmo:660F3806rM",
1258 phsubsw_2 = "rmo:660F3807rM",
1259 phsubw_2 = "rmo:660F3805rM",
1260 pmaddubsw_2 = "rmo:660F3804rM",
1261 pmulhrsw_2 = "rmo:660F380BrM",
1262 pshufb_2 = "rmo:660F3800rM",
1263 psignb_2 = "rmo:660F3808rM",
1264 psignd_2 = "rmo:660F380ArM",
1265 psignw_2 = "rmo:660F3809rM",
1266
1267 -- SSE4.1 ops
1268 blendpd_3 = "rmio:660F3A0DrMU",
1269 blendps_3 = "rmio:660F3A0CrMU",
1270 blendvpd_3 = "rmRo:660F3815rM",
1271 blendvps_3 = "rmRo:660F3814rM",
1272 dppd_3 = "rmio:660F3A41rMU",
1273 dpps_3 = "rmio:660F3A40rMU",
1274 extractps_3 = "mri/do:660F3A17RmU",
1275  insertps_3 = "rrio:660F3A21rMU|rxi/od:",
1276 movntdqa_2 = "rmo:660F382ArM",
1277 mpsadbw_3 = "rmio:660F3A42rMU",
1278 packusdw_2 = "rmo:660F382BrM",
1279 pblendvb_3 = "rmRo:660F3810rM",
1280 pblendw_3 = "rmio:660F3A0ErMU",
1281 pcmpeqq_2 = "rmo:660F3829rM",
1282 pextrb_3 = "rri/do:660F3A14nRmU|xri/bo:",
1283 pextrd_3 = "mri/do:660F3A16RmU",
1284 -- x64: pextrq
1285 -- pextrw is SSE2, mem operand is SSE4.1 only
1286 phminposuw_2 = "rmo:660F3841rM",
1287 pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
1288 pinsrd_3 = "rmi/od:660F3A22rMU",
1289 -- x64: pinsrq
1290 pmaxsb_2 = "rmo:660F383CrM",
1291 pmaxsd_2 = "rmo:660F383DrM",
1292 pmaxud_2 = "rmo:660F383FrM",
1293 pmaxuw_2 = "rmo:660F383ErM",
1294 pminsb_2 = "rmo:660F3838rM",
1295 pminsd_2 = "rmo:660F3839rM",
1296 pminud_2 = "rmo:660F383BrM",
1297 pminuw_2 = "rmo:660F383ArM",
1298 pmovsxbd_2 = "rro:660F3821rM|rx/od:",
1299 pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
1300 pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
1301 pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
1302 pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
1303 pmovsxwq_2 = "rro:660F3824rM|rx/od:",
1304 pmovzxbd_2 = "rro:660F3831rM|rx/od:",
1305 pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
1306 pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
1307 pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
1308 pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
1309 pmovzxwq_2 = "rro:660F3834rM|rx/od:",
1310 pmuldq_2 = "rmo:660F3828rM",
1311 pmulld_2 = "rmo:660F3840rM",
1312 ptest_2 = "rmo:660F3817rM",
1313 roundpd_3 = "rmio:660F3A09rMU",
1314 roundps_3 = "rmio:660F3A08rMU",
1315 roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
1316 roundss_3 = "rrio:660F3A0ArMU|rxi/od:",
1317
1318 -- SSE4.2 ops
1319 crc32_2 = "rmd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0nrM",
1320 pcmpestri_3 = "rmio:660F3A61rMU",
1321 pcmpestrm_3 = "rmio:660F3A60rMU",
1322 pcmpgtq_2 = "rmo:660F3837rM",
1323 pcmpistri_3 = "rmio:660F3A63rMU",
1324 pcmpistrm_3 = "rmio:660F3A62rMU",
1325 popcnt_2 = "rmdw:F30FB8rM",
1326
1327 -- SSE4a
1328 extrq_2 = "rro:660F79rM",
1329 extrq_3 = "riio:660F780mUU",
1330 insertq_2 = "rro:F20F79rM",
1331 insertq_4 = "rriio:F20F78rMUU",
1332 lzcnt_2 = "rmdw:F30FBDrM",
1333 movntsd_2 = "xr/qo:F20F2BRm",
1334 movntss_2 = "xr/do:F30F2BRm",
1335 -- popcnt is also in SSE4.2
1336}
1337
1338------------------------------------------------------------------------------
1339
1340-- Arithmetic ops.
1341for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
1342 ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
1343 local n8 = n * 8
1344 map_op[name.."_2"] = format(
1345 "mr:%02XRm|rm:%02XrM|mI1dw:81%XmI|mS1dw:83%XmS|Ri1dwb:%02Xri|mi1dwb:81%Xmi",
1346 1+n8, 3+n8, n, n, 5+n8, n)
1347end
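-- E.g. for add (n=0) this expands to
--   add_2 = "mr:01Rm|rm:03rM|mI1dw:810mI|mS1dw:830mS|Ri1dwb:05ri|mi1dwb:810mi"
-- i.e. ADD r/m,r (01 /r), ADD r,r/m (03 /r), ADD r/m,imm (81 /0), the
-- sign-extended imm8 form (83 /0) and the short EAX/AX/AL form (05, or 04 for
-- the byte variant via the standard -1 opcode mod).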
1348
1349-- Shift ops.
1350for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
1351 shl = 4, shr = 5, sar = 7, sal = 4 } do
1352 map_op[name.."_2"] = format("m1:D1%Xm|mC1dwb:D3%Xm|mi:C1%XmU", n, n, n)
1353end
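-- E.g. for shl (n=4) this yields
--   shl_2 = "m1:D14m|mC1dwb:D34m|mi:C14mU"
-- i.e. SHL r/m,1 (D1 /4), SHL r/m,cl (D3 /4) and SHL r/m,imm8 (C1 /4 ib).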
1354
1355-- Conditional ops.
1356for cc,n in pairs(map_cc) do
1357 map_op["j"..cc.."_1"] = format("J.:0F8%XJ", n) -- short: 7%X
1358 map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
1359 map_op["cmov"..cc.."_2"] = format("rmdw:0F4%XrM", n) -- P6+
1360end
1361
1362-- FP arithmetic ops.
1363for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
1364 sub = 4, subr = 5, div = 6, divr = 7 } do
1365 local nc = 192 + n * 8
1366 local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
1367 local fn = "f"..name
1368 map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:DC%Xm", nc, n, n)
1369 if n == 2 or n == 3 then
1370 map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n)
1371 else
1372 map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:DC%XM", nc, nr, n, n)
1373 map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
1374 map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
1375 end
1376 map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
1377end
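-- The nr adjustment accounts for the D8 vs. DC/DE encodings being swapped for
-- sub/subr and div/divr. E.g. for fsub (n=4): nc=0xE0, nr=0xE8, giving
--   fsub_1  = "ff:D8E0r|xd:D84m|xq:DC4m"  -- FSUB st0,st(i) / m32 / m64
--   fsubp_1 = "ff:DEE8r"                  -- FSUBP st(i),st0 = DE E8+i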
1378
1379-- FP conditional moves.
1380for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
1381 local n4 = n % 4
1382 local nc = 56000 + n4 * 8 + (n-n4) * 64
1383 map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
1384 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
1385end
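-- Note: 56000 is 0xDAC0, the base opcode of FCMOVB st0,st(i); the (n-n4)*64
-- term selects DB instead of DA for the negated conditions. E.g. for "e"
-- (n=1): nc=0xDAC8 -> fcmove_1 = "ff:DAC8r"; for "nb" (n=4): nc=0xDBC0 ->
-- fcmovnb_1 = "ff:DBC0r".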
1386
1387-- SSE FP arithmetic ops.
1388for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
1389 sub = 12, min = 13, div = 14, max = 15 } do
1390 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
1391 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
1392 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
1393 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
1394end
1395
1396------------------------------------------------------------------------------
1397
1398-- Process pattern string.
1399local function dopattern(pat, args, sz, op)
1400 local digit, addin
1401 local opcode = 0
1402 local szov = sz
1403 local narg = 1
1404
1405 -- Limit number of section buffer positions used by a single dasm_put().
1406 -- A single opcode needs a maximum of 2 positions. !x64
1407 if secpos+2 > maxsecpos then wflush() end
1408
1409 -- Process each character.
1410 for c in gmatch(pat.."|", ".") do
1411 if match(c, "%x") then -- Hex digit.
1412 digit = byte(c) - 48
1413 if digit > 48 then digit = digit - 39
1414 elseif digit > 16 then digit = digit - 7 end
1415 opcode = opcode*16 + digit
1416 addin = nil
1417 elseif c == "n" then -- Disable operand size mods for opcode.
1418 szov = nil
1419 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1420 addin = args[1]; opcode = opcode + addin.reg
1421 if narg < 2 then narg = 2 end
1422 elseif c == "R" then -- Merge 2nd operand regno. into opcode.
1423 addin = args[2]; opcode = opcode + addin.reg
1424 narg = 3
1425 elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
1426 local s
1427 if addin then
1428 s = addin.reg
1429 opcode = opcode - s -- Undo regno opcode merge.
1430 else
1431 s = opcode % 16 -- Undo last digit.
1432 opcode = (opcode - s) / 16
1433 end
1434 wputop(szov, opcode); opcode = nil
1435 local imark = (sub(pat, -1) == "I") -- Force a mark (ugly).
1436 -- Put ModRM/SIB with regno/last digit as spare.
1437 local nn = c == "m" and 1 or 2
1438 wputmrmsib(args[nn], imark, s, addin and addin.vreg)
1439 if narg <= nn then narg = nn + 1 end
1440 addin = nil
1441 else
1442 if opcode then -- Flush opcode.
1443 if addin and addin.reg == -1 then
1444 wputop(szov, opcode + 1)
1445 waction("VREG", addin.vreg); wputxb(0)
1446 else
1447 wputop(szov, opcode)
1448 end
1449 opcode = nil
1450 end
1451 if c == "|" then break end
1452 if c == "o" then -- Offset (pure 32 bit displacement).
1453 wputdarg(args[1].disp); if narg < 2 then narg = 2 end
1454 elseif c == "O" then
1455 wputdarg(args[2].disp); narg = 3
1456 else
1457 -- Anything else is an immediate operand.
1458 local a = args[narg]
1459 narg = narg + 1
1460 local mode, imm = a.mode, a.imm
1461 if mode == "iJ" and not match("iIJ", c) then
1462 werror("bad operand size for label")
1463 end
1464 if c == "S" then
1465 wputsbarg(imm)
1466 elseif c == "U" then
1467 wputbarg(imm)
1468 elseif c == "W" then
1469 wputwarg(imm)
1470 elseif c == "i" or c == "I" then
1471 if mode == "iJ" then
1472 wputlabel("IMM_", imm, 1)
1473 elseif mode == "iI" and c == "I" then
1474 waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
1475 else
1476 wputszarg(sz, imm)
1477 end
1478 elseif c == "J" then
1479 if mode == "iPJ" then
1480 waction("REL_A", imm) -- !x64 (secpos)
1481 else
1482 wputlabel("REL_", imm, 2)
1483 end
1484 else
1485 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1486 end
1487 end
1488 end
1489 end
1490end
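-- Example trace (sketch; the register-direct ModRM case of wputmrmsib() is
-- defined earlier in this file): for "add eax, ecx" the matching pair is
-- "mr:01Rm". The hex digits set opcode=0x01, "R" merges ecx's register number
-- (1) into it, and "m" subtracts it again, emits the opcode byte and passes
-- the first operand to wputmrmsib() with 1 as the spare ModRM reg field,
-- which encodes the register-direct form 0xC8: the emitted bytes are 01 C8.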
1491
1492------------------------------------------------------------------------------
1493
1494-- Mapping of operand modes to short names. Suppress output with '#'.
1495local map_modename = {
1496 r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
1497 f = "stx", F = "st0", J = "lbl", ["1"] = "1",
1498 I = "#", S = "#", O = "#",
1499}
1500
1501-- Return a table/string showing all possible operand modes.
1502local function templatehelp(template, nparams)
1503 if nparams == 0 then return "" end
1504 local t = {}
1505 for tm in gmatch(template, "[^%|]+") do
1506 local s = map_modename[sub(tm, 1, 1)]
1507 s = s..gsub(sub(tm, 2, nparams), ".", function(c)
1508 return ", "..map_modename[c]
1509 end)
1510 if not match(s, "#") then t[#t+1] = s end
1511 end
1512 return t
1513end
1514
1515-- Match operand modes against mode match part of template.
1516local function matchtm(tm, args)
1517 for i=1,#args do
1518 if not match(args[i].mode, sub(tm, i, i)) then return end
1519 end
1520 return true
1521end
1522
1523-- Handle opcodes defined with template strings.
1524map_op[".template__"] = function(params, template, nparams)
1525 if not params then return templatehelp(template, nparams) end
1526 local args = {}
1527
1528 -- Zero-operand opcodes have no match part.
1529 if #params == 0 then
1530 dopattern(template, args, "d", params.op)
1531 return
1532 end
1533
1534 -- Determine common operand size (coerce undefined size) or flag as mixed.
1535 local sz, szmix
1536 for i,p in ipairs(params) do
1537 args[i] = parseoperand(p)
1538 local nsz = args[i].opsize
1539 if nsz then
1540 if sz and sz ~= nsz then szmix = true else sz = nsz end
1541 end
1542 end
1543
1544 -- Try all match:pattern pairs (separated by '|').
1545 local gotmatch, lastpat
1546 for tm in gmatch(template, "[^%|]+") do
1547 -- Split off size match (starts after mode match) and pattern string.
1548 local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
1549 if pat == "" then pat = lastpat else lastpat = pat end
1550 if matchtm(tm, args) then
1551 local prefix = sub(szm, 1, 1)
1552 if prefix == "/" then -- Match both operand sizes.
1553 if args[1].opsize == sub(szm, 2, 2) and
1554 args[2].opsize == sub(szm, 3, 3) then
1555 dopattern(pat, args, sz, params.op) -- Process pattern string.
1556 return
1557 end
1558 else -- Match common operand size.
1559 local szp = sz
1560 if szm == "" then szm = "dwb" end -- Default size match.
1561 if prefix == "1" then szp = args[1].opsize; szmix = nil
1562 elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
1563 if not szmix and (prefix == "." or match(szm, szp or "#")) then
1564 dopattern(pat, args, szp, params.op) -- Process pattern string.
1565 return
1566 end
1567 end
1568 gotmatch = true
1569 end
1570 end
1571
1572 local msg = "bad operand mode"
1573 if gotmatch then
1574 if szmix then
1575 msg = "mixed operand size"
1576 else
1577 msg = sz and "bad operand size" or "missing operand size"
1578 end
1579 end
1580
1581 werror(msg.." in `"..opmodestr(params.op, args).."'")
1582end
1583
1584------------------------------------------------------------------------------
1585
1586-- Pseudo-opcodes for data storage.
1587local function op_data(params)
1588 if not params then return "imm..." end
1589 local sz = sub(params.op, 2, 2)
1590 if sz == "a" then sz = addrsize end
1591 for _,p in ipairs(params) do
1592 local a = parseoperand(p)
1593 if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
1594 werror("bad mode or size in `"..p.."'")
1595 end
1596 if a.mode == "iJ" then
1597 wputlabel("IMM_", a.imm, 1)
1598 else
1599 wputszarg(sz, a.imm)
1600 end
1601 end
1602end
1603
1604map_op[".byte_*"] = op_data
1605map_op[".sbyte_*"] = op_data
1606map_op[".word_*"] = op_data
1607map_op[".dword_*"] = op_data
1608map_op[".aword_*"] = op_data
1609
1610------------------------------------------------------------------------------
1611
1612-- Pseudo-opcode to mark the position where the action list is to be emitted.
1613map_op[".actionlist_1"] = function(params)
1614 if not params then return "cvar" end
1615 local name = params[1] -- No syntax check. You get to keep the pieces.
1616 wline(function(out) writeactions(out, name) end)
1617end
1618
1619-- Pseudo-opcode to mark the position where the global enum is to be emitted.
1620map_op[".globals_1"] = function(params)
1621 if not params then return "prefix" end
1622 local prefix = params[1] -- No syntax check. You get to keep the pieces.
1623 wline(function(out) writeglobals(out, prefix) end)
1624end
1625
1626-- Pseudo-opcode to mark the position where the global names are to be emitted.
1627map_op[".globalnames_1"] = function(params)
1628 if not params then return "cvar" end
1629 local name = params[1] -- No syntax check. You get to keep the pieces.
1630 wline(function(out) writeglobalnames(out, name) end)
1631end
1632
1633-- Pseudo-opcode to mark the position where the extern names are to be emitted.
1634map_op[".externnames_1"] = function(params)
1635 if not params then return "cvar" end
1636 local name = params[1] -- No syntax check. You get to keep the pieces.
1637 wline(function(out) writeexternnames(out, name) end)
1638end
1639
1640------------------------------------------------------------------------------
1641
1642-- Label pseudo-opcode (converted from trailing colon form).
1643map_op[".label_2"] = function(params)
1644 if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
1645 local a = parseoperand(params[1])
1646 local mode, imm = a.mode, a.imm
1647 if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
1648 -- Local label (1: ... 9:) or global label (->global:).
1649 waction("LABEL_LG", nil, 1)
1650 wputxb(imm)
1651 elseif mode == "iJ" then
1652 -- PC label (=>pcexpr:).
1653 waction("LABEL_PC", imm)
1654 else
1655 werror("bad label definition")
1656 end
1657 -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
1658 local addr = params[2]
1659 if addr then
1660 local a = parseoperand(params[2])
1661 if a.mode == "iPJ" then
1662 waction("SETLABEL", a.imm) -- !x64 (secpos)
1663 else
1664 werror("bad label assignment")
1665 end
1666 end
1667end
1668map_op[".label_1"] = map_op[".label_2"]
1669
1670------------------------------------------------------------------------------
1671
1672-- Alignment pseudo-opcode.
1673map_op[".align_1"] = function(params)
1674 if not params then return "numpow2" end
1675 local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
1676 if align then
1677 local x = align
1678 -- Must be a power of 2 in the range (2 ... 256).
1679 for i=1,8 do
1680 x = x / 2
1681 if x == 1 then
1682 waction("ALIGN", nil, 1)
1683 wputxb(align-1) -- Action byte is 2**n-1.
1684 return
1685 end
1686 end
1687 end
1688 werror("bad alignment")
1689end
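-- E.g. ".align 16" emits an ALIGN action with the byte 15 (the 2**n-1 mask),
-- while ".align 3" or ".align 512" fall through to "bad alignment"; only
-- powers of 2 from 2 to 256 survive the halving loop above.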
1690
1691-- Spacing pseudo-opcode.
1692map_op[".space_2"] = function(params)
1693 if not params then return "num [, filler]" end
1694 waction("SPACE", params[1])
1695 local fill = params[2]
1696 if fill then
1697 fill = tonumber(fill)
1698 if not fill or fill < 0 or fill > 255 then werror("bad filler") end
1699 end
1700 wputxb(fill or 0)
1701end
1702map_op[".space_1"] = map_op[".space_2"]
1703
1704------------------------------------------------------------------------------
1705
1706-- Pseudo-opcode for (primitive) type definitions (map to C types).
1707map_op[".type_3"] = function(params, nparams)
1708 if not params then
1709 return nparams == 2 and "name, ctype" or "name, ctype, reg"
1710 end
1711 local name, ctype, reg = params[1], params[2], params[3]
1712 if not match(name, "^[%a_][%w_]*$") then
1713 werror("bad type name `"..name.."'")
1714 end
1715 local tp = map_type[name]
1716 if tp then
1717 werror("duplicate type `"..name.."'")
1718 end
1719 if reg and not map_reg_valid_base[reg] then
1720 werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
1721 end
1722 -- Add #type to defines. A bit unclean to put it in map_archdef.
1723 map_archdef["#"..name] = "sizeof("..ctype..")"
1724 -- Add new type and emit shortcut define.
1725 local num = ctypenum + 1
1726 map_type[name] = {
1727 ctype = ctype,
1728 ctypefmt = format("Dt%X(%%s)", num),
1729 reg = reg,
1730 }
1731 wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
1732 ctypenum = num
1733end
1734map_op[".type_2"] = map_op[".type_3"]
1735
1736-- Dump type definitions.
1737local function dumptypes(out, lvl)
1738 local t = {}
1739 for name in pairs(map_type) do t[#t+1] = name end
1740 sort(t)
1741 out:write("Type definitions:\n")
1742 for _,name in ipairs(t) do
1743 local tp = map_type[name]
1744 local reg = tp.reg and map_reg_rev[tp.reg] or ""
1745 out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
1746 end
1747 out:write("\n")
1748end
1749
1750------------------------------------------------------------------------------
1751
1752-- Set the current section.
1753function _M.section(num)
1754 waction("SECTION")
1755 wputxb(num)
1756 wflush(true) -- SECTION is a terminal action.
1757end
1758
1759------------------------------------------------------------------------------
1760
1761-- Dump architecture description.
1762function _M.dumparch(out)
1763 out:write(format("DynASM %s version %s, released %s\n\n",
1764 _info.arch, _info.version, _info.release))
1765 dumpregs(out)
1766 dumpactions(out)
1767end
1768
1769-- Dump all user defined elements.
1770function _M.dumpdef(out, lvl)
1771 dumptypes(out, lvl)
1772 dumpglobals(out, lvl)
1773 dumpexterns(out, lvl)
1774end
1775
1776------------------------------------------------------------------------------
1777
1778-- Pass callbacks from/to the DynASM core.
1779function _M.passcb(wl, we, wf, ww)
1780 wline, werror, wfatal, wwarn = wl, we, wf, ww
1781 return wflush
1782end
1783
1784-- Setup the arch-specific module.
1785function _M.setup(arch, opt)
1786 g_arch, g_opt = arch, opt
1787end
1788
1789-- Merge the core maps and the arch-specific maps.
1790function _M.mergemaps(map_coreop, map_def)
1791 setmetatable(map_op, { __index = map_coreop })
1792 setmetatable(map_def, { __index = map_archdef })
1793 return map_op, map_def
1794end
1795
1796return _M
1797
1798------------------------------------------------------------------------------
1799
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
new file mode 100644
index 00000000..20ff9cf5
--- /dev/null
+++ b/dynasm/dynasm.lua
@@ -0,0 +1,1070 @@
1------------------------------------------------------------------------------
2-- DynASM. A dynamic assembler for code generation engines.
3-- Originally designed and implemented for LuaJIT.
4--
5-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
6-- See below for full copyright notice.
7------------------------------------------------------------------------------
8
9-- Application information.
10local _info = {
11 name = "DynASM",
12 description = "A dynamic assembler for code generation engines",
13 version = "1.2.1",
14 vernum = 10201,
15 release = "2009-04-16",
16 author = "Mike Pall",
17 url = "http://luajit.org/dynasm.html",
18 license = "MIT",
19 copyright = [[
20Copyright (C) 2005-2009 Mike Pall. All rights reserved.
21
22Permission is hereby granted, free of charge, to any person obtaining
23a copy of this software and associated documentation files (the
24"Software"), to deal in the Software without restriction, including
25without limitation the rights to use, copy, modify, merge, publish,
26distribute, sublicense, and/or sell copies of the Software, and to
27permit persons to whom the Software is furnished to do so, subject to
28the following conditions:
29
30The above copyright notice and this permission notice shall be
31included in all copies or substantial portions of the Software.
32
33THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
34EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
35MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
36IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
37CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
38TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
39SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40
41[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
42]],
43}
44
45-- Cache library functions.
46local type, pairs, ipairs = type, pairs, ipairs
47local pcall, error, assert = pcall, error, assert
48local _s = string
49local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub
50local format, rep, upper = _s.format, _s.rep, _s.upper
51local _t = table
52local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort
53local exit = os.exit
54local io = io
55local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr
56
57------------------------------------------------------------------------------
58
59-- Program options.
60local g_opt = {}
61
62-- Global state for current file.
63local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch
64local g_errcount = 0
65
66-- Write buffer for output file.
67local g_wbuffer, g_capbuffer
68
69------------------------------------------------------------------------------
70
71-- Write an output line (or callback function) to the buffer.
72local function wline(line, needindent)
73 local buf = g_capbuffer or g_wbuffer
74 buf[#buf+1] = needindent and g_indent..line or line
75 g_synclineno = g_synclineno + 1
76end
77
78-- Write assembler line as a comment, if requested.
79local function wcomment(aline)
80 if g_opt.comment then
81 wline(g_opt.comment..aline..g_opt.endcomment, true)
82 end
83end
84
85-- Resync CPP line numbers.
86local function wsync()
87 if g_synclineno ~= g_lineno and g_opt.cpp then
88 wline("# "..g_lineno..' "'..g_fname..'"')
89 g_synclineno = g_lineno
90 end
91end
92
93-- Dummy action flush function. Replaced with arch-specific function later.
94local function wflush(term)
95end
96
97-- Dump all buffered output lines.
98local function wdumplines(out, buf)
99 for _,line in ipairs(buf) do
100 if type(line) == "string" then
101 assert(out:write(line, "\n"))
102 else
103 -- Special callback to dynamically insert lines after end of processing.
104 line(out)
105 end
106 end
107end
108
109------------------------------------------------------------------------------
110
111-- Emit an error. Processing continues with next statement.
112local function werror(msg)
113 error(format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline), 0)
114end
115
116-- Emit a fatal error. Processing stops.
117local function wfatal(msg)
118 g_errcount = "fatal"
119 werror(msg)
120end
121
122-- Print a warning. Processing continues.
123local function wwarn(msg)
124 stderr:write(format("%s:%s: warning: %s:\n%s\n",
125 g_fname, g_lineno, msg, g_curline))
126end
127
128-- Print caught error message. But suppress excessive errors.
129local function wprinterr(...)
130 if type(g_errcount) == "number" then
131 -- Regular error.
132 g_errcount = g_errcount + 1
133 if g_errcount < 21 then -- Seems to be a reasonable limit.
134 stderr:write(...)
135 elseif g_errcount == 21 then
136 stderr:write(g_fname,
137 ":*: warning: too many errors (suppressed further messages).\n")
138 end
139 else
140 -- Fatal error.
141 stderr:write(...)
142 return true -- Stop processing.
143 end
144end
145
146------------------------------------------------------------------------------
147
148-- Map holding all option handlers.
149local opt_map = {}
150local opt_current
151
152-- Print error and exit with error status.
153local function opterror(...)
154 stderr:write("dynasm.lua: ERROR: ", ...)
155 stderr:write("\n")
156 exit(1)
157end
158
159-- Get option parameter.
160local function optparam(args)
161 local argn = args.argn
162 local p = args[argn]
163 if not p then
164 opterror("missing parameter for option `", opt_current, "'.")
165 end
166 args.argn = argn + 1
167 return p
168end
169
170------------------------------------------------------------------------------
171
172-- Core pseudo-opcodes.
173local map_coreop = {}
174-- Dummy opcode map. Replaced by arch-specific map.
175local map_op = {}
176
177-- Forward declarations.
178local dostmt
179local readfile
180
181------------------------------------------------------------------------------
182
183-- Map for defines (initially empty, chains to arch-specific map).
184local map_def = {}
185
186-- Pseudo-opcode to define a substitution.
187map_coreop[".define_2"] = function(params, nparams)
188 if not params then return nparams == 1 and "name" or "name, subst" end
189 local name, def = params[1], params[2] or "1"
190 if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
191 map_def[name] = def
192end
193map_coreop[".define_1"] = map_coreop[".define_2"]
194
195-- Define a substitution on the command line.
196function opt_map.D(args)
197 local namesubst = optparam(args)
198 local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$")
199 if name then
200 map_def[name] = subst
201 elseif match(namesubst, "^[%a_][%w_]*$") then
202 map_def[namesubst] = "1"
203 else
204 opterror("bad define")
205 end
206end
207
208-- Undefine a substitution on the command line.
209function opt_map.U(args)
210 local name = optparam(args)
211 if match(name, "^[%a_][%w_]*$") then
212 map_def[name] = nil
213 else
214 opterror("bad define")
215 end
216end
217
218-- Helper for definesubst.
219local gotsubst
220
221local function definesubst_one(word)
222 local subst = map_def[word]
223 if subst then gotsubst = word; return subst else return word end
224end
225
226-- Iteratively substitute defines.
227local function definesubst(stmt)
228 -- Limit number of iterations.
229 for i=1,100 do
230 gotsubst = false
231 stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
232 if not gotsubst then break end
233 end
234 if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
235 return stmt
236end
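-- E.g. with ".define K1 K2*2" and ".define K2 8", the statement "mov eax, K1"
-- becomes "mov eax, K2*2" on the first pass and "mov eax, 8*2" on the second;
-- a define that keeps expanding (directly or via a cycle) still substitutes
-- after 100 passes and triggers the "recursive define" fatal error above.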
237
238-- Dump all defines.
239local function dumpdefines(out, lvl)
240 local t = {}
241 for name in pairs(map_def) do
242 t[#t+1] = name
243 end
244 sort(t)
245 out:write("Defines:\n")
246 for _,name in ipairs(t) do
247 local subst = map_def[name]
248 if g_arch then subst = g_arch.revdef(subst) end
249 out:write(format(" %-20s %s\n", name, subst))
250 end
251 out:write("\n")
252end
253
254------------------------------------------------------------------------------
255
256-- Support variables for conditional assembly.
257local condlevel = 0
258local condstack = {}
259
260-- Evaluate condition with a Lua expression. Substitutions already performed.
261local function cond_eval(cond)
262 local func, err = loadstring("return "..cond)
263 if func then
264 setfenv(func, {}) -- No globals. All unknown identifiers evaluate to nil.
265 local ok, res = pcall(func)
266 if ok then
267 if res == 0 then return false end -- Oh well.
268 return not not res
269 end
270 err = res
271 end
272 wfatal("bad condition: "..err)
273end
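-- Conditions are plain Lua expressions evaluated in an empty environment, so
-- an identifier that was not replaced by a .define evaluates to nil and the
-- condition is false. E.g. after ".define FOO 2", ".if FOO > 1" is seen here
-- as "2 > 1" (true), ".if BAR" is false (BAR is nil), and a condition that
-- evaluates to the number 0 is also treated as false (the explicit check above).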
274
275-- Skip statements until next conditional pseudo-opcode at the same level.
276local function stmtskip()
277 local dostmt_save = dostmt
278 local lvl = 0
279 dostmt = function(stmt)
280 local op = match(stmt, "^%s*(%S+)")
281 if op == ".if" then
282 lvl = lvl + 1
283 elseif lvl ~= 0 then
284 if op == ".endif" then lvl = lvl - 1 end
285 elseif op == ".elif" or op == ".else" or op == ".endif" then
286 dostmt = dostmt_save
287 dostmt(stmt)
288 end
289 end
290end
291
292-- Pseudo-opcodes for conditional assembly.
293map_coreop[".if_1"] = function(params)
294 if not params then return "condition" end
295 local lvl = condlevel + 1
296 local res = cond_eval(params[1])
297 condlevel = lvl
298 condstack[lvl] = res
299 if not res then stmtskip() end
300end
301
302map_coreop[".elif_1"] = function(params)
303 if not params then return "condition" end
304 if condlevel == 0 then wfatal(".elif without .if") end
305 local lvl = condlevel
306 local res = condstack[lvl]
307 if res then
308 if res == "else" then wfatal(".elif after .else") end
309 else
310 res = cond_eval(params[1])
311 if res then
312 condstack[lvl] = res
313 return
314 end
315 end
316 stmtskip()
317end
318
319map_coreop[".else_0"] = function(params)
320 if condlevel == 0 then wfatal(".else without .if") end
321 local lvl = condlevel
322 local res = condstack[lvl]
323 condstack[lvl] = "else"
324 if res then
325 if res == "else" then wfatal(".else after .else") end
326 stmtskip()
327 end
328end
329
330map_coreop[".endif_0"] = function(params)
331 local lvl = condlevel
332 if lvl == 0 then wfatal(".endif without .if") end
333 condlevel = lvl - 1
334end
335
336-- Check for unfinished conditionals.
337local function checkconds()
338 if g_errcount ~= "fatal" and condlevel ~= 0 then
339 wprinterr(g_fname, ":*: error: unbalanced conditional\n")
340 end
341end
342
343------------------------------------------------------------------------------
344
345-- Search for a file in the given path and open it for reading.
346local function pathopen(path, name)
347 local dirsep = match(package.path, "\\") and "\\" or "/"
348 for _,p in ipairs(path) do
349 local fullname = p == "" and name or p..dirsep..name
350 local fin = io.open(fullname, "r")
351 if fin then
352 g_fname = fullname
353 return fin
354 end
355 end
356end
357
358-- Include a file.
359map_coreop[".include_1"] = function(params)
360 if not params then return "filename" end
361 local name = params[1]
362  -- Save state. Ugly, I know, but upvalues are fast.
363 local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent
364 -- Read the included file.
365 local fatal = readfile(pathopen(g_opt.include, name) or
366 wfatal("include file `"..name.."' not found"))
367 -- Restore state.
368 g_synclineno = -1
369 g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi
370 if fatal then wfatal("in include file") end
371end
372
373-- Make .include initially available, too.
374map_op[".include_1"] = map_coreop[".include_1"]
375
376------------------------------------------------------------------------------
377
378-- Support variables for macros.
379local mac_capture, mac_lineno, mac_name
380local mac_active = {}
381local mac_list = {}
382
383-- Pseudo-opcode to define a macro.
384map_coreop[".macro_*"] = function(mparams)
385 if not mparams then return "name [, params...]" end
386 -- Split off and validate macro name.
387 local name = remove(mparams, 1)
388 if not name then werror("missing macro name") end
389 if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]+$")) then
390 wfatal("bad macro name `"..name.."'")
391 end
392 -- Validate macro parameter names.
393 local mdup = {}
394 for _,mp in ipairs(mparams) do
395 if not match(mp, "^[%a_][%w_]*$") then
396 wfatal("bad macro parameter name `"..mp.."'")
397 end
398 if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
399 mdup[mp] = true
400 end
401 -- Check for duplicate or recursive macro definitions.
402 local opname = name.."_"..#mparams
403 if map_op[opname] or map_op[name.."_*"] then
404 wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
405 end
406 if mac_capture then wfatal("recursive macro definition") end
407
408 -- Enable statement capture.
409 local lines = {}
410 mac_lineno = g_lineno
411 mac_name = name
412 mac_capture = function(stmt) -- Statement capture function.
413 -- Stop macro definition with .endmacro pseudo-opcode.
414 if not match(stmt, "^%s*.endmacro%s*$") then
415 lines[#lines+1] = stmt
416 return
417 end
418 mac_capture = nil
419 mac_lineno = nil
420 mac_name = nil
421 mac_list[#mac_list+1] = opname
422 -- Add macro-op definition.
423 map_op[opname] = function(params)
424 if not params then return mparams, lines end
425 -- Protect against recursive macro invocation.
426 if mac_active[opname] then wfatal("recursive macro invocation") end
427 mac_active[opname] = true
428 -- Setup substitution map.
429 local subst = {}
430 for i,mp in ipairs(mparams) do subst[mp] = params[i] end
431 local mcom
432 if g_opt.maccomment and g_opt.comment then
433 mcom = " MACRO "..name.." ("..#mparams..")"
434 wcomment("{"..mcom)
435 end
436 -- Loop through all captured statements
437 for _,stmt in ipairs(lines) do
438 -- Substitute macro parameters.
439 local st = gsub(stmt, "[%w_]+", subst)
440 st = definesubst(st)
441 st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
442 if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
443 -- Emit statement. Use a protected call for better diagnostics.
444 local ok, err = pcall(dostmt, st)
445 if not ok then
446 -- Add the captured statement to the error.
447 wprinterr(err, "\n", g_indent, "| ", stmt,
448 "\t[MACRO ", name, " (", #mparams, ")]\n")
449 end
450 end
451 if mcom then wcomment("}"..mcom) end
452 mac_active[opname] = nil
453 end
454 end
455end
456
457-- An .endmacro pseudo-opcode outside of a macro definition is an error.
458map_coreop[".endmacro_0"] = function(params)
459 wfatal(".endmacro without .macro")
460end
461
462-- Dump all macros and their contents (with -PP only).
463local function dumpmacros(out, lvl)
464 sort(mac_list)
465 out:write("Macros:\n")
466 for _,opname in ipairs(mac_list) do
467 local name = sub(opname, 1, -3)
468 local params, lines = map_op[opname]()
469 out:write(format(" %-20s %s\n", name, concat(params, ", ")))
470 if lvl > 1 then
471 for _,line in ipairs(lines) do
472 out:write(" |", line, "\n")
473 end
474 out:write("\n")
475 end
476 end
477 out:write("\n")
478end
479
480-- Check for unfinished macro definitions.
481local function checkmacros()
482 if mac_capture then
483 wprinterr(g_fname, ":", mac_lineno,
484 ": error: unfinished .macro `", mac_name ,"'\n")
485 end
486end
487
488------------------------------------------------------------------------------
489
490-- Support variables for captures.
491local cap_lineno, cap_name
492local cap_buffers = {}
493local cap_used = {}
494
495-- Start a capture.
496map_coreop[".capture_1"] = function(params)
497 if not params then return "name" end
498 wflush()
499 local name = params[1]
500 if not match(name, "^[%a_][%w_]*$") then
501 wfatal("bad capture name `"..name.."'")
502 end
503 if cap_name then
504 wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
505 end
506 cap_name = name
507 cap_lineno = g_lineno
508 -- Create or continue a capture buffer and start the output line capture.
509 local buf = cap_buffers[name]
510 if not buf then buf = {}; cap_buffers[name] = buf end
511 g_capbuffer = buf
512 g_synclineno = 0
513end
514
515-- Stop a capture.
516map_coreop[".endcapture_0"] = function(params)
517 wflush()
518 if not cap_name then wfatal(".endcapture without a valid .capture") end
519 cap_name = nil
520 cap_lineno = nil
521 g_capbuffer = nil
522 g_synclineno = 0
523end
524
525-- Dump a capture buffer.
526map_coreop[".dumpcapture_1"] = function(params)
527 if not params then return "name" end
528 wflush()
529 local name = params[1]
530 if not match(name, "^[%a_][%w_]*$") then
531 wfatal("bad capture name `"..name.."'")
532 end
533 cap_used[name] = true
534 wline(function(out)
535 local buf = cap_buffers[name]
536 if buf then wdumplines(out, buf) end
537 end)
538 g_synclineno = 0
539end
540
541-- Dump all captures and their buffers (with -PP only).
542local function dumpcaptures(out, lvl)
543 out:write("Captures:\n")
544 for name,buf in pairs(cap_buffers) do
545 out:write(format(" %-20s %4s)\n", name, "("..#buf))
546 if lvl > 1 then
547 local bar = rep("=", 76)
548 out:write(" ", bar, "\n")
549 for _,line in ipairs(buf) do
550 out:write(" ", line, "\n")
551 end
552 out:write(" ", bar, "\n\n")
553 end
554 end
555 out:write("\n")
556end
557
558-- Check for unfinished or unused captures.
559local function checkcaptures()
560 if cap_name then
561 wprinterr(g_fname, ":", cap_lineno,
562 ": error: unfinished .capture `", cap_name,"'\n")
563 return
564 end
565 for name in pairs(cap_buffers) do
566 if not cap_used[name] then
567 wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n")
568 end
569 end
570end
571
572------------------------------------------------------------------------------
573
574-- Section names.
575local map_sections = {}
576
577-- Pseudo-opcode to define code sections.
578-- TODO: Data sections, BSS sections. Needs extra C code and API.
579map_coreop[".section_*"] = function(params)
580 if not params then return "name..." end
581 if #map_sections > 0 then werror("duplicate section definition") end
582 wflush()
583 for sn,name in ipairs(params) do
584 local opname = "."..name.."_0"
585 if not match(name, "^[%a][%w_]*$") or
586 map_op[opname] or map_op["."..name.."_*"] then
587 werror("bad section name `"..name.."'")
588 end
589 map_sections[#map_sections+1] = name
590 wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1))
591 map_op[opname] = function(params) g_arch.section(sn-1) end
592 end
593 wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections))
594end
595
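-- Illustrative sketch (section names hypothetical): declaring
--   |.section code, data
-- emits #define DASM_SECTION_CODE 0, #define DASM_SECTION_DATA 1 and
-- #define DASM_MAXSECTION 2, and defines |.code and |.data to switch sections
-- (the TODO above notes that non-code sections still need C-side support).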
596-- Dump all sections.
597local function dumpsections(out, lvl)
598 out:write("Sections:\n")
599 for _,name in ipairs(map_sections) do
600 out:write(format(" %s\n", name))
601 end
602 out:write("\n")
603end
604
605------------------------------------------------------------------------------
606
607-- Load architecture-specific module.
608local function loadarch(arch)
609 if not match(arch, "^[%w_]+$") then return "bad arch name" end
610 local ok, m_arch = pcall(require, "dasm_"..arch)
611 if not ok then return "cannot load module: "..m_arch end
612 g_arch = m_arch
613 wflush = m_arch.passcb(wline, werror, wfatal, wwarn)
614 m_arch.setup(arch, g_opt)
615 map_op, map_def = m_arch.mergemaps(map_coreop, map_def)
616end
617
618-- Dump architecture description.
619function opt_map.dumparch(args)
620 local name = optparam(args)
621 if not g_arch then
622 local err = loadarch(name)
623 if err then opterror(err) end
624 end
625
626 local t = {}
627 for name in pairs(map_coreop) do t[#t+1] = name end
628 for name in pairs(map_op) do t[#t+1] = name end
629 sort(t)
630
631 local out = stdout
632 local _arch = g_arch._info
633 out:write(format("%s version %s, released %s, %s\n",
634 _info.name, _info.version, _info.release, _info.url))
635 g_arch.dumparch(out)
636
637 local pseudo = true
638 out:write("Pseudo-Opcodes:\n")
639 for _,sname in ipairs(t) do
640 local name, nparam = match(sname, "^(.+)_([0-9%*])$")
641 if name then
642 if pseudo and sub(name, 1, 1) ~= "." then
643 out:write("\nOpcodes:\n")
644 pseudo = false
645 end
646 local f = map_op[sname]
647 local s
648 if nparam ~= "*" then nparam = nparam + 0 end
649 if nparam == 0 then
650 s = ""
651 elseif type(f) == "string" then
652 s = map_op[".template__"](nil, f, nparam)
653 else
654 s = f(nil, nparam)
655 end
656 if type(s) == "table" then
657 for _,s2 in ipairs(s) do
658 out:write(format(" %-12s %s\n", name, s2))
659 end
660 else
661 out:write(format(" %-12s %s\n", name, s))
662 end
663 end
664 end
665 out:write("\n")
666 exit(0)
667end
668
669-- Pseudo-opcode to set the architecture.
670-- Only initially available (map_op is replaced when called).
671map_op[".arch_1"] = function(params)
672 if not params then return "name" end
673 local err = loadarch(params[1])
674 if err then wfatal(err) end
675end
676
677-- Dummy .arch pseudo-opcode to improve the error report.
678map_coreop[".arch_1"] = function(params)
679 if not params then return "name" end
680 wfatal("duplicate .arch statement")
681end
682
683------------------------------------------------------------------------------
684
685-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'.
686map_coreop[".nop_*"] = function(params)
687 if not params then return "[ignored...]" end
688end
689
690-- Pseudo-opcodes to raise errors.
691map_coreop[".error_1"] = function(params)
692 if not params then return "message" end
693 werror(params[1])
694end
695
696map_coreop[".fatal_1"] = function(params)
697 if not params then return "message" end
698 wfatal(params[1])
699end
700
701-- Dump all user defined elements.
702local function dumpdef(out)
703 local lvl = g_opt.dumpdef
704 if lvl == 0 then return end
705 dumpsections(out, lvl)
706 dumpdefines(out, lvl)
707 if g_arch then g_arch.dumpdef(out, lvl) end
708 dumpmacros(out, lvl)
709 dumpcaptures(out, lvl)
710end
711
712------------------------------------------------------------------------------
713
714-- Helper for splitstmt.
715local splitlvl
716
717local function splitstmt_one(c)
718 if c == "(" then
719 splitlvl = ")"..splitlvl
720 elseif c == "[" then
721 splitlvl = "]"..splitlvl
722 elseif c == ")" or c == "]" then
723 if sub(splitlvl, 1, 1) ~= c then werror("unbalanced () or []") end
724 splitlvl = sub(splitlvl, 2)
725 elseif splitlvl == "" then
726 return " \0 "
727 end
728 return c
729end
730
731-- Split statement into (pseudo-)opcode and params.
732local function splitstmt(stmt)
733 -- Convert label with trailing-colon into .label statement.
734 local label = match(stmt, "^%s*(.+):%s*$")
735 if label then return ".label", {label} end
736
737 -- Split at commas and equal signs, but obey parentheses and brackets.
738 splitlvl = ""
739 stmt = gsub(stmt, "[,%(%)%[%]]", splitstmt_one)
740 if splitlvl ~= "" then werror("unbalanced () or []") end
741
742 -- Split off opcode.
743 local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
744 if not op then werror("bad statement syntax") end
745
746 -- Split parameters.
747 local params = {}
748 for p in gmatch(other, "%s*(%Z+)%z?") do
749 params[#params+1] = gsub(p, "%s+$", "")
750 end
751 if #params > 16 then werror("too many parameters") end
752
753 params.op = op
754 return op, params
755end
756
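-- Illustrative results of the splitting above (statements hypothetical):
--
--   splitstmt("mov eax, [ebx+8]")  --> "mov",    { "eax", "[ebx+8]" }
--   splitstmt("->loop:")           --> ".label", { "->loop" }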
757-- Process a single statement.
758dostmt = function(stmt)
759 -- Ignore empty statements.
760 if match(stmt, "^%s*$") then return end
761
762 -- Capture macro defs before substitution.
763 if mac_capture then return mac_capture(stmt) end
764 stmt = definesubst(stmt)
765
766 -- Emit C code without parsing the line.
767 if sub(stmt, 1, 1) == "|" then
768 local tail = sub(stmt, 2)
769 wflush()
770 if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end
771 return
772 end
773
774 -- Split into (pseudo-)opcode and params.
775 local op, params = splitstmt(stmt)
776
777 -- Get opcode handler (matching # of parameters or generic handler).
778 local f = map_op[op.."_"..#params] or map_op[op.."_*"]
779 if not f then
780 if not g_arch then wfatal("first statement must be .arch") end
781 -- Improve error report.
782 for i=0,16 do
783 if map_op[op.."_"..i] then
784 werror("wrong number of parameters for `"..op.."'")
785 end
786 end
787 werror("unknown statement `"..op.."'")
788 end
789
790 -- Call opcode handler or special handler for template strings.
791 if type(f) == "string" then
792 map_op[".template__"](params, f)
793 else
794 f(params)
795 end
796end
797
798-- Process a single line.
799local function doline(line)
800 if g_opt.flushline then wflush() end
801
802 -- Assembler line?
803 local indent, aline = match(line, "^(%s*)%|(.*)$")
804 if not aline then
805 -- No, plain C code line, need to flush first.
806 wflush()
807 wsync()
808 wline(line, false)
809 return
810 end
811
812 g_indent = indent -- Remember current line indentation.
813
814 -- Emit C code (even from macros). Avoids echo and line parsing.
815 if sub(aline, 1, 1) == "|" then
816 if not mac_capture then
817 wsync()
818 elseif g_opt.comment then
819 wsync()
820 wcomment(aline)
821 end
822 dostmt(aline)
823 return
824 end
825
826 -- Echo assembler line as a comment.
827 if g_opt.comment then
828 wsync()
829 wcomment(aline)
830 end
831
832 -- Strip assembler comments.
833 aline = gsub(aline, "//.*$", "")
834
835 -- Split line into statements at semicolons.
836 if match(aline, ";") then
837 for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end
838 else
839 dostmt(aline)
840 end
841end
842
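-- Illustrative classification of source lines (lines hypothetical):
--
--   int foo(void) {       <- plain C line: flushed and passed through
--   | mov eax, 1; ret     <- assembler line, split at ';' into two statements
--   || lua_settop(L, 1);  <- '||' prefix: C code emitted without parsing the line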
843------------------------------------------------------------------------------
844
845-- Write DynASM header.
846local function dasmhead(out)
847 out:write(format([[
848/*
849** This file has been pre-processed with DynASM.
850** %s
851** DynASM version %s, DynASM %s version %s
852** DO NOT EDIT! The original file is in "%s".
853*/
854
855#if DASM_VERSION != %d
856#error "Version mismatch between DynASM and included encoding engine"
857#endif
858
859]], _info.url,
860 _info.version, g_arch._info.arch, g_arch._info.version,
861 g_fname, _info.vernum))
862end
863
864-- Read input file.
865readfile = function(fin)
866 g_indent = ""
867 g_lineno = 0
868 g_synclineno = -1
869
870 -- Process all lines.
871 for line in fin:lines() do
872 g_lineno = g_lineno + 1
873 g_curline = line
874 local ok, err = pcall(doline, line)
875 if not ok and wprinterr(err, "\n") then return true end
876 end
877 wflush()
878
879 -- Close input file.
880 assert(fin == stdin or fin:close())
881end
882
883-- Write output file.
884local function writefile(outfile)
885 local fout
886
887 -- Open output file.
888 if outfile == nil or outfile == "-" then
889 fout = stdout
890 else
891 fout = assert(io.open(outfile, "w"))
892 end
893
894 -- Write all buffered lines.
895 wdumplines(fout, g_wbuffer)
896
897 -- Close output file.
898 assert(fout == stdout or fout:close())
899
900 -- Optionally dump definitions.
901 dumpdef(fout == stdout and stderr or stdout)
902end
903
904-- Translate an input file to an output file.
905local function translate(infile, outfile)
906 g_wbuffer = {}
907 g_indent = ""
908 g_lineno = 0
909 g_synclineno = -1
910
911 -- Put header.
912 wline(dasmhead)
913
914 -- Read input file.
915 local fin
916 if infile == "-" then
917 g_fname = "(stdin)"
918 fin = stdin
919 else
920 g_fname = infile
921 fin = assert(io.open(infile, "r"))
922 end
923 readfile(fin)
924
925 -- Check for errors.
926 if not g_arch then
927 wprinterr(g_fname, ":*: error: missing .arch directive\n")
928 end
929 checkconds()
930 checkmacros()
931 checkcaptures()
932
933 if g_errcount ~= 0 then
934 stderr:write(g_fname, ":*: info: ", g_errcount, " error",
935 (type(g_errcount) == "number" and g_errcount > 1) and "s" or "",
936 " in input file -- no output file generated.\n")
937 dumpdef(stderr)
938 exit(1)
939 end
940
941 -- Write output file.
942 writefile(outfile)
943end
944
945------------------------------------------------------------------------------
946
947-- Print help text.
948function opt_map.help()
949 stdout:write("DynASM -- ", _info.description, ".\n")
950 stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n")
951 stdout:write[[
952
953Usage: dynasm [OPTION]... INFILE.dasc|-
954
955 -h, --help Display this help text.
956 -V, --version Display version and copyright information.
957
958 -o, --outfile FILE Output file name (default is stdout).
959 -I, --include DIR Add directory to the include search path.
960
961 -c, --ccomment Use /* */ comments for assembler lines.
962 -C, --cppcomment Use // comments for assembler lines (default).
963 -N, --nocomment Suppress assembler lines in output.
964 -M, --maccomment Show macro expansions as comments (default off).
965
966 -L, --nolineno Suppress CPP line number information in output.
967 -F, --flushline Flush action list for every line.
968
969 -D NAME[=SUBST] Define a substitution.
970 -U NAME Undefine a substitution.
971
972 -P, --dumpdef Dump defines, macros, etc. Repeat for more output.
973 -A, --dumparch ARCH Load architecture ARCH and dump description.
974]]
975 exit(0)
976end
977
978-- Print version information.
979function opt_map.version()
980 stdout:write(format("%s version %s, released %s\n%s\n\n%s",
981 _info.name, _info.version, _info.release, _info.url, _info.copyright))
982 exit(0)
983end
984
985-- Misc. options.
986function opt_map.outfile(args) g_opt.outfile = optparam(args) end
987function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end
988function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end
989function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end
990function opt_map.nocomment() g_opt.comment = false end
991function opt_map.maccomment() g_opt.maccomment = true end
992function opt_map.nolineno() g_opt.cpp = false end
993function opt_map.flushline() g_opt.flushline = true end
994function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end
995
996------------------------------------------------------------------------------
997
998-- Short aliases for long options.
999local opt_alias = {
1000 h = "help", ["?"] = "help", V = "version",
1001 o = "outfile", I = "include",
1002 c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment",
1003 L = "nolineno", F = "flushline",
1004 P = "dumpdef", A = "dumparch",
1005}
1006
1007-- Parse single option.
1008local function parseopt(opt, args)
1009 opt_current = #opt == 1 and "-"..opt or "--"..opt
1010 local f = opt_map[opt] or opt_map[opt_alias[opt]]
1011 if not f then
1012 opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
1013 end
1014 f(args)
1015end
1016
1017-- Parse arguments.
1018local function parseargs(args)
1019 -- Default options.
1020 g_opt.comment = "//|"
1021 g_opt.endcomment = ""
1022 g_opt.cpp = true
1023 g_opt.dumpdef = 0
1024 g_opt.include = { "" }
1025
1026 -- Process all option arguments.
1027 args.argn = 1
1028 repeat
1029 local a = args[args.argn]
1030 if not a then break end
1031 local lopt, opt = match(a, "^%-(%-?)(.+)")
1032 if not opt then break end
1033 args.argn = args.argn + 1
1034 if lopt == "" then
1035 -- Loop through short options.
1036 for o in gmatch(opt, ".") do parseopt(o, args) end
1037 else
1038 -- Long option.
1039 parseopt(opt, args)
1040 end
1041 until false
1042
1043 -- Check for proper number of arguments.
1044 local nargs = #args - args.argn + 1
1045 if nargs ~= 1 then
1046 if nargs == 0 then
1047 if g_opt.dumpdef > 0 then return dumpdef(stdout) end
1048 end
1049 opt_map.help()
1050 end
1051
1052 -- Translate a single input file to a single output file
1053 -- TODO: Handle multiple files?
1054 translate(args[args.argn], g_opt.outfile)
1055end
1056
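-- Illustrative invocation (file names hypothetical):
--
--   dynasm.lua -LN -o out.h in.dasc
--
-- splits the bundled short options into -L (nolineno) and -N (nocomment),
-- sets the output file, and finally translates in.dasc to out.h.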
1057------------------------------------------------------------------------------
1058
1059-- Add the directory dynasm.lua resides in to the Lua module search path.
1060local arg = arg
1061if arg and arg[0] then
1062 local prefix = match(arg[0], "^(.*[/\\])")
1063 if prefix then package.path = prefix.."?.lua;"..package.path end
1064end
1065
1066-- Start DynASM.
1067parseargs{...}
1068
1069------------------------------------------------------------------------------
1070
diff --git a/etc/strict.lua b/etc/strict.lua
new file mode 100644
index 00000000..604619dd
--- /dev/null
+++ b/etc/strict.lua
@@ -0,0 +1,41 @@
1--
2-- strict.lua
3-- checks uses of undeclared global variables
4-- All global variables must be 'declared' through a regular assignment
5-- (even assigning nil will do) in a main chunk before being used
6-- anywhere or assigned to inside a function.
7--
8
9local getinfo, error, rawset, rawget = debug.getinfo, error, rawset, rawget
10
11local mt = getmetatable(_G)
12if mt == nil then
13 mt = {}
14 setmetatable(_G, mt)
15end
16
17mt.__declared = {}
18
19local function what ()
20 local d = getinfo(3, "S")
21 return d and d.what or "C"
22end
23
24mt.__newindex = function (t, n, v)
25 if not mt.__declared[n] then
26 local w = what()
27 if w ~= "main" and w ~= "C" then
28 error("assign to undeclared variable '"..n.."'", 2)
29 end
30 mt.__declared[n] = true
31 end
32 rawset(t, n, v)
33end
34
35mt.__index = function (t, n)
36 if not mt.__declared[n] and what() ~= "C" then
37 error("variable '"..n.."' is not declared", 2)
38 end
39 return rawget(t, n)
40end
41
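-- Illustrative behaviour once this file has been loaded (names hypothetical):
--
--   x = 1                          -- ok: main-chunk assignment declares 'x'
--   print(y)                       --> error: variable 'y' is not declared
--   local function f() z = 1 end
--   f()                            --> error: assign to undeclared variable 'z'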
diff --git a/lib/.gitignore b/lib/.gitignore
new file mode 100644
index 00000000..500e2855
--- /dev/null
+++ b/lib/.gitignore
@@ -0,0 +1 @@
vmdef.lua
diff --git a/lib/bc.lua b/lib/bc.lua
new file mode 100644
index 00000000..532f2493
--- /dev/null
+++ b/lib/bc.lua
@@ -0,0 +1,182 @@
1----------------------------------------------------------------------------
2-- LuaJIT bytecode listing module.
3--
4-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
5-- Released under the MIT/X license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module lists the bytecode of a Lua function. If it's loaded by -jbc
9-- it hooks into the parser and lists all functions of a chunk as they
10-- are parsed.
11--
12-- Example usage:
13--
14-- luajit -jbc -e 'local x=0; for i=1,1e6 do x=x+i end; print(x)'
15-- luajit -jbc=- foo.lua
16-- luajit -jbc=foo.list foo.lua
17--
18-- Default output is to stderr. To redirect the output to a file, pass a
19-- filename as an argument (use '-' for stdout) or set the environment
20-- variable LUAJIT_LISTFILE. The file is overwritten every time the module
21-- is started.
22--
23-- This module can also be used programmatically:
24--
25-- local bc = require("jit.bc")
26--
27-- local function foo() print("hello") end
28--
29-- bc.dump(foo) --> -- BYTECODE -- [...]
30-- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello"
31--
32-- local out = {
33-- -- Do something with each line:
34-- write = function(t, ...) io.write(...) end,
35-- close = function(t) end,
36-- flush = function(t) end,
37-- }
38-- bc.dump(foo, out)
39--
40------------------------------------------------------------------------------
41
42-- Cache some library functions and objects.
43local jit = require("jit")
44assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef")
47local bit = require("bit")
48local sub, gsub, format = string.sub, string.gsub, string.format
49local byte, band, shr = string.byte, bit.band, bit.rshift
50local funcinfo, funcbc, funck = jutil.funcinfo, jutil.funcbc, jutil.funck
51local funcuvname = jutil.funcuvname
52local bcnames = vmdef.bcnames
53local stdout, stderr = io.stdout, io.stderr
54
55------------------------------------------------------------------------------
56
57local function ctlsub(c)
58 if c == "\n" then return "\\n"
59 elseif c == "\r" then return "\\r"
60 elseif c == "\t" then return "\\t"
61 elseif c == "\r" then return "\\r"
62 else return format("\\%03d", byte(c))
63 end
64end
65
66-- Return one bytecode line.
67local function bcline(func, pc, prefix)
68 local ins, m = funcbc(func, pc)
69 if not ins then return end
70 local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
71 local a = band(shr(ins, 8), 0xff)
72 local oidx = 6*band(ins, 0xff)
73 local s = format("%04d %s %-6s %3s ",
74 pc, prefix or " ", sub(bcnames, oidx+1, oidx+6), ma == 0 and "" or a)
75 local d = shr(ins, 16)
76 if mc == 13*128 then -- BCMjump
77 if ma == 0 then
78 return format("%s=> %04d\n", sub(s, 1, -3), pc+d-0x7fff)
79 end
80 return format("%s=> %04d\n", s, pc+d-0x7fff)
81 end
82 if mb ~= 0 then d = band(d, 0xff) end
83 local kc
84 if mc == 10*128 then -- BCMstr
85 kc = funck(func, -d-1)
86 kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
87 elseif mc == 9*128 then -- BCMnum
88 kc = funck(func, d)
89 elseif mc == 12*128 then -- BCMfunc
90 local fi = funcinfo(funck(func, -d-1))
91 if fi.ffid then
92 kc = vmdef.ffnames[fi.ffid]
93 else
94 kc = fi.loc
95 end
96 elseif mc == 5*128 then -- BCMuv
97 kc = funcuvname(func, d)
98 end
99 if ma == 5 then -- BCMuv
100 local ka = funcuvname(func, a)
101 if kc then kc = ka.." ; "..kc else kc = ka end
102 end
103 if mb ~= 0 then
104 local b = shr(ins, 24)
105 if kc then return format("%s%3d %3d ; %s\n", s, b, d, kc) end
106 return format("%s%3d %3d\n", s, b, d)
107 end
108 if kc then return format("%s%3d ; %s\n", s, d, kc) end
109 if mc == 7*128 and d > 32767 then d = d - 65536 end -- BCMlits
110 return format("%s%3d\n", s, d)
111end
112
113-- Collect branch targets of a function.
114local function bctargets(func)
115 local target = {}
116 for pc=1,1000000000 do
117 local ins, m = funcbc(func, pc)
118 if not ins then break end
119 if band(m, 15*128) == 13*128 then target[pc+shr(ins, 16)-0x7fff] = true end
120 end
121 return target
122end
123
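-- Illustrative use (pc values hypothetical):
--   local targets = require("jit.bc").targets(func)
--   -- e.g. { [3] = true } when the only branch in func jumps back to pc 3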
124-- Dump bytecode instructions of a function.
125local function bcdump(func, out)
126 if not out then out = stdout end
127 local fi = funcinfo(func)
128 out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined))
129 local target = bctargets(func)
130 for pc=1,1000000000 do
131 local s = bcline(func, pc, target[pc] and "=>")
132 if not s then break end
133 out:write(s)
134 end
135 out:write("\n")
136 out:flush()
137end
138
139------------------------------------------------------------------------------
140
141-- Active flag and output file handle.
142local active, out
143
144-- List handler.
145local function h_list(func)
146 return bcdump(func, out)
147end
148
149-- Detach list handler.
150local function bclistoff()
151 if active then
152 active = false
153 jit.attach(h_list)
154 if out and out ~= stdout and out ~= stderr then out:close() end
155 out = nil
156 end
157end
158
159-- Open the output file and attach list handler.
160local function bcliston(outfile)
161 if active then bclistoff() end
162 if not outfile then outfile = os.getenv("LUAJIT_LISTFILE") end
163 if outfile then
164 out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
165 else
166 out = stderr
167 end
168 jit.attach(h_list, "bc")
169 active = true
170end
171
172-- Public module functions.
173module(...)
174
175line = bcline
176dump = bcdump
177targets = bctargets
178
179on = bcliston
180off = bclistoff
181start = bcliston -- For -j command line option.
182
diff --git a/lib/dis_x64.lua b/lib/dis_x64.lua
new file mode 100644
index 00000000..da3d63f8
--- /dev/null
+++ b/lib/dis_x64.lua
@@ -0,0 +1,19 @@
1----------------------------------------------------------------------------
2-- LuaJIT x64 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
5-- Released under the MIT/X license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the 64 bit functions from the combined
8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local require = require
12
13module(...)
14
15local dis_x86 = require(_PACKAGE.."dis_x86")
16
17create = dis_x86.create64
18disass = dis_x86.disass64
19
diff --git a/lib/dis_x86.lua b/lib/dis_x86.lua
new file mode 100644
index 00000000..8f127bee
--- /dev/null
+++ b/lib/dis_x86.lua
@@ -0,0 +1,824 @@
1----------------------------------------------------------------------------
2-- LuaJIT x86/x64 disassembler module.
3--
4-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
5-- Released under the MIT/X license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This is a helper module used by the LuaJIT machine code dumper module.
8--
9-- Sending small code snippets to an external disassembler and mixing the
10-- output with our own stuff was too fragile. So I had to bite the bullet
11-- and write yet another x86 disassembler. Oh well ...
12--
13-- The output format is very similar to what ndisasm generates. But it has
14-- been developed independently by looking at the opcode tables from the
15-- Intel and AMD manuals. The supported instruction set is quite extensive
16-- and reflects what a current generation Intel or AMD CPU implements in
17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
18-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
19-- instructions.
20--
21-- Notes:
22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
23-- * No attempt at optimization has been made -- it's fast enough for my needs.
24-- * The public API may change when more architectures are added.
25------------------------------------------------------------------------------
26
27local type = type
28local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep
31
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = {
34--0x
35[0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es",
36"orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*",
37--1x
38"adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss",
39"sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds",
40--2x
41"andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa",
42"subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das",
43--3x
44"xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa",
45"cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas",
46--4x
47"incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR",
48"decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR",
49--5x
50"pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR",
51"popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR",
52--6x
53"sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr",
54"fs:seg","gs:seg","o16:","a16",
55"pushUi","imulVrmi","pushBs","imulVrms",
56"insb","insVS","outsb","outsVS",
57--7x
58"joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj",
59"jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj",
60--8x
61"arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms",
62"testBmr","testVmr","xchgBrm","xchgVrm",
63"movBmr","movVmr","movBrm","movVrm",
64"movVmg","leaVrm","movWgm","popUm",
65--9x
66"nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR",
67"xchgVaR","xchgVaR","xchgVaR","xchgVaR",
68"sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait",
69"sz*pushfw,pushf","sz*popfw,popf","sahf","lahf",
70--Ax
71"movBao","movVao","movBoa","movVoa",
72"movsb","movsVS","cmpsb","cmpsVS",
73"testBai","testVai","stosb","stosVS",
74"lodsb","lodsVS","scasb","scasVS",
75--Bx
76"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
77"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
78--Cx
79"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
80"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
81--Dx
82"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
83"fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7",
84--Ex
85"loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj",
86"inBau","inVau","outBua","outVua",
87"callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda",
88--Fx
89"lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm",
90"clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm",
91}
92assert(#map_opc1_32 == 255)
93
94-- Map for 1st opcode byte in 64 bit mode (overrides only).
95local map_opc1_64 = setmetatable({
96 [0x06]=false, [0x07]=false, [0x0e]=false,
97 [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false,
98 [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false,
99 [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:",
100 [0x40]="rex*", [0x41]="rex*b", [0x42]="rex*x", [0x43]="rex*xb",
101 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
102 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
103 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
104 [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
105 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
106}, { __index = map_opc1_32 })
107
108-- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you.
109-- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2
110local map_opc2 = {
111--0x
112[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
113"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
114--1x
115"movupsXrm|movssXrm|movupdXrm|movsdXrm",
116"movupsXmr|movssXmr|movupdXmr|movsdXmr",
117"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
118"movlpsXmr||movlpdXmr",
119"unpcklpsXrm||unpcklpdXrm",
120"unpckhpsXrm||unpckhpdXrm",
121"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
122"movhpsXmr||movhpdXmr",
123"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
124"hintnopVm","hintnopVm","hintnopVm","hintnopVm",
125--2x
126"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
127"movapsXrm||movapdXrm",
128"movapsXmr||movapdXmr",
129"cvtpi2psXrMm|cvtsi2ssXrVm|cvtpi2pdXrMm|cvtsi2sdXrVm",
130"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
131"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
132"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
133"ucomissXrm||ucomisdXrm",
134"comissXrm||comisdXrm",
135--3x
136"wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec",
137"opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil,
138--4x
139"cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm",
140"cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm",
141"cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm",
142"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
143--5x
144"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
145"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
146"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
147"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
148"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
149"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
150"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
151"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
152"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
153--6x
154"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
155"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
156"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
157"||punpcklqdqXrm","||punpckhqdqXrm",
158"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
159--7x
160"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
161"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
162"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
163"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
164nil,nil,
165"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
166"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
167--8x
168"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
169"jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj",
170--9x
171"setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm",
172"setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm",
173--Ax
174"push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil,
175"push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm",
176--Bx
177"cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr",
178"$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt",
179"|popcntVrm","ud2Dp","bt!Vmu","btcVmr",
180"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
181--Cx
182"xaddBmr","xaddVmr",
183"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
184"pinsrwPrWmu","pextrwDrPmu",
185"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
186"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
187--Dx
188"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
189"paddqPrm","pmullwPrm",
190"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
191"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
192"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
193--Ex
194"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
195"pmulhuwPrm","pmulhwPrm",
196"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
197"psubsbPrm","psubswPrm","pminswPrm","porPrm",
198"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
199--Fx
200"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
201"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
202"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
203"paddbPrm","paddwPrm","padddPrm","ud",
204}
205assert(map_opc2[255] == "ud")
206
207-- Map for three-byte opcodes. Can't wait for their next invention.
208local map_opc3 = {
209["38"] = { -- [66] 0f 38 xx
210--0x
211[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
212"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
213"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
214nil,nil,nil,nil,
215--1x
216"||pblendvbXrma",nil,nil,nil,
217"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
218nil,nil,nil,nil,
219"pabsbPrm","pabswPrm","pabsdPrm",nil,
220--2x
221"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
222"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
223"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
224nil,nil,nil,nil,
225--3x
226"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
227"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
228"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
229"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
230--4x
231"||pmulddXrm","||phminposuwXrm",
232--Fx
233[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
234},
235
236["3a"] = { -- [66] 0f 3a xx
237--0x
238[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
239"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
240"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
241--1x
242nil,nil,nil,nil,
243"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
244nil,nil,nil,nil,nil,nil,nil,nil,
245--2x
246"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
247--4x
248[0x40] = "||dppsXrmu",
249[0x41] = "||dppdXrmu",
250[0x42] = "||mpsadbwXrmu",
251--6x
252[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
253[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
254},
255}
256
257-- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands).
258local map_opcvm = {
259[0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff",
260[0xc8]="monitor",[0xc9]="mwait",
261[0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave",
262[0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga",
263[0xf8]="swapgs",[0xf9]="rdtscp",
264}
265
266-- Map for FP opcodes. And you thought stack machines are simple?
267local map_opcfp = {
268-- D8-DF 00-BF: opcodes with a memory operand.
269-- D8
270[0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
271"fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
272-- DA
273"fiaddDm","fimulDm","ficomDm","ficompDm",
274"fisubDm","fisubrDm","fidivDm","fidivrDm",
275-- DB
276"fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
277-- DC
278"faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
279-- DD
280"fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
281-- DE
282"fiaddWm","fimulWm","ficomWm","ficompWm",
283"fisubWm","fisubrWm","fidivWm","fidivrWm",
284-- DF
285"fildWm","fisttpWm","fistWm","fistpWm",
286"fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
287-- xx C0-FF: opcodes with a pseudo-register operand.
288-- D8
289"faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
290-- D9
291"fldFf","fxchFf",{"fnop"},nil,
292{"fchs","fabs",nil,nil,"ftst","fxam"},
293{"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
294{"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
295{"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
296-- DA
297"fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
298-- DB
299"fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
300{nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
301-- DC
302"fadd toFf","fmul toFf",nil,nil,
303"fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf",
304-- DD
305"ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
306-- DE
307"faddpFf","fmulpFf",nil,{nil,"fcompp"},
308"fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
309-- DF
310nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
311}
312assert(map_opcfp[126] == "fcomipFf")
313
314-- Map for opcode groups. The subkey is sp from the ModRM byte.
315local map_opcgroup = {
316 arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
317 shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
318 testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" },
319 testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" },
320 incb = { "inc", "dec" },
321 incd = { "inc", "dec", "callDmp", "$call farDmp",
322 "jmpDmp", "$jmp farDmp", "pushUm" },
323 sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" },
324 sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt",
325 "smsw", nil, "lmsw", "vm*$invlpg" },
326 bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" },
327 cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil,
328 nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" },
329 pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" },
330 pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" },
331 pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" },
332 pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" },
333 fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr",
334 nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" },
335 prefetch = { "prefetch", "prefetchw" },
336 prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
337}
338
339------------------------------------------------------------------------------
340
341-- Maps for register names.
342local map_regs = {
343 B = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
344 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
345 B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
346 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
347 W = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
348 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
349 D = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
350 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
351 Q = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
352 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
353 M = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
354 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
355 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
356 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
357}
358local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
359
360-- Maps for size names.
361local map_sz2n = {
362 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
363}
364local map_sz2prefix = {
365 B = "byte", W = "word", D = "dword",
366 Q = "qword",
367 M = "qword", X = "xword",
368 F = "dword", G = "qword", -- No need for sizes/register names for these two.
369}
370
371------------------------------------------------------------------------------
372
373-- Output a nicely formatted line with an opcode and operands.
374local function putop(ctx, text, operands)
375 local code, pos, hex = ctx.code, ctx.pos, ""
376 local hmax = ctx.hexdump
377 if hmax > 0 then
378 for i=ctx.start,pos-1 do
379 hex = hex..format("%02X", byte(code, i, i))
380 end
381 if #hex > hmax then hex = sub(hex, 1, hmax)..". "
382 else hex = hex..rep(" ", hmax-#hex+2) end
383 end
384 if operands then text = text.." "..operands end
385 if ctx.o16 then text = "o16 "..text; ctx.o16 = false end
386 if ctx.a32 then text = "a32 "..text; ctx.a32 = false end
387 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
388 if ctx.rex then
389 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
390 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
391 if t ~= "" then text = "rex."..t.." "..text end
392 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
393 ctx.rex = false
394 end
395 if ctx.seg then
396 local text2, n = gsub(text, "%[", "["..ctx.seg..":")
397 if n == 0 then text = ctx.seg.." "..text else text = text2 end
398 ctx.seg = false
399 end
400 if ctx.lock then text = "lock "..text; ctx.lock = false end
401 local imm = ctx.imm
402 if imm then
403 local sym = ctx.symtab[imm]
404 if sym then text = text.."\t->"..sym end
405 end
406 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text))
407 ctx.mrm = false
408 ctx.start = pos
409 ctx.imm = nil
410end
411
412-- Clear all prefix flags.
413local function clearprefixes(ctx)
414 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
415 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
416 ctx.rex = false; ctx.a32 = false
417end
418
419-- Fallback for incomplete opcodes at the end.
420local function incomplete(ctx)
421 ctx.pos = ctx.stop+1
422 clearprefixes(ctx)
423 return putop(ctx, "(incomplete)")
424end
425
426-- Fallback for unknown opcodes.
427local function unknown(ctx)
428 clearprefixes(ctx)
429 return putop(ctx, "(unknown)")
430end
431
432-- Return an immediate of the specified size.
433local function getimm(ctx, pos, n)
434 if pos+n-1 > ctx.stop then return incomplete(ctx) end
435 local code = ctx.code
436 if n == 1 then
437 local b1 = byte(code, pos, pos)
438 return b1
439 elseif n == 2 then
440 local b1, b2 = byte(code, pos, pos+1)
441 return b1+b2*256
442 else
443 local b1, b2, b3, b4 = byte(code, pos, pos+3)
444 local imm = b1+b2*256+b3*65536+b4*16777216
445 ctx.imm = imm
446 return imm
447 end
448end
449
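-- Illustrative example (byte values hypothetical): immediates are decoded
-- little-endian, so a 4-byte fetch over the bytes 0x78 0x56 0x34 0x12 gives
--   getimm(ctx, pos, 4)  --> 0x12345678  (0x78 + 0x56*256 + 0x34*65536 + 0x12*16777216)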
450-- Process pattern string and generate the operands.
451local function putpat(ctx, name, pat)
452 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
453 local code, pos, stop = ctx.code, ctx.pos, ctx.stop
454
455 -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
456 for p in gmatch(pat, ".") do
457 local x = nil
458 if p == "V" or p == "U" then
459 if ctx.rexw then sz = "Q"; ctx.rexw = false
460 elseif ctx.o16 then sz = "W"; ctx.o16 = false
461 elseif p == "U" and ctx.x64 then sz = "Q"
462 else sz = "D" end
463 regs = map_regs[sz]
464 elseif p == "T" then
465 if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end
466 regs = map_regs[sz]
467 elseif p == "B" then
468 sz = "B"
469 regs = ctx.rex and map_regs.B64 or map_regs.B
470 elseif match(p, "[WDQMXFG]") then
471 sz = p
472 regs = map_regs[sz]
473 elseif p == "P" then
474 sz = ctx.o16 and "X" or "M"; ctx.o16 = false
475 regs = map_regs[sz]
476 elseif p == "S" then
477 name = name..lower(sz)
478 elseif p == "s" then
479 local imm = getimm(ctx, pos, 1); if not imm then return end
480 x = imm <= 127 and format("+0x%02x", imm)
481 or format("-0x%02x", 256-imm)
482 pos = pos+1
483 elseif p == "u" then
484 local imm = getimm(ctx, pos, 1); if not imm then return end
485 x = format("0x%02x", imm)
486 pos = pos+1
487 elseif p == "w" then
488 local imm = getimm(ctx, pos, 2); if not imm then return end
489 x = format("0x%x", imm)
490 pos = pos+2
491 elseif p == "o" then -- [offset]
492 if ctx.x64 then
493 local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
494 local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
495 x = format("[0x%08x%08x]", imm2, imm1)
496 pos = pos+8
497 else
498 local imm = getimm(ctx, pos, 4); if not imm then return end
499 x = format("[0x%08x]", imm)
500 pos = pos+4
501 end
502 elseif p == "i" or p == "I" then
503 local n = map_sz2n[sz]
504 if n == 8 and ctx.x64 and p == "I" then
505 local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
506 local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
507 x = format("0x%08x%08x", imm2, imm1)
508 else
509 if n == 8 then n = 4 end
510 local imm = getimm(ctx, pos, n); if not imm then return end
511 if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then
512 imm = (0xffffffff+1)-imm
513 x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm)
514 else
515 x = format(imm > 65535 and "0x%08x" or "0x%x", imm)
516 end
517 end
518 pos = pos+n
519 elseif p == "j" then
520 local n = map_sz2n[sz]
521 if n == 8 then n = 4 end
522 local imm = getimm(ctx, pos, n); if not imm then return end
523 if sz == "B" and imm > 127 then imm = imm-256
524 elseif imm > 2147483647 then imm = imm-4294967296 end
525 pos = pos+n
526 imm = imm + pos + ctx.addr
527 if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end
528 ctx.imm = imm
529 if sz == "W" then
530 x = format("word 0x%04x", imm%65536)
531 elseif ctx.x64 then
532 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else
535 x = format("0x%08x", imm)
536 end
537 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8
539 if ctx.rexb then r = r + 8; ctx.rexb = false end
540 x = regs[r+1]
541 elseif p == "a" then x = regs[1]
542 elseif p == "c" then x = "cl"
543 elseif p == "d" then x = "dx"
544 elseif p == "1" then x = "1"
545 else
546 if not mode then
547 mode = ctx.mrm
548 if not mode then
549 if pos > stop then return incomplete(ctx) end
550 mode = byte(code, pos, pos)
551 pos = pos+1
552 end
553 rm = mode%8; mode = (mode-rm)/8
554 sp = mode%8; mode = (mode-sp)/8
555 sdisp = ""
556 if mode < 3 then
557 if rm == 4 then
558 if pos > stop then return incomplete(ctx) end
559 sc = byte(code, pos, pos)
560 pos = pos+1
561 rm = sc%8; sc = (sc-rm)/8
562 rx = sc%8; sc = (sc-rx)/8
563 if ctx.rexx then rx = rx + 8; ctx.rexx = false end
564 if rx == 4 then rx = nil end
565 end
566 if mode > 0 or rm == 5 then
567 local dsz = mode
568 if dsz ~= 1 then dsz = 4 end
569 local disp = getimm(ctx, pos, dsz); if not disp then return end
570 if mode == 0 then rm = nil end
571 if rm or rx or (not sc and ctx.x64 and not ctx.a32) then
572 if dsz == 1 and disp > 127 then
573 sdisp = format("-0x%x", 256-disp)
574 elseif disp >= 0 and disp <= 0x7fffffff then
575 sdisp = format("+0x%x", disp)
576 else
577 sdisp = format("-0x%x", (0xffffffff+1)-disp)
578 end
579 else
580 sdisp = format(ctx.x64 and not ctx.a32 and
581 not (disp >= 0 and disp <= 0x7fffffff)
582 and "0xffffffff%08x" or "0x%08x", disp)
583 end
584 pos = pos+dsz
585 end
586 end
587 if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end
588 if ctx.rexr then sp = sp + 8; ctx.rexr = false end
589 end
590 if p == "m" then
591 if mode == 3 then x = regs[rm+1]
592 else
593 local aregs = ctx.a32 and map_regs.D or ctx.aregs
594 local srm, srx = "", ""
595 if rm then srm = aregs[rm+1]
596 elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end
597 ctx.a32 = false
598 if rx then
599 if rm then srm = srm.."+" end
600 srx = aregs[rx+1]
601 if sc > 0 then srx = srx.."*"..(2^sc) end
602 end
603 x = format("[%s%s%s]", srm, srx, sdisp)
604 end
605 if mode < 3 and
606 (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck.
607 x = map_sz2prefix[sz].." "..x
608 end
609 elseif p == "r" then x = regs[sp+1]
610 elseif p == "g" then x = map_segregs[sp+1]
611 elseif p == "p" then -- Suppress prefix.
612 elseif p == "f" then x = "st"..rm
613 elseif p == "x" then
614 if sp == 0 and ctx.lock and not ctx.x64 then
615 x = "CR8"; ctx.lock = false
616 else
617 x = "CR"..sp
618 end
619 elseif p == "y" then x = "DR"..sp
620 elseif p == "z" then x = "TR"..sp
621 elseif p == "t" then
622 else
623 error("bad pattern `"..pat.."'")
624 end
625 end
626 if x then operands = operands and operands..", "..x or x end
627 end
628 ctx.pos = pos
629 return putop(ctx, name, operands)
630end
631
632-- Forward declaration.
633local map_act
634
635-- Fetch and cache MRM byte.
636local function getmrm(ctx)
637 local mrm = ctx.mrm
638 if not mrm then
639 local pos = ctx.pos
640 if pos > ctx.stop then return nil end
641 mrm = byte(ctx.code, pos, pos)
642 ctx.pos = pos+1
643 ctx.mrm = mrm
644 end
645 return mrm
646end
647
648-- Dispatch to handler depending on pattern.
649local function dispatch(ctx, opat, patgrp)
650 if not opat then return unknown(ctx) end
651 if match(opat, "%|") then -- MMX/SSE variants depending on prefix.
652 local p
653 if ctx.rep then
654 p = ctx.rep=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)"
655 ctx.rep = false
656 elseif ctx.o16 then p = "%|[^%|]*%|([^%|]*)"; ctx.o16 = false
657 else p = "^[^%|]*" end
658 opat = match(opat, p)
659 if not opat then return unknown(ctx) end
660-- ctx.rep = false; ctx.o16 = false
661 --XXX fails for 66 f2 0f 38 f1 06 crc32 eax,WORD PTR [esi]
662 --XXX remove in branches?
663 end
664 if match(opat, "%$") then -- reg$mem variants.
665 local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
666 opat = match(opat, mrm >= 192 and "^[^%$]*" or "%$(.*)")
667 if opat == "" then return unknown(ctx) end
668 end
669 if opat == "" then return unknown(ctx) end
670 local name, pat = match(opat, "^([a-z0-9 ]*)(.*)")
671 if pat == "" and patgrp then pat = patgrp end
672 return map_act[sub(pat, 1, 1)](ctx, name, pat)
673end
674
675-- Get a pattern from an opcode map and dispatch to handler.
676local function dispatchmap(ctx, opcmap)
677 local pos = ctx.pos
678 local opat = opcmap[byte(ctx.code, pos, pos)]
679 pos = pos + 1
680 ctx.pos = pos
681 return dispatch(ctx, opat)
682end
683
684-- Map for action codes. The key is the first char after the name.
685map_act = {
686 -- Simple opcodes without operands.
687 [""] = function(ctx, name, pat)
688 return putop(ctx, name)
689 end,
690
691 -- Operand size chars fall right through.
692 B = putpat, W = putpat, D = putpat, Q = putpat,
693 V = putpat, U = putpat, T = putpat,
694 M = putpat, X = putpat, P = putpat,
695 F = putpat, G = putpat,
696
697 -- Collect prefixes.
698 [":"] = function(ctx, name, pat)
699 ctx[pat == ":" and name or sub(pat, 2)] = name
700 if ctx.pos - ctx.start > 5 then return unknown(ctx) end -- Limit #prefixes.
701 end,
702
703 -- Chain to special handler specified by name.
704 ["*"] = function(ctx, name, pat)
705 return map_act[name](ctx, name, sub(pat, 2))
706 end,
707
708 -- Use named subtable for opcode group.
709 ["!"] = function(ctx, name, pat)
710 local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
711 return dispatch(ctx, map_opcgroup[name][((mrm-(mrm%8))/8)%8+1], sub(pat, 2))
712 end,
713
714 -- o16,o32[,o64] variants.
715 sz = function(ctx, name, pat)
716 if ctx.o16 then ctx.o16 = false
717 else
718 pat = match(pat, ",(.*)")
719 if ctx.rexw then
720 local p = match(pat, ",(.*)")
721 if p then pat = p; ctx.rexw = false end
722 end
723 end
724 pat = match(pat, "^[^,]*")
725 return dispatch(ctx, pat)
726 end,
727
728 -- Two-byte opcode dispatch.
729 opc2 = function(ctx, name, pat)
730 return dispatchmap(ctx, map_opc2)
731 end,
732
733 -- Three-byte opcode dispatch.
734 opc3 = function(ctx, name, pat)
735 return dispatchmap(ctx, map_opc3[pat])
736 end,
737
738 -- VMX/SVM dispatch.
739 vm = function(ctx, name, pat)
740 return dispatch(ctx, map_opcvm[ctx.mrm])
741 end,
742
743 -- Floating point opcode dispatch.
744 fp = function(ctx, name, pat)
745 local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
746 local rm = mrm%8
747 local idx = pat*8 + ((mrm-rm)/8)%8
748 if mrm >= 192 then idx = idx + 64 end
749 local opat = map_opcfp[idx]
750 if type(opat) == "table" then opat = opat[rm+1] end
751 return dispatch(ctx, opat)
752 end,
753
754 -- REX prefix.
755 rex = function(ctx, name, pat)
756 if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
757 for p in gmatch(pat, ".") do ctx["rex"..p] = true end
758 ctx.rex = true
759 end,
760
761 -- Special case for nop with REX prefix.
762 nop = function(ctx, name, pat)
763 return dispatch(ctx, ctx.rex and pat or "nop")
764 end,
765}
766
767------------------------------------------------------------------------------
768
769-- Disassemble a block of code.
770local function disass_block(ctx, ofs, len)
771 if not ofs then ofs = 0 end
772 local stop = len and ofs+len or #ctx.code
773 ofs = ofs + 1
774 ctx.start = ofs
775 ctx.pos = ofs
776 ctx.stop = stop
777 ctx.imm = nil
778 ctx.mrm = false
779 clearprefixes(ctx)
780 while ctx.pos <= stop do dispatchmap(ctx, ctx.map1) end
781 if ctx.pos ~= ctx.start then incomplete(ctx) end
782end
783
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out)
786 local ctx = {}
787 ctx.code = code
788 ctx.addr = (addr or 0) - 1
789 ctx.out = out or io.write
790 ctx.symtab = {}
791 ctx.disass = disass_block
792 ctx.hexdump = 16
793 ctx.x64 = false
794 ctx.map1 = map_opc1_32
795 ctx.aregs = map_regs.D
796 return ctx
797end
798
799local function create64_(code, addr, out)
800 local ctx = create_(code, addr, out)
801 ctx.x64 = true
802 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q
804 return ctx
805end
806
807-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out)
809 create_(code, addr, out):disass()
810end
811
812local function disass64_(code, addr, out)
813 create64_(code, addr, out):disass()
814end
815
816
817-- Public module functions.
818module(...)
819
820create = create_
821create64 = create64_
822disass = disass_
823disass64 = disass64_
824
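-- Illustrative usage (code string and address hypothetical):
--   local dis = require("jit.dis_x86")
--   dis.disass("\144\195", 0x10000)
--   -- prints one line per instruction (nop, ret) via the default io.write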
diff --git a/lib/dump.lua b/lib/dump.lua
new file mode 100644
index 00000000..9fde87c1
--- /dev/null
+++ b/lib/dump.lua
@@ -0,0 +1,567 @@
1----------------------------------------------------------------------------
2-- LuaJIT compiler dump module.
3--
4-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
5-- Released under the MIT/X license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module can be used to debug the JIT compiler itself. It dumps the
9-- code representations and structures used in various compiler stages.
10--
11-- Example usage:
12--
13-- luajit -jdump -e "local x=0; for i=1,1e6 do x=x+i end; print(x)"
14-- luajit -jdump=im -e "for i=1,1000 do for j=1,1000 do end end" | less -R
15-- luajit -jdump=is myapp.lua | less -R
16-- luajit -jdump=-b myapp.lua
17-- luajit -jdump=+aH,myapp.html myapp.lua
18-- luajit -jdump=ixT,myapp.dump myapp.lua
19--
20-- The first argument specifies the dump mode. The second argument gives
21-- the output file name. Default output is to stdout, unless the environment
22-- variable LUAJIT_DUMPFILE is set. The file is overwritten every time the
23-- module is started.
24--
25-- Different features can be turned on or off with the dump mode. If the
26-- mode starts with a '+', the following features are added to the default
27-- set of features; a '-' removes them. Otherwise the features are replaced.
28--
29-- The following dump features are available (* marks the default):
30--
31-- * t Print a line for each started, ended or aborted trace (see also -jv).
32-- * b Dump the traced bytecode.
33-- * i Dump the IR (intermediate representation).
34-- r Augment the IR with register/stack slots.
35-- s Dump the snapshot map.
36-- * m Dump the generated machine code.
37-- x Print each taken trace exit.
38-- X Print each taken trace exit and the contents of all registers.
39--
40-- The output format can be set with the following characters:
41--
42-- T Plain text output.
43-- A ANSI-colored text output.
44-- H Colorized HTML + CSS output.
45--
46-- The default output format is plain text. It's set to ANSI-colored text
47-- if the COLORTERM variable is set. Note: this is independent of any output
48-- redirection, which is actually considered a feature.
49--
50-- You probably want to use less -R to enjoy viewing ANSI-colored text from
51-- a pipe or a file. Add this to your ~/.bashrc: export LESS="-R"
52--
53------------------------------------------------------------------------------
54
55-- Cache some library functions and objects.
56local jit = require("jit")
57assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
58local jutil = require("jit.util")
59local vmdef = require("jit.vmdef")
60local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
61local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
62local tracemc, traceexitstub = jutil.tracemc, jutil.traceexitstub
63local tracesnap = jutil.tracesnap
64local bit = require("bit")
65local band, shl, shr = bit.band, bit.lshift, bit.rshift
66local sub, gsub, format = string.sub, string.gsub, string.format
67local byte, char, rep = string.byte, string.char, string.rep
68local type, tostring = type, tostring
69local stdout, stderr = io.stdout, io.stderr
70
71-- Load other modules on-demand.
72local bcline, discreate
73
74-- Active flag, output file handle and dump mode.
75local active, out, dumpmode
76
77------------------------------------------------------------------------------
78
79local symtab = {}
80local nexitsym = 0
81
82-- Fill symbol table with trace exit addresses.
83local function fillsymtab(nexit)
84 local t = symtab
85 if nexit > nexitsym then
86 for i=nexitsym,nexit-1 do t[traceexitstub(i)] = tostring(i) end
87 nexitsym = nexit
88 end
89 return t
90end
91
92local function dumpwrite(s)
93 out:write(s)
94end
95
96-- Disassemble machine code.
97local function dump_mcode(tr)
98 local info = traceinfo(tr)
99 if not info then return end
100 local mcode, addr, loop = tracemc(tr)
101 if not mcode then return end
102 if not discreate then
103 discreate = require("jit.dis_"..jit.arch).create
104 end
105 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
106 local ctx = discreate(mcode, addr, dumpwrite)
107 ctx.hexdump = 0
108 ctx.symtab = fillsymtab(info.nexit)
109 if loop ~= 0 then
110 symtab[addr+loop] = "LOOP"
111 ctx:disass(0, loop)
112 out:write("->LOOP:\n")
113 ctx:disass(loop, #mcode-loop)
114 symtab[addr+loop] = nil
115 else
116 ctx:disass(0, #mcode)
117 end
118end
119
120------------------------------------------------------------------------------
121
122local irtype_text = {
123 [0] = "nil",
124 "fal",
125 "tru",
126 "lud",
127 "str",
128 "ptr",
129 "thr",
130 "pro",
131 "fun",
132 "t09",
133 "tab",
134 "udt",
135 "num",
136 "int",
137 "i8 ",
138 "u8 ",
139 "i16",
140 "u16",
141}
142
143local colortype_ansi = {
144 [0] = "%s",
145 "%s",
146 "%s",
147 "%s",
148 "\027[32m%s\027[m",
149 "%s",
150 "\027[1m%s\027[m",
151 "%s",
152 "\027[1m%s\027[m",
153 "%s",
154 "\027[31m%s\027[m",
155 "\027[36m%s\027[m",
156 "\027[34m%s\027[m",
157 "\027[35m%s\027[m",
158 "\027[35m%s\027[m",
159 "\027[35m%s\027[m",
160 "\027[35m%s\027[m",
161 "\027[35m%s\027[m",
162}
163
164local function colorize_text(s, t)
165 return s
166end
167
168local function colorize_ansi(s, t)
169 return format(colortype_ansi[t], s)
170end
171
172local irtype_ansi = setmetatable({},
173 { __index = function(tab, t)
174 local s = colorize_ansi(irtype_text[t], t); tab[t] = s; return s; end })
175
176local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
177
178local function colorize_html(s, t)
179 s = gsub(s, "[<>&]", html_escape)
180 return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
181end
182
183local irtype_html = setmetatable({},
184 { __index = function(tab, t)
185 local s = colorize_html(irtype_text[t], t); tab[t] = s; return s; end })
186
187local header_html = [[
188<style type="text/css">
189background { background: #ffffff; color: #000000; }
190pre.ljdump {
191font-size: 10pt;
192background: #f0f4ff;
193color: #000000;
194border: 1px solid #bfcfff;
195padding: 0.5em;
196margin-left: 2em;
197margin-right: 2em;
198}
199span.irt_str { color: #00a000; }
200span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
201span.irt_tab { color: #c00000; }
202span.irt_udt { color: #00c0c0; }
203span.irt_num { color: #0000c0; }
204span.irt_int { color: #c000c0; }
205</style>
206]]
207
208local colorize, irtype
209
210-- Lookup table to convert some literals into names.
211local litname = {
212 ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", },
213 ["XLOAD "] = { [0] = "", "unaligned", },
214 ["TOINT "] = { [0] = "check", "index", "", },
215 ["FLOAD "] = vmdef.irfield,
216 ["FREF "] = vmdef.irfield,
217 ["FPMATH"] = vmdef.irfpm,
218}
219
220local function ctlsub(c)
221 if c == "\n" then return "\\n"
222 elseif c == "\r" then return "\\r"
223 elseif c == "\t" then return "\\t"
225 else return format("\\%03d", byte(c))
226 end
227end
228
229local function formatk(tr, idx)
230 local k, t, slot = tracek(tr, idx)
231 local tn = type(k)
232 local s
233 if tn == "number" then
234 if k == 2^52+2^51 then
235 s = "bias"
236 else
237 s = format("%+.14g", k)
238 end
239 elseif tn == "string" then
240 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
241 elseif tn == "function" then
242 local fi = funcinfo(k)
243 if fi.ffid then
244 s = vmdef.ffnames[fi.ffid]
245 else
246 s = fi.loc
247 end
248 elseif tn == "table" then
249 s = format("{%p}", k)
250 elseif tn == "userdata" then
251 if t == 11 then
252 s = format("userdata:%p", k)
253 else
254 s = format("[%p]", k)
255 if s == "[0x00000000]" then s = "NULL" end
256 end
257 else
258 s = tostring(k) -- For primitives.
259 end
260 s = colorize(format("%-4s", s), t)
261 if slot then
262 s = format("%s @%d", s, slot)
263 end
264 return s
265end
266
267local function printsnap(tr, snap)
268 for i=1,#snap do
269 local ref = snap[i]
270 if not ref then
271 out:write("---- ")
272 elseif ref < 0 then
273 out:write(formatk(tr, ref), " ")
274 else
275 local m, ot, op1, op2 = traceir(tr, ref)
276 local t = band(ot, 15)
277 local sep = " "
278 if t == 8 then
279 local oidx = 6*shr(ot, 8)
280 local op = sub(vmdef.irnames, oidx+1, oidx+6)
281 if op == "FRAME " then
282 sep = "|"
283 end
284 end
285 out:write(colorize(format("%04d", ref), t), sep)
286 end
287 end
288 out:write("]\n")
289end
290
291-- Dump snapshots (not interleaved with IR).
292local function dump_snap(tr)
293 out:write("---- TRACE ", tr, " snapshots\n")
294 for i=0,1000000000 do
295 local snap = tracesnap(tr, i)
296 if not snap then break end
297 out:write(format("#%-3d %04d [ ", i, snap[0]))
298 printsnap(tr, snap)
299 end
300end
301
302-- NYI: should really get the register map from the disassembler.
303local reg_map = {
304 [0] = "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
305 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
306}
307
308-- Return a register name or stack slot for a rid/sp location.
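-- (As used here: the low 8 bits hold the register id, higher bits hold the
-- stack slot index; slots are shown as hex byte offsets, i.e. slot*4, and
-- other rid values yield an empty string.)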
309local function ridsp_name(ridsp)
310 local rid = band(ridsp, 0xff)
311 if ridsp > 255 then return format("[%x]", shr(ridsp, 8)*4) end
312 if rid < 128 then return reg_map[rid] end
313 return ""
314end
315
316-- Dump IR and interleaved snapshots.
317local function dump_ir(tr, dumpsnap, dumpreg)
318 local info = traceinfo(tr)
319 if not info then return end
320 local nins = info.nins
321 out:write("---- TRACE ", tr, " IR\n")
322 local irnames = vmdef.irnames
323 local snapref = 65536
324 local snap, snapno
325 if dumpsnap then
326 snap = tracesnap(tr, 0)
327 snapref = snap[0]
328 snapno = 0
329 end
330 for ins=1,nins do
331 if ins >= snapref then
332 if dumpreg then
333 out:write(format(".... SNAP #%-3d [ ", snapno))
334 else
335 out:write(format(".... SNAP #%-3d [ ", snapno))
336 end
337 printsnap(tr, snap)
338 snapno = snapno + 1
339 snap = tracesnap(tr, snapno)
340 snapref = snap and snap[0] or 65536
341 end
342 local m, ot, op1, op2, ridsp = traceir(tr, ins)
343 local oidx, t = 6*shr(ot, 8), band(ot, 31)
344 local op = sub(irnames, oidx+1, oidx+6)
345 if op == "LOOP " then
346 if dumpreg then
347 out:write(format("%04d ------------ LOOP ------------\n", ins))
348 else
349 out:write(format("%04d ------ LOOP ------------\n", ins))
350 end
351 elseif op ~= "NOP " and (dumpreg or op ~= "RENAME") then
352 if dumpreg then
353 out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
354 else
355 out:write(format("%04d ", ins))
356 end
357 out:write(format("%s%s %s %s ",
358 band(ot, 64) == 0 and " " or ">",
359 band(ot, 128) == 0 and " " or "+",
360 irtype[t], op))
361 local m1 = band(m, 3)
362 if m1 ~= 3 then -- op1 != IRMnone
363 if op1 < 0 then
364 out:write(formatk(tr, op1))
365 else
366 out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
367 end
368 local m2 = band(m, 3*4)
369 if m2 ~= 3*4 then -- op2 != IRMnone
370 if m2 == 1*4 then -- op2 == IRMlit
371 local litn = litname[op]
372 if litn and litn[op2] then
373 out:write(" ", litn[op2])
374 else
375 out:write(format(" #%-3d", op2))
376 end
377 elseif op2 < 0 then
378 out:write(" ", formatk(tr, op2))
379 else
380 out:write(format(" %04d", op2))
381 end
382 end
383 end
384 out:write("\n")
385 end
386 end
387 if snap then
388 if dumpreg then
389 out:write(format(".... SNAP #%-3d [ ", snapno))
390 else
391 out:write(format(".... SNAP #%-3d [ ", snapno))
392 end
393 printsnap(tr, snap)
394 end
395end
396
397------------------------------------------------------------------------------
398
399local recprefix = ""
400local recdepth = 0
401
402-- Format trace error message.
403local function fmterr(err, info)
404 if type(err) == "number" then
405 if type(info) == "function" then
406 local fi = funcinfo(info)
407 if fi.ffid then
408 info = vmdef.ffnames[fi.ffid]
409 else
410 info = fi.loc
411 end
412 end
413 err = format(vmdef.traceerr[err], info)
414 end
415 return err
416end
417
418-- Dump trace states.
419local function dump_trace(what, tr, func, pc, otr, oex)
420 if what == "stop" or (what == "abort" and dumpmode.a) then
421 if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
422 elseif dumpmode.s then dump_snap(tr) end
423 if dumpmode.m then dump_mcode(tr) end
424 end
425 if what == "start" then
426 if dumpmode.H then out:write('<pre class="ljdump">\n') end
427 out:write("---- TRACE ", tr, " ", what)
428 if otr then out:write(" ", otr, "/", oex) end
429 local fi = funcinfo(func, pc)
430 out:write(" ", fi.loc, "\n")
431 recprefix = ""
432 recdepth = 0
433 elseif what == "stop" or what == "abort" then
434 out:write("---- TRACE ", tr, " ", what)
435 recprefix = nil
436 if what == "abort" then
437 local fi = funcinfo(func, pc)
438 out:write(" ", fi.loc, " -- ", fmterr(otr, oex), "\n")
439 else
440 local link = traceinfo(tr).link
441 if link == tr then
442 link = "loop"
443 elseif link == 0 then
444 link = "interpreter"
445 end
446 out:write(" -> ", link, "\n")
447 end
448 if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
449 else
450 out:write("---- TRACE ", what, "\n\n")
451 end
452 out:flush()
453end
454
455-- Dump recorded bytecode.
456local function dump_record(tr, func, pc, depth, callee)
457 if depth ~= recdepth then
458 recdepth = depth
459 recprefix = rep(" .", depth)
460 end
461 local line = bcline(func, pc, recprefix)
462 if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
463 if type(callee) == "function" then
464 local fi = funcinfo(callee)
465 if fi.ffid then
466 out:write(sub(line, 1, -2), " ; ", vmdef.ffnames[fi.ffid], "\n")
467 else
468 out:write(sub(line, 1, -2), " ; ", fi.loc, "\n")
469 end
470 else
471 out:write(line)
472 end
473 if band(funcbc(func, pc), 0xff) < 16 then -- Write JMP for cond. ORDER BC
474 out:write(bcline(func, pc+1, recprefix))
475 end
476end
477
478------------------------------------------------------------------------------
479
480-- Dump taken trace exits.
481local function dump_texit(tr, ex, ngpr, nfpr, ...)
482 out:write("---- TRACE ", tr, " exit ", ex, "\n")
483 if dumpmode.X then
484 local regs = {...}
485 for i=1,ngpr do
486 out:write(format(" %08x", regs[i]))
487 if i % 8 == 0 then out:write("\n") end
488 end
489 for i=1,nfpr do
490 out:write(format(" %+17.14g", regs[ngpr+i]))
491 if i % 4 == 0 then out:write("\n") end
492 end
493 end
494end
495
496------------------------------------------------------------------------------
497
498-- Detach dump handlers.
499local function dumpoff()
500 if active then
501 active = false
502 jit.attach(dump_texit)
503 jit.attach(dump_record)
504 jit.attach(dump_trace)
505 if out and out ~= stdout and out ~= stderr then out:close() end
506 out = nil
507 end
508end
509
510-- Open the output file and attach dump handlers.
511local function dumpon(opt, outfile)
512 if active then dumpoff() end
513
514 local colormode = os.getenv("COLORTERM") and "A" or "T"
515 if opt then
516 opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end)
517 end
518
519 local m = { t=true, b=true, i=true, m=true, }
520 if opt and opt ~= "" then
521 local o = sub(opt, 1, 1)
522 if o ~= "+" and o ~= "-" then m = {} end
523 for i=1,#opt do m[sub(opt, i, i)] = (o ~= "-") end
524 end
525 dumpmode = m
526
527 if m.t or m.b or m.i or m.s or m.m then
528 jit.attach(dump_trace, "trace")
529 end
530 if m.b then
531 jit.attach(dump_record, "record")
532 if not bcline then bcline = require("jit.bc").line end
533 end
534 if m.x or m.X then
535 jit.attach(dump_texit, "texit")
536 end
537
538 if not outfile then outfile = os.getenv("LUAJIT_DUMPFILE") end
539 if outfile then
540 out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
541 else
542 out = stdout
543 end
544
545 m[colormode] = true
546 if colormode == "A" then
547 colorize = colorize_ansi
548 irtype = irtype_ansi
549 elseif colormode == "H" then
550 colorize = colorize_html
551 irtype = irtype_html
552 out:write(header_html)
553 else
554 colorize = colorize_text
555 irtype = irtype_text
556 end
557
558 active = true
559end
560
561-- Public module functions.
562module(...)
563
564on = dumpon
565off = dumpoff
566start = dumpon -- For -j command line option.
567
diff --git a/lib/v.lua b/lib/v.lua
new file mode 100644
index 00000000..39fb8ed5
--- /dev/null
+++ b/lib/v.lua
@@ -0,0 +1,156 @@
1----------------------------------------------------------------------------
2-- Verbose mode of the LuaJIT compiler.
3--
4-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
5-- Released under the MIT/X license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module shows verbose information about the progress of the
9-- JIT compiler. It prints one line for each generated trace. This module
10-- is useful to see which code has been compiled or where the compiler
11-- punts and falls back to the interpreter.
12--
13-- Example usage:
14--
15-- luajit -jv -e "for i=1,1000 do for j=1,1000 do end end"
16-- luajit -jv=myapp.out myapp.lua
17--
18-- Default output is to stderr. To redirect the output to a file, pass a
19-- filename as an argument (use '-' for stdout) or set the environment
20-- variable LUAJIT_VERBOSEFILE. The file is overwritten every time the
21-- module is started.
22--
23-- The output from the first example should look like this:
24--
25-- [TRACE 1 (command line):1]
26-- [TRACE 2 (1/3) (command line):1 -> 1]
27--
28-- The first number in each line is the internal trace number. Next are
29-- the file name ('(command line)') and the line number (':1') where the
30-- trace has started. Side traces also show the parent trace number and
31-- the exit number they are attached to, shown in parentheses ('(1/3)').
32-- An arrow at the end shows where the trace links to ('-> 1'), unless
33-- it loops to itself.
34--
35-- In this case the inner loop gets hot and is traced first, generating
36-- a root trace. Then the last exit from the 1st trace gets hot, too,
37-- and triggers generation of the 2nd trace. The side trace follows the
38-- path along the outer loop and *around* the inner loop, back to its
39-- start, and then links to the 1st trace. Yes, this may seem unusual,
40-- if you know how traditional compilers work. Trace compilers are full
41-- of surprises like this -- have fun! :-)
42--
43-- Aborted traces are shown like this:
44--
45-- [TRACE --- foo.lua:44 -- leaving loop in root trace at foo.lua:50]
46--
47-- Don't worry -- trace aborts are quite common, even in programs which
48-- can be fully compiled. The compiler may retry several times until it
49-- finds a suitable trace.
50--
51-- Of course this doesn't work with features that are not-yet-implemented
52-- (NYI error messages). The VM simply falls back to the interpreter. This
53-- may not matter at all if the particular trace is not very high up in
54-- the CPU usage profile. Oh, and the interpreter is quite fast, too.
55--
56-- Also check out the -jdump module, which prints all the gory details.
57--
58------------------------------------------------------------------------------
59
60-- Cache some library functions and objects.
61local jit = require("jit")
62assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
66local type, format = type, string.format
67local stdout, stderr = io.stdout, io.stderr
68
69-- Active flag and output file handle.
70local active, out
71
72------------------------------------------------------------------------------
73
74local startloc, startex
75
76-- Format trace error message.
77local function fmterr(err, info)
78 if type(err) == "number" then
79 if type(info) == "function" then
80 local fi = funcinfo(info)
81 if fi.ffid then
82 info = vmdef.ffnames[fi.ffid]
83 else
84 info = fi.loc
85 end
86 end
87 err = format(vmdef.traceerr[err], info)
88 end
89 return err
90end
91
92-- Dump trace states.
93local function dump_trace(what, tr, func, pc, otr, oex)
94 if what == "start" then
95 startloc = funcinfo(func, pc).loc
96 startex = otr and "("..otr.."/"..oex..") " or ""
97 else
98 if what == "abort" then
99 local loc = funcinfo(func, pc).loc
100 if loc ~= startloc then
101 out:write(format("[TRACE --- %s%s -- %s at %s]\n",
102 startex, startloc, fmterr(otr, oex), loc))
103 else
104 out:write(format("[TRACE --- %s%s -- %s]\n",
105 startex, startloc, fmterr(otr, oex)))
106 end
107 elseif what == "stop" then
108 local link = traceinfo(tr).link
109 if link == 0 then
110 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
111 tr, startex, startloc))
112 elseif link == tr then
113 out:write(format("[TRACE %3s %s%s]\n", tr, startex, startloc))
114 else
115 out:write(format("[TRACE %3s %s%s -> %d]\n",
116 tr, startex, startloc, link))
117 end
118 else
119 out:write(format("[TRACE %s]\n", what))
120 end
121 out:flush()
122 end
123end
124
125------------------------------------------------------------------------------
126
127-- Detach dump handlers.
128local function dumpoff()
129 if active then
130 active = false
131 jit.attach(dump_trace)
132 if out and out ~= stdout and out ~= stderr then out:close() end
133 out = nil
134 end
135end
136
137-- Open the output file and attach dump handlers.
138local function dumpon(outfile)
139 if active then dumpoff() end
140 if not outfile then outfile = os.getenv("LUAJIT_VERBOSEFILE") end
141 if outfile then
142 out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
143 else
144 out = stderr
145 end
146 jit.attach(dump_trace, "trace")
147 active = true
148end
149
150-- Public module functions.
151module(...)
152
153on = dumpon
154off = dumpoff
155start = dumpon -- For -j command line option.
156
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 00000000..e9f998ce
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,8 @@
1luajit
2buildvm
3buildvm_*.h
4lj_ffdef.h
5lj_libdef.h
6lj_recdef.h
7lj_folddef.h
8lj_vm.s
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 00000000..bb1839d1
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,326 @@
1##############################################################################
2# LuaJIT Makefile. Requires GNU Make.
3#
4# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
5# Also works with MinGW and Cygwin on Windows.
6# Please check msvcbuild.bat for building with MSVC on Windows.
7#
8# Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
9##############################################################################
10
11##############################################################################
12# Compiler options: change them as needed. This mainly affects the speed of
13# the JIT compiler itself, not the speed of the JIT compiled code.
14# Turn any of the optional settings on by removing the '#' in front of them.
15#
16# Note: LuaJIT can only be compiled for x86, and not for x64 (yet)!
17# In the meantime, the x86 binary runs fine under an x64 OS.
18#
19# It's recommended to compile at least for i686. By default the assembler part
20# of the interpreter makes use of CMOV/FCOMI*/FUCOMI* instructions, anyway.
21CC= gcc -m32 -march=i686
22# Use this for GCC 4.2 or higher if you don't intend to distribute the
23# binaries to a different machine:
24#CC= gcc -m32 -march=native
25#
26# Since the assembler part does NOT maintain a frame pointer, it's pointless
27# to slow down the C part by not omitting it. Debugging and tracebacks are
28# not affected -- the assembler part has frame unwind information and GCC
29# emits it with -g (see CCDEBUG below).
30CCOPT= -O2 -fomit-frame-pointer
31# Use this if you want to generate a smaller binary (but it's slower):
32#CCOPT= -Os -fomit-frame-pointer
33# Note: it's no longer recommended to use -O3 with GCC 4.x.
34# The I-Cache bloat usually outweighs the benefits from aggressive inlining.
35#
36CCDEBUG=
37# Uncomment the next line to generate debug information:
38#CCDEBUG= -g
39#
40CCWARN= -Wall
41# Uncomment the next line to enable more warnings:
42#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith
43#
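# Any of these can also be overridden from the command line instead of editing
# this file, e.g. (illustrative):
#   make CC="gcc -m32 -march=native" CCDEBUG="-g"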
44##############################################################################
45
46##############################################################################
47# Compile time definitions: change them as needed, but make sure you force
48# a full recompile with "make clean", followed by "make".
49# Note that most of these are NOT suitable for benchmarking or release mode!
50XCFLAGS=
51#
52# Disable the use of CMOV and FCOMI*/FUCOMI* instructions in the interpreter.
53# This is only necessary if you intend to run the code on REALLY ANCIENT CPUs
54# (before Pentium Pro, or on the VIA C3). This generally slows down the
55# interpreter. Don't bother if your OS wouldn't run on them, anyway.
56#XCFLAGS+= -DLUAJIT_CPU_NOCMOV
57#
58# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter:
59#XCFLAGS+= -DLUAJIT_DISABLE_JIT
60#
61# Use the system provided memory allocator (realloc) instead of the
62# bundled memory allocator. This is slower, but sometimes helpful for
63# debugging. It's mandatory for Valgrind's memcheck tool, too.
64#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
65#
66# This define is required to run LuaJIT under Valgrind. The Valgrind
67# header files must be installed. You should enable debug information, too.
68#XCFLAGS+= -DLUAJIT_USE_VALGRIND
69#
70# This is the client for the GDB JIT API. GDB 7.0 or higher is required
71# to make use of it. See lj_gdbjit.c for details. Enabling this causes
72# a non-negligible overhead, even when not running under GDB.
73#XCFLAGS+= -DLUAJIT_USE_GDBJIT
74#
75# Turn on assertions for the Lua/C API to debug problems with lua_* calls.
76# This is rather slow -- use only while developing C libraries/embeddings.
77#XCFLAGS+= -DLUA_USE_APICHECK
78#
79# Turn on assertions for the whole LuaJIT VM. This significantly slows down
80# everything. Use only if you suspect a problem with LuaJIT itself.
81#XCFLAGS+= -DLUA_USE_ASSERT
82#
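# Example (illustrative): enable one of the above for a debug build without
# editing this file; remember to force a full recompile first:
#   make clean
#   make XCFLAGS="-DLUA_USE_ASSERT" CCDEBUG="-g"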
83##############################################################################
84# You probably don't need to change anything below this line.
85##############################################################################
86
87CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(CFLAGS) $(XCFLAGS)
88LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
89
90HOST_CC= $(CC)
91HOST_RM= rm -f
92HOST_XCFLAGS=
93HOST_XLDFLAGS=
94HOST_XLIBS=
95
96TARGET_CC= $(CC)
97TARGET_STRIP= strip
98TARGET_XCFLAGS= -D_FILE_OFFSET_BITS=64
99TARGET_XLDFLAGS=
100TARGET_XSHLDFLAGS= -shared
101TARGET_XLIBS=
102TARGET_ARCH= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET))
103TARGET_DISABLE= -U_FORTIFY_SOURCE
104ifneq (,$(findstring stack-protector,$(shell $(CC) -dumpspecs)))
105 TARGET_DISABLE+= -fno-stack-protector
106endif
107
108ifneq (,$(findstring Windows,$(OS)))
109 TARGET_SYS= Windows
110else
111 TARGET_SYS:= $(shell uname -s)
112 ifneq (,$(findstring CYGWIN,$(TARGET_SYS)))
113 TARGET_SYS= Windows
114 endif
115endif
116
117ifeq (Linux,$(TARGET_SYS))
118 TARGET_XLIBS= -ldl
119 TARGET_XLDFLAGS= -Wl,-E
120else
121ifeq (Windows,$(TARGET_SYS))
122 HOST_RM= del
123 TARGET_STRIP= strip --strip-unneeded
124else
125ifeq (Darwin,$(TARGET_SYS))
126 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup
127 TARGET_STRIP= strip -x
128 export MACOSX_DEPLOYMENT_TARGET=10.3
129else
130 TARGET_XLDFLAGS= -Wl,-E
131endif
132endif
133endif
134
135# NOTE: The LuaJIT distribution comes with a pre-generated buildvm_*.h.
136# You DO NOT NEED an installed copy of (plain) Lua 5.1 to run DynASM unless
137# you want to MODIFY the corresponding *.dasc file. You can also use LuaJIT
138# itself (bootstrapped from the pre-generated file) to run DynASM of course.
139DASM_LUA= lua
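# E.g. after editing buildvm_x86.dasc, the header can be regenerated with a
# previously built LuaJIT binary (illustrative):
#   make DASM_LUA=./luajit buildvm_x86.h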
140
141Q= @
142E= @echo
143#Q=
144#E= @:
145
146##############################################################################
147
148TARGET_CFLAGS= $(CCOPTIONS) $(TARGET_DISABLE) $(TARGET_XCFLAGS)
149TARGET_LDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS)
150TARGET_SHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS)
151TARGET_LIBS= -lm $(TARGET_XLIBS)
152ifneq (,$(CCDEBUG))
153 TARGET_STRIP= @:
154endif
155
156HOST_CFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH)
157HOST_LDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS)
158HOST_LIBS= $(HOST_XLIBS)
159
160DASM_DIR= ../dynasm
161DASM= $(DASM_LUA) $(DASM_DIR)/dynasm.lua
162DASM_FLAGS=
163DASM_DISTFLAGS= -LN
164
165BUILDVM_O= buildvm.o buildvm_asm.o buildvm_peobj.o buildvm_lib.o buildvm_fold.o
166BUILDVM_T= buildvm
167
168HOST_O= $(BUILDVM_O)
169HOST_T= $(BUILDVM_T)
170
171LJVM_S= lj_vm.s
172LJVM_O= lj_vm.o
173LJVM_BOUT= $(LJVM_S)
174LJVM_MODE= asm
175
176LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
177 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o
178LJLIB_C= $(LJLIB_O:.o=.c)
179
180LJCORE_O= lj_gc.o lj_err.o lj_ctype.o lj_bc.o lj_obj.o \
181 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o \
182 lj_state.o lj_dispatch.o lj_vmevent.o lj_api.o \
183 lj_lex.o lj_parse.o \
184 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
185 lj_opt_dce.o lj_opt_loop.o \
186 lj_mcode.o lj_snap.o lj_record.o lj_asm.o lj_trace.o lj_gdbjit.o \
187 lj_lib.o lj_alloc.o lib_aux.o \
188 $(LJLIB_O) lib_init.o
189
190LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
191
192# NYI: Need complete support for building as a shared library on POSIX.
193# This is currently *only* suitable for MinGW and Cygwin, see below.
194LUAJIT_O= luajit.o
195LUAJIT_SO= luajit.so
196LUAJIT_T= luajit
197
198LIB_VMDEF= ../lib/vmdef.lua
199
200TARGET_DEP= $(LIB_VMDEF)
201TARGET_O= $(LJVMCORE_O) $(LUAJIT_O)
202TARGET_T= $(LUAJIT_T)
203
204ALL_GEN= $(LJVM_S) lj_ffdef.h lj_libdef.h lj_recdef.h $(LIB_VMDEF) lj_folddef.h
205ALL_DYNGEN= buildvm_x86.h
206WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest
207ALL_RM= $(LUAJIT_T) $(LUAJIT_SO) $(HOST_T) $(ALL_GEN) *.o $(WIN_RM)
208
209ifeq (Windows,$(TARGET_SYS))
210 LJVM_BOUT= $(LJVM_O)
211 LJVM_MODE= peobj
212 LIB_VMDEF= ..\lib\vmdef.lua
213 # Imported symbols are bound to a specific DLL name under Windows.
214 LUAJIT_SO= lua51.dll
215 LUAJIT_T= luajit.exe
216 BUILDVM_T= buildvm.exe
217 #
218 # You can comment out the following two lines to build a static executable.
219 # But then you won't be able to dynamically load any C modules, because
220 # they bind to lua51.dll.
221 #
222 TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL
223 TARGET_O= $(LUAJIT_SO) $(LUAJIT_O)
224endif
225
226##############################################################################
227
228default: $(TARGET_T)
229
230all: $(TARGET_T)
231
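# Amalgamated build: compiles the whole core (including the libraries) as a
# single ljamalg.c translation unit instead of the individual LJCORE_O objects.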
232amalg:
233 @grep "^[+|]" ljamalg.c
234 $(MAKE) all "LJCORE_O=ljamalg.o"
235
236MAKE_TARGETS= amalg
237
238##############################################################################
239
240buildvm_x86.h: buildvm_x86.dasc
241 $(E) "DYNASM $@"
242 $(Q)$(DASM) $(DASM_FLAGS) -o $@ buildvm_x86.dasc
243
244$(BUILDVM_T): $(BUILDVM_O)
245 $(E) "HOSTLINK $@"
246 $(Q)$(HOST_CC) $(HOST_LDFLAGS) -o $@ $(BUILDVM_O) $(HOST_LIBS)
247
248$(LJVM_BOUT): $(BUILDVM_T)
249 $(E) "BUILDVM $@"
250 $(Q)./$(BUILDVM_T) -m $(LJVM_MODE) -o $@
251
252lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C)
253 $(E) "BUILDVM $@"
254 $(Q)./$(BUILDVM_T) -m ffdef -o $@ $(LJLIB_C)
255
256lj_libdef.h: $(BUILDVM_T) $(LJLIB_C)
257 $(E) "BUILDVM $@"
258 $(Q)./$(BUILDVM_T) -m libdef -o $@ $(LJLIB_C)
259
260lj_recdef.h: $(BUILDVM_T) $(LJLIB_C)
261 $(E) "BUILDVM $@"
262 $(Q)./$(BUILDVM_T) -m recdef -o $@ $(LJLIB_C)
263
264$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C)
265 $(E) "BUILDVM $@"
266 $(Q)./$(BUILDVM_T) -m vmdef -o $@ $(LJLIB_C)
267
268lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
269 $(E) "BUILDVM $@"
270 $(Q)./$(BUILDVM_T) -m folddef -o $@ lj_opt_fold.c
271
272$(LUAJIT_SO): $(LJVMCORE_O)
273 $(E) "LINK $@"
274 $(Q)$(TARGET_CC) $(TARGET_SHLDFLAGS) -o $@ $(LJVMCORE_O) $(TARGET_LIBS)
275 $(Q)$(TARGET_STRIP) $@
276
277$(LUAJIT_T): $(TARGET_O) $(TARGET_DEP)
278 $(E) "LINK $@"
279 $(Q)$(TARGET_CC) $(TARGET_LDFLAGS) -o $@ $(TARGET_O) $(TARGET_LIBS)
280 $(Q)$(TARGET_STRIP) $@
281 $(E) "OK Successfully built LuaJIT"
282
283##############################################################################
284
285%.o: %.c
286 $(E) "CC $@"
287 $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $<
288
289%.o: %.s
290 $(E) "ASM $@"
291 $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $<
292
293$(HOST_O): %.o: %.c
294 $(E) "HOSTCC $@"
295 $(Q)$(HOST_CC) $(HOST_CFLAGS) -c -o $@ $<
296
297include Makefile.dep
298
299##############################################################################
300
301clean:
302 $(HOST_RM) $(ALL_RM)
303
304cleaner: clean
305 $(HOST_RM) $(ALL_DYNGEN)
306
307distclean: clean
308 $(E) "DYNASM $@"
309 $(Q)$(DASM) $(DASM_DISTFLAGS) -o buildvm_x86.h buildvm_x86.dasc
310
311depend:
312 @test -f lj_ffdef.h || touch lj_ffdef.h
313 @test -f lj_libdef.h || touch lj_libdef.h
314 @test -f lj_recdef.h || touch lj_recdef.h
315 @test -f lj_folddef.h || touch lj_folddef.h
316 @test -f buildvm_x86.h || touch buildvm_x86.h
317 @$(HOST_CC) $(HOST_CFLAGS) -MM *.c | sed "s|$(DASM_DIR)|\$$(DASM_DIR)|g" >Makefile.dep
318 @test -s lj_ffdef.h || $(HOST_RM) lj_ffdef.h
319 @test -s lj_libdef.h || $(HOST_RM) lj_libdef.h
320 @test -s lj_recdef.h || $(HOST_RM) lj_recdef.h
321 @test -s lj_folddef.h || $(HOST_RM) lj_folddef.h
322 @test -s buildvm_x86.h || $(HOST_RM) buildvm_x86.h
323
324.PHONY: default all $(MAKE_TARGETS) clean cleaner distclean depend
325
326##############################################################################
diff --git a/src/Makefile.dep b/src/Makefile.dep
new file mode 100644
index 00000000..b1cdd93b
--- /dev/null
+++ b/src/Makefile.dep
@@ -0,0 +1,139 @@
1buildvm.o: buildvm.c lua.h luaconf.h luajit.h lj_obj.h lj_def.h lj_arch.h \
2 lj_gc.h lj_bc.h lj_ir.h lj_frame.h lj_dispatch.h lj_jit.h lj_target.h \
3 lj_target_x86.h buildvm.h $(DASM_DIR)/dasm_proto.h $(DASM_DIR)/dasm_x86.h \
4 buildvm_x86.h lj_traceerr.h
5buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
6 lj_bc.h
7buildvm_fold.o: buildvm_fold.c lj_obj.h lua.h luaconf.h lj_def.h \
8 lj_arch.h lj_ir.h buildvm.h
9buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
10 lj_lib.h buildvm.h
11buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \
12 lj_arch.h lj_bc.h
13lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
14 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h
15lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
16 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
17 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
18lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
19 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
20lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
21 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
22lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h
23lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
24 lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \
25 lj_libdef.h
26lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
27 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \
28 lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \
29 luajit.h lj_libdef.h
30lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
31 lj_def.h lj_arch.h lj_lib.h lj_libdef.h
32lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
33 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
34lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
35 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
36lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
37 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h \
38 lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
39lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
40 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
41 lj_libdef.h
42lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
43lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
44 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
45 lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
46 lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h
47lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
48 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
49 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \
50 lj_target.h lj_target_x86.h
51lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h
52lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h
53lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
54 lj_err.h lj_errmsg.h lj_state.h lj_frame.h lj_bc.h lj_jit.h lj_ir.h \
55 lj_trace.h lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h
56lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
57 lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
58 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
59lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
60 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
61 lj_traceerr.h lj_vm.h
62lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
63 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
64 lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
65 lj_traceerr.h lj_vm.h
66lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
67 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \
68 lj_ir.h lj_dispatch.h
69lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
70 lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
71 lj_traceerr.h
72lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
73 lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h
74lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
75 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_vm.h \
76 lj_lib.h
77lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
78 lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
79 lj_traceerr.h
80lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
81 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_bc.h lj_vm.h
82lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
83lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_ir.h lj_jit.h lj_iropt.h
85lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
86 lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
87 lj_traceerr.h lj_vm.h lj_folddef.h
88lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
89 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \
90 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
91lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
93lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
94 lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
95 lj_dispatch.h lj_traceerr.h
96lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
97 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \
98 lj_bc.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
99lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
100 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
101 lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
102 lj_dispatch.h lj_traceerr.h lj_record.h lj_snap.h lj_asm.h lj_vm.h \
103 lj_recdef.h
104lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
105 lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
106 lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_x86.h
107lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
108 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
109 lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
110 lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
111lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ctype.h
113lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
114 lj_err.h lj_errmsg.h lj_tab.h
115lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_state.h \
117 lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
118 lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h lj_vm.h \
119 lj_vmevent.h lj_target.h lj_target_x86.h
120lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
121 lj_gc.h lj_udata.h
122lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
123 lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
124 lj_vm.h lj_vmevent.h
125ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
126 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
127 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h \
128 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c lj_ctype.c \
129 lj_ctype.h lj_bc.c lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c \
130 lj_meta.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c luajit.h \
131 lj_vmevent.c lj_vmevent.h lj_api.c lj_parse.h lj_lex.c lj_parse.c \
132 lj_lib.c lj_lib.h lj_ir.c lj_iropt.h lj_opt_mem.c lj_opt_fold.c \
133 lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h \
134 lj_mcode.c lj_mcode.h lj_snap.c lj_target.h lj_target_x86.h lj_record.c \
135 lj_ff.h lj_ffdef.h lj_record.h lj_asm.h lj_recdef.h lj_asm.c lj_trace.c \
136 lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lualib.h \
137 lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \
138 lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_init.c
139luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h
diff --git a/src/buildvm.c b/src/buildvm.c
new file mode 100644
index 00000000..b3738db4
--- /dev/null
+++ b/src/buildvm.c
@@ -0,0 +1,438 @@
1/*
2** LuaJIT VM builder.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** This is a tool to build the hand-tuned assembler code required for
6** LuaJIT's bytecode interpreter. It supports a variety of output formats
7** to feed different toolchains (see usage() below).
8**
9** This tool is not particularly optimized because it's only used while
10** _building_ LuaJIT. There's no point in distributing or installing it.
11** Only the object code generated by this tool is linked into LuaJIT.
12**
13** Caveat: some memory is not free'd, error handling is lazy.
14** It's a one-shot tool -- any effort fixing this would be wasted.
15*/
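
/*
** Typical invocations, as used by the accompanying Makefile:
**
**   buildvm -m asm -o lj_vm.s
**   buildvm -m ffdef -o lj_ffdef.h lib_base.c lib_math.c ...  (the LJLIB_C list)
**   buildvm -m vmdef -o ../lib/vmdef.lua lib_base.c lib_math.c ...
**   buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
*/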
16
17#include "lua.h"
18#include "luajit.h"
19
20#ifdef LUA_USE_WIN
21#include <fcntl.h>
22#include <io.h>
23#endif
24
25#include "lj_obj.h"
26#include "lj_gc.h"
27#include "lj_bc.h"
28#include "lj_ir.h"
29#include "lj_frame.h"
30#include "lj_dispatch.h"
31#include "lj_target.h"
32
33#include "buildvm.h"
34
35/* ------------------------------------------------------------------------ */
36
37/* DynASM glue definitions. */
38#define Dst ctx
39#define Dst_DECL BuildCtx *ctx
40#define Dst_REF (ctx->D)
41
42#include "../dynasm/dasm_proto.h"
43
44/* Glue macros for DynASM. */
45#define DASM_M_GROW(ctx, t, p, sz, need) \
46 do { \
47 size_t _sz = (sz), _need = (need); \
48 if (_sz < _need) { \
49 if (_sz < 16) _sz = 16; \
50 while (_sz < _need) _sz += _sz; \
51 (p) = (t *)realloc((p), _sz); \
52 if ((p) == NULL) exit(1); \
53 (sz) = _sz; \
54 } \
55 } while(0)
56
57#define DASM_M_FREE(ctx, p, sz) free(p)
58
59static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
60
61#define DASM_EXTERN(ctx, addr, idx, type) \
62 collect_reloc(ctx, addr, idx, type)
63
64/* ------------------------------------------------------------------------ */
65
66/* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */
67#define DASM_ALIGNED_WRITES 1
68
69/* Embed architecture-specific DynASM encoder and backend. */
70#if LJ_TARGET_X86
71#include "../dynasm/dasm_x86.h"
72#include "buildvm_x86.h"
73#else
74#error "No support for this architecture (yet)"
75#endif
76
77/* ------------------------------------------------------------------------ */
78
79void owrite(BuildCtx *ctx, const void *ptr, size_t sz)
80{
81 if (fwrite(ptr, 1, sz, ctx->fp) != sz) {
82 fprintf(stderr, "Error: cannot write to output file: %s\n",
83 strerror(errno));
84 exit(1);
85 }
86}
87
88/* ------------------------------------------------------------------------ */
89
90/* Emit code as raw bytes. Only used for DynASM debugging. */
91static void emit_raw(BuildCtx *ctx)
92{
93 owrite(ctx, ctx->code, ctx->codesz);
94}
95
96/* -- Build machine code -------------------------------------------------- */
97
98/* Collect external relocations. */
99static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type)
100{
101 if (ctx->nreloc >= BUILD_MAX_RELOC) {
102 fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n");
103 exit(1);
104 }
105 ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code);
106 ctx->reloc[ctx->nreloc].sym = idx;
107 ctx->reloc[ctx->nreloc].type = type;
108 ctx->nreloc++;
109 return 0; /* Encode symbol offset of 0. */
110}
111
112/* Naive insertion sort. Performance doesn't matter here. */
113static void perm_insert(int *perm, int32_t *ofs, int i)
114{
115 perm[i] = i;
116 while (i > 0) {
117 int a = perm[i-1];
118 int b = perm[i];
119 if (ofs[a] <= ofs[b]) break;
120 perm[i] = a;
121 perm[i-1] = b;
122 i--;
123 }
124}
125
126/* Build the machine code. */
127static int build_code(BuildCtx *ctx)
128{
129 int status;
130 int i, j;
131
132 /* Initialize DynASM structures. */
133 ctx->nglob = GLOB__MAX;
134 ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *));
135 memset(ctx->glob, 0, ctx->nglob*sizeof(void *));
136 ctx->nreloc = 0;
137
138 ctx->extnames = extnames;
139 ctx->globnames = globnames;
140
141 ctx->dasm_ident = DASM_IDENT;
142 ctx->dasm_arch = DASM_ARCH;
143
144 dasm_init(Dst, DASM_MAXSECTION);
145 dasm_setupglobal(Dst, ctx->glob, ctx->nglob);
146 dasm_setup(Dst, build_actionlist);
147
148 /* Call arch-specific backend to emit the code. */
149 ctx->npc = build_backend(ctx);
150
151 /* Finalize the code. */
152 (void)dasm_checkstep(Dst, DASM_SECTION_CODE);
153 if ((status = dasm_link(Dst, &ctx->codesz))) return status;
154 ctx->code = (uint8_t *)malloc(ctx->codesz);
155 if ((status = dasm_encode(Dst, (void *)ctx->code))) return status;
156
157 /* Allocate the symbol offset and permutation tables. */
158 ctx->nsym = ctx->npc + ctx->nglob;
159 ctx->perm = (int *)malloc((ctx->nsym+1)*sizeof(int));
160 ctx->sym_ofs = (int32_t *)malloc((ctx->nsym+1)*sizeof(int32_t));
161
162 /* Collect the opcodes (PC labels). */
163 for (i = 0; i < ctx->npc; i++) {
164 int32_t n = dasm_getpclabel(Dst, i);
165 if (n < 0) return 0x22000000|i;
166 ctx->sym_ofs[i] = n;
167 perm_insert(ctx->perm, ctx->sym_ofs, i);
168 }
169
170 /* Collect the globals (named labels). */
171 for (j = 0; j < ctx->nglob; j++, i++) {
172 const char *gl = globnames[j];
173 int len = (int)strlen(gl);
174 if (!ctx->glob[j]) {
175 fprintf(stderr, "Error: undefined global %s\n", gl);
176 exit(2);
177 }
178 if (len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z')
179 ctx->sym_ofs[i] = -1; /* Skip the _Z symbols. */
180 else
181 ctx->sym_ofs[i] = (int32_t)((uint8_t *)(ctx->glob[j]) - ctx->code);
182 perm_insert(ctx->perm, ctx->sym_ofs, i);
183 }
184
185 /* Close the address range. */
186 ctx->sym_ofs[i] = (int32_t)ctx->codesz;
187 perm_insert(ctx->perm, ctx->sym_ofs, i);
188
189 dasm_free(Dst);
190
191 return 0;
192}
193
194/* -- Generate VM enums --------------------------------------------------- */
195
196const char *const bc_names[] = {
197#define BCNAME(name, ma, mb, mc, mt) #name,
198BCDEF(BCNAME)
199#undef BCNAME
200 NULL
201};
202
203const char *const ir_names[] = {
204#define IRNAME(name, m, m1, m2) #name,
205IRDEF(IRNAME)
206#undef IRNAME
207 NULL
208};
209
210const char *const irfpm_names[] = {
211#define FPMNAME(name) #name,
212IRFPMDEF(FPMNAME)
213#undef FPMNAME
214 NULL
215};
216
217const char *const irfield_names[] = {
218#define FLNAME(name, type, field) #name,
219IRFLDEF(FLNAME)
220#undef FLNAME
221 NULL
222};
223
224static const char *const trace_errors[] = {
225#define TREDEF(name, msg) msg,
226#include "lj_traceerr.h"
227 NULL
228};
229
230static const char *lower(char *buf, const char *s)
231{
232 char *p = buf;
233 while (*s) {
234 *p++ = (*s >= 'A' && *s <= 'Z') ? *s+0x20 : *s;
235 s++;
236 }
237 *p = '\0';
238 return buf;
239}
240
241/* Emit VM definitions as Lua code for debug modules. */
242static void emit_vmdef(BuildCtx *ctx)
243{
244 char buf[80];
245 int i;
246 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
247 fprintf(ctx->fp, "module(...)\n\n");
248
249 fprintf(ctx->fp, "bcnames = \"");
250 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
251 fprintf(ctx->fp, "\"\n\n");
252
253 fprintf(ctx->fp, "irnames = \"");
254 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
255 fprintf(ctx->fp, "\"\n\n");
256
257 fprintf(ctx->fp, "irfpm = { [0]=");
258 for (i = 0; irfpm_names[i]; i++)
259 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
260 fprintf(ctx->fp, "}\n\n");
261
262 fprintf(ctx->fp, "irfield = { [0]=");
263 for (i = 0; irfield_names[i]; i++) {
264 char *p;
265 lower(buf, irfield_names[i]);
266 p = strchr(buf, '_');
267 if (p) *p = '.';
268 fprintf(ctx->fp, "\"%s\", ", buf);
269 }
270 fprintf(ctx->fp, "}\n\n");
271
272 fprintf(ctx->fp, "traceerr = {\n[0]=");
273 for (i = 0; trace_errors[i]; i++)
274 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
275 fprintf(ctx->fp, "}\n\n");
276}
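
/*
** The generated vmdef.lua therefore looks roughly like this (illustrative
** excerpt; the actual names and counts come from the definition headers):
**
**   -- This is a generated file. DO NOT EDIT!
**   module(...)
**   bcnames = "ISLT  ISGE  ..."            -- one %-6s padded name per bytecode
**   irnames = "..."                        -- likewise for the IR opcodes
**   irfpm = { [0]="floor", "ceil", ... }
**   irfield = { [0]="...", }               -- lowercased, '_' becomes '.'
**   traceerr = {
**   [0]="...",                             -- messages from lj_traceerr.h
**   }
*/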
277
278/* -- Argument parsing ---------------------------------------------------- */
279
280/* Build mode names. */
281static const char *const modenames[] = {
282#define BUILDNAME(name) #name,
283BUILDDEF(BUILDNAME)
284#undef BUILDNAME
285 NULL
286};
287
288/* Print usage information and exit. */
289static void usage(void)
290{
291 int i;
292 fprintf(stderr, LUAJIT_VERSION " VM builder.\n");
293 fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n");
294 fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n");
295 fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n");
296 fprintf(stderr, "Available modes:\n");
297 for (i = 0; i < BUILD__MAX; i++)
298 fprintf(stderr, " %s\n", modenames[i]);
299 exit(1);
300}
301
302/* Parse the output mode name. */
303static BuildMode parsemode(const char *mode)
304{
305 int i;
306 for (i = 0; modenames[i]; i++)
307 if (!strcmp(mode, modenames[i]))
308 return (BuildMode)i;
309 usage();
310 return (BuildMode)-1;
311}
312
313/* Parse arguments. */
314static void parseargs(BuildCtx *ctx, char **argv)
315{
316 const char *a;
317 int i;
318 ctx->mode = (BuildMode)-1;
319 ctx->outname = "-";
320 for (i = 1; (a = argv[i]) != NULL; i++) {
321 if (a[0] != '-')
322 break;
323 switch (a[1]) {
324 case '-':
325 if (a[2]) goto err;
326 i++;
327 goto ok;
328 case '\0':
329 goto ok;
330 case 'm':
331 i++;
332 if (a[2] || argv[i] == NULL) goto err;
333 ctx->mode = parsemode(argv[i]);
334 break;
335 case 'o':
336 i++;
337 if (a[2] || argv[i] == NULL) goto err;
338 ctx->outname = argv[i];
339 break;
340 default: err:
341 usage();
342 break;
343 }
344 }
345ok:
346 ctx->args = argv+i;
347 if (ctx->mode == (BuildMode)-1) goto err;
348}
349
350int main(int argc, char **argv)
351{
352 BuildCtx ctx_;
353 BuildCtx *ctx = &ctx_;
354 int status, binmode;
355
356 UNUSED(argc);
357 parseargs(ctx, argv);
358
359 if ((status = build_code(ctx))) {
360 fprintf(stderr,"Error: DASM error %08x\n", status);
361 return 1;
362 }
363
364 switch (ctx->mode) {
365#if LJ_TARGET_X86ORX64
366 case BUILD_peobj:
367#endif
368 case BUILD_raw:
369 binmode = 1;
370 break;
371 default:
372 binmode = 0;
373 break;
374 }
375
376 if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') {
377 ctx->fp = stdout;
378#ifdef LUA_USE_WIN
379 if (binmode)
380 _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */
381#endif
382 } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w"))) {
383 fprintf(stderr, "Error: cannot open output file '%s': %s\n",
384 ctx->outname, strerror(errno));
385 exit(1);
386 }
387
388 switch (ctx->mode) {
389 case BUILD_asm:
390#if defined(__ELF__)
391 ctx->mode = BUILD_elfasm;
392#elif defined(__MACH__)
393 ctx->mode = BUILD_machasm;
394#else
395 fprintf(stderr,"Error: auto-guessing the system assembler failed\n");
396 return 1;
397#endif
398 /* fallthrough */
399 case BUILD_elfasm:
400 case BUILD_coffasm:
401 case BUILD_machasm:
402 emit_asm(ctx);
403 emit_asm_debug(ctx);
404 break;
405#if LJ_TARGET_X86ORX64
406 case BUILD_peobj:
407 emit_peobj(ctx);
408 break;
409#endif
410 case BUILD_raw:
411 emit_raw(ctx);
412 break;
413 case BUILD_vmdef:
414 emit_vmdef(ctx);
415 /* fallthrough */
416 case BUILD_ffdef:
417 case BUILD_libdef:
418 case BUILD_recdef:
419 emit_lib(ctx);
420 break;
421 case BUILD_folddef:
422 emit_fold(ctx);
423 break;
424 default:
425 break;
426 }
427
428 fflush(ctx->fp);
429 if (ferror(ctx->fp)) {
430 fprintf(stderr, "Error: cannot write to output file: %s\n",
431 strerror(errno));
432 exit(1);
433 }
434 fclose(ctx->fp);
435
436 return 0;
437}
438
diff --git a/src/buildvm.h b/src/buildvm.h
new file mode 100644
index 00000000..e55527fd
--- /dev/null
+++ b/src/buildvm.h
@@ -0,0 +1,106 @@
1/*
2** LuaJIT VM builder.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _BUILDVM_H
7#define _BUILDVM_H
8
9#include <sys/types.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <errno.h>
14
15#include "lj_def.h"
16#include "lj_arch.h"
17
18/* Hardcoded limits. Increase as needed. */
19#define BUILD_MAX_RELOC 100 /* Max. number of relocations. */
20#define BUILD_MAX_FOLD 4096 /* Max. number of fold rules. */
21
22/* Prefix for scanned library definitions. */
23#define LIBDEF_PREFIX "LJLIB_"
24
25/* Prefix for scanned fold definitions. */
26#define FOLDDEF_PREFIX "LJFOLD"
27
28/* Prefixes for generated labels. */
29#define LABEL_PREFIX "lj_"
30#define LABEL_PREFIX_BC LABEL_PREFIX "BC_"
31#define LABEL_PREFIX_FF LABEL_PREFIX "ff_"
32#define LABEL_PREFIX_CF LABEL_PREFIX "cf_"
33#define LABEL_PREFIX_FFH LABEL_PREFIX "ffh_"
34#define LABEL_PREFIX_LIBCF LABEL_PREFIX "lib_cf_"
35#define LABEL_PREFIX_LIBINIT LABEL_PREFIX "lib_init_"
36
37/* Extra labels. */
38#define LABEL_ASM_BEGIN LABEL_PREFIX "vm_asm_begin"
39#define LABEL_OP_OFS LABEL_PREFIX "vm_op_ofs"
40
41/* Forward declaration. */
42struct dasm_State;
43
44/* Build modes. */
45#if LJ_TARGET_X86ORX64
46#define BUILDDEFX(_) _(peobj)
47#else
48#define BUILDDEFX(_)
49#endif
50
51#define BUILDDEF(_) \
52 _(asm) _(elfasm) _(coffasm) _(machasm) BUILDDEFX(_) _(raw) \
53 _(ffdef) _(libdef) _(recdef) _(vmdef) \
54 _(folddef)
55
56typedef enum {
57#define BUILDENUM(name) BUILD_##name,
58BUILDDEF(BUILDENUM)
59#undef BUILDENUM
60 BUILD__MAX
61} BuildMode;
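
/*
** I.e. BUILDDEF is an X-macro list: BUILDENUM above expands it to BUILD_asm,
** BUILD_elfasm, BUILD_coffasm, BUILD_machasm, (BUILD_peobj on x86/x64,)
** BUILD_raw, BUILD_ffdef, BUILD_libdef, BUILD_recdef, BUILD_vmdef and
** BUILD_folddef. buildvm.c expands the same list again to build the matching
** table of mode name strings for parsing the -m argument.
*/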
62
63/* Code relocation. */
64typedef struct BuildReloc {
65 int32_t ofs;
66 int sym;
67 int type;
68} BuildReloc;
69
70/* Build context structure. */
71typedef struct BuildCtx {
72 /* DynASM state pointer. Should be first member. */
73 struct dasm_State *D;
74 /* Parsed command line. */
75 BuildMode mode;
76 FILE *fp;
77 const char *outname;
78 char **args;
79 /* Code and symbols generated by DynASM. */
80 uint8_t *code;
81 size_t codesz;
82 int npc, nglob, nsym, nreloc;
83 void **glob;
84 int *perm;
85 int32_t *sym_ofs;
86 /* Strings generated by DynASM. */
87 const char *const *extnames;
88 const char *const *globnames;
89 const char *dasm_ident;
90 const char *dasm_arch;
91 /* Relocations. */
92 BuildReloc reloc[BUILD_MAX_RELOC];
93} BuildCtx;
94
95extern void owrite(BuildCtx *ctx, const void *ptr, size_t sz);
96extern void emit_asm(BuildCtx *ctx);
97extern void emit_peobj(BuildCtx *ctx);
98extern void emit_lib(BuildCtx *ctx);
99extern void emit_fold(BuildCtx *ctx);
100
101extern const char *const bc_names[];
102extern const char *const ir_names[];
103extern const char *const irfpm_names[];
104extern const char *const irfield_names[];
105
106#endif
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c
new file mode 100644
index 00000000..e6972bd5
--- /dev/null
+++ b/src/buildvm_asm.c
@@ -0,0 +1,220 @@
1/*
2** LuaJIT VM builder: Assembler source code emitter.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include "buildvm.h"
7#include "lj_bc.h"
8
9/* ------------------------------------------------------------------------ */
10
11/* Emit bytes piecewise as assembler text. */
12static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n)
13{
14 int i;
15 for (i = 0; i < n; i++) {
16 if ((i & 15) == 0)
17 fprintf(ctx->fp, "\t.byte %d", p[i]);
18 else
19 fprintf(ctx->fp, ",%d", p[i]);
20 if ((i & 15) == 15) putc('\n', ctx->fp);
21 }
22 if ((n & 15) != 0) putc('\n', ctx->fp);
23}
24
25/* Emit a relocation. */
26static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r)
27{
28 const char *sym = ctx->extnames[r->sym];
29 switch (ctx->mode) {
30 case BUILD_elfasm:
31 if (r->type)
32 fprintf(ctx->fp, "\t.long %s-.-4\n", sym);
33 else
34 fprintf(ctx->fp, "\t.long %s\n", sym);
35 break;
36 case BUILD_coffasm:
37 fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", sym);
38 if (r->type)
39 fprintf(ctx->fp, "\t.long _%s-.-4\n", sym);
40 else
41 fprintf(ctx->fp, "\t.long _%s\n", sym);
42 break;
43 default: /* BUILD_machasm for relative relocations handled below. */
44 fprintf(ctx->fp, "\t.long _%s\n", sym);
45 break;
46 }
47}
48
49static const char *const jccnames[] = {
50 "jo", "jno", "jb", "jnb", "jz", "jnz", "jbe", "ja",
51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
52};
53
54/* Emit relocation for the incredibly stupid OSX assembler. */
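/* Instead of emitting the instruction bytes plus a .long relocation (as the
** other modes do), this decodes the trailing call/jmp/jcc opcode and re-emits
** it as an assembler mnemonic referencing the symbol, so the assembler
** generates the PC-relative relocation itself.
*/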
55static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
56 const char *sym)
57{
58 const char *opname = NULL;
59 if (--n < 0) goto err;
60 if (cp[n] == 0xe8) {
61 opname = "call";
62 } else if (cp[n] == 0xe9) {
63 opname = "jmp";
64 } else if (cp[n] >= 0x80 && cp[n] <= 0x8f && n > 0 && cp[n-1] == 0x0f) {
65 opname = jccnames[cp[n]-0x80];
66 n--;
67 } else {
68err:
69 fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
70 sym);
71 exit(1);
72 }
73 emit_asm_bytes(ctx, cp, n);
74 if (!strncmp(sym, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
75 fprintf(ctx->fp, "\t%s _%s\n", opname, sym);
76 else
77 fprintf(ctx->fp, "\t%s _" LABEL_PREFIX "wrapper_%s\n", opname, sym);
78}
79
80/* Emit an assembler label. */
81static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc)
82{
83 switch (ctx->mode) {
84 case BUILD_elfasm:
85 fprintf(ctx->fp,
86 "\n\t.globl %s\n"
87 "\t.hidden %s\n"
88 "\t.type %s, @%s\n"
89 "\t.size %s, %d\n"
90 "%s:\n",
91 name, name, name, isfunc ? "function" : "object", name, size, name);
92 break;
93 case BUILD_coffasm:
94 fprintf(ctx->fp, "\n\t.globl _%s\n", name);
95 if (isfunc)
96 fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", name);
97 fprintf(ctx->fp, "_%s:\n", name);
98 break;
99 case BUILD_machasm:
100 fprintf(ctx->fp,
101 "\n\t.private_extern _%s\n"
102 "_%s:\n", name, name);
103 break;
104 default:
105 break;
106 }
107}
108
109/* Emit alignment. */
110static void emit_asm_align(BuildCtx *ctx, int bits)
111{
112 switch (ctx->mode) {
113 case BUILD_elfasm:
114 case BUILD_coffasm:
115 fprintf(ctx->fp, "\t.p2align %d\n", bits);
116 break;
117 case BUILD_machasm:
118 fprintf(ctx->fp, "\t.align %d\n", bits);
119 break;
120 default:
121 break;
122 }
123}
124
125/* ------------------------------------------------------------------------ */
126
127/* Emit assembler source code. */
128void emit_asm(BuildCtx *ctx)
129{
130 char name[80];
131 int32_t prev;
132 int i, pi, rel;
133
134 fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
135 fprintf(ctx->fp, "\t.text\n");
136 emit_asm_align(ctx, 4);
137
138 emit_asm_label(ctx, LABEL_ASM_BEGIN, 0, 1);
139 if (ctx->mode == BUILD_elfasm)
140 fprintf(ctx->fp, ".Lbegin:\n");
141
142 i = 0;
143 do {
144 pi = ctx->perm[i++];
145 prev = ctx->sym_ofs[pi];
146 } while (prev < 0); /* Skip the _Z symbols. */
147
148 for (rel = 0; i <= ctx->nsym; i++) {
149 int ni = ctx->perm[i];
150 int32_t next = ctx->sym_ofs[ni];
151 int size = (int)(next - prev);
152 int32_t stop = next;
153 if (pi >= ctx->npc) {
154 sprintf(name, LABEL_PREFIX "%s", ctx->globnames[pi-ctx->npc]);
155 emit_asm_label(ctx, name, size, 1);
156#if LJ_HASJIT
157 } else {
158#else
159 } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL ||
160 pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL ||
161 pi == BC_ILOOP)) {
162#endif
163 sprintf(name, LABEL_PREFIX_BC "%s", bc_names[pi]);
164 emit_asm_label(ctx, name, size, 1);
165 }
166 while (rel < ctx->nreloc && ctx->reloc[rel].ofs < stop) {
167 int n = ctx->reloc[rel].ofs - prev;
168 if (ctx->mode == BUILD_machasm && ctx->reloc[rel].type != 0) {
169 emit_asm_reloc_mach(ctx, ctx->code+prev, n,
170 ctx->extnames[ctx->reloc[rel].sym]);
171 } else {
172 emit_asm_bytes(ctx, ctx->code+prev, n);
173 emit_asm_reloc(ctx, &ctx->reloc[rel]);
174 }
175 prev += n+4;
176 rel++;
177 }
178 emit_asm_bytes(ctx, ctx->code+prev, stop-prev);
179 prev = next;
180 pi = ni;
181 }
182
183 switch (ctx->mode) {
184 case BUILD_elfasm:
185 fprintf(ctx->fp, "\n\t.section .rodata\n");
186 break;
187 case BUILD_coffasm:
188 fprintf(ctx->fp, "\n\t.section .rdata,\"dr\"\n");
189 break;
190 case BUILD_machasm:
191 fprintf(ctx->fp, "\n\t.const\n");
192 break;
193 default:
194 break;
195 }
196 emit_asm_align(ctx, 5);
197
198 emit_asm_label(ctx, LABEL_OP_OFS, 2*ctx->npc, 0);
199 for (i = 0; i < ctx->npc; i++)
200 fprintf(ctx->fp, "\t.short %d\n", ctx->sym_ofs[i]);
201
202 fprintf(ctx->fp, "\n");
203 switch (ctx->mode) {
204 case BUILD_elfasm:
205 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\",@progbits\n");
206 /* fallthrough */
207 case BUILD_coffasm:
208 fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
209 break;
210 case BUILD_machasm:
211 fprintf(ctx->fp,
212 "\t.cstring\n"
213 "\t.ascii \"%s\\0\"\n", ctx->dasm_ident);
214 break;
215 default:
216 break;
217 }
218 fprintf(ctx->fp, "\n");
219}
220
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c
new file mode 100644
index 00000000..5f065643
--- /dev/null
+++ b/src/buildvm_fold.c
@@ -0,0 +1,206 @@
1/*
2** LuaJIT VM builder: IR folding hash table generator.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include "lj_obj.h"
7#include "lj_ir.h"
8
9#include "buildvm.h"
10
11/* Context for the folding hash table generator. */
12static int lineno;
13static int funcidx;
14static uint32_t foldkeys[BUILD_MAX_FOLD];
15static uint32_t nkeys;
16
17/* Try to fill the hash table with keys using the hash parameters. */
18static int tryhash(uint32_t *htab, uint32_t sz, uint32_t r, int dorol)
19{
20 uint32_t i;
21 if (dorol && ((r & 31) == 0 || (r>>5) == 0))
22 return 0; /* Avoid zero rotates. */
23 memset(htab, 0xff, (sz+1)*sizeof(uint32_t));
24 for (i = 0; i < nkeys; i++) {
25 uint32_t key = foldkeys[i];
26 uint32_t k = key & 0xffffff;
27 uint32_t h = (dorol ? lj_rol(lj_rol(k, r>>5) - k, r&31) :
28 (((k << (r>>5)) - k) << (r&31))) % sz;
29 if (htab[h] != 0xffffffff) { /* Collision on primary slot. */
30 if (htab[h+1] != 0xffffffff) { /* Collision on secondary slot. */
31 /* Try to move the colliding key, if possible. */
32 if (h < sz-1 && htab[h+2] == 0xffffffff) {
33 uint32_t k2 = htab[h+1] & 0xffffff;
34 uint32_t h2 = (dorol ? lj_rol(lj_rol(k2, r>>5) - k2, r&31) :
35 (((k2 << (r>>5)) - k2) << (r&31))) % sz;
36 if (h2 != h+1) return 0; /* Cannot resolve collision. */
37 htab[h+2] = htab[h+1]; /* Move colliding key to secondary slot. */
38 } else {
39 return 0; /* Collision. */
40 }
41 }
42 htab[h+1] = key;
43 } else {
44 htab[h] = key;
45 }
46 }
47 return 1; /* Success, all keys could be stored. */
48}
49
50/* Print the generated hash table. */
51static void printhash(BuildCtx *ctx, uint32_t *htab, uint32_t sz)
52{
53 uint32_t i;
54 fprintf(ctx->fp, "static const uint32_t fold_hash[%d] = {\n0x%08x",
55 sz+1, htab[0]);
56 for (i = 1; i < sz+1; i++)
57 fprintf(ctx->fp, ",\n0x%08x", htab[i]);
58 fprintf(ctx->fp, "\n};\n\n");
59}
60
61/* Exhaustive search for the shortest semi-perfect hash table. */
62static void makehash(BuildCtx *ctx)
63{
64 uint32_t htab[BUILD_MAX_FOLD*2+1];
65 uint32_t sz, r;
66 /* Search for the smallest hash table with an odd size. */
67 for (sz = (nkeys|1); sz < BUILD_MAX_FOLD*2; sz += 2) {
68 /* First try all shift hash combinations. */
69 for (r = 0; r < 32*32; r++) {
70 if (tryhash(htab, sz, r, 0)) {
71 printhash(ctx, htab, sz);
72 fprintf(ctx->fp,
73 "#define fold_hashkey(k)\t(((((k)<<%u)-(k))<<%u)%%%u)\n\n",
74 r>>5, r&31, sz);
75 return;
76 }
77 }
78 /* Then try all rotate hash combinations. */
79 for (r = 0; r < 32*32; r++) {
80 if (tryhash(htab, sz, r, 1)) {
81 printhash(ctx, htab, sz);
82 fprintf(ctx->fp,
83 "#define fold_hashkey(k)\t(lj_rol(lj_rol((k),%u)-(k),%u)%%%u)\n\n",
84 r>>5, r&31, sz);
85 return;
86 }
87 }
88 }
89 fprintf(stderr, "Error: search for perfect hash failed\n");
90 exit(1);
91}
92
93/* Parse one token of a fold rule. */
94static uint32_t nexttoken(char **pp, int allowlit, int allowany)
95{
96 char *p = *pp;
97 if (p) {
98 uint32_t i;
99 char *q = strchr(p, ' ');
100 if (q) *q++ = '\0';
101 *pp = q;
102 if (allowlit && !strncmp(p, "IRFPM_", 6)) {
103 for (i = 0; irfpm_names[i]; i++)
104 if (!strcmp(irfpm_names[i], p+6))
105 return i;
106 } else if (allowlit && !strncmp(p, "IRFL_", 5)) {
107 for (i = 0; irfield_names[i]; i++)
108 if (!strcmp(irfield_names[i], p+5))
109 return i;
110 } else if (allowany && !strcmp("any", p)) {
111 return 0xff;
112 } else {
113 for (i = 0; ir_names[i]; i++)
114 if (!strcmp(ir_names[i], p))
115 return i;
116 }
117 fprintf(stderr, "Error: bad fold definition token \"%s\" at line %d\n", p, lineno);
118 exit(1);
119 }
120 return 0;
121}
122
123/* Parse a fold rule. */
124static void foldrule(char *p)
125{
126 uint32_t op = nexttoken(&p, 0, 0);
127 uint32_t left = nexttoken(&p, 0, 1);
128 uint32_t right = nexttoken(&p, 1, 1);
129 uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right;
130 uint32_t i;
131 if (nkeys >= BUILD_MAX_FOLD) {
132 fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n");
133 exit(1);
134 }
135 /* Simple insertion sort to detect duplicates. */
136 for (i = nkeys; i > 0; i--) {
137 if ((foldkeys[i-1]&0xffffff) < (key & 0xffffff))
138 break;
139 if ((foldkeys[i-1]&0xffffff) == (key & 0xffffff)) {
140 fprintf(stderr, "Error: duplicate fold definition at line %d\n", lineno);
141 exit(1);
142 }
143 foldkeys[i] = foldkeys[i-1];
144 }
145 foldkeys[i] = key;
146 nkeys++;
147}
148
149/* Emit C source code for IR folding hash table. */
150void emit_fold(BuildCtx *ctx)
151{
152 char buf[256]; /* We don't care about analyzing lines longer than that. */
153 const char *fname = ctx->args[0];
154 FILE *fp;
155
156 if (fname == NULL) {
157 fprintf(stderr, "Error: missing input filename\n");
158 exit(1);
159 }
160
161 if (fname[0] == '-' && fname[1] == '\0') {
162 fp = stdin;
163 } else {
164 fp = fopen(fname, "r");
165 if (!fp) {
166 fprintf(stderr, "Error: cannot open input file '%s': %s\n",
167 fname, strerror(errno));
168 exit(1);
169 }
170 }
171
172 fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
173 fprintf(ctx->fp, "static const FoldFunc fold_func[] = {\n");
174
175 lineno = 0;
176 funcidx = 0;
177 nkeys = 0;
178 while (fgets(buf, sizeof(buf), fp) != NULL) {
179 lineno++;
180 /* The prefix must be at the start of a line, otherwise it's ignored. */
181 if (!strncmp(buf, FOLDDEF_PREFIX, sizeof(FOLDDEF_PREFIX)-1)) {
182 char *p = buf+sizeof(FOLDDEF_PREFIX)-1;
183 char *q = strchr(p, ')');
184 if (p[0] == '(' && q) {
185 p++;
186 *q = '\0';
187 foldrule(p);
188 } else if ((p[0] == 'F' || p[0] == 'X') && p[1] == '(' && q) {
189 p += 2;
190 *q = '\0';
191 fprintf(ctx->fp, funcidx ? ",\n %s" : " %s", p);
192 funcidx++;
193 } else {
194 buf[strlen(buf)-1] = '\0';
195 fprintf(stderr, "Error: unknown fold definition tag %s%s at line %d\n",
196 FOLDDEF_PREFIX, p, lineno);
197 exit(1);
198 }
199 }
200 }
201 fclose(fp);
202 fprintf(ctx->fp, "\n};\n\n");
203
204 makehash(ctx);
205}
206
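The table generated above is only semi-perfect: every key ends up either in its primary slot h or in the secondary slot h+1, which is why tryhash() clears sz+1 entries and printhash() emits sz+1 words. A consumer therefore needs at most two probes per lookup. The following is only a sketch of such a lookup, assuming the generated fold_hash[]/fold_hashkey()/fold_func[] definitions are in scope and using the same 24-bit key packing as foldrule() (the function index lives in the top 8 bits of each stored key):

/* Sketch: two-probe lookup in the generated semi-perfect hash table. */
static int32_t fold_lookup(uint32_t key)  /* key = (op << 16) | (left << 8) | right */
{
  uint32_t h = fold_hashkey(key);
  uint32_t fh = fold_hash[h];             /* Primary slot. */
  if ((fh & 0xffffff) != key) {
    fh = fold_hash[h+1];                  /* Secondary slot. */
    if ((fh & 0xffffff) != key)
      return -1;                          /* No rule for this key combination. */
  }
  return (int32_t)(fh >> 24);             /* Index into fold_func[]. */
}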
diff --git a/src/buildvm_lib.c b/src/buildvm_lib.c
new file mode 100644
index 00000000..cc572200
--- /dev/null
+++ b/src/buildvm_lib.c
@@ -0,0 +1,365 @@
1/*
2** LuaJIT VM builder: library definition compiler.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include "lj_obj.h"
7#include "lj_lib.h"
8
9#include "buildvm.h"
10
11/* Context for library definitions. */
12static uint8_t obuf[8192];
13static uint8_t *optr;
14static char modname[80];
15static size_t modnamelen;
16static char funcname[80];
17static int modstate, regfunc;
18static int ffid, recffid;
19
20enum {
21 REGFUNC_OK,
22 REGFUNC_NOREG,
23 REGFUNC_NOREGUV
24};
25
26static void libdef_name(char *p, int kind)
27{
28 size_t n = strlen(p);
29 if (kind != LIBINIT_STRING) {
30 if (n > modnamelen && p[modnamelen] == '_' &&
31 !strncmp(p, modname, modnamelen)) {
32 p += modnamelen+1;
33 n -= modnamelen+1;
34 }
35 }
36 if (n > LIBINIT_MAXSTR) {
37 fprintf(stderr, "Error: string too long: '%s'\n", p);
38 exit(1);
39 }
40 if (optr+1+n+2 > obuf+sizeof(obuf)) { /* +2 for caller. */
41 fprintf(stderr, "Error: output buffer overflow\n");
42 exit(1);
43 }
44 *optr++ = (uint8_t)(n | kind);
45 memcpy(optr, p, n);
46 optr += n;
47}
48
49static void libdef_endmodule(BuildCtx *ctx)
50{
51 if (modstate != 0) {
52 char line[80];
53 const uint8_t *p;
54 int n;
55 if (modstate == 1)
56 fprintf(ctx->fp, " (lua_CFunction)0");
57 fprintf(ctx->fp, "\n};\n");
58 fprintf(ctx->fp, "static const uint8_t %s%s[] = {\n",
59 LABEL_PREFIX_LIBINIT, modname);
60 line[0] = '\0';
61 for (n = 0, p = obuf; p < optr; p++) {
62 n += sprintf(line+n, "%d,", *p);
63 if (n >= 75) {
64 fprintf(ctx->fp, "%s\n", line);
65 n = 0;
66 line[0] = '\0';
67 }
68 }
69 fprintf(ctx->fp, "%s%d\n};\n#endif\n\n", line, LIBINIT_END);
70 }
71}
72
73static void libdef_module(BuildCtx *ctx, char *p, int arg)
74{
75 UNUSED(arg);
76 if (ctx->mode == BUILD_libdef) {
77 libdef_endmodule(ctx);
78 optr = obuf;
79 *optr++ = (uint8_t)ffid;
80 *optr++ = 0;
81 modstate = 1;
82 fprintf(ctx->fp, "#ifdef %sMODULE_%s\n", LIBDEF_PREFIX, p);
83 fprintf(ctx->fp, "#undef %sMODULE_%s\n", LIBDEF_PREFIX, p);
84 fprintf(ctx->fp, "static const lua_CFunction %s%s[] = {\n",
85 LABEL_PREFIX_LIBCF, p);
86 }
87 modnamelen = strlen(p);
88 if (modnamelen > sizeof(modname)-1) {
89 fprintf(stderr, "Error: module name too long: '%s'\n", p);
90 exit(1);
91 }
92 strcpy(modname, p);
93}
94
95static int find_ffofs(BuildCtx *ctx, const char *name)
96{
97 int i;
98 for (i = 0; i < ctx->nglob; i++) {
99 const char *gl = ctx->globnames[i];
100 if (gl[0] == 'f' && gl[1] == 'f' && gl[2] == '_' && !strcmp(gl+3, name)) {
101 return (int)((uint8_t *)ctx->glob[i] - ctx->code);
102 }
103 }
104 fprintf(stderr, "Error: undefined fast function %s%s\n",
105 LABEL_PREFIX_FF, name);
106 exit(1);
107}
108
109static void libdef_func(BuildCtx *ctx, char *p, int arg)
110{
111 if (ctx->mode == BUILD_libdef) {
112 int ofs = arg != LIBINIT_CF ? find_ffofs(ctx, p) : 0;
113 if (modstate == 0) {
114 fprintf(stderr, "Error: no module for function definition %s\n", p);
115 exit(1);
116 }
117 if (regfunc == REGFUNC_NOREG) {
118 if (optr+1 > obuf+sizeof(obuf)) {
119 fprintf(stderr, "Error: output buffer overflow\n");
120 exit(1);
121 }
122 *optr++ = LIBINIT_FFID;
123 } else {
124 if (arg != LIBINIT_ASM_) {
125 if (modstate != 1) fprintf(ctx->fp, ",\n");
126 modstate = 2;
127 fprintf(ctx->fp, " %s%s", arg ? LABEL_PREFIX_FFH : LABEL_PREFIX_CF, p);
128 }
129 if (regfunc != REGFUNC_NOREGUV) obuf[1]++; /* Bump hash table size. */
130 libdef_name(regfunc == REGFUNC_NOREGUV ? "" : p, arg);
131 if (arg) {
132 *optr++ = (uint8_t)ofs;
133 *optr++ = (uint8_t)(ofs >> 8);
134 }
135 }
136 } else if (ctx->mode == BUILD_ffdef) {
137 fprintf(ctx->fp, "FFDEF(%s)\n", p);
138 } else if (ctx->mode == BUILD_recdef) {
139 if (strlen(p) > sizeof(funcname)-1) {
140 fprintf(stderr, "Error: function name too long: '%s'\n", p);
141 exit(1);
142 }
143 strcpy(funcname, p);
144 } else if (ctx->mode == BUILD_vmdef) {
145 int i;
146 for (i = 1; p[i] && modname[i-1]; i++)
147 if (p[i] == '_') p[i] = '.';
148 fprintf(ctx->fp, "\"%s\",\n", p);
149 }
150 ffid++;
151 regfunc = REGFUNC_OK;
152}
153
154static uint32_t find_rec(char *name)
155{
156 char *p = (char *)obuf;
157 uint32_t n;
158 for (n = 2; *p; n++) {
159 if (strcmp(p, name) == 0)
160 return n;
161 p += strlen(p)+1;
162 }
163 if (p+strlen(name)+1 >= (char *)obuf+sizeof(obuf)) {
164 fprintf(stderr, "Error: output buffer overflow\n");
165 exit(1);
166 }
167 strcpy(p, name);
168 return n;
169}
170
171static void libdef_rec(BuildCtx *ctx, char *p, int arg)
172{
173 UNUSED(arg);
174 if (ctx->mode == BUILD_recdef) {
175 char *q;
176 uint32_t n;
177 for (; recffid+1 < ffid; recffid++)
178 fprintf(ctx->fp, ",\n0");
179 recffid = ffid;
180 if (*p == '.') p = funcname;
181 q = strchr(p, ' ');
182 if (q) *q++ = '\0';
183 n = find_rec(p);
184 if (q)
185 fprintf(ctx->fp, ",\n0x%02x00+(%s)", n, q);
186 else
187 fprintf(ctx->fp, ",\n0x%02x00", n);
188 }
189}
190
191static void memcpy_endian(void *dst, void *src, size_t n)
192{
193 union { uint8_t b; uint32_t u; } host_endian;
194 host_endian.u = 1;
195 if (host_endian.b == LJ_ENDIAN_SELECT(1, 0)) {
196 memcpy(dst, src, n);
197 } else {
198 size_t i;
199 for (i = 0; i < n; i++)
200 ((uint8_t *)dst)[i] = ((uint8_t *)src)[n-1-i]; /* Reverse the byte order. */
201 }
202}
203
204static void libdef_push(BuildCtx *ctx, char *p, int arg)
205{
206 UNUSED(arg);
207 if (ctx->mode == BUILD_libdef) {
208 int len = (int)strlen(p);
209 if (*p == '"') {
210 if (len > 1 && p[len-1] == '"') {
211 p[len-1] = '\0';
212 libdef_name(p+1, LIBINIT_STRING);
213 return;
214 }
215 } else if (*p >= '0' && *p <= '9') {
216 char *ep;
217 double d = strtod(p, &ep);
218 if (*ep == '\0') {
219 if (optr+1+sizeof(double) > obuf+sizeof(obuf)) {
220 fprintf(stderr, "Error: output buffer overflow\n");
221 exit(1);
222 }
223 *optr++ = LIBINIT_NUMBER;
224 memcpy_endian(optr, &d, sizeof(double));
225 optr += sizeof(double);
226 return;
227 }
228 } else if (!strcmp(p, "lastcl")) {
229 if (optr+1 > obuf+sizeof(obuf)) {
230 fprintf(stderr, "Error: output buffer overflow\n");
231 exit(1);
232 }
233 *optr++ = LIBINIT_LASTCL;
234 return;
235 } else if (len > 4 && !strncmp(p, "top-", 4)) {
236 if (optr+2 > obuf+sizeof(obuf)) {
237 fprintf(stderr, "Error: output buffer overflow\n");
238 exit(1);
239 }
240 *optr++ = LIBINIT_COPY;
241 *optr++ = (uint8_t)atoi(p+4);
242 return;
243 }
244 fprintf(stderr, "Error: bad value for %sPUSH(%s)\n", LIBDEF_PREFIX, p);
245 exit(1);
246 }
247}
248
249static void libdef_set(BuildCtx *ctx, char *p, int arg)
250{
251 UNUSED(arg);
252 if (ctx->mode == BUILD_libdef) {
253 if (p[0] == '!' && p[1] == '\0') p[0] = '\0'; /* Set env. */
254 libdef_name(p, LIBINIT_STRING);
255 *optr++ = LIBINIT_SET;
256 obuf[1]++; /* Bump hash table size. */
257 }
258}
259
260static void libdef_regfunc(BuildCtx *ctx, char *p, int arg)
261{
262 UNUSED(ctx); UNUSED(p);
263 regfunc = arg;
264}
265
266typedef void (*LibDefFunc)(BuildCtx *ctx, char *p, int arg);
267
268typedef struct LibDefHandler {
269 const char *suffix;
270 const char *stop;
271 const LibDefFunc func;
272 const int arg;
273} LibDefHandler;
274
275static const LibDefHandler libdef_handlers[] = {
276 { "MODULE_", " \t\r\n", libdef_module, 0 },
277 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
280 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 },
283 { "NOREGUV", NULL, libdef_regfunc, REGFUNC_NOREGUV },
284 { "NOREG", NULL, libdef_regfunc, REGFUNC_NOREG },
285 { NULL, NULL, (LibDefFunc)0, 0 }
286};
287
288/* Emit C source code for library function definitions. */
289void emit_lib(BuildCtx *ctx)
290{
291 const char *fname;
292
293 if (ctx->mode == BUILD_ffdef || ctx->mode == BUILD_libdef ||
294 ctx->mode == BUILD_recdef)
295 fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
296 else if (ctx->mode == BUILD_vmdef)
297 fprintf(ctx->fp, "ffnames = {\n[0]=\"Lua\",\n\"C\",\n");
298 if (ctx->mode == BUILD_recdef)
299 fprintf(ctx->fp, "static const uint16_t recff_idmap[] = {\n0,\n0x0100");
300 recffid = ffid = FF_C+1;
301
302 while ((fname = *ctx->args++)) {
303 char buf[256]; /* We don't care about analyzing lines longer than that. */
304 FILE *fp;
305 if (fname[0] == '-' && fname[1] == '\0') {
306 fp = stdin;
307 } else {
308 fp = fopen(fname, "r");
309 if (!fp) {
310 fprintf(stderr, "Error: cannot open input file '%s': %s\n",
311 fname, strerror(errno));
312 exit(1);
313 }
314 }
315 modstate = 0;
316 regfunc = REGFUNC_OK;
317 while (fgets(buf, sizeof(buf), fp) != NULL) {
318 char *p;
319 for (p = buf; (p = strstr(p, LIBDEF_PREFIX)) != NULL; ) {
320 const LibDefHandler *ldh;
321 p += sizeof(LIBDEF_PREFIX)-1;
322 for (ldh = libdef_handlers; ldh->suffix != NULL; ldh++) {
323 size_t n, len = strlen(ldh->suffix);
324 if (!strncmp(p, ldh->suffix, len)) {
325 p += len;
326 n = ldh->stop ? strcspn(p, ldh->stop) : 0;
327 if (!p[n]) break;
328 p[n] = '\0';
329 ldh->func(ctx, p, ldh->arg);
330 p += n+1;
331 break;
332 }
333 }
334 if (ldh->suffix == NULL) {
335 buf[strlen(buf)-1] = '\0';
336 fprintf(stderr, "Error: unknown library definition tag %s%s\n",
337 LIBDEF_PREFIX, p);
338 exit(1);
339 }
340 }
341 }
342 fclose(fp);
343 if (ctx->mode == BUILD_libdef) {
344 libdef_endmodule(ctx);
345 }
346 }
347
348 if (ctx->mode == BUILD_ffdef) {
349 fprintf(ctx->fp, "\n#undef FFDEF\n\n");
350 } else if (ctx->mode == BUILD_vmdef) {
351 fprintf(ctx->fp, "}\n\n");
352 } else if (ctx->mode == BUILD_recdef) {
353 char *p = (char *)obuf;
354 fprintf(ctx->fp, "\n};\n\n");
355 fprintf(ctx->fp, "static const RecordFunc recff_func[] = {\n"
356 "recff_nyi,\n"
357 "recff_c");
358 while (*p) {
359 fprintf(ctx->fp, ",\nrecff_%s", p);
360 p += strlen(p)+1;
361 }
362 fprintf(ctx->fp, "\n};\n\n");
363 }
364}
365
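For BUILD_libdef, the byte stream accumulated in obuf and dumped by libdef_endmodule() has a small fixed header: byte 0 is the module's starting fast-function id and byte 1 counts the registered keys (the "hash table size" bumped via obuf[1]++ above). The remainder is a sequence of length|kind-tagged names, 16-bit ASM offsets and LIBINIT_* opcode bytes, closed by the trailing LIBINIT_END element that libdef_endmodule() appends. A minimal sketch that reports just that header (entry decoding is left out here, since it depends on the LIBINIT_* tag encoding in lj_lib.h):

/* Sketch: report the header of one generated library-init byte stream. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static void libinit_info(const uint8_t *p, size_t len)
{
  /* Layout per libdef_module()/libdef_endmodule():
  ** p[0] = first ffid of the module, p[1] = hash table size,
  ** p[2..len-2] = tagged entries, p[len-1] = LIBINIT_END.
  */
  printf("ffid=%u, hash size=%u, %u entry bytes\n",
         p[0], p[1], (unsigned)(len - 3));
}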
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c
new file mode 100644
index 00000000..9acf6b76
--- /dev/null
+++ b/src/buildvm_peobj.c
@@ -0,0 +1,303 @@
1/*
2** LuaJIT VM builder: PE object emitter.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Only used for building on Windows, since we cannot assume the presence
6** of a suitable assembler. The host and target byte order must match.
7*/
8
9#include "buildvm.h"
10#include "lj_bc.h"
11
12#if LJ_TARGET_X86ORX64
13
14/* Context for PE object emitter. */
15static char *strtab;
16static size_t strtabofs;
17
18/* -- PE object definitions ----------------------------------------------- */
19
20/* PE header. */
21typedef struct PEheader {
22 uint16_t arch;
23 uint16_t nsects;
24 uint32_t time;
25 uint32_t symtabofs;
26 uint32_t nsyms;
27 uint16_t opthdrsz;
28 uint16_t flags;
29} PEheader;
30
31/* PE section. */
32typedef struct PEsection {
33 char name[8];
34 uint32_t vsize;
35 uint32_t vaddr;
36 uint32_t size;
37 uint32_t ofs;
38 uint32_t relocofs;
39 uint32_t lineofs;
40 uint16_t nreloc;
41 uint16_t nline;
42 uint32_t flags;
43} PEsection;
44
45/* PE relocation. */
46typedef struct PEreloc {
47 uint32_t vaddr;
48 uint32_t symidx;
49 uint16_t type;
50} PEreloc;
51
52/* Cannot use sizeof, because it pads up to the max. alignment. */
53#define PEOBJ_RELOC_SIZE (4+4+2)
54
55/* PE symbol table entry. */
56typedef struct PEsym {
57 union {
58 char name[8];
59 uint32_t nameref[2];
60 } n;
61 uint32_t value;
62 int16_t sect;
63 uint16_t type;
64 uint8_t scl;
65 uint8_t naux;
66} PEsym;
67
68/* PE symbol table auxiliary entry for a section. */
69typedef struct PEsymaux {
70 uint32_t size;
71 uint16_t nreloc;
72 uint16_t nline;
73 uint32_t cksum;
74 uint16_t assoc;
75 uint8_t comdatsel;
76 uint8_t unused[3];
77} PEsymaux;
78
79/* Cannot use sizeof, because it pads up to the max. alignment. */
80#define PEOBJ_SYM_SIZE (8+4+2+2+1+1)
81
82/* PE object CPU specific defines. */
83#if LJ_TARGET_X86
84#define PEOBJ_ARCH_TARGET 0x014c
85#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */
86#define PEOBJ_RELOC_DIR32 0x06
87#define PEOBJ_SYM_PREFIX "_"
88#elif LJ_TARGET_X64
89#define PEOBJ_ARCH_TARGET 0x8664
90#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */
91#define PEOBJ_RELOC_DIR32 0x02
92#define PEOBJ_SYM_PREFIX ""
93#endif
94
95/* Section numbers (0-based). */
96enum {
97 PEOBJ_SECT_ABS = -2,
98 PEOBJ_SECT_UNDEF = -1,
99 PEOBJ_SECT_TEXT,
100 /* TODO: add .pdata/.xdata for x64. */
101 PEOBJ_SECT_RDATA,
102 PEOBJ_SECT_RDATA_Z,
103 PEOBJ_NSECTIONS
104};
105
106/* Symbol types. */
107#define PEOBJ_TYPE_NULL 0
108#define PEOBJ_TYPE_FUNC 0x20
109
110/* Symbol storage class. */
111#define PEOBJ_SCL_EXTERN 2
112#define PEOBJ_SCL_STATIC 3
113
114/* -- PE object emitter --------------------------------------------------- */
115
116/* Emit PE object symbol. */
117static void emit_peobj_sym(BuildCtx *ctx, const char *name, uint32_t value,
118 int sect, int type, int scl)
119{
120 PEsym sym;
121 size_t len = strlen(name);
122 if (!strtab) { /* Pass 1: only calculate string table length. */
123 if (len > 8) strtabofs += len+1;
124 return;
125 }
126 if (len <= 8) {
127 memcpy(sym.n.name, name, len);
128 memset(sym.n.name+len, 0, 8-len);
129 } else {
130 sym.n.nameref[0] = 0;
131 sym.n.nameref[1] = strtabofs;
132 memcpy(strtab + strtabofs, name, len);
133 strtab[strtabofs+len] = 0;
134 strtabofs += len+1;
135 }
136 sym.value = value;
137 sym.sect = (int16_t)(sect+1); /* 1-based section number. */
138 sym.type = (uint16_t)type;
139 sym.scl = (uint8_t)scl;
140 sym.naux = 0;
141 owrite(ctx, &sym, PEOBJ_SYM_SIZE);
142}
143
144/* Emit PE object section symbol. */
145static void emit_peobj_sym_sect(BuildCtx *ctx, PEsection *pesect, int sect)
146{
147 PEsym sym;
148 PEsymaux aux;
149 if (!strtab) return; /* Pass 1: no output. */
150 memcpy(sym.n.name, pesect[sect].name, 8);
151 sym.value = 0;
152 sym.sect = (int16_t)(sect+1); /* 1-based section number. */
153 sym.type = PEOBJ_TYPE_NULL;
154 sym.scl = PEOBJ_SCL_STATIC;
155 sym.naux = 1;
156 owrite(ctx, &sym, PEOBJ_SYM_SIZE);
157 memset(&aux, 0, sizeof(PEsymaux));
158 aux.size = pesect[sect].size;
159 aux.nreloc = pesect[sect].nreloc;
160 owrite(ctx, &aux, PEOBJ_SYM_SIZE);
161}
162
163#define emit_peobj_sym_func(ctx, name, ofs) \
164 emit_peobj_sym(ctx, name, (uint32_t)(ofs), \
165 PEOBJ_SECT_TEXT, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN)
166#define emit_peobj_sym_rdata(ctx, name, ofs) \
167 emit_peobj_sym(ctx, name, (uint32_t)(ofs), \
168 PEOBJ_SECT_RDATA, PEOBJ_TYPE_NULL, PEOBJ_SCL_EXTERN)
169
170/* Emit Windows PE object file. */
171void emit_peobj(BuildCtx *ctx)
172{
173 PEheader pehdr;
174 PEsection pesect[PEOBJ_NSECTIONS];
175 int nzsym, relocsyms;
176 uint32_t sofs;
177 int i;
178 union { uint8_t b; uint32_t u; } host_endian;
179
180 host_endian.u = 1;
181 if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
182 fprintf(stderr, "Error: different byte order for host and target\n");
183 exit(1);
184 }
185
186 sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
187
188 /* Fill in PE sections. */
189 memset(&pesect, 0, PEOBJ_NSECTIONS*sizeof(PEsection));
190 memcpy(pesect[PEOBJ_SECT_TEXT].name, ".text", sizeof(".text")-1);
191 pesect[PEOBJ_SECT_TEXT].ofs = sofs;
192 sofs += (pesect[PEOBJ_SECT_TEXT].size = (uint32_t)ctx->codesz);
193 pesect[PEOBJ_SECT_TEXT].relocofs = sofs;
194 sofs += (pesect[PEOBJ_SECT_TEXT].nreloc = (uint16_t)ctx->nreloc) * PEOBJ_RELOC_SIZE;
195 /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
196 pesect[PEOBJ_SECT_TEXT].flags = 0x60500020;
197
198 memcpy(pesect[PEOBJ_SECT_RDATA].name, ".rdata", sizeof(".rdata")-1);
199 pesect[PEOBJ_SECT_RDATA].ofs = sofs;
200 sofs += (pesect[PEOBJ_SECT_RDATA].size = ctx->npc*sizeof(uint16_t));
201 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
202 pesect[PEOBJ_SECT_RDATA].flags = 0x40300040;
203
204 memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
205 pesect[PEOBJ_SECT_RDATA_Z].ofs = sofs;
206 sofs += (pesect[PEOBJ_SECT_RDATA_Z].size = (uint32_t)strlen(ctx->dasm_ident)+1);
207 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
208 pesect[PEOBJ_SECT_RDATA_Z].flags = 0x40300040;
209
210 /* Fill in PE header. */
211 pehdr.arch = PEOBJ_ARCH_TARGET;
212 pehdr.nsects = PEOBJ_NSECTIONS;
213 pehdr.time = 0; /* Timestamp is optional. */
214 pehdr.symtabofs = sofs;
215 pehdr.opthdrsz = 0;
216 pehdr.flags = 0;
217
218 /* Compute the size of the symbol table:
219 ** @feat.00 + nsections*2
220 ** + asm_start + (nsyms-nzsym) + op_ofs
221 ** + relocsyms
222 */
223 /* Skip _Z syms. */
224 for (nzsym = 0; ctx->sym_ofs[ctx->perm[nzsym]] < 0; nzsym++) ;
225 for (relocsyms = 0; ctx->extnames[relocsyms]; relocsyms++) ;
226 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+(ctx->nsym-nzsym)+1 + relocsyms;
227
228 /* Write PE object header and all sections. */
229 owrite(ctx, &pehdr, sizeof(PEheader));
230 owrite(ctx, &pesect, sizeof(PEsection)*PEOBJ_NSECTIONS);
231
232 /* Write .text section. */
233 owrite(ctx, ctx->code, ctx->codesz);
234 for (i = 0; i < ctx->nreloc; i++) {
235 PEreloc reloc;
236 reloc.vaddr = (uint32_t)ctx->reloc[i].ofs;
237 reloc.symidx = 1+2+ctx->reloc[i].sym; /* Reloc syms are after .text sym. */
238 reloc.type = ctx->reloc[i].type ? PEOBJ_RELOC_REL32 : PEOBJ_RELOC_DIR32;
239 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
240 }
241
242 /* Write .rdata section. */
243 for (i = 0; i < ctx->npc; i++) {
244 uint16_t pcofs = (uint16_t)ctx->sym_ofs[i];
245 owrite(ctx, &pcofs, 2);
246 }
247
248 /* Write .rdata$Z section. */
249 owrite(ctx, ctx->dasm_ident, strlen(ctx->dasm_ident)+1);
250
251 /* Write symbol table. */
252 strtab = NULL; /* 1st pass: collect string sizes. */
253 for (;;) {
254 char name[80];
255
256 strtabofs = 4;
257 /* Mark as SafeSEH compliant. */
258 emit_peobj_sym(ctx, "@feat.00", 1,
259 PEOBJ_SECT_ABS, PEOBJ_TYPE_NULL, PEOBJ_SCL_STATIC);
260
261 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT);
262 for (i = 0; ctx->extnames[i]; i++) {
263 sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]);
264 emit_peobj_sym(ctx, name, 0,
265 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
266 }
267 emit_peobj_sym_func(ctx, PEOBJ_SYM_PREFIX LABEL_ASM_BEGIN, 0);
268 for (i = nzsym; i < ctx->nsym; i++) {
269 int pi = ctx->perm[i];
270 if (pi >= ctx->npc) {
271 sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX "%s",
272 ctx->globnames[pi-ctx->npc]);
273 emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]);
274#if LJ_HASJIT
275 } else {
276#else
277 } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL ||
278 pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL ||
279 pi == BC_ILOOP)) {
280#endif
281 sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX_BC "%s",
282 bc_names[pi]);
283 emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]);
284 }
285 }
286
287 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA);
288 emit_peobj_sym_rdata(ctx, PEOBJ_SYM_PREFIX LABEL_OP_OFS, 0);
289
290 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA_Z);
291
292 if (strtab)
293 break;
294 /* 2nd pass: alloc strtab, write syms and copy strings. */
295 strtab = (char *)malloc(strtabofs);
296 *(uint32_t *)strtab = strtabofs;
297 }
298
299 /* Write string table. */
300 owrite(ctx, strtab, strtabofs);
301}
302
303#endif
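The symbol table above follows the usual COFF conventions: names of up to 8 bytes are stored inline in PEsym.n.name (not necessarily NUL-terminated), while longer names set nameref[0] to 0 and nameref[1] to an offset into the string table, whose first 4 bytes hold its own total size (which is why strtabofs starts at 4). A sketch of how a reader of the emitted object would resolve a symbol name, assuming the PEsym layout defined above:

/* Sketch: resolve a COFF symbol name (inline short name vs. string table reference). */
#include <string.h>

static const char *peobj_sym_name(const PEsym *s, const char *strtab, char buf[9])
{
  if (s->n.nameref[0] == 0) {       /* Long name: offset into the string table. */
    return strtab + s->n.nameref[1];
  } else {                          /* Short name: inline, NUL-pad a copy for safety. */
    memcpy(buf, s->n.name, 8);
    buf[8] = '\0';
    return buf;
  }
}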
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
new file mode 100644
index 00000000..add00c9d
--- /dev/null
+++ b/src/buildvm_x86.dasc
@@ -0,0 +1,3592 @@
1|// Low-level VM code for x86 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x86
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|// Fixed register assignments for the interpreter.
16|// This is very fragile and has many dependencies. Caveat emptor.
17|.define BASE, edx // Not C callee-save, refetched anyway.
18|.define KBASE, edi // Must be C callee-save.
19|.define PC, esi // Must be C callee-save.
20|.define DISPATCH, ebx // Must be C callee-save.
21|
22|.define RA, ecx
23|.define RAL, cl
24|.define RB, ebp // Must be ebp (C callee-save).
25|.define RC, eax // Must be eax (fcomparepp and others).
26|.define RCW, ax
27|.define RCH, ah
28|.define RCL, al
29|.define OP, RB
30|.define RD, RC
31|.define RDL, RCL
32|
33|// Type definitions. Some of these are only used for documentation.
34|.type L, lua_State
35|.type GL, global_State
36|.type TVALUE, TValue
37|.type GCOBJ, GCobj
38|.type STR, GCstr
39|.type TAB, GCtab
40|.type LFUNC, GCfuncL
41|.type CFUNC, GCfuncC
42|.type PROTO, GCproto
43|.type UPVAL, GCupval
44|.type NODE, Node
45|.type NARGS, int
46|.type TRACE, Trace
47|.type EXITINFO, ExitInfo
48|
49|// Stack layout while in interpreter. Must match with lj_frame.h.
50|.macro saveregs
51| push ebp; push edi; push esi; push ebx
52|.endmacro
53|.macro restoreregs
54| pop ebx; pop esi; pop edi; pop ebp
55|.endmacro
56|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
57|
58|.define INARG_4, aword [esp+aword*15]
59|.define INARG_3, aword [esp+aword*14]
60|.define INARG_2, aword [esp+aword*13]
61|.define INARG_1, aword [esp+aword*12]
62|//----- 16 byte aligned, ^^^ arguments from C caller
63|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
64|.define SAVE_R4, aword [esp+aword*10]
65|.define SAVE_R3, aword [esp+aword*9]
66|.define SAVE_R2, aword [esp+aword*8]
67|//----- 16 byte aligned
68|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
69|.define SAVE_PC, aword [esp+aword*6]
70|.define ARG6, aword [esp+aword*5]
71|.define ARG5, aword [esp+aword*4]
72|//----- 16 byte aligned
73|.define ARG4, aword [esp+aword*3]
74|.define ARG3, aword [esp+aword*2]
75|.define ARG2, aword [esp+aword*1]
76|.define ARG1, aword [esp] //<-- esp while in interpreter.
77|//----- 16 byte aligned, ^^^ arguments for C callee
78|
79|// FPARGx overlaps ARGx and ARG(x+1) on x86.
80|.define FPARG5, qword [esp+qword*2]
81|.define FPARG3, qword [esp+qword*1]
82|.define FPARG1, qword [esp]
83|// NRESULTS overlaps ARG6 (and FPARG5)
84|.define NRESULTS, ARG6
85|
86|// Arguments for vm_call and vm_pcall.
87|.define INARG_P_ERRF, INARG_4 // vm_pcall only.
88|.define INARG_NRES, INARG_3
89|.define INARG_BASE, INARG_2
90|.define SAVE_L, INARG_1
91|
92|.define SAVE_CFRAME, INARG_BASE // Overwrites INARG_BASE!
93|
94|// Arguments for vm_cpcall.
95|.define INARG_CP_UD, INARG_4
96|.define INARG_CP_FUNC, INARG_3
97|.define INARG_CP_CALL, INARG_2
98|
99|//-----------------------------------------------------------------------
100|
101|// Instruction headers.
102|.macro ins_A; .endmacro
103|.macro ins_AD; .endmacro
104|.macro ins_AJ; .endmacro
105|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
106|.macro ins_AB_; movzx RB, RCH; .endmacro
107|.macro ins_A_C; movzx RC, RCL; .endmacro
108|.macro ins_AND; not RD; .endmacro
109|
110|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
111|.macro ins_NEXT
112| mov RC, [PC]
113| movzx RA, RCH
114| movzx OP, RCL
115| add PC, 4
116| shr RC, 16
117| jmp aword [DISPATCH+OP*4]
118|.endmacro
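In C terms, the decode performed by ins_NEXT is roughly the following (a sketch only; pc and dispatch stand in for the PC and DISPATCH registers, and the real code tail-jumps instead of calling):

/* Sketch: C analogue of the ins_NEXT instruction decode. */
typedef void (*DispatchFn)(void);

static void ins_next_c(const uint32_t *pc, const DispatchFn *dispatch)
{
  uint32_t ins = *pc++;              /* mov RC, [PC]; add PC, 4 */
  uint32_t ra  = (ins >> 8) & 0xff;  /* movzx RA, RCH */
  uint32_t op  = ins & 0xff;         /* movzx OP, RCL */
  uint32_t rd  = ins >> 16;          /* shr RC, 16: D operand; ins_ABC later splits it into B (high byte) and C (low byte). */
  (void)ra; (void)rd;
  dispatch[op]();                    /* jmp aword [DISPATCH+OP*4] */
}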
119|
120|// Instruction footer.
121|.if 1
122| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
123| .define ins_next, ins_NEXT
124| .define ins_next_, ins_NEXT
125|.else
126| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
127| // Affects only certain kinds of benchmarks (and only with -j off).
128| // Around 10%-30% slower on Core2, a lot slower on P4.
129| .macro ins_next
130| jmp ->ins_next
131| .endmacro
132| .macro ins_next_
133| ->ins_next:
134| ins_NEXT
135| .endmacro
136|.endif
137|
138|//-----------------------------------------------------------------------
139|
140|// Macros to test operand types.
141|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
142|.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro
143|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
144|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
145|
146|// These operands must be used with movzx.
147|.define PC_OP, byte [PC-4]
148|.define PC_RA, byte [PC-3]
149|.define PC_RB, byte [PC-1]
150|.define PC_RC, byte [PC-2]
151|.define PC_RD, word [PC-2]
152|
153|.macro branchPC, reg
154| lea PC, [PC+reg*4-BCBIAS_J*4]
155|.endmacro
156|
157|// Assumes DISPATCH is relative to GL.
158#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
159#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
160|
161|// Decrement hashed hotcount and trigger trace recorder if zero.
162|.macro hotloop, reg
163| mov reg, PC
164| shr reg, 1
165| and reg, HOTCOUNT_PCMASK
166| sub word [DISPATCH+reg+GG_DISP2HOT], 1
167| jz ->vm_hotloop
168|.endmacro
169|
170|.macro hotcall, reg
171| mov reg, PC
172| shr reg, 1
173| and reg, HOTCOUNT_PCMASK
174| sub word [DISPATCH+reg+GG_DISP2HOT], 1
175| jz ->vm_hotcall
176|.endmacro
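Both macros hash the bytecode PC into an array of 16-bit counters located at offset GG_DISP2HOT from the dispatch table. A rough C sketch of the counter update (dispatch and pc again stand in for DISPATCH and PC; the constants are the ones referenced above):

/* Sketch: C analogue of the hotloop/hotcall counter decrement. */
#include <stdint.h>

static int hotcount_hit(void *dispatch, const void *pc)
{
  uint16_t *hc = (uint16_t *)((char *)dispatch + GG_DISP2HOT +
                              (((uintptr_t)pc >> 1) & HOTCOUNT_PCMASK));
  return --*hc == 0;  /* sub word [DISPATCH+reg+GG_DISP2HOT], 1; jz -> trace recorder. */
}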
177|
178|// Set current VM state.
179|.macro set_vmstate, st
180| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
181|.endmacro
182|
183|// Annoying x87 stuff: support for two compare variants.
184|.macro fcomparepp // Compare and pop st0 >< st1.
185||if (cmov) {
186| fucomip st1
187| fpop
188||} else {
189| fucompp
190| fnstsw ax // eax modified!
191| sahf
192||}
193|.endmacro
194|
195|.macro fdup; fld st0; .endmacro
196|.macro fpop1; fstp st1; .endmacro
197|
198|// Move table write barrier back. Overwrites reg.
199|.macro barrierback, tab, reg
200| and byte tab->marked, cast_byte(~LJ_GC_BLACK) // black2gray(tab)
201| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
202| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
203| mov tab->gclist, reg
204|.endmacro
205|
206|//-----------------------------------------------------------------------
207
208/* Generate subroutines used by opcodes and other parts of the VM. */
209/* The .code_sub section should be last to help static branch prediction. */
210static void build_subroutines(BuildCtx *ctx, int cmov)
211{
212 |.code_sub
213 |
214 |//-----------------------------------------------------------------------
215 |//-- Call and return handling -------------------------------------------
216 |//-----------------------------------------------------------------------
217 |
218 |// Reminder: A call gate may be called with func/args above L->maxstack,
219 |// i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
220 |// too. This means all call gates (L*, C and fast functions) must check
221 |// for stack overflow _before_ adding more slots!
222 |
223 |//-- Call gates ---------------------------------------------------------
224 |
225 |->gate_lf: // Call gate for fixarg Lua functions.
226 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
227 | // DISPATCH initialized
228 | mov BASE, RA
229 | mov PROTO:RB, LFUNC:RB->pt
230 | mov [BASE-4], PC // Store caller PC.
231 | movzx RA, byte PROTO:RB->framesize
232 | mov PC, PROTO:RB->bc
233 | mov KBASE, PROTO:RB->k
234 | mov L:RB, SAVE_L
235 | lea RA, [BASE+RA*8] // Top of frame.
236 | lea RC, [BASE+NARGS:RC*8-4] // Points to tag of 1st free slot.
237 | cmp RA, L:RB->maxstack
238 | ja ->gate_lf_growstack
239 |9: // Entry point from vararg setup below.
240 | mov RB, LJ_TNIL
241 |1: // Clear free slots until top of frame.
242 | mov [RC], RB
243 | mov [RC+8], RB
244 | add RC, 16
245 | cmp RC, RA
246 | jb <1
247#if LJ_HASJIT
248 | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
249 | // hotcall RB
250#endif
251 | ins_next
252 |
253 |->gate_lv: // Call gate for vararg Lua functions.
254 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
255 | // DISPATCH initialized
256 | mov [RA-4], PC // Store caller PC.
257 | lea PC, [NARGS:RC*8+FRAME_VARG]
258 | lea BASE, [RA+PC-FRAME_VARG]
259 | mov [BASE-8], LFUNC:RB // Store copy of LFUNC.
260 | mov PROTO:RB, LFUNC:RB->pt
261 | mov [BASE-4], PC // Store delta + FRAME_VARG.
262 | movzx PC, byte PROTO:RB->framesize
263 | lea KBASE, [BASE+PC*8]
264 | mov L:PC, SAVE_L
265 | lea RC, [BASE+4]
266 | cmp KBASE, L:PC->maxstack
267 | ja ->gate_lv_growstack // Need to grow stack.
268 | movzx PC, byte PROTO:RB->numparams
269 | test PC, PC
270 | jz >2
271 |1: // Copy fixarg slots up.
272 | add RA, 8
273 | cmp RA, BASE
274 | jnb >2
275 | mov KBASE, [RA-8]
276 | mov [RC-4], KBASE
277 | mov KBASE, [RA-4]
278 | mov [RC], KBASE
279 | add RC, 8
280 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
281 | sub PC, 1
282 | jnz <1
283 |2:
284 | movzx RA, byte PROTO:RB->framesize
285 | mov PC, PROTO:RB->bc
286 | mov KBASE, PROTO:RB->k
287 | lea RA, [BASE+RA*8]
288 | jmp <9
289 |
290 |->gate_c: // Call gate for C functions.
291 | // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return
292 | mov [RA-4], PC
293 | mov KBASE, CFUNC:RB->f
294 | mov L:RB, SAVE_L
295 | lea RC, [RA+NARGS:RC*8-8]
296 | mov L:RB->base, RA
297 | lea RA, [RC+8*LUA_MINSTACK]
298 | mov ARG1, L:RB
299 | mov L:RB->top, RC
300 | cmp RA, L:RB->maxstack
301 | ja ->gate_c_growstack // Need to grow stack.
302 | set_vmstate C
303 | call KBASE // (lua_State *L)
304 | set_vmstate INTERP
305 | // nresults returned in eax (RD).
306 | mov BASE, L:RB->base
307 | lea RA, [BASE+RD*8]
308 | neg RA
309 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
310 |->vm_returnc:
311 | add RD, 1 // RD = nresults+1
312 | mov NRESULTS, RD
313 | test PC, FRAME_TYPE
314 | jz ->BC_RET_Z // Handle regular return to Lua.
315 |
316 |//-- Return handling (non-inline) ---------------------------------------
317 |
318 |->vm_return:
319 | // BASE = base, RA = resultofs, RD = nresults+1 (= NRESULTS), PC = return
320 | test PC, FRAME_C
321 | jz ->vm_returnp
322 |
323 | // Return to C.
324 | set_vmstate C
325 | and PC, -8
326 | sub PC, BASE
327 | neg PC // Previous base = BASE - delta.
328 |
329 | sub RD, 1
330 | jz >2
331 |1:
332 | mov RB, [BASE+RA] // Move results down.
333 | mov [BASE-8], RB
334 | mov RB, [BASE+RA+4]
335 | mov [BASE-4], RB
336 | add BASE, 8
337 | sub RD, 1
338 | jnz <1
339 |2:
340 | mov L:RB, SAVE_L
341 | mov L:RB->base, PC
342 |3:
343 | mov RD, NRESULTS
344 | mov RA, INARG_NRES // RA = wanted nresults+1
345 |4:
346 | cmp RA, RD
347 | jne >6 // More/less results wanted?
348 |5:
349 | sub BASE, 8
350 | mov L:RB->top, BASE
351 |
352 |->vm_leave_cp:
353 | mov RA, SAVE_CFRAME // Restore previous C frame.
354 | mov L:RB->cframe, RA
355 | xor eax, eax // Ok return status for vm_pcall.
356 |
357 |->vm_leave_unw:
358 | add esp, CFRAME_SPACE
359 | restoreregs
360 | ret
361 |
362 |6:
363 | jb >7 // Less results wanted?
364 | // More results wanted. Check stack size and fill up results with nil.
365 | cmp BASE, L:RB->maxstack
366 | ja >8
367 | mov dword [BASE-4], LJ_TNIL
368 | add BASE, 8
369 | add RD, 1
370 | jmp <4
371 |
372 |7: // Less results wanted.
373 | test RA, RA
374 | jz <5 // But check for LUA_MULTRET+1.
375 | sub RA, RD // Negative result!
376 | lea BASE, [BASE+RA*8] // Correct top.
377 | jmp <5
378 |
379 |8: // Corner case: need to grow stack for filling up results.
380 | // This can happen if:
381 | // - A C function grows the stack (a lot).
382 | // - The GC shrinks the stack in between.
383 | // - A return back from a lua_call() with (high) nresults adjustment.
384 | mov L:RB->top, BASE // Save current top held in BASE (yes).
385 | mov NRESULTS, RD // Need to fill only remainder with nil.
386 | mov ARG2, RA // Grow by wanted nresults+1.
387 | mov ARG1, L:RB
388 | call extern lj_state_growstack // (lua_State *L, int n)
389 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
390 | jmp <3
391 |
392 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
393 | // (void *cframe, int errcode)
394 | mov ecx, [esp+4]
395 | mov eax, [esp+8] // Error return status for vm_pcall.
396 | and ecx, CFRAME_RAWMASK
397 | mov esp, ecx
398 | mov L:RB, SAVE_L
399 | mov GL:RB, L:RB->glref
400 | mov dword GL:RB->vmstate, ~LJ_VMST_C
401 | jmp ->vm_leave_unw
402 |
403 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
404 | mov ecx, [esp+4]
405 | and ecx, CFRAME_RAWMASK
406 | mov esp, ecx
407 | mov L:RB, SAVE_L
408 | mov RA, -8 // Results start at BASE+RA = BASE-8.
409 | mov RD, 1+1 // Really 1+2 results, incr. later.
410 | mov BASE, L:RB->base
411 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
412 | add DISPATCH, GG_G2DISP
413 | mov PC, [BASE-4] // Fetch PC of previous frame.
414 | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
415 | set_vmstate INTERP
416 | jmp ->vm_returnc // Increments RD/NRESULTS and returns.
417 |
418 |->vm_returnp:
419 | test PC, FRAME_P
420 | jz ->cont_dispatch
421 |
422 | // Return from pcall or xpcall fast func.
423 | and PC, -8
424 | sub BASE, PC // Restore caller base.
425 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
426 | mov PC, [BASE-4] // Fetch PC of previous frame.
427 | // Prepending may overwrite the pcall frame, so do it at the end.
428 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
429 | jmp ->vm_returnc // Increments RD/NRESULTS and returns.
430 |
431 |//-- Grow stack on-demand -----------------------------------------------
432 |
433 |->gate_c_growstack: // Grow stack for C function.
434 | mov ARG2, LUA_MINSTACK
435 | jmp >1
436 |
437 |->gate_lv_growstack: // Grow stack for vararg Lua function.
438 | sub RC, 8
439 | mov BASE, RA
440 | mov RA, KBASE
441 | mov PC, PROTO:RB->bc
442 | mov L:RB, SAVE_L
443 |
444 |->gate_lf_growstack: // Grow stack for fixarg Lua function.
445 | // BASE = new base, RA = requested top, RC = top (offset +4 bytes)
446 | // RB = L, PC = first PC of called function (or anything if C function)
447 | sub RC, 4 // Adjust top.
448 | sub RA, BASE
449 | shr RA, 3 // n = pt->framesize (slots to grow, relative to L->top).
450 | add PC, 4 // Must point after first instruction.
451 | mov L:RB->base, BASE
452 | mov L:RB->top, RC
453 | mov SAVE_PC, PC
454 | mov ARG2, RA
455 | mov ARG1, L:RB
456 |1:
457 | // L:RB = L, L->base = new base, L->top = top
458 | // SAVE_PC = initial PC+1 (undefined for C functions)
459 | call extern lj_state_growstack // (lua_State *L, int n)
460 | mov RA, L:RB->base
461 | mov RC, L:RB->top
462 | mov LFUNC:RB, [RA-8]
463 | mov PC, [RA-4]
464 | sub RC, RA
465 | shr RC, 3
466 | add NARGS:RC, 1
467 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = invalid), PC restored.
468 | jmp aword LFUNC:RB->gate // Just retry call.
469 |
470 |//-----------------------------------------------------------------------
471 |//-- Entry points into the assembler VM ---------------------------------
472 |//-----------------------------------------------------------------------
473 |
474 |->vm_resume: // Setup C frame and resume thread.
475 | // (lua_State *L, StkId base, int nres1 = 0, ptrdiff_t ef = 0)
476 | saveregs
477 | mov PC, FRAME_C
478 | sub esp, CFRAME_SPACE
479 | xor RD, RD
480 | mov L:RB, SAVE_L
481 | lea KBASE, [esp+CFRAME_RESUME]
482 | mov RA, INARG_BASE
483 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
484 | add DISPATCH, GG_G2DISP
485 | mov L:RB->cframe, KBASE
486 | mov SAVE_CFRAME, RD // Caveat: overlaps INARG_BASE!
487 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
488 | cmp byte L:RB->status, RDL
489 | je >3 // Initial resume (like a call).
490 |
491 | // Resume after yield (like a return).
492 | set_vmstate INTERP
493 | mov byte L:RB->status, RDL
494 | mov BASE, L:RB->base
495 | mov RD, L:RB->top
496 | sub RD, RA
497 | shr RD, 3
498 | add RD, 1 // RD = nresults+1
499 | sub RA, BASE // RA = resultofs
500 | mov PC, [BASE-4]
501 | mov NRESULTS, RD
502 | test PC, FRAME_TYPE
503 | jz ->BC_RET_Z
504 | jmp ->vm_return
505 |
506 |->vm_pcall: // Setup protected C frame and enter VM.
507 | // (lua_State *L, StkId base, int nres1, ptrdiff_t ef)
508 | saveregs
509 | mov PC, FRAME_CP
510 | jmp >1
511 |
512 |->vm_call: // Setup C frame and enter VM.
513 | // (lua_State *L, StkId base, int nres1)
514 | saveregs
515 | mov PC, FRAME_C
516 |
517 |1: // Entry point for vm_pcall above (PC = ftype).
518 | sub esp, CFRAME_SPACE
519 | mov L:RB, SAVE_L
520 | mov RA, INARG_BASE
521 |
522 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
523 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
524 | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_BASE!
525 | mov SAVE_PC, esp // Any value outside of bytecode is ok.
526 | mov L:RB->cframe, esp
527 |
528 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
529 | add DISPATCH, GG_G2DISP
530 |
531 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
532 | set_vmstate INTERP
533 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
534 | add PC, RA
535 | sub PC, BASE // PC = frame delta + frame type
536 |
537 | mov RC, L:RB->top
538 | sub RC, RA
539 | shr NARGS:RC, 3
540 | add NARGS:RC, 1 // RC = nargs+1
541 |
542 | mov LFUNC:RB, [RA-8]
543 | cmp dword [RA-4], LJ_TFUNC
544 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
545 | jmp aword LFUNC:RB->gate
546 | // RA = new base, RB = LFUNC/CFUNC, RC = nargs+1.
547 |
548 |->vm_cpcall: // Setup protected C frame, call C.
549 | // (lua_State *L, lua_CPFunction cp, lua_CFunction func, void *ud)
550 | saveregs
551 | sub esp, CFRAME_SPACE
552 |
553 | mov L:RB, SAVE_L
554 | mov RC, INARG_CP_UD
555 | mov RA, INARG_CP_FUNC
556 | mov BASE, INARG_CP_CALL
557 | mov SAVE_PC, esp // Any value outside of bytecode is ok.
558 |
559 | // Caveat: INARG_P_* and INARG_CP_* overlap!
560 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
561 | sub KBASE, L:RB->top
562 | mov INARG_P_ERRF, 0 // No error function.
563 | mov INARG_NRES, KBASE // Neg. delta means cframe w/o frame.
564 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
565 |
566 | mov ARG3, RC
567 | mov ARG2, RA
568 | mov ARG1, L:RB
569 |
570 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
571 | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_CP_CALL!
572 | mov L:RB->cframe, esp
573 |
574 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
575 | // StkId (new base) or NULL returned in eax (RC).
576 | test RC, RC
577 | jz ->vm_leave_cp // No base? Just remove C frame.
578 | mov RA, RC
579 | mov PC, FRAME_CP
580 | jmp <2 // Else continue with the call.
581 |
582 |//-----------------------------------------------------------------------
583 |//-- Metamethod handling ------------------------------------------------
584 |//-----------------------------------------------------------------------
585 |
586 |//-- Continuation dispatch ----------------------------------------------
587 |
588 |->cont_dispatch:
589 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in NRESULTS)
590 | add RA, BASE
591 | and PC, -8
592 | mov RB, BASE
593 | sub BASE, PC // Restore caller BASE.
594 | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
595 | mov RC, RA // ... in [RC]
596 | mov PC, [RB-12] // Restore PC from [cont|PC].
597 | mov LFUNC:KBASE, [BASE-8]
598 | mov PROTO:KBASE, LFUNC:KBASE->pt
599 | mov KBASE, PROTO:KBASE->k
600 | // BASE = base, RC = result, RB = meta base
601 | jmp dword [RB-16] // Jump to continuation.
602 |
603 |->cont_cat: // BASE = base, RC = result, RB = mbase
604 | movzx RA, PC_RB
605 | sub RB, 16
606 | lea RA, [BASE+RA*8]
607 | sub RA, RB
608 | je ->cont_ra
609 | neg RA
610 | shr RA, 3
611 | mov ARG3, RA
612 | mov RA, [RC+4]
613 | mov RC, [RC]
614 | mov [RB+4], RA
615 | mov [RB], RC
616 | mov ARG2, RB
617 | jmp ->BC_CAT_Z
618 |
619 |//-- Table indexing metamethods -----------------------------------------
620 |
621 |->vmeta_tgets:
622 | mov ARG5, RC // RC = GCstr *
623 | mov ARG6, LJ_TSTR
624 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
625 | cmp PC_OP, BC_GGET
626 | jne >1
627 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
628 | mov [RA], TAB:RB // RB = GCtab *
629 | mov dword [RA+4], LJ_TTAB
630 | mov RB, RA
631 | jmp >2
632 |
633 |->vmeta_tgetb:
634 | movzx RC, PC_RC // Ugly, cannot fild from a byte.
635 | mov ARG4, RC
636 | fild ARG4
637 | fstp FPARG5
638 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
639 | jmp >1
640 |
641 |->vmeta_tgetv:
642 | movzx RC, PC_RC // Reload TValue *k from RC.
643 | lea RC, [BASE+RC*8]
644 |1:
645 | movzx RB, PC_RB // Reload TValue *t from RB.
646 | lea RB, [BASE+RB*8]
647 |2:
648 | mov ARG2, RB
649 | mov L:RB, SAVE_L
650 | mov ARG3, RC
651 | mov ARG1, L:RB
652 | mov SAVE_PC, PC
653 | mov L:RB->base, BASE
654 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
655 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
656 | mov BASE, L:RB->base
657 | test RC, RC
658 | jz >3
659 |->cont_ra: // BASE = base, RC = result
660 | movzx RA, PC_RA
661 | mov RB, [RC+4]
662 | mov RC, [RC]
663 | mov [BASE+RA*8+4], RB
664 | mov [BASE+RA*8], RC
665 | ins_next
666 |
667 |3: // Call __index metamethod.
668 | // BASE = base, L->top = new base, stack = cont/func/t/k
669 | mov RA, L:RB->top
670 | mov [RA-12], PC // [cont|PC]
671 | lea PC, [RA+FRAME_CONT]
672 | sub PC, BASE
673 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
674 | mov NARGS:RC, 3 // 2+1 args for func(t, k).
675 | jmp aword LFUNC:RB->gate
676 |
677 |//-----------------------------------------------------------------------
678 |
679 |->vmeta_tsets:
680 | mov ARG5, RC // RC = GCstr *
681 | mov ARG6, LJ_TSTR
682 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
683 | cmp PC_OP, BC_GSET
684 | jne >1
685 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
686 | mov [RA], TAB:RB // RB = GCtab *
687 | mov dword [RA+4], LJ_TTAB
688 | mov RB, RA
689 | jmp >2
690 |
691 |->vmeta_tsetb:
692 | movzx RC, PC_RC // Ugly, cannot fild from a byte.
693 | mov ARG4, RC
694 | fild ARG4
695 | fstp FPARG5
696 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
697 | jmp >1
698 |
699 |->vmeta_tsetv:
700 | movzx RC, PC_RC // Reload TValue *k from RC.
701 | lea RC, [BASE+RC*8]
702 |1:
703 | movzx RB, PC_RB // Reload TValue *t from RB.
704 | lea RB, [BASE+RB*8]
705 |2:
706 | mov ARG2, RB
707 | mov L:RB, SAVE_L
708 | mov ARG3, RC
709 | mov ARG1, L:RB
710 | mov SAVE_PC, PC
711 | mov L:RB->base, BASE
712 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
713 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
714 | mov BASE, L:RB->base
715 | test RC, RC
716 | jz >3
717 | // NOBARRIER: lj_meta_tset ensures the table is not black.
718 | movzx RA, PC_RA
719 | mov RB, [BASE+RA*8+4]
720 | mov RA, [BASE+RA*8]
721 | mov [RC+4], RB
722 | mov [RC], RA
723 |->cont_nop: // BASE = base, (RC = result)
724 | ins_next
725 |
726 |3: // Call __newindex metamethod.
727 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
728 | mov RA, L:RB->top
729 | mov [RA-12], PC // [cont|PC]
730 | movzx RC, PC_RA
731 | mov RB, [BASE+RC*8+4] // Copy value to third argument.
732 | mov RC, [BASE+RC*8]
733 | mov [RA+20], RB
734 | mov [RA+16], RC
735 | lea PC, [RA+FRAME_CONT]
736 | sub PC, BASE
737 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
738 | mov NARGS:RC, 4 // 3+1 args for func(t, k, v).
739 | jmp aword LFUNC:RB->gate
740 |
741 |//-- Comparison metamethods ---------------------------------------------
742 |
743 |->vmeta_comp:
744 | movzx RB, PC_OP
745 | lea RD, [BASE+RD*8]
746 | lea RA, [BASE+RA*8]
747 | mov ARG4, RB
748 | mov L:RB, SAVE_L
749 | mov ARG3, RD
750 | mov ARG2, RA
751 | mov ARG1, L:RB
752 | mov SAVE_PC, PC
753 | mov L:RB->base, BASE
754 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
755 | // 0/1 or TValue * (metamethod) returned in eax (RC).
756 |3:
757 | mov BASE, L:RB->base
758 | cmp RC, 1
759 | ja ->vmeta_binop
760 |4:
761 | lea PC, [PC+4]
762 | jb >6
763 |5:
764 | movzx RD, PC_RD
765 | branchPC RD
766 |6:
767 | ins_next
768 |
769 |->cont_condt: // BASE = base, RC = result
770 | add PC, 4
771 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
772 | jb <5
773 | jmp <6
774 |
775 |->cont_condf: // BASE = base, RC = result
776 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
777 | jmp <4
778 |
779 |->vmeta_equal:
780 | mov ARG4, RB
781 | mov L:RB, SAVE_L
782 | sub PC, 4
783 | mov ARG3, RD
784 | mov ARG2, RA
785 | mov ARG1, L:RB
786 | mov SAVE_PC, PC
787 | mov L:RB->base, BASE
788 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
789 | // 0/1 or TValue * (metamethod) returned in eax (RC).
790 | jmp <3
791 |
792 |//-- Arithmetic metamethods ---------------------------------------------
793 |
794 |->vmeta_arith_vn:
795 | lea RC, [KBASE+RC*8]
796 | jmp >1
797 |
798 |->vmeta_arith_nv:
799 | lea RC, [KBASE+RC*8]
800 | lea RB, [BASE+RB*8]
801 | xchg RB, RC
802 | jmp >2
803 |
804 |->vmeta_unm:
805 | lea RC, [BASE+RD*8]
806 | mov RB, RC
807 | jmp >2
808 |
809 |->vmeta_arith_vv:
810 | lea RC, [BASE+RC*8]
811 |1:
812 | lea RB, [BASE+RB*8]
813 |2:
814 | lea RA, [BASE+RA*8]
815 | mov ARG3, RB
816 | mov L:RB, SAVE_L
817 | mov ARG4, RC
818 | movzx RC, PC_OP
819 | mov ARG2, RA
820 | mov ARG5, RC
821 | mov ARG1, L:RB
822 | mov SAVE_PC, PC
823 | mov L:RB->base, BASE
824 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
825 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
826 | mov BASE, L:RB->base
827 | test RC, RC
828 | jz ->cont_nop
829 |
830 | // Call metamethod for binary op.
831 |->vmeta_binop:
832 | // BASE = base, RC = new base, stack = cont/func/o1/o2
833 | mov RA, RC
834 | sub RC, BASE
835 | mov [RA-12], PC // [cont|PC]
836 | lea PC, [RC+FRAME_CONT]
837 | mov LFUNC:RB, [RA-8]
838 | mov NARGS:RC, 3 // 2+1 args for func(o1, o2).
839 | cmp dword [RA-4], LJ_TFUNC
840 | jne ->vmeta_call
841 | jmp aword LFUNC:RB->gate
842 |
843 |->vmeta_len:
844 | lea RD, [BASE+RD*8]
845 | mov L:RB, SAVE_L
846 | mov ARG2, RD
847 | mov ARG1, L:RB
848 | mov SAVE_PC, PC
849 | mov L:RB->base, BASE
850 | call extern lj_meta_len // (lua_State *L, TValue *o)
851 | // TValue * (metamethod) returned in eax (RC).
852 | mov BASE, L:RB->base
853 | jmp ->vmeta_binop // Binop call for compatibility.
854 |
855 |//-- Call metamethod ----------------------------------------------------
856 |
857 |->vmeta_call: // Resolve and call __call metamethod.
858 | // RA = new base, RC = nargs+1, BASE = old base, PC = return
859 | mov ARG4, RA // Save RA, RC for us.
860 | mov ARG5, NARGS:RC
861 | sub RA, 8
862 | lea RC, [RA+NARGS:RC*8]
863 | mov L:RB, SAVE_L
864 | mov ARG2, RA
865 | mov ARG3, RC
866 | mov ARG1, L:RB
867 | mov SAVE_PC, PC
868 | mov L:RB->base, BASE // This is the callers base!
869 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
870 | mov BASE, L:RB->base
871 | mov RA, ARG4
872 | mov NARGS:RC, ARG5
873 | mov LFUNC:RB, [RA-8]
874 | add NARGS:RC, 1
875 | // This is fragile. L->base must not move, KBASE must always be defined.
876 | cmp KBASE, BASE // Continue with CALLT if flag set.
877 | je ->BC_CALLT_Z
878 | jmp aword LFUNC:RB->gate // Otherwise call resolved metamethod.
879 |
880 |//-- Argument coercion for 'for' statement ------------------------------
881 |
882 |->vmeta_for:
883 | mov L:RB, SAVE_L
884 | mov ARG2, RA
885 | mov ARG1, L:RB
886 | mov SAVE_PC, PC
887 | mov L:RB->base, BASE
888 | call extern lj_meta_for // (lua_State *L, StkId base)
889 | mov BASE, L:RB->base
890 | mov RC, [PC-4]
891 | movzx RA, RCH
892 | movzx OP, RCL
893 | shr RC, 16
894 | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Retry FORI or JFORI.
895 |
896 |//-----------------------------------------------------------------------
897 |//-- Fast functions -----------------------------------------------------
898 |//-----------------------------------------------------------------------
899 |
900 |.macro .ffunc, name
901 |->ff_ .. name:
902 |.endmacro
903 |
904 |.macro .ffunc_1, name
905 |->ff_ .. name:
906 | cmp NARGS:RC, 1+1; jb ->fff_fallback
907 |.endmacro
908 |
909 |.macro .ffunc_2, name
910 |->ff_ .. name:
911 | cmp NARGS:RC, 2+1; jb ->fff_fallback
912 |.endmacro
913 |
914 |.macro .ffunc_n, name
915 | .ffunc_1 name
916 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
917 | fld qword [RA]
918 |.endmacro
919 |
920 |.macro .ffunc_n, name, op
921 | .ffunc_1 name
922 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
923 | op
924 | fld qword [RA]
925 |.endmacro
926 |
927 |.macro .ffunc_nn, name
928 | .ffunc_2 name
929 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
930 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
931 | fld qword [RA]
932 | fld qword [RA+8]
933 |.endmacro
934 |
935 |.macro .ffunc_nnr, name
936 | .ffunc_2 name
937 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
938 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
939 | fld qword [RA+8]
940 | fld qword [RA]
941 |.endmacro
942 |
943 |// Inlined GC threshold check. Caveat: uses label 1.
944 |.macro ffgccheck
945 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
946 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
947 | jb >1
948 | call ->fff_gcstep
949 |1:
950 |.endmacro
951 |
952 |//-- Base library: checks -----------------------------------------------
953 |
954 |.ffunc_1 assert
955 | mov RB, [RA+4]
956 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
957 | mov NRESULTS, RD
958 | mov [RA-4], RB
959 | mov RB, [RA]
960 | mov [RA-8], RB
961 | sub RD, 2
962 | jz >2
963 | mov ARG1, RA
964 |1:
965 | add RA, 8
966 | mov RB, [RA+4]
967 | mov [RA-4], RB
968 | mov RB, [RA]
969 | mov [RA-8], RB
970 | sub RD, 1
971 | jnz <1
972 | mov RA, ARG1
973 |2:
974 | mov RD, NRESULTS
975 | jmp ->fff_res_
976 |
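|// type: the inverted type tag, with all numbers clamped to ~LJ_TNUMX, indexes
|// the closure's upvalues, which hold the interned type name strings.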
977 |.ffunc_1 type
978 | mov RB, [RA+4]
979 | mov RC, ~LJ_TNUMX
980 | not RB
981 | cmp RC, RB
982 ||if (cmov) {
983 | cmova RC, RB
984 ||} else {
985 | jbe >1; mov RC, RB; 1:
986 ||}
987 | mov CFUNC:RB, [RA-8]
988 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
989 | mov dword [RA-4], LJ_TSTR
990 | mov [RA-8], STR:RC
991 | jmp ->fff_res1
992 |
993 |//-- Base library: getters and setters ---------------------------------
994 |
995 |.ffunc_1 getmetatable
996 | mov RB, [RA+4]
997 | cmp RB, LJ_TTAB; jne >6
998 |1: // Field metatable must be at same offset for GCtab and GCudata!
999 | mov TAB:RB, [RA]
1000 | mov TAB:RB, TAB:RB->metatable
1001 |2:
1002 | test TAB:RB, TAB:RB
1003 | mov dword [RA-4], LJ_TNIL
1004 | jz ->fff_res1
1005 | mov CFUNC:RC, [RA-8]
1006 | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable]
1007 | mov dword [RA-4], LJ_TTAB // Store metatable as default result.
1008 | mov [RA-8], TAB:RB
1009 | mov ARG1, RA // Save result pointer.
1010 | mov RA, TAB:RB->hmask
1011 | and RA, STR:RC->hash
1012 | imul RA, #NODE
1013 | add NODE:RA, TAB:RB->node
1014 |3: // Rearranged logic, because we expect _not_ to find the key.
1015 | cmp dword NODE:RA->key.it, LJ_TSTR
1016 | jne >4
1017 | cmp dword NODE:RA->key.gcr, STR:RC
1018 | je >5
1019 |4:
1020 | mov NODE:RA, NODE:RA->next
1021 | test NODE:RA, NODE:RA
1022 | jnz <3
1023 | jmp ->fff_res1 // Not found, keep default result.
1024 |5:
1025 | mov RB, [RA+4]
1026 |  cmp RB, LJ_TNIL; je ->fff_res1	// Ditto for nil value.
1027 | mov RC, [RA]
1028 | mov RA, ARG1 // Restore result pointer.
1029 | mov [RA-4], RB // Return value of mt.__metatable.
1030 | mov [RA-8], RC
1031 | jmp ->fff_res1
1032 |
1033 |6:
1034 | cmp RB, LJ_TUDATA; je <1
1035 | cmp RB, LJ_TISNUM; ja >7
1036 | mov RB, LJ_TNUMX
1037 |7:
1038 | not RB
1039 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)]
1040 | jmp <2
1041 |
1042 |.ffunc_2 setmetatable
1043 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1044 | // Fast path: no mt for table yet and not clearing the mt.
1045 | mov TAB:RB, [RA]
1046 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1047 | cmp dword [RA+12], LJ_TTAB; jne ->fff_fallback
1048 | mov TAB:RC, [RA+8]
1049 | mov TAB:RB->metatable, TAB:RC
1050 | mov dword [RA-4], LJ_TTAB // Return original table.
1051 | mov [RA-8], TAB:RB
1052 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1053 | jz >1
1054 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1055 | barrierback TAB:RB, RC
1056 |1:
1057 | jmp ->fff_res1
1058 |
1059 |.ffunc_2 rawget
1060 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1061 | mov TAB:RC, [RA]
1062 | mov L:RB, SAVE_L
1063 | mov ARG2, TAB:RC
1064 | mov ARG1, L:RB
1065 | mov RB, RA
1066 | mov ARG4, BASE // Save BASE and RA.
1067 | add RA, 8
1068 | mov ARG3, RA
1069 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1070 | // cTValue * returned in eax (RC).
1071 | mov RA, RB
1072 | mov BASE, ARG4
1073 | mov RB, [RC] // Copy table slot.
1074 | mov RC, [RC+4]
1075 | mov [RA-8], RB
1076 | mov [RA-4], RC
1077 | jmp ->fff_res1
1078 |
1079 |//-- Base library: conversions ------------------------------------------
1080 |
1081 |.ffunc tonumber
1082 | // Only handles the number case inline (without a base argument).
1083 | cmp NARGS:RC, 1+1; jne ->fff_fallback // Exactly one argument.
1084 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
1085 | fld qword [RA]
1086 | jmp ->fff_resn
1087 |
1088 |.ffunc_1 tostring
1089 | // Only handles the string or number case inline.
1090 | cmp dword [RA+4], LJ_TSTR; jne >3
1091 | // A __tostring method in the string base metatable is ignored.
1092 | mov STR:RC, [RA]
1093 |2:
1094 | mov dword [RA-4], LJ_TSTR
1095 | mov [RA-8], STR:RC
1096 | jmp ->fff_res1
1097 |3: // Handle numbers inline, unless a number base metatable is present.
1098 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
1099 | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0
1100 | jne ->fff_fallback
1101 | ffgccheck // Caveat: uses label 1.
1102 | mov L:RB, SAVE_L
1103 | mov ARG1, L:RB
1104 | mov ARG2, RA
1105 | mov L:RB->base, RA // Add frame since C call can throw.
1106 | mov [RA-4], PC
1107 | mov SAVE_PC, PC // Redundant (but a defined value).
1108 | mov ARG3, BASE // Save BASE.
1109 | call extern lj_str_fromnum // (lua_State *L, lua_Number *np)
1110 | // GCstr returned in eax (RC).
1111 | mov RA, L:RB->base
1112 | mov BASE, ARG3
1113 | jmp <2
1114 |
1115 |//-- Base library: iterators -------------------------------------------
1116 |
1117 |.ffunc_1 next
1118 | je >2 // Missing 2nd arg?
1119 |1:
1120 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1121 | mov TAB:RB, [RA]
1122 | mov ARG2, TAB:RB
1123 | mov L:RB, SAVE_L
1124 | mov ARG1, L:RB
1125 | mov L:RB->base, RA // Add frame since C call can throw.
1126 | mov [RA-4], PC
1127 | mov SAVE_PC, PC // Redundant (but a defined value).
1128 | mov ARG4, BASE // Save BASE.
1129 | add RA, 8
1130 | mov ARG3, RA
1131 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1132 | // Flag returned in eax (RC).
1133 | mov RA, L:RB->base
1134 | mov BASE, ARG4
1135 | test RC, RC; jz >3 // End of traversal?
1136 | mov RB, [RA+8] // Copy key and value to results.
1137 | mov RC, [RA+12]
1138 | mov [RA-8], RB
1139 | mov [RA-4], RC
1140 | mov RB, [RA+16]
1141 | mov RC, [RA+20]
1142 | mov [RA], RB
1143 | mov [RA+4], RC
1144 |->fff_res2:
1145 | mov RD, 1+2
1146 | jmp ->fff_res
1147 |2: // Set missing 2nd arg to nil.
1148 | mov dword [RA+12], LJ_TNIL
1149 | jmp <1
1150 |3: // End of traversal: return nil.
1151 | mov dword [RA-4], LJ_TNIL
1152 | jmp ->fff_res1
1153 |
1154 |.ffunc_1 pairs
1155 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1156 | mov CFUNC:RC, CFUNC:RB->upvalue[0]
1157 | mov dword [RA-4], LJ_TFUNC
1158 | mov [RA-8], CFUNC:RC
1159 | mov dword [RA+12], LJ_TNIL
1160 | mov RD, 1+3
1161 | jmp ->fff_res
1162 |
1163 |.ffunc_1 ipairs_aux
1164 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1165 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1166 | fld qword [RA+8]
1167 | fld1
1168 | faddp st1
1169 | fist ARG2
1170 | fstp qword [RA-8]
1171 | mov TAB:RB, [RA]
1172 | mov RC, ARG2
1173 | cmp RC, TAB:RB->asize; jae >2 // Not in array part?
1174 | shl RC, 3
1175 | add RC, TAB:RB->array
1176 |1:
1177 | cmp dword [RC+4], LJ_TNIL; je ->fff_res0
1178 | mov RB, [RC] // Copy array slot.
1179 | mov RC, [RC+4]
1180 | mov [RA], RB
1181 | mov [RA+4], RC
1182 | jmp ->fff_res2
1183 |2: // Check for empty hash part first. Otherwise call C function.
1184 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1185 | mov ARG1, TAB:RB
1186 | mov ARG3, BASE // Save BASE and RA.
1187 | mov RB, RA
1188 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1189 | // cTValue * or NULL returned in eax (RC).
1190 | mov RA, RB
1191 | mov BASE, ARG3
1192 | test RC, RC
1193 | jnz <1
1194 |->fff_res0:
1195 | mov RD, 1+0
1196 | jmp ->fff_res
1197 |
1198 |.ffunc_1 ipairs
1199 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1200 | mov CFUNC:RC, CFUNC:RB->upvalue[0]
1201 | mov dword [RA-4], LJ_TFUNC
1202 | mov [RA-8], CFUNC:RC
1203 | fldz
1204 | fstp qword [RA+8]
1205 | mov RD, 1+3
1206 | jmp ->fff_res
1207 |
1208 |//-- Base library: catch errors ----------------------------------------
1209 |
1210 |.ffunc_1 pcall
1211 | mov [RA-4], PC
1212 | mov PC, 8+FRAME_PCALL
1213 | mov BASE, RA
1214 | add RA, 8
1215 | sub NARGS:RC, 1
1216 | mov LFUNC:RB, [RA-8]
1217 |1:
1218 | test byte [DISPATCH+DISPATCH_GL(hookmask)], HOOK_ACTIVE
1219 | jnz >3 // Hook active before pcall?
1220 |2:
1221 | cmp dword [RA-4], LJ_TFUNC
1222 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
1223 | jmp aword LFUNC:RB->gate
1224 |3:
1225 | add PC, 1 // Use FRAME_PCALLH if hook was active.
1226 | jmp <2
1227 |
1228 |.ffunc_2 xpcall
1229 | cmp dword [RA+12], LJ_TFUNC; jne ->fff_fallback
1230 | mov [RA-4], PC
1231 | mov RB, [RA+4] // Swap function and traceback.
1232 | mov [RA+12], RB
1233 | mov dword [RA+4], LJ_TFUNC
1234 | mov LFUNC:RB, [RA]
1235 | mov PC, [RA+8]
1236 | mov [RA+8], LFUNC:RB
1237 | mov [RA], PC
1238 | mov PC, 2*8+FRAME_PCALL
1239 | mov BASE, RA
1240 | add RA, 2*8
1241 | sub NARGS:RC, 2
1242 | jmp <1
1243 |
1244 |//-- Coroutine library --------------------------------------------------
1245 |
1246 |.macro coroutine_resume_wrap, resume
1247 |9: // Need to restore PC for fallback handler.
1248 | mov PC, SAVE_PC
1249 | jmp ->fff_fallback
1250 |
1251 |.if resume
1252 |.ffunc_1 coroutine_resume
1253 | mov L:RB, [RA]
1254 |.else
1255 |.ffunc coroutine_wrap_aux
1256 | mov L:RB, CFUNC:RB->upvalue[0].gcr
1257 |.endif
1258 | mov [RA-4], PC
1259 | mov SAVE_PC, PC
1260 | mov ARG1, L:RB
1261 |.if resume
1262 | cmp dword [RA+4], LJ_TTHREAD; jne <9
1263 |.endif
1264 | cmp aword L:RB->cframe, 0; jne <9
1265 | cmp byte L:RB->status, LUA_YIELD; ja <9
1266 | mov PC, L:RB->top
1267 | mov ARG2, PC
1268 |  je >1				// Status == LUA_YIELD? Then skip the initial-func check.
1269 | cmp PC, L:RB->base; je <9 // Check for presence of initial func.
1270 |1:
1271 |.if resume
1272 | lea PC, [PC+NARGS:RC*8-16] // Check stack space (-1-thread).
1273 |.else
1274 | lea PC, [PC+NARGS:RC*8-8] // Check stack space (-1).
1275 |.endif
1276 | cmp PC, L:RB->maxstack; ja <9
1277 | mov L:RB->top, PC
1278 |
1279 | mov L:RB, SAVE_L
1280 | mov L:RB->base, RA
1281 |.if resume
1282 | add RA, 8 // Keep resumed thread in stack for GC.
1283 |.endif
1284 | mov L:RB->top, RA
1285 | mov RB, ARG2
1286 |.if resume
1287 | lea RA, [RA+NARGS:RC*8-24] // RA = end of source for stack move.
1288 |.else
1289 | lea RA, [RA+NARGS:RC*8-16] // RA = end of source for stack move.
1290 |.endif
1291 | sub RA, PC // Relative to PC.
1292 |
1293 | cmp PC, RB
1294 | je >3
1295 |2: // Move args to coroutine.
1296 | mov RC, [PC+RA+4]
1297 | mov [PC-4], RC
1298 | mov RC, [PC+RA]
1299 | mov [PC-8], RC
1300 | sub PC, 8
1301 | cmp PC, RB
1302 | jne <2
1303 |3:
1304 | xor RA, RA
1305 | mov ARG4, RA
1306 | mov ARG3, RA
1307 | call ->vm_resume // (lua_State *L, StkId base, 0, 0)
1308 | set_vmstate INTERP
1309 |
1310 | mov L:RB, SAVE_L
1311 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1312 | mov BASE, L:RB->base
1313 | cmp eax, LUA_YIELD
1314 | ja >8
1315 |4:
1316 | mov RA, L:PC->base
1317 | mov KBASE, L:PC->top
1318 | mov L:PC->top, RA // Clear coroutine stack.
1319 | mov PC, KBASE
1320 | sub PC, RA
1321 | je >6 // No results?
1322 | lea RD, [BASE+PC]
1323 | shr PC, 3
1324 | cmp RD, L:RB->maxstack
1325 | ja >9 // Need to grow stack?
1326 |
1327 | mov RB, BASE
1328 | sub RB, RA
1329 |5: // Move results from coroutine.
1330 | mov RD, [RA]
1331 | mov [RA+RB], RD
1332 | mov RD, [RA+4]
1333 | mov [RA+RB+4], RD
1334 | add RA, 8
1335 | cmp RA, KBASE
1336 | jne <5
1337 |6:
1338 |.if resume
1339 | lea RD, [PC+2] // nresults+1 = 1 + true + results.
1340 | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
1341 |.else
1342 | lea RD, [PC+1] // nresults+1 = 1 + results.
1343 |.endif
1344 |7:
1345 | mov PC, SAVE_PC
1346 | mov NRESULTS, RD
1347 |.if resume
1348 | mov RA, -8
1349 |.else
1350 | xor RA, RA
1351 |.endif
1352 | test PC, FRAME_TYPE
1353 | jz ->BC_RET_Z
1354 | jmp ->vm_return
1355 |
1356 |8: // Coroutine returned with error (at co->top-1).
1357 |.if resume
1358 | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
1359 | mov RA, L:PC->top
1360 | sub RA, 8
1361 | mov L:PC->top, RA // Clear error from coroutine stack.
1362 | mov RD, [RA] // Copy error message.
1363 | mov [BASE], RD
1364 | mov RD, [RA+4]
1365 | mov [BASE+4], RD
1366 | mov RD, 1+2 // nresults+1 = 1 + false + error.
1367 | jmp <7
1368 |.else
1369 | mov ARG2, L:PC
1370 | mov ARG1, L:RB
1371 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1372 | // Error function does not return.
1373 |.endif
1374 |
1375 |9: // Handle stack expansion on return from yield.
1376 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
1377 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1378 | mov ARG2, PC
1379 | mov ARG1, L:RB
1380 | call extern lj_state_growstack // (lua_State *L, int n)
1381 | mov BASE, L:RB->base
1382 | jmp <4 // Retry the stack move.
1383 |.endmacro
1384 |
1385 | coroutine_resume_wrap 1 // coroutine.resume
1386 | coroutine_resume_wrap 0 // coroutine.wrap
1387 |
1388 |.ffunc coroutine_yield
1389 | mov L:RB, SAVE_L
1390 | mov [RA-4], PC
1391 | test aword L:RB->cframe, CFRAME_CANYIELD
1392 | jz ->fff_fallback
1393 | mov L:RB->base, RA
1394 | lea RC, [RA+NARGS:RC*8-8]
1395 | mov L:RB->top, RC
1396 | xor eax, eax
1397 | mov aword L:RB->cframe, eax
1398 | mov al, LUA_YIELD
1399 | mov byte L:RB->status, al
1400 | jmp ->vm_leave_unw
1401 |
1402 |//-- Math library -------------------------------------------------------
1403 |
1404 |.ffunc_n math_abs
1405 | fabs
1406 | // fallthrough
1407 |->fff_resn:
1408 | fstp qword [RA-8]
1409 |->fff_res1:
1410 | mov RD, 1+1
1411 |->fff_res:
1412 | mov NRESULTS, RD
1413 |->fff_res_:
1414 | test PC, FRAME_TYPE
1415 | jnz >7
1416 |5:
1417 | cmp PC_RB, RDL // More results expected?
1418 | ja >6
1419 | // BASE and KBASE are assumed to be set for the calling frame.
1420 | ins_next
1421 |
1422 |6: // Fill up results with nil.
1423 | mov dword [RA+RD*8-12], LJ_TNIL
1424 | add RD, 1
1425 | jmp <5
1426 |
1427 |7: // Non-standard return case.
1428 | mov BASE, RA
1429 | mov RA, -8 // Results start at BASE+RA = BASE-8.
1430 | jmp ->vm_return
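|// The fff_res* labels above are shared by all fast functions: fff_resn pops
|// the x87 result into [RA-8], fff_res1 returns a single result, and fff_res
|// expects RD = nresults+1, filling up with nil if the caller wants more.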
1431 |
1432 |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn
1433 |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn
1434 |
1435 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
1436 |
1437 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn
1438 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
1439 |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn
1440 |
1441 |.ffunc_n math_sin; fsin; jmp ->fff_resn
1442 |.ffunc_n math_cos; fcos; jmp ->fff_resn
1443 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
1444 |
1445 |.ffunc_n math_asin
1446 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
1447 | jmp ->fff_resn
1448 |.ffunc_n math_acos
1449 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
1450 | jmp ->fff_resn
1451 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
1452 |
1453 |.macro math_extern, func
1454 |.ffunc_n math_ .. func
1455 | mov ARG5, RA
1456 | fstp FPARG1
1457 | mov RB, BASE
1458 | call extern func
1459 | mov RA, ARG5
1460 | mov BASE, RB
1461 | jmp ->fff_resn
1462 |.endmacro
1463 |
1464 | math_extern sinh
1465 | math_extern cosh
1466 | math_extern tanh
1467 |
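|// math.deg and math.rad share this code; each closure supplies its own
|// conversion factor in upvalue[0].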
1468 |->ff_math_deg:
1469 |.ffunc_n math_rad; fmul qword CFUNC:RB->upvalue[0]; jmp ->fff_resn
1470 |
1471 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
1472 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
1473 |
1474 |.ffunc_1 math_frexp
1475 | mov RB, [RA+4]
1476 | cmp RB, LJ_TISNUM; ja ->fff_fallback
1477 | mov RC, [RA]
1478 | mov [RA-4], RB; mov [RA-8], RC
1479 | shl RB, 1; cmp RB, 0xffe00000; jae >3
1480 | or RC, RB; jz >3
1481 | mov RC, 1022
1482 | cmp RB, 0x00200000; jb >4
1483 |1:
1484 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
1485 | mov ARG1, RB; fild ARG1
1486 | mov RB, [RA-4]
1487 | and RB, 0x800fffff // Mask off exponent.
1488 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
1489 | mov [RA-4], RB
1490 |2:
1491 | fstp qword [RA]
1492 | mov RD, 1+2
1493 | jmp ->fff_res
1494 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
1495 | fldz; jmp <2
1496 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
1497 | fld qword [RA]
1498 | mov ARG1, 0x5a800000; fmul ARG1 // x = x*2^54
1499 | fstp qword [RA-8]
1500 | mov RB, [RA-4]; mov RC, 1076; shl RB, 1; jmp <1
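|// frexp bit fiddling: the high word shifted left by one drops the sign bit,
|// so >= 0xffe00000 means Inf/NaN and < 0x00200000 a denormal (rescaled by
|// 2^54, i.e. 0x5a800000 as a float, with the bias adjusted to 1076 = 1022+54).
|// Forcing the exponent field to 0x3fe leaves a mantissa in +-[0.5,1).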
1501 |
1502 |.ffunc_n math_modf
1503 | mov RB, [RA+4]
1504 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
1505 | fdup
1506 | call ->vm_trunc
1507 | fsub st1, st0
1508 |1:
1509 | fstp qword [RA-8]; fstp qword [RA]
1510 | mov RC, [RA-4]; mov RB, [RA+4]
1511 | xor RC, RB; js >3 // Need to adjust sign?
1512 |2:
1513 | mov RD, 1+2
1514 | jmp ->fff_res
1515 |3:
1516 | xor RB, 0x80000000; mov [RA+4], RB; jmp <2 // Flip sign of fraction.
1517 |4:
1518 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
1519 |
1520 |.ffunc_nnr math_fmod
1521 |1: ; fprem; fnstsw ax; sahf; jp <1
1522 | fpop1
1523 | jmp ->fff_resn
1524 |
1525 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
1526 |
1527 |.macro math_minmax, name, cmovop, nocmovop
1528 |.ffunc_n name
1529 | mov RB, 2
1530 |1:
1531 | cmp RB, RD; jae ->fff_resn
1532 | cmp dword [RA+RB*8-4], LJ_TISNUM; ja >5
1533 | fld qword [RA+RB*8-8]
1534 ||if (cmov) {
1535 | fucomi st1; cmovop st1; fpop1
1536 ||} else {
1537 | push eax
1538 | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop
1539 | pop eax
1540 ||}
1541 | add RB, 1
1542 | jmp <1
1543 |.endmacro
1544 |
1545 | math_minmax math_min, fcmovnbe, jz
1546 | math_minmax math_max, fcmovbe, jnz
1547 |5:
1548 | fpop; jmp ->fff_fallback
1549 |
1550 |//-- String library -----------------------------------------------------
1551 |
1552 |.ffunc_1 string_len
1553 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1554 | mov STR:RB, [RA]
1555 | fild dword STR:RB->len
1556 | jmp ->fff_resn
1557 |
1558 |.ffunc string_byte // Only handle the 1-arg case here.
1559 | cmp NARGS:RC, 1+1; jne ->fff_fallback
1560 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1561 | mov STR:RB, [RA]
1562 | cmp dword STR:RB->len, 1
1563 | jb ->fff_res0 // Return no results for empty string.
1564 | movzx RB, byte STR:RB[1]
1565 | mov ARG1, RB
1566 | fild ARG1
1567 | jmp ->fff_resn
1568 |
1569 |.ffunc string_char // Only handle the 1-arg case here.
1570 | ffgccheck
1571 | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1572 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
1573 | fld qword [RA]
1574 | fistp ARG4
1575 | cmp ARG4, 255; ja ->fff_fallback
1576 | lea RC, ARG4 // Little-endian.
1577 | mov ARG5, RA // Save RA.
1578 | mov ARG3, 1
1579 | mov ARG2, RC
1580 |->fff_newstr:
1581 | mov L:RB, SAVE_L
1582 | mov ARG1, L:RB
1583 | mov SAVE_PC, PC
1584 | mov L:RB->base, BASE
1585 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1586 | // GCstr * returned in eax (RC).
1587 | mov RA, ARG5
1588 | mov BASE, L:RB->base
1589 | mov dword [RA-4], LJ_TSTR
1590 | mov [RA-8], STR:RC
1591 | jmp ->fff_res1
1592 |
1593 |.ffunc string_sub
1594 | ffgccheck
1595 | mov ARG5, RA // Save RA.
1596 | mov ARG4, -1
1597 | cmp NARGS:RC, 1+2; jb ->fff_fallback
1598 | jna >1
1599 | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback
1600 | fld qword [RA+16]
1601 | fistp ARG4
1602 |1:
1603 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1604 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1605 | mov STR:RB, [RA]
1606 | mov ARG2, STR:RB
1607 | mov RB, STR:RB->len
1608 | fld qword [RA+8]
1609 | fistp ARG3
1610 | mov RC, ARG4
1611 | cmp RB, RC // len < end? (unsigned compare)
1612 | jb >5
1613 |2:
1614 | mov RA, ARG3
1615 | test RA, RA // start <= 0?
1616 | jle >7
1617 |3:
1618 | mov STR:RB, ARG2
1619 | sub RC, RA // start > end?
1620 | jl ->fff_emptystr
1621 | lea RB, [STR:RB+RA+#STR-1]
1622 | add RC, 1
1623 |4:
1624 | mov ARG2, RB
1625 | mov ARG3, RC
1626 | jmp ->fff_newstr
1627 |
1628 |5: // Negative end or overflow.
1629 | jl >6
1630 | lea RC, [RC+RB+1] // end = end+(len+1)
1631 | jmp <2
1632 |6: // Overflow.
1633 | mov RC, RB // end = len
1634 | jmp <2
1635 |
1636 |7: // Negative start or underflow.
1637 | je >8
1638 | add RA, RB // start = start+(len+1)
1639 | add RA, 1
1640 | jg <3 // start > 0?
1641 |8: // Underflow.
1642 | mov RA, 1 // start = 1
1643 | jmp <3
1644 |
1645 |->fff_emptystr: // Range underflow.
1646 | xor RC, RC // Zero length. Any ptr in RB is ok.
1647 | jmp <4
1648 |
1649 |.ffunc_2 string_rep // Only handle the 1-char case inline.
1650 | ffgccheck
1651 | mov ARG5, RA // Save RA.
1652 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1653 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1654 | mov STR:RB, [RA]
1655 | fld qword [RA+8]
1656 | fistp ARG4
1657 | mov RC, ARG4
1658 | test RC, RC
1659 | jle ->fff_emptystr // Count <= 0? (or non-int)
1660 | cmp dword STR:RB->len, 1
1661 | jb ->fff_emptystr // Zero length string?
1662 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
1663 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
1664 | movzx RA, byte STR:RB[1]
1665 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1666 | mov ARG3, RC
1667 | mov ARG2, RB
1668 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1669 | mov [RB], RAL
1670 | add RB, 1
1671 | sub RC, 1
1672 | jnz <1
1673 | jmp ->fff_newstr
1674 |
1675 |.ffunc_1 string_reverse
1676 | ffgccheck
1677 | mov ARG5, RA // Save RA.
1678 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1679 | mov STR:RB, [RA]
1680 | mov RC, STR:RB->len
1681 | test RC, RC
1682 | jz ->fff_emptystr // Zero length string?
1683 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
1684 | add RB, #STR
1685 | mov ARG4, PC // Need another temp register.
1686 | mov ARG3, RC
1687 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1688 | mov ARG2, PC
1689 |1:
1690 | movzx RA, byte [RB]
1691 | add RB, 1
1692 | sub RC, 1
1693 | mov [PC+RC], RAL
1694 | jnz <1
1695 | mov PC, ARG4
1696 | jmp ->fff_newstr
1697 |
1698 |.macro ffstring_case, name, lo, hi
1699 | .ffunc_1 name
1700 | ffgccheck
1701 | mov ARG5, RA // Save RA.
1702 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1703 | mov STR:RB, [RA]
1704 | mov RC, STR:RB->len
1705 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
1706 | add RB, #STR
1707 | mov ARG4, PC // Need another temp register.
1708 | mov ARG3, RC
1709 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1710 | mov ARG2, PC
1711 | jmp >3
1712 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
1713 | movzx RA, byte [RB+RC]
1714 | cmp RA, lo
1715 | jb >2
1716 | cmp RA, hi
1717 | ja >2
1718 | xor RA, 0x20
1719 |2:
1720 | mov [PC+RC], RAL
1721 |3:
1722 | sub RC, 1
1723 | jns <1
1724 | mov PC, ARG4
1725 | jmp ->fff_newstr
1726 |.endmacro
1727 |
1728 |ffstring_case string_lower, 0x41, 0x5a
1729 |ffstring_case string_upper, 0x61, 0x7a
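|// 0x41-0x5a is 'A'-'Z' and 0x61-0x7a is 'a'-'z'; XORing with 0x20 toggles the
|// ASCII case bit, so the same loop serves both conversions.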
1730 |
1731 |//-- Table library ------------------------------------------------------
1732 |
1733 |.ffunc_1 table_getn
1734 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1735 | mov TAB:RB, [RA]
1736 | mov ARG1, TAB:RB
1737 | mov RB, RA // Save RA and BASE.
1738 | mov ARG2, BASE
1739 | call extern lj_tab_len // (GCtab *t)
1740 | // Length of table returned in eax (RC).
1741 | mov ARG1, RC
1742 | mov RA, RB // Restore RA and BASE.
1743 | mov BASE, ARG2
1744 | fild ARG1
1745 | jmp ->fff_resn
1746 |
1747 |//-- Bit library --------------------------------------------------------
1748 |
1749 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
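|// The bias trick in C terms (a sketch, assuming IEEE-754 doubles and the
|// little-endian layout used here): adding 2^52+2^51 pushes the integer part
|// of the argument into the low 32 bits of the mantissa, so reloading the
|// stored double's low word as an int32 gives the argument reduced mod 2^32:
|//   union { double d; int32_t i[2]; } u;
|//   u.d = n + 6755399441055744.0;  /* 2^52 + 2^51 */
|//   return u.i[0];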
1750 |
1751 |.ffunc_n bit_tobit
1752 | mov ARG5, TOBIT_BIAS
1753 | fadd ARG5
1754 | fstp FPARG1 // 64 bit FP store.
1755 | fild ARG1 // 32 bit integer load (s2lfwd ok).
1756 | jmp ->fff_resn
1757 |
1758 |.macro .ffunc_bit, name
1759 | .ffunc_n name
1760 | mov ARG5, TOBIT_BIAS
1761 | fadd ARG5
1762 | fstp FPARG1
1763 | mov RB, ARG1
1764 |.endmacro
1765 |
1766 |.macro .ffunc_bit_op, name, ins
1767 | .ffunc_bit name
1768 | mov NRESULTS, NARGS:RC // Save for fallback.
1769 | lea RC, [RA+NARGS:RC*8-16]
1770 |1:
1771 | cmp RC, RA
1772 | jbe ->fff_resbit
1773 | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op
1774 | fld qword [RC]
1775 | fadd ARG5
1776 | fstp FPARG1
1777 | ins RB, ARG1
1778 | sub RC, 8
1779 | jmp <1
1780 |.endmacro
1781 |
1782 |.ffunc_bit_op bit_band, and
1783 |.ffunc_bit_op bit_bor, or
1784 |.ffunc_bit_op bit_bxor, xor
1785 |
1786 |.ffunc_bit bit_bswap
1787 | bswap RB
1788 | jmp ->fff_resbit
1789 |
1790 |.ffunc_bit bit_bnot
1791 | not RB
1792 |->fff_resbit:
1793 | mov ARG1, RB
1794 | fild ARG1
1795 | jmp ->fff_resn
1796 |
1797 |->fff_fallback_bit_op:
1798 | mov NARGS:RC, NRESULTS // Restore for fallback
1799 | jmp ->fff_fallback
1800 |
1801 |.macro .ffunc_bit_sh, name, ins
1802 | .ffunc_nn name
1803 | mov ARG5, TOBIT_BIAS
1804 | fadd ARG5
1805 | fstp FPARG3
1806 | fadd ARG5
1807 | fstp FPARG1
1808 | mov RC, RA // Assumes RA is ecx.
1809 | mov RA, ARG3
1810 | mov RB, ARG1
1811 | ins RB, cl
1812 | mov RA, RC
1813 | jmp ->fff_resbit
1814 |.endmacro
1815 |
1816 |.ffunc_bit_sh bit_lshift, shl
1817 |.ffunc_bit_sh bit_rshift, shr
1818 |.ffunc_bit_sh bit_arshift, sar
1819 |.ffunc_bit_sh bit_rol, rol
1820 |.ffunc_bit_sh bit_ror, ror
1821 |
1822 |//-----------------------------------------------------------------------
1823 |
1824 |->fff_fallback_2:
1825 | mov NARGS:RC, 1+2 // Other args are ignored, anyway.
1826 | jmp ->fff_fallback
1827 |->fff_fallback_1:
1828 | mov NARGS:RC, 1+1 // Other args are ignored, anyway.
1829 |->fff_fallback: // Call fast function fallback handler.
1830 | // RA = new base, RC = nargs+1
1831 | mov L:RB, SAVE_L
1832 | sub BASE, RA
1833 | mov [RA-4], PC
1834 | mov SAVE_PC, PC // Redundant (but a defined value).
1835 | mov ARG3, BASE // Save old BASE (relative).
1836 | mov L:RB->base, RA
1837 | lea RC, [RA+NARGS:RC*8-8]
1838 | mov ARG1, L:RB
1839 | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler.
1840 | mov L:RB->top, RC
1841 | mov CFUNC:RA, [RA-8]
1842 | cmp BASE, L:RB->maxstack
1843 | ja >5 // Need to grow stack.
1844 | call aword CFUNC:RA->f // (lua_State *L)
1845 | // Either throws an error or recovers and returns 0 or NRESULTS (+1).
1846 | test RC, RC; jnz >3
1847 |1: // Returned 0: retry fast path.
1848 | mov RA, L:RB->base
1849 | mov RC, L:RB->top
1850 | sub RC, RA
1851 | shr RC, 3
1852 | add NARGS:RC, 1
1853 | mov LFUNC:RB, [RA-8]
1854 | mov BASE, ARG3 // Restore old BASE.
1855 | add BASE, RA
1856 | cmp [RA-4], PC; jne >2 // Callable modified by handler?
1857 | jmp aword LFUNC:RB->gate // Retry the call.
1858 |
1859 |2: // Run modified callable.
1860 | cmp dword [RA-4], LJ_TFUNC
1861 | jne ->vmeta_call
1862 | jmp aword LFUNC:RB->gate // Retry the call.
1863 |
1864 |3: // Returned NRESULTS (already in RC/RD).
1865 | mov RA, L:RB->base
1866 | mov BASE, ARG3 // Restore old BASE.
1867 | add BASE, RA
1868 | jmp ->fff_res
1869 |
1870 |5: // Grow stack for fallback handler.
1871 | mov ARG2, LUA_MINSTACK
1872 | call extern lj_state_growstack // (lua_State *L, int n)
1873 | jmp <1 // Dumb retry (goes through ff first).
1874 |
1875 |->fff_gcstep: // Call GC step function.
1876 | // RA = new base, RC = nargs+1
1877 | pop RB // Must keep stack at same level.
1878 | mov ARG3, RB // Save return address
1879 | mov L:RB, SAVE_L
1880 | sub BASE, RA
1881 | mov ARG2, BASE // Save old BASE (relative).
1882 | mov [RA-4], PC
1883 | mov SAVE_PC, PC // Redundant (but a defined value).
1884 | mov L:RB->base, RA
1885 | lea RC, [RA+NARGS:RC*8-8]
1886 | mov ARG1, L:RB
1887 | mov L:RB->top, RC
1888 | call extern lj_gc_step // (lua_State *L)
1889 | mov RA, L:RB->base
1890 | mov RC, L:RB->top
1891 | sub RC, RA
1892 | shr RC, 3
1893 | add NARGS:RC, 1
1894 | mov PC, [RA-4]
1895 | mov BASE, ARG2 // Restore old BASE.
1896 | add BASE, RA
1897 | mov RB, ARG3
1898 | push RB // Restore return address.
1899 | mov LFUNC:RB, [RA-8]
1900 | ret
1901 |
1902 |//-----------------------------------------------------------------------
1903 |//-- Special dispatch targets -------------------------------------------
1904 |//-----------------------------------------------------------------------
1905 |
1906 |->vm_record: // Dispatch target for recording phase.
1907#if LJ_HASJIT
1908 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
1909 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
1910 | jnz >5
1911 | // Decrement the hookcount for consistency, but always do the call.
1912 | test RDL, HOOK_ACTIVE
1913 | jnz >1
1914 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
1915 | jz >1
1916 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
1917 | jmp >1
1918#endif
1919 |
1920 |->vm_hook: // Dispatch target with enabled hooks.
1921 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
1922 | test RDL, HOOK_ACTIVE // Hook already active?
1923 | jnz >5
1924 |
1925 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
1926 | jz >5
1927 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
1928 | jz >1
1929 | test RDL, LUA_MASKLINE
1930 | jz >5
1931 |1:
1932 | mov L:RB, SAVE_L
1933 | mov RD, NRESULTS // Dynamic top for *M instructions.
1934 | mov ARG3, RD
1935 | mov L:RB->base, BASE
1936 | mov ARG2, PC
1937 | mov ARG1, L:RB
1938 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1939 | call extern lj_dispatch_ins // (lua_State *L, BCIns *pc, int nres)
1940 |4:
1941 | mov BASE, L:RB->base
1942 | movzx RA, PC_RA
1943 |5:
1944 | movzx OP, PC_OP
1945 | movzx RD, PC_RD
1946 | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Re-dispatch to static ins.
1947 |
1948 |->vm_hotloop: // Hot loop counter underflow.
1949#if LJ_HASJIT
1950 | mov L:RB, SAVE_L
1951 | lea RA, [DISPATCH+GG_DISP2J]
1952 | mov ARG2, PC
1953 | mov ARG1, RA
1954 | mov [DISPATCH+DISPATCH_J(L)], L:RB
1955 | mov SAVE_PC, PC
1956 | mov L:RB->base, BASE
1957 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
1958 | jmp <4
1959#endif
1960 |
1961 |->vm_hotcall: // Hot call counter underflow.
1962#if LJ_HASJIT
1963 | mov L:RB, SAVE_L
1964 | lea RA, [DISPATCH+GG_DISP2J]
1965 | mov ARG2, PC
1966 | mov ARG1, RA
1967 | mov [DISPATCH+DISPATCH_J(L)], L:RB
1968 | mov SAVE_PC, PC
1969 | mov L:RB->base, BASE
1970 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
1971 | mov BASE, L:RB->base
1972 | // Dispatch the first instruction and optionally record it.
1973 | ins_next
1974#endif
1975 |
1976 |//-----------------------------------------------------------------------
1977 |//-- Trace exit handler -------------------------------------------------
1978 |//-----------------------------------------------------------------------
1979 |
1980 |// Called from an exit stub with the exit number on the stack.
1981 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
1982 |->vm_exit_handler:
1983#if LJ_HASJIT
1984 | push ebp; lea ebp, [esp+12]; push ebp
1985 | push ebx; push edx; push ecx; push eax
1986 | movzx RC, byte [ebp-4] // Reconstruct exit number.
1987 | mov RCH, byte [ebp-8]
1988 | mov [ebp-4], edi; mov [ebp-8], esi
1989 | // Caveat: DISPATCH is ebx.
1990 | mov DISPATCH, [ebp]
1991 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
1992 | set_vmstate EXIT
1993 | mov [DISPATCH+DISPATCH_J(exitno)], RC
1994 | mov [DISPATCH+DISPATCH_J(parent)], RA
1995 | sub esp, 8*8+16 // Room for SSE regs + args.
1996 |
1997 | // Must not access SSE regs if SSE2 is not present.
1998 | test dword [DISPATCH+DISPATCH_J(flags)], JIT_F_SSE2
1999 | jz >1
2000 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
2001 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
2002 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
2003 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2004 |1:
2005 | // Caveat: RB is ebp.
2006 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
2007 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2008 | mov [DISPATCH+DISPATCH_J(L)], L:RB
2009 | lea RC, [esp+16]
2010 | mov L:RB->base, BASE
2011 | lea RA, [DISPATCH+GG_DISP2J]
2012 | mov ARG2, RC
2013 | mov ARG1, RA
2014 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2015 | // Interpreter C frame returned in eax.
2016 | mov esp, eax // Reposition stack to C frame.
2017 | mov BASE, L:RB->base
2018 | mov PC, SAVE_PC
2019 | mov SAVE_L, L:RB // Needed for on-trace resume/yield.
2020#endif
2021 |->vm_exit_interp:
2022#if LJ_HASJIT
2023 | mov LFUNC:KBASE, [BASE-8]
2024 | mov PROTO:KBASE, LFUNC:KBASE->pt
2025 | mov KBASE, PROTO:KBASE->k
2026 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2027 | set_vmstate INTERP
2028 | ins_next
2029#endif
2030 |
2031 |//-----------------------------------------------------------------------
2032 |//-- Math helper functions ----------------------------------------------
2033 |//-----------------------------------------------------------------------
2034 |
2035 |// FP value rounding. Called by math.floor/math.ceil fast functions
2036 |// and from JIT code. Arg/ret on x87 stack. No int/xmm registers modified.
2037 |.macro vm_round, mode1, mode2
2038 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
2039 | mov [esp+8], eax
2040 | mov ax, mode1
2041 | or ax, [esp+4]
2042 |.if mode2 ~= 0xffff
2043 | and ax, mode2
2044 |.endif
2045 | mov [esp+6], ax
2046 | fldcw word [esp+6]
2047 | frndint
2048 | fldcw word [esp+4]
2049 | mov eax, [esp+8]
2050 | ret
2051 |.endmacro
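|// mode1/mode2 manipulate the x87 rounding-control bits (bits 10-11 of the
|// FCW): 0x0400/0xf7ff selects round-down (floor), 0x0800/0xfbff round-up
|// (ceil) and 0x0c00/0xffff truncation; the caller's control word is restored
|// afterwards.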
2052 |
2053 |->vm_floor:
2054 | vm_round 0x0400, 0xf7ff
2055 |
2056 |->vm_ceil:
2057 | vm_round 0x0800, 0xfbff
2058 |
2059 |->vm_trunc:
2060 | vm_round 0x0c00, 0xffff
2061 |
2062 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2063 |// Args/ret on x87 stack (y on top). No xmm registers modified.
2064 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
2065 |->vm_mod:
2066 | fld st1
2067 | fdiv st1
2068 | fnstcw word [esp+4]
2069 | mov ax, 0x0400
2070 | or ax, [esp+4]
2071 | and ax, 0xf7ff
2072 | mov [esp+6], ax
2073 | fldcw word [esp+6]
2074 | frndint
2075 | fldcw word [esp+4]
2076 | fmulp st1
2077 | fsubp st1
2078 | ret
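|// vm_mod computes the floored modulo x - floor(x/y)*y (Lua semantics: a
|// non-zero result takes the sign of y), forcing round-down mode only for the
|// frndint above.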
2079 |
2080 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
2081 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
2082 |// Caveat: needs 3 slots on x87 stack!
2083 |->vm_exp:
2084 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
2085 |->vm_exp2:
2086 | fst dword [esp+4] // Caveat: overwrites ARG1.
2087 | cmp dword [esp+4], 0x7f800000; je >1 // Special case: e^+Inf = +Inf
2088 | cmp dword [esp+4], 0xff800000; je >2 // Special case: e^-Inf = 0
2089 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
2090 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
2091 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
2092 |1:
2093 | ret
2094 |2:
2095 | fpop; fldz; ret
2096 |
2097 |// Generic power function x^y. Called by BC_POW, math.pow fast function
2098 |// and vm_arith. Args/ret on x87 stack (y on top). No int/xmm regs modified.
2099 |// Caveat: needs 3 slots on x87 stack!
2100 |->vm_pow:
2101 | fist dword [esp+4] // Store/reload int before comparison.
2102 | fild dword [esp+4] // Integral exponent used in vm_powi.
2103 ||if (cmov) {
2104 | fucomip st1
2105 ||} else {
2106 | push eax; fucomp st1; fnstsw ax; sahf; pop eax
2107 ||}
2108 | jnz >8 // Branch for FP exponents.
2109 | jp >9 // Branch for NaN exponent.
2110 | fpop // Pop y and fallthrough to vm_powi.
2111 |
2112 |// FP/int power function x^i. Called from JIT code. Arg1/ret on x87 stack.
2113 |// Arg2 (int) on C stack. No int/xmm regs modified.
2114 |// Caveat: needs 2 slots on x87 stack!
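|// Square-and-multiply over the bits of i: x^-i is computed as (1/x)^i,
|// x^1 returns x unchanged and x^0 returns 1.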
2115 |->vm_powi:
2116 | push eax
2117 | mov eax, [esp+8]
2118 | cmp eax, 1; jle >6 // i<=1?
2119 | // Now 1 < (unsigned)i <= 0x80000000.
2120 |1: // Handle leading zeros.
2121 | test eax, 1; jnz >2
2122 | fmul st0
2123 | shr eax, 1
2124 | jmp <1
2125 |2:
2126 | shr eax, 1; jz >5
2127 | fdup
2128 |3: // Handle trailing bits.
2129 | fmul st0
2130 | shr eax, 1; jz >4
2131 | jnc <3
2132 | fmul st1, st0
2133 | jmp <3
2134 |4:
2135 | fmulp st1
2136 |5:
2137 | pop eax
2138 | ret
2139 |6:
2140 | je <5 // x^1 ==> x
2141 | jb >7
2142 | fld1; fdivrp st1
2143 | neg eax
2144 | cmp eax, 1; je <5 // x^-1 ==> 1/x
2145 | jmp <1 // x^-i ==> (1/x)^i
2146 |7:
2147 | fpop; fld1 // x^0 ==> 1
2148 | pop eax
2149 | ret
2150 |
2151 |8: // FP/FP power function x^y.
2152 | push eax
2153 | fst dword [esp+8]
2154 | fxch
2155 | fst dword [esp+12]
2156 | mov eax, [esp+8]; shl eax, 1
2157 | cmp eax, 0xff000000; je >2 // x^+-Inf?
2158 | mov eax, [esp+12]; shl eax, 1; je >4 // +-0^y?
2159 | cmp eax, 0xff000000; je >4 // +-Inf^y?
2160 | pop eax
2161 | fyl2x
2162 | jmp ->vm_exp2raw
2163 |
2164 |9: // Handle x^NaN.
2165 | fld1
2166 ||if (cmov) {
2167 | fucomip st2
2168 ||} else {
2169 | push eax; fucomp st2; fnstsw ax; sahf; pop eax
2170 ||}
2171 | je >1 // 1^NaN ==> 1
2172 | fxch // x^NaN ==> NaN
2173 |1:
2174 | fpop
2175 | ret
2176 |
2177 |2: // Handle x^+-Inf.
2178 | fabs
2179 | fld1
2180 ||if (cmov) {
2181 | fucomip st1
2182 ||} else {
2183 | fucomp st1; fnstsw ax; sahf
2184 ||}
2185 | je >3 // +-1^+-Inf ==> 1
2186 | fpop; fabs; fldz; mov eax, 0; setc al
2187 | ror eax, 1; xor eax, [esp+8]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
2188 | fxch
2189 |3:
2190 | fpop1; fabs; pop eax
2191 | ret
2192 |
2193 |4: // Handle +-0^y or +-Inf^y.
2194 | cmp dword [esp+8], 0; jge <3 // y >= 0, x^y ==> |x|
2195 | fpop; fpop
2196 | test eax, eax; pop eax; jz >5 // y < 0, +-0^y ==> +Inf
2197 | fldz // y < 0, +-Inf^y ==> 0
2198 | ret
2199 |5:
2200 | mov dword [esp+8], 0x7f800000 // Return +Inf.
2201 | fld dword [esp+8]
2202 | ret
2203 |
2204 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
2205 |// Computes fpm(x) for extended math functions. ORDER FPM.
2206 |->vm_foldfpm:
2207 | mov eax, [esp+12]
2208 | fld qword [esp+4]
2209 | cmp eax, 1; jb ->vm_floor; je ->vm_ceil
2210 | cmp eax, 3; jb ->vm_trunc; ja >1
2211 | fsqrt; ret
2212 |1: ; cmp eax, 5; jb ->vm_exp; je ->vm_exp2
2213 | cmp eax, 7; je >1; ja >2
2214 | fldln2; fxch; fyl2x; ret
2215 |1: ; fld1; fxch; fyl2x; ret
2216 |2: ; cmp eax, 9; je >1; ja >2
2217 | fldlg2; fxch; fyl2x; ret
2218 |1: ; fsin; ret
2219 |2: ; cmp eax, 11; je >1; ja >9
2220 | fcos; ret
2221 |1: ; fptan; fpop; ret
2222 |9: ; int3 // Bad fpm.
2223 |
2224 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
2225 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
2226 |// and basic math functions. ORDER ARITH
2227 |->vm_foldarith:
2228 | mov eax, [esp+20]
2229 | fld qword [esp+4]
2230 | fld qword [esp+12]
2231 | cmp eax, 1; je >1; ja >2
2232 | faddp st1; ret
2233 |1: ; fsubp st1; ret
2234 |2: ; cmp eax, 3; je >1; ja >2
2235 | fmulp st1; ret
2236 |1: ; fdivp st1; ret
2237 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
2238 | cmp eax, 7; je >1; ja >2
2239 | fpop; fchs; ret
2240 |1: ; fpop; fabs; ret
2241 |2: ; cmp eax, 9; je >1; ja >2
2242 | fpatan; ret
2243 |1: ; fxch; fscale; fpop1; ret
2244 |2: ; cmp eax, 11; je >1; ja >9
2245 ||if (cmov) {
2246 | fucomi st1; fcmovnbe st1; fpop1; ret
2247 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
2248 ||} else {
2249 | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret
2250 |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret
2251 ||}
2252 |9: ; int3 // Bad op.
2253 |
2254 |//-----------------------------------------------------------------------
2255 |//-- Miscellaneous functions --------------------------------------------
2256 |//-----------------------------------------------------------------------
2257 |
2258 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2259 |->vm_cpuid:
2260 | pushfd
2261 | pop edx
2262 | mov ecx, edx
2263 | xor edx, 0x00200000 // Toggle ID bit in flags.
2264 | push edx
2265 | popfd
2266 | pushfd
2267 | pop edx
2268 | xor eax, eax // Zero means no features supported.
2269 | cmp ecx, edx
2270 | jz >1 // No ID toggle means no CPUID support.
2271 | mov eax, [esp+4] // Argument 1 is function number.
2272 | push edi
2273 | push ebx
2274 | cpuid
2275 | mov edi, [esp+16] // Argument 2 is result area.
2276 | mov [edi], eax
2277 | mov [edi+4], ebx
2278 | mov [edi+8], ecx
2279 | mov [edi+12], edx
2280 | pop ebx
2281 | pop edi
2282 |1:
2283 | ret
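|// Usage sketch from C (an illustration under assumptions, not code from this
|// file): leaf 1 reports SSE2 in EDX bit 26, which is the kind of check that
|// feeds flags like JIT_F_SSE2 used above:
|//   uint32_t res[4];
|//   int has_sse2 = lj_vm_cpuid(1, res) && (res[3] & (1u << 26));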
2284 |
2285 |//-----------------------------------------------------------------------
2286}
2287
2288/* Generate the code for a single instruction. */
2289static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
2290{
2291 int vk = 0;
2292 |// Note: aligning all instructions does not pay off.
2293 |=>defop:
2294
2295 switch (op) {
2296
2297 /* -- Comparison ops ---------------------------------------------------- */
2298
2299 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2300
2301 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2302 | // RA = src1, RD = src2, JMP with RD = target
2303 | ins_AD
2304 | checknum RA, ->vmeta_comp
2305 | checknum RD, ->vmeta_comp
2306 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
2307 | fld qword [BASE+RD*8]
2308 | add PC, 4
2309 | fcomparepp // eax (RD) modified!
2310 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2311 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2312 switch (op) {
2313 case BC_ISLT:
2314 | jbe >2
2315 break;
2316 case BC_ISGE:
2317 | ja >2
2318 break;
2319 case BC_ISLE:
2320 | jb >2
2321 break;
2322 case BC_ISGT:
2323 | jae >2
2324 break;
2325 default: break; /* Shut up GCC. */
2326 }
2327 |1:
2328 | movzx RD, PC_RD
2329 | branchPC RD
2330 |2:
2331 | ins_next
2332 break;
2333
2334 case BC_ISEQV: case BC_ISNEV:
2335 vk = op == BC_ISEQV;
2336 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2337 | mov RB, [BASE+RD*8+4]
2338 | add PC, 4
2339 | cmp RB, LJ_TISNUM; ja >5
2340 | checknum RA, >5
2341 | fld qword [BASE+RA*8]
2342 | fld qword [BASE+RD*8]
2343 | fcomparepp // eax (RD) modified!
2344 iseqne_fp:
2345 if (vk) {
2346 | jp >2 // Unordered means not equal.
2347 | jne >2
2348 } else {
2349 | jp >2 // Unordered means not equal.
2350 | je >1
2351 }
2352 iseqne_end:
2353 if (vk) {
2354 |1: // EQ: Branch to the target.
2355 | movzx RD, PC_RD
2356 | branchPC RD
2357 |2: // NE: Fallthrough to next instruction.
2358 } else {
2359 |2: // NE: Branch to the target.
2360 | movzx RD, PC_RD
2361 | branchPC RD
2362 |1: // EQ: Fallthrough to next instruction.
2363 }
2364 | ins_next
2365 |
2366 if (op == BC_ISEQV || op == BC_ISNEV) {
2367 |5: // Either or both types are not numbers.
2368 | checktp RA, RB // Compare types.
2369 | jne <2 // Not the same type?
2370 | cmp RB, LJ_TISPRI
2371 | jae <1 // Same type and primitive type?
2372 |
2373 | // Same types and not a primitive type. Compare GCobj or pvalue.
2374 | mov RA, [BASE+RA*8]
2375 | mov RD, [BASE+RD*8]
2376 | cmp RA, RD
2377 | je <1 // Same GCobjs or pvalues?
2378 | cmp RB, LJ_TISTABUD
2379 | ja <2 // Different objects and not table/ud?
2380 |
2381 | // Different tables or userdatas. Need to check __eq metamethod.
2382 | // Field metatable must be at same offset for GCtab and GCudata!
2383 | mov TAB:RB, TAB:RA->metatable
2384 | test TAB:RB, TAB:RB
2385 | jz <2 // No metatable?
2386 | test byte TAB:RB->nomm, 1<<MM_eq
2387 | jnz <2 // Or 'no __eq' flag set?
2388 if (vk) {
2389 | xor RB, RB // ne = 0
2390 } else {
2391 | mov RB, 1 // ne = 1
2392 }
2393 | jmp ->vmeta_equal // Handle __eq metamethod.
2394 }
2395 break;
2396 case BC_ISEQS: case BC_ISNES:
2397 vk = op == BC_ISEQS;
2398 | ins_AND // RA = src, RD = str const, JMP with RD = target
2399 | add PC, 4
2400 | checkstr RA, >2
2401 | mov RA, [BASE+RA*8]
2402 | cmp RA, [KBASE+RD*4]
2403 iseqne_test:
2404 if (vk) {
2405 | jne >2
2406 } else {
2407 | je >1
2408 }
2409 goto iseqne_end;
2410 case BC_ISEQN: case BC_ISNEN:
2411 vk = op == BC_ISEQN;
2412 | ins_AD // RA = src, RD = num const, JMP with RD = target
2413 | add PC, 4
2414 | checknum RA, >2
2415 | fld qword [BASE+RA*8]
2416 | fld qword [KBASE+RD*8]
2417 | fcomparepp // eax (RD) modified!
2418 goto iseqne_fp;
2419 case BC_ISEQP: case BC_ISNEP:
2420 vk = op == BC_ISEQP;
2421 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
2422 | add PC, 4
2423 | checktp RA, RD
2424 goto iseqne_test;
2425
2426 /* -- Unary test and copy ops ------------------------------------------- */
2427
2428 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2429 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
2430 | mov RB, [BASE+RD*8+4]
2431 | add PC, 4
2432 | cmp RB, LJ_TISTRUECOND
2433 if (op == BC_IST || op == BC_ISTC) {
2434 | jae >1
2435 } else {
2436 | jb >1
2437 }
2438 if (op == BC_ISTC || op == BC_ISFC) {
2439 | mov [BASE+RA*8+4], RB
2440 | mov RB, [BASE+RD*8]
2441 | mov [BASE+RA*8], RB
2442 }
2443 | movzx RD, PC_RD
2444 | branchPC RD
2445 |1: // Fallthrough to the next instruction.
2446 | ins_next
2447 break;
2448
2449 /* -- Unary ops --------------------------------------------------------- */
2450
2451 case BC_MOV:
2452 | ins_AD // RA = dst, RD = src
2453 | mov RB, [BASE+RD*8+4]
2454 | mov RD, [BASE+RD*8] // Overwrites RD.
2455 | mov [BASE+RA*8+4], RB
2456 | mov [BASE+RA*8], RD
2457 | ins_next_
2458 break;
2459 case BC_NOT:
2460 | ins_AD // RA = dst, RD = src
2461 | xor RB, RB
2462 | checktp RD, LJ_TISTRUECOND
2463 | adc RB, LJ_TTRUE
2464 | mov [BASE+RA*8+4], RB
2465 | ins_next
2466 break;
2467 case BC_UNM:
2468 | ins_AD // RA = dst, RD = src
2469 | checknum RD, ->vmeta_unm
2470 | fld qword [BASE+RD*8]
2471 | fchs
2472 | fstp qword [BASE+RA*8]
2473 | ins_next
2474 break;
2475 case BC_LEN:
2476 | ins_AD // RA = dst, RD = src
2477 | checkstr RD, >2
2478 | mov STR:RD, [BASE+RD*8]
2479 | fild dword STR:RD->len
2480 |1:
2481 | fstp qword [BASE+RA*8]
2482 | ins_next
2483 |2:
2484 | checktab RD, ->vmeta_len
2485 | mov TAB:RD, [BASE+RD*8]
2486 | mov ARG1, TAB:RD
2487 | mov RB, BASE // Save BASE.
2488 | call extern lj_tab_len // (GCtab *t)
2489 | // Length of table returned in eax (RC).
2490 | mov ARG1, RC
2491 | mov BASE, RB // Restore BASE.
2492 | fild ARG1
2493 | movzx RA, PC_RA
2494 | jmp <1
2495 break;
2496
2497 /* -- Binary ops -------------------------------------------------------- */
2498
2499 |.macro ins_arithpre, ins
2500 | ins_ABC
2501 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2502 ||switch (vk) {
2503 ||case 0:
2504 | checknum RB, ->vmeta_arith_vn
2505 | fld qword [BASE+RB*8]
2506 | ins qword [KBASE+RC*8]
2507 || break;
2508 ||case 1:
2509 | checknum RB, ->vmeta_arith_nv
2510 | fld qword [KBASE+RC*8]
2511 | ins qword [BASE+RB*8]
2512 || break;
2513 ||default:
2514 | checknum RB, ->vmeta_arith_vv
2515 | checknum RC, ->vmeta_arith_vv
2516 | fld qword [BASE+RB*8]
2517 | ins qword [BASE+RC*8]
2518 || break;
2519 ||}
2520 |.endmacro
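|// vk selects the operand pattern: 0 = VN (stack slot op number constant),
|// 1 = NV (number constant op stack slot), otherwise VV (two stack slots).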
2521 |
2522 |.macro ins_arith, ins
2523 | ins_arithpre ins
2524 | fstp qword [BASE+RA*8]
2525 | ins_next
2526 |.endmacro
2527
2528 | // RA = dst, RB = src1 or num const, RC = src2 or num const
2529 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2530 | ins_arith fadd
2531 break;
2532 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2533 | ins_arith fsub
2534 break;
2535 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2536 | ins_arith fmul
2537 break;
2538 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2539 | ins_arith fdiv
2540 break;
2541 case BC_MODVN:
2542 | ins_arithpre fld
2543 |->BC_MODVN_Z:
2544 | call ->vm_mod
2545 | fstp qword [BASE+RA*8]
2546 | ins_next
2547 break;
2548 case BC_MODNV: case BC_MODVV:
2549 | ins_arithpre fld
2550 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2551 break;
2552 case BC_POW:
2553 | ins_arithpre fld
2554 | call ->vm_pow
2555 | fstp qword [BASE+RA*8]
2556 | ins_next
2557 break;
2558
2559 case BC_CAT:
2560 | ins_ABC // RA = dst, RB = src_start, RC = src_end
2561 | lea RA, [BASE+RC*8]
2562 | sub RC, RB
2563 | mov ARG2, RA
2564 | mov ARG3, RC
2565 |->BC_CAT_Z:
2566 | mov L:RB, SAVE_L
2567 | mov ARG1, L:RB
2568 | mov SAVE_PC, PC
2569 | mov L:RB->base, BASE
2570 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2571 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
2572 | mov BASE, L:RB->base
2573 | test RC, RC
2574 | jnz ->vmeta_binop
2575 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
2576 | movzx RA, PC_RA
2577 | mov RC, [BASE+RB*8+4]
2578 | mov RB, [BASE+RB*8]
2579 | mov [BASE+RA*8+4], RC
2580 | mov [BASE+RA*8], RB
2581 | ins_next
2582 break;
2583
2584 /* -- Constant ops ------------------------------------------------------ */
2585
2586 case BC_KSTR:
2587 | ins_AND // RA = dst, RD = str const (~)
2588 | mov RD, [KBASE+RD*4]
2589 | mov dword [BASE+RA*8+4], LJ_TSTR
2590 | mov [BASE+RA*8], RD
2591 | ins_next
2592 break;
2593 case BC_KSHORT:
2594 | ins_AD // RA = dst, RD = signed int16 literal
2595 | fild PC_RD // Refetch signed RD from instruction.
2596 | fstp qword [BASE+RA*8]
2597 | ins_next
2598 break;
2599 case BC_KNUM:
2600 | ins_AD // RA = dst, RD = num const
2601 | fld qword [KBASE+RD*8]
2602 | fstp qword [BASE+RA*8]
2603 | ins_next
2604 break;
2605 case BC_KPRI:
2606 | ins_AND // RA = dst, RD = primitive type (~)
2607 | mov [BASE+RA*8+4], RD
2608 | ins_next
2609 break;
2610 case BC_KNIL:
2611 | ins_AD // RA = dst_start, RD = dst_end
2612 | lea RA, [BASE+RA*8+12]
2613 | lea RD, [BASE+RD*8+4]
2614 | mov RB, LJ_TNIL
2615 | mov [RA-8], RB // Sets minimum 2 slots.
2616 |1:
2617 | mov [RA], RB
2618 | add RA, 8
2619 | cmp RA, RD
2620 | jbe <1
2621 | ins_next
2622 break;
2623
2624 /* -- Upvalue and function ops ------------------------------------------ */
2625
2626 case BC_UGET:
2627 | ins_AD // RA = dst, RD = upvalue #
2628 | mov LFUNC:RB, [BASE-8]
2629 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
2630 | mov RB, UPVAL:RB->v
2631 | mov RD, [RB+4]
2632 | mov RB, [RB]
2633 | mov [BASE+RA*8+4], RD
2634 | mov [BASE+RA*8], RB
2635 | ins_next
2636 break;
2637 case BC_USETV:
2638 | ins_AD // RA = upvalue #, RD = src
2639 | // Really ugly code due to the lack of a 4th free register.
2640 | mov LFUNC:RB, [BASE-8]
2641 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2642 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
2643 | jnz >4
2644 |1:
2645 | mov RA, [BASE+RD*8]
2646 |2:
2647 | mov RB, UPVAL:RB->v
2648 | mov RD, [BASE+RD*8+4]
2649 | mov [RB], RA
2650 | mov [RB+4], RD
2651 |3:
2652 | ins_next
2653 |
2654 |4: // Upvalue is black. Check if new value is collectable and white.
2655 | mov RA, [BASE+RD*8+4]
2656 | sub RA, LJ_TISGCV
2657 | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
2658 | jbe <1
2659 | mov GCOBJ:RA, [BASE+RD*8]
2660 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
2661 | jz <2
2662 | // Crossed a write barrier. So move the barrier forward.
2663 | mov ARG2, UPVAL:RB
2664 | mov ARG3, GCOBJ:RA
2665 | mov RB, UPVAL:RB->v
2666 | mov RD, [BASE+RD*8+4]
2667 | mov [RB], GCOBJ:RA
2668 | mov [RB+4], RD
2669 |->BC_USETV_Z:
2670 | mov L:RB, SAVE_L
2671 | lea GL:RA, [DISPATCH+GG_DISP2G]
2672 | mov L:RB->base, BASE
2673 | mov ARG1, GL:RA
2674 | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v)
2675 | mov BASE, L:RB->base
2676 | jmp <3
2677 break;
2678 case BC_USETS:
2679 | ins_AND // RA = upvalue #, RD = str const (~)
2680 | mov LFUNC:RB, [BASE-8]
2681 | mov GCOBJ:RD, [KBASE+RD*4]
2682 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2683 | mov RA, UPVAL:RB->v
2684 | mov dword [RA+4], LJ_TSTR
2685 | mov [RA], GCOBJ:RD
2686 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
2687 | jnz >2
2688 |1:
2689 | ins_next
2690 |
2691 |2: // Upvalue is black. Check if string is white.
2692 | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str)
2693 | jz <1
2694 | // Crossed a write barrier. So move the barrier forward.
2695 | mov ARG3, GCOBJ:RD
2696 | mov ARG2, UPVAL:RB
2697 | jmp ->BC_USETV_Z
2698 break;
2699 case BC_USETN:
2700 | ins_AD // RA = upvalue #, RD = num const
2701 | mov LFUNC:RB, [BASE-8]
2702 | fld qword [KBASE+RD*8]
2703 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2704 | mov RA, UPVAL:RB->v
2705 | fstp qword [RA]
2706 | ins_next
2707 break;
2708 case BC_USETP:
2709 | ins_AND // RA = upvalue #, RD = primitive type (~)
2710 | mov LFUNC:RB, [BASE-8]
2711 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2712 | mov RA, UPVAL:RB->v
2713 | mov [RA+4], RD
2714 | ins_next
2715 break;
2716 case BC_UCLO:
2717 | ins_AD // RA = level, RD = target
2718 | branchPC RD // Do this first to free RD.
2719 | mov L:RB, SAVE_L
2720 | cmp dword L:RB->openupval, 0
2721 | je >1
2722 | lea RA, [BASE+RA*8]
2723 | mov ARG2, RA
2724 | mov ARG1, L:RB
2725 | mov L:RB->base, BASE
2726 | call extern lj_func_closeuv // (lua_State *L, StkId level)
2727 | mov BASE, L:RB->base
2728 |1:
2729 | ins_next
2730 break;
2731
2732 case BC_FNEW:
2733 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
2734 | mov LFUNC:RA, [BASE-8]
2735 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
2736 | mov L:RB, SAVE_L
2737 | mov ARG3, LFUNC:RA
2738 | mov ARG2, PROTO:RD
2739 | mov SAVE_PC, PC
2740 | mov ARG1, L:RB
2741 | mov L:RB->base, BASE
2742 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2743 | call extern lj_func_newL_gc
2744 | // GCfuncL * returned in eax (RC).
2745 | mov BASE, L:RB->base
2746 | movzx RA, PC_RA
2747 | mov [BASE+RA*8], LFUNC:RC
2748 | mov dword [BASE+RA*8+4], LJ_TFUNC
2749 | ins_next
2750 break;
2751
2752 /* -- Table ops --------------------------------------------------------- */
2753
2754 case BC_TNEW:
2755 | ins_AD // RA = dst, RD = hbits|asize
2756 | mov RB, RD
2757 | and RD, 0x7ff
2758 | shr RB, 11
2759 | cmp RD, 0x7ff // Turn 0x7ff into 0x801.
2760 | sete RAL
2761 | mov ARG3, RB
2762 | add RD, RA
2763 | mov L:RB, SAVE_L
2764 | add RD, RA
2765 | mov ARG2, RD
2766 | mov SAVE_PC, PC
2767 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
2768 | mov ARG1, L:RB
2769 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
2770 | mov L:RB->base, BASE
2771 | jae >2
2772 |1:
2773 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2774 | // Table * returned in eax (RC).
2775 | mov BASE, L:RB->base
2776 | movzx RA, PC_RA
2777 | mov [BASE+RA*8], TAB:RC
2778 | mov dword [BASE+RA*8+4], LJ_TTAB
2779 | ins_next
2780 |2:
2781 | call extern lj_gc_step_fixtop // (lua_State *L)
2782 | mov ARG1, L:RB // Args owned by callee. Set it again.
2783 | jmp <1
2784 break;
2785 case BC_TDUP:
2786 | ins_AND // RA = dst, RD = table const (~) (holding template table)
2787 | mov TAB:RD, [KBASE+RD*4]
2788 | mov L:RB, SAVE_L
2789 | mov ARG2, TAB:RD
2790 | mov ARG1, L:RB
2791 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
2792 | mov SAVE_PC, PC
2793 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
2794 | mov L:RB->base, BASE
2795 | jae >3
2796 |2:
2797 | call extern lj_tab_dup // (lua_State *L, Table *kt)
2798 | // Table * returned in eax (RC).
2799 | mov BASE, L:RB->base
2800 | movzx RA, PC_RA
2801 | mov [BASE+RA*8], TAB:RC
2802 | mov dword [BASE+RA*8+4], LJ_TTAB
2803 | ins_next
2804 |3:
2805 | call extern lj_gc_step_fixtop // (lua_State *L)
2806 | mov ARG1, L:RB // Args owned by callee. Set it again.
2807 | jmp <2
2808 break;
2809
2810 case BC_GGET:
2811 | ins_AND // RA = dst, RD = str const (~)
2812 | mov LFUNC:RB, [BASE-8]
2813 | mov TAB:RB, LFUNC:RB->env
2814 | mov STR:RC, [KBASE+RD*4]
2815 | jmp ->BC_TGETS_Z
2816 break;
2817 case BC_GSET:
2818 | ins_AND // RA = src, RD = str const (~)
2819 | mov LFUNC:RB, [BASE-8]
2820 | mov TAB:RB, LFUNC:RB->env
2821 | mov STR:RC, [KBASE+RD*4]
2822 | jmp ->BC_TSETS_Z
2823 break;
2824
2825 case BC_TGETV:
2826 | ins_ABC // RA = dst, RB = table, RC = key
2827 | checktab RB, ->vmeta_tgetv
2828 | mov TAB:RB, [BASE+RB*8]
2829 |
2830 | // Integer key? Convert number to int and back and compare.
2831 | checknum RC, >5
2832 | fld qword [BASE+RC*8]
2833 | fist ARG1
2834 | fild ARG1
2835 | fcomparepp // eax (RC) modified!
2836 | mov RC, ARG1
2837 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
2838 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
2839 | jae ->vmeta_tgetv // Not in array part? Use fallback.
2840 | shl RC, 3
2841 | add RC, TAB:RB->array
2842 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
2843 | je >2
2844 |1:
2845 | mov RB, [RC] // Get array slot.
2846 | mov RC, [RC+4]
2847 | mov [BASE+RA*8], RB
2848 | mov [BASE+RA*8+4], RC
2849 | ins_next
2850 |
2851 |2: // Check for __index if table value is nil.
2852 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
2853 | jz <1
2854 | mov TAB:RA, TAB:RB->metatable
2855 | test byte TAB:RA->nomm, 1<<MM_index
2856 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
2857 | movzx RA, PC_RA // Restore RA.
2858 | jmp <1
2859 |
2860 |5: // String key?
2861 | checkstr RC, ->vmeta_tgetv
2862 | mov STR:RC, [BASE+RC*8]
2863 | jmp ->BC_TGETS_Z
2864 break;
2865 case BC_TGETS:
2866 | ins_ABC // RA = dst, RB = table, RC = str const (~)
2867 | not RC
2868 | mov STR:RC, [KBASE+RC*4]
2869 | checktab RB, ->vmeta_tgets
2870 | mov TAB:RB, [BASE+RB*8]
2871 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
2872 | mov RA, TAB:RB->hmask
2873 | and RA, STR:RC->hash
2874 | imul RA, #NODE
2875 | add NODE:RA, TAB:RB->node
2876 |1:
2877 | cmp dword NODE:RA->key.it, LJ_TSTR
2878 | jne >4
2879 | cmp dword NODE:RA->key.gcr, STR:RC
2880 | jne >4
2881 | // Ok, key found. Assumes: offsetof(Node, val) == 0
2882 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
2883 | je >5 // Key found, but nil value?
2884 | movzx RC, PC_RA
2885 | mov RB, [RA] // Get node value.
2886 | mov RA, [RA+4]
2887 | mov [BASE+RC*8], RB
2888 |2:
2889 | mov [BASE+RC*8+4], RA
2890 | ins_next
2891 |
2892 |3:
2893 | movzx RC, PC_RA
2894 | mov RA, LJ_TNIL
2895 | jmp <2
2896 |
2897 |4: // Follow hash chain.
2898 | mov NODE:RA, NODE:RA->next
2899 | test NODE:RA, NODE:RA
2900 | jnz <1
2901 | // End of hash chain: key not found, nil result.
2902 |
2903 |5: // Check for __index if table value is nil.
2904 | mov TAB:RA, TAB:RB->metatable
2905 | test TAB:RA, TAB:RA
2906 | jz <3 // No metatable: done.
2907 | test byte TAB:RA->nomm, 1<<MM_index
2908 | jnz <3 // 'no __index' flag set: done.
2909 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
2910 break;
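As a rough C companion to the BC_TGETS fast path above: the probe indexes the node array with (hash & hmask) and walks the collision chain, comparing interned string keys by pointer. The types and names below are illustrative stand-ins, not the lj_obj.h Node/GCtab layout.

#include <stddef.h>

typedef struct SkNode {
  const char *key;         /* stands in for the GCstr pointer in node->key */
  int val;                 /* stands in for the TValue in node->val */
  struct SkNode *next;     /* hash-chain link, NULL at the end of the chain */
} SkNode;

static SkNode *sk_hget(SkNode *nodes, unsigned int hmask, unsigned int hash,
                       const char *key)
{
  SkNode *n = &nodes[hash & hmask];  /* and RA, STR:RC->hash; imul RA, #NODE */
  do {
    if (n->key == key)               /* interned strings compare by pointer */
      return n;                      /* found; the caller still checks for a nil value */
    n = n->next;                     /* |4: follow hash chain */
  } while (n != NULL);
  return NULL;                       /* end of chain: key not found */
}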
2911 case BC_TGETB:
2912 | ins_ABC // RA = dst, RB = table, RC = byte literal
2913 | checktab RB, ->vmeta_tgetb
2914 | mov TAB:RB, [BASE+RB*8]
2915 | cmp RC, TAB:RB->asize
2916 | jae ->vmeta_tgetb
2917 | shl RC, 3
2918 | add RC, TAB:RB->array
2919 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
2920 | je >2
2921 |1:
2922 | mov RB, [RC] // Get array slot.
2923 | mov RC, [RC+4]
2924 | mov [BASE+RA*8], RB
2925 | mov [BASE+RA*8+4], RC
2926 | ins_next
2927 |
2928 |2: // Check for __index if table value is nil.
2929 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
2930 | jz <1
2931 | mov TAB:RA, TAB:RB->metatable
2932 | test byte TAB:RA->nomm, 1<<MM_index
2933 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
2934 | movzx RA, PC_RA // Restore RA.
2935 | jmp <1
2936 break;
2937
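The BC_TGETV fast path above (and BC_TSETV below) only stays on the array part when the numeric key survives a lossless round trip to an integer and lies below asize. A hedged C sketch of that check, leaving out the FPU rounding-mode details the fist/fild/fcompare sequence handles:

#include <stdint.h>

/* Returns 1 and stores the array index if the key can use the array part,
** 0 if the generic/metamethod fallback must be taken instead. */
static int sk_arraykey(double key, uint32_t asize, uint32_t *idx)
{
  if (!(key >= 0 && key < (double)asize))   /* also rejects NaN and negative keys */
    return 0;
  if ((double)(uint32_t)key != key)         /* round trip must be lossless */
    return 0;
  *idx = (uint32_t)key;
  return 1;
}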
2938 case BC_TSETV:
2939 | ins_ABC // RA = src, RB = table, RC = key
2940 | checktab RB, ->vmeta_tsetv
2941 | mov TAB:RB, [BASE+RB*8]
2942 |
2943 | // Integer key? Convert number to int and back and compare.
2944 | checknum RC, >5
2945 | fld qword [BASE+RC*8]
2946 | fist ARG1
2947 | fild ARG1
2948 | fcomparepp // eax (RC) modified!
2949 | mov RC, ARG1
2950 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
2951 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
2952 | jae ->vmeta_tsetv
2953 | shl RC, 3
2954 | add RC, TAB:RB->array
2955 | cmp dword [RC+4], LJ_TNIL
2956 | je >3 // Previous value is nil?
2957 |1:
2958 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
2959 | jnz >7
2960 |2:
2961 | mov RB, [BASE+RA*8+4] // Set array slot.
2962 | mov RA, [BASE+RA*8]
2963 | mov [RC+4], RB
2964 | mov [RC], RA
2965 | ins_next
2966 |
2967 |3: // Check for __newindex if previous value is nil.
2968 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
2969 | jz <1
2970 | mov TAB:RA, TAB:RB->metatable
2971 | test byte TAB:RA->nomm, 1<<MM_newindex
2972 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
2973 | movzx RA, PC_RA // Restore RA.
2974 | jmp <1
2975 |
2976 |5: // String key?
2977 | checkstr RC, ->vmeta_tsetv
2978 | mov STR:RC, [BASE+RC*8]
2979 | jmp ->BC_TSETS_Z
2980 |
2981 |7: // Possible table write barrier for the value. Skip valiswhite check.
2982 | barrierback TAB:RB, RA
2983 | movzx RA, PC_RA // Restore RA.
2984 | jmp <2
2985 break;
2986 case BC_TSETS:
2987 | ins_ABC // RA = src, RB = table, RC = str const (~)
2988 | not RC
2989 | mov STR:RC, [KBASE+RC*4]
2990 | checktab RB, ->vmeta_tsets
2991 | mov TAB:RB, [BASE+RB*8]
2992 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
2993 | mov RA, TAB:RB->hmask
2994 | and RA, STR:RC->hash
2995 | imul RA, #NODE
2996 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
2997 | add NODE:RA, TAB:RB->node
2998 |1:
2999 | cmp dword NODE:RA->key.it, LJ_TSTR
3000 | jne >5
3001 | cmp dword NODE:RA->key.gcr, STR:RC
3002 | jne >5
3003 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3004 | cmp dword [RA+4], LJ_TNIL
3005 | je >4 // Previous value is nil?
3006 |2:
3007 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3008 | jnz >7
3009 |3:
3010 | movzx RC, PC_RA
3011 | mov RB, [BASE+RC*8+4] // Set node value.
3012 | mov RC, [BASE+RC*8]
3013 | mov [RA+4], RB
3014 | mov [RA], RC
3015 | ins_next
3016 |
3017 |4: // Check for __newindex if previous value is nil.
3018 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
3019 | jz <2
3020 | mov ARG1, RA // Save RA.
3021 | mov TAB:RA, TAB:RB->metatable
3022 | test byte TAB:RA->nomm, 1<<MM_newindex
3023 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3024 | mov RA, ARG1 // Restore RA.
3025 | jmp <2
3026 |
3027 |5: // Follow hash chain.
3028 | mov NODE:RA, NODE:RA->next
3029 | test NODE:RA, NODE:RA
3030 | jnz <1
3031 | // End of hash chain: key not found, add a new one.
3032 |
3033 | // But check for __newindex first.
3034 | mov TAB:RA, TAB:RB->metatable
3035 | test TAB:RA, TAB:RA
3036 | jz >6 // No metatable: continue.
3037 | test byte TAB:RA->nomm, 1<<MM_newindex
3038 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3039 |6:
3040 | mov ARG5, STR:RC
3041 | mov ARG6, LJ_TSTR
3042 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
3043 | mov ARG4, TAB:RB // Save TAB:RB for us.
3044 | mov ARG2, TAB:RB
3045 | mov L:RB, SAVE_L
3046 | mov ARG3, RC
3047 | mov ARG1, L:RB
3048 | mov SAVE_PC, PC
3049 | mov L:RB->base, BASE
3050 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3051 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3052 | mov BASE, L:RB->base
3053 | mov TAB:RB, ARG4 // Need TAB:RB for barrier.
3054 | mov RA, eax
3055 | jmp <2 // Must check write barrier for value.
3056 |
3057 |7: // Possible table write barrier for the value. Skip valiswhite check.
3058 | barrierback TAB:RB, RC // Destroys STR:RC.
3059 | jmp <3
3060 break;
3061 case BC_TSETB:
3062 | ins_ABC // RA = src, RB = table, RC = byte literal
3063 | checktab RB, ->vmeta_tsetb
3064 | mov TAB:RB, [BASE+RB*8]
3065 | cmp RC, TAB:RB->asize
3066 | jae ->vmeta_tsetb
3067 | shl RC, 3
3068 | add RC, TAB:RB->array
3069 | cmp dword [RC+4], LJ_TNIL
3070 | je >3 // Previous value is nil?
3071 |1:
3072 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3073 | jnz >7
3074 |2:
3075 | mov RB, [BASE+RA*8+4] // Set array slot.
3076 | mov RA, [BASE+RA*8]
3077 | mov [RC+4], RB
3078 | mov [RC], RA
3079 | ins_next
3080 |
3081 |3: // Check for __newindex if previous value is nil.
3082 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
3083 | jz <1
3084 | mov TAB:RA, TAB:RB->metatable
3085 | test byte TAB:RA->nomm, 1<<MM_newindex
3086 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3087 | movzx RA, PC_RA // Restore RA.
3088 | jmp <1
3089 |
3090 |7: // Possible table write barrier for the value. Skip valiswhite check.
3091 | barrierback TAB:RB, RA
3092 | movzx RA, PC_RA // Restore RA.
3093 | jmp <2
3094 break;
3095
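The |7: paths in the BC_TSET* cases above invoke barrierback when a value is stored into a black table during an incremental GC cycle. A hedged sketch of what such a backward barrier amounts to; the struct, field names and flag value below are illustrative stand-ins, not the lj_gc.h/lj_obj.h definitions.

#define SK_GC_BLACK 0x04u                 /* illustrative stand-in for LJ_GC_BLACK */

typedef struct SkTab {
  unsigned char marked;                   /* GC color bits */
  struct SkTab *gclist;                   /* link for the collector's work lists */
} SkTab;

typedef struct SkGlobal {
  SkTab *grayagain;                       /* objects to re-scan before sweeping */
} SkGlobal;

/* Storing into a black table re-grays it and queues it for a re-scan, so the
** collector will still visit the newly stored (possibly white) value. */
static void sk_barrierback(SkGlobal *g, SkTab *t)
{
  t->marked &= (unsigned char)~SK_GC_BLACK;  /* black -> gray */
  t->gclist = g->grayagain;
  g->grayagain = t;
}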
3096 case BC_TSETM:
3097 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3098 | mov ARG5, KBASE // Need one more free register.
3099 | fld qword [KBASE+RD*8]
3100 | fistp ARG4 // Const is guaranteed to be an int.
3101 |1:
3102 | lea RA, [BASE+RA*8]
3103 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3104 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3105 | jnz >7
3106 |2:
3107 | mov RD, NRESULTS
3108 | mov KBASE, ARG4
3109 | sub RD, 1
3110 | jz >4 // Nothing to copy?
3111 | add RD, KBASE // Compute needed size.
3112 | cmp RD, TAB:RB->asize
3113 | jae >5 // Does not fit into array part?
3114 | sub RD, KBASE
3115 | shl KBASE, 3
3116 | add KBASE, TAB:RB->array
3117 |3: // Copy result slots to table.
3118 | mov RB, [RA]
3119 | mov [KBASE], RB
3120 | mov RB, [RA+4]
3121 | add RA, 8
3122 | mov [KBASE+4], RB
3123 | add KBASE, 8
3124 | sub RD, 1
3125 | jnz <3
3126 |4:
3127 | mov KBASE, ARG5
3128 | ins_next
3129 |
3130 |5: // Need to resize array part.
3131 | mov ARG2, TAB:RB
3132 | mov L:RB, SAVE_L
3133 | mov ARG3, RD
3134 | mov ARG1, L:RB
3135 | mov SAVE_PC, PC
3136 | mov L:RB->base, BASE
3137 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3138 | mov BASE, L:RB->base
3139 | movzx RA, PC_RA // Restore RA.
3140 | jmp <1 // Retry.
3141 |
3142 |7: // Possible table write barrier for any value. Skip valiswhite check.
3143 | barrierback TAB:RB, RD
3144 | jmp <2
3145 break;
3146
3147 /* -- Calls and vararg handling ----------------------------------------- */
3148
3149 case BC_CALL: case BC_CALLM:
3150 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3151 if (op == BC_CALLM) {
3152 | add NARGS:RC, NRESULTS
3153 }
3154 | lea RA, [BASE+RA*8+8]
3155 | mov LFUNC:RB, [RA-8]
3156 | cmp dword [RA-4], LJ_TFUNC
3157 | jne ->vmeta_call
3158 | jmp aword LFUNC:RB->gate
3159 break;
3160
3161 case BC_CALLMT:
3162 | ins_AD // RA = base, RD = extra_nargs
3163 | add NARGS:RD, NRESULTS
3164 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3165 break;
3166 case BC_CALLT:
3167 | ins_AD // RA = base, RD = nargs+1
3168 | lea RA, [BASE+RA*8+8]
3169 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3170 | mov LFUNC:RB, [RA-8]
3171 | cmp dword [RA-4], LJ_TFUNC
3172 | jne ->vmeta_call
3173 |->BC_CALLT_Z:
3174 | mov PC, [BASE-4]
3175 | test PC, FRAME_TYPE
3176 | jnz >7
3177 |1:
3178 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
3179 | mov NRESULTS, NARGS:RD
3180 | sub NARGS:RD, 1
3181 | jz >3
3182 |2:
3183 | mov RB, [RA] // Move args down.
3184 | mov [KBASE], RB
3185 | mov RB, [RA+4]
3186 | mov [KBASE+4], RB
3187 | add KBASE, 8
3188 | add RA, 8
3189 | sub NARGS:RD, 1
3190 | jnz <2
3191 |
3192 | mov LFUNC:RB, [BASE-8]
3193 |3:
3194 | mov RA, BASE // BASE is ignored, except when ...
3195 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
3196 | ja >5
3197 |4:
3198 | mov NARGS:RD, NRESULTS
3199 | jmp aword LFUNC:RB->gate
3200 |
3201 |5: // Tailcall to a fast function.
3202 | test PC, FRAME_TYPE // Lua frame below?
3203 | jnz <4
3204 | movzx RD, PC_RA // Need to prepare BASE/KBASE.
3205 | not RD
3206 | lea BASE, [BASE+RD*8]
3207 | mov LFUNC:KBASE, [BASE-8]
3208 | mov PROTO:KBASE, LFUNC:KBASE->pt
3209 | mov KBASE, PROTO:KBASE->k
3210 | jmp <4
3211 |
3212 |7: // Tailcall from a vararg function.
3213 | jnp <1 // Vararg frame below?
3214 | and PC, -8
3215 | sub BASE, PC // Need to relocate BASE/KBASE down.
3216 | mov KBASE, BASE
3217 | mov PC, [BASE-4]
3218 | jmp <1
3219 break;
3220
3221 case BC_ITERC:
3222 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
3223 | lea RA, [BASE+RA*8+8] // fb = base+1
3224 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
3225 | mov RC, [RA-20]
3226 | mov [RA], RB
3227 | mov [RA+4], RC
3228 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
3229 | mov RC, [RA-12]
3230 | mov [RA+8], RB
3231 | mov [RA+12], RC
3232 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
3233 | mov RC, [RA-28]
3234 | mov [RA-8], LFUNC:RB
3235 | mov [RA-4], RC
3236 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
3237 | mov NARGS:RC, 3
3238 | jne ->vmeta_call
3239 | jmp aword LFUNC:RB->gate
3240 break;
3241
3242 case BC_VARG:
3243 | ins_AB_ // RA = base, RB = nresults+1, (RC = 1)
3244 | mov LFUNC:RC, [BASE-8]
3245 | lea RA, [BASE+RA*8]
3246 | mov PROTO:RC, LFUNC:RC->pt
3247 | movzx RC, byte PROTO:RC->numparams
3248 | mov ARG3, KBASE // Need one more free register.
3249 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
3250 | sub KBASE, [BASE-4]
3251 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
3252 | test RB, RB
3253 | jz >5 // Copy all varargs?
3254 | lea RB, [RA+RB*8-8]
3255 | cmp KBASE, BASE // No vararg slots?
3256 | jnb >2
3257 |1: // Copy vararg slots to destination slots.
3258 | mov RC, [KBASE-8]
3259 | mov [RA], RC
3260 | mov RC, [KBASE-4]
3261 | add KBASE, 8
3262 | mov [RA+4], RC
3263 | add RA, 8
3264 | cmp RA, RB // All destination slots filled?
3265 | jnb >3
3266 | cmp KBASE, BASE // No more vararg slots?
3267 | jb <1
3268 |2: // Fill up remainder with nil.
3269 | mov dword [RA+4], LJ_TNIL
3270 | add RA, 8
3271 | cmp RA, RB
3272 | jb <2
3273 |3:
3274 | mov KBASE, ARG3
3275 | ins_next
3276 |
3277 |5: // Copy all varargs.
3278 | mov NRESULTS, 1 // NRESULTS = 0+1
3279 | mov RC, BASE
3280 | sub RC, KBASE
3281 | jbe <3 // No vararg slots?
3282 | mov RB, RC
3283 | shr RB, 3
3284 | mov ARG2, RB // Store this for stack growth below.
3285 | add RB, 1
3286 | mov NRESULTS, RB // NRESULTS = #varargs+1
3287 | mov L:RB, SAVE_L
3288 | add RC, RA
3289 | cmp RC, L:RB->maxstack
3290 | ja >7 // Need to grow stack?
3291 |6: // Copy all vararg slots.
3292 | mov RC, [KBASE-8]
3293 | mov [RA], RC
3294 | mov RC, [KBASE-4]
3295 | add KBASE, 8
3296 | mov [RA+4], RC
3297 | add RA, 8
3298 | cmp KBASE, BASE // No more vararg slots?
3299 | jb <6
3300 | jmp <3
3301 |
3302 |7: // Grow stack for varargs.
3303 | mov L:RB->base, BASE
3304 | mov L:RB->top, RA
3305 | mov SAVE_PC, PC
3306 | sub KBASE, BASE // Need delta, because BASE may change.
3307 | mov ARG1, L:RB
3308 | call extern lj_state_growstack // (lua_State *L, int n)
3309 | mov BASE, L:RB->base
3310 | mov RA, L:RB->top
3311 | add KBASE, BASE
3312 | jmp <6
3313 break;
3314
3315 /* -- Returns ----------------------------------------------------------- */
3316
3317 case BC_RETM:
3318 | ins_AD // RA = results, RD = extra_nresults
3319 | add RD, NRESULTS // NRESULTS >=1, so RD >=1.
3320 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
3321 break;
3322
3323 case BC_RET: case BC_RET0: case BC_RET1:
3324 | ins_AD // RA = results, RD = nresults+1
3325 if (op != BC_RET0) {
3326 | shl RA, 3
3327 }
3328 |1:
3329 | mov PC, [BASE-4]
3330 | mov NRESULTS, RD // Save nresults+1.
3331 | test PC, FRAME_TYPE // Check frame type marker.
3332 | jnz >7 // Not returning to a fixarg Lua func?
3333 switch (op) {
3334 case BC_RET:
3335 |->BC_RET_Z:
3336 | mov KBASE, BASE // Use KBASE for result move.
3337 | sub RD, 1
3338 | jz >3
3339 |2:
3340 | mov RB, [KBASE+RA] // Move results down.
3341 | mov [KBASE-8], RB
3342 | mov RB, [KBASE+RA+4]
3343 | mov [KBASE-4], RB
3344 | add KBASE, 8
3345 | sub RD, 1
3346 | jnz <2
3347 |3:
3348 | mov RD, NRESULTS // Note: NRESULTS may be >255.
3349 | movzx RB, PC_RB // So cannot compare with RDL!
3350 |5:
3351 | cmp RB, RD // More results expected?
3352 | ja >6
3353 break;
3354 case BC_RET1:
3355 | mov RB, [BASE+RA+4]
3356 | mov [BASE-4], RB
3357 | mov RB, [BASE+RA]
3358 | mov [BASE-8], RB
3359 /* fallthrough */
3360 case BC_RET0:
3361 |5:
3362 | cmp PC_RB, RDL // More results expected?
3363 | ja >6
3364 default:
3365 break;
3366 }
3367 | movzx RA, PC_RA
3368 | not RA // Note: ~RA = -(RA+1)
3369 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
3370 | mov LFUNC:KBASE, [BASE-8]
3371 | mov PROTO:KBASE, LFUNC:KBASE->pt
3372 | mov KBASE, PROTO:KBASE->k
3373 | ins_next
3374 |
3375 |6: // Fill up results with nil.
3376 if (op == BC_RET) {
3377 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
3378 | add KBASE, 8
3379 } else {
3380 | mov dword [BASE+RD*8-12], LJ_TNIL
3381 }
3382 | add RD, 1
3383 | jmp <5
3384 |
3385 |7: // Non-standard return case.
3386 | jnp ->vm_return
3387 | // Return from vararg function: relocate BASE down and RA up.
3388 | and PC, -8
3389 | sub BASE, PC
3390 if (op != BC_RET0) {
3391 | add RA, PC
3392 }
3393 | jmp <1
3394 break;
3395
3396 /* -- Loops and branches ------------------------------------------------ */
3397
3398 |.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4]
3399 |.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12]
3400 |.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20]
3401 |.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28]
3402
3403 case BC_FORL:
3404#if LJ_HASJIT
3405 | hotloop RB
3406#endif
3407 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
3408 break;
3409
3410 case BC_JFORI:
3411 case BC_JFORL:
3412#if !LJ_HASJIT
3413 break;
3414#endif
3415 case BC_FORI:
3416 case BC_IFORL:
3417 vk = (op == BC_IFORL || op == BC_JFORL);
3418 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
3419 | lea RA, [BASE+RA*8]
3420 if (!vk) {
3421 | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks
3422 | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for
3423 }
3424 | mov RB, FOR_TSTEP // Load type/hiword of for step.
3425 if (!vk) {
3426 | cmp RB, LJ_TISNUM; ja ->vmeta_for
3427 }
3428 | fld FOR_STOP
3429 | fld FOR_IDX
3430 if (vk) {
3431 | fadd FOR_STEP // nidx = idx + step
3432 | fst FOR_IDX
3433 }
3434 | fst FOR_EXT
3435 | test RB, RB // Swap lim/(n)idx if step non-negative.
3436 | js >1
3437 | fxch
3438 |1:
3439 | fcomparepp // eax (RD) modified if !cmov.
3440 if (!cmov) {
3441 | movzx RD, PC_RD // Need to reload RD.
3442 }
3443 if (op == BC_FORI) {
3444 | jnb >2
3445 | branchPC RD
3446 } else if (op == BC_JFORI) {
3447 | branchPC RD
3448 | movzx RD, PC_RD
3449 | jnb =>BC_JLOOP
3450 } else if (op == BC_IFORL) {
3451 | jb >2
3452 | branchPC RD
3453 } else {
3454 | jnb =>BC_JLOOP
3455 }
3456 |2:
3457 | ins_next
3458 break;
3459
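In C terms, the BC_FORI/BC_IFORL case above performs the standard Lua numeric for-loop step over the four consecutive FOR_IDX/FOR_STOP/FOR_STEP/FOR_EXT slots. A hedged sketch of the semantics only, omitting the type checks and the FPU swap/compare trick:

#include <stdbool.h>

/* s[0]=idx, s[1]=stop, s[2]=step, s[3]=copy exposed to the loop body.
** 'reentry' is false for the initial BC_FORI check, true for BC_IFORL. */
static bool sk_forstep(double *s, bool reentry)
{
  if (reentry)
    s[0] += s[2];                     /* nidx = idx + step */
  s[3] = s[0];                        /* refresh the externally visible copy */
  return s[2] >= 0 ? (s[0] <= s[1])   /* non-negative step: loop while idx <= stop */
                   : (s[0] >= s[1]);  /* negative step: loop while idx >= stop */
}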
3460 case BC_ITERL:
3461#if LJ_HASJIT
3462 | hotloop RB
3463#endif
3464 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
3465 break;
3466
3467 case BC_JITERL:
3468#if !LJ_HASJIT
3469 break;
3470#endif
3471 case BC_IITERL:
3472 | ins_AJ // RA = base, RD = target
3473 | lea RA, [BASE+RA*8]
3474 | mov RB, [RA+4]
3475 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
3476 if (op == BC_JITERL) {
3477 | mov [RA-4], RB
3478 | mov RB, [RA]
3479 | mov [RA-8], RB
3480 | jmp =>BC_JLOOP
3481 } else {
3482 | branchPC RD // Otherwise save control var + branch.
3483 | mov RD, [RA]
3484 | mov [RA-4], RB
3485 | mov [RA-8], RD
3486 }
3487 |1:
3488 | ins_next
3489 break;
3490
3491 case BC_LOOP:
3492 | ins_A // RA = base, RD = target (loop extent)
3493 | // Note: RA/RD are only used by the trace recorder to determine scope/extent.
3494 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
3495#if LJ_HASJIT
3496 | hotloop RB
3497#endif
3498 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
3499 break;
3500
3501 case BC_ILOOP:
3502 | ins_A // RA = base, RD = target (loop extent)
3503 | ins_next
3504 break;
3505
3506 case BC_JLOOP:
3507#if LJ_HASJIT
3508 | ins_AD // RA = base (ignored), RD = traceno
3509 | mov RA, [DISPATCH+DISPATCH_J(trace)]
3510 | mov TRACE:RD, [RA+RD*4]
3511 | mov RD, TRACE:RD->mcode
3512 | mov L:RB, SAVE_L
3513 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
3514 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
3515 | jmp RD
3516#endif
3517 break;
3518
3519 case BC_JMP:
3520 | ins_AJ // RA = unused, RD = target
3521 | branchPC RD
3522 | ins_next
3523 break;
3524
3525 /* ---------------------------------------------------------------------- */
3526
3527 default:
3528 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3529 exit(2);
3530 break;
3531 }
3532}
3533
3534static int build_backend(BuildCtx *ctx)
3535{
3536 int op;
3537 int cmov = 1;
3538#ifdef LUAJIT_CPU_NOCMOV
3539 cmov = 0;
3540#endif
3541
3542 dasm_growpc(Dst, BC__MAX);
3543
3544 build_subroutines(ctx, cmov);
3545
3546 |.code_op
3547 for (op = 0; op < BC__MAX; op++)
3548 build_ins(ctx, (BCOp)op, op, cmov);
3549
3550 return BC__MAX;
3551}
3552
3553/* Emit pseudo frame-info for all assembler functions. */
3554static void emit_asm_debug(BuildCtx *ctx)
3555{
3556 switch (ctx->mode) {
3557 case BUILD_elfasm:
3558 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
3559 fprintf(ctx->fp,
3560 ".Lframe0:\n"
3561 "\t.long .LECIE0-.LSCIE0\n"
3562 ".LSCIE0:\n"
3563 "\t.long 0xffffffff\n"
3564 "\t.byte 0x1\n"
3565 "\t.string \"\"\n"
3566 "\t.uleb128 0x1\n"
3567 "\t.sleb128 -4\n"
3568 "\t.byte 0x8\n"
3569 "\t.byte 0xc\n\t.uleb128 0x4\n\t.uleb128 0x4\n"
3570 "\t.byte 0x88\n\t.uleb128 0x1\n"
3571 "\t.align 4\n"
3572 ".LECIE0:\n\n");
3573 fprintf(ctx->fp,
3574 ".LSFDE0:\n"
3575 "\t.long .LEFDE0-.LASFDE0\n"
3576 ".LASFDE0:\n"
3577 "\t.long .Lframe0\n"
3578 "\t.long .Lbegin\n"
3579 "\t.long %d\n"
3580 "\t.byte 0xe\n\t.uleb128 0x30\n" /* def_cfa_offset */
3581 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
3582 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
3583 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
3584 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
3585 "\t.align 4\n"
3586 ".LEFDE0:\n\n", (int)ctx->codesz);
3587 break;
3588 default: /* Difficult for other modes. */
3589 break;
3590 }
3591}
3592
diff --git a/src/lauxlib.h b/src/lauxlib.h
new file mode 100644
index 00000000..505a9f52
--- /dev/null
+++ b/src/lauxlib.h
@@ -0,0 +1,159 @@
1/*
2** $Id: lauxlib.h,v 1.88.1.1 2007/12/27 13:02:25 roberto Exp $
3** Auxiliary functions for building Lua libraries
4** See Copyright Notice in lua.h
5*/
6
7
8#ifndef lauxlib_h
9#define lauxlib_h
10
11
12#include <stddef.h>
13#include <stdio.h>
14
15#include "lua.h"
16
17
18#define luaL_getn(L,i) ((int)lua_objlen(L, i))
19#define luaL_setn(L,i,j) ((void)0) /* no op! */
20
21/* extra error code for `luaL_load' */
22#define LUA_ERRFILE (LUA_ERRERR+1)
23
24typedef struct luaL_Reg {
25 const char *name;
26 lua_CFunction func;
27} luaL_Reg;
28
29LUALIB_API void (luaL_openlib) (lua_State *L, const char *libname,
30 const luaL_Reg *l, int nup);
31LUALIB_API void (luaL_register) (lua_State *L, const char *libname,
32 const luaL_Reg *l);
33LUALIB_API int (luaL_getmetafield) (lua_State *L, int obj, const char *e);
34LUALIB_API int (luaL_callmeta) (lua_State *L, int obj, const char *e);
35LUALIB_API int (luaL_typerror) (lua_State *L, int narg, const char *tname);
36LUALIB_API int (luaL_argerror) (lua_State *L, int numarg, const char *extramsg);
37LUALIB_API const char *(luaL_checklstring) (lua_State *L, int numArg,
38 size_t *l);
39LUALIB_API const char *(luaL_optlstring) (lua_State *L, int numArg,
40 const char *def, size_t *l);
41LUALIB_API lua_Number (luaL_checknumber) (lua_State *L, int numArg);
42LUALIB_API lua_Number (luaL_optnumber) (lua_State *L, int nArg, lua_Number def);
43
44LUALIB_API lua_Integer (luaL_checkinteger) (lua_State *L, int numArg);
45LUALIB_API lua_Integer (luaL_optinteger) (lua_State *L, int nArg,
46 lua_Integer def);
47
48LUALIB_API void (luaL_checkstack) (lua_State *L, int sz, const char *msg);
49LUALIB_API void (luaL_checktype) (lua_State *L, int narg, int t);
50LUALIB_API void (luaL_checkany) (lua_State *L, int narg);
51
52LUALIB_API int (luaL_newmetatable) (lua_State *L, const char *tname);
53LUALIB_API void *(luaL_checkudata) (lua_State *L, int ud, const char *tname);
54
55LUALIB_API void (luaL_where) (lua_State *L, int lvl);
56LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
57
58LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
59 const char *const lst[]);
60
61LUALIB_API int (luaL_ref) (lua_State *L, int t);
62LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
63
64LUALIB_API int (luaL_loadfile) (lua_State *L, const char *filename);
65LUALIB_API int (luaL_loadbuffer) (lua_State *L, const char *buff, size_t sz,
66 const char *name);
67LUALIB_API int (luaL_loadstring) (lua_State *L, const char *s);
68
69LUALIB_API lua_State *(luaL_newstate) (void);
70
71
72LUALIB_API const char *(luaL_gsub) (lua_State *L, const char *s, const char *p,
73 const char *r);
74
75LUALIB_API const char *(luaL_findtable) (lua_State *L, int idx,
76 const char *fname, int szhint);
77
78
79
80
81/*
82** ===============================================================
83** some useful macros
84** ===============================================================
85*/
86
87#define luaL_argcheck(L, cond,numarg,extramsg) \
88 ((void)((cond) || luaL_argerror(L, (numarg), (extramsg))))
89#define luaL_checkstring(L,n) (luaL_checklstring(L, (n), NULL))
90#define luaL_optstring(L,n,d) (luaL_optlstring(L, (n), (d), NULL))
91#define luaL_checkint(L,n) ((int)luaL_checkinteger(L, (n)))
92#define luaL_optint(L,n,d) ((int)luaL_optinteger(L, (n), (d)))
93#define luaL_checklong(L,n) ((long)luaL_checkinteger(L, (n)))
94#define luaL_optlong(L,n,d) ((long)luaL_optinteger(L, (n), (d)))
95
96#define luaL_typename(L,i) lua_typename(L, lua_type(L,(i)))
97
98#define luaL_dofile(L, fn) \
99 (luaL_loadfile(L, fn) || lua_pcall(L, 0, LUA_MULTRET, 0))
100
101#define luaL_dostring(L, s) \
102 (luaL_loadstring(L, s) || lua_pcall(L, 0, LUA_MULTRET, 0))
103
104#define luaL_getmetatable(L,n) (lua_getfield(L, LUA_REGISTRYINDEX, (n)))
105
106#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
107
108/*
109** {======================================================
110** Generic Buffer manipulation
111** =======================================================
112*/
113
114
115
116typedef struct luaL_Buffer {
117 char *p; /* current position in buffer */
118 int lvl; /* number of strings in the stack (level) */
119 lua_State *L;
120 char buffer[LUAL_BUFFERSIZE];
121} luaL_Buffer;
122
123#define luaL_addchar(B,c) \
124 ((void)((B)->p < ((B)->buffer+LUAL_BUFFERSIZE) || luaL_prepbuffer(B)), \
125 (*(B)->p++ = (char)(c)))
126
127/* compatibility only */
128#define luaL_putchar(B,c) luaL_addchar(B,c)
129
130#define luaL_addsize(B,n) ((B)->p += (n))
131
132LUALIB_API void (luaL_buffinit) (lua_State *L, luaL_Buffer *B);
133LUALIB_API char *(luaL_prepbuffer) (luaL_Buffer *B);
134LUALIB_API void (luaL_addlstring) (luaL_Buffer *B, const char *s, size_t l);
135LUALIB_API void (luaL_addstring) (luaL_Buffer *B, const char *s);
136LUALIB_API void (luaL_addvalue) (luaL_Buffer *B);
137LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
138
139
140/* }====================================================== */
141
142
143/* compatibility with ref system */
144
145/* pre-defined references */
146#define LUA_NOREF (-2)
147#define LUA_REFNIL (-1)
148
149#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
150 (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
151
152#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref))
153
154#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
155
156
157#define luaL_reg luaL_Reg
158
159#endif
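For orientation, a minimal sketch of how a C module conventionally uses the luaL_Reg/luaL_register API declared above; the module and function names (example, ex_add) are made up for illustration.

#include "lua.h"
#include "lauxlib.h"

static int ex_add(lua_State *L)
{
  lua_Number a = luaL_checknumber(L, 1);   /* raises a Lua error on a bad argument */
  lua_Number b = luaL_checknumber(L, 2);
  lua_pushnumber(L, a + b);
  return 1;                                /* one result left on the stack */
}

static const luaL_Reg ex_funcs[] = {
  { "add", ex_add },
  { NULL, NULL }                           /* sentinel */
};

int luaopen_example(lua_State *L)
{
  luaL_register(L, "example", ex_funcs);   /* creates/fills the module table */
  return 1;                                /* return the module table */
}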
diff --git a/src/lib_aux.c b/src/lib_aux.c
new file mode 100644
index 00000000..1ae32dbc
--- /dev/null
+++ b/src/lib_aux.c
@@ -0,0 +1,438 @@
1/*
2** Auxiliary library for the Lua/C API.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major parts taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <errno.h>
10#include <stdarg.h>
11#include <stdio.h>
12
13#define lib_aux_c
14#define LUA_LIB
15
16#include "lua.h"
17#include "lauxlib.h"
18
19#include "lj_obj.h"
20#include "lj_err.h"
21#include "lj_lib.h"
22
23/* convert a stack index to positive */
24#define abs_index(L, i) \
25 ((i) > 0 || (i) <= LUA_REGISTRYINDEX ? (i) : lua_gettop(L) + (i) + 1)
26
27/* -- Type checks --------------------------------------------------------- */
28
29LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
30{
31 if (!lua_checkstack(L, size))
32 lj_err_callerv(L, LJ_ERR_STKOVM, msg);
33}
34
35LUALIB_API void luaL_checktype(lua_State *L, int narg, int tt)
36{
37 if (lua_type(L, narg) != tt)
38 lj_err_argt(L, narg, tt);
39}
40
41LUALIB_API void luaL_checkany(lua_State *L, int narg)
42{
43 lj_lib_checkany(L, narg);
44}
45
46LUALIB_API const char *luaL_checklstring(lua_State *L, int narg, size_t *len)
47{
48 GCstr *s = lj_lib_checkstr(L, narg);
49 if (len != NULL) *len = s->len;
50 return strdata(s);
51}
52
53LUALIB_API const char *luaL_optlstring(lua_State *L, int narg,
54 const char *def, size_t *len)
55{
56 GCstr *s = lj_lib_optstr(L, narg);
57 if (s) {
58 if (len != NULL) *len = s->len;
59 return strdata(s);
60 }
61 if (len != NULL) *len = def ? strlen(def) : 0;
62 return def;
63}
64
65LUALIB_API lua_Number luaL_checknumber(lua_State *L, int narg)
66{
67 return lj_lib_checknum(L, narg);
68}
69
70LUALIB_API lua_Number luaL_optnumber(lua_State *L, int narg, lua_Number def)
71{
72 lj_lib_opt(L, narg,
73 return lj_lib_checknum(L, narg);
74 ,
75 return def;
76 )
77}
78
79LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int narg)
80{
81#if LJ_64
82 return (lua_Integer)lj_lib_checknum(L, narg);
83#else
84 return lj_lib_checkint(L, narg);
85#endif
86}
87
88LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int narg, lua_Integer def)
89{
90#if LJ_64
91 lj_lib_opt(L, narg,
92 return (lua_Integer)lj_lib_checknum(L, narg);
93 ,
94 return def;
95 )
96#else
97 return lj_lib_optint(L, narg, def);
98#endif
99}
100
101LUALIB_API int luaL_checkoption(lua_State *L, int narg, const char *def,
102 const char *const lst[])
103{
104 GCstr *s = lj_lib_optstr(L, narg);
105 const char *opt = s ? strdata(s) : def;
106 uint32_t i;
107 if (!opt) lj_err_argt(L, narg, LUA_TSTRING);
108 for (i = 0; lst[i]; i++)
109 if (strcmp(lst[i], opt) == 0)
110 return (int)i;
111 lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt);
112}
113
114/* -- Module registration ------------------------------------------------- */
115
116LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
117 const char *fname, int szhint)
118{
119 const char *e;
120 lua_pushvalue(L, idx);
121 do {
122 e = strchr(fname, '.');
123 if (e == NULL) e = fname + strlen(fname);
124 lua_pushlstring(L, fname, (size_t)(e - fname));
125 lua_rawget(L, -2);
126 if (lua_isnil(L, -1)) { /* no such field? */
127 lua_pop(L, 1); /* remove this nil */
128 lua_createtable(L, 0, (*e == '.' ? 1 : szhint)); /* new table for field */
129 lua_pushlstring(L, fname, (size_t)(e - fname));
130 lua_pushvalue(L, -2);
131 lua_settable(L, -4); /* set new table into field */
132 } else if (!lua_istable(L, -1)) { /* field has a non-table value? */
133 lua_pop(L, 2); /* remove table and value */
134 return fname; /* return problematic part of the name */
135 }
136 lua_remove(L, -2); /* remove previous table */
137 fname = e + 1;
138 } while (*e == '.');
139 return NULL;
140}
141
142static int libsize(const luaL_Reg *l)
143{
144 int size = 0;
145 for (; l->name; l++) size++;
146 return size;
147}
148
149LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
150 const luaL_Reg *l, int nup)
151{
152 if (libname) {
153 int size = libsize(l);
154 /* check whether lib already exists */
155 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
156 lua_getfield(L, -1, libname); /* get _LOADED[libname] */
157 if (!lua_istable(L, -1)) { /* not found? */
158 lua_pop(L, 1); /* remove previous result */
159 /* try global variable (and create one if it does not exist) */
160 if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
161 lj_err_callerv(L, LJ_ERR_BADMODN, libname);
162 lua_pushvalue(L, -1);
163 lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
164 }
165 lua_remove(L, -2); /* remove _LOADED table */
166 lua_insert(L, -(nup+1)); /* move library table to below upvalues */
167 }
168 for (; l->name; l++) {
169 int i;
170 for (i = 0; i < nup; i++) /* copy upvalues to the top */
171 lua_pushvalue(L, -nup);
172 lua_pushcclosure(L, l->func, nup);
173 lua_setfield(L, -(nup+2), l->name);
174 }
175 lua_pop(L, nup); /* remove upvalues */
176}
177
178LUALIB_API void luaL_register(lua_State *L, const char *libname,
179 const luaL_Reg *l)
180{
181 luaL_openlib(L, libname, l, 0);
182}
183
184LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
185 const char *p, const char *r)
186{
187 const char *wild;
188 size_t l = strlen(p);
189 luaL_Buffer b;
190 luaL_buffinit(L, &b);
191 while ((wild = strstr(s, p)) != NULL) {
192 luaL_addlstring(&b, s, (size_t)(wild - s)); /* push prefix */
193 luaL_addstring(&b, r); /* push replacement in place of pattern */
194 s = wild + l; /* continue after `p' */
195 }
196 luaL_addstring(&b, s); /* push last suffix */
197 luaL_pushresult(&b);
198 return lua_tostring(L, -1);
199}
200
201/* -- Buffer handling ----------------------------------------------------- */
202
203#define bufflen(B) ((size_t)((B)->p - (B)->buffer))
204#define bufffree(B) ((size_t)(LUAL_BUFFERSIZE - bufflen(B)))
205
206static int emptybuffer(luaL_Buffer *B)
207{
208 size_t l = bufflen(B);
209 if (l == 0)
210 return 0; /* put nothing on stack */
211 lua_pushlstring(B->L, B->buffer, l);
212 B->p = B->buffer;
213 B->lvl++;
214 return 1;
215}
216
217static void adjuststack(luaL_Buffer *B)
218{
219 if (B->lvl > 1) {
220 lua_State *L = B->L;
221 int toget = 1; /* number of levels to concat */
222 size_t toplen = lua_strlen(L, -1);
223 do {
224 size_t l = lua_strlen(L, -(toget+1));
225 if (!(B->lvl - toget + 1 >= LUA_MINSTACK/2 || toplen > l))
226 break;
227 toplen += l;
228 toget++;
229 } while (toget < B->lvl);
230 lua_concat(L, toget);
231 B->lvl = B->lvl - toget + 1;
232 }
233}
234
235LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
236{
237 if (emptybuffer(B))
238 adjuststack(B);
239 return B->buffer;
240}
241
242LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
243{
244 while (l--)
245 luaL_addchar(B, *s++);
246}
247
248LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
249{
250 luaL_addlstring(B, s, strlen(s));
251}
252
253LUALIB_API void luaL_pushresult(luaL_Buffer *B)
254{
255 emptybuffer(B);
256 lua_concat(B->L, B->lvl);
257 B->lvl = 1;
258}
259
260LUALIB_API void luaL_addvalue(luaL_Buffer *B)
261{
262 lua_State *L = B->L;
263 size_t vl;
264 const char *s = lua_tolstring(L, -1, &vl);
265 if (vl <= bufffree(B)) { /* fit into buffer? */
266 memcpy(B->p, s, vl); /* put it there */
267 B->p += vl;
268 lua_pop(L, 1); /* remove from stack */
269 } else {
270 if (emptybuffer(B))
271 lua_insert(L, -2); /* put buffer before new value */
272 B->lvl++; /* add new value into B stack */
273 adjuststack(B);
274 }
275}
276
277LUALIB_API void luaL_buffinit(lua_State *L, luaL_Buffer *B)
278{
279 B->L = L;
280 B->p = B->buffer;
281 B->lvl = 0;
282}
283
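A short usage sketch for the luaL_Buffer machinery implemented above, relying on the includes at the top of this file; the function name is illustrative.

static int sk_joinargs(lua_State *L)
{
  int i, n = lua_gettop(L);
  luaL_Buffer b;
  luaL_buffinit(L, &b);
  for (i = 1; i <= n; i++) {
    luaL_addstring(&b, luaL_checkstring(L, i));  /* copied into b or spilled to the stack */
    if (i < n) luaL_addchar(&b, ',');
  }
  luaL_pushresult(&b);                           /* the concatenated string is now on top */
  return 1;
}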
284/* -- Reference management ------------------------------------------------ */
285
286#define FREELIST_REF 0
287
288LUALIB_API int luaL_ref(lua_State *L, int t)
289{
290 int ref;
291 t = abs_index(L, t);
292 if (lua_isnil(L, -1)) {
293 lua_pop(L, 1); /* remove from stack */
294 return LUA_REFNIL; /* `nil' has a unique fixed reference */
295 }
296 lua_rawgeti(L, t, FREELIST_REF); /* get first free element */
297 ref = (int)lua_tointeger(L, -1); /* ref = t[FREELIST_REF] */
298 lua_pop(L, 1); /* remove it from stack */
299 if (ref != 0) { /* any free element? */
300 lua_rawgeti(L, t, ref); /* remove it from list */
301 lua_rawseti(L, t, FREELIST_REF); /* (t[FREELIST_REF] = t[ref]) */
302 } else { /* no free elements */
303 ref = (int)lua_objlen(L, t);
304 ref++; /* create new reference */
305 }
306 lua_rawseti(L, t, ref);
307 return ref;
308}
309
310LUALIB_API void luaL_unref(lua_State *L, int t, int ref)
311{
312 if (ref >= 0) {
313 t = abs_index(L, t);
314 lua_rawgeti(L, t, FREELIST_REF);
315 lua_rawseti(L, t, ref); /* t[ref] = t[FREELIST_REF] */
316 lua_pushinteger(L, ref);
317 lua_rawseti(L, t, FREELIST_REF); /* t[FREELIST_REF] = ref */
318 }
319}
320
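A hedged sketch of using the reference API implemented above to anchor a Lua value in the registry from C and release it later; names are illustrative and the includes at the top of this file are assumed.

static int sk_ref = LUA_NOREF;

static void sk_store_callback(lua_State *L, int idx)
{
  lua_pushvalue(L, idx);                     /* value to anchor */
  sk_ref = luaL_ref(L, LUA_REGISTRYINDEX);   /* pops it and returns a unique ref */
}

static void sk_run_callback(lua_State *L)
{
  lua_rawgeti(L, LUA_REGISTRYINDEX, sk_ref); /* push the anchored value */
  lua_call(L, 0, 0);
  luaL_unref(L, LUA_REGISTRYINDEX, sk_ref);  /* release the slot for reuse */
  sk_ref = LUA_NOREF;
}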
321/* -- Load Lua code ------------------------------------------------------- */
322
323typedef struct FileReaderCtx {
324 FILE *fp;
325 char buf[LUAL_BUFFERSIZE];
326} FileReaderCtx;
327
328static const char *reader_file(lua_State *L, void *ud, size_t *size)
329{
330 FileReaderCtx *ctx = (FileReaderCtx *)ud;
331 UNUSED(L);
332 if (feof(ctx->fp)) return NULL;
333 *size = fread(ctx->buf, 1, sizeof(ctx->buf), ctx->fp);
334 return *size > 0 ? ctx->buf : NULL;
335}
336
337LUALIB_API int luaL_loadfile(lua_State *L, const char *filename)
338{
339 FileReaderCtx ctx;
340 int status;
341 const char *chunkname;
342 if (filename) {
343 ctx.fp = fopen(filename, "r");
344 if (ctx.fp == NULL) {
345 lua_pushfstring(L, "cannot open %s: %s", filename, strerror(errno));
346 return LUA_ERRFILE;
347 }
348 chunkname = lua_pushfstring(L, "@%s", filename);
349 } else {
350 ctx.fp = stdin;
351 chunkname = "=stdin";
352 }
353 status = lua_load(L, reader_file, &ctx, chunkname);
354 if (ferror(ctx.fp)) {
355 L->top -= filename ? 2 : 1;
356 lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(errno));
357 if (filename)
358 fclose(ctx.fp);
359 return LUA_ERRFILE;
360 }
361 if (filename) {
362 L->top--;
363 copyTV(L, L->top-1, L->top);
364 fclose(ctx.fp);
365 }
366 return status;
367}
368
369typedef struct StringReaderCtx {
370 const char *str;
371 size_t size;
372} StringReaderCtx;
373
374static const char *reader_string(lua_State *L, void *ud, size_t *size)
375{
376 StringReaderCtx *ctx = (StringReaderCtx *)ud;
377 UNUSED(L);
378 if (ctx->size == 0) return NULL;
379 *size = ctx->size;
380 ctx->size = 0;
381 return ctx->str;
382}
383
384LUALIB_API int luaL_loadbuffer(lua_State *L, const char *buf, size_t size,
385 const char *name)
386{
387 StringReaderCtx ctx;
388 ctx.str = buf;
389 ctx.size = size;
390 return lua_load(L, reader_string, &ctx, name);
391}
392
393LUALIB_API int luaL_loadstring(lua_State *L, const char *s)
394{
395 return luaL_loadbuffer(L, s, strlen(s), s);
396}
397
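The loaders above only compile a chunk; running it and reporting errors is left to the caller. A minimal sketch of the usual pattern, assuming the includes at the top of this file:

static int sk_runfile(lua_State *L, const char *filename)
{
  int status = luaL_loadfile(L, filename);     /* pushes the chunk or an error message */
  if (status == 0)
    status = lua_pcall(L, 0, LUA_MULTRET, 0);  /* run it in protected mode */
  if (status != 0) {
    fprintf(stderr, "%s\n", lua_tostring(L, -1));
    lua_pop(L, 1);                             /* drop the error message */
  }
  return status;
}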
398/* -- Default allocator and panic function -------------------------------- */
399
400#ifdef LUAJIT_USE_SYSMALLOC
401
402static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
403{
404 (void)ud;
405 (void)osize;
406 if (nsize == 0) {
407 free(ptr);
408 return NULL;
409 } else {
410 return realloc(ptr, nsize);
411 }
412}
413
414#define mem_create() NULL
415
416#else
417
418#include "lj_alloc.h"
419
420#define mem_alloc lj_alloc_f
421#define mem_create lj_alloc_create
422
423#endif
424
425static int panic(lua_State *L)
426{
427 fprintf(stderr, "PANIC: unprotected error in call to Lua API (%s)\n",
428 lua_tostring(L, -1));
429 return 0;
430}
431
432LUALIB_API lua_State *luaL_newstate(void)
433{
434 lua_State *L = lua_newstate(mem_alloc, mem_create());
435 if (L) G(L)->panic = panic;
436 return L;
437}
438
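mem_alloc above follows the standard lua_Alloc contract (nsize == 0 frees, anything else reallocates), so a host can also pass its own allocator to lua_newstate. A hedged sketch of a byte-counting allocator; the names are illustrative.

#include <stdlib.h>

typedef struct SkAllocStats { size_t total; } SkAllocStats;

static void *sk_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
{
  SkAllocStats *st = (SkAllocStats *)ud;
  void *p;
  if (nsize == 0) {                 /* free request */
    st->total -= osize;
    free(ptr);
    return NULL;
  }
  p = realloc(ptr, nsize);          /* grow, shrink or fresh allocation (osize == 0) */
  if (p != NULL)
    st->total = st->total - osize + nsize;  /* net bytes currently held by the VM */
  return p;
}

/* Usage: SkAllocStats stats = {0}; lua_State *L = lua_newstate(sk_alloc, &stats); */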
diff --git a/src/lib_base.c b/src/lib_base.c
new file mode 100644
index 00000000..6b9e8eef
--- /dev/null
+++ b/src/lib_base.c
@@ -0,0 +1,560 @@
1/*
2** Base and coroutine library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <stdio.h>
10
11#define lib_base_c
12#define LUA_LIB
13
14#include "lua.h"
15#include "lauxlib.h"
16#include "lualib.h"
17
18#include "lj_obj.h"
19#include "lj_gc.h"
20#include "lj_err.h"
21#include "lj_str.h"
22#include "lj_tab.h"
23#include "lj_meta.h"
24#include "lj_state.h"
25#include "lj_ff.h"
26#include "lj_ctype.h"
27#include "lj_lib.h"
28
29/* -- Base library: checks ------------------------------------------------ */
30
31#define LJLIB_MODULE_base
32
33LJLIB_ASM(assert) LJLIB_REC(.)
34{
35 GCstr *s;
36 lj_lib_checkany(L, 1);
37 s = lj_lib_optstr(L, 2);
38 if (s)
39 lj_err_callermsg(L, strdata(s));
40 else
41 lj_err_caller(L, LJ_ERR_ASSERT);
42 return FFH_UNREACHABLE;
43}
44
45/* ORDER LJ_T */
46LJLIB_PUSH("nil")
47LJLIB_PUSH("boolean")
48LJLIB_PUSH(top-1) /* boolean */
49LJLIB_PUSH("userdata")
50LJLIB_PUSH("string")
51LJLIB_PUSH("upval")
52LJLIB_PUSH("thread")
53LJLIB_PUSH("proto")
54LJLIB_PUSH("function")
55LJLIB_PUSH("deadkey")
56LJLIB_PUSH("table")
57LJLIB_PUSH(top-8) /* userdata */
58LJLIB_PUSH("number")
59LJLIB_ASM_(type) LJLIB_REC(.)
60/* Recycle the lj_lib_checkany(L, 1) from assert. */
61
62/* -- Base library: getters and setters ----------------------------------- */
63
64LJLIB_ASM_(getmetatable) LJLIB_REC(.)
65/* Recycle the lj_lib_checkany(L, 1) from assert. */
66
67LJLIB_ASM(setmetatable) LJLIB_REC(.)
68{
69 GCtab *t = lj_lib_checktab(L, 1);
70 GCtab *mt = lj_lib_checktabornil(L, 2);
71 if (!tvisnil(lj_meta_lookup(L, L->base, MM_metatable)))
72 lj_err_caller(L, LJ_ERR_PROTMT);
73 setgcref(t->metatable, obj2gco(mt));
74 if (mt) { lj_gc_objbarriert(L, t, mt); }
75 settabV(L, L->base-1, t);
76 return FFH_RES(1);
77}
78
79LJLIB_CF(getfenv)
80{
81 GCfunc *fn;
82 cTValue *o = L->base;
83 if (!(o < L->top && tvisfunc(o))) {
84 int level = lj_lib_optint(L, 1, 1);
85 o = lj_err_getframe(L, level, &level);
86 if (o == NULL)
87 lj_err_arg(L, 1, LJ_ERR_INVLVL);
88 }
89 fn = &gcval(o)->fn;
90 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
91 return 1;
92}
93
94LJLIB_CF(setfenv)
95{
96 GCfunc *fn;
97 GCtab *t = lj_lib_checktab(L, 2);
98 cTValue *o = L->base;
99 if (!(o < L->top && tvisfunc(o))) {
100 int level = lj_lib_checkint(L, 1);
101 if (level == 0) {
102 /* NOBARRIER: A thread (i.e. L) is never black. */
103 setgcref(L->env, obj2gco(t));
104 return 0;
105 }
106 o = lj_err_getframe(L, level, &level);
107 if (o == NULL)
108 lj_err_arg(L, 1, LJ_ERR_INVLVL);
109 }
110 fn = &gcval(o)->fn;
111 if (!isluafunc(fn))
112 lj_err_caller(L, LJ_ERR_SETFENV);
113 setgcref(fn->l.env, obj2gco(t));
114 lj_gc_objbarrier(L, obj2gco(fn), t);
115 setfuncV(L, L->top++, fn);
116 return 1;
117}
118
119LJLIB_ASM(rawget) LJLIB_REC(.)
120{
121 lj_lib_checktab(L, 1);
122 lj_lib_checkany(L, 2);
123 return FFH_UNREACHABLE;
124}
125
126LJLIB_CF(rawset) LJLIB_REC(.)
127{
128 lj_lib_checktab(L, 1);
129 lj_lib_checkany(L, 2);
130 L->top = 1+lj_lib_checkany(L, 3);
131 lua_rawset(L, 1);
132 return 1;
133}
134
135LJLIB_CF(rawequal) LJLIB_REC(.)
136{
137 cTValue *o1 = lj_lib_checkany(L, 1);
138 cTValue *o2 = lj_lib_checkany(L, 2);
139 setboolV(L->top-1, lj_obj_equal(o1, o2));
140 return 1;
141}
142
143LJLIB_CF(unpack)
144{
145 GCtab *t = lj_lib_checktab(L, 1);
146 int32_t n, i = lj_lib_optint(L, 2, 1);
147 int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ?
148 lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t);
149 if (i > e) return 0;
150 n = e - i + 1;
151 if (n <= 0 || !lua_checkstack(L, n))
152 lj_err_caller(L, LJ_ERR_UNPACK);
153 do {
154 cTValue *tv = lj_tab_getint(t, i);
155 if (tv) {
156 copyTV(L, L->top++, tv);
157 } else {
158 setnilV(L->top++);
159 }
160 } while (i++ < e);
161 return n;
162}
163
164LJLIB_CF(select)
165{
166 int32_t n = (int32_t)(L->top - L->base);
167 if (n >= 1 && tvisstr(L->base) && *strVdata(L->base) == '#') {
168 setintV(L->top-1, n-1);
169 return 1;
170 } else {
171 int32_t i = lj_lib_checkint(L, 1);
172 if (i < 0) i = n + i; else if (i > n) i = n;
173 if (i < 1)
174 lj_err_arg(L, 1, LJ_ERR_IDXRNG);
175 return n - i;
176 }
177}
178
179/* -- Base library: conversions ------------------------------------------- */
180
181LJLIB_ASM(tonumber) LJLIB_REC(.)
182{
183 int32_t base = lj_lib_optint(L, 2, 10);
184 if (base == 10) {
185 TValue *o = lj_lib_checkany(L, 1);
186 if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) {
187 setnumV(L->base-1, numV(o));
188 return FFH_RES(1);
189 }
190 } else {
191 const char *p = strdata(lj_lib_checkstr(L, 1));
192 char *ep;
193 unsigned long ul;
194 if (base < 2 || base > 36)
195 lj_err_arg(L, 2, LJ_ERR_BASERNG);
196 ul = strtoul(p, &ep, base);
197 if (p != ep) {
198 while (lj_ctype_isspace((unsigned char)(*ep))) ep++;
199 if (*ep == '\0') {
200 setnumV(L->base-1, cast_num(ul));
201 return FFH_RES(1);
202 }
203 }
204 }
205 setnilV(L->base-1);
206 return FFH_RES(1);
207}
208
209LJLIB_ASM(tostring) LJLIB_REC(.)
210{
211 TValue *o = lj_lib_checkany(L, 1);
212 cTValue *mo;
213 L->top = o+1; /* Only keep one argument. */
214 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
215 copyTV(L, L->base-1, mo); /* Replace callable. */
216 return FFH_RETRY;
217 } else {
218 GCstr *s;
219 if (tvisnum(o)) {
220 s = lj_str_fromnum(L, &o->n);
221 } else if (tvisnil(o)) {
222 s = lj_str_newlit(L, "nil");
223 } else if (tvisfalse(o)) {
224 s = lj_str_newlit(L, "false");
225 } else if (tvistrue(o)) {
226 s = lj_str_newlit(L, "true");
227 } else {
228 if (tvisfunc(o) && isffunc(funcV(o)))
229 lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid);
230 else
231 lua_pushfstring(L, "%s: %p", typename(o), lua_topointer(L, 1));
232 /* Note: lua_pushfstring calls the GC which may invalidate o. */
233 s = strV(L->top-1);
234 }
235 setstrV(L, L->base-1, s);
236 return FFH_RES(1);
237 }
238}
239
240/* -- Base library: iterators --------------------------------------------- */
241
242LJLIB_ASM(next)
243{
244 lj_lib_checktab(L, 1);
245 lj_lib_checknum(L, 2); /* For ipairs_aux. */
246 return FFH_UNREACHABLE;
247}
248
249LJLIB_PUSH(lastcl)
250LJLIB_ASM_(pairs)
251
252LJLIB_NOREGUV LJLIB_ASM_(ipairs_aux) LJLIB_REC(.)
253
254LJLIB_PUSH(lastcl)
255LJLIB_ASM_(ipairs) LJLIB_REC(.)
256
257/* -- Base library: throw and catch errors -------------------------------- */
258
259LJLIB_CF(error)
260{
261 int32_t level = lj_lib_optint(L, 2, 1);
262 lua_settop(L, 1);
263 if (lua_isstring(L, 1) && level > 0) {
264 luaL_where(L, level);
265 lua_pushvalue(L, 1);
266 lua_concat(L, 2);
267 }
268 return lua_error(L);
269}
270
271LJLIB_ASM(pcall) LJLIB_REC(.)
272{
273 lj_lib_checkany(L, 1);
274 lj_lib_checkfunc(L, 2); /* For xpcall only. */
275 return FFH_UNREACHABLE;
276}
277LJLIB_ASM_(xpcall) LJLIB_REC(.)
278
279/* -- Base library: load Lua code ----------------------------------------- */
280
281static int load_aux(lua_State *L, int status)
282{
283 if (status == 0)
284 return 1;
285 copyTV(L, L->top, L->top-1);
286 setnilV(L->top-1);
287 L->top++;
288 return 2;
289}
290
291LJLIB_CF(loadstring)
292{
293 GCstr *s = lj_lib_checkstr(L, 1);
294 GCstr *name = lj_lib_optstr(L, 2);
295 return load_aux(L,
296 luaL_loadbuffer(L, strdata(s), s->len, strdata(name ? name : s)));
297}
298
299LJLIB_CF(loadfile)
300{
301 GCstr *fname = lj_lib_optstr(L, 1);
302 return load_aux(L, luaL_loadfile(L, fname ? strdata(fname) : NULL));
303}
304
305static const char *reader_func(lua_State *L, void *ud, size_t *size)
306{
307 UNUSED(ud);
308 luaL_checkstack(L, 2, "too many nested functions");
309 copyTV(L, L->top++, L->base);
310 lua_call(L, 0, 1); /* Call user-supplied function. */
311 L->top--;
312 if (tvisnil(L->top)) {
313 *size = 0;
314 return NULL;
315 } else if (tvisstr(L->top) || tvisnum(L->top)) {
316 copyTV(L, L->base+2, L->top); /* Anchor string in reserved stack slot. */
317 return lua_tolstring(L, 3, size);
318 } else {
319 lj_err_caller(L, LJ_ERR_RDRSTR);
320 return NULL;
321 }
322}
323
324LJLIB_CF(load)
325{
326 GCstr *name = lj_lib_optstr(L, 2);
327 lj_lib_checkfunc(L, 1);
328 lua_settop(L, 3); /* Reserve a slot for the string from the reader. */
329 return load_aux(L,
330 lua_load(L, reader_func, NULL, name ? strdata(name) : "=(load)"));
331}
332
333LJLIB_CF(dofile)
334{
335 GCstr *fname = lj_lib_optstr(L, 1);
336 setnilV(L->top);
337 L->top = L->base+1;
338 if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0)
339 lua_error(L);
340 lua_call(L, 0, LUA_MULTRET);
341 return (L->top - L->base) - 1;
342}
343
344/* -- Base library: GC control -------------------------------------------- */
345
346LJLIB_CF(gcinfo)
347{
348 setintV(L->top++, (G(L)->gc.total >> 10));
349 return 1;
350}
351
352LJLIB_CF(collectgarbage)
353{
354 int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */
355 "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
356 int32_t data = lj_lib_optint(L, 2, 0);
357 if (opt == LUA_GCCOUNT) {
358 setnumV(L->top-1, cast_num((int32_t)G(L)->gc.total)/1024.0);
359 } else {
360 int res = lua_gc(L, opt, data);
361 if (opt == LUA_GCSTEP)
362 setboolV(L->top-1, res);
363 else
364 setintV(L->top-1, res);
365 }
366 return 1;
367}
368
369/* -- Base library: miscellaneous functions ------------------------------- */
370
371LJLIB_PUSH(top-2) /* Upvalue holds weak table. */
372LJLIB_CF(newproxy)
373{
374 lua_settop(L, 1);
375 lua_newuserdata(L, 0);
376 if (lua_toboolean(L, 1) == 0) { /* newproxy(): without metatable. */
377 return 1;
378 } else if (lua_isboolean(L, 1)) { /* newproxy(true): with metatable. */
379 lua_newtable(L);
380 lua_pushvalue(L, -1);
381 lua_pushboolean(L, 1);
382 lua_rawset(L, lua_upvalueindex(1)); /* Remember mt in weak table. */
383 } else { /* newproxy(proxy): inherit metatable. */
384 int validproxy = 0;
385 if (lua_getmetatable(L, 1)) {
386 lua_rawget(L, lua_upvalueindex(1));
387 validproxy = lua_toboolean(L, -1);
388 lua_pop(L, 1);
389 }
390 if (!validproxy)
391 lj_err_arg(L, 1, LJ_ERR_NOPROXY);
392 lua_getmetatable(L, 1);
393 }
394 lua_setmetatable(L, 2);
395 return 1;
396}
397
398LJLIB_PUSH("tostring")
399LJLIB_CF(print)
400{
401 ptrdiff_t i, nargs = L->top - L->base;
402 cTValue *tv = lj_tab_getstr(tabref(L->env), strV(lj_lib_upvalue(L, 1)));
403 int shortcut = (tv && tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
404 copyTV(L, L->top++, tv ? tv : niltv(L));
405 for (i = 0; i < nargs; i++) {
406 const char *str;
407 size_t size;
408 cTValue *o = &L->base[i];
409 if (shortcut && tvisstr(o)) {
410 str = strVdata(o);
411 size = strV(o)->len;
412 } else if (shortcut && tvisnum(o)) {
413 char buf[LUAI_MAXNUMBER2STR];
414 lua_Number n = numV(o);
415 size = (size_t)lua_number2str(buf, n);
416 str = buf;
417 } else {
418 copyTV(L, L->top+1, o);
419 copyTV(L, L->top, L->top-1);
420 L->top += 2;
421 lua_call(L, 1, 1);
422 str = lua_tolstring(L, -1, &size);
423 if (!str)
424 lj_err_caller(L, LJ_ERR_PRTOSTR);
425 L->top--;
426 }
427 if (i)
428 putchar('\t');
429 fwrite(str, 1, size, stdout);
430 }
431 putchar('\n');
432 return 0;
433}
434
435LJLIB_PUSH(top-3)
436LJLIB_SET(_VERSION)
437
438#include "lj_libdef.h"
439
440/* -- Coroutine library --------------------------------------------------- */
441
442#define LJLIB_MODULE_coroutine
443
444LJLIB_CF(coroutine_status)
445{
446 const char *s;
447 lua_State *co;
448 if (!(L->top > L->base && tvisthread(L->base)))
449 lj_err_arg(L, 1, LJ_ERR_NOCORO);
450 co = threadV(L->base);
451 if (co == L) s = "running";
452 else if (co->status == LUA_YIELD) s = "suspended";
453 else if (co->status != 0) s = "dead";
454 else if (co->base > co->stack+1) s = "normal";
455 else if (co->top == co->base) s = "dead";
456 else s = "suspended";
457 lua_pushstring(L, s);
458 return 1;
459}
460
461LJLIB_CF(coroutine_running)
462{
463 if (lua_pushthread(L))
464 setnilV(L->top++);
465 return 1;
466}
467
468LJLIB_CF(coroutine_create)
469{
470 lua_State *L1 = lua_newthread(L);
471 if (!(L->top > L->base && tvisfunc(L->base) && isluafunc(funcV(L->base))))
472 lj_err_arg(L, 1, LJ_ERR_NOLFUNC);
473 setfuncV(L, L1->top++, funcV(L->base));
474 return 1;
475}
476
477LJLIB_ASM(coroutine_yield)
478{
479 lj_err_caller(L, LJ_ERR_CYIELD);
480 return FFH_UNREACHABLE;
481}
482
483static int ffh_resume(lua_State *L, lua_State *co, int wrap)
484{
485 if (co->cframe != NULL || co->status > LUA_YIELD ||
486 (co->status == 0 && co->top == co->base)) {
487 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
488 if (wrap) lj_err_caller(L, em);
489 setboolV(L->base-1, 0);
490 setstrV(L, L->base, lj_err_str(L, em));
491 return FFH_RES(2);
492 }
493 lj_state_growstack(co, (MSize)(L->top - L->base - 1));
494 return FFH_RETRY;
495}
496
497LJLIB_ASM(coroutine_resume)
498{
499 if (!(L->top > L->base && tvisthread(L->base)))
500 lj_err_arg(L, 1, LJ_ERR_NOCORO);
501 return ffh_resume(L, threadV(L->base), 0);
502}
503
504LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux)
505{
506 return ffh_resume(L, threadV(lj_lib_upvalue(L, 1)), 1);
507}
508
509/* Inline declarations. */
510LJ_ASMF void lj_ff_coroutine_wrap_aux(void);
511LJ_FUNCA_NORET void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co);
512
513/* Error handler, called from assembler VM. */
514void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
515{
516 co->top--; copyTV(L, L->top, co->top); L->top++;
517 if (tvisstr(L->top-1))
518 lj_err_callermsg(L, strVdata(L->top-1));
519 else
520 lj_err_run(L);
521}
522
523LJLIB_CF(coroutine_wrap)
524{
525 GCfunc *fn;
526 lj_cf_coroutine_create(L);
527 lua_pushcclosure(L, lj_ffh_coroutine_wrap_aux, 1);
528 fn = funcV(L->top-1);
529 fn->c.gate = lj_ff_coroutine_wrap_aux;
530 fn->c.ffid = FF_coroutine_wrap_aux;
531 return 1;
532}
533
534#include "lj_libdef.h"
535
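A hedged host-side sketch of the coroutine life cycle that coroutine_status and ffh_resume above reason about, assuming the includes at the top of this file and that the base/coroutine libraries are open; the chunk is illustrative.

static void sk_coroutine_demo(lua_State *L)
{
  if (luaL_dostring(L,
      "local co = coroutine.create(function(a) return coroutine.yield(a+1) end)\n"
      "print(coroutine.status(co))          -- 'suspended'\n"
      "print(coroutine.resume(co, 1))       -- true, 2\n"
      "print(coroutine.resume(co, 'done'))  -- true, 'done'\n"
      "print(coroutine.status(co))          -- 'dead'\n"))
    fprintf(stderr, "%s\n", lua_tostring(L, -1));
}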
536/* ------------------------------------------------------------------------ */
537
538static void newproxy_weaktable(lua_State *L)
539{
540 /* NOBARRIER: The table is new (marked white). */
541 GCtab *t = lj_tab_new(L, 0, 1);
542 settabV(L, L->top++, t);
543 setgcref(t->metatable, obj2gco(t));
544 setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
545 lj_str_newlit(L, "kv"));
546 t->nomm = cast_byte(~(1u<<MM_mode));
547}
548
549LUALIB_API int luaopen_base(lua_State *L)
550{
551 /* NOBARRIER: Table and value are the same. */
552 GCtab *env = tabref(L->env);
553 settabV(L, lj_tab_setstr(L, env, lj_str_newlit(L, "_G")), env);
554 lua_pushliteral(L, LUA_VERSION); /* top-3. */
555 newproxy_weaktable(L); /* top-2. */
556 LJ_LIB_REG_(L, "_G", base);
557 LJ_LIB_REG(L, coroutine);
558 return 2;
559}
560
diff --git a/src/lib_bit.c b/src/lib_bit.c
new file mode 100644
index 00000000..2f727e68
--- /dev/null
+++ b/src/lib_bit.c
@@ -0,0 +1,74 @@
1/*
2** Bit manipulation library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lib_bit_c
7#define LUA_LIB
8
9#include "lua.h"
10#include "lauxlib.h"
11#include "lualib.h"
12
13#include "lj_obj.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_lib.h"
17
18/* ------------------------------------------------------------------------ */
19
20#define LJLIB_MODULE_bit
21
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT)
23{
24 lj_lib_checknum(L, 1);
25 return FFH_RETRY;
26}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{
32 lj_lib_checknum(L, 1);
33 lj_lib_checknum(L, 2);
34 return FFH_RETRY;
35}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
38LJLIB_ASM_(bit_rol) LJLIB_REC(bit_shift IR_BROL)
39LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{
43 int i = 0;
44 do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY;
46}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
49
50/* ------------------------------------------------------------------------ */
51
52LJLIB_CF(bit_tohex)
53{
54 uint32_t b = (uint32_t)lj_num2bit(lj_lib_checknum(L, 1));
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_num2bit(lj_lib_checknum(L, 2));
56 const char *hexdigits = "0123456789abcdef";
57 char buf[8];
58 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
59 if (n > 8) n = 8;
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
61 lua_pushlstring(L, buf, (size_t)n);
62 return 1;
63}
64
65/* ------------------------------------------------------------------------ */
66
67#include "lj_libdef.h"
68
69LUALIB_API int luaopen_bit(lua_State *L)
70{
71 LJ_LIB_REG(L, bit);
72 return 1;
73}
74
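A hedged sketch of exercising the bit module once luaopen_bit above has been run (directly or via the standard library set); the chunk and its expected output are illustrative.

static int sk_bit_demo(lua_State *L)
{
  /* Returns 0 on success; on failure the error message is left on the stack. */
  return luaL_dostring(L,
    "print(bit.tohex(bit.bor(0x12, bit.lshift(1, 8))))  -- 00000112\n"
    "print(bit.band(0xff, 0x0f))                        -- 15\n");
}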
diff --git a/src/lib_debug.c b/src/lib_debug.c
new file mode 100644
index 00000000..0e6c35e5
--- /dev/null
+++ b/src/lib_debug.c
@@ -0,0 +1,366 @@
1/*
2** Debug library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lib_debug_c
10#define LUA_LIB
11
12#include "lua.h"
13#include "lauxlib.h"
14#include "lualib.h"
15
16#include "lj_obj.h"
17#include "lj_err.h"
18#include "lj_lib.h"
19
20/* ------------------------------------------------------------------------ */
21
22#define LJLIB_MODULE_debug
23
24LJLIB_CF(debug_getregistry)
25{
26 copyTV(L, L->top++, registry(L));
27 return 1;
28}
29
30LJLIB_CF(debug_getmetatable)
31{
32 lj_lib_checkany(L, 1);
33 if (!lua_getmetatable(L, 1)) {
34 setnilV(L->top-1);
35 }
36 return 1;
37}
38
39LJLIB_CF(debug_setmetatable)
40{
41 lj_lib_checktabornil(L, 2);
42 L->top = L->base+2;
43 lua_setmetatable(L, 1);
44 setboolV(L->top-1, 1);
45 return 1;
46}
47
48LJLIB_CF(debug_getfenv)
49{
50 lj_lib_checkany(L, 1);
51 lua_getfenv(L, 1);
52 return 1;
53}
54
55LJLIB_CF(debug_setfenv)
56{
57 lj_lib_checktab(L, 2);
58 L->top = L->base+2;
59 if (!lua_setfenv(L, 1))
60 lj_err_caller(L, LJ_ERR_SETFENV);
61 return 1;
62}
63
64/* ------------------------------------------------------------------------ */
65
66static void settabss(lua_State *L, const char *i, const char *v)
67{
68 lua_pushstring(L, v);
69 lua_setfield(L, -2, i);
70}
71
72static void settabsi(lua_State *L, const char *i, int v)
73{
74 lua_pushinteger(L, v);
75 lua_setfield(L, -2, i);
76}
77
78static lua_State *getthread(lua_State *L, int *arg)
79{
80 if (L->base < L->top && tvisthread(L->base)) {
81 *arg = 1;
82 return threadV(L->base);
83 } else {
84 *arg = 0;
85 return L;
86 }
87}
88
89static void treatstackoption(lua_State *L, lua_State *L1, const char *fname)
90{
91 if (L == L1) {
92 lua_pushvalue(L, -2);
93 lua_remove(L, -3);
94 }
95 else
96 lua_xmove(L1, L, 1);
97 lua_setfield(L, -2, fname);
98}
99
100LJLIB_CF(debug_getinfo)
101{
102 lua_Debug ar;
103 int arg;
104 lua_State *L1 = getthread(L, &arg);
105 const char *options = luaL_optstring(L, arg+2, "flnSu");
106 if (lua_isnumber(L, arg+1)) {
107 if (!lua_getstack(L1, (int)lua_tointeger(L, arg+1), &ar)) {
108 setnilV(L->top-1);
109 return 1;
110 }
111 } else if (L->base+arg < L->top && tvisfunc(L->base+arg)) {
112 options = lua_pushfstring(L, ">%s", options);
113 setfuncV(L1, L1->top++, funcV(L->base+arg));
114 } else {
115 lj_err_arg(L, arg+1, LJ_ERR_NOFUNCL);
116 }
117 if (!lua_getinfo(L1, options, &ar))
118 lj_err_arg(L, arg+2, LJ_ERR_INVOPT);
119 lua_createtable(L, 0, 16);
120 if (strchr(options, 'S')) {
121 settabss(L, "source", ar.source);
122 settabss(L, "short_src", ar.short_src);
123 settabsi(L, "linedefined", ar.linedefined);
124 settabsi(L, "lastlinedefined", ar.lastlinedefined);
125 settabss(L, "what", ar.what);
126 }
127 if (strchr(options, 'l'))
128 settabsi(L, "currentline", ar.currentline);
129 if (strchr(options, 'u'))
130 settabsi(L, "nups", ar.nups);
131 if (strchr(options, 'n')) {
132 settabss(L, "name", ar.name);
133 settabss(L, "namewhat", ar.namewhat);
134 }
135 if (strchr(options, 'L'))
136 treatstackoption(L, L1, "activelines");
137 if (strchr(options, 'f'))
138 treatstackoption(L, L1, "func");
139 return 1; /* return table */
140}
141
142LJLIB_CF(debug_getlocal)
143{
144 int arg;
145 lua_State *L1 = getthread(L, &arg);
146 lua_Debug ar;
147 const char *name;
148 if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar))
149 lj_err_arg(L, arg+1, LJ_ERR_LVLRNG);
150 name = lua_getlocal(L1, &ar, lj_lib_checkint(L, arg+2));
151 if (name) {
152 lua_xmove(L1, L, 1);
153 lua_pushstring(L, name);
154 lua_pushvalue(L, -2);
155 return 2;
156 } else {
157 setnilV(L->top-1);
158 return 1;
159 }
160}
161
162LJLIB_CF(debug_setlocal)
163{
164 int arg;
165 lua_State *L1 = getthread(L, &arg);
166 lua_Debug ar;
167 TValue *tv;
168 if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar))
169 lj_err_arg(L, arg+1, LJ_ERR_LVLRNG);
170 tv = lj_lib_checkany(L, arg+3);
171 copyTV(L1, L1->top++, tv);
172 lua_pushstring(L, lua_setlocal(L1, &ar, lj_lib_checkint(L, arg+2)));
173 return 1;
174}
175
176static int debug_getupvalue(lua_State *L, int get)
177{
178 int32_t n = lj_lib_checkint(L, 2);
179 if (isluafunc(lj_lib_checkfunc(L, 1))) {
180 const char *name = get ? lua_getupvalue(L, 1, n) : lua_setupvalue(L, 1, n);
181 if (name) {
182 lua_pushstring(L, name);
183 if (!get) return 1;
184 copyTV(L, L->top, L->top-2);
185 L->top++;
186 return 2;
187 }
188 }
189 return 0;
190}
191
192LJLIB_CF(debug_getupvalue)
193{
194 return debug_getupvalue(L, 1);
195}
196
197LJLIB_CF(debug_setupvalue)
198{
199 lj_lib_checkany(L, 3);
200 return debug_getupvalue(L, 0);
201}
202
203/* ------------------------------------------------------------------------ */
204
205static const char KEY_HOOK = 'h';
206
207static void hookf(lua_State *L, lua_Debug *ar)
208{
209 static const char *const hooknames[] =
210 {"call", "return", "line", "count", "tail return"};
211 lua_pushlightuserdata(L, (void *)&KEY_HOOK);
212 lua_rawget(L, LUA_REGISTRYINDEX);
213 if (lua_isfunction(L, -1)) {
214 lua_pushstring(L, hooknames[(int)ar->event]);
215 if (ar->currentline >= 0)
216 lua_pushinteger(L, ar->currentline);
217 else lua_pushnil(L);
218 lua_call(L, 2, 0);
219 }
220}
221
222static int makemask(const char *smask, int count)
223{
224 int mask = 0;
225 if (strchr(smask, 'c')) mask |= LUA_MASKCALL;
226 if (strchr(smask, 'r')) mask |= LUA_MASKRET;
227 if (strchr(smask, 'l')) mask |= LUA_MASKLINE;
228 if (count > 0) mask |= LUA_MASKCOUNT;
229 return mask;
230}
231
232static char *unmakemask(int mask, char *smask)
233{
234 int i = 0;
235 if (mask & LUA_MASKCALL) smask[i++] = 'c';
236 if (mask & LUA_MASKRET) smask[i++] = 'r';
237 if (mask & LUA_MASKLINE) smask[i++] = 'l';
238 smask[i] = '\0';
239 return smask;
240}
241
242LJLIB_CF(debug_sethook)
243{
244 int arg, mask, count;
245 lua_Hook func;
246 (void)getthread(L, &arg);
247 if (lua_isnoneornil(L, arg+1)) {
248 lua_settop(L, arg+1);
249 func = NULL; mask = 0; count = 0; /* turn off hooks */
250 } else {
251 const char *smask = luaL_checkstring(L, arg+2);
252 luaL_checktype(L, arg+1, LUA_TFUNCTION);
253 count = luaL_optint(L, arg+3, 0);
254 func = hookf; mask = makemask(smask, count);
255 }
256 lua_pushlightuserdata(L, (void *)&KEY_HOOK);
257 lua_pushvalue(L, arg+1);
258 lua_rawset(L, LUA_REGISTRYINDEX);
259 lua_sethook(L, func, mask, count);
260 return 0;
261}
262
263LJLIB_CF(debug_gethook)
264{
265 char buff[5];
266 int mask = lua_gethookmask(L);
267 lua_Hook hook = lua_gethook(L);
268 if (hook != NULL && hook != hookf) { /* external hook? */
269 lua_pushliteral(L, "external hook");
270 } else {
271 lua_pushlightuserdata(L, (void *)&KEY_HOOK);
272 lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */
273 }
274 lua_pushstring(L, unmakemask(mask, buff));
275 lua_pushinteger(L, lua_gethookcount(L));
276 return 3;
277}
278
279/* ------------------------------------------------------------------------ */
280
281LJLIB_CF(debug_debug)
282{
283 for (;;) {
284 char buffer[250];
285 fputs("lua_debug> ", stderr);
286 if (fgets(buffer, sizeof(buffer), stdin) == 0 ||
287 strcmp(buffer, "cont\n") == 0)
288 return 0;
289 if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") ||
290 lua_pcall(L, 0, 0, 0)) {
291 fputs(lua_tostring(L, -1), stderr);
292 fputs("\n", stderr);
293 }
294 lua_settop(L, 0); /* remove any returned values */
295 }
296}
297
298/* ------------------------------------------------------------------------ */
299
300#define LEVELS1 12 /* size of the first part of the stack */
301#define LEVELS2 10 /* size of the second part of the stack */
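/* The traceback built below prints the first LEVELS1 frames in full; if more
** frames remain, a "..." line is emitted and the loop skips ahead so that only
** the last LEVELS2 frames are shown (see the lua_getstack() trick below).
*/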
302
303LJLIB_CF(debug_traceback)
304{
305 int level;
306 int firstpart = 1; /* still before a possible `...' */
307 int arg;
308 lua_State *L1 = getthread(L, &arg);
309 lua_Debug ar;
310 if (lua_isnumber(L, arg+2)) {
311 level = (int)lua_tointeger(L, arg+2);
312 lua_pop(L, 1);
313 }
314 else
315 level = (L == L1) ? 1 : 0; /* level 0 may be this function itself */
316 if (lua_gettop(L) == arg)
317 lua_pushliteral(L, "");
318 else if (!lua_isstring(L, arg+1)) return 1; /* message is not a string */
319 else lua_pushliteral(L, "\n");
320 lua_pushliteral(L, "stack traceback:");
321 while (lua_getstack(L1, level++, &ar)) {
322 if (level > LEVELS1 && firstpart) {
323 /* no more than `LEVELS2' more levels? */
324 if (!lua_getstack(L1, level+LEVELS2, &ar)) {
325 level--; /* keep going */
326 } else {
327 lua_pushliteral(L, "\n\t..."); /* too many levels */
328 /* This only works with LuaJIT 2.x. Avoids O(n^2) behaviour. */
329 lua_getstack(L1, -10, &ar);
330 level = ar.i_ci - LEVELS2;
331 }
332 firstpart = 0;
333 continue;
334 }
335 lua_pushliteral(L, "\n\t");
336 lua_getinfo(L1, "Snl", &ar);
337 lua_pushfstring(L, "%s:", ar.short_src);
338 if (ar.currentline > 0)
339 lua_pushfstring(L, "%d:", ar.currentline);
340 if (*ar.namewhat != '\0') { /* is there a name? */
341 lua_pushfstring(L, " in function " LUA_QS, ar.name);
342 } else {
343 if (*ar.what == 'm') /* main? */
344 lua_pushfstring(L, " in main chunk");
345 else if (*ar.what == 'C' || *ar.what == 't')
346 lua_pushliteral(L, " ?"); /* C function or tail call */
347 else
348 lua_pushfstring(L, " in function <%s:%d>",
349 ar.short_src, ar.linedefined);
350 }
351 lua_concat(L, lua_gettop(L) - arg);
352 }
353 lua_concat(L, lua_gettop(L) - arg);
354 return 1;
355}
356
357/* ------------------------------------------------------------------------ */
358
359#include "lj_libdef.h"
360
361LUALIB_API int luaopen_debug(lua_State *L)
362{
363 LJ_LIB_REG(L, debug);
364 return 1;
365}
366
diff --git a/src/lib_init.c b/src/lib_init.c
new file mode 100644
index 00000000..04ca60d9
--- /dev/null
+++ b/src/lib_init.c
@@ -0,0 +1,37 @@
1/*
2** Library initialization.
3** Major parts taken verbatim from the Lua interpreter.
4** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
5*/
6
7#define lib_init_c
8#define LUA_LIB
9
10#include "lua.h"
11#include "lauxlib.h"
12#include "lualib.h"
13
14static const luaL_Reg lualibs[] = {
15 { "", luaopen_base },
16 { LUA_LOADLIBNAME, luaopen_package },
17 { LUA_TABLIBNAME, luaopen_table },
18 { LUA_IOLIBNAME, luaopen_io },
19 { LUA_OSLIBNAME, luaopen_os },
20 { LUA_STRLIBNAME, luaopen_string },
21 { LUA_MATHLIBNAME, luaopen_math },
22 { LUA_DBLIBNAME, luaopen_debug },
23 { LUA_BITLIBNAME, luaopen_bit },
24 { LUA_JITLIBNAME, luaopen_jit },
25 { NULL, NULL }
26};
27
28LUALIB_API void luaL_openlibs(lua_State *L)
29{
30 const luaL_Reg *lib = lualibs;
31 for (; lib->func; lib++) {
32 lua_pushcfunction(L, lib->func);
33 lua_pushstring(L, lib->name);
34 lua_call(L, 1, 0);
35 }
36}
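/* Illustrative only -- a minimal host sketch (not part of this file) showing
** how an embedding application typically consumes luaL_openlibs(). It uses
** only standard Lua/LuaJIT C API calls:
*/
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

int main(void)
{
  lua_State *L = luaL_newstate();  /* Fresh VM state. */
  luaL_openlibs(L);                /* Registers all libraries listed above. */
  if (luaL_dostring(L, "print(jit.version)"))  /* jit table is now available. */
    return 1;
  lua_close(L);
  return 0;
}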
37
diff --git a/src/lib_io.c b/src/lib_io.c
new file mode 100644
index 00000000..01623258
--- /dev/null
+++ b/src/lib_io.c
@@ -0,0 +1,538 @@
1/*
2** I/O library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <errno.h>
10#include <stdio.h>
11
12#define lib_io_c
13#define LUA_LIB
14
15#include "lua.h"
16#include "lauxlib.h"
17#include "lualib.h"
18
19#include "lj_obj.h"
20#include "lj_err.h"
21#include "lj_gc.h"
22#include "lj_ff.h"
23#include "lj_lib.h"
24
25/* Index of standard handles in function environment. */
26#define IO_INPUT 1
27#define IO_OUTPUT 2
28
29/* -- Error handling ------------------------------------------------------ */
30
31static int io_pushresult(lua_State *L, int ok, const char *fname)
32{
33 if (ok) {
34 setboolV(L->top++, 1);
35 return 1;
36 } else {
37 int en = errno; /* Lua API calls may change this value. */
38 lua_pushnil(L);
39 if (fname)
40 lua_pushfstring(L, "%s: %s", fname, strerror(en));
41 else
42 lua_pushfstring(L, "%s", strerror(en));
43 lua_pushinteger(L, en);
44 return 3;
45 }
46}
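/* Lua-level convention implemented above: on success a single true is
** returned; on failure the results are nil, "<name>: <message>" (or just the
** message) and the errno value, e.g. io.open("nosuchfile") returns
** nil, "nosuchfile: No such file or directory", 2 on a typical POSIX system.
*/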
47
48static void io_file_error(lua_State *L, int arg, const char *fname)
49{
50 lua_pushfstring(L, "%s: %s", fname, strerror(errno));
51 luaL_argerror(L, arg, lua_tostring(L, -1));
52}
53
54/* -- Open helpers -------------------------------------------------------- */
55
56#define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE))
57
58static FILE *io_tofile(lua_State *L)
59{
60 FILE **f = io_tofilep(L);
61 if (*f == NULL)
62 lj_err_caller(L, LJ_ERR_IOCLFL);
63 return *f;
64}
65
66static FILE **io_file_new(lua_State *L)
67{
68 FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *));
69 *pf = NULL;
70 luaL_getmetatable(L, LUA_FILEHANDLE);
71 lua_setmetatable(L, -2);
72 return pf;
73}
74
75/* -- Close helpers ------------------------------------------------------- */
76
77static int lj_cf_io_std_close(lua_State *L)
78{
79 lua_pushnil(L);
80 lua_pushliteral(L, "cannot close standard file");
81 return 2;
82}
83
84static int lj_cf_io_pipe_close(lua_State *L)
85{
86 FILE **p = io_tofilep(L);
87#if defined(LUA_USE_POSIX)
88 int ok = (pclose(*p) != -1);
89#elif defined(LUA_USE_WIN)
90 int ok = (_pclose(*p) != -1);
91#else
92 int ok = 0;
93#endif
94 *p = NULL;
95 return io_pushresult(L, ok, NULL);
96}
97
98static int lj_cf_io_file_close(lua_State *L)
99{
100 FILE **p = io_tofilep(L);
101 int ok = (fclose(*p) == 0);
102 *p = NULL;
103 return io_pushresult(L, ok, NULL);
104}
105
106static int io_file_close(lua_State *L)
107{
108 lua_getfenv(L, 1);
109 lua_getfield(L, -1, "__close");
110 return (lua_tocfunction(L, -1))(L);
111}
112
113/* -- Read/write helpers -------------------------------------------------- */
114
115static int io_file_readnum(lua_State *L, FILE *fp)
116{
117 lua_Number d;
118 if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
119 lua_pushnumber(L, d);
120 return 1;
121 } else {
122 return 0; /* read fails */
123 }
124}
125
126static int test_eof(lua_State *L, FILE *fp)
127{
128 int c = getc(fp);
129 ungetc(c, fp);
130 lua_pushlstring(L, NULL, 0);
131 return (c != EOF);
132}
133
134static int io_file_readline(lua_State *L, FILE *fp)
135{
136 luaL_Buffer b;
137 luaL_buffinit(L, &b);
138 for (;;) {
139 size_t len;
140 char *p = luaL_prepbuffer(&b);
141 if (fgets(p, LUAL_BUFFERSIZE, fp) == NULL) { /* EOF? */
142 luaL_pushresult(&b);
143 return (strV(L->top-1)->len > 0); /* Anything read? */
144 }
145 len = strlen(p);
146 if (len == 0 || p[len-1] != '\n') { /* Partial line? */
147 luaL_addsize(&b, len);
148 } else {
149 luaL_addsize(&b, len - 1); /* Don't include EOL. */
150 luaL_pushresult(&b);
151 return 1; /* Got at least an EOL. */
152 }
153 }
154}
155
156static int io_file_readchars(lua_State *L, FILE *fp, size_t n)
157{
158 size_t rlen; /* how much to read */
159 size_t nr; /* number of chars actually read */
160 luaL_Buffer b;
161 luaL_buffinit(L, &b);
162 rlen = LUAL_BUFFERSIZE; /* try to read that much each time */
163 do {
164 char *p = luaL_prepbuffer(&b);
165 if (rlen > n) rlen = n; /* cannot read more than asked */
166 nr = fread(p, 1, rlen, fp);
167 luaL_addsize(&b, nr);
168 n -= nr; /* still have to read `n' chars */
169 } while (n > 0 && nr == rlen); /* until end of count or eof */
170 luaL_pushresult(&b); /* close buffer */
171 return (n == 0 || lua_objlen(L, -1) > 0);
172}
173
174static int io_file_read(lua_State *L, FILE *fp, int start)
175{
176 int ok, n, nargs = (L->top - L->base) - start;
177 clearerr(fp);
178 if (nargs == 0) {
179 ok = io_file_readline(L, fp);
180 n = start+1; /* Return 1 result. */
181 } else {
182 /* The results plus the buffers go on top of the args. */
183 luaL_checkstack(L, nargs+LUA_MINSTACK, "too many arguments");
184 ok = 1;
185 for (n = start; nargs-- && ok; n++) {
186 if (tvisstr(L->base+n)) {
187 const char *p = strVdata(L->base+n);
188 if (p[0] != '*')
189 lj_err_arg(L, n+1, LJ_ERR_INVOPT);
190 if (p[1] == 'n')
191 ok = io_file_readnum(L, fp);
192 else if (p[1] == 'l')
193 ok = io_file_readline(L, fp);
194 else if (p[1] == 'a')
195 io_file_readchars(L, fp, ~((size_t)0));
196 else
197 lj_err_arg(L, n+1, LJ_ERR_INVFMT);
198 } else if (tvisnum(L->base+n)) {
199 size_t len = (size_t)lj_lib_checkint(L, n+1);
200 ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp);
201 } else {
202 lj_err_arg(L, n+1, LJ_ERR_INVOPT);
203 }
204 }
205 }
206 if (ferror(fp))
207 return io_pushresult(L, 0, NULL);
208 if (!ok)
209 setnilV(L->top-1); /* Replace last result with nil. */
210 return n - start;
211}
212
213static int io_file_write(lua_State *L, FILE *fp, int start)
214{
215 cTValue *tv;
216 int status = 1;
217 for (tv = L->base+start; tv < L->top; tv++) {
218 if (tvisstr(tv)) {
219 MSize len = strV(tv)->len;
220 status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
221 } else if (tvisnum(tv)) {
222 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
223 } else {
224 lj_lib_checkstr(L, tv-L->base+1);
225 }
226 }
227 return io_pushresult(L, status, NULL);
228}
229
230/* -- I/O file methods ---------------------------------------------------- */
231
232#define LJLIB_MODULE_io_method
233
234LJLIB_CF(io_method_close)
235{
236 if (lua_isnone(L, 1))
237 lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT);
238 io_tofile(L);
239 return io_file_close(L);
240}
241
242LJLIB_CF(io_method_read)
243{
244 return io_file_read(L, io_tofile(L), 1);
245}
246
247LJLIB_CF(io_method_write)
248{
249 return io_file_write(L, io_tofile(L), 1);
250}
251
252LJLIB_CF(io_method_flush)
253{
254 return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL);
255}
256
257LJLIB_CF(io_method_seek)
258{
259 FILE *fp = io_tofile(L);
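  /* Note: option lists for lj_lib_checkopt() are length-prefixed -- each name
  ** is preceded by a byte holding its length, so "\3set\3cur\3end" encodes
  ** "set", "cur", "end" with indices 0, 1, 2 (cf. jitopt_flag() in lib_jit.c).
  */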
260 int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
261 lua_Number ofs;
262 int res;
263 if (opt == 0) opt = SEEK_SET;
264 else if (opt == 1) opt = SEEK_CUR;
265 else if (opt == 2) opt = SEEK_END;
266 lj_lib_opt(L, 3,
267 ofs = lj_lib_checknum(L, 3);
268 ,
269 ofs = 0;
270 )
271#if defined(LUA_USE_POSIX)
272 res = fseeko(fp, (int64_t)ofs, opt);
273#elif _MSC_VER >= 1400
274 res = _fseeki64(fp, (int64_t)ofs, opt);
275#elif defined(__MINGW32__)
276 res = fseeko64(fp, (int64_t)ofs, opt);
277#else
278 res = fseek(fp, (long)ofs, opt);
279#endif
280 if (res)
281 return io_pushresult(L, 0, NULL);
282#if defined(LUA_USE_POSIX)
283 ofs = cast_num(ftello(fp));
284#elif _MSC_VER >= 1400
285 ofs = cast_num(_ftelli64(fp));
286#elif defined(__MINGW32__)
287 ofs = cast_num(ftello64(fp));
288#else
289 ofs = cast_num(ftell(fp));
290#endif
291 setnumV(L->top-1, ofs);
292 return 1;
293}
294
295LJLIB_CF(io_method_setvbuf)
296{
297 FILE *fp = io_tofile(L);
298 int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no");
299 size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE);
300 if (opt == 0) opt = _IOFBF;
301 else if (opt == 1) opt = _IOLBF;
302 else if (opt == 2) opt = _IONBF;
303 return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL);
304}
305
306/* Forward declaration. */
307static void io_file_lines(lua_State *L, int idx, int toclose);
308
309LJLIB_CF(io_method_lines)
310{
311 io_tofile(L);
312 io_file_lines(L, 1, 0);
313 return 1;
314}
315
316LJLIB_CF(io_method___gc)
317{
318 FILE *fp = *io_tofilep(L);
319 if (fp != NULL) io_file_close(L);
320 return 0;
321}
322
323LJLIB_CF(io_method___tostring)
324{
325 FILE *fp = *io_tofilep(L);
326 if (fp == NULL)
327 lua_pushliteral(L, "file (closed)");
328 else
329 lua_pushfstring(L, "file (%p)", fp);
330 return 1;
331}
332
333LJLIB_PUSH(top-1) LJLIB_SET(__index)
334
335#include "lj_libdef.h"
336
337/* -- I/O library functions ----------------------------------------------- */
338
339#define LJLIB_MODULE_io
340
341LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
342
343static FILE *io_file_get(lua_State *L, int findex)
344{
345 GCtab *fenv = tabref(curr_func(L)->c.env);
346 GCudata *ud = udataV(&tvref(fenv->array)[findex]);
347 FILE *fp = *(FILE **)uddata(ud);
348 if (fp == NULL)
349 lj_err_caller(L, LJ_ERR_IOSTDCL);
350 return fp;
351}
352
353LJLIB_CF(io_open)
354{
355 const char *fname = luaL_checkstring(L, 1);
356 const char *mode = luaL_optstring(L, 2, "r");
357 FILE **pf = io_file_new(L);
358 *pf = fopen(fname, mode);
359 return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
360}
361
362LJLIB_CF(io_tmpfile)
363{
364 FILE **pf = io_file_new(L);
365 *pf = tmpfile();
366 return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1;
367}
368
369LJLIB_CF(io_close)
370{
371 return lj_cf_io_method_close(L);
372}
373
374LJLIB_CF(io_read)
375{
376 return io_file_read(L, io_file_get(L, IO_INPUT), 0);
377}
378
379LJLIB_CF(io_write)
380{
381 return io_file_write(L, io_file_get(L, IO_OUTPUT), 0);
382}
383
384LJLIB_CF(io_flush)
385{
386 return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL);
387}
388
389LJLIB_NOREG LJLIB_CF(io_lines_iter)
390{
391 FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1)));
392 int ok;
393 if (fp == NULL)
394 lj_err_caller(L, LJ_ERR_IOCLFL);
395 ok = io_file_readline(L, fp);
396 if (ferror(fp))
397 return luaL_error(L, "%s", strerror(errno));
398 if (ok)
399 return 1;
400 if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */
401 L->top = L->base+1;
402 setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1)));
403 io_file_close(L);
404 }
405 return 0;
406}
407
408static void io_file_lines(lua_State *L, int idx, int toclose)
409{
410 lua_pushvalue(L, idx);
411 lua_pushboolean(L, toclose);
412 lua_pushcclosure(L, lj_cf_io_lines_iter, 2);
413 funcV(L->top-1)->c.ffid = FF_io_lines_iter;
414}
415
416LJLIB_CF(io_lines)
417{
418 if (lua_isnoneornil(L, 1)) { /* no arguments? */
419 /* will iterate over default input */
420 lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT);
421 return lj_cf_io_method_lines(L);
422 } else {
423 const char *fname = luaL_checkstring(L, 1);
424 FILE **pf = io_file_new(L);
425 *pf = fopen(fname, "r");
426 if (*pf == NULL)
427 io_file_error(L, 1, fname);
428 io_file_lines(L, lua_gettop(L), 1);
429 return 1;
430 }
431}
432
433static int io_std_get(lua_State *L, int fp, const char *mode)
434{
435 if (!lua_isnoneornil(L, 1)) {
436 const char *fname = lua_tostring(L, 1);
437 if (fname) {
438 FILE **pf = io_file_new(L);
439 *pf = fopen(fname, mode);
440 if (*pf == NULL)
441 io_file_error(L, 1, fname);
442 } else {
443 io_tofile(L); /* check that it's a valid file handle */
444 lua_pushvalue(L, 1);
445 }
446 lua_rawseti(L, LUA_ENVIRONINDEX, fp);
447 }
448 /* return current value */
449 lua_rawgeti(L, LUA_ENVIRONINDEX, fp);
450 return 1;
451}
452
453LJLIB_CF(io_input)
454{
455 return io_std_get(L, IO_INPUT, "r");
456}
457
458LJLIB_CF(io_output)
459{
460 return io_std_get(L, IO_OUTPUT, "w");
461}
462
463LJLIB_CF(io_type)
464{
465 void *ud;
466 luaL_checkany(L, 1);
467 ud = lua_touserdata(L, 1);
468 lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
469 if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1))
470 lua_pushnil(L); /* not a file */
471 else if (*((FILE **)ud) == NULL)
472 lua_pushliteral(L, "closed file");
473 else
474 lua_pushliteral(L, "file");
475 return 1;
476}
477
478LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */
479
480LJLIB_CF(io_popen)
481{
482#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
483 const char *fname = luaL_checkstring(L, 1);
484 const char *mode = luaL_optstring(L, 2, "r");
485 FILE **pf = io_file_new(L);
486#ifdef LUA_USE_POSIX
487 fflush(NULL);
488 *pf = popen(fname, mode);
489#else
490 *pf = _popen(fname, mode);
491#endif
492 return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
493#else
494 luaL_error(L, LUA_QL("popen") " not supported");
495#endif
496}
497
498#include "lj_libdef.h"
499
500/* ------------------------------------------------------------------------ */
501
502static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname)
503{
504 FILE **pf = io_file_new(L);
505 GCudata *ud = udataV(L->top-1);
506 GCtab *envt = tabV(L->top-2);
507 *pf = fp;
508 setgcref(ud->env, obj2gco(envt));
509 lj_gc_objbarrier(L, obj2gco(ud), envt);
510 if (k > 0) {
511 lua_pushvalue(L, -1);
512 lua_rawseti(L, -5, k);
513 }
514 lua_setfield(L, -3, fname);
515}
516
517static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls)
518{
519 lua_createtable(L, narr, 1);
520 lua_pushcfunction(L, cls);
521 lua_setfield(L, -2, "__close");
522}
523
524LUALIB_API int luaopen_io(lua_State *L)
525{
526 LJ_LIB_REG_(L, NULL, io_method);
527 lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
528 io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */
529 io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */
530 LJ_LIB_REG(L, io);
531 io_fenv_new(L, 0, lj_cf_io_std_close);
532 io_std_new(L, stdin, IO_INPUT, "stdin");
533 io_std_new(L, stdout, IO_OUTPUT, "stdout");
534 io_std_new(L, stderr, 0, "stderr");
535 lua_pop(L, 1);
536 return 1;
537}
538
diff --git a/src/lib_jit.c b/src/lib_jit.c
new file mode 100644
index 00000000..4a57f3b4
--- /dev/null
+++ b/src/lib_jit.c
@@ -0,0 +1,589 @@
1/*
2** JIT library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lib_jit_c
7#define LUA_LIB
8
9#include "lua.h"
10#include "lauxlib.h"
11#include "lualib.h"
12
13#include "lj_arch.h"
14#include "lj_obj.h"
15#include "lj_err.h"
16#include "lj_str.h"
17#include "lj_tab.h"
18#if LJ_HASJIT
19#include "lj_ir.h"
20#include "lj_jit.h"
21#include "lj_iropt.h"
22#endif
23#include "lj_dispatch.h"
24#include "lj_vm.h"
25#include "lj_vmevent.h"
26#include "lj_lib.h"
27
28#include "luajit.h"
29
30/* -- jit.* functions ----------------------------------------------------- */
31
32#define LJLIB_MODULE_jit
33
34static int setjitmode(lua_State *L, int mode)
35{
36 int idx = 0;
37 if (L->base == L->top || tvisnil(L->base)) { /* jit.on/off/flush([nil]) */
38 mode |= LUAJIT_MODE_ENGINE;
39 } else {
40 /* jit.on/off/flush(func|proto, nil|true|false) */
41 if (tvisfunc(L->base) || tvisproto(L->base))
42 idx = 1;
43 else if (!tvistrue(L->base)) /* jit.on/off/flush(true, nil|true|false) */
44 goto err;
45 if (L->base+1 < L->top && tvisbool(L->base+1))
46 mode |= boolV(L->base+1) ? LUAJIT_MODE_ALLFUNC : LUAJIT_MODE_ALLSUBFUNC;
47 else
48 mode |= LUAJIT_MODE_FUNC;
49 }
50 if (luaJIT_setmode(L, idx, mode) != 1) {
51 err:
52#if LJ_HASJIT
53 lj_err_arg(L, 1, LJ_ERR_NOLFUNC);
54#else
55 lj_err_caller(L, LJ_ERR_NOJIT);
56#endif
57 }
58 return 0;
59}
60
61LJLIB_CF(jit_on)
62{
63 return setjitmode(L, LUAJIT_MODE_ON);
64}
65
66LJLIB_CF(jit_off)
67{
68 return setjitmode(L, LUAJIT_MODE_OFF);
69}
70
71LJLIB_CF(jit_flush)
72{
73#if LJ_HASJIT
74 if (L->base < L->top && (tvisnum(L->base) || tvisstr(L->base))) {
75 int traceno = lj_lib_checkint(L, 1);
76 luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE);
77 return 0;
78 }
79#endif
80 return setjitmode(L, LUAJIT_MODE_FLUSH);
81}
82
83#if LJ_HASJIT
84/* Push a string for every flag bit that is set. */
85static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base,
86 const char *str)
87{
88 for (; *str; base <<= 1, str += 1+*str)
89 if (flags & base)
90 setstrV(L, L->top++, lj_str_new(L, str+1, *(uint8_t *)str));
91}
92#endif
93
94LJLIB_CF(jit_status)
95{
96#if LJ_HASJIT
97 jit_State *J = L2J(L);
98 L->top = L->base;
99 setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
100 flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
101 flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
102 return L->top - L->base;
103#else
104 setboolV(L->top++, 0);
105 return 1;
106#endif
107}
108
109LJLIB_CF(jit_attach)
110{
111#ifdef LUAJIT_DISABLE_VMEVENT
112 luaL_error(L, "vmevent API disabled");
113#else
114 GCfunc *fn = lj_lib_checkfunc(L, 1);
115 GCstr *s = lj_lib_optstr(L, 2);
116 luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
117 if (s) { /* Attach to given event. */
118 lua_pushvalue(L, 1);
119 lua_rawseti(L, -2, VMEVENT_HASHIDX(s->hash));
120 G(L)->vmevmask = VMEVENT_NOCACHE; /* Invalidate cache. */
121 } else { /* Detach if no event given. */
122 setnilV(L->top++);
123 while (lua_next(L, -2)) {
124 L->top--;
125 if (tvisfunc(L->top) && funcV(L->top) == fn) {
126 setnilV(lj_tab_set(L, tabV(L->top-2), L->top-1));
127 }
128 }
129 }
130#endif
131 return 0;
132}
133
134LJLIB_PUSH(top-4) LJLIB_SET(arch)
135LJLIB_PUSH(top-3) LJLIB_SET(version_num)
136LJLIB_PUSH(top-2) LJLIB_SET(version)
137
138#include "lj_libdef.h"
139
140/* -- jit.util.* functions ------------------------------------------------ */
141
142#define LJLIB_MODULE_jit_util
143
144/* -- Reflection API for Lua functions ------------------------------------ */
145
146/* Return prototype of first argument (Lua function or prototype object) */
147static GCproto *check_Lproto(lua_State *L, int nolua)
148{
149 TValue *o = L->base;
150 if (L->top > o) {
151 if (tvisproto(o)) {
152 return protoV(o);
153 } else if (tvisfunc(o)) {
154 if (isluafunc(funcV(o)))
155 return funcproto(funcV(o));
156 else if (nolua)
157 return NULL;
158 }
159 }
160 lj_err_argt(L, 1, LUA_TFUNCTION);
161 return NULL; /* unreachable */
162}
163
164static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
165{
166 setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
167}
168
169/* local info = jit.util.funcinfo(func [,pc]) */
170LJLIB_CF(jit_util_funcinfo)
171{
172 GCproto *pt = check_Lproto(L, 1);
173 if (pt) {
174 BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
175 GCtab *t;
176 lua_createtable(L, 0, 16); /* Increment hash size if fields are added. */
177 t = tabV(L->top-1);
178 setintfield(L, t, "linedefined", pt->linedefined);
179 setintfield(L, t, "lastlinedefined", pt->lastlinedefined);
180 setintfield(L, t, "stackslots", pt->framesize);
181 setintfield(L, t, "params", pt->numparams);
182 setintfield(L, t, "bytecodes", (int32_t)pt->sizebc);
183 setintfield(L, t, "gcconsts", (int32_t)pt->sizekgc);
184 setintfield(L, t, "nconsts", (int32_t)pt->sizekn);
185 setintfield(L, t, "upvalues", (int32_t)pt->sizeuv);
186 if (pc > 0)
187 setintfield(L, t, "currentline", pt->lineinfo ? pt->lineinfo[pc-1] : 0);
188 lua_pushboolean(L, (pt->flags & PROTO_IS_VARARG));
189 lua_setfield(L, -2, "isvararg");
190 setstrV(L, L->top++, pt->chunkname);
191 lua_setfield(L, -2, "source");
192 lj_err_pushloc(L, pt, pc);
193 lua_setfield(L, -2, "loc");
194 } else {
195 GCfunc *fn = funcV(L->base);
196 GCtab *t;
197 lua_createtable(L, 0, 2); /* Increment hash size if fields are added. */
198 t = tabV(L->top-1);
199 setintfield(L, t, "ffid", fn->c.ffid);
200 setintfield(L, t, "upvalues", fn->c.nupvalues);
201 }
202 return 1;
203}
204
205/* local ins, m = jit.util.funcbc(func, pc) */
206LJLIB_CF(jit_util_funcbc)
207{
208 GCproto *pt = check_Lproto(L, 0);
209 BCPos pc = (BCPos)lj_lib_checkint(L, 2) - 1;
210 if (pc < pt->sizebc) {
211 BCIns ins = pt->bc[pc];
212 BCOp op = bc_op(ins);
213 lua_assert(op < BC__MAX);
214 setintV(L->top, ins);
215 setintV(L->top+1, lj_bc_mode[op]);
216 L->top += 2;
217 return 2;
218 }
219 return 0;
220}
221
222/* local k = jit.util.funck(func, idx) */
223LJLIB_CF(jit_util_funck)
224{
225 GCproto *pt = check_Lproto(L, 0);
226 MSize idx = (MSize)lj_lib_checkint(L, 2);
227 if ((int32_t)idx >= 0) {
228 if (idx < pt->sizekn) {
229 setnumV(L->top-1, pt->k.n[idx]);
230 return 1;
231 }
232 } else {
233 if (~idx < pt->sizekgc) {
234 GCobj *gc = gcref(pt->k.gc[idx]);
235 setgcV(L, L->top-1, &gc->gch, ~gc->gch.gct);
236 return 1;
237 }
238 }
239 return 0;
240}
241
242/* local name = jit.util.funcuvname(func, idx) */
243LJLIB_CF(jit_util_funcuvname)
244{
245 GCproto *pt = check_Lproto(L, 0);
246 uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
247 if (idx < pt->sizeuvname) {
248 setstrV(L, L->top-1, pt->uvname[idx]);
249 return 1;
250 }
251 return 0;
252}
253
254/* -- Reflection API for traces ------------------------------------------- */
255
256#if LJ_HASJIT
257
258/* Check trace argument. Must not throw for non-existent trace numbers. */
259static Trace *jit_checktrace(lua_State *L)
260{
261 TraceNo tr = (TraceNo)lj_lib_checkint(L, 1);
262 jit_State *J = L2J(L);
263 if (tr > 0 && tr < J->sizetrace)
264 return J->trace[tr];
265 return NULL;
266}
267
268/* local info = jit.util.traceinfo(tr) */
269LJLIB_CF(jit_util_traceinfo)
270{
271 Trace *T = jit_checktrace(L);
272 if (T) {
273 GCtab *t;
274 lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */
275 t = tabV(L->top-1);
276 setintfield(L, t, "nins", (int32_t)T->nins - REF_BIAS - 1);
277 setintfield(L, t, "nk", REF_BIAS - (int32_t)T->nk);
278 setintfield(L, t, "link", T->link);
279 setintfield(L, t, "nexit", T->nsnap);
280 /* There are many more fields. Add them only when needed. */
281 return 1;
282 }
283 return 0;
284}
285
286/* local m, ot, op1, op2, prev = jit.util.traceir(tr, idx) */
287LJLIB_CF(jit_util_traceir)
288{
289 Trace *T = jit_checktrace(L);
290 IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
291 if (T && ref >= REF_BIAS && ref < T->nins) {
292 IRIns *ir = &T->ir[ref];
293 int32_t m = lj_ir_mode[ir->o];
294 setintV(L->top-2, m);
295 setintV(L->top-1, ir->ot);
296 setintV(L->top++, (int32_t)ir->op1 - (irm_op1(m)==IRMref ? REF_BIAS : 0));
297 setintV(L->top++, (int32_t)ir->op2 - (irm_op2(m)==IRMref ? REF_BIAS : 0));
298 setintV(L->top++, ir->prev);
299 return 5;
300 }
301 return 0;
302}
303
304/* local k, t [, slot] = jit.util.tracek(tr, idx) */
305LJLIB_CF(jit_util_tracek)
306{
307 Trace *T = jit_checktrace(L);
308 IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
309 if (T && ref >= T->nk && ref < REF_BIAS) {
310 IRIns *ir = &T->ir[ref];
311 int32_t slot = -1;
312 if (ir->o == IR_KSLOT) {
313 slot = ir->op2;
314 ir = &T->ir[ir->op1];
315 }
316 lj_ir_kvalue(L, L->top-2, ir);
317 setintV(L->top-1, (int32_t)irt_type(ir->t));
318 if (slot == -1)
319 return 2;
320 setintV(L->top++, slot);
321 return 3;
322 }
323 return 0;
324}
325
326/* local snap = jit.util.tracesnap(tr, sn) */
327LJLIB_CF(jit_util_tracesnap)
328{
329 Trace *T = jit_checktrace(L);
330 SnapNo sn = (SnapNo)lj_lib_checkint(L, 2);
331 if (T && sn < T->nsnap) {
332 SnapShot *snap = &T->snap[sn];
333 IRRef2 *map = &T->snapmap[snap->mapofs];
334 BCReg s, nslots = snap->nslots;
335 GCtab *t;
336 lua_createtable(L, nslots ? (int)nslots : 1, 0);
337 t = tabV(L->top-1);
338 setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS);
339 for (s = 0; s < nslots; s++) {
340 TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
341 IRRef ref = snap_ref(map[s]);
342 if (ref)
343 setintV(o, (int32_t)ref - REF_BIAS);
344 else
345 setboolV(o, 0);
346 }
347 return 1;
348 }
349 return 0;
350}
351
352/* local mcode, addr, loop = jit.util.tracemc(tr) */
353LJLIB_CF(jit_util_tracemc)
354{
355 Trace *T = jit_checktrace(L);
356 if (T && T->mcode != NULL) {
357 setstrV(L, L->top-1, lj_str_new(L, (const char *)T->mcode, T->szmcode));
358 setnumV(L->top++, cast_num((intptr_t)T->mcode));
359 setintV(L->top++, T->mcloop);
360 return 3;
361 }
362 return 0;
363}
364
365/* local addr = jit.util.traceexitstub(idx) */
366LJLIB_CF(jit_util_traceexitstub)
367{
368 ExitNo exitno = (ExitNo)lj_lib_checkint(L, 1);
369 jit_State *J = L2J(L);
370 if (exitno < EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) {
371 setnumV(L->top-1, cast_num((intptr_t)exitstub_addr(J, exitno)));
372 return 1;
373 }
374 return 0;
375}
376
377#else
378
379static int trace_nojit(lua_State *L)
380{
381 UNUSED(L);
382 return 0;
383}
384#define lj_cf_jit_util_traceinfo trace_nojit
385#define lj_cf_jit_util_traceir trace_nojit
386#define lj_cf_jit_util_tracek trace_nojit
387#define lj_cf_jit_util_tracesnap trace_nojit
388#define lj_cf_jit_util_tracemc trace_nojit
389#define lj_cf_jit_util_traceexitstub trace_nojit
390
391#endif
392
393#include "lj_libdef.h"
394
395/* -- jit.opt module ------------------------------------------------------ */
396
397#define LJLIB_MODULE_jit_opt
398
399#if LJ_HASJIT
400/* Parse optimization level. */
401static int jitopt_level(jit_State *J, const char *str)
402{
403 if (str[0] >= '0' && str[0] <= '9' && str[1] == '\0') {
404 uint32_t flags;
405 if (str[0] == '0') flags = JIT_F_OPT_0;
406 else if (str[0] == '1') flags = JIT_F_OPT_1;
407 else if (str[0] == '2') flags = JIT_F_OPT_2;
408 else flags = JIT_F_OPT_3;
409 J->flags = (J->flags & ~JIT_F_OPT_MASK) | flags;
410 return 1; /* Ok. */
411 }
412 return 0; /* No match. */
413}
414
415/* Parse optimization flag. */
416static int jitopt_flag(jit_State *J, const char *str)
417{
418 const char *lst = JIT_F_OPTSTRING;
419 uint32_t opt;
420 int set = 1;
421 if (str[0] == '+') {
422 str++;
423 } else if (str[0] == '-') {
424 str++;
425 set = 0;
426 } else if (str[0] == 'n' && str[1] == 'o') {
427 str += str[2] == '-' ? 3 : 2;
428 set = 0;
429 }
430 for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
431 size_t len = *(const uint8_t *)lst;
432 if (len == 0)
433 break;
434 if (strncmp(str, lst+1, len) == 0 && str[len] == '\0') {
435 if (set) J->flags |= opt; else J->flags &= ~opt;
436 return 1; /* Ok. */
437 }
438 lst += 1+len;
439 }
440 return 0; /* No match. */
441}
442
443/* Forward declaration. */
444static void jit_init_hotcount(jit_State *J);
445
446/* Parse optimization parameter. */
447static int jitopt_param(jit_State *J, const char *str)
448{
449 const char *lst = JIT_P_STRING;
450 int i;
451 for (i = 0; i < JIT_P__MAX; i++) {
452 size_t len = *(const uint8_t *)lst;
453 TValue tv;
454 lua_assert(len != 0);
455 if (strncmp(str, lst+1, len) == 0 && str[len] == '=' &&
456 lj_str_numconv(&str[len+1], &tv)) {
457 J->param[i] = lj_num2int(tv.n);
458 if (i == JIT_P_hotloop)
459 jit_init_hotcount(J);
460 return 1; /* Ok. */
461 }
462 lst += 1+len;
463 }
464 return 0; /* No match. */
465}
466#endif
467
468/* jit.opt.start(flags...) */
469LJLIB_CF(jit_opt_start)
470{
471#if LJ_HASJIT
472 jit_State *J = L2J(L);
473 int nargs = (int)(L->top - L->base);
474 if (nargs == 0) {
475 J->flags = (J->flags & ~JIT_F_OPT_MASK) | JIT_F_OPT_DEFAULT;
476 } else {
477 int i;
478 for (i = 1; i <= nargs; i++) {
479 const char *str = strdata(lj_lib_checkstr(L, i));
480 if (!jitopt_level(J, str) &&
481 !jitopt_flag(J, str) &&
482 !jitopt_param(J, str))
483 lj_err_callerv(L, LJ_ERR_JITOPT, str);
484 }
485 }
486#else
487 lj_err_caller(L, LJ_ERR_NOJIT);
488#endif
489 return 0;
490}
491
492#include "lj_libdef.h"
493
494/* -- JIT compiler initialization ----------------------------------------- */
495
496#if LJ_HASJIT
497/* Default values for JIT parameters. */
498static const int32_t jit_param_default[JIT_P__MAX+1] = {
499#define JIT_PARAMINIT(len, name, value) (value),
500JIT_PARAMDEF(JIT_PARAMINIT)
501#undef JIT_PARAMINIT
502 0
503};
504
505/* Initialize hotcount table. */
506static void jit_init_hotcount(jit_State *J)
507{
508 HotCount start = (HotCount)J->param[JIT_P_hotloop];
509 HotCount *hotcount = J2GG(J)->hotcount;
510 uint32_t i;
511 for (i = 0; i < HOTCOUNT_SIZE; i++)
512 hotcount[i] = start;
513}
514#endif
515
516/* Arch-dependent CPU detection. */
517static uint32_t jit_cpudetect(lua_State *L)
518{
519 uint32_t flags = 0;
520#if LJ_TARGET_X86ORX64
521 uint32_t vendor[4];
522 uint32_t features[4];
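  /* lj_vm_cpuid() stores EAX..EDX in order: vendor[2] is ECX of CPUID leaf 0
  ** ("ntel" of GenuineIntel, "cAMD" of AuthenticAMD); features[3]/features[2]
  ** are EDX/ECX of leaf 1 (EDX bit 15 = CMOV, bit 26 = SSE2, ECX bit 19 = SSE4.1).
  */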
523 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
524#if !LJ_HASJIT
525#define JIT_F_CMOV 1
526#endif
527 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
528#if LJ_HASJIT
529 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
530 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
531 if (vendor[2] == 0x6c65746e) { /* Intel. */
532 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */
533 flags |= JIT_F_P4; /* Currently unused. */
534 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
535 flags |= JIT_F_LEA_AGU;
536 } else if (vendor[2] == 0x444d4163) { /* AMD. */
537 uint32_t fam = (features[0] & 0x0ff00f00);
538 if (fam == 0x00000f00) /* K8. */
539 flags |= JIT_F_SPLIT_XMM;
540 if (fam >= 0x00000f00) /* K8, K10. */
541 flags |= JIT_F_PREFER_IMUL;
542 }
543#endif
544 }
545#ifndef LUAJIT_CPU_NOCMOV
546 if (!(flags & JIT_F_CMOV))
547 luaL_error(L, "Ancient CPU lacks CMOV support (recompile with -DLUAJIT_CPU_NOCMOV)");
548#endif
549#if LJ_HASJIT
550 if (!(flags & JIT_F_SSE2))
551 luaL_error(L, "Sorry, SSE2 CPU support required for this beta release");
552#endif
553 UNUSED(L);
554#else
555#error "Missing CPU detection for this architecture"
556#endif
557 return flags;
558}
559
560/* Initialize JIT compiler. */
561static void jit_init(lua_State *L)
562{
563 uint32_t flags = jit_cpudetect(L);
564#if LJ_HASJIT
565 jit_State *J = L2J(L);
566 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
567 memcpy(J->param, jit_param_default, sizeof(J->param));
568 jit_init_hotcount(J);
569 lj_dispatch_update(G(L));
570#else
571 UNUSED(flags);
572#endif
573}
574
575LUALIB_API int luaopen_jit(lua_State *L)
576{
577 lua_pushliteral(L, LJ_ARCH_NAME);
578 lua_pushinteger(L, LUAJIT_VERSION_NUM);
579 lua_pushliteral(L, LUAJIT_VERSION);
580 LJ_LIB_REG(L, jit);
581#ifndef LUAJIT_DISABLE_JITUTIL
582 LJ_LIB_REG_(L, "jit.util", jit_util);
583#endif
584 LJ_LIB_REG_(L, "jit.opt", jit_opt);
585 L->top -= 2;
586 jit_init(L);
587 return 1;
588}
589
diff --git a/src/lib_math.c b/src/lib_math.c
new file mode 100644
index 00000000..ec8b0c2b
--- /dev/null
+++ b/src/lib_math.c
@@ -0,0 +1,188 @@
1/*
2** Math library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <math.h>
7
8#define lib_math_c
9#define LUA_LIB
10
11#include "lua.h"
12#include "lauxlib.h"
13#include "lualib.h"
14
15#include "lj_obj.h"
16#include "lj_lib.h"
17
18/* ------------------------------------------------------------------------ */
19
20#define LJLIB_MODULE_math
21
22LJLIB_ASM(math_abs) LJLIB_REC(.)
23{
24 lj_lib_checknum(L, 1);
25 return FFH_RETRY;
26}
27LJLIB_ASM_(math_floor) LJLIB_REC(math_round IRFPM_FLOOR)
28LJLIB_ASM_(math_ceil) LJLIB_REC(math_round IRFPM_CEIL)
29LJLIB_ASM_(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT)
30LJLIB_ASM_(math_log) LJLIB_REC(math_unary IRFPM_LOG)
31LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10)
32LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP)
33LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN)
34LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS)
35LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
36LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
37LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
38LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
39LJLIB_ASM_(math_sinh)
40LJLIB_ASM_(math_cosh)
41LJLIB_ASM_(math_tanh)
42LJLIB_ASM_(math_frexp)
43LJLIB_ASM_(math_modf) LJLIB_REC(.)
44
45LJLIB_PUSH(57.29577951308232)
46LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
47
48LJLIB_PUSH(0.017453292519943295)
49LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
50
51LJLIB_ASM(math_atan2) LJLIB_REC(math_binary IR_ATAN2)
52{
53 lj_lib_checknum(L, 1);
54 lj_lib_checknum(L, 2);
55 return FFH_RETRY;
56}
57LJLIB_ASM_(math_ldexp) LJLIB_REC(math_binary IR_LDEXP)
58LJLIB_ASM_(math_pow) LJLIB_REC(.)
59LJLIB_ASM_(math_fmod)
60
61LJLIB_ASM(math_min) LJLIB_REC(math_minmax IR_MIN)
62{
63 int i = 0;
64 do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top);
65 return FFH_RETRY;
66}
67LJLIB_ASM_(math_max) LJLIB_REC(math_minmax IR_MAX)
68
69LJLIB_PUSH(3.14159265358979323846) LJLIB_SET(pi)
70LJLIB_PUSH(1e310) LJLIB_SET(huge)
71
72#ifdef __MACH__
73LJ_FUNCA double lj_wrapper_sinh(double x) { return sinh(x); }
74LJ_FUNCA double lj_wrapper_cosh(double x) { return cosh(x); }
75LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); }
76#endif
77
78/* ------------------------------------------------------------------------ */
79
80/* This implements a Tausworthe PRNG with period 2^223. Based on:
81** Tables of maximally-equidistributed combined LFSR generators,
82** Pierre L'Ecuyer, 1999, table 3, 1st entry.
83** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
84*/
85
86/* PRNG state. */
87typedef struct TW223State {
88 uint64_t gen[4]; /* State of the 4 LFSR generators. */
89 int valid; /* State is valid. */
90} TW223State;
91
92/* Union needed for bit-pattern conversion between uint64_t and double. */
93typedef union { uint64_t u64; double d; } U64double;
94
95/* Update generator i and compute a running xor of all states. */
96#define TW223_GEN(i, k, q, s) \
97 z = tw->gen[i]; \
98 z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
99 r ^= z; tw->gen[i] = z;
100
101/* PRNG step function. Returns a double in the range 0.0 <= d < 1.0. */
102static double tw223_step(TW223State *tw)
103{
104 uint64_t z, r = 0;
105 U64double u;
106 TW223_GEN(0, 63, 31, 18)
107 TW223_GEN(1, 58, 19, 28)
108 TW223_GEN(2, 55, 24, 7)
109 TW223_GEN(3, 47, 21, 8)
110 u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52);
111#if defined(__GNUC__) && LJ_TARGET_X86 && __pic__
112 /* Compensate for unbelievable GCC pessimization. */
113 {
114 volatile U64double u1;
115 u1.u64 = (uint64_t)0x3f8 << 52;
116 return u.d - u1.d;
117 }
118#else
119 return u.d - 1.0;
120#endif
121}
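/* Illustrative standalone sketch (not part of this file): the bit-pattern
** trick in tw223_step() above ORs the 0x3ff exponent onto 52 random mantissa
** bits, which yields a double in [1.0, 2.0), so subtracting 1.0 produces a
** uniform value in [0.0, 1.0). Compile separately to verify, e.g.:
*/
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  union { uint64_t u64; double d; } u;
  uint64_t r = 0x0123456789abcdefULL;  /* Stand-in for the xor of the LFSRs. */
  u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52);
  printf("%.17g\n", u.d - 1.0);        /* Always in [0.0, 1.0). */
  return 0;
}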
122
123/* PRNG initialization function. */
124static void tw223_init(TW223State *tw, double d)
125{
126 uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
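  /* Consumed LSB-first: 0x01 = 64-63, 0x06 = 64-58, 0x09 = 64-55, 0x11 = 64-47,
  ** matching the k values used in the four TW223_GEN() steps above. */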
127 int i;
128 for (i = 0; i < 4; i++) {
129 U64double u;
130 uint32_t m = 1u << (r&255);
131 r >>= 8;
132 u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
133 if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
134 tw->gen[i] = u.u64;
135 }
136 tw->valid = 1;
137 for (i = 0; i < 10; i++)
138 tw223_step(tw);
139}
140
141/* PRNG extract function. */
142LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
143LJLIB_CF(math_random)
144{
145 int n = cast_int(L->top - L->base);
146 TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
147 double d;
148 if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0);
149 d = tw223_step(tw);
150 if (n > 0) {
151 double r1 = lj_lib_checknum(L, 1);
152 if (n == 1) {
153 d = floor(d*r1) + 1.0; /* d is an int in range [1, r1] */
154 } else {
155 double r2 = lj_lib_checknum(L, 2);
156 d = floor(d*(r2-r1+1.0)) + r1; /* d is an int in range [r1, r2] */
157 }
158 } /* else: d is a double in range [0, 1] */
159 setnumV(L->top++, d);
160 return 1;
161}
162
163/* PRNG seed function. */
164LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
165LJLIB_CF(math_randomseed)
166{
167 TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
168 tw223_init(tw, lj_lib_checknum(L, 1));
169 return 0;
170}
171
172/* ------------------------------------------------------------------------ */
173
174#include "lj_libdef.h"
175
176LUALIB_API int luaopen_math(lua_State *L)
177{
178 TW223State *tw;
179 tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State));
180 tw->valid = 0; /* Use lazy initialization to save some time on startup. */
181 LJ_LIB_REG(L, math);
182#if defined(LUA_COMPAT_MOD)
183 lua_getfield(L, -1, "fmod");
184 lua_setfield(L, -2, "mod");
185#endif
186 return 1;
187}
188
diff --git a/src/lib_os.c b/src/lib_os.c
new file mode 100644
index 00000000..bee7216a
--- /dev/null
+++ b/src/lib_os.c
@@ -0,0 +1,249 @@
1/*
2** OS library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <errno.h>
10#include <locale.h>
11#include <time.h>
12
13#define lib_os_c
14#define LUA_LIB
15
16#include "lua.h"
17#include "lauxlib.h"
18#include "lualib.h"
19
20#ifdef LUA_USE_POSIX
21#include <unistd.h>
22#else
23#include <stdio.h>
24#endif
25
26#include "lj_obj.h"
27#include "lj_err.h"
28#include "lj_lib.h"
29
30/* ------------------------------------------------------------------------ */
31
32#define LJLIB_MODULE_os
33
34static int os_pushresult(lua_State *L, int i, const char *filename)
35{
36 int en = errno; /* calls to Lua API may change this value */
37 if (i) {
38 setboolV(L->top-1, 1);
39 return 1;
40 } else {
41 setnilV(L->top-1);
42 lua_pushfstring(L, "%s: %s", filename, strerror(en));
43 lua_pushinteger(L, en);
44 return 3;
45 }
46}
47
48LJLIB_CF(os_execute)
49{
50 lua_pushinteger(L, system(luaL_optstring(L, 1, NULL)));
51 return 1;
52}
53
54LJLIB_CF(os_remove)
55{
56 const char *filename = luaL_checkstring(L, 1);
57 return os_pushresult(L, remove(filename) == 0, filename);
58}
59
60LJLIB_CF(os_rename)
61{
62 const char *fromname = luaL_checkstring(L, 1);
63 const char *toname = luaL_checkstring(L, 2);
64 return os_pushresult(L, rename(fromname, toname) == 0, fromname);
65}
66
67LJLIB_CF(os_tmpname)
68{
69#ifdef LUA_USE_POSIX
70 char buf[15+1];
71 int fp;
72 strcpy(buf, "/tmp/lua_XXXXXX");
73 fp = mkstemp(buf);
74 if (fp != -1)
75 close(fp);
76 else
77 lj_err_caller(L, LJ_ERR_OSUNIQF);
78#else
79 char buf[L_tmpnam];
80 if (tmpnam(buf) == NULL)
81 lj_err_caller(L, LJ_ERR_OSUNIQF);
82#endif
83 lua_pushstring(L, buf);
84 return 1;
85}
86
87LJLIB_CF(os_getenv)
88{
89 lua_pushstring(L, getenv(luaL_checkstring(L, 1))); /* if NULL push nil */
90 return 1;
91}
92
93LJLIB_CF(os_exit)
94{
95 exit(lj_lib_optint(L, 1, EXIT_SUCCESS));
96 return 0; /* to avoid warnings */
97}
98
99LJLIB_CF(os_clock)
100{
101 setnumV(L->top++, ((lua_Number)clock())*(1.0/(lua_Number)CLOCKS_PER_SEC));
102 return 1;
103}
104
105/* ------------------------------------------------------------------------ */
106
107static void setfield(lua_State *L, const char *key, int value)
108{
109 lua_pushinteger(L, value);
110 lua_setfield(L, -2, key);
111}
112
113static void setboolfield(lua_State *L, const char *key, int value)
114{
115 if (value < 0) /* undefined? */
116 return; /* does not set field */
117 lua_pushboolean(L, value);
118 lua_setfield(L, -2, key);
119}
120
121static int getboolfield(lua_State *L, const char *key)
122{
123 int res;
124 lua_getfield(L, -1, key);
125 res = lua_isnil(L, -1) ? -1 : lua_toboolean(L, -1);
126 lua_pop(L, 1);
127 return res;
128}
129
130static int getfield(lua_State *L, const char *key, int d)
131{
132 int res;
133 lua_getfield(L, -1, key);
134 if (lua_isnumber(L, -1)) {
135 res = (int)lua_tointeger(L, -1);
136 } else {
137 if (d < 0)
138 lj_err_callerv(L, LJ_ERR_OSDATEF, key);
139 res = d;
140 }
141 lua_pop(L, 1);
142 return res;
143}
144
145LJLIB_CF(os_date)
146{
147 const char *s = luaL_optstring(L, 1, "%c");
148 time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL));
149 struct tm *stm;
150 if (*s == '!') { /* UTC? */
151 stm = gmtime(&t);
152 s++; /* skip `!' */
153 } else {
154 stm = localtime(&t);
155 }
156 if (stm == NULL) { /* invalid date? */
157 setnilV(L->top-1);
158 } else if (strcmp(s, "*t") == 0) {
159 lua_createtable(L, 0, 9); /* 9 = number of fields */
160 setfield(L, "sec", stm->tm_sec);
161 setfield(L, "min", stm->tm_min);
162 setfield(L, "hour", stm->tm_hour);
163 setfield(L, "day", stm->tm_mday);
164 setfield(L, "month", stm->tm_mon+1);
165 setfield(L, "year", stm->tm_year+1900);
166 setfield(L, "wday", stm->tm_wday+1);
167 setfield(L, "yday", stm->tm_yday+1);
168 setboolfield(L, "isdst", stm->tm_isdst);
169 } else {
170 char cc[3];
171 luaL_Buffer b;
172 cc[0] = '%'; cc[2] = '\0';
173 luaL_buffinit(L, &b);
174 for (; *s; s++) {
175 if (*s != '%' || *(s + 1) == '\0') { /* no conversion specifier? */
176 luaL_addchar(&b, *s);
177 } else {
178 size_t reslen;
179 char buff[200]; /* should be big enough for any conversion result */
180 cc[1] = *(++s);
181 reslen = strftime(buff, sizeof(buff), cc, stm);
182 luaL_addlstring(&b, buff, reslen);
183 }
184 }
185 luaL_pushresult(&b);
186 }
187 return 1;
188}
189
190LJLIB_CF(os_time)
191{
192 time_t t;
193 if (lua_isnoneornil(L, 1)) { /* called without args? */
194 t = time(NULL); /* get current time */
195 } else {
196 struct tm ts;
197 luaL_checktype(L, 1, LUA_TTABLE);
198 lua_settop(L, 1); /* make sure table is at the top */
199 ts.tm_sec = getfield(L, "sec", 0);
200 ts.tm_min = getfield(L, "min", 0);
201 ts.tm_hour = getfield(L, "hour", 12);
202 ts.tm_mday = getfield(L, "day", -1);
203 ts.tm_mon = getfield(L, "month", -1) - 1;
204 ts.tm_year = getfield(L, "year", -1) - 1900;
205 ts.tm_isdst = getboolfield(L, "isdst");
206 t = mktime(&ts);
207 }
208 if (t == (time_t)(-1))
209 lua_pushnil(L);
210 else
211 lua_pushnumber(L, (lua_Number)t);
212 return 1;
213}
214
215LJLIB_CF(os_difftime)
216{
217 lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)),
218 (time_t)(luaL_optnumber(L, 2, (lua_Number)0))));
219 return 1;
220}
221
222/* ------------------------------------------------------------------------ */
223
224LJLIB_CF(os_setlocale)
225{
226 GCstr *s = lj_lib_optstr(L, 1);
227 const char *str = s ? strdata(s) : NULL;
228 int opt = lj_lib_checkopt(L, 2, 6,
229 "\5ctype\7numeric\4time\7collate\10monetary\1\377\3all");
230 if (opt == 0) opt = LC_CTYPE;
231 else if (opt == 1) opt = LC_NUMERIC;
232 else if (opt == 2) opt = LC_TIME;
233 else if (opt == 3) opt = LC_COLLATE;
234 else if (opt == 4) opt = LC_MONETARY;
235 else if (opt == 6) opt = LC_ALL;
236 lua_pushstring(L, setlocale(opt, str));
237 return 1;
238}
239
240/* ------------------------------------------------------------------------ */
241
242#include "lj_libdef.h"
243
244LUALIB_API int luaopen_os(lua_State *L)
245{
246 LJ_LIB_REG(L, os);
247 return 1;
248}
249
diff --git a/src/lib_package.c b/src/lib_package.c
new file mode 100644
index 00000000..69fa1db9
--- /dev/null
+++ b/src/lib_package.c
@@ -0,0 +1,508 @@
1/*
2** Package library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lib_package_c
10#define LUA_LIB
11
12#include "lua.h"
13#include "lauxlib.h"
14#include "lualib.h"
15
16#include "lj_obj.h"
17#include "lj_err.h"
18#include "lj_lib.h"
19
20/* ------------------------------------------------------------------------ */
21
22/* Error codes for ll_loadfunc. */
23#define PACKAGE_ERR_LIB 1
24#define PACKAGE_ERR_FUNC 2
25
26/* Redefined in platform specific part. */
27#define PACKAGE_LIB_FAIL "open"
28#define setprogdir(L) ((void)0)
29
30#if defined(LUA_DL_DLOPEN)
31
32#include <dlfcn.h>
33
34static void ll_unloadlib(void *lib)
35{
36 dlclose(lib);
37}
38
39static void *ll_load(lua_State *L, const char *path)
40{
41 void *lib = dlopen(path, RTLD_NOW);
42 if (lib == NULL) lua_pushstring(L, dlerror());
43 return lib;
44}
45
46static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
47{
48 lua_CFunction f = (lua_CFunction)dlsym(lib, sym);
49 if (f == NULL) lua_pushstring(L, dlerror());
50 return f;
51}
52
53#elif defined(LUA_DL_DLL)
54
55#define WIN32_LEAN_AND_MEAN
56#include <windows.h>
57
58#undef setprogdir
59
60static void setprogdir(lua_State *L)
61{
62 char buff[MAX_PATH + 1];
63 char *lb;
64 DWORD nsize = sizeof(buff);
65 DWORD n = GetModuleFileNameA(NULL, buff, nsize);
66 if (n == 0 || n == nsize || (lb = strrchr(buff, '\\')) == NULL) {
67 luaL_error(L, "unable to get ModuleFileName");
68 } else {
69 *lb = '\0';
70 luaL_gsub(L, lua_tostring(L, -1), LUA_EXECDIR, buff);
71 lua_remove(L, -2); /* remove original string */
72 }
73}
74
75static void pusherror(lua_State *L)
76{
77 DWORD error = GetLastError();
78 char buffer[128];
79 if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
80 NULL, error, 0, buffer, sizeof(buffer), NULL))
81 lua_pushstring(L, buffer);
82 else
83 lua_pushfstring(L, "system error %d\n", error);
84}
85
86static void ll_unloadlib(void *lib)
87{
88 FreeLibrary((HINSTANCE)lib);
89}
90
91static void *ll_load(lua_State *L, const char *path)
92{
93 HINSTANCE lib = LoadLibraryA(path);
94 if (lib == NULL) pusherror(L);
95 return lib;
96}
97
98static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
99{
100 lua_CFunction f = (lua_CFunction)GetProcAddress((HINSTANCE)lib, sym);
101 if (f == NULL) pusherror(L);
102 return f;
103}
104
105#else
106
107#undef PACKAGE_LIB_FAIL
108#define PACKAGE_LIB_FAIL "absent"
109
110#define DLMSG "dynamic libraries not enabled; check your Lua installation"
111
112static void ll_unloadlib(void *lib)
113{
114 (void)lib;
115}
116
117static void *ll_load(lua_State *L, const char *path)
118{
119 (void)path;
120 lua_pushliteral(L, DLMSG);
121 return NULL;
122}
123
124static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
125{
126 (void)lib; (void)sym;
127 lua_pushliteral(L, DLMSG);
128 return NULL;
129}
130#endif
131
132/* ------------------------------------------------------------------------ */
133
134static void **ll_register(lua_State *L, const char *path)
135{
136 void **plib;
137 lua_pushfstring(L, "LOADLIB: %s", path);
138 lua_gettable(L, LUA_REGISTRYINDEX); /* check library in registry? */
139 if (!lua_isnil(L, -1)) { /* is there an entry? */
140 plib = (void **)lua_touserdata(L, -1);
141 } else { /* no entry yet; create one */
142 lua_pop(L, 1);
143 plib = (void **)lua_newuserdata(L, sizeof(void *));
144 *plib = NULL;
145 luaL_getmetatable(L, "_LOADLIB");
146 lua_setmetatable(L, -2);
147 lua_pushfstring(L, "LOADLIB: %s", path);
148 lua_pushvalue(L, -2);
149 lua_settable(L, LUA_REGISTRYINDEX);
150 }
151 return plib;
152}
153
154static int ll_loadfunc(lua_State *L, const char *path, const char *sym)
155{
156 void **reg = ll_register(L, path);
157 if (*reg == NULL) *reg = ll_load(L, path);
158 if (*reg == NULL) {
159 return PACKAGE_ERR_LIB; /* unable to load library */
160 } else {
161 lua_CFunction f = ll_sym(L, *reg, sym);
162 if (f == NULL)
163 return PACKAGE_ERR_FUNC; /* unable to find function */
164 lua_pushcfunction(L, f);
165 return 0; /* return function */
166 }
167}
168
169static int lj_cf_package_loadlib(lua_State *L)
170{
171 const char *path = luaL_checkstring(L, 1);
172 const char *init = luaL_checkstring(L, 2);
173 int stat = ll_loadfunc(L, path, init);
174 if (stat == 0) { /* no errors? */
175 return 1; /* return the loaded function */
176 } else { /* error; error message is on stack top */
177 lua_pushnil(L);
178 lua_insert(L, -2);
179 lua_pushstring(L, (stat == PACKAGE_ERR_LIB) ? PACKAGE_LIB_FAIL : "init");
180 return 3; /* return nil, error message, and where */
181 }
182}
183
184static int lj_cf_package_unloadlib(lua_State *L)
185{
186 void **lib = (void **)luaL_checkudata(L, 1, "_LOADLIB");
187 if (*lib) ll_unloadlib(*lib);
188 *lib = NULL; /* mark library as closed */
189 return 0;
190}
191
192/* ------------------------------------------------------------------------ */
193
194static int readable(const char *filename)
195{
196 FILE *f = fopen(filename, "r"); /* try to open file */
197 if (f == NULL) return 0; /* open failed */
198 fclose(f);
199 return 1;
200}
201
202static const char *pushnexttemplate(lua_State *L, const char *path)
203{
204 const char *l;
205 while (*path == *LUA_PATHSEP) path++; /* skip separators */
206 if (*path == '\0') return NULL; /* no more templates */
207 l = strchr(path, *LUA_PATHSEP); /* find next separator */
208 if (l == NULL) l = path + strlen(path);
209 lua_pushlstring(L, path, (size_t)(l - path)); /* template */
210 return l;
211}
212
213static const char *findfile(lua_State *L, const char *name,
214 const char *pname)
215{
216 const char *path;
217 name = luaL_gsub(L, name, ".", LUA_DIRSEP);
218 lua_getfield(L, LUA_ENVIRONINDEX, pname);
219 path = lua_tostring(L, -1);
220 if (path == NULL)
221 luaL_error(L, LUA_QL("package.%s") " must be a string", pname);
222 lua_pushliteral(L, ""); /* error accumulator */
223 while ((path = pushnexttemplate(L, path)) != NULL) {
224 const char *filename;
225 filename = luaL_gsub(L, lua_tostring(L, -1), LUA_PATH_MARK, name);
226 lua_remove(L, -2); /* remove path template */
227 if (readable(filename)) /* does file exist and is readable? */
228 return filename; /* return that file name */
229 lua_pushfstring(L, "\n\tno file " LUA_QS, filename);
230 lua_remove(L, -2); /* remove file name */
231 lua_concat(L, 2); /* add entry to possible error message */
232 }
233 return NULL; /* not found */
234}
235
236static void loaderror(lua_State *L, const char *filename)
237{
238 luaL_error(L, "error loading module " LUA_QS " from file " LUA_QS ":\n\t%s",
239 lua_tostring(L, 1), filename, lua_tostring(L, -1));
240}
241
242static int lj_cf_package_loader_lua(lua_State *L)
243{
244 const char *filename;
245 const char *name = luaL_checkstring(L, 1);
246 filename = findfile(L, name, "path");
247 if (filename == NULL) return 1; /* library not found in this path */
248 if (luaL_loadfile(L, filename) != 0)
249 loaderror(L, filename);
250 return 1; /* library loaded successfully */
251}
252
253static const char *mkfuncname(lua_State *L, const char *modname)
254{
255 const char *funcname;
256 const char *mark = strchr(modname, *LUA_IGMARK);
257 if (mark) modname = mark + 1;
258 funcname = luaL_gsub(L, modname, ".", "_");
259 funcname = lua_pushfstring(L, "luaopen_%s", funcname);
260 lua_remove(L, -2); /* remove 'gsub' result */
261 return funcname;
262}
263
264static int lj_cf_package_loader_c(lua_State *L)
265{
266 const char *funcname;
267 const char *name = luaL_checkstring(L, 1);
268 const char *filename = findfile(L, name, "cpath");
269 if (filename == NULL) return 1; /* library not found in this path */
270 funcname = mkfuncname(L, name);
271 if (ll_loadfunc(L, filename, funcname) != 0)
272 loaderror(L, filename);
273 return 1; /* library loaded successfully */
274}
275
276static int lj_cf_package_loader_croot(lua_State *L)
277{
278 const char *funcname;
279 const char *filename;
280 const char *name = luaL_checkstring(L, 1);
281 const char *p = strchr(name, '.');
282 int stat;
283 if (p == NULL) return 0; /* is root */
284 lua_pushlstring(L, name, (size_t)(p - name));
285 filename = findfile(L, lua_tostring(L, -1), "cpath");
286 if (filename == NULL) return 1; /* root not found */
287 funcname = mkfuncname(L, name);
288 if ((stat = ll_loadfunc(L, filename, funcname)) != 0) {
289 if (stat != PACKAGE_ERR_FUNC) loaderror(L, filename); /* real error */
290 lua_pushfstring(L, "\n\tno module " LUA_QS " in file " LUA_QS,
291 name, filename);
292 return 1; /* function not found */
293 }
294 return 1;
295}
296
297static int lj_cf_package_loader_preload(lua_State *L)
298{
299 const char *name = luaL_checkstring(L, 1);
300 lua_getfield(L, LUA_ENVIRONINDEX, "preload");
301 if (!lua_istable(L, -1))
302 luaL_error(L, LUA_QL("package.preload") " must be a table");
303 lua_getfield(L, -1, name);
304 if (lua_isnil(L, -1)) /* not found? */
305 lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
306 return 1;
307}
308
309/* ------------------------------------------------------------------------ */
310
311static const int sentinel_ = 0;
312#define sentinel ((void *)&sentinel_)
313
314static int lj_cf_package_require(lua_State *L)
315{
316 const char *name = luaL_checkstring(L, 1);
317 int i;
318 lua_settop(L, 1); /* _LOADED table will be at index 2 */
319 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
320 lua_getfield(L, 2, name);
321 if (lua_toboolean(L, -1)) { /* is it there? */
322 if (lua_touserdata(L, -1) == sentinel) /* check loops */
323 luaL_error(L, "loop or previous error loading module " LUA_QS, name);
324 return 1; /* package is already loaded */
325 }
326 /* else must load it; iterate over available loaders */
327 lua_getfield(L, LUA_ENVIRONINDEX, "loaders");
328 if (!lua_istable(L, -1))
329 luaL_error(L, LUA_QL("package.loaders") " must be a table");
330 lua_pushliteral(L, ""); /* error message accumulator */
331 for (i = 1; ; i++) {
332 lua_rawgeti(L, -2, i); /* get a loader */
333 if (lua_isnil(L, -1))
334 luaL_error(L, "module " LUA_QS " not found:%s",
335 name, lua_tostring(L, -2));
336 lua_pushstring(L, name);
337 lua_call(L, 1, 1); /* call it */
338 if (lua_isfunction(L, -1)) /* did it find module? */
339 break; /* module loaded successfully */
340 else if (lua_isstring(L, -1)) /* loader returned error message? */
341 lua_concat(L, 2); /* accumulate it */
342 else
343 lua_pop(L, 1);
344 }
345 lua_pushlightuserdata(L, sentinel);
346 lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
347 lua_pushstring(L, name); /* pass name as argument to module */
348 lua_call(L, 1, 1); /* run loaded module */
349 if (!lua_isnil(L, -1)) /* non-nil return? */
350 lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
351 lua_getfield(L, 2, name);
352 if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */
353 lua_pushboolean(L, 1); /* use true as result */
354 lua_pushvalue(L, -1); /* extra copy to be returned */
355 lua_setfield(L, 2, name); /* _LOADED[name] = true */
356 }
357 return 1;
358}
359
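/* Illustration only -- registering an entry in package.preload from C,
** which is the first loader the require() implementation above consults.
** The module name "demo" and the opener demo_open are hypothetical.
*/
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

static int demo_open(lua_State *L)
{
  lua_newtable(L);                /* module table handed back by require() */
  lua_pushinteger(L, 42);
  lua_setfield(L, -2, "answer");
  return 1;
}

static void preload_demo(lua_State *L)
{
  lua_getglobal(L, "package");
  lua_getfield(L, -1, "preload");
  lua_pushcfunction(L, demo_open);
  lua_setfield(L, -2, "demo");    /* package.preload["demo"] = demo_open */
  lua_pop(L, 2);
  /* A later require("demo") returns the table built by demo_open. */
}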
360/* ------------------------------------------------------------------------ */
361
362static void setfenv(lua_State *L)
363{
364 lua_Debug ar;
365 if (lua_getstack(L, 1, &ar) == 0 ||
366 lua_getinfo(L, "f", &ar) == 0 || /* get calling function */
367 lua_iscfunction(L, -1))
368 luaL_error(L, LUA_QL("module") " not called from a Lua function");
369 lua_pushvalue(L, -2);
370 lua_setfenv(L, -2);
371 lua_pop(L, 1);
372}
373
374static void dooptions(lua_State *L, int n)
375{
376 int i;
377 for (i = 2; i <= n; i++) {
378 lua_pushvalue(L, i); /* get option (a function) */
379 lua_pushvalue(L, -2); /* module */
380 lua_call(L, 1, 0);
381 }
382}
383
384static void modinit(lua_State *L, const char *modname)
385{
386 const char *dot;
387 lua_pushvalue(L, -1);
388 lua_setfield(L, -2, "_M"); /* module._M = module */
389 lua_pushstring(L, modname);
390 lua_setfield(L, -2, "_NAME");
391 dot = strrchr(modname, '.'); /* look for last dot in module name */
392 if (dot == NULL) dot = modname; else dot++;
393 /* set _PACKAGE as package name (full module name minus last part) */
394 lua_pushlstring(L, modname, (size_t)(dot - modname));
395 lua_setfield(L, -2, "_PACKAGE");
396}
397
398static int lj_cf_package_module(lua_State *L)
399{
400 const char *modname = luaL_checkstring(L, 1);
401 int loaded = lua_gettop(L) + 1; /* index of _LOADED table */
402 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
403 lua_getfield(L, loaded, modname); /* get _LOADED[modname] */
404 if (!lua_istable(L, -1)) { /* not found? */
405 lua_pop(L, 1); /* remove previous result */
406 /* try global variable (and create one if it does not exist) */
407 if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
408 lj_err_callerv(L, LJ_ERR_BADMODN, modname);
409 lua_pushvalue(L, -1);
410 lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */
411 }
412 /* check whether table already has a _NAME field */
413 lua_getfield(L, -1, "_NAME");
414 if (!lua_isnil(L, -1)) { /* is table an initialized module? */
415 lua_pop(L, 1);
416 } else { /* no; initialize it */
417 lua_pop(L, 1);
418 modinit(L, modname);
419 }
420 lua_pushvalue(L, -1);
421 setfenv(L);
422 dooptions(L, loaded - 1);
423 return 0;
424}
425
426static int lj_cf_package_seeall(lua_State *L)
427{
428 luaL_checktype(L, 1, LUA_TTABLE);
429 if (!lua_getmetatable(L, 1)) {
430 lua_createtable(L, 0, 1); /* create new metatable */
431 lua_pushvalue(L, -1);
432 lua_setmetatable(L, 1);
433 }
434 lua_pushvalue(L, LUA_GLOBALSINDEX);
435 lua_setfield(L, -2, "__index"); /* mt.__index = _G */
436 return 0;
437}
438
439/* ------------------------------------------------------------------------ */
440
441#define AUXMARK "\1"
442
443static void setpath(lua_State *L, const char *fieldname, const char *envname,
444 const char *def)
445{
446 const char *path = getenv(envname);
447 if (path == NULL) {
448 lua_pushstring(L, def);
449 } else {
450 path = luaL_gsub(L, path, LUA_PATHSEP LUA_PATHSEP,
451 LUA_PATHSEP AUXMARK LUA_PATHSEP);
452 luaL_gsub(L, path, AUXMARK, def);
453 lua_remove(L, -2);
454 }
455 setprogdir(L);
456 lua_setfield(L, -2, fieldname);
457}
458
459static const luaL_Reg package_lib[] = {
460 { "loadlib", lj_cf_package_loadlib },
461 { "seeall", lj_cf_package_seeall },
462 { NULL, NULL }
463};
464
465static const luaL_Reg package_global[] = {
466 { "module", lj_cf_package_module },
467 { "require", lj_cf_package_require },
468 { NULL, NULL }
469};
470
471static const lua_CFunction package_loaders[] =
472{
473 lj_cf_package_loader_preload,
474 lj_cf_package_loader_lua,
475 lj_cf_package_loader_c,
476 lj_cf_package_loader_croot,
477 NULL
478};
479
480LUALIB_API int luaopen_package(lua_State *L)
481{
482 int i;
483 luaL_newmetatable(L, "_LOADLIB");
484 lua_pushcfunction(L, lj_cf_package_unloadlib);
485 lua_setfield(L, -2, "__gc");
486 luaL_register(L, LUA_LOADLIBNAME, package_lib);
487 lua_pushvalue(L, -1);
488 lua_replace(L, LUA_ENVIRONINDEX);
489 lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
490 for (i = 0; package_loaders[i] != NULL; i++) {
491 lua_pushcfunction(L, package_loaders[i]);
492 lua_rawseti(L, -2, i+1);
493 }
494 lua_setfield(L, -2, "loaders");
495 setpath(L, "path", LUA_PATH, LUA_PATH_DEFAULT);
496 setpath(L, "cpath", LUA_CPATH, LUA_CPATH_DEFAULT);
497 lua_pushliteral(L, LUA_PATH_CONFIG);
498 lua_setfield(L, -2, "config");
499 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
500 lua_setfield(L, -2, "loaded");
501 lua_newtable(L);
502 lua_setfield(L, -2, "preload");
503 lua_pushvalue(L, LUA_GLOBALSINDEX);
504 luaL_register(L, NULL, package_global);
505 lua_pop(L, 1);
506 return 1;
507}
508
diff --git a/src/lib_string.c b/src/lib_string.c
new file mode 100644
index 00000000..fdd7fbcb
--- /dev/null
+++ b/src/lib_string.c
@@ -0,0 +1,790 @@
1/*
2** String library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <stdio.h>
10
11#define lib_string_c
12#define LUA_LIB
13
14#include "lua.h"
15#include "lauxlib.h"
16#include "lualib.h"
17
18#include "lj_obj.h"
19#include "lj_err.h"
20#include "lj_str.h"
21#include "lj_tab.h"
22#include "lj_state.h"
23#include "lj_ff.h"
24#include "lj_ctype.h"
25#include "lj_lib.h"
26
27/* ------------------------------------------------------------------------ */
28
29#define LJLIB_MODULE_string
30
31LJLIB_ASM(string_len) LJLIB_REC(.)
32{
33 lj_lib_checkstr(L, 1);
34 return FFH_RETRY;
35}
36
37LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
38{
39 GCstr *s = lj_lib_checkstr(L, 1);
40 int32_t len = (int32_t)s->len;
41 int32_t start = lj_lib_optint(L, 2, 1);
42 int32_t stop = lj_lib_optint(L, 3, start);
43 int32_t n, i;
44 const unsigned char *p;
45 if (stop < 0) stop += len+1;
46 if (start < 0) start += len+1;
47 if (start <= 0) start = 1;
48 if (stop > len) stop = len;
49 if (start > stop) return FFH_RES(0); /* Empty interval: return no results. */
50 start--;
51 n = stop - start;
52 if ((uint32_t)n > LUAI_MAXCSTACK)
53 lj_err_caller(L, LJ_ERR_STRSLC);
54 lj_state_checkstack(L, (MSize)n);
55 p = (const unsigned char *)strdata(s) + start;
56 for (i = 0; i < n; i++)
57 setintV(L->base + i-1, p[i]);
58 return FFH_RES(n);
59}
60
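/* Illustration only -- the index normalization above, driven through the
** public API from a C host (assumes lua.h/lauxlib.h/lualib.h). Negative
** positions count back from the end of the string.
*/
#include <stdio.h>
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

static void byte_demo(lua_State *L)
{
  lua_getglobal(L, "string");
  lua_getfield(L, -1, "byte");
  lua_pushliteral(L, "Lua");
  lua_pushinteger(L, -2);
  lua_pushinteger(L, -1);
  lua_call(L, 3, 2);              /* two results: 117 ('u') and 97 ('a') */
  printf("%d %d\n", (int)lua_tointeger(L, -2), (int)lua_tointeger(L, -1));
  lua_pop(L, 3);                  /* results + string library table */
}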
61LJLIB_ASM(string_char)
62{
63 int i, nargs = cast_int(L->top - L->base);
64 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs);
65 for (i = 1; i <= nargs; i++) {
66 int32_t k = lj_lib_checkint(L, i);
67 if (!checku8(k))
68 lj_err_arg(L, i, LJ_ERR_BADVAL);
69 buf[i-1] = (char)k;
70 }
71 setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
72 return FFH_RES(1);
73}
74
75LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
76{
77 lj_lib_checkstr(L, 1);
78 lj_lib_checkint(L, 2);
79 setintV(L->base+2, lj_lib_optint(L, 3, -1));
80 return FFH_RETRY;
81}
82
83LJLIB_ASM(string_rep)
84{
85 GCstr *s = lj_lib_checkstr(L, 1);
86 int32_t len = (int32_t)s->len;
87 int32_t k = lj_lib_checkint(L, 2);
88 int64_t tlen = (int64_t)k * len;
89 const char *src;
90 char *buf;
91 if (k <= 0) return FFH_RETRY;
92 if (tlen > LJ_MAX_STR)
93 lj_err_caller(L, LJ_ERR_STROV);
94 buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)tlen);
95 if (len <= 1) return FFH_RETRY; /* ASM code only needed buffer resize. */
96 src = strdata(s);
97 do {
98 int32_t i = 0;
99 do { *buf++ = src[i++]; } while (i < len);
100 } while (--k > 0);
101 setstrV(L, L->base-1, lj_str_new(L, G(L)->tmpbuf.buf, (size_t)tlen));
102 return FFH_RES(1);
103}
104
105LJLIB_ASM(string_reverse)
106{
107 GCstr *s = lj_lib_checkstr(L, 1);
108 lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
109 return FFH_RETRY;
110}
111LJLIB_ASM_(string_lower)
112LJLIB_ASM_(string_upper)
113
114/* ------------------------------------------------------------------------ */
115
116LJLIB_CF(string_dump)
117{
118 lj_err_caller(L, LJ_ERR_STRDUMP);
119 return 0; /* unreachable */
120}
121
122/* ------------------------------------------------------------------------ */
123
124/* macro to `unsign' a character */
125#define uchar(c) ((unsigned char)(c))
126
127#define CAP_UNFINISHED (-1)
128#define CAP_POSITION (-2)
129
130typedef struct MatchState {
131 const char *src_init; /* init of source string */
132 const char *src_end; /* end (`\0') of source string */
133 lua_State *L;
134 int level; /* total number of captures (finished or unfinished) */
135 struct {
136 const char *init;
137 ptrdiff_t len;
138 } capture[LUA_MAXCAPTURES];
139} MatchState;
140
141#define L_ESC '%'
142#define SPECIALS "^$*+?.([%-"
143
144static int check_capture(MatchState *ms, int l)
145{
146 l -= '1';
147 if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
148 lj_err_caller(ms->L, LJ_ERR_STRCAPI);
149 return l;
150}
151
152static int capture_to_close(MatchState *ms)
153{
154 int level = ms->level;
155 for (level--; level>=0; level--)
156 if (ms->capture[level].len == CAP_UNFINISHED) return level;
157 lj_err_caller(ms->L, LJ_ERR_STRPATC);
158 return 0; /* unreachable */
159}
160
161static const char *classend(MatchState *ms, const char *p)
162{
163 switch (*p++) {
164 case L_ESC:
165 if (*p == '\0')
166 lj_err_caller(ms->L, LJ_ERR_STRPATE);
167 return p+1;
168 case '[':
169 if (*p == '^') p++;
170 do { /* look for a `]' */
171 if (*p == '\0')
172 lj_err_caller(ms->L, LJ_ERR_STRPATM);
173 if (*(p++) == L_ESC && *p != '\0')
174 p++; /* skip escapes (e.g. `%]') */
175 } while (*p != ']');
176 return p+1;
177 default:
178 return p;
179 }
180}
181
182static const unsigned char match_class_map[32] = {
183 0, LJ_CTYPE_ALPHA, 0, LJ_CTYPE_CNTRL, LJ_CTYPE_DIGIT, 0,0,0,0,0,0,0,
184 LJ_CTYPE_LOWER, 0,0,0, LJ_CTYPE_PUNCT, 0,0, LJ_CTYPE_SPACE, 0,
185 LJ_CTYPE_UPPER, 0, LJ_CTYPE_ALNUM, LJ_CTYPE_XDIGIT, 0,0,0,0,0,0,0
186};
187
188static int match_class(int c, int cl)
189{
190 if ((cl & 0xc0) == 0x40) {
191 int t = match_class_map[(cl&0x1f)];
192 if (t) {
193 t = lj_ctype_isa(c, t);
194 return (cl & 0x20) ? t : !t;
195 }
196 if (cl == 'z') return c == 0;
197 if (cl == 'Z') return c != 0;
198 }
199 return (cl == c);
200}
201
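/* Illustration only -- a standalone sketch of the %a/%A convention that
** match_class() implements: a lower-case class letter matches the class,
** its upper-case form matches the complement. <ctype.h> stands in for the
** lj_ctype_isa() bit tests used above.
*/
#include <assert.h>
#include <ctype.h>

static int match_class_demo(int c, int cl)
{
  int res;
  switch (tolower(cl)) {
  case 'a': res = isalpha(c); break;
  case 'd': res = isdigit(c); break;
  case 's': res = isspace(c); break;
  default: return cl == c;              /* not a class: literal match */
  }
  return islower(cl) ? (res != 0) : (res == 0);
}

int main(void)
{
  assert(match_class_demo('x', 'a'));   /* %a matches letters */
  assert(!match_class_demo('7', 'a'));
  assert(match_class_demo('7', 'A'));   /* %A matches non-letters */
  assert(match_class_demo('.', '.'));   /* plain characters match themselves */
  return 0;
}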
202static int matchbracketclass(int c, const char *p, const char *ec)
203{
204 int sig = 1;
205 if (*(p+1) == '^') {
206 sig = 0;
207 p++; /* skip the `^' */
208 }
209 while (++p < ec) {
210 if (*p == L_ESC) {
211 p++;
212 if (match_class(c, uchar(*p)))
213 return sig;
214 }
215 else if ((*(p+1) == '-') && (p+2 < ec)) {
216 p+=2;
217 if (uchar(*(p-2)) <= c && c <= uchar(*p))
218 return sig;
219 }
220 else if (uchar(*p) == c) return sig;
221 }
222 return !sig;
223}
224
225static int singlematch(int c, const char *p, const char *ep)
226{
227 switch (*p) {
228 case '.': return 1; /* matches any char */
229 case L_ESC: return match_class(c, uchar(*(p+1)));
230 case '[': return matchbracketclass(c, p, ep-1);
231 default: return (uchar(*p) == c);
232 }
233}
234
235static const char *match(MatchState *ms, const char *s, const char *p);
236
237static const char *matchbalance(MatchState *ms, const char *s, const char *p)
238{
239 if (*p == 0 || *(p+1) == 0)
240 lj_err_caller(ms->L, LJ_ERR_STRPATU);
241 if (*s != *p) {
242 return NULL;
243 } else {
244 int b = *p;
245 int e = *(p+1);
246 int cont = 1;
247 while (++s < ms->src_end) {
248 if (*s == e) {
249 if (--cont == 0) return s+1;
250 } else if (*s == b) {
251 cont++;
252 }
253 }
254 }
255 return NULL; /* string ends out of balance */
256}
257
258static const char *max_expand(MatchState *ms, const char *s,
259 const char *p, const char *ep)
260{
261 ptrdiff_t i = 0; /* counts maximum expand for item */
262 while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep))
263 i++;
264 /* keeps trying to match with the maximum repetitions */
265 while (i>=0) {
266 const char *res = match(ms, (s+i), ep+1);
267 if (res) return res;
268 i--; /* else didn't match; reduce 1 repetition to try again */
269 }
270 return NULL;
271}
272
273static const char *min_expand(MatchState *ms, const char *s,
274 const char *p, const char *ep)
275{
276 for (;;) {
277 const char *res = match(ms, s, ep+1);
278 if (res != NULL)
279 return res;
280 else if (s<ms->src_end && singlematch(uchar(*s), p, ep))
281 s++; /* try with one more repetition */
282 else
283 return NULL;
284 }
285}
286
287static const char *start_capture(MatchState *ms, const char *s,
288 const char *p, int what)
289{
290 const char *res;
291 int level = ms->level;
292 if (level >= LUA_MAXCAPTURES) lj_err_caller(ms->L, LJ_ERR_STRCAPN);
293 ms->capture[level].init = s;
294 ms->capture[level].len = what;
295 ms->level = level+1;
296 if ((res=match(ms, s, p)) == NULL) /* match failed? */
297 ms->level--; /* undo capture */
298 return res;
299}
300
301static const char *end_capture(MatchState *ms, const char *s,
302 const char *p)
303{
304 int l = capture_to_close(ms);
305 const char *res;
306 ms->capture[l].len = s - ms->capture[l].init; /* close capture */
307 if ((res = match(ms, s, p)) == NULL) /* match failed? */
308 ms->capture[l].len = CAP_UNFINISHED; /* undo capture */
309 return res;
310}
311
312static const char *match_capture(MatchState *ms, const char *s, int l)
313{
314 size_t len;
315 l = check_capture(ms, l);
316 len = (size_t)ms->capture[l].len;
317 if ((size_t)(ms->src_end-s) >= len &&
318 memcmp(ms->capture[l].init, s, len) == 0)
319 return s+len;
320 else
321 return NULL;
322}
323
324static const char *match(MatchState *ms, const char *s, const char *p)
325{
326 init: /* using goto's to optimize tail recursion */
327 switch (*p) {
328 case '(': /* start capture */
329 if (*(p+1) == ')') /* position capture? */
330 return start_capture(ms, s, p+2, CAP_POSITION);
331 else
332 return start_capture(ms, s, p+1, CAP_UNFINISHED);
333 case ')': /* end capture */
334 return end_capture(ms, s, p+1);
335 case L_ESC:
336 switch (*(p+1)) {
337 case 'b': /* balanced string? */
338 s = matchbalance(ms, s, p+2);
339 if (s == NULL) return NULL;
340 p+=4;
341 goto init; /* else return match(ms, s, p+4); */
342 case 'f': { /* frontier? */
343 const char *ep; char previous;
344 p += 2;
345 if (*p != '[')
346 lj_err_caller(ms->L, LJ_ERR_STRPATB);
347 ep = classend(ms, p); /* points to what is next */
348 previous = (s == ms->src_init) ? '\0' : *(s-1);
349 if (matchbracketclass(uchar(previous), p, ep-1) ||
350 !matchbracketclass(uchar(*s), p, ep-1)) return NULL;
351 p=ep;
352 goto init; /* else return match(ms, s, ep); */
353 }
354 default:
355 if (lj_ctype_isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */
356 s = match_capture(ms, s, uchar(*(p+1)));
357 if (s == NULL) return NULL;
358 p+=2;
359 goto init; /* else return match(ms, s, p+2) */
360 }
361 goto dflt; /* case default */
362 }
363 case '\0': /* end of pattern */
364 return s; /* match succeeded */
365 case '$':
366 if (*(p+1) == '\0') /* is the `$' the last char in pattern? */
367 return (s == ms->src_end) ? s : NULL; /* check end of string */
368 else
369 goto dflt;
370 default: dflt: { /* it is a pattern item */
371 const char *ep = classend(ms, p); /* points to what is next */
372 int m = s<ms->src_end && singlematch(uchar(*s), p, ep);
373 switch (*ep) {
374 case '?': { /* optional */
375 const char *res;
376 if (m && ((res=match(ms, s+1, ep+1)) != NULL))
377 return res;
378 p=ep+1;
379 goto init; /* else return match(ms, s, ep+1); */
380 }
381 case '*': /* 0 or more repetitions */
382 return max_expand(ms, s, p, ep);
383 case '+': /* 1 or more repetitions */
384 return (m ? max_expand(ms, s+1, p, ep) : NULL);
385 case '-': /* 0 or more repetitions (minimum) */
386 return min_expand(ms, s, p, ep);
387 default:
388 if (!m) return NULL;
389 s++; p=ep;
390 goto init; /* else return match(ms, s+1, ep); */
391 }
392 }
393 }
394}
395
396static const char *lmemfind(const char *s1, size_t l1,
397 const char *s2, size_t l2)
398{
399 if (l2 == 0) {
400 return s1; /* empty strings are everywhere */
401 } else if (l2 > l1) {
402 return NULL; /* avoids a negative `l1' */
403 } else {
404 const char *init; /* to search for a `*s2' inside `s1' */
405 l2--; /* 1st char will be checked by `memchr' */
406 l1 = l1-l2; /* `s2' cannot be found after that */
407 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
408 init++; /* 1st char is already checked */
409 if (memcmp(init, s2+1, l2) == 0) {
410 return init-1;
411 } else { /* correct `l1' and `s1' to try again */
412 l1 -= (size_t)(init-s1);
413 s1 = init;
414 }
415 }
416 return NULL; /* not found */
417 }
418}
419
420static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
421{
422 if (i >= ms->level) {
423 if (i == 0) /* ms->level == 0, too */
424 lua_pushlstring(ms->L, s, (size_t)(e - s)); /* add whole match */
425 else
426 lj_err_caller(ms->L, LJ_ERR_STRCAPI);
427 } else {
428 ptrdiff_t l = ms->capture[i].len;
429 if (l == CAP_UNFINISHED) lj_err_caller(ms->L, LJ_ERR_STRCAPU);
430 if (l == CAP_POSITION)
431 lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1);
432 else
433 lua_pushlstring(ms->L, ms->capture[i].init, (size_t)l);
434 }
435}
436
437static int push_captures(MatchState *ms, const char *s, const char *e)
438{
439 int i;
440 int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
441 luaL_checkstack(ms->L, nlevels, "too many captures");
442 for (i = 0; i < nlevels; i++)
443 push_onecapture(ms, i, s, e);
444 return nlevels; /* number of strings pushed */
445}
446
447static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
448{
449 /* relative string position: negative means back from end */
450 if (pos < 0) pos += (ptrdiff_t)len + 1;
451 return (pos >= 0) ? pos : 0;
452}
453
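/* Illustration only -- worked examples of the relative-position rule in
** posrelat(): negative positions count from the end of the string and
** out-of-range negatives clamp to 0 (standalone sketch).
*/
#include <assert.h>
#include <stddef.h>

static ptrdiff_t posrelat_demo(ptrdiff_t pos, size_t len)
{
  if (pos < 0) pos += (ptrdiff_t)len + 1;
  return (pos >= 0) ? pos : 0;
}

int main(void)
{
  assert(posrelat_demo(-1, 5) == 5);   /* last character of a 5-char string */
  assert(posrelat_demo(-5, 5) == 1);   /* first character */
  assert(posrelat_demo(-9, 5) == 0);   /* clamped below the start */
  assert(posrelat_demo(3, 5) == 3);    /* positive positions pass through */
  return 0;
}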
454static int str_find_aux(lua_State *L, int find)
455{
456 size_t l1, l2;
457 const char *s = luaL_checklstring(L, 1, &l1);
458 const char *p = luaL_checklstring(L, 2, &l2);
459 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
460 if (init < 0)
461 init = 0;
462 else if ((size_t)(init) > l1)
463 init = (ptrdiff_t)l1;
464 if (find && (lua_toboolean(L, 4) || /* explicit request? */
465 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
466 /* do a plain search */
467 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
468 if (s2) {
469 lua_pushinteger(L, s2-s+1);
470 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
471 return 2;
472 }
473 } else {
474 MatchState ms;
475 int anchor = (*p == '^') ? (p++, 1) : 0;
476 const char *s1=s+init;
477 ms.L = L;
478 ms.src_init = s;
479 ms.src_end = s+l1;
480 do {
481 const char *res;
482 ms.level = 0;
483 if ((res=match(&ms, s1, p)) != NULL) {
484 if (find) {
485 lua_pushinteger(L, s1-s+1); /* start */
486 lua_pushinteger(L, res-s); /* end */
487 return push_captures(&ms, NULL, 0) + 2;
488 } else {
489 return push_captures(&ms, s1, res);
490 }
491 }
492 } while (s1++ < ms.src_end && !anchor);
493 }
494 lua_pushnil(L); /* not found */
495 return 1;
496}
497
498LJLIB_CF(string_find)
499{
500 return str_find_aux(L, 1);
501}
502
503LJLIB_CF(string_match)
504{
505 return str_find_aux(L, 0);
506}
507
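/* Illustration only -- exercising the plain-search fast path of
** str_find_aux() from a C host (assumes lua.h/lauxlib.h/lualib.h).
** Passing true as the 4th argument, or using a pattern with none of the
** SPECIALS characters, routes the search through lmemfind() instead of
** the pattern matcher.
*/
#include <stdio.h>
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

static void find_plain(lua_State *L)
{
  lua_getglobal(L, "string");
  lua_getfield(L, -1, "find");
  lua_pushliteral(L, "a+b=c");
  lua_pushliteral(L, "+");        /* '+' is magic, so request a plain find */
  lua_pushinteger(L, 1);
  lua_pushboolean(L, 1);          /* plain = true */
  lua_call(L, 4, 2);              /* start, end indices (1-based) */
  printf("found at %d..%d\n",
         (int)lua_tointeger(L, -2), (int)lua_tointeger(L, -1));
  lua_pop(L, 3);                  /* results + string library table */
}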
508LJLIB_NOREG LJLIB_CF(string_gmatch_aux)
509{
510 const char *p = strVdata(lj_lib_upvalue(L, 2));
511 GCstr *str = strV(lj_lib_upvalue(L, 1));
512 const char *s = strdata(str);
513 TValue *tvpos = lj_lib_upvalue(L, 3);
514 const char *src = s + tvpos->u32.lo;
515 MatchState ms;
516 ms.L = L;
517 ms.src_init = s;
518 ms.src_end = s + str->len;
519 for (; src <= ms.src_end; src++) {
520 const char *e;
521 ms.level = 0;
522 if ((e = match(&ms, src, p)) != NULL) {
523 int32_t pos = (int32_t)(e - s);
524 if (e == src) pos++; /* Ensure progress for empty match. */
525 tvpos->u32.lo = (uint32_t)pos;
526 return push_captures(&ms, src, e);
527 }
528 }
529 return 0; /* not found */
530}
531
532LJLIB_CF(string_gmatch)
533{
534 lj_lib_checkstr(L, 1);
535 lj_lib_checkstr(L, 2);
536 L->top = L->base+3;
537 (L->top-1)->u64 = 0;
538 lua_pushcclosure(L, lj_cf_string_gmatch_aux, 3);
539 funcV(L->top-1)->c.ffid = FF_string_gmatch_aux;
540 return 1;
541}
542
543static void add_s(MatchState *ms, luaL_Buffer *b, const char *s, const char *e)
544{
545 size_t l, i;
546 const char *news = lua_tolstring(ms->L, 3, &l);
547 for (i = 0; i < l; i++) {
548 if (news[i] != L_ESC) {
549 luaL_addchar(b, news[i]);
550 } else {
551 i++; /* skip ESC */
552 if (!lj_ctype_isdigit(uchar(news[i]))) {
553 luaL_addchar(b, news[i]);
554 } else if (news[i] == '0') {
555 luaL_addlstring(b, s, (size_t)(e - s));
556 } else {
557 push_onecapture(ms, news[i] - '1', s, e);
558 luaL_addvalue(b); /* add capture to accumulated result */
559 }
560 }
561 }
562}
563
564static void add_value(MatchState *ms, luaL_Buffer *b,
565 const char *s, const char *e)
566{
567 lua_State *L = ms->L;
568 switch (lua_type(L, 3)) {
569 case LUA_TNUMBER:
570 case LUA_TSTRING: {
571 add_s(ms, b, s, e);
572 return;
573 }
574 case LUA_TFUNCTION: {
575 int n;
576 lua_pushvalue(L, 3);
577 n = push_captures(ms, s, e);
578 lua_call(L, n, 1);
579 break;
580 }
581 case LUA_TTABLE: {
582 push_onecapture(ms, 0, s, e);
583 lua_gettable(L, 3);
584 break;
585 }
586 }
587 if (!lua_toboolean(L, -1)) { /* nil or false? */
588 lua_pop(L, 1);
589 lua_pushlstring(L, s, (size_t)(e - s)); /* keep original text */
590 } else if (!lua_isstring(L, -1)) {
591 lj_err_callerv(L, LJ_ERR_STRGSRV, luaL_typename(L, -1));
592 }
593 luaL_addvalue(b); /* add result to accumulator */
594}
595
596LJLIB_CF(string_gsub)
597{
598 size_t srcl;
599 const char *src = luaL_checklstring(L, 1, &srcl);
600 const char *p = luaL_checkstring(L, 2);
601 int tr = lua_type(L, 3);
602 int max_s = luaL_optint(L, 4, (int)(srcl+1));
603 int anchor = (*p == '^') ? (p++, 1) : 0;
604 int n = 0;
605 MatchState ms;
606 luaL_Buffer b;
607 if (!(tr == LUA_TNUMBER || tr == LUA_TSTRING ||
608 tr == LUA_TFUNCTION || tr == LUA_TTABLE))
609 lj_err_arg(L, 3, LJ_ERR_NOSFT);
610 luaL_buffinit(L, &b);
611 ms.L = L;
612 ms.src_init = src;
613 ms.src_end = src+srcl;
614 while (n < max_s) {
615 const char *e;
616 ms.level = 0;
617 e = match(&ms, src, p);
618 if (e) {
619 n++;
620 add_value(&ms, &b, src, e);
621 }
622 if (e && e>src) /* non empty match? */
623 src = e; /* skip it */
624 else if (src < ms.src_end)
625 luaL_addchar(&b, *src++);
626 else
627 break;
628 if (anchor)
629 break;
630 }
631 luaL_addlstring(&b, src, (size_t)(ms.src_end-src));
632 luaL_pushresult(&b);
633 lua_pushinteger(L, n); /* number of substitutions */
634 return 2;
635}
636
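/* Illustration only -- the LUA_TFUNCTION branch of add_value() seen from
** a C host (assumes lua.h/lauxlib.h/lualib.h): each match's captures are
** pushed and the replacement function is called; a nil or false return
** keeps the original text. The helper names here are hypothetical.
*/
#include <stdio.h>
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

static int tag_l_words(lua_State *L)
{
  const char *w = luaL_checkstring(L, 1);  /* the whole match */
  if (w[0] == 'l') {                       /* only rewrite words starting with 'l' */
    lua_pushfstring(L, "<%s>", w);
    return 1;
  }
  return 0;                                /* nil result: keep original text */
}

static void gsub_with_cfunc(lua_State *L)
{
  lua_getglobal(L, "string");
  lua_getfield(L, -1, "gsub");
  lua_pushliteral(L, "hello lua world");
  lua_pushliteral(L, "%a+");               /* each word is one match */
  lua_pushcfunction(L, tag_l_words);
  lua_call(L, 3, 2);                       /* new string, substitution count */
  printf("%s (%d matches)\n",              /* "hello <lua> world (3 matches)" */
         lua_tostring(L, -2), (int)lua_tointeger(L, -1));
  lua_pop(L, 3);
}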
637/* ------------------------------------------------------------------------ */
638
639/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
640#define MAX_FMTITEM 512
641/* valid flags in a format specification */
642#define FMT_FLAGS "-+ #0"
643/*
644** maximum size of each format specification (such as '%-099.99d')
645** (+10 accounts for %99.99x plus margin of error)
646*/
647#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
648
649static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
650{
651 GCstr *str = lj_lib_checkstr(L, arg);
652 int32_t len = (int32_t)str->len;
653 const char *s = strdata(str);
654 luaL_addchar(b, '"');
655 while (len--) {
656 switch (*s) {
657 case '"': case '\\': case '\n':
658 luaL_addchar(b, '\\');
659 luaL_addchar(b, *s);
660 break;
661 case '\r':
662 luaL_addlstring(b, "\\r", 2);
663 break;
664 case '\0':
665 luaL_addlstring(b, "\\000", 4);
666 break;
667 default:
668 luaL_addchar(b, *s);
669 break;
670 }
671 s++;
672 }
673 luaL_addchar(b, '"');
674}
675
676static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
677{
678 const char *p = strfrmt;
679 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
680 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
681 lj_err_caller(L, LJ_ERR_STRFMTR);
682 if (lj_ctype_isdigit(uchar(*p))) p++; /* skip width */
683 if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */
684 if (*p == '.') {
685 p++;
686 if (lj_ctype_isdigit(uchar(*p))) p++; /* skip precision */
687 if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */
688 }
689 if (lj_ctype_isdigit(uchar(*p)))
690 lj_err_caller(L, LJ_ERR_STRFMTW);
691 *(form++) = '%';
692 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
693 form += p - strfrmt + 1;
694 *form = '\0';
695 return p;
696}
697
698static void addintlen(char *form)
699{
700 size_t l = strlen(form);
701 char spec = form[l - 1];
702 strcpy(form + l - 1, LUA_INTFRMLEN);
703 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
704 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
705}
706
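/* Illustration only -- a worked sketch of the string surgery addintlen()
** performs: the conversion character is moved behind the integer length
** modifier. LUA_INTFRMLEN is assumed to be "l" here; the real value is
** configured per platform in luaconf.h.
*/
#include <assert.h>
#include <string.h>

static void addintlen_demo(char *form)
{
  size_t l = strlen(form);
  char spec = form[l - 1];
  strcpy(form + l - 1, "l");      /* stand-in for LUA_INTFRMLEN */
  form[l] = spec;
  form[l + 1] = '\0';
}

int main(void)
{
  char form[32] = "%-8d";
  addintlen_demo(form);
  assert(strcmp(form, "%-8ld") == 0);   /* "%-8d" becomes "%-8ld" */
  return 0;
}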
707LJLIB_CF(string_format)
708{
709 int arg = 1;
710 GCstr *fmt = lj_lib_checkstr(L, arg);
711 const char *strfrmt = strdata(fmt);
712 const char *strfrmt_end = strfrmt + fmt->len;
713 luaL_Buffer b;
714 luaL_buffinit(L, &b);
715 while (strfrmt < strfrmt_end) {
716 if (*strfrmt != L_ESC) {
717 luaL_addchar(&b, *strfrmt++);
718 } else if (*++strfrmt == L_ESC) {
719 luaL_addchar(&b, *strfrmt++); /* %% */
720 } else { /* format item */
721 char form[MAX_FMTSPEC]; /* to store the format (`%...') */
722 char buff[MAX_FMTITEM]; /* to store the formatted item */
723 arg++;
724 strfrmt = scanformat(L, strfrmt, form);
725 switch (*strfrmt++) {
726 case 'c':
727 sprintf(buff, form, lj_lib_checkint(L, arg));
728 break;
729 case 'd': case 'i':
730 addintlen(form);
731 sprintf(buff, form, (LUA_INTFRM_T)lj_lib_checknum(L, arg));
732 break;
733 case 'o': case 'u': case 'x': case 'X':
734 addintlen(form);
735 sprintf(buff, form, (unsigned LUA_INTFRM_T)lj_lib_checknum(L, arg));
736 break;
737 case 'e': case 'E': case 'f': case 'g': case 'G':
738 sprintf(buff, form, (double)lj_lib_checknum(L, arg));
739 break;
740 case 'q':
741 addquoted(L, &b, arg);
742 continue;
743 case 'p':
744 lj_str_pushf(L, "%p", lua_topointer(L, arg));
745 luaL_addvalue(&b);
746 continue;
747 case 's': {
748 GCstr *str = lj_lib_checkstr(L, arg);
749 if (!strchr(form, '.') && str->len >= 100) {
750 /* no precision and string is too long to be formatted;
751 keep original string */
752 setstrV(L, L->top++, str);
753 luaL_addvalue(&b);
754 continue;
755 }
756 sprintf(buff, form, strdata(str));
757 break;
758 }
759 default:
760 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
761 break;
762 }
763 luaL_addlstring(&b, buff, strlen(buff));
764 }
765 }
766 luaL_pushresult(&b);
767 return 1;
768}
769
770/* ------------------------------------------------------------------------ */
771
772#include "lj_libdef.h"
773
774LUALIB_API int luaopen_string(lua_State *L)
775{
776 GCtab *mt;
777 LJ_LIB_REG(L, string);
778#if defined(LUA_COMPAT_GFIND)
779 lua_getfield(L, -1, "gmatch");
780 lua_setfield(L, -2, "gfind");
781#endif
782 mt = lj_tab_new(L, 0, 1);
783 /* NOBARRIER: G(L)->mmname[] is a GC root. */
784 setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt));
785 settabV(L, lj_tab_setstr(L, mt, strref(G(L)->mmname[MM_index])),
786 tabV(L->top-1));
787 mt->nomm = cast_byte(~(1u<<MM_index));
788 return 1;
789}
790
diff --git a/src/lib_table.c b/src/lib_table.c
new file mode 100644
index 00000000..68dc825b
--- /dev/null
+++ b/src/lib_table.c
@@ -0,0 +1,276 @@
1/*
2** Table library.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lib_table_c
10#define LUA_LIB
11
12#include "lua.h"
13#include "lauxlib.h"
14#include "lualib.h"
15
16#include "lj_obj.h"
17#include "lj_gc.h"
18#include "lj_err.h"
19#include "lj_tab.h"
20#include "lj_lib.h"
21
22/* ------------------------------------------------------------------------ */
23
24#define LJLIB_MODULE_table
25
26LJLIB_CF(table_foreachi)
27{
28 GCtab *t = lj_lib_checktab(L, 1);
29 GCfunc *func = lj_lib_checkfunc(L, 2);
30 MSize i, n = lj_tab_len(t);
31 for (i = 1; i <= n; i++) {
32 cTValue *val;
33 setfuncV(L, L->top, func);
34 setintV(L->top+1, i);
35 val = lj_tab_getint(t, (int32_t)i);
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45
46LJLIB_CF(table_foreach)
47{
48 GCtab *t = lj_lib_checktab(L, 1);
49 GCfunc *func = lj_lib_checkfunc(L, 2);
50 L->top = L->base+3;
51 setnilV(L->top-1);
52 while (lj_tab_next(L, t, L->top-1)) {
53 copyTV(L, L->top+2, L->top);
54 copyTV(L, L->top+1, L->top-1);
55 setfuncV(L, L->top, func);
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64
65LJLIB_ASM(table_getn) LJLIB_REC(.)
66{
67 lj_lib_checktab(L, 1);
68 return FFH_UNREACHABLE;
69}
70
71LJLIB_CF(table_maxn)
72{
73 GCtab *t = lj_lib_checktab(L, 1);
74 TValue *array = tvref(t->array);
75 Node *node;
76 lua_Number m = 0;
77 uint32_t i;
78 for (i = 0; i < t->asize; i++)
79 if (!tvisnil(&array[i])) {
80 m = (lua_Number)i;
81 break;
82 }
83 node = noderef(t->node);
84 for (i = 0; i <= t->hmask; i++)
85 if (tvisnum(&node[i].key) && numV(&node[i].key) > m)
86 m = numV(&node[i].key);
87 setnumV(L->top-1, m);
88 return 1;
89}
90
91LJLIB_CF(table_insert)
92{
93 GCtab *t = lj_lib_checktab(L, 1);
94 int32_t n, i = (int32_t)lj_tab_len(t) + 1;
95 int nargs = (int)((char *)L->top - (char *)L->base);
96 if (nargs != 2*sizeof(TValue)) {
97 if (nargs != 3*sizeof(TValue))
98 lj_err_caller(L, LJ_ERR_TABINS);
99 /* NOBARRIER: This just moves existing elements around. */
100 for (n = lj_lib_checkint(L, 2); i > n; i--) {
101 /* The set may invalidate the get pointer, so need to do it first! */
102 TValue *dst = lj_tab_setint(L, t, i);
103 cTValue *src = lj_tab_getint(t, i-1);
104 if (src) {
105 copyTV(L, dst, src);
106 } else {
107 setnilV(dst);
108 }
109 }
110 i = n;
111 }
112 {
113 TValue *dst = lj_tab_setint(L, t, i);
114 copyTV(L, dst, L->top-1);
115 lj_gc_barriert(L, t, dst);
116 }
117 return 0;
118}
119
120LJLIB_CF(table_remove)
121{
122 GCtab *t = lj_lib_checktab(L, 1);
123 int32_t e = (int32_t)lj_tab_len(t);
124 int32_t pos = lj_lib_optint(L, 2, e);
125 if (!(1 <= pos && pos <= e)) /* position is outside bounds? */
126 return 0; /* nothing to remove */
127 lua_rawgeti(L, 1, pos);
128 /* NOBARRIER: This just moves existing elements around. */
129 for (; pos < e; pos++) {
130 cTValue *src = lj_tab_getint(t, pos+1);
131 TValue *dst = lj_tab_setint(L, t, pos);
132 if (src) {
133 copyTV(L, dst, src);
134 } else {
135 setnilV(dst);
136 }
137 }
138 setnilV(lj_tab_setint(L, t, e));
139 return 1;
140}
141
142LJLIB_CF(table_concat)
143{
144 luaL_Buffer b;
145 GCtab *t = lj_lib_checktab(L, 1);
146 GCstr *sep = lj_lib_optstr(L, 2);
147 MSize seplen = sep ? sep->len : 0;
148 int32_t i = lj_lib_optint(L, 3, 1);
149 int32_t e = L->base+3 < L->top ? lj_lib_checkint(L, 4) :
150 (int32_t)lj_tab_len(t);
151 luaL_buffinit(L, &b);
152 if (i <= e) {
153 for (;;) {
154 cTValue *o;
155 lua_rawgeti(L, 1, i);
156 o = L->top-1;
157 if (!(tvisstr(o) || tvisnum(o)))
158 lj_err_callerv(L, LJ_ERR_TABCAT, typename(o), i);
159 luaL_addvalue(&b);
160 if (i++ == e) break;
161 if (seplen)
162 luaL_addlstring(&b, strdata(sep), seplen);
163 }
164 }
165 luaL_pushresult(&b);
166 return 1;
167}
168
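/* Illustration only -- exercising the separator and range handling above
** from a C host (assumes lua.h/lauxlib.h/lualib.h):
** concat({ "a", "b", "c" }, "-", 1, 2) builds "a-b".
*/
#include <stdio.h>
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

static void concat_demo(lua_State *L)
{
  int i;
  static const char *items[] = { "a", "b", "c" };
  lua_createtable(L, 3, 0);
  for (i = 0; i < 3; i++) {
    lua_pushstring(L, items[i]);
    lua_rawseti(L, -2, i+1);
  }
  lua_getglobal(L, "table");
  lua_getfield(L, -1, "concat");
  lua_pushvalue(L, -3);           /* the array */
  lua_pushliteral(L, "-");        /* separator */
  lua_pushinteger(L, 1);          /* start index */
  lua_pushinteger(L, 2);          /* end index */
  lua_call(L, 4, 1);
  printf("%s\n", lua_tostring(L, -1));  /* prints "a-b" */
  lua_pop(L, 3);                  /* result, table library, array */
}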
169/* ------------------------------------------------------------------------ */
170
171static void set2(lua_State *L, int i, int j)
172{
173 lua_rawseti(L, 1, i);
174 lua_rawseti(L, 1, j);
175}
176
177static int sort_comp(lua_State *L, int a, int b)
178{
179 if (!lua_isnil(L, 2)) { /* function? */
180 int res;
181 lua_pushvalue(L, 2);
182 lua_pushvalue(L, a-1); /* -1 to compensate function */
183 lua_pushvalue(L, b-2); /* -2 to compensate function and `a' */
184 lua_call(L, 2, 1);
185 res = lua_toboolean(L, -1);
186 lua_pop(L, 1);
187 return res;
188 } else { /* a < b? */
189 return lua_lessthan(L, a, b);
190 }
191}
192
193static void auxsort(lua_State *L, int l, int u)
194{
195 while (l < u) { /* for tail recursion */
196 int i, j;
197 /* sort elements a[l], a[(l+u)/2] and a[u] */
198 lua_rawgeti(L, 1, l);
199 lua_rawgeti(L, 1, u);
200 if (sort_comp(L, -1, -2)) /* a[u] < a[l]? */
201 set2(L, l, u); /* swap a[l] - a[u] */
202 else
203 lua_pop(L, 2);
204 if (u-l == 1) break; /* only 2 elements */
205 i = (l+u)/2;
206 lua_rawgeti(L, 1, i);
207 lua_rawgeti(L, 1, l);
208 if (sort_comp(L, -2, -1)) { /* a[i]<a[l]? */
209 set2(L, i, l);
210 } else {
211 lua_pop(L, 1); /* remove a[l] */
212 lua_rawgeti(L, 1, u);
213 if (sort_comp(L, -1, -2)) /* a[u]<a[i]? */
214 set2(L, i, u);
215 else
216 lua_pop(L, 2);
217 }
218 if (u-l == 2) break; /* only 3 elements */
219 lua_rawgeti(L, 1, i); /* Pivot */
220 lua_pushvalue(L, -1);
221 lua_rawgeti(L, 1, u-1);
222 set2(L, i, u-1);
223 /* a[l] <= P == a[u-1] <= a[u], only need to sort from l+1 to u-2 */
224 i = l; j = u-1;
225 for (;;) { /* invariant: a[l..i] <= P <= a[j..u] */
226 /* repeat ++i until a[i] >= P */
227 while (lua_rawgeti(L, 1, ++i), sort_comp(L, -1, -2)) {
228 if (i>u) lj_err_caller(L, LJ_ERR_TABSORT);
229 lua_pop(L, 1); /* remove a[i] */
230 }
231 /* repeat --j until a[j] <= P */
232 while (lua_rawgeti(L, 1, --j), sort_comp(L, -3, -1)) {
233 if (j<l) lj_err_caller(L, LJ_ERR_TABSORT);
234 lua_pop(L, 1); /* remove a[j] */
235 }
236 if (j<i) {
237 lua_pop(L, 3); /* pop pivot, a[i], a[j] */
238 break;
239 }
240 set2(L, i, j);
241 }
242 lua_rawgeti(L, 1, u-1);
243 lua_rawgeti(L, 1, i);
244 set2(L, u-1, i); /* swap pivot (a[u-1]) with a[i] */
245 /* a[l..i-1] <= a[i] == P <= a[i+1..u] */
246 /* adjust so that smaller half is in [j..i] and larger one in [l..u] */
247 if (i-l < u-i) {
248 j=l; i=i-1; l=i+2;
249 } else {
250 j=i+1; i=u; u=j-2;
251 }
252 auxsort(L, j, i); /* call recursively the smaller one */
253 } /* repeat the routine for the larger one */
254}
255
256LJLIB_CF(table_sort)
257{
258 GCtab *t = lj_lib_checktab(L, 1);
259 int32_t n = (int32_t)lj_tab_len(t);
260 lua_settop(L, 2);
261 if (!tvisnil(L->base+1))
262 lj_lib_checkfunc(L, 2);
263 auxsort(L, 1, n);
264 return 0;
265}
266
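/* Illustration only -- sort_comp() calling back into a user comparator,
** here a C function that orders numbers descendingly; auxsort() treats any
** truthy return as "a comes before b". Assumes lua.h/lauxlib.h/lualib.h.
*/
#include "lua.h"
#include "lauxlib.h"
#include "lualib.h"

static int cmp_desc(lua_State *L)
{
  lua_pushboolean(L, lua_tonumber(L, 1) > lua_tonumber(L, 2));
  return 1;
}

static void sort_desc(lua_State *L)
{
  int i;
  lua_createtable(L, 4, 0);
  for (i = 1; i <= 4; i++) {               /* t = {3, 1, 4, 2} */
    lua_pushinteger(L, (i * 3) % 5);
    lua_rawseti(L, -2, i);
  }
  lua_getglobal(L, "table");
  lua_getfield(L, -1, "sort");
  lua_pushvalue(L, -3);                    /* the table */
  lua_pushcfunction(L, cmp_desc);
  lua_call(L, 2, 0);                       /* t is now {4, 3, 2, 1} */
  lua_pop(L, 1);                           /* drop the table library */
}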
267/* ------------------------------------------------------------------------ */
268
269#include "lj_libdef.h"
270
271LUALIB_API int luaopen_table(lua_State *L)
272{
273 LJ_LIB_REG(L, table);
274 return 1;
275}
276
diff --git a/src/lj.supp b/src/lj.supp
new file mode 100644
index 00000000..9a1379d7
--- /dev/null
+++ b/src/lj.supp
@@ -0,0 +1,6 @@
1# Valgrind suppression file for LuaJIT 2.x.
2{
3 Optimized string compare
4 Memcheck:Addr4
5 fun:lj_str_cmp
6}
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
new file mode 100644
index 00000000..8ad4f8fb
--- /dev/null
+++ b/src/lj_alloc.c
@@ -0,0 +1,1232 @@
1/*
2** Bundled memory allocator.
3**
4** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc.
5** The original bears the following remark:
6**
7** This is a version (aka dlmalloc) of malloc/free/realloc written by
8** Doug Lea and released to the public domain, as explained at
9** http://creativecommons.org/licenses/publicdomain.
10**
11** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee)
12**
13** No additional copyright is claimed over the customizations.
14** Please do NOT bother the original author about this version here!
15**
16** If you want to use dlmalloc in another project, you should get
17** the original from: ftp://gee.cs.oswego.edu/pub/misc/
18** For thread-safe derivatives, take a look at:
19** - ptmalloc: http://www.malloc.de/
20** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/
21*/
22
23#define lj_alloc_c
24#define LUA_CORE
25
26/* To get the mremap prototype. Must be defined before any system includes. */

27#if defined(__linux__) && !defined(_GNU_SOURCE)
28#define _GNU_SOURCE
29#endif
30
31#include "lj_def.h"
32#include "lj_arch.h"
33#include "lj_alloc.h"
34
35#ifndef LUAJIT_USE_SYSMALLOC
36
37#define MAX_SIZE_T (~(size_t)0)
38#define MALLOC_ALIGNMENT ((size_t)8U)
39
40#define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U)
41#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
42#define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U)
43#define MAX_RELEASE_CHECK_RATE 255
44
45/* ------------------- size_t and alignment properties -------------------- */
46
47/* The byte and bit size of a size_t */
48#define SIZE_T_SIZE (sizeof(size_t))
49#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
50
51/* Some constants coerced to size_t */
52/* Annoying but necessary to avoid errors on some platforms */
53#define SIZE_T_ZERO ((size_t)0)
54#define SIZE_T_ONE ((size_t)1)
55#define SIZE_T_TWO ((size_t)2)
56#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
57#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
58#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
59
60/* The bit mask value corresponding to MALLOC_ALIGNMENT */
61#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
62
63/* the number of bytes to offset an address to align it */
64#define align_offset(A)\
65 ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
66 ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
67
68/* -------------------------- MMAP support ------------------------------- */
69
70#define MFAIL ((void *)(MAX_SIZE_T))
71#define CMFAIL ((char *)(MFAIL)) /* defined for convenience */
72
73#define IS_DIRECT_BIT (SIZE_T_ONE)
74
75#ifdef LUA_USE_WIN
76
77#if LJ_64
78#error "missing support for WIN64 to allocate in lower 2G"
79#endif
80
81#define WIN32_LEAN_AND_MEAN
82#include <windows.h>
83
84/* Win32 MMAP via VirtualAlloc */
85static LJ_AINLINE void *CALL_MMAP(size_t size)
86{
87 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
88 return (ptr != 0)? ptr: MFAIL;
89}
90
91/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
92static LJ_AINLINE void *DIRECT_MMAP(size_t size)
93{
94 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
95 PAGE_READWRITE);
96 return (ptr != 0)? ptr: MFAIL;
97}
98
99/* This function supports releasing coalesced segments */
100static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
101{
102 MEMORY_BASIC_INFORMATION minfo;
103 char *cptr = (char *)ptr;
104 while (size) {
105 if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
106 return -1;
107 if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
108 minfo.State != MEM_COMMIT || minfo.RegionSize > size)
109 return -1;
110 if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
111 return -1;
112 cptr += minfo.RegionSize;
113 size -= minfo.RegionSize;
114 }
115 return 0;
116}
117
118#else
119
120#include <sys/mman.h>
121
122#define MMAP_PROT (PROT_READ|PROT_WRITE)
123#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
124#define MAP_ANONYMOUS MAP_ANON
125#endif /* MAP_ANON */
126
127#if LJ_64
128#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|MAP_32BIT)
129#else
130#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
131#endif
132
133#define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
134#define DIRECT_MMAP(s) CALL_MMAP(s)
135#define CALL_MUNMAP(a, s) munmap((a), (s))
136
137#ifdef __linux__
138/* Need to define _GNU_SOURCE to get the mremap prototype. */
139#define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
140#endif
141
142#endif
143
144#ifndef CALL_MREMAP
145#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
146#endif
147
148/* ----------------------- Chunk representations ------------------------ */
149
150struct malloc_chunk {
151 size_t prev_foot; /* Size of previous chunk (if free). */
152 size_t head; /* Size and inuse bits. */
153 struct malloc_chunk *fd; /* double links -- used only if free. */
154 struct malloc_chunk *bk;
155};
156
157typedef struct malloc_chunk mchunk;
158typedef struct malloc_chunk *mchunkptr;
159typedef struct malloc_chunk *sbinptr; /* The type of bins of chunks */
160typedef unsigned int bindex_t; /* Described below */
161typedef unsigned int binmap_t; /* Described below */
162typedef unsigned int flag_t; /* The type of various bit flag sets */
163
164/* ------------------- Chunks sizes and alignments ----------------------- */
165
166#define MCHUNK_SIZE (sizeof(mchunk))
167
168#define CHUNK_OVERHEAD (SIZE_T_SIZE)
169
170/* Direct chunks need a second word of overhead ... */
171#define DIRECT_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
172/* ... and additional padding for fake next-chunk at foot */
173#define DIRECT_FOOT_PAD (FOUR_SIZE_T_SIZES)
174
175/* The smallest size we can malloc is an aligned minimal chunk */
176#define MIN_CHUNK_SIZE\
177 ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
178
179/* conversion from malloc headers to user pointers, and back */
180#define chunk2mem(p) ((void *)((char *)(p) + TWO_SIZE_T_SIZES))
181#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES))
182/* chunk associated with aligned address A */
183#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
184
185/* Bounds on request (not chunk) sizes. */
186#define MAX_REQUEST ((~MIN_CHUNK_SIZE+1) << 2)
187#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
188
189/* pad request bytes into a usable size */
190#define pad_request(req) \
191 (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
192
193/* pad request, checking for minimum (but not maximum) */
194#define request2size(req) \
195 (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
196
197/* ------------------ Operations on head and foot fields ----------------- */
198
199#define PINUSE_BIT (SIZE_T_ONE)
200#define CINUSE_BIT (SIZE_T_TWO)
201#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
202
203/* Head value for fenceposts */
204#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
205
206/* extraction of fields from head words */
207#define cinuse(p) ((p)->head & CINUSE_BIT)
208#define pinuse(p) ((p)->head & PINUSE_BIT)
209#define chunksize(p) ((p)->head & ~(INUSE_BITS))
210
211#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
212#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT)
213
214/* Treat space at ptr +/- offset as a chunk */
215#define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s)))
216#define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s)))
217
218/* Ptr to next or previous physical malloc_chunk. */
219#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS)))
220#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot) ))
221
222/* extract next chunk's pinuse bit */
223#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
224
225/* Get/set size at footer */
226#define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot)
227#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))
228
229/* Set size, pinuse bit, and foot */
230#define set_size_and_pinuse_of_free_chunk(p, s)\
231 ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
232
233/* Set size, pinuse bit, foot, and clear next pinuse */
234#define set_free_with_pinuse(p, s, n)\
235 (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
236
237#define is_direct(p)\
238 (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT))
239
240/* Get the internal overhead associated with chunk p */
241#define overhead_for(p)\
242 (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
243
244/* ---------------------- Overlaid data structures ----------------------- */
245
246struct malloc_tree_chunk {
247 /* The first four fields must be compatible with malloc_chunk */
248 size_t prev_foot;
249 size_t head;
250 struct malloc_tree_chunk *fd;
251 struct malloc_tree_chunk *bk;
252
253 struct malloc_tree_chunk *child[2];
254 struct malloc_tree_chunk *parent;
255 bindex_t index;
256};
257
258typedef struct malloc_tree_chunk tchunk;
259typedef struct malloc_tree_chunk *tchunkptr;
260typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */
261
262/* A little helper macro for trees */
263#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
264
265/* ----------------------------- Segments -------------------------------- */
266
267struct malloc_segment {
268 char *base; /* base address */
269 size_t size; /* allocated size */
270 struct malloc_segment *next; /* ptr to next segment */
271};
272
273typedef struct malloc_segment msegment;
274typedef struct malloc_segment *msegmentptr;
275
276/* ---------------------------- malloc_state ----------------------------- */
277
278/* Bin types, widths and sizes */
279#define NSMALLBINS (32U)
280#define NTREEBINS (32U)
281#define SMALLBIN_SHIFT (3U)
282#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
283#define TREEBIN_SHIFT (8U)
284#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
285#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
286#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
287
288struct malloc_state {
289 binmap_t smallmap;
290 binmap_t treemap;
291 size_t dvsize;
292 size_t topsize;
293 mchunkptr dv;
294 mchunkptr top;
295 size_t trim_check;
296 size_t release_checks;
297 mchunkptr smallbins[(NSMALLBINS+1)*2];
298 tbinptr treebins[NTREEBINS];
299 msegment seg;
300};
301
302typedef struct malloc_state *mstate;
303
304#define is_initialized(M) ((M)->top != 0)
305
306/* -------------------------- system alloc setup ------------------------- */
307
308/* page-align a size */
309#define page_align(S)\
310 (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE))
311
312/* granularity-align a size */
313#define granularity_align(S)\
314 (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\
315 & ~(DEFAULT_GRANULARITY - SIZE_T_ONE))
316
317#ifdef LUA_USE_WIN
318#define mmap_align(S) granularity_align(S)
319#else
320#define mmap_align(S) page_align(S)
321#endif
322
323/* True if segment S holds address A */
324#define segment_holds(S, A)\
325 ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)
326
327/* Return segment holding given address */
328static msegmentptr segment_holding(mstate m, char *addr)
329{
330 msegmentptr sp = &m->seg;
331 for (;;) {
332 if (addr >= sp->base && addr < sp->base + sp->size)
333 return sp;
334 if ((sp = sp->next) == 0)
335 return 0;
336 }
337}
338
339/* Return true if segment contains a segment link */
340static int has_segment_link(mstate m, msegmentptr ss)
341{
342 msegmentptr sp = &m->seg;
343 for (;;) {
344 if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size)
345 return 1;
346 if ((sp = sp->next) == 0)
347 return 0;
348 }
349}
350
351/*
352 TOP_FOOT_SIZE is padding at the end of a segment, including space
353 that may be needed to place segment records and fenceposts when new
354 noncontiguous segments are added.
355*/
356#define TOP_FOOT_SIZE\
357 (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
358
359/* ---------------------------- Indexing Bins ---------------------------- */
360
361#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
362#define small_index(s) ((s) >> SMALLBIN_SHIFT)
363#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
364#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
365
366/* addressing by index. See above about smallbin repositioning */
367#define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i)<<1])))
368#define treebin_at(M,i) (&((M)->treebins[i]))
369
370/* assign tree index for size S to variable I */
371#define compute_tree_index(S, I)\
372{\
373 unsigned int X = S >> TREEBIN_SHIFT;\
374 if (X == 0) {\
375 I = 0;\
376 } else if (X > 0xFFFF) {\
377 I = NTREEBINS-1;\
378 } else {\
379 unsigned int K = lj_fls(X);\
380 I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
381 }\
382}
383
384/* Bit representing maximum resolved size in a treebin at i */
385#define bit_for_tree_index(i) \
386 (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
387
388/* Shift placing maximum resolved bit in a treebin at i as sign bit */
389#define leftshift_for_tree_index(i) \
390 ((i == NTREEBINS-1)? 0 : \
391 ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
392
393/* The size of the smallest chunk held in bin with index i */
394#define minsize_for_tree_index(i) \
395 ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
396 (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
397
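/* Illustration only -- a standalone worked sketch of compute_tree_index():
** the bits of the size above TREEBIN_SHIFT select one of 32 tree bins, two
** bins per power of two. fls_demo stands in for lj_fls (index of the
** highest set bit); the *_DEMO names are local to this sketch.
*/
#include <stdio.h>

#define TREEBIN_SHIFT_DEMO 8
#define NTREEBINS_DEMO 32

static unsigned int fls_demo(unsigned int x)
{
  unsigned int k = 0;
  while (x >>= 1) k++;
  return k;
}

static unsigned int tree_index_demo(size_t s)
{
  unsigned int x = (unsigned int)(s >> TREEBIN_SHIFT_DEMO);
  if (x == 0) return 0;
  if (x > 0xFFFF) return NTREEBINS_DEMO - 1;
  {
    unsigned int k = fls_demo(x);
    return (k << 1) + ((unsigned int)(s >> (k + (TREEBIN_SHIFT_DEMO - 1))) & 1);
  }
}

int main(void)
{
  printf("%u %u %u\n",
         tree_index_demo(256),    /* smallest large chunk -> bin 0 */
         tree_index_demo(384),    /* upper half of [256,512) -> bin 1 */
         tree_index_demo(1024));  /* sizes in [1024,1536) -> bin 4 */
  return 0;
}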
398/* ------------------------ Operations on bin maps ----------------------- */
399
400/* bit corresponding to given index */
401#define idx2bit(i) ((binmap_t)(1) << (i))
402
403/* Mark/Clear bits with given index */
404#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
405#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
406#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
407
408#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
409#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
410#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
411
412/* mask with all bits to left of least bit of x on */
413#define left_bits(x) ((x<<1) | (~(x<<1)+1))
414
415/* Set cinuse bit and pinuse bit of next chunk */
416#define set_inuse(M,p,s)\
417 ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
418 ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
419
420/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
421#define set_inuse_and_pinuse(M,p,s)\
422 ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
423 ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
424
425/* Set size, cinuse and pinuse bit of this chunk */
426#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
427 ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
428
429/* ----------------------- Operations on smallbins ----------------------- */
430
431/* Link a free chunk into a smallbin */
432#define insert_small_chunk(M, P, S) {\
433 bindex_t I = small_index(S);\
434 mchunkptr B = smallbin_at(M, I);\
435 mchunkptr F = B;\
436 if (!smallmap_is_marked(M, I))\
437 mark_smallmap(M, I);\
438 else\
439 F = B->fd;\
440 B->fd = P;\
441 F->bk = P;\
442 P->fd = F;\
443 P->bk = B;\
444}
445
446/* Unlink a chunk from a smallbin */
447#define unlink_small_chunk(M, P, S) {\
448 mchunkptr F = P->fd;\
449 mchunkptr B = P->bk;\
450 bindex_t I = small_index(S);\
451 if (F == B) {\
452 clear_smallmap(M, I);\
453 } else {\
454 F->bk = B;\
455 B->fd = F;\
456 }\
457}
458
459/* Unlink the first chunk from a smallbin */
460#define unlink_first_small_chunk(M, B, P, I) {\
461 mchunkptr F = P->fd;\
462 if (B == F) {\
463 clear_smallmap(M, I);\
464 } else {\
465 B->fd = F;\
466 F->bk = B;\
467 }\
468}
469
470/* Replace dv node, binning the old one */
471/* Used only when dvsize known to be small */
472#define replace_dv(M, P, S) {\
473 size_t DVS = M->dvsize;\
474 if (DVS != 0) {\
475 mchunkptr DV = M->dv;\
476 insert_small_chunk(M, DV, DVS);\
477 }\
478 M->dvsize = S;\
479 M->dv = P;\
480}
481
482/* ------------------------- Operations on trees ------------------------- */
483
484/* Insert chunk into tree */
485#define insert_large_chunk(M, X, S) {\
486 tbinptr *H;\
487 bindex_t I;\
488 compute_tree_index(S, I);\
489 H = treebin_at(M, I);\
490 X->index = I;\
491 X->child[0] = X->child[1] = 0;\
492 if (!treemap_is_marked(M, I)) {\
493 mark_treemap(M, I);\
494 *H = X;\
495 X->parent = (tchunkptr)H;\
496 X->fd = X->bk = X;\
497 } else {\
498 tchunkptr T = *H;\
499 size_t K = S << leftshift_for_tree_index(I);\
500 for (;;) {\
501 if (chunksize(T) != S) {\
502 tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
503 K <<= 1;\
504 if (*C != 0) {\
505 T = *C;\
506 } else {\
507 *C = X;\
508 X->parent = T;\
509 X->fd = X->bk = X;\
510 break;\
511 }\
512 } else {\
513 tchunkptr F = T->fd;\
514 T->fd = F->bk = X;\
515 X->fd = F;\
516 X->bk = T;\
517 X->parent = 0;\
518 break;\
519 }\
520 }\
521 }\
522}
523
524#define unlink_large_chunk(M, X) {\
525 tchunkptr XP = X->parent;\
526 tchunkptr R;\
527 if (X->bk != X) {\
528 tchunkptr F = X->fd;\
529 R = X->bk;\
530 F->bk = R;\
531 R->fd = F;\
532 } else {\
533 tchunkptr *RP;\
534 if (((R = *(RP = &(X->child[1]))) != 0) ||\
535 ((R = *(RP = &(X->child[0]))) != 0)) {\
536 tchunkptr *CP;\
537 while ((*(CP = &(R->child[1])) != 0) ||\
538 (*(CP = &(R->child[0])) != 0)) {\
539 R = *(RP = CP);\
540 }\
541 *RP = 0;\
542 }\
543 }\
544 if (XP != 0) {\
545 tbinptr *H = treebin_at(M, X->index);\
546 if (X == *H) {\
547 if ((*H = R) == 0) \
548 clear_treemap(M, X->index);\
549 } else {\
550 if (XP->child[0] == X) \
551 XP->child[0] = R;\
552 else \
553 XP->child[1] = R;\
554 }\
555 if (R != 0) {\
556 tchunkptr C0, C1;\
557 R->parent = XP;\
558 if ((C0 = X->child[0]) != 0) {\
559 R->child[0] = C0;\
560 C0->parent = R;\
561 }\
562 if ((C1 = X->child[1]) != 0) {\
563 R->child[1] = C1;\
564 C1->parent = R;\
565 }\
566 }\
567 }\
568}
569
570/* Relays to large vs small bin operations */
571
572#define insert_chunk(M, P, S)\
573 if (is_small(S)) { insert_small_chunk(M, P, S)\
574 } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
575
576#define unlink_chunk(M, P, S)\
577 if (is_small(S)) { unlink_small_chunk(M, P, S)\
578 } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
579
580/* ----------------------- Direct-mmapping chunks ----------------------- */
581
582static void *direct_alloc(size_t nb)
583{
584 size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
585 if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */
586 char *mm = (char *)(DIRECT_MMAP(mmsize));
587 if (mm != CMFAIL) {
588 size_t offset = align_offset(chunk2mem(mm));
589 size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
590 mchunkptr p = (mchunkptr)(mm + offset);
591 p->prev_foot = offset | IS_DIRECT_BIT;
592 p->head = psize|CINUSE_BIT;
593 chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
594 chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
595 return chunk2mem(p);
596 }
597 }
598 return NULL;
599}
600
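/* Layout sketch for a direct (mmapped) chunk as set up by direct_alloc()
** above; this is inferred from the code here and from the IS_DIRECT_BIT
** path in lj_alloc_free() further down:
**
**   mm                    p = mm + offset               p + psize
**   |<------ offset ----->|<----------- psize --------->|<- DIRECT_FOOT_PAD ->|
**    alignment slack       user chunk, CINUSE_BIT set    fencepost + zero head
**
** p->prev_foot stores offset|IS_DIRECT_BIT, so the free path can recover the
** original mapping base ((char *)p - offset) and return the whole region to
** CALL_MUNMAP without touching any bins.
*/
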
601static mchunkptr direct_resize(mchunkptr oldp, size_t nb)
602{
603 size_t oldsize = chunksize(oldp);
604 if (is_small(nb)) /* Can't shrink direct regions below small size */
605 return NULL;
606 /* Keep old chunk if big enough but not too big */
607 if (oldsize >= nb + SIZE_T_SIZE &&
608 (oldsize - nb) <= (DEFAULT_GRANULARITY << 1)) {
609 return oldp;
610 } else {
611 size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT;
612 size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD;
613 size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
614 char *cp = (char *)CALL_MREMAP((char *)oldp - offset,
615 oldmmsize, newmmsize, 1);
616 if (cp != CMFAIL) {
617 mchunkptr newp = (mchunkptr)(cp + offset);
618 size_t psize = newmmsize - offset - DIRECT_FOOT_PAD;
619 newp->head = psize|CINUSE_BIT;
620 chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
621 chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
622 return newp;
623 }
624 }
625 return NULL;
626}
627
628/* -------------------------- mspace management -------------------------- */
629
630/* Initialize top chunk and its size */
631static void init_top(mstate m, mchunkptr p, size_t psize)
632{
633 /* Ensure alignment */
634 size_t offset = align_offset(chunk2mem(p));
635 p = (mchunkptr)((char *)p + offset);
636 psize -= offset;
637
638 m->top = p;
639 m->topsize = psize;
640 p->head = psize | PINUSE_BIT;
641 /* set size of fake trailing chunk holding overhead space only once */
642 chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
643 m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */
644}
645
646/* Initialize bins for a new mstate that is otherwise zeroed out */
647static void init_bins(mstate m)
648{
649 /* Establish circular links for smallbins */
650 bindex_t i;
651 for (i = 0; i < NSMALLBINS; i++) {
652 sbinptr bin = smallbin_at(m,i);
653 bin->fd = bin->bk = bin;
654 }
655}
656
657/* Allocate a chunk from newbase and prepend the free remainder to the first chunk at oldbase. */
658static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb)
659{
660 mchunkptr p = align_as_chunk(newbase);
661 mchunkptr oldfirst = align_as_chunk(oldbase);
662 size_t psize = (size_t)((char *)oldfirst - (char *)p);
663 mchunkptr q = chunk_plus_offset(p, nb);
664 size_t qsize = psize - nb;
665 set_size_and_pinuse_of_inuse_chunk(m, p, nb);
666
667 /* consolidate remainder with first chunk of old base */
668 if (oldfirst == m->top) {
669 size_t tsize = m->topsize += qsize;
670 m->top = q;
671 q->head = tsize | PINUSE_BIT;
672 } else if (oldfirst == m->dv) {
673 size_t dsize = m->dvsize += qsize;
674 m->dv = q;
675 set_size_and_pinuse_of_free_chunk(q, dsize);
676 } else {
677 if (!cinuse(oldfirst)) {
678 size_t nsize = chunksize(oldfirst);
679 unlink_chunk(m, oldfirst, nsize);
680 oldfirst = chunk_plus_offset(oldfirst, nsize);
681 qsize += nsize;
682 }
683 set_free_with_pinuse(q, qsize, oldfirst);
684 insert_chunk(m, q, qsize);
685 }
686
687 return chunk2mem(p);
688}
689
690/* Add a segment to hold a new noncontiguous region */
691static void add_segment(mstate m, char *tbase, size_t tsize)
692{
693 /* Determine locations and sizes of segment, fenceposts, old top */
694 char *old_top = (char *)m->top;
695 msegmentptr oldsp = segment_holding(m, old_top);
696 char *old_end = oldsp->base + oldsp->size;
697 size_t ssize = pad_request(sizeof(struct malloc_segment));
698 char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
699 size_t offset = align_offset(chunk2mem(rawsp));
700 char *asp = rawsp + offset;
701 char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
702 mchunkptr sp = (mchunkptr)csp;
703 msegmentptr ss = (msegmentptr)(chunk2mem(sp));
704 mchunkptr tnext = chunk_plus_offset(sp, ssize);
705 mchunkptr p = tnext;
706
707 /* reset top to new space */
708 init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
709
710 /* Set up segment record */
711 set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
712 *ss = m->seg; /* Push current record */
713 m->seg.base = tbase;
714 m->seg.size = tsize;
715 m->seg.next = ss;
716
717 /* Insert trailing fenceposts */
718 for (;;) {
719 mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
720 p->head = FENCEPOST_HEAD;
721 if ((char *)(&(nextp->head)) < old_end)
722 p = nextp;
723 else
724 break;
725 }
726
727 /* Insert the rest of old top into a bin as an ordinary free chunk */
728 if (csp != old_top) {
729 mchunkptr q = (mchunkptr)old_top;
730 size_t psize = (size_t)(csp - old_top);
731 mchunkptr tn = chunk_plus_offset(q, psize);
732 set_free_with_pinuse(q, psize, tn);
733 insert_chunk(m, q, psize);
734 }
735}
736
737/* -------------------------- System allocation -------------------------- */
738
739static void *alloc_sys(mstate m, size_t nb)
740{
741 char *tbase = CMFAIL;
742 size_t tsize = 0;
743
744 /* Directly map large chunks */
745 if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
746 void *mem = direct_alloc(nb);
747 if (mem != 0)
748 return mem;
749 }
750
751 {
752 size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
753 size_t rsize = granularity_align(req);
754 if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
755 char *mp = (char *)(CALL_MMAP(rsize));
756 if (mp != CMFAIL) {
757 tbase = mp;
758 tsize = rsize;
759 }
760 }
761 }
762
763 if (tbase != CMFAIL) {
764 msegmentptr sp = &m->seg;
765 /* Try to merge with an existing segment */
766 while (sp != 0 && tbase != sp->base + sp->size)
767 sp = sp->next;
768 if (sp != 0 && segment_holds(sp, m->top)) { /* append */
769 sp->size += tsize;
770 init_top(m, m->top, m->topsize + tsize);
771 } else {
772 sp = &m->seg;
773 while (sp != 0 && sp->base != tbase + tsize)
774 sp = sp->next;
775 if (sp != 0) {
776 char *oldbase = sp->base;
777 sp->base = tbase;
778 sp->size += tsize;
779 return prepend_alloc(m, tbase, oldbase, nb);
780 } else {
781 add_segment(m, tbase, tsize);
782 }
783 }
784
785 if (nb < m->topsize) { /* Allocate from new or extended top space */
786 size_t rsize = m->topsize -= nb;
787 mchunkptr p = m->top;
788 mchunkptr r = m->top = chunk_plus_offset(p, nb);
789 r->head = rsize | PINUSE_BIT;
790 set_size_and_pinuse_of_inuse_chunk(m, p, nb);
791 return chunk2mem(p);
792 }
793 }
794
795 return NULL;
796}
797
798/* ----------------------- system deallocation -------------------------- */
799
800/* Unmap and unlink any mmapped segments that don't contain used chunks */
801static size_t release_unused_segments(mstate m)
802{
803 size_t released = 0;
804 size_t nsegs = 0;
805 msegmentptr pred = &m->seg;
806 msegmentptr sp = pred->next;
807 while (sp != 0) {
808 char *base = sp->base;
809 size_t size = sp->size;
810 msegmentptr next = sp->next;
811 nsegs++;
812 {
813 mchunkptr p = align_as_chunk(base);
814 size_t psize = chunksize(p);
815 /* Can unmap if first chunk holds entire segment and not pinned */
816 if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
817 tchunkptr tp = (tchunkptr)p;
818 if (p == m->dv) {
819 m->dv = 0;
820 m->dvsize = 0;
821 } else {
822 unlink_large_chunk(m, tp);
823 }
824 if (CALL_MUNMAP(base, size) == 0) {
825 released += size;
826 /* unlink obsoleted record */
827 sp = pred;
828 sp->next = next;
829 } else { /* back out if cannot unmap */
830 insert_large_chunk(m, tp, psize);
831 }
832 }
833 }
834 pred = sp;
835 sp = next;
836 }
837 /* Reset check counter */
838 m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ?
839 nsegs : MAX_RELEASE_CHECK_RATE;
840 return released;
841}
842
843static int alloc_trim(mstate m, size_t pad)
844{
845 size_t released = 0;
846 if (pad < MAX_REQUEST && is_initialized(m)) {
847 pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
848
849 if (m->topsize > pad) {
850 /* Shrink top space in granularity-size units, keeping at least one */
851 size_t unit = DEFAULT_GRANULARITY;
852 size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
853 SIZE_T_ONE) * unit;
854 msegmentptr sp = segment_holding(m, (char *)m->top);
855
856 if (sp->size >= extra &&
857 !has_segment_link(m, sp)) { /* can't shrink if pinned */
858 size_t newsize = sp->size - extra;
859 /* Prefer mremap, fall back to munmap */
860 if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
861 (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
862 released = extra;
863 }
864 }
865
866 if (released != 0) {
867 sp->size -= released;
868 init_top(m, m->top, m->topsize - released);
869 }
870 }
871
872 /* Unmap any unused mmapped segments */
873 released += release_unused_segments(m);
874
875 /* On failure, disable autotrim to avoid repeated failed future calls */
876 if (released == 0 && m->topsize > m->trim_check)
877 m->trim_check = MAX_SIZE_T;
878 }
879
880 return (released != 0)? 1 : 0;
881}
882
883/* ---------------------------- malloc support --------------------------- */
884
885/* allocate a large request from the best fitting chunk in a treebin */
886static void *tmalloc_large(mstate m, size_t nb)
887{
888 tchunkptr v = 0;
889 size_t rsize = ~nb+1; /* Unsigned negation */
890 tchunkptr t;
891 bindex_t idx;
892 compute_tree_index(nb, idx);
893
894 if ((t = *treebin_at(m, idx)) != 0) {
895 /* Traverse tree for this bin looking for node with size == nb */
896 size_t sizebits = nb << leftshift_for_tree_index(idx);
897 tchunkptr rst = 0; /* The deepest untaken right subtree */
898 for (;;) {
899 tchunkptr rt;
900 size_t trem = chunksize(t) - nb;
901 if (trem < rsize) {
902 v = t;
903 if ((rsize = trem) == 0)
904 break;
905 }
906 rt = t->child[1];
907 t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
908 if (rt != 0 && rt != t)
909 rst = rt;
910 if (t == 0) {
911 t = rst; /* set t to least subtree holding sizes > nb */
912 break;
913 }
914 sizebits <<= 1;
915 }
916 }
917
918 if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
919 binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
920 if (leftbits != 0)
921 t = *treebin_at(m, lj_ffs(leftbits));
922 }
923
924 while (t != 0) { /* find smallest of tree or subtree */
925 size_t trem = chunksize(t) - nb;
926 if (trem < rsize) {
927 rsize = trem;
928 v = t;
929 }
930 t = leftmost_child(t);
931 }
932
933 /* If dv is a better fit, return NULL so malloc will use it */
934 if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
935 mchunkptr r = chunk_plus_offset(v, nb);
936 unlink_large_chunk(m, v);
937 if (rsize < MIN_CHUNK_SIZE) {
938 set_inuse_and_pinuse(m, v, (rsize + nb));
939 } else {
940 set_size_and_pinuse_of_inuse_chunk(m, v, nb);
941 set_size_and_pinuse_of_free_chunk(r, rsize);
942 insert_chunk(m, r, rsize);
943 }
944 return chunk2mem(v);
945 }
946 return NULL;
947}
948
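/* Reading notes on the search above (an editorial summary inferred from the
** code, not an authoritative description): each treebin is a bitwise trie
** keyed on chunk size. sizebits is nb shifted so that its current top bit
** selects child[0] or child[1] at every level, v tracks the best (smallest
** adequate) chunk found so far, and rst remembers the deepest right subtree
** that was not taken, so that when the exact size is absent the walk can
** restart there and converge on the least chunk larger than nb.
*/
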
949/* allocate a small request from the best fitting chunk in a treebin */
950static void *tmalloc_small(mstate m, size_t nb)
951{
952 tchunkptr t, v;
953 mchunkptr r;
954 size_t rsize;
955 bindex_t i = lj_ffs(m->treemap);
956
957 v = t = *treebin_at(m, i);
958 rsize = chunksize(t) - nb;
959
960 while ((t = leftmost_child(t)) != 0) {
961 size_t trem = chunksize(t) - nb;
962 if (trem < rsize) {
963 rsize = trem;
964 v = t;
965 }
966 }
967
968 r = chunk_plus_offset(v, nb);
969 unlink_large_chunk(m, v);
970 if (rsize < MIN_CHUNK_SIZE) {
971 set_inuse_and_pinuse(m, v, (rsize + nb));
972 } else {
973 set_size_and_pinuse_of_inuse_chunk(m, v, nb);
974 set_size_and_pinuse_of_free_chunk(r, rsize);
975 replace_dv(m, r, rsize);
976 }
977 return chunk2mem(v);
978}
979
980/* ----------------------------------------------------------------------- */
981
982void *lj_alloc_create(void)
983{
984 size_t tsize = DEFAULT_GRANULARITY;
985 char *tbase = (char *)(CALL_MMAP(tsize));
986 if (tbase != CMFAIL) {
987 size_t msize = pad_request(sizeof(struct malloc_state));
988 mchunkptr mn;
989 mchunkptr msp = align_as_chunk(tbase);
990 mstate m = (mstate)(chunk2mem(msp));
991 memset(m, 0, msize);
992 msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
993 m->seg.base = tbase;
994 m->seg.size = tsize;
995 m->release_checks = MAX_RELEASE_CHECK_RATE;
996 init_bins(m);
997 mn = next_chunk(mem2chunk(m));
998 init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
999 return m;
1000 }
1001 return NULL;
1002}
1003
1004void lj_alloc_destroy(void *msp)
1005{
1006 mstate ms = (mstate)msp;
1007 msegmentptr sp = &ms->seg;
1008 while (sp != 0) {
1009 char *base = sp->base;
1010 size_t size = sp->size;
1011 sp = sp->next;
1012 CALL_MUNMAP(base, size);
1013 }
1014}
1015
1016static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize)
1017{
1018 mstate ms = (mstate)msp;
1019 void *mem;
1020 size_t nb;
1021 if (nsize <= MAX_SMALL_REQUEST) {
1022 bindex_t idx;
1023 binmap_t smallbits;
1024 nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize);
1025 idx = small_index(nb);
1026 smallbits = ms->smallmap >> idx;
1027
1028 if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
1029 mchunkptr b, p;
1030 idx += ~smallbits & 1; /* Uses next bin if idx empty */
1031 b = smallbin_at(ms, idx);
1032 p = b->fd;
1033 unlink_first_small_chunk(ms, b, p, idx);
1034 set_inuse_and_pinuse(ms, p, small_index2size(idx));
1035 mem = chunk2mem(p);
1036 return mem;
1037 } else if (nb > ms->dvsize) {
1038 if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
1039 mchunkptr b, p, r;
1040 size_t rsize;
1041 binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
1042 bindex_t i = lj_ffs(leftbits);
1043 b = smallbin_at(ms, i);
1044 p = b->fd;
1045 unlink_first_small_chunk(ms, b, p, i);
1046 rsize = small_index2size(i) - nb;
1047	/* With 4-byte sizes the fit here can never be remainderless. */
1048 if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) {
1049 set_inuse_and_pinuse(ms, p, small_index2size(i));
1050 } else {
1051 set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
1052 r = chunk_plus_offset(p, nb);
1053 set_size_and_pinuse_of_free_chunk(r, rsize);
1054 replace_dv(ms, r, rsize);
1055 }
1056 mem = chunk2mem(p);
1057 return mem;
1058 } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
1059 return mem;
1060 }
1061 }
1062 } else if (nsize >= MAX_REQUEST) {
1063 nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
1064 } else {
1065 nb = pad_request(nsize);
1066 if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
1067 return mem;
1068 }
1069 }
1070
1071 if (nb <= ms->dvsize) {
1072 size_t rsize = ms->dvsize - nb;
1073 mchunkptr p = ms->dv;
1074 if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
1075 mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
1076 ms->dvsize = rsize;
1077 set_size_and_pinuse_of_free_chunk(r, rsize);
1078 set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
1079 } else { /* exhaust dv */
1080 size_t dvs = ms->dvsize;
1081 ms->dvsize = 0;
1082 ms->dv = 0;
1083 set_inuse_and_pinuse(ms, p, dvs);
1084 }
1085 mem = chunk2mem(p);
1086 return mem;
1087 } else if (nb < ms->topsize) { /* Split top */
1088 size_t rsize = ms->topsize -= nb;
1089 mchunkptr p = ms->top;
1090 mchunkptr r = ms->top = chunk_plus_offset(p, nb);
1091 r->head = rsize | PINUSE_BIT;
1092 set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
1093 mem = chunk2mem(p);
1094 return mem;
1095 }
1096 return alloc_sys(ms, nb);
1097}
1098
1099static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr)
1100{
1101 if (ptr != 0) {
1102 mchunkptr p = mem2chunk(ptr);
1103 mstate fm = (mstate)msp;
1104 size_t psize = chunksize(p);
1105 mchunkptr next = chunk_plus_offset(p, psize);
1106 if (!pinuse(p)) {
1107 size_t prevsize = p->prev_foot;
1108 if ((prevsize & IS_DIRECT_BIT) != 0) {
1109 prevsize &= ~IS_DIRECT_BIT;
1110 psize += prevsize + DIRECT_FOOT_PAD;
1111 CALL_MUNMAP((char *)p - prevsize, psize);
1112 return NULL;
1113 } else {
1114 mchunkptr prev = chunk_minus_offset(p, prevsize);
1115 psize += prevsize;
1116 p = prev;
1117 /* consolidate backward */
1118 if (p != fm->dv) {
1119 unlink_chunk(fm, p, prevsize);
1120 } else if ((next->head & INUSE_BITS) == INUSE_BITS) {
1121 fm->dvsize = psize;
1122 set_free_with_pinuse(p, psize, next);
1123 return NULL;
1124 }
1125 }
1126 }
1127 if (!cinuse(next)) { /* consolidate forward */
1128 if (next == fm->top) {
1129 size_t tsize = fm->topsize += psize;
1130 fm->top = p;
1131 p->head = tsize | PINUSE_BIT;
1132 if (p == fm->dv) {
1133 fm->dv = 0;
1134 fm->dvsize = 0;
1135 }
1136 if (tsize > fm->trim_check)
1137 alloc_trim(fm, 0);
1138 return NULL;
1139 } else if (next == fm->dv) {
1140 size_t dsize = fm->dvsize += psize;
1141 fm->dv = p;
1142 set_size_and_pinuse_of_free_chunk(p, dsize);
1143 return NULL;
1144 } else {
1145 size_t nsize = chunksize(next);
1146 psize += nsize;
1147 unlink_chunk(fm, next, nsize);
1148 set_size_and_pinuse_of_free_chunk(p, psize);
1149 if (p == fm->dv) {
1150 fm->dvsize = psize;
1151 return NULL;
1152 }
1153 }
1154 } else {
1155 set_free_with_pinuse(p, psize, next);
1156 }
1157
1158 if (is_small(psize)) {
1159 insert_small_chunk(fm, p, psize);
1160 } else {
1161 tchunkptr tp = (tchunkptr)p;
1162 insert_large_chunk(fm, tp, psize);
1163 if (--fm->release_checks == 0)
1164 release_unused_segments(fm);
1165 }
1166 }
1167 return NULL;
1168}
1169
1170static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
1171{
1172 if (nsize >= MAX_REQUEST) {
1173 return NULL;
1174 } else {
1175 mstate m = (mstate)msp;
1176 mchunkptr oldp = mem2chunk(ptr);
1177 size_t oldsize = chunksize(oldp);
1178 mchunkptr next = chunk_plus_offset(oldp, oldsize);
1179 mchunkptr newp = 0;
1180 size_t nb = request2size(nsize);
1181
1182 /* Try to either shrink or extend into top. Else malloc-copy-free */
1183 if (is_direct(oldp)) {
1184 newp = direct_resize(oldp, nb); /* this may return NULL. */
1185 } else if (oldsize >= nb) { /* already big enough */
1186 size_t rsize = oldsize - nb;
1187 newp = oldp;
1188 if (rsize >= MIN_CHUNK_SIZE) {
1189 mchunkptr remainder = chunk_plus_offset(newp, nb);
1190 set_inuse(m, newp, nb);
1191 set_inuse(m, remainder, rsize);
1192 lj_alloc_free(m, chunk2mem(remainder));
1193 }
1194 } else if (next == m->top && oldsize + m->topsize > nb) {
1195 /* Expand into top */
1196 size_t newsize = oldsize + m->topsize;
1197 size_t newtopsize = newsize - nb;
1198 mchunkptr newtop = chunk_plus_offset(oldp, nb);
1199 set_inuse(m, oldp, nb);
1200      newtop->head = newtopsize | PINUSE_BIT;
1201 m->top = newtop;
1202 m->topsize = newtopsize;
1203 newp = oldp;
1204 }
1205
1206 if (newp != 0) {
1207 return chunk2mem(newp);
1208 } else {
1209 void *newmem = lj_alloc_malloc(m, nsize);
1210 if (newmem != 0) {
1211 size_t oc = oldsize - overhead_for(oldp);
1212 memcpy(newmem, ptr, oc < nsize ? oc : nsize);
1213 lj_alloc_free(m, ptr);
1214 }
1215 return newmem;
1216 }
1217 }
1218}
1219
1220void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)
1221{
1222 (void)osize;
1223 if (nsize == 0) {
1224 return lj_alloc_free(msp, ptr);
1225 } else if (ptr == NULL) {
1226 return lj_alloc_malloc(msp, nsize);
1227 } else {
1228 return lj_alloc_realloc(msp, ptr, nsize);
1229 }
1230}
1231
1232#endif
diff --git a/src/lj_alloc.h b/src/lj_alloc.h
new file mode 100644
index 00000000..f87a7cf3
--- /dev/null
+++ b/src/lj_alloc.h
@@ -0,0 +1,17 @@
1/*
2** Bundled memory allocator.
3** Donated to the public domain.
4*/
5
6#ifndef _LJ_ALLOC_H
7#define _LJ_ALLOC_H
8
9#include "lj_def.h"
10
11#ifndef LUAJIT_USE_SYSMALLOC
12LJ_FUNC void *lj_alloc_create(void);
13LJ_FUNC void lj_alloc_destroy(void *msp);
14LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
15#endif
16
17#endif
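
/* lj_alloc_f() matches the standard lua_Alloc signature from lua.h, so the
** handle returned by lj_alloc_create() is simply passed as the allocator
** userdata. A rough embedder-side sketch of that wiring; the helper name is
** made up and the actual hookup happens inside the VM's own state setup:
*/
#include "lua.h"
#include "lj_alloc.h"

static lua_State *new_state_with_bundled_alloc(void)
{
  lua_State *L;
  void *msp = lj_alloc_create();      /* One mspace per VM instance. */
  if (msp == NULL) return NULL;
  L = lua_newstate(lj_alloc_f, msp);  /* ptr==NULL allocates, nsize==0 frees. */
  if (L == NULL)
    lj_alloc_destroy(msp);            /* Back out if state creation fails. */
  return L;
}
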
diff --git a/src/lj_api.c b/src/lj_api.c
new file mode 100644
index 00000000..ea4eaf66
--- /dev/null
+++ b/src/lj_api.c
@@ -0,0 +1,1046 @@
1/*
2** Public Lua/C API.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_api_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_tab.h"
17#include "lj_func.h"
18#include "lj_udata.h"
19#include "lj_meta.h"
20#include "lj_state.h"
21#include "lj_frame.h"
22#include "lj_trace.h"
23#include "lj_vm.h"
24#include "lj_lex.h"
25#include "lj_parse.h"
26
27/* -- Common helper functions --------------------------------------------- */
28
29#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base))
30#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L))
31
32static TValue *index2adr(lua_State *L, int idx)
33{
34 if (idx > 0) {
35 TValue *o = L->base + (idx - 1);
36 return o < L->top ? o : niltv(L);
37 } else if (idx > LUA_REGISTRYINDEX) {
38 api_check(L, idx != 0 && -idx <= L->top - L->base);
39 return L->top + idx;
40 } else if (idx == LUA_GLOBALSINDEX) {
41 TValue *o = &G(L)->tmptv;
42 settabV(L, o, tabref(L->env));
43 return o;
44 } else if (idx == LUA_REGISTRYINDEX) {
45 return registry(L);
46 } else {
47 GCfunc *fn = curr_func(L);
48 api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn));
49 if (idx == LUA_ENVIRONINDEX) {
50 TValue *o = &G(L)->tmptv;
51 settabV(L, o, tabref(fn->c.env));
52 return o;
53 } else {
54 idx = LUA_GLOBALSINDEX - idx;
55 return idx <= fn->c.nupvalues ? &fn->c.upvalue[idx-1] : niltv(L);
56 }
57 }
58}
59
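/* Quick illustration of the index classes index2adr() distinguishes, using
** only the standard Lua 5.1 calls declared in lua.h; the helper itself is
** illustrative and not part of the patch:
*/
static void stack_index_examples(lua_State *L)
{
  lua_Number n;
  lua_pushnumber(L, 42.0);                     /* Slot 1 from the base (-2 from the top after the next push). */
  lua_pushstring(L, "hello");                  /* Slot 2 from the base, -1 from the top. */
  n = lua_tonumber(L, 1);                      /* Positive index: L->base + (idx-1). */
  (void)n;
  (void)lua_tostring(L, -1);                   /* Negative index: L->top + idx. */
  lua_getfield(L, LUA_GLOBALSINDEX, "print");  /* Pseudo-index: resolved to the globals table. */
  lua_pop(L, 3);                               /* Drop the number, the string and print. */
}
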
60static TValue *stkindex2adr(lua_State *L, int idx)
61{
62 if (idx > 0) {
63 TValue *o = L->base + (idx - 1);
64 return o < L->top ? o : niltv(L);
65 } else {
66 api_check(L, idx != 0 && -idx <= L->top - L->base);
67 return L->top + idx;
68 }
69}
70
71static GCtab *getcurrenv(lua_State *L)
72{
73 GCfunc *fn = curr_func(L);
74 return fn->c.gct == ~LJ_TFUNC ? tabref(fn->c.env) : tabref(L->env);
75}
76
77/* -- Miscellaneous API functions ----------------------------------------- */
78
79LUA_API int lua_status(lua_State *L)
80{
81 return L->status;
82}
83
84LUA_API int lua_checkstack(lua_State *L, int size)
85{
86 if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
87 return 0; /* Stack overflow. */
88 } else if (size > 0) {
89 lj_state_checkstack(L, (MSize)size);
90 }
91 return 1;
92}
93
94LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
95{
96 TValue *f, *t;
97 if (from == to) return;
98 api_checknelems(from, n);
99 api_check(from, G(from) == G(to));
100 lj_state_checkstack(to, (MSize)n);
101 f = from->top;
102 t = to->top = to->top + n;
103 while (--n >= 0) copyTV(to, --t, --f);
104 from->top = f;
105}
106
107/* -- Stack manipulation -------------------------------------------------- */
108
109LUA_API int lua_gettop(lua_State *L)
110{
111 return cast_int(L->top - L->base);
112}
113
114LUA_API void lua_settop(lua_State *L, int idx)
115{
116 if (idx >= 0) {
117 api_check(L, idx <= L->maxstack - L->base);
118 if (L->base + idx > L->top) {
119 if (L->base + idx >= L->maxstack)
120 lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
121 do { setnilV(L->top++); } while (L->top < L->base + idx);
122 } else {
123 L->top = L->base + idx;
124 }
125 } else {
126 api_check(L, -(idx+1) <= (L->top - L->base));
127 L->top += idx+1; /* Shrinks top (idx < 0). */
128 }
129}
130
131LUA_API void lua_remove(lua_State *L, int idx)
132{
133 TValue *p = stkindex2adr(L, idx);
134 api_checkvalidindex(L, p);
135 while (++p < L->top) copyTV(L, p-1, p);
136 L->top--;
137}
138
139LUA_API void lua_insert(lua_State *L, int idx)
140{
141 TValue *q, *p = stkindex2adr(L, idx);
142 api_checkvalidindex(L, p);
143 for (q = L->top; q > p; q--) copyTV(L, q, q-1);
144 copyTV(L, p, L->top);
145}
146
147LUA_API void lua_replace(lua_State *L, int idx)
148{
149 api_checknelems(L, 1);
150 if (idx == LUA_GLOBALSINDEX) {
151 api_check(L, tvistab(L->top-1));
152 /* NOBARRIER: A thread (i.e. L) is never black. */
153 setgcref(L->env, obj2gco(tabV(L->top-1)));
154 } else if (idx == LUA_ENVIRONINDEX) {
155 GCfunc *fn = curr_func(L);
156 if (fn->c.gct != ~LJ_TFUNC)
157 lj_err_msg(L, LJ_ERR_NOENV);
158 api_check(L, tvistab(L->top-1));
159 setgcref(fn->c.env, obj2gco(tabV(L->top-1)));
160 lj_gc_barrier(L, fn, L->top-1);
161 } else {
162 TValue *o = index2adr(L, idx);
163 api_checkvalidindex(L, o);
164 copyTV(L, o, L->top-1);
165 if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
166 lj_gc_barrier(L, curr_func(L), L->top-1);
167 }
168 L->top--;
169}
170
171LUA_API void lua_pushvalue(lua_State *L, int idx)
172{
173 copyTV(L, L->top, index2adr(L, idx));
174 incr_top(L);
175}
176
177/* -- Stack getters ------------------------------------------------------- */
178
179LUA_API int lua_type(lua_State *L, int idx)
180{
181 cTValue *o = index2adr(L, idx);
182 if (tvisnum(o)) {
183 return LUA_TNUMBER;
184#if LJ_64
185 } else if (tvislightud(o)) {
186 return LUA_TLIGHTUSERDATA;
187#endif
188 } else if (o == niltv(L)) {
189 return LUA_TNONE;
190 } else { /* Magic internal/external tag conversion. ORDER LJ_T */
191 int t = ~itype(o);
192 return (int)(((t < 8 ? 0x98a42110 : 0x75b6) >> 4*(t&7)) & 15u);
193 }
194}
195
196LUA_API const char *lua_typename(lua_State *L, int t)
197{
198 UNUSED(L);
199 return lj_obj_typename[t+1];
200}
201
202LUA_API int lua_iscfunction(lua_State *L, int idx)
203{
204 cTValue *o = index2adr(L, idx);
205 return !isluafunc(funcV(o));
206}
207
208LUA_API int lua_isnumber(lua_State *L, int idx)
209{
210 cTValue *o = index2adr(L, idx);
211 TValue tmp;
212 return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)));
213}
214
215LUA_API int lua_isstring(lua_State *L, int idx)
216{
217 cTValue *o = index2adr(L, idx);
218 return (tvisstr(o) || tvisnum(o));
219}
220
221LUA_API int lua_isuserdata(lua_State *L, int idx)
222{
223 cTValue *o = index2adr(L, idx);
224 return (tvisudata(o) || tvislightud(o));
225}
226
227LUA_API int lua_rawequal(lua_State *L, int idx1, int idx2)
228{
229 cTValue *o1 = index2adr(L, idx1);
230 cTValue *o2 = index2adr(L, idx2);
231 return (o1 == niltv(L) || o2 == niltv(L)) ? 0 : lj_obj_equal(o1, o2);
232}
233
234LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
235{
236 cTValue *o1 = index2adr(L, idx1);
237 cTValue *o2 = index2adr(L, idx2);
238 if (tvisnum(o1) && tvisnum(o2)) {
239 return numV(o1) == numV(o2);
240 } else if (itype(o1) != itype(o2)) {
241 return 0;
242 } else if (tvispri(o1)) {
243 return o1 != niltv(L) && o2 != niltv(L);
244#if LJ_64
245 } else if (tvislightud(o1)) {
246 return o1->u64 == o2->u64;
247#endif
248 } else if (gcrefeq(o1->gcr, o2->gcr)) {
249 return 1;
250 } else if (!tvistabud(o1)) {
251 return 0;
252 } else {
253 TValue *base = lj_meta_equal(L, gcV(o1), gcV(o2), 0);
254 if ((uintptr_t)base <= 1) {
255 return (int)(uintptr_t)base;
256 } else {
257 L->top = base+2;
258 lj_vm_call(L, base, 1+1);
259 L->top -= 2;
260 return tvistruecond(L->top+1);
261 }
262 }
263}
264
265LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
266{
267 cTValue *o1 = index2adr(L, idx1);
268 cTValue *o2 = index2adr(L, idx2);
269 if (o1 == niltv(L) || o2 == niltv(L)) {
270 return 0;
271 } else if (tvisnum(o1) && tvisnum(o2)) {
272 return numV(o1) < numV(o2);
273 } else {
274 TValue *base = lj_meta_comp(L, o1, o2, 0);
275 if ((uintptr_t)base <= 1) {
276 return (int)(uintptr_t)base;
277 } else {
278 L->top = base+2;
279 lj_vm_call(L, base, 1+1);
280 L->top -= 2;
281 return tvistruecond(L->top+1);
282 }
283 }
284}
285
286LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
287{
288 cTValue *o = index2adr(L, idx);
289 TValue tmp;
290 if (LJ_LIKELY(tvisnum(o)))
291 return numV(o);
292 else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
293 return numV(&tmp);
294 else
295 return 0;
296}
297
298LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
299{
300 cTValue *o = index2adr(L, idx);
301 TValue tmp;
302 lua_Number n;
303 if (LJ_LIKELY(tvisnum(o)))
304 n = numV(o);
305 else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
306 n = numV(&tmp);
307 else
308 return 0;
309#if LJ_64
310 return (lua_Integer)n;
311#else
312 return lj_num2int(n);
313#endif
314}
315
316LUA_API int lua_toboolean(lua_State *L, int idx)
317{
318 cTValue *o = index2adr(L, idx);
319 return tvistruecond(o);
320}
321
322LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
323{
324 TValue *o = index2adr(L, idx);
325 GCstr *s;
326 if (LJ_LIKELY(tvisstr(o))) {
327 s = strV(o);
328 } else if (tvisnum(o)) {
329 lj_gc_check(L);
330 o = index2adr(L, idx); /* GC may move the stack. */
331 s = lj_str_fromnum(L, &o->n);
332 } else {
333 if (len != NULL) *len = 0;
334 return NULL;
335 }
336 if (len != NULL) *len = s->len;
337 return strdata(s);
338}
339
340LUA_API size_t lua_objlen(lua_State *L, int idx)
341{
342 TValue *o = index2adr(L, idx);
343 if (tvisstr(o))
344 return strV(o)->len;
345 else if (tvistab(o))
346 return cast(size_t, lj_tab_len(tabV(o)));
347 else if (tvisudata(o))
348 return udataV(o)->len;
349 else if (tvisnum(o))
350 return lj_str_fromnum(L, &o->n)->len;
351 else
352 return 0;
353}
354
355LUA_API lua_CFunction lua_tocfunction(lua_State *L, int idx)
356{
357 cTValue *o = index2adr(L, idx);
358 return funcV(o)->c.gate == lj_gate_c ? funcV(o)->c.f : NULL;
359}
360
361LUA_API void *lua_touserdata(lua_State *L, int idx)
362{
363 cTValue *o = index2adr(L, idx);
364 if (tvisudata(o))
365 return uddata(udataV(o));
366 else if (tvislightud(o))
367 return lightudV(o);
368 else
369 return NULL;
370}
371
372LUA_API lua_State *lua_tothread(lua_State *L, int idx)
373{
374 cTValue *o = index2adr(L, idx);
375 return (!tvisthread(o)) ? NULL : threadV(o);
376}
377
378LUA_API const void *lua_topointer(lua_State *L, int idx)
379{
380 cTValue *o = index2adr(L, idx);
381 if (tvisudata(o))
382 return uddata(udataV(o));
383 else if (tvislightud(o))
384 return lightudV(o);
385 else if (tvisgcv(o))
386 return gcV(o);
387 else
388 return NULL;
389}
390
391/* -- Stack setters (object creation) ------------------------------------- */
392
393LUA_API void lua_pushnil(lua_State *L)
394{
395 setnilV(L->top);
396 incr_top(L);
397}
398
399LUA_API void lua_pushnumber(lua_State *L, lua_Number n)
400{
401 setnumV(L->top, n);
402 if (LJ_UNLIKELY(tvisnan(L->top)))
403 setnanV(L->top); /* Canonicalize injected NaNs. */
404 incr_top(L);
405}
406
407LUA_API void lua_pushinteger(lua_State *L, lua_Integer n)
408{
409 setnumV(L->top, cast_num(n));
410 incr_top(L);
411}
412
413LUA_API void lua_pushlstring(lua_State *L, const char *str, size_t len)
414{
415 GCstr *s;
416 lj_gc_check(L);
417 s = lj_str_new(L, str, len);
418 setstrV(L, L->top, s);
419 incr_top(L);
420}
421
422LUA_API void lua_pushstring(lua_State *L, const char *str)
423{
424 if (str == NULL) {
425 setnilV(L->top);
426 } else {
427 GCstr *s;
428 lj_gc_check(L);
429 s = lj_str_newz(L, str);
430 setstrV(L, L->top, s);
431 }
432 incr_top(L);
433}
434
435LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
436 va_list argp)
437{
438 lj_gc_check(L);
439 return lj_str_pushvf(L, fmt, argp);
440}
441
442LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
443{
444 const char *ret;
445 va_list argp;
446 lj_gc_check(L);
447 va_start(argp, fmt);
448 ret = lj_str_pushvf(L, fmt, argp);
449 va_end(argp);
450 return ret;
451}
452
453LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
454{
455 GCfunc *fn;
456 lj_gc_check(L);
457 api_checknelems(L, n);
458 fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
459 fn->c.f = f;
460 L->top -= n;
461 while (n--)
462 copyTV(L, &fn->c.upvalue[n], L->top+n);
463 setfuncV(L, L->top, fn);
464 lua_assert(iswhite(obj2gco(fn)));
465 incr_top(L);
466}
467
468LUA_API void lua_pushboolean(lua_State *L, int b)
469{
470 setboolV(L->top, (b != 0));
471 incr_top(L);
472}
473
474LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
475{
476 setlightudV(L->top, checklightudptr(L, p));
477 incr_top(L);
478}
479
480LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
481{
482 GCtab *t;
483 lj_gc_check(L);
484 t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
485 settabV(L, L->top, t);
486 incr_top(L);
487}
488
489LUALIB_API int luaL_newmetatable(lua_State *L, const char *tname)
490{
491 GCtab *regt = tabV(registry(L));
492 TValue *tv = lj_tab_setstr(L, regt, lj_str_newz(L, tname));
493 if (tvisnil(tv)) {
494 GCtab *mt = lj_tab_new(L, 0, 1);
495 settabV(L, tv, mt);
496 settabV(L, L->top++, mt);
497 lj_gc_objbarriert(L, regt, mt);
498 return 1;
499 } else {
500 copyTV(L, L->top++, tv);
501 return 0;
502 }
503}
504
505LUA_API int lua_pushthread(lua_State *L)
506{
507 setthreadV(L, L->top, L);
508 incr_top(L);
509 return (mainthread(G(L)) == L);
510}
511
512LUA_API lua_State *lua_newthread(lua_State *L)
513{
514 lua_State *L1;
515 lj_gc_check(L);
516 L1 = lj_state_new(L);
517 setthreadV(L, L->top, L1);
518 incr_top(L);
519 return L1;
520}
521
522LUA_API void *lua_newuserdata(lua_State *L, size_t size)
523{
524 GCudata *ud;
525 lj_gc_check(L);
526 if (size > LJ_MAX_UDATA)
527 lj_err_msg(L, LJ_ERR_UDATAOV);
528 ud = lj_udata_new(L, (MSize)size, getcurrenv(L));
529 setudataV(L, L->top, ud);
530 incr_top(L);
531 return uddata(ud);
532}
533
534LUA_API void lua_concat(lua_State *L, int n)
535{
536 api_checknelems(L, n);
537 if (n >= 2) {
538 n--;
539 do {
540 TValue *top = lj_meta_cat(L, L->top-1, n);
541 if (top == NULL) {
542 L->top -= n;
543 break;
544 }
545 n -= cast_int(L->top - top);
546 L->top = top+2;
547 lj_vm_call(L, top, 1+1);
548 L->top--;
549 copyTV(L, L->top-1, L->top);
550 } while (--n > 0);
551 } else if (n == 0) { /* Push empty string. */
552 setstrV(L, L->top, lj_str_new(L, "", 0));
553 incr_top(L);
554 }
555 /* else n == 1: nothing to do. */
556}
557
558/* -- Object getters ------------------------------------------------------ */
559
560LUA_API void lua_gettable(lua_State *L, int idx)
561{
562 cTValue *v, *t = index2adr(L, idx);
563 api_checkvalidindex(L, t);
564 v = lj_meta_tget(L, t, L->top-1);
565 if (v == NULL) {
566 L->top += 2;
567 lj_vm_call(L, L->top-2, 1+1);
568 L->top -= 2;
569 v = L->top+1;
570 }
571 copyTV(L, L->top-1, v);
572}
573
574LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
575{
576 cTValue *v, *t = index2adr(L, idx);
577 TValue key;
578 api_checkvalidindex(L, t);
579 setstrV(L, &key, lj_str_newz(L, k));
580 v = lj_meta_tget(L, t, &key);
581 if (v == NULL) {
582 L->top += 2;
583 lj_vm_call(L, L->top-2, 1+1);
584 L->top -= 2;
585 v = L->top+1;
586 }
587 copyTV(L, L->top, v);
588 incr_top(L);
589}
590
591LUA_API void lua_rawget(lua_State *L, int idx)
592{
593 cTValue *t = index2adr(L, idx);
594 api_check(L, tvistab(t));
595 copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
596}
597
598LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
599{
600 cTValue *v, *t = index2adr(L, idx);
601 api_check(L, tvistab(t));
602 v = lj_tab_getint(tabV(t), n);
603 if (v) {
604 copyTV(L, L->top, v);
605 } else {
606 setnilV(L->top);
607 }
608 incr_top(L);
609}
610
611LUA_API int lua_getmetatable(lua_State *L, int idx)
612{
613 cTValue *o = index2adr(L, idx);
614 GCtab *mt = NULL;
615 if (tvistab(o))
616 mt = tabref(tabV(o)->metatable);
617 else if (tvisudata(o))
618 mt = tabref(udataV(o)->metatable);
619 else
620 mt = tabref(G(L)->basemt[itypemap(o)]);
621 if (mt == NULL)
622 return 0;
623 settabV(L, L->top, mt);
624 incr_top(L);
625 return 1;
626}
627
628LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
629{
630 if (lua_getmetatable(L, idx)) {
631 cTValue *tv = lj_tab_getstr(tabV(L->top-1), lj_str_newz(L, field));
632 if (tv && !tvisnil(tv)) {
633 copyTV(L, L->top-1, tv);
634 return 1;
635 }
636 L->top--;
637 }
638 return 0;
639}
640
641LUA_API void lua_getfenv(lua_State *L, int idx)
642{
643 cTValue *o = index2adr(L, idx);
644 api_checkvalidindex(L, o);
645 if (tvisfunc(o)) {
646 settabV(L, L->top, tabref(funcV(o)->c.env));
647 } else if (tvisudata(o)) {
648 settabV(L, L->top, tabref(udataV(o)->env));
649 } else if (tvisthread(o)) {
650 settabV(L, L->top, tabref(threadV(o)->env));
651 } else {
652 setnilV(L->top);
653 }
654 incr_top(L);
655}
656
657LUA_API int lua_next(lua_State *L, int idx)
658{
659 cTValue *t = index2adr(L, idx);
660 int more;
661 api_check(L, tvistab(t));
662 more = lj_tab_next(L, tabV(t), L->top-1);
663 if (more) {
664 incr_top(L); /* Return new key and value slot. */
665 } else { /* End of traversal. */
666 L->top--; /* Remove key slot. */
667 }
668 return more;
669}
670
671static const char *aux_upvalue(cTValue *f, uint32_t idx, TValue **val)
672{
673 GCfunc *fn;
674 if (!tvisfunc(f)) return NULL;
675 fn = funcV(f);
676 if (isluafunc(fn)) {
677 GCproto *pt = funcproto(fn);
678 if (idx < pt->sizeuvname) {
679 *val = gcref(fn->l.uvptr[idx])->uv.v;
680 return strdata(pt->uvname[idx]);
681 }
682 } else {
683 if (idx < fn->c.nupvalues) {
684 *val = &fn->c.upvalue[idx];
685 return "";
686 }
687 }
688 return NULL;
689}
690
691LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n)
692{
693 TValue *val;
694 const char *name = aux_upvalue(index2adr(L, idx), (uint32_t)(n-1), &val);
695 if (name) {
696 copyTV(L, L->top, val);
697 incr_top(L);
698 }
699 return name;
700}
701
702LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
703{
704 cTValue *o = index2adr(L, idx);
705 if (tvisudata(o)) {
706 GCudata *ud = udataV(o);
707 cTValue *tv = lj_tab_getstr(tabV(registry(L)), lj_str_newz(L, tname));
708 if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
709 return uddata(ud);
710 }
711 lj_err_argtype(L, idx, tname);
712 return NULL; /* unreachable */
713}
714
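/* The usual pairing of luaL_newmetatable() and luaL_checkudata() as seen
** from a C module; the Point struct and the "mylib.point" registry key are
** made-up example names:
*/
#include "lauxlib.h"

typedef struct Point { double x, y; } Point;

static int point_new(lua_State *L)
{
  Point *p = (Point *)lua_newuserdata(L, sizeof(Point));
  p->x = lua_tonumber(L, 1);
  p->y = lua_tonumber(L, 2);
  luaL_newmetatable(L, "mylib.point");  /* Create or fetch the shared metatable. */
  lua_setmetatable(L, -2);              /* Tag the userdata with it. */
  return 1;                             /* Return the userdata. */
}

static int point_getx(lua_State *L)
{
  Point *p = (Point *)luaL_checkudata(L, 1, "mylib.point");
  lua_pushnumber(L, p->x);
  return 1;
}
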
715/* -- Object setters ------------------------------------------------------ */
716
717LUA_API void lua_settable(lua_State *L, int idx)
718{
719 TValue *o;
720 cTValue *t = index2adr(L, idx);
721 api_checknelems(L, 2);
722 api_checkvalidindex(L, t);
723 o = lj_meta_tset(L, t, L->top-2);
724 if (o) {
725 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
726 copyTV(L, o, L->top-1);
727 L->top -= 2;
728 } else {
729 L->top += 3;
730 copyTV(L, L->top-1, L->top-6);
731 lj_vm_call(L, L->top-3, 0+1);
732 L->top -= 3;
733 }
734}
735
736LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
737{
738 TValue *o;
739 TValue key;
740 cTValue *t = index2adr(L, idx);
741 api_checknelems(L, 1);
742 api_checkvalidindex(L, t);
743 setstrV(L, &key, lj_str_newz(L, k));
744 o = lj_meta_tset(L, t, &key);
745 if (o) {
746 L->top--;
747 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
748 copyTV(L, o, L->top);
749 } else {
750 L->top += 3;
751 copyTV(L, L->top-1, L->top-6);
752 lj_vm_call(L, L->top-3, 0+1);
753 L->top -= 2;
754 }
755}
756
757LUA_API void lua_rawset(lua_State *L, int idx)
758{
759 GCtab *t = tabV(index2adr(L, idx));
760 TValue *dst, *key;
761 api_checknelems(L, 2);
762 key = L->top-2;
763 dst = lj_tab_set(L, t, key);
764 copyTV(L, dst, key+1);
765 lj_gc_barriert(L, t, dst);
766 L->top = key;
767}
768
769LUA_API void lua_rawseti(lua_State *L, int idx, int n)
770{
771 GCtab *t = tabV(index2adr(L, idx));
772 TValue *dst, *src;
773 api_checknelems(L, 1);
774 dst = lj_tab_setint(L, t, n);
775 src = L->top-1;
776 copyTV(L, dst, src);
777 lj_gc_barriert(L, t, dst);
778 L->top = src;
779}
780
781LUA_API int lua_setmetatable(lua_State *L, int idx)
782{
783 global_State *g;
784 GCtab *mt;
785 cTValue *o = index2adr(L, idx);
786 api_checknelems(L, 1);
787 api_checkvalidindex(L, o);
788 if (tvisnil(L->top-1)) {
789 mt = NULL;
790 } else {
791 api_check(L, tvistab(L->top-1));
792 mt = tabV(L->top-1);
793 }
794 g = G(L);
795 if (tvistab(o)) {
796 setgcref(tabV(o)->metatable, obj2gco(mt));
797 if (mt)
798 lj_gc_objbarriert(L, tabV(o), mt);
799 } else if (tvisudata(o)) {
800 setgcref(udataV(o)->metatable, obj2gco(mt));
801 if (mt)
802 lj_gc_objbarrier(L, udataV(o), mt);
803 } else {
804 /* Flush cache, since traces specialize to basemt. But not during __gc. */
805 if (lj_trace_flushall(L))
806 lj_err_caller(L, LJ_ERR_NOGCMM);
807 if (tvisbool(o)) {
808 /* NOBARRIER: g->basemt[] is a GC root. */
809 setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt));
810 setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt));
811 } else {
812 /* NOBARRIER: g->basemt[] is a GC root. */
813 setgcref(g->basemt[itypemap(o)], obj2gco(mt));
814 }
815 }
816 L->top--;
817 return 1;
818}
819
820LUA_API int lua_setfenv(lua_State *L, int idx)
821{
822 cTValue *o = index2adr(L, idx);
823 GCtab *t;
824 api_checknelems(L, 1);
825 api_checkvalidindex(L, o);
826 api_check(L, tvistab(L->top-1));
827 t = tabV(L->top-1);
828 if (tvisfunc(o)) {
829 setgcref(funcV(o)->c.env, obj2gco(t));
830 } else if (tvisudata(o)) {
831 setgcref(udataV(o)->env, obj2gco(t));
832 } else if (tvisthread(o)) {
833 setgcref(threadV(o)->env, obj2gco(t));
834 } else {
835 L->top--;
836 return 0;
837 }
838 lj_gc_objbarrier(L, gcV(o), t);
839 L->top--;
840 return 1;
841}
842
843LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
844{
845 cTValue *f = index2adr(L, idx);
846 TValue *val;
847 const char *name;
848 api_checknelems(L, 1);
849 name = aux_upvalue(f, (uint32_t)(n-1), &val);
850 if (name) {
851 L->top--;
852 copyTV(L, val, L->top);
853 lj_gc_barrier(L, funcV(f), L->top);
854 }
855 return name;
856}
857
858/* -- Calls --------------------------------------------------------------- */
859
860LUA_API void lua_call(lua_State *L, int nargs, int nresults)
861{
862 api_checknelems(L, nargs+1);
863 lj_vm_call(L, L->top - nargs, nresults+1);
864}
865
866LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
867{
868 global_State *g = G(L);
869 uint8_t oldh = hook_save(g);
870 ptrdiff_t ef;
871 int status;
872 api_checknelems(L, nargs+1);
873 if (errfunc == 0) {
874 ef = 0;
875 } else {
876 cTValue *o = stkindex2adr(L, errfunc);
877 api_checkvalidindex(L, o);
878 ef = savestack(L, o);
879 }
880 status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
881 if (status) hook_restore(g, oldh);
882 return status;
883}
884
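/* Sketch of the errfunc argument to lua_pcall(): push the handler first,
** remember its index, then push the function and its arguments. Both the
** no-op traceback handler and the "work" global are placeholders:
*/
static int traceback(lua_State *L)
{
  return 1;  /* Pass the error object through unchanged. */
}

static int call_with_handler(lua_State *L)
{
  int base, status;
  lua_pushcfunction(L, traceback);            /* Error handler. */
  base = lua_gettop(L);                       /* Its stack index, used as errfunc. */
  lua_getfield(L, LUA_GLOBALSINDEX, "work");  /* Function to call. */
  status = lua_pcall(L, 0, 0, base);          /* 0 args, 0 results, handler at 'base'. */
  lua_remove(L, base);                        /* Drop the handler again. */
  return status;
}
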
885static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
886{
887 GCfunc *fn;
888 fn = lj_func_newC(L, 0, getcurrenv(L));
889 fn->c.f = func;
890 setfuncV(L, L->top, fn);
891 setlightudV(L->top+1, checklightudptr(L, ud));
892 cframe_nres(L->cframe) = 1+0; /* Zero results. */
893 L->top += 2;
894 return L->top-1; /* Now call the newly allocated C function. */
895}
896
897LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
898{
899 global_State *g = G(L);
900 uint8_t oldh = hook_save(g);
901 int status = lj_vm_cpcall(L, cpcall, func, ud);
902 if (status) hook_restore(g, oldh);
903 return status;
904}
905
906LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
907{
908 if (luaL_getmetafield(L, idx, field)) {
909 TValue *base = L->top--;
910 copyTV(L, base, index2adr(L, idx));
911 L->top = base+1;
912 lj_vm_call(L, base, 1+1);
913 return 1;
914 }
915 return 0;
916}
917
918/* -- Coroutine yield and resume ------------------------------------------ */
919
920LUA_API int lua_yield(lua_State *L, int nresults)
921{
922 void *cf = L->cframe;
923 cTValue *f;
924 if (!cframe_canyield(cf))
925 lj_err_msg(L, LJ_ERR_CYIELD);
926 f = L->top - nresults;
927 if (f > L->base) {
928 TValue *t = L->base;
929 while (--nresults >= 0) copyTV(L, t++, f++);
930 L->top = t;
931 }
932 L->cframe = NULL;
933 L->status = LUA_YIELD;
934 lj_vm_unwind_c(cf, LUA_YIELD);
935 return -1; /* unreachable */
936}
937
938LUA_API int lua_resume(lua_State *L, int nargs)
939{
940 if (L->cframe == NULL && L->status <= LUA_YIELD)
941 return lj_vm_resume(L, L->top - nargs, 0, 0);
942 L->top = L->base;
943 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
944 incr_top(L);
945 return LUA_ERRRUN;
946}
947
948/* -- Load and dump Lua code ---------------------------------------------- */
949
950static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud)
951{
952 LexState *ls = cast(LexState *, ud);
953 GCfunc *fn;
954 UNUSED(dummy);
955 cframe_errfunc(L->cframe) = -1; /* Inherit error function. */
956 lj_lex_start(L, ls);
957 fn = lj_func_newL(L, lj_parse(ls), tabref(L->env));
958 /* Parser may realloc stack. Don't combine above/below into one statement. */
959 setfuncV(L, L->top++, fn);
960 return NULL;
961}
962
963LUA_API int lua_load(lua_State *L, lua_Reader reader, void *data,
964 const char *chunkname)
965{
966 LexState ls;
967 int status;
968 global_State *g;
969 ls.rfunc = reader;
970 ls.rdata = data;
971 ls.chunkarg = chunkname ? chunkname : "?";
972 lj_str_initbuf(L, &ls.sb);
973 status = lj_vm_cpcall(L, cpparser, NULL, &ls);
974 g = G(L);
975 lj_str_freebuf(g, &ls.sb);
976 lj_gc_check(L);
977 return status;
978}
979
980LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
981{
982 api_checknelems(L, 1);
983 UNUSED(L); UNUSED(writer); UNUSED(data);
984 return 1; /* Error, not supported. */
985}
986
987/* -- GC and memory management -------------------------------------------- */
988
989LUA_API int lua_gc(lua_State *L, int what, int data)
990{
991 global_State *g = G(L);
992 int res = 0;
993 switch (what) {
994 case LUA_GCSTOP:
995 g->gc.threshold = LJ_MAX_MEM;
996 break;
997 case LUA_GCRESTART:
998 g->gc.threshold = g->gc.total;
999 break;
1000 case LUA_GCCOLLECT:
1001 lj_gc_fullgc(L);
1002 break;
1003 case LUA_GCCOUNT:
1004 res = cast_int(g->gc.total >> 10);
1005 break;
1006 case LUA_GCCOUNTB:
1007 res = cast_int(g->gc.total & 0x3ff);
1008 break;
1009 case LUA_GCSTEP: {
1010 MSize a = (MSize)data << 10;
1011 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
1012 while (g->gc.total >= g->gc.threshold)
1013 if (lj_gc_step(L)) {
1014 res = 1;
1015 break;
1016 }
1017 break;
1018 }
1019 case LUA_GCSETPAUSE:
1020 res = cast_int(g->gc.pause);
1021 g->gc.pause = (MSize)data;
1022 break;
1023 case LUA_GCSETSTEPMUL:
1024 res = cast_int(g->gc.stepmul);
1025 g->gc.stepmul = (MSize)data;
1026 break;
1027 default:
1028 res = -1; /* Invalid option. */
1029 }
1030 return res;
1031}
1032
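/* A few typical uses of the GC control interface above; what each option
** does to the threshold/pause/stepmul fields is visible in the switch:
*/
static void gc_examples(lua_State *L)
{
  int kb = lua_gc(L, LUA_GCCOUNT, 0);  /* Heap size in KB (LUA_GCCOUNTB gives the remainder). */
  lua_gc(L, LUA_GCSTOP, 0);            /* threshold = LJ_MAX_MEM: the collector stays idle. */
  lua_gc(L, LUA_GCSTEP, 10);           /* Credit ~10 KB of debt and step until it is repaid. */
  lua_gc(L, LUA_GCRESTART, 0);         /* threshold = current total: normal pacing resumes. */
  (void)kb;
}
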
1033LUA_API lua_Alloc lua_getallocf(lua_State *L, void **ud)
1034{
1035 global_State *g = G(L);
1036 if (ud) *ud = g->allocd;
1037 return g->allocf;
1038}
1039
1040LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud)
1041{
1042 global_State *g = G(L);
1043 g->allocd = ud;
1044 g->allocf = f;
1045}
1046
diff --git a/src/lj_arch.h b/src/lj_arch.h
new file mode 100644
index 00000000..abdb5af9
--- /dev/null
+++ b/src/lj_arch.h
@@ -0,0 +1,88 @@
1/*
2** Target architecture selection.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_ARCH_H
7#define _LJ_ARCH_H
8
9#include "lua.h"
10
11
12/* Target endianness. */
13#define LUAJIT_LE 0
14#define LUAJIT_BE 1
15
16/* Target architectures. */
17#define LUAJIT_ARCH_X86 1
18#define LUAJIT_ARCH_x86 1
19#define LUAJIT_ARCH_X64 2
20#define LUAJIT_ARCH_x64 2
21
22
23/* Select native target if no target defined. */
24#ifndef LUAJIT_TARGET
25
26#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
27#define LUAJIT_TARGET LUAJIT_ARCH_X86
28#elif defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
29#define LUAJIT_TARGET LUAJIT_ARCH_X64
30#else
31#error "No support for this architecture (yet)"
32#endif
33
34#endif
35
36/* Set target properties. */
37#if LUAJIT_TARGET == LUAJIT_ARCH_X86
38#define LJ_ARCH_NAME "x86"
39#define LJ_ARCH_BITS 32
40#define LJ_ARCH_ENDIAN LUAJIT_LE
41#define LJ_TARGET_X86 1
42#define LJ_TARGET_X86ORX64 1
43#define LJ_PAGESIZE 4096
44#elif LUAJIT_TARGET == LUAJIT_ARCH_X64
45#define LJ_ARCH_NAME "x64"
46#define LJ_ARCH_BITS 64
47#define LJ_ARCH_ENDIAN LUAJIT_LE
48#define LJ_TARGET_X64 1
49#define LJ_TARGET_X86ORX64 1
50#define LJ_PAGESIZE 4096
51#error "No support for x64 architecture (yet)"
52#else
53#error "No target architecture defined"
54#endif
55
56/* Disable or enable the JIT compiler. */
57#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT)
58#define LJ_HASJIT 0
59#else
60#define LJ_HASJIT 1
61#endif
62
63#if LJ_ARCH_ENDIAN == LUAJIT_BE
64#define LJ_ENDIAN_SELECT(le, be) be
65#define LJ_ENDIAN_LOHI(lo, hi) hi lo
66#else
67#define LJ_ENDIAN_SELECT(le, be) le
68#define LJ_ENDIAN_LOHI(lo, hi) lo hi
69#endif
70
71#if LJ_ARCH_BITS == 32
72#define LJ_32 1
73#define LJ_64 0
74#elif LJ_ARCH_BITS == 64
75#define LJ_32 0
76#define LJ_64 1
77#else
78#error "Bad LJ_ARCH_BITS setting"
79#endif
80
81/* Whether target CPU masks the shift count by the operand length or not. */
82#if LJ_TARGET_X86ORX64
83#define LJ_TARGET_MASKEDSHIFT 1
84#else
85#define LJ_TARGET_MASKEDSHIFT 0
86#endif
87
88#endif
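
/* A minimal translation unit showing how the derived macros are meant to be
** consumed; illustrative only, and it assumes lj_arch.h (and the lua.h it
** includes) are on the include path:
*/
#include <stdio.h>
#include "lj_arch.h"

int main(void)
{
  printf("arch=%s bits=%d endian=%s jit=%d\n",
         LJ_ARCH_NAME, LJ_ARCH_BITS,
         LJ_ENDIAN_SELECT("little", "big"), LJ_HASJIT);
  return 0;
}
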
diff --git a/src/lj_asm.c b/src/lj_asm.c
new file mode 100644
index 00000000..b89b8543
--- /dev/null
+++ b/src/lj_asm.c
@@ -0,0 +1,3324 @@
1/*
2** IR assembler (SSA IR -> machine code).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_asm_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_str.h"
15#include "lj_tab.h"
16#include "lj_ir.h"
17#include "lj_jit.h"
18#include "lj_iropt.h"
19#include "lj_mcode.h"
20#include "lj_iropt.h"
21#include "lj_trace.h"
22#include "lj_snap.h"
23#include "lj_asm.h"
24#include "lj_dispatch.h"
25#include "lj_vm.h"
26#include "lj_target.h"
27
28/* -- Assembler state and common macros ----------------------------------- */
29
30/* Assembler state. */
31typedef struct ASMState {
32 RegCost cost[RID_MAX]; /* Reference and blended allocation cost for regs. */
33
34 MCode *mcp; /* Current MCode pointer (grows down). */
35 MCode *mclim; /* Lower limit for MCode memory + red zone. */
36
37 IRIns *ir; /* Copy of pointer to IR instructions/constants. */
38 jit_State *J; /* JIT compiler state. */
39
40 x86ModRM mrm; /* Fused x86 address operand. */
41
42 RegSet freeset; /* Set of free registers. */
43 RegSet modset; /* Set of registers modified inside the loop. */
44 RegSet phiset; /* Set of PHI registers. */
45
46 uint32_t flags; /* Copy of JIT compiler flags. */
47 int loopinv; /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */
48
49 int32_t evenspill; /* Next even spill slot. */
50 int32_t oddspill; /* Next odd spill slot (or 0). */
51
52 IRRef curins; /* Reference of current instruction. */
53 IRRef stopins; /* Stop assembly before hitting this instruction. */
54 IRRef orignins; /* Original T->nins. */
55
56 IRRef snapref; /* Current snapshot is active after this reference. */
57 IRRef snaprename; /* Rename highwater mark for snapshot check. */
58 SnapNo snapno; /* Current snapshot number. */
59 SnapNo loopsnapno; /* Loop snapshot number. */
60
61 Trace *T; /* Trace to assemble. */
62 Trace *parent; /* Parent trace (or NULL). */
63
64 IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
65 IRRef sectref; /* Section base reference (loopref or 0). */
66 IRRef loopref; /* Reference of LOOP instruction (or 0). */
67
68 BCReg topslot; /* Number of slots for stack check (unless 0). */
69 MSize gcsteps; /* Accumulated number of GC steps (per section). */
70
71 MCode *mcbot; /* Bottom of reserved MCode. */
72 MCode *mctop; /* Top of generated MCode. */
73 MCode *mcloop; /* Pointer to loop MCode (or NULL). */
74 MCode *invmcp; /* Points to invertible loop branch (or NULL). */
75 MCode *testmcp; /* Pending opportunity to remove test r,r. */
76 MCode *realign; /* Realign loop if not NULL. */
77
78 IRRef1 phireg[RID_MAX]; /* PHI register references. */
79 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */
80} ASMState;
81
82#define IR(ref) (&as->ir[(ref)])
83
84/* Check for variant to invariant references. */
85#define iscrossref(as, ref) ((ref) < as->sectref)
86
87/* Inhibit memory op fusion from variant to invariant references. */
88#define FUSE_DISABLED (~(IRRef)0)
89#define mayfuse(as, ref) ((ref) > as->fuseref)
90#define neverfuse(as) (as->fuseref == FUSE_DISABLED)
91#define opisfusableload(o) \
92 ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
93 (o) == IR_FLOAD || (o) == IR_SLOAD || (o) == IR_XLOAD)
94
95/* Instruction selection for XMM moves. */
96#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
97#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
98
99/* Sparse limit checks using a red zone before the actual limit. */
100#define MCLIM_REDZONE 64
101#define checkmclim(as) \
102 if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as)
103
104static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
105{
106 lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
107}
108
109/* -- Emit x86 instructions ----------------------------------------------- */
110
111#define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))
112
113#if LJ_64
114#define REXRB(p, rr, rb) \
115 { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
116 if (rex != 0x40) *--(p) = rex; }
117#define FORCE_REX 0x200
118#else
119#define REXRB(p, rr, rb) ((void)0)
120#define FORCE_REX 0
121#endif
122
123#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
124#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
125
126#define emit_x87op(as, xo) \
127 (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2)
128
129/* op */
130static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
131 MCode *p, int delta)
132{
133 int n = (int8_t)xo;
134#if defined(__GNUC__)
135 if (__builtin_constant_p(xo) && n == -2)
136 p[delta-2] = (MCode)(xo >> 24);
137 else if (__builtin_constant_p(xo) && n == -3)
138 *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16);
139 else
140#endif
141 *(uint32_t *)(p+delta-5) = (uint32_t)xo;
142 p += n + delta;
143#if LJ_64
144 {
145 uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
146 if (rex != 0x40) {
147 if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
148 *--p = (MCode)rex;
149 }
150 }
151#else
152 UNUSED(rr); UNUSED(rb); UNUSED(rx);
153#endif
154 return p;
155}
156
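/* Note on the emitters in this section: machine code is generated backwards,
** from as->mctop down towards as->mcbot (see the "grows down" comment on
** mcp in ASMState), which is why each helper writes through a decremented
** pointer. In emit_op() the low byte of the x86Op constant appears to hold
** the negative length of the opcode bytes packed into its upper bytes, so
** the whole 32-bit constant is stored below the ModRM position and p is
** then moved down by that length; this reading is inferred from the code,
** not stated by it.
*/
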
157/* op + modrm */
158#define emit_opm(xo, mode, rr, rb, p, delta) \
159 (p[(delta)-1] = MODRM((mode), (rr), (rb)), \
160 emit_op((xo), (rr), (rb), 0, (p), (delta)))
161
162/* op + modrm + sib */
163#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \
164 (p[-1] = MODRM((scale), (rx), (rb)), \
165 p[-2] = MODRM((mode), (rr), RID_ESP), \
166 emit_op((xo), (rr), (rb), (rx), (p), -1))
167
168/* op r1, r2 */
169static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
170{
171 MCode *p = as->mcp;
172 as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0);
173}
174
175#if LJ_64 && defined(LUA_USE_ASSERT)
176/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
177static int32_t ptr2addr(void *p)
178{
179 lua_assert((uintptr_t)p < (uintptr_t)0x80000000);
180 return i32ptr(p);
181}
182#else
183#define ptr2addr(p) (i32ptr((p)))
184#endif
185
186/* op r, [addr] */
187static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
188{
189 MCode *p = as->mcp;
190 *(int32_t *)(p-4) = ptr2addr(addr);
191#if LJ_64
192 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
193 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
194#else
195 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
196#endif
197}
198
199/* op r, [base+ofs] */
200static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
201{
202 MCode *p = as->mcp;
203 x86Mode mode;
204 if (ra_hasreg(rb)) {
205 if (ofs == 0 && (rb&7) != RID_EBP) {
206 mode = XM_OFS0;
207 } else if (checki8(ofs)) {
208 *--p = (MCode)ofs;
209 mode = XM_OFS8;
210 } else {
211 p -= 4;
212 *(int32_t *)p = ofs;
213 mode = XM_OFS32;
214 }
215 if ((rb&7) == RID_ESP)
216 *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
217 } else {
218 *(int32_t *)(p-4) = ofs;
219#if LJ_64
220 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
221 p -= 5;
222 rb = RID_ESP;
223#else
224 p -= 4;
225 rb = RID_EBP;
226#endif
227 mode = XM_OFS0;
228 }
229 as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
230}
231
232/* op r, [base+idx*scale+ofs] */
233static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
234 x86Mode scale, int32_t ofs)
235{
236 MCode *p = as->mcp;
237 x86Mode mode;
238 if (ofs == 0 && (rb&7) != RID_EBP) {
239 mode = XM_OFS0;
240 } else if (checki8(ofs)) {
241 mode = XM_OFS8;
242 *--p = (MCode)ofs;
243 } else {
244 mode = XM_OFS32;
245 p -= 4;
246 *(int32_t *)p = ofs;
247 }
248 as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p);
249}
250
251/* op r, i */
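/* An x86Group constant presumably packs three things (see lj_target_x86.h):
** the opcode byte for the imm8 form in bits 16-23, the opcode byte for the
** imm32 form in bits 8-15 and the /digit group number in the low bits,
** which ends up in the reg field of the ModRM byte below.
*/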
252static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
253{
254 MCode *p = as->mcp;
255 if (checki8(i)) {
256 p -= 3;
257 p[2] = (MCode)i;
258 p[0] = (MCode)(xg >> 16);
259 } else {
260 p -= 6;
261 *(int32_t *)(p+2) = i;
262 p[0] = (MCode)(xg >> 8);
263 }
264 p[1] = MODRM(XM_REG, xg, rb);
265 REXRB(p, 0, rb);
266 as->mcp = p;
267}
268
269/* op [base+ofs], i */
270static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
271 int32_t i)
272{
273 x86Op xo;
274 if (checki8(i)) {
275 emit_i8(as, i);
276 xo = (x86Op)(((xg >> 16) << 24)+0xfe);
277 } else {
278 emit_i32(as, i);
279 xo = (x86Op)(((xg >> 8) << 24)+0xfe);
280 }
281 emit_rmro(as, xo, (Reg)xg, rb, ofs);
282}
283
284#define emit_shifti(as, xg, r, i) \
285 (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r)))
286
287/* op r, rm/mrm */
288static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
289{
290 MCode *p = as->mcp;
291 x86Mode mode = XM_REG;
292 if (rb == RID_MRM) {
293 rb = as->mrm.base;
294 if (rb == RID_NONE) {
295 rb = RID_EBP;
296 mode = XM_OFS0;
297 p -= 4;
298 *(int32_t *)p = as->mrm.ofs;
299 if (as->mrm.idx != RID_NONE)
300 goto mrmidx;
301#if LJ_64
302 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
303 rb = RID_ESP;
304#endif
305 } else {
306 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
307 mode = XM_OFS0;
308 } else if (checki8(as->mrm.ofs)) {
309 *--p = (MCode)as->mrm.ofs;
310 mode = XM_OFS8;
311 } else {
312 p -= 4;
313 *(int32_t *)p = as->mrm.ofs;
314 mode = XM_OFS32;
315 }
316 if (as->mrm.idx != RID_NONE) {
317 mrmidx:
318 as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p);
319 return;
320 }
321 if ((rb&7) == RID_ESP)
322 *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
323 }
324 }
325 as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
326}
327
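/* Add a constant offset to a pointer register. LEA leaves the flags
** untouched, and on CPUs flagged with JIT_F_LEA_AGU it is presumably
** handled by the address-generation unit, so it is preferred over ADD
** there.
*/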
328static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
329{
330 if (ofs) {
331 if ((as->flags & JIT_F_LEA_AGU))
332 emit_rmro(as, XO_LEA, r, r, ofs);
333 else
334 emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
335 }
336}
337
338/* -- Emit moves ---------------------------------------------------------- */
339
340/* Generic move between two regs. */
341static void emit_movrr(ASMState *as, Reg r1, Reg r2)
342{
343 emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2);
344}
345
346/* Generic move from [base+ofs]. */
347static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs)
348{
349 emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs);
350}
351
352/* mov [base+ofs], i */
353static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
354{
355 emit_i32(as, i);
356 emit_rmro(as, XO_MOVmi, 0, base, ofs);
357}
358
359/* mov [base+ofs], r */
360#define emit_movtomro(as, r, base, ofs) \
361 emit_rmro(as, XO_MOVto, (r), (base), (ofs))
362
363/* Get/set global_State fields. */
364#define emit_opgl(as, xo, r, field) \
365 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
366#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
367#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
368#define emit_setgli(as, field, i) \
369 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, field))
370
371/* mov r, i / xor r, r */
372static void emit_loadi(ASMState *as, Reg r, int32_t i)
373{
374 if (i == 0) {
375 emit_rr(as, XO_ARITH(XOg_XOR), r, r);
376 } else {
377 MCode *p = as->mcp;
378 *(int32_t *)(p-4) = i;
379 p[-5] = (MCode)(XI_MOVri+(r&7));
380 p -= 5;
381 REXRB(p, 0, r);
382 as->mcp = p;
383 }
384}
385
386/* mov r, addr */
387#define emit_loada(as, r, addr) \
388 emit_loadi(as, (r), ptr2addr((addr)))
389
390/* movsd r, [&tv->n] / xorps r, r */
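/* XORPS can only produce +0.0; -0.0 (and any other constant) has to be
** loaded from memory, which is why the shortcut is restricted to
** tvispzero().
*/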
391static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
392{
393 if (tvispzero(tv)) /* Use xor only for +0. */
394 emit_rr(as, XO_XORPS, r, r);
395 else
396 emit_rma(as, XMM_MOVRM(as), r, &tv->n);
397}
398
399/* -- Emit branches ------------------------------------------------------- */
400
401/* Label for short jumps. */
402typedef MCode *MCLabel;
403
404/* jcc short target */
405static void emit_sjcc(ASMState *as, int cc, MCLabel target)
406{
407 MCode *p = as->mcp;
408 p[-1] = (MCode)(int8_t)(target-p);
409 p[-2] = (MCode)(XI_JCCs+(cc&15));
410 as->mcp = p - 2;
411}
412
413/* jcc short (pending target) */
414static MCLabel emit_sjcc_label(ASMState *as, int cc)
415{
416 MCode *p = as->mcp;
417 p[-1] = 0;
418 p[-2] = (MCode)(XI_JCCs+(cc&15));
419 as->mcp = p - 2;
420 return p;
421}
422
423/* Fixup jcc short target. */
424static void emit_sfixup(ASMState *as, MCLabel source)
425{
426 source[-1] = (MCode)(as->mcp-source);
427}
428
429/* Return label pointing to current PC. */
430#define emit_label(as) ((as)->mcp)
431
432/* jcc target */
433static void emit_jcc(ASMState *as, int cc, MCode *target)
434{
435 MCode *p = as->mcp;
436 int32_t addr = (int32_t)(target - p);
437 *(int32_t *)(p-4) = addr;
438 p[-5] = (MCode)(XI_JCCn+(cc&15));
439 p[-6] = 0x0f;
440 as->mcp = p - 6;
441}
442
443/* call target */
444static void emit_call_(ASMState *as, MCode *target)
445{
446 MCode *p = as->mcp;
447 *(int32_t *)(p-4) = (int32_t)(target - p);
448 p[-5] = XI_CALL;
449 as->mcp = p - 5;
450}
451
452#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
453
454/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
455#define emit_setargr(as, narg, r) \
456  emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4)
457#define emit_setargi(as, narg, imm) \
458 emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
459#define emit_setargp(as, narg, ptr) \
460 emit_setargi(as, (narg), ptr2addr((ptr)))
461
462/* -- Register allocator debugging ---------------------------------------- */
463
464/* #define LUAJIT_DEBUG_RA */
465
466#ifdef LUAJIT_DEBUG_RA
467
468#include <stdio.h>
469#include <stdarg.h>
470
471#define RIDNAME(name) #name,
472static const char *const ra_regname[] = {
473 GPRDEF(RIDNAME)
474 FPRDEF(RIDNAME)
475 "mrm",
476 NULL
477};
478#undef RIDNAME
479
480static char ra_dbg_buf[65536];
481static char *ra_dbg_p;
482static char *ra_dbg_merge;
483static MCode *ra_dbg_mcp;
484
485static void ra_dstart(void)
486{
487 ra_dbg_p = ra_dbg_buf;
488 ra_dbg_merge = NULL;
489 ra_dbg_mcp = NULL;
490}
491
492static void ra_dflush(void)
493{
494 fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
495 ra_dstart();
496}
497
498static void ra_dprintf(ASMState *as, const char *fmt, ...)
499{
500 char *p;
501 va_list argp;
502 va_start(argp, fmt);
503 p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
504 ra_dbg_mcp = NULL;
505  p += sprintf(p, "%08x \e[36m%04d ", (unsigned int)(uintptr_t)as->mcp, as->curins-REF_BIAS);
506 for (;;) {
507 const char *e = strchr(fmt, '$');
508 if (e == NULL) break;
509 memcpy(p, fmt, (size_t)(e-fmt));
510 p += e-fmt;
511 if (e[1] == 'r') {
512 Reg r = va_arg(argp, Reg) & RID_MASK;
513 if (r <= RID_MAX) {
514 const char *q;
515 for (q = ra_regname[r]; *q; q++)
516 *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
517 } else {
518 *p++ = '?';
519 lua_assert(0);
520 }
521 } else if (e[1] == 'f' || e[1] == 'i') {
522 IRRef ref;
523 if (e[1] == 'f')
524 ref = va_arg(argp, IRRef);
525 else
526 ref = va_arg(argp, IRIns *) - as->ir;
527 if (ref >= REF_BIAS)
528 p += sprintf(p, "%04d", ref - REF_BIAS);
529 else
530 p += sprintf(p, "K%03d", REF_BIAS - ref);
531 } else if (e[1] == 's') {
532 uint32_t slot = va_arg(argp, uint32_t);
533 p += sprintf(p, "[esp+0x%x]", sps_scale(slot));
534 } else {
535 lua_assert(0);
536 }
537 fmt = e+2;
538 }
539 va_end(argp);
540 while (*fmt)
541 *p++ = *fmt++;
542 *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
543 if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
544 fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
545 p = ra_dbg_buf;
546 }
547 ra_dbg_p = p;
548}
549
550#define RA_DBG_START() ra_dstart()
551#define RA_DBG_FLUSH() ra_dflush()
552#define RA_DBG_REF() \
553 do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
554 ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
555#define RA_DBGX(x) ra_dprintf x
556
557#else
558#define RA_DBG_START() ((void)0)
559#define RA_DBG_FLUSH() ((void)0)
560#define RA_DBG_REF() ((void)0)
561#define RA_DBGX(x) ((void)0)
562#endif
563
564/* -- Register allocator -------------------------------------------------- */
565
566#define ra_free(as, r) rset_set(as->freeset, (r))
567#define ra_modified(as, r) rset_set(as->modset, (r))
568
569#define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
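/* Since code is generated backwards, registers are assigned at the uses of
** an IR instruction before its definition is reached. At the definition,
** ra_used() thus tells whether any later (already emitted) instruction
** actually consumed the value from a register or spill slot.
*/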
570
571/* Set up the register allocator. */
572static void ra_setup(ASMState *as)
573{
574 /* Initially all regs (except the stack pointer) are free for use. */
575 as->freeset = RSET_ALL;
576 as->modset = RSET_EMPTY;
577 as->phiset = RSET_EMPTY;
578 memset(as->phireg, 0, sizeof(as->phireg));
579 memset(as->cost, 0, sizeof(as->cost));
580 as->cost[RID_ESP] = REGCOST(~0u, 0u);
581
582  /* Starting slots for spill slot allocation. */
583 as->evenspill = (SPS_FIRST+1)&~1;
584 as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
585}
586
587/* Rematerialize constants. */
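/* Rematerialization re-emits the constant load here instead of spilling and
** reloading it. Because code generation runs backwards, the load ends up
** immediately before the already emitted uses of the register.
*/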
588static Reg ra_rematk(ASMState *as, IRIns *ir)
589{
590 Reg r = ir->r;
591 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
592 ra_free(as, r);
593 ra_modified(as, r);
594 ir->r = RID_INIT; /* Do not keep any hint. */
595 RA_DBGX((as, "remat $i $r", ir, r));
596 if (ir->o == IR_KNUM) {
597 emit_loadn(as, r, ir_knum(ir));
598 } else if (ir->o == IR_BASE) {
599 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
600 emit_getgl(as, r, jit_base);
601 } else {
602 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
603 ir->o == IR_KPTR || ir->o == IR_KNULL);
604 emit_loadi(as, r, ir->i);
605 }
606 return r;
607}
608
609/* Force a spill. Allocate a new spill slot if needed. */
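/* Spill slots are 4 bytes wide (see sps_scale()). FP numbers need two
** consecutive slots starting at an even slot, so integer spills first reuse
** the odd slot left over from a previous FP allocation (as->oddspill)
** before growing as->evenspill by another pair.
*/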
610static int32_t ra_spill(ASMState *as, IRIns *ir)
611{
612 int32_t slot = ir->s;
613 if (!ra_hasspill(slot)) {
614 if (irt_isnum(ir->t)) {
615 slot = as->evenspill;
616 as->evenspill += 2;
617 } else if (as->oddspill) {
618 slot = as->oddspill;
619 as->oddspill = 0;
620 } else {
621 slot = as->evenspill;
622 as->oddspill = slot+1;
623 as->evenspill += 2;
624 }
625 if (as->evenspill > 256)
626 lj_trace_err(as->J, LJ_TRERR_SPILLOV);
627 ir->s = (uint8_t)slot;
628 }
629 return sps_scale(slot);
630}
631
632/* Restore a register (marked as free). Rematerialize or force a spill. */
633static Reg ra_restore(ASMState *as, IRRef ref)
634{
635 IRIns *ir = IR(ref);
636 if (irref_isk(ref) || ref == REF_BASE) {
637 return ra_rematk(as, ir);
638 } else {
639 Reg r = ir->r;
640 lua_assert(ra_hasreg(r));
641 ra_free(as, r);
642 ra_modified(as, r);
643 ra_sethint(ir->r, r); /* Keep hint. */
644 RA_DBGX((as, "restore $i $r", ir, r));
645 emit_movrmro(as, r, RID_ESP, ra_spill(as, ir)); /* Force a spill. */
646 return r;
647 }
648}
649
650/* Save a register to a spill slot. */
651static LJ_AINLINE void ra_save(ASMState *as, IRIns *ir, Reg r)
652{
653 RA_DBGX((as, "save $i $r", ir, r));
654 emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto,
655 r, RID_ESP, sps_scale(ir->s));
656}
657
658#define MINCOST(r) \
659 if (LJ_LIKELY(allow&RID2RSET(r)) && as->cost[r] < cost) \
660 cost = as->cost[r]
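/* as->cost[r] records which IR reference currently occupies r (extracted
** with regcost_ref()). ra_evict() below scans only the registers in the
** allowed set and restores the one with the lowest cost word.
*/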
661
662/* Evict the register with the lowest cost, forcing a restore. */
663static Reg ra_evict(ASMState *as, RegSet allow)
664{
665 RegCost cost = ~(RegCost)0;
666 if (allow < RID2RSET(RID_MAX_GPR)) {
667 MINCOST(RID_EAX);MINCOST(RID_ECX);MINCOST(RID_EDX);MINCOST(RID_EBX);
668 MINCOST(RID_EBP);MINCOST(RID_ESI);MINCOST(RID_EDI);
669#if LJ_64
670 MINCOST(RID_R8D);MINCOST(RID_R9D);MINCOST(RID_R10D);MINCOST(RID_R11D);
671 MINCOST(RID_R12D);MINCOST(RID_R13D);MINCOST(RID_R14D);MINCOST(RID_R15D);
672#endif
673 } else {
674 MINCOST(RID_XMM0);MINCOST(RID_XMM1);MINCOST(RID_XMM2);MINCOST(RID_XMM3);
675 MINCOST(RID_XMM4);MINCOST(RID_XMM5);MINCOST(RID_XMM6);MINCOST(RID_XMM7);
676#if LJ_64
677 MINCOST(RID_XMM8);MINCOST(RID_XMM9);MINCOST(RID_XMM10);MINCOST(RID_XMM11);
678 MINCOST(RID_XMM12);MINCOST(RID_XMM13);MINCOST(RID_XMM14);MINCOST(RID_XMM15);
679#endif
680 }
681 lua_assert(allow != RSET_EMPTY);
682 lua_assert(regcost_ref(cost) >= as->T->nk && regcost_ref(cost) < as->T->nins);
683 return ra_restore(as, regcost_ref(cost));
684}
685
686/* Pick any register (marked as free). Evict on-demand. */
687static LJ_AINLINE Reg ra_pick(ASMState *as, RegSet allow)
688{
689 RegSet pick = as->freeset & allow;
690 if (!pick)
691 return ra_evict(as, allow);
692 else
693 return rset_picktop(pick);
694}
695
696/* Get a scratch register (marked as free). */
697static LJ_AINLINE Reg ra_scratch(ASMState *as, RegSet allow)
698{
699 Reg r = ra_pick(as, allow);
700 ra_modified(as, r);
701 RA_DBGX((as, "scratch $r", r));
702 return r;
703}
704
705/* Evict all registers from a set (if not free). */
706static void ra_evictset(ASMState *as, RegSet drop)
707{
708 as->modset |= drop;
709 drop &= ~as->freeset;
710 while (drop) {
711 Reg r = rset_picktop(drop);
712 ra_restore(as, regcost_ref(as->cost[r]));
713 rset_clear(drop, r);
714 checkmclim(as);
715 }
716}
717
718/* Allocate a register for ref from the allowed set of registers.
719** Note: this function assumes the ref does NOT have a register yet!
720** Picks an optimal register, sets the cost and marks the register as non-free.
721*/
722static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
723{
724 IRIns *ir = IR(ref);
725 RegSet pick = as->freeset & allow;
726 Reg r;
727 lua_assert(ra_noreg(ir->r));
728 if (pick) {
729 /* First check register hint from propagation or PHI. */
730 if (ra_hashint(ir->r)) {
731 r = ra_gethint(ir->r);
732 if (rset_test(pick, r)) /* Use hint register if possible. */
733 goto found;
734 /* Rematerialization is cheaper than missing a hint. */
735 if (rset_test(allow, r) && irref_isk(regcost_ref(as->cost[r]))) {
736 ra_rematk(as, IR(regcost_ref(as->cost[r])));
737 goto found;
738 }
739 RA_DBGX((as, "hintmiss $f $r", ref, r));
740 }
741 /* Invariants should preferably get unused registers. */
742 if (ref < as->loopref && !irt_isphi(ir->t))
743 r = rset_pickbot(pick);
744 else
745 r = rset_picktop(pick);
746 } else {
747 r = ra_evict(as, allow);
748 }
749found:
750 RA_DBGX((as, "alloc $f $r", ref, r));
751 ir->r = (uint8_t)r;
752 rset_clear(as->freeset, r);
753 as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t));
754 return r;
755}
756
757/* Allocate a register on-demand. */
758static LJ_INLINE Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
759{
760 Reg r = IR(ref)->r;
761 /* Note: allow is ignored if the register is already allocated. */
762 if (ra_noreg(r)) r = ra_allocref(as, ref, allow);
763 return r;
764}
765
766/* Rename register allocation and emit move. */
767static void ra_rename(ASMState *as, Reg down, Reg up)
768{
769 IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
770 IR(ref)->r = (uint8_t)up;
771 as->cost[down] = 0;
772 lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
773 lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
774 rset_set(as->freeset, down); /* 'down' is free ... */
775 rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
776 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
777 emit_movrr(as, down, up); /* Backwards code generation needs inverse move. */
778 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
779 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
780 ren = tref_ref(lj_ir_emit(as->J));
781 as->ir = as->T->ir; /* The IR may have been reallocated. */
782 IR(ren)->r = (uint8_t)down;
783 IR(ren)->s = SPS_NONE;
784 }
785}
786
787/* Pick a destination register (marked as free).
788** Caveat: allow is ignored if there's already a destination register.
789** Use ra_destreg() to get a specific register.
790*/
791static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow)
792{
793 Reg dest = ir->r;
794 if (ra_hasreg(dest)) {
795 ra_free(as, dest);
796 ra_modified(as, dest);
797 } else {
798 dest = ra_scratch(as, allow);
799 }
800 if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest);
801 return dest;
802}
803
804/* Force a specific destination register (marked as free). */
805static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
806{
807 Reg dest = ra_dest(as, ir, RID2RSET(r));
808 if (dest != r) {
809 ra_scratch(as, RID2RSET(r));
810 emit_movrr(as, dest, r);
811 }
812}
813
814/* Propagate dest register to left reference. Emit moves as needed.
815** This is a required fixup step for all 2-operand machine instructions.
816*/
817static void ra_left(ASMState *as, Reg dest, IRRef lref)
818{
819 IRIns *ir = IR(lref);
820 Reg left = ir->r;
821 if (ra_noreg(left)) {
822 if (irref_isk(lref)) {
823 if (ir->o == IR_KNUM) {
824 cTValue *tv = ir_knum(ir);
825 /* FP remat needs a load except for +0. Still better than eviction. */
826 if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
827 emit_loadn(as, dest, tv);
828 return;
829 }
830 } else {
831 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
832 ir->o == IR_KPTR || ir->o == IR_KNULL);
833 emit_loadi(as, dest, ir->i);
834 return;
835 }
836 }
837 if (!ra_hashint(left) && !iscrossref(as, lref))
838 ra_sethint(ir->r, dest); /* Propagate register hint. */
839 left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR);
840 }
841 /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */
842 if (dest != left) {
843 /* Use register renaming if dest is the PHI reg. */
844 if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
845 ra_modified(as, left);
846 ra_rename(as, left, dest);
847 } else {
848 emit_movrr(as, dest, left);
849 }
850 }
851}
852
853/* -- Exit stubs ---------------------------------------------------------- */
854
855/* Generate an exit stub group at the bottom of the reserved MCode memory. */
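/* Each exit stub is a 2-byte "push imm8" with the low byte of its exit
** number, followed by a short forward jump into the common tail of the
** group (the last stub simply falls through). The tail pushes the high
** byte of the exit number, stores the DISPATCH pointer into the ExitInfo
** area on the stack and jumps to lj_vm_exit_handler, which reassembles the
** full exit number from the two pushed bytes.
*/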
856static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
857{
858 ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
859 MCode *mxp = as->mcbot;
860 MCode *mxpstart = mxp;
861 if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop)
862 asm_mclimit(as);
863 /* Push low byte of exitno for each exit stub. */
864 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
865 for (i = 1; i < EXITSTUBS_PER_GROUP; i++) {
866 *mxp++ = XI_JMPs; *mxp++ = (MCode)((2+2)*(EXITSTUBS_PER_GROUP - i) - 2);
867 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)(groupofs + i);
868 }
869 /* Push the high byte of the exitno for each exit stub group. */
870 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
871 /* Store DISPATCH in ExitInfo->dispatch. Account for the two push ops. */
872 *mxp++ = XI_MOVmi;
873 *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
874 *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
875 *mxp++ = 2*sizeof(void *);
876 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
877 /* Jump to exit handler which fills in the ExitState. */
878 *mxp++ = XI_JMP; mxp += 4;
879 *((int32_t *)(mxp-4)) = (int32_t)((MCode *)lj_vm_exit_handler - mxp);
880 /* Commit the code for this group (even if assembly fails later on). */
881 lj_mcode_commitbot(as->J, mxp);
882 as->mcbot = mxp;
883 as->mclim = as->mcbot + MCLIM_REDZONE;
884 return mxpstart;
885}
886
887/* Set up all needed exit stubs. */
888static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
889{
890 ExitNo i;
891 if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
892 lj_trace_err(as->J, LJ_TRERR_SNAPOV);
893 for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
894 if (as->J->exitstubgroup[i] == NULL)
895 as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
896}
897
898/* -- Snapshot and guard handling ----------------------------------------- */
899
900/* Can we rematerialize a KNUM instead of forcing a spill? */
901static int asm_snap_canremat(ASMState *as)
902{
903 Reg r;
904 for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++)
905 if (irref_isk(regcost_ref(as->cost[r])))
906 return 1;
907 return 0;
908}
909
910/* Allocate registers or spill slots for refs escaping to a snapshot. */
911static void asm_snap_alloc(ASMState *as)
912{
913 SnapShot *snap = &as->T->snap[as->snapno];
914 IRRef2 *map = &as->T->snapmap[snap->mapofs];
915 BCReg s, nslots = snap->nslots;
916 for (s = 0; s < nslots; s++) {
917 IRRef ref = snap_ref(map[s]);
918 if (!irref_isk(ref)) {
919 IRIns *ir = IR(ref);
920 if (!ra_used(ir) && ir->o != IR_FRAME) {
921 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
922 /* Not a var-to-invar ref and got a free register (or a remat)? */
923 if ((!iscrossref(as, ref) || irt_isphi(ir->t)) &&
924 ((as->freeset & allow) ||
925 (allow == RSET_FPR && asm_snap_canremat(as)))) {
926 ra_allocref(as, ref, allow); /* Allocate a register. */
927 checkmclim(as);
928 RA_DBGX((as, "snapreg $f $r", ref, ir->r));
929 } else {
930 ra_spill(as, ir); /* Otherwise force a spill slot. */
931 RA_DBGX((as, "snapspill $f $s", ref, ir->s));
932 }
933 }
934 }
935 }
936}
937
938/* All guards for a snapshot use the same exitno. This is currently the
939** same as the snapshot number. Since the exact origin of the exit cannot
940** be determined, all guards for the same snapshot must exit with the same
941** RegSP mapping.
942** A renamed ref which has been used in a prior guard for the same snapshot
943** would cause an inconsistency. The easy way out is to force a spill slot.
944*/
945static int asm_snap_checkrename(ASMState *as, IRRef ren)
946{
947 SnapShot *snap = &as->T->snap[as->snapno];
948 IRRef2 *map = &as->T->snapmap[snap->mapofs];
949 BCReg s, nslots = snap->nslots;
950 for (s = 0; s < nslots; s++) {
951 IRRef ref = snap_ref(map[s]);
952 if (ref == ren) {
953 IRIns *ir = IR(ref);
954 ra_spill(as, ir); /* Register renamed, so force a spill slot. */
955 RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
956 return 1; /* Found. */
957 }
958 }
959 return 0; /* Not found. */
960}
961
962/* Prepare snapshot for next guard instruction. */
963static void asm_snap_prep(ASMState *as)
964{
965 if (as->curins < as->snapref) {
966 do {
967 lua_assert(as->snapno != 0);
968 as->snapno--;
969 as->snapref = as->T->snap[as->snapno].ref;
970 } while (as->curins < as->snapref);
971 asm_snap_alloc(as);
972 as->snaprename = as->T->nins;
973 } else {
974 /* Process any renames above the highwater mark. */
975 for (; as->snaprename < as->T->nins; as->snaprename++) {
976 IRIns *ir = IR(as->snaprename);
977 if (asm_snap_checkrename(as, ir->op1))
978 ir->op2 = REF_BIAS-1; /* Kill rename. */
979 }
980 }
981}
982
983/* Emit conditional branch to exit for guard.
984** It's important to emit this *after* all registers have been allocated,
985** because rematerializations may invalidate the flags.
986*/
987static void asm_guardcc(ASMState *as, int cc)
988{
989 MCode *target = exitstub_addr(as->J, as->snapno);
990 MCode *p = as->mcp;
991 if (LJ_UNLIKELY(p == as->invmcp)) {
992 as->loopinv = 1;
993 *(int32_t *)(p+1) = target - (p+5);
994 target = p;
995 cc ^= 1;
996 if (as->realign) {
997 emit_sjcc(as, cc, target);
998 return;
999 }
1000 }
1001 emit_jcc(as, cc, target);
1002}
1003
1004/* -- Memory operand fusion ----------------------------------------------- */
1005
1006/* Arch-specific field offsets. */
1007static const uint8_t field_ofs[IRFL__MAX+1] = {
1008#define FLOFS(name, type, field) (uint8_t)offsetof(type, field),
1009IRFLDEF(FLOFS)
1010#undef FLOFS
1011 0
1012};
1013
1014/* Limit linear search to this distance. Avoids O(n^2) behavior. */
1015#define CONFLICT_SEARCH_LIM 15
1016
1017/* Check that there is no conflicting instruction between curins and ref. */
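/* Used by the fusion code below: e.g. an FLOAD or ALOAD may only be fused
** into a later instruction if no potentially aliasing store (FSTORE, the
** matching xSTORE via IRDELTA_L2S, or a NEWREF) was emitted in between.
*/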
1018static int noconflict(ASMState *as, IRRef ref, IROp conflict)
1019{
1020 IRIns *ir = as->ir;
1021 IRRef i = as->curins;
1022 if (i > ref + CONFLICT_SEARCH_LIM)
1023 return 0; /* Give up, ref is too far away. */
1024 while (--i > ref)
1025 if (ir[i].o == conflict)
1026 return 0; /* Conflict found. */
1027 return 1; /* Ok, no conflict. */
1028}
1029
1030/* Fuse array reference into memory operand. */
1031static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
1032{
1033 IRIns *irb = IR(ir->op1);
1034 IRIns *ira, *irx;
1035 lua_assert(ir->o == IR_AREF);
1036 lua_assert(irb->o == IR_FLOAD && irb->op2 == IRFL_TAB_ARRAY);
1037 ira = IR(irb->op1);
1038 if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
1039 noconflict(as, irb->op1, IR_NEWREF)) {
1040 /* We can avoid the FLOAD of t->array for colocated arrays. */
1041 as->mrm.base = (uint8_t)ra_alloc1(as, irb->op1, allow); /* Table obj. */
1042 as->mrm.ofs = -(int32_t)(ira->op1*sizeof(TValue)); /* Ofs to colo array. */
1043 } else {
1044 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); /* Array base. */
1045 as->mrm.ofs = 0;
1046 }
1047 irx = IR(ir->op2);
1048 if (irref_isk(ir->op2)) {
1049 as->mrm.ofs += 8*irx->i;
1050 as->mrm.idx = RID_NONE;
1051 } else {
1052 rset_clear(allow, as->mrm.base);
1053 as->mrm.scale = XM_SCALE8;
1054 /* Fuse a constant ADD (e.g. t[i+1]) into the offset.
1055 ** Doesn't help much without ABCelim, but reduces register pressure.
1056 */
1057 if (mayfuse(as, ir->op2) && ra_noreg(irx->r) &&
1058 irx->o == IR_ADD && irref_isk(irx->op2)) {
1059 as->mrm.ofs += 8*IR(irx->op2)->i;
1060 as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow);
1061 } else {
1062 as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow);
1063 }
1064 }
1065}
1066
1067/* Fuse array/hash/upvalue reference into memory operand.
1068** Caveat: this may allocate GPRs for the base/idx registers. Be sure to
1069** pass the final allow mask, excluding any GPRs used for other inputs.
1070** In particular: 2-operand GPR instructions need to call ra_dest() first!
1071*/
1072static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
1073{
1074 IRIns *ir = IR(ref);
1075 if (ra_noreg(ir->r)) {
1076 switch ((IROp)ir->o) {
1077 case IR_AREF:
1078 if (mayfuse(as, ref)) {
1079 asm_fusearef(as, ir, allow);
1080 return;
1081 }
1082 break;
1083 case IR_HREFK:
1084 if (mayfuse(as, ref)) {
1085 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
1086 as->mrm.ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
1087 as->mrm.idx = RID_NONE;
1088 return;
1089 }
1090 break;
1091 case IR_UREFC:
1092 if (irref_isk(ir->op1)) {
1093 GCfunc *fn = ir_kfunc(IR(ir->op1));
1094 GCupval *uv = &gcref(fn->l.uvptr[ir->op2])->uv;
1095 as->mrm.ofs = ptr2addr(&uv->tv);
1096 as->mrm.base = as->mrm.idx = RID_NONE;
1097 return;
1098 }
1099 break;
1100 default:
1101 lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO);
1102 break;
1103 }
1104 }
1105 as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow);
1106 as->mrm.ofs = 0;
1107 as->mrm.idx = RID_NONE;
1108}
1109
1110/* Fuse FLOAD/FREF reference into memory operand. */
1111static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
1112{
1113 lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
1114 as->mrm.ofs = field_ofs[ir->op2];
1115 as->mrm.idx = RID_NONE;
1116 if (irref_isk(ir->op1)) {
1117 as->mrm.ofs += IR(ir->op1)->i;
1118 as->mrm.base = RID_NONE;
1119 } else {
1120 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
1121 }
1122}
1123
1124/* Fuse string reference into memory operand. */
1125static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
1126{
1127 IRIns *irr;
1128 lua_assert(ir->o == IR_STRREF);
1129 as->mrm.idx = as->mrm.base = RID_NONE;
1130 as->mrm.scale = XM_SCALE1;
1131 as->mrm.ofs = sizeof(GCstr);
1132 if (irref_isk(ir->op1)) {
1133 as->mrm.ofs += IR(ir->op1)->i;
1134 } else {
1135 Reg r = ra_alloc1(as, ir->op1, allow);
1136 rset_clear(allow, r);
1137 as->mrm.base = (uint8_t)r;
1138 }
1139 irr = IR(ir->op2);
1140 if (irref_isk(ir->op2)) {
1141 as->mrm.ofs += irr->i;
1142 } else {
1143 Reg r;
1144 /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
1145 if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
1146 as->mrm.ofs += IR(irr->op2)->i;
1147 r = ra_alloc1(as, irr->op1, allow);
1148 } else {
1149 r = ra_alloc1(as, ir->op2, allow);
1150 }
1151 if (as->mrm.base == RID_NONE)
1152 as->mrm.base = (uint8_t)r;
1153 else
1154 as->mrm.idx = (uint8_t)r;
1155 }
1156}
1157
1158/* Fuse load into memory operand. */
1159static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
1160{
1161 IRIns *ir = IR(ref);
1162 if (ra_hasreg(ir->r)) {
1163 if (allow != RSET_EMPTY) return ir->r; /* Fast path. */
1164 fusespill:
1165 /* Force a spill if only memory operands are allowed (asm_x87load). */
1166 as->mrm.base = RID_ESP;
1167 as->mrm.ofs = ra_spill(as, ir);
1168 as->mrm.idx = RID_NONE;
1169 return RID_MRM;
1170 }
1171 if (ir->o == IR_KNUM) {
1172 lua_assert(allow != RSET_EMPTY);
1173 if (!(as->freeset & ~as->modset & RSET_FPR)) {
1174 as->mrm.ofs = ptr2addr(ir_knum(ir));
1175 as->mrm.base = as->mrm.idx = RID_NONE;
1176 return RID_MRM;
1177 }
1178 } else if (mayfuse(as, ref)) {
1179 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
1180 if (ir->o == IR_SLOAD) {
1181 if (!irt_isint(ir->t) && !(ir->op2 & IRSLOAD_PARENT)) {
1182 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
1183 as->mrm.ofs = 8*((int32_t)ir->op1-1);
1184 as->mrm.idx = RID_NONE;
1185 return RID_MRM;
1186 }
1187 } else if (ir->o == IR_FLOAD) {
1188 /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
1189 if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
1190 asm_fusefref(as, ir, xallow);
1191 return RID_MRM;
1192 }
1193 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
1194 if (noconflict(as, ref, ir->o + IRDELTA_L2S)) {
1195 asm_fuseahuref(as, ir->op1, xallow);
1196 return RID_MRM;
1197 }
1198 } else if (ir->o == IR_XLOAD) {
1199 /* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
1200 ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
1201 */
1202 if (irt_isint(ir->t)) {
1203 asm_fusestrref(as, IR(ir->op1), xallow);
1204 return RID_MRM;
1205 }
1206 }
1207 }
1208 if (!(as->freeset & allow) &&
1209 (allow == RSET_EMPTY || ra_hasspill(ir->s) || ref < as->loopref))
1210 goto fusespill;
1211 return ra_allocref(as, ref, allow);
1212}
1213
1214/* -- Type conversions ---------------------------------------------------- */
1215
1216static void asm_tonum(ASMState *as, IRIns *ir)
1217{
1218 Reg dest = ra_dest(as, ir, RSET_FPR);
1219 Reg left = asm_fuseload(as, ir->op1, RSET_GPR);
1220 emit_mrm(as, XO_CVTSI2SD, dest, left);
1221 if (!(as->flags & JIT_F_SPLIT_XMM))
1222 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
1223}
1224
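/* Guarded number-to-int conversion: truncate with CVTTSD2SI, convert the
** result back and compare it against the original with UCOMISD. The CC_NE
** and CC_P guards thus take the side exit for NaNs and for numbers that
** are not exact 32-bit integers.
*/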
1225static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
1226{
1227 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
1228 Reg dest = ra_dest(as, ir, RSET_GPR);
1229 asm_guardcc(as, CC_P);
1230 asm_guardcc(as, CC_NE);
1231 emit_rr(as, XO_UCOMISD, left, tmp);
1232 emit_rr(as, XO_CVTSI2SD, tmp, dest);
1233 if (!(as->flags & JIT_F_SPLIT_XMM))
1234 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
1235 emit_rr(as, XO_CVTTSD2SI, dest, left);
1236 /* Can't fuse since left is needed twice. */
1237}
1238
1239static void asm_toint(ASMState *as, IRIns *ir)
1240{
1241 Reg dest = ra_dest(as, ir, RSET_GPR);
1242 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
1243 emit_mrm(as, XO_CVTSD2SI, dest, left);
1244}
1245
1246static void asm_tobit(ASMState *as, IRIns *ir)
1247{
1248 Reg dest = ra_dest(as, ir, RSET_GPR);
1249 Reg tmp = ra_noreg(IR(ir->op1)->r) ?
1250 ra_alloc1(as, ir->op1, RSET_FPR) :
1251 ra_scratch(as, RSET_FPR);
1252 Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
1253 emit_rr(as, XO_MOVDto, tmp, dest);
1254 emit_mrm(as, XO_ADDSD, tmp, right);
1255 ra_left(as, tmp, ir->op1);
1256}
1257
1258static void asm_strto(ASMState *as, IRIns *ir)
1259{
1260 Reg str;
1261 int32_t ofs;
1262 RegSet drop = RSET_SCRATCH;
1263 /* Force a spill slot for the destination register (if any). */
1264 if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
1265 rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
1266 ra_evictset(as, drop);
1267 asm_guardcc(as, CC_E);
1268 emit_rr(as, XO_TEST, RID_RET, RID_RET);
1269 /* int lj_str_numconv(const char *s, TValue *n) */
1270 emit_call(as, lj_str_numconv);
1271 ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
1272 if (ofs == 0) {
1273 emit_setargr(as, 2, RID_ESP);
1274 } else {
1275 emit_setargr(as, 2, RID_RET);
1276 emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
1277 }
1278 emit_setargr(as, 1, RID_RET);
1279 str = ra_alloc1(as, ir->op1, RSET_GPR);
1280 emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
1281}
1282
1283static void asm_tostr(ASMState *as, IRIns *ir)
1284{
1285 IRIns *irl = IR(ir->op1);
1286 ra_destreg(as, ir, RID_RET);
1287 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
1288 as->gcsteps++;
1289 if (irt_isnum(irl->t)) {
1290 /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
1291 emit_call(as, lj_str_fromnum);
1292 emit_setargr(as, 1, RID_RET);
1293 emit_getgl(as, RID_RET, jit_L);
1294 emit_setargr(as, 2, RID_RET);
1295 emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
1296 } else {
1297 /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
1298 emit_call(as, lj_str_fromint);
1299 emit_setargr(as, 1, RID_RET);
1300 emit_getgl(as, RID_RET, jit_L);
1301 emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
1302 }
1303}
1304
1305/* -- Memory references --------------------------------------------------- */
1306
1307static void asm_aref(ASMState *as, IRIns *ir)
1308{
1309 Reg dest = ra_dest(as, ir, RSET_GPR);
1310 asm_fusearef(as, ir, RSET_GPR);
1311 if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
1312 emit_mrm(as, XO_LEA, dest, RID_MRM);
1313 else if (as->mrm.base != dest)
1314 emit_rr(as, XO_MOV, dest, as->mrm.base);
1315}
1316
1317/* Must match with hashkey() and hashrot() in lj_tab.c. */
1318static uint32_t ir_khash(IRIns *ir)
1319{
1320 uint32_t lo, hi;
1321 if (irt_isstr(ir->t)) {
1322 return ir_kstr(ir)->hash;
1323 } else if (irt_isnum(ir->t)) {
1324 lo = ir_knum(ir)->u32.lo;
1325 hi = ir_knum(ir)->u32.hi & 0x7fffffff;
1326 } else if (irt_ispri(ir->t)) {
1327 lua_assert(!irt_isnil(ir->t));
1328 return irt_type(ir->t)-IRT_FALSE;
1329 } else {
1330 lua_assert(irt_isaddr(ir->t));
1331 lo = u32ptr(ir_kgc(ir));
1332 hi = lo - 0x04c11db7;
1333 }
1334 lo ^= hi; hi = lj_rol(hi, 14);
1335 lo -= hi; hi = lj_rol(hi, 5);
1336 hi ^= lo; hi -= lj_rol(lo, 27);
1337 return hi;
1338}
1339
1340/* Merge NE(HREF, niltv) check. */
1341static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
1342{
1343 /* Assumes nothing else generates NE of HREF. */
1344 if (ir[1].o == IR_NE && ir[1].op1 == as->curins) {
1345 if (LJ_64 && *as->mcp != XI_ARITHi)
1346 as->mcp += 7+6;
1347 else
1348 as->mcp += 6+6; /* Kill cmp reg, imm32 + jz exit. */
1349 return as->mcp + *(int32_t *)(as->mcp-4); /* Return exit address. */
1350 }
1351 return NULL;
1352}
1353
1354/* Inlined hash lookup. Specialized for key type and for const keys.
1355** The equivalent C code is:
1356** Node *n = hashkey(t, key);
1357** do {
1358** if (lj_obj_equal(&n->key, key)) return &n->val;
1359** } while ((n = nextnode(n)));
1360** return niltv(L);
1361*/
1362static void asm_href(ASMState *as, IRIns *ir)
1363{
1364 MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
1365 RegSet allow = RSET_GPR;
1366 Reg dest = ra_dest(as, ir, allow);
1367 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
1368 Reg key = RID_NONE, tmp = RID_NONE;
1369 IRIns *irkey = IR(ir->op2);
1370 int isk = irref_isk(ir->op2);
1371 IRType1 kt = irkey->t;
1372 uint32_t khash;
1373 MCLabel l_end, l_loop, l_next;
1374
1375 if (!isk) {
1376 rset_clear(allow, tab);
1377 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
1378 if (!irt_isstr(kt))
1379 tmp = ra_scratch(as, rset_exclude(allow, key));
1380 }
1381
1382 /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
1383 l_end = emit_label(as);
1384 if (nilexit)
1385 emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */
1386 else
1387 emit_loada(as, dest, niltvg(J2G(as->J)));
1388
1389 /* Follow hash chain until the end. */
1390 l_loop = emit_sjcc_label(as, CC_NZ);
1391 emit_rr(as, XO_TEST, dest, dest);
1392 emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
1393 l_next = emit_label(as);
1394
1395 /* Type and value comparison. */
1396 emit_sjcc(as, CC_E, l_end);
1397 if (irt_isnum(kt)) {
1398 if (isk) {
1399 /* Assumes -0.0 is already canonicalized to +0.0. */
1400 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
1401 (int32_t)ir_knum(irkey)->u32.lo);
1402 emit_sjcc(as, CC_NE, l_next);
1403 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
1404 (int32_t)ir_knum(irkey)->u32.hi);
1405 } else {
1406 emit_sjcc(as, CC_P, l_next);
1407 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
1408 emit_sjcc(as, CC_A, l_next);
1409 /* The type check avoids NaN penalties and complaints from Valgrind. */
1410 emit_i8(as, ~IRT_NUM);
1411 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1412 }
1413 } else {
1414 if (!irt_ispri(kt)) {
1415 lua_assert(irt_isaddr(kt));
1416 if (isk)
1417 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
1418 ptr2addr(ir_kgc(irkey)));
1419 else
1420 emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
1421 emit_sjcc(as, CC_NE, l_next);
1422 }
1423 lua_assert(!irt_isnil(kt));
1424 emit_i8(as, ~irt_type(kt));
1425 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1426 }
1427 emit_sfixup(as, l_loop);
1428 checkmclim(as);
1429
1430 /* Load main position relative to tab->node into dest. */
1431 khash = isk ? ir_khash(irkey) : 1;
1432 if (khash == 0) {
1433 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
1434 } else {
1435 emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
1436 if ((as->flags & JIT_F_PREFER_IMUL)) {
1437 emit_i8(as, sizeof(Node));
1438 emit_rr(as, XO_IMULi8, dest, dest);
1439 } else {
1440 emit_shifti(as, XOg_SHL, dest, 3);
1441 emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1442 }
1443 if (isk) {
1444 emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
1445 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1446 } else if (irt_isstr(kt)) {
1447 emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
1448 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
1449 } else { /* Must match with hashrot() in lj_tab.c. */
1450 emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
1451 emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
1452 emit_shifti(as, XOg_ROL, tmp, 27);
1453 emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
1454 emit_shifti(as, XOg_ROL, dest, 5);
1455 emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
1456 emit_shifti(as, XOg_ROL, dest, 14);
1457 emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest);
1458 if (irt_isnum(kt)) {
1459 emit_rmro(as, XO_ARITH(XOg_AND), dest, RID_ESP, ra_spill(as, irkey)+4);
1460 emit_loadi(as, dest, 0x7fffffff);
1461 emit_rr(as, XO_MOVDto, key, tmp);
1462 } else {
1463 emit_rr(as, XO_MOV, tmp, key);
1464 emit_rmro(as, XO_LEA, dest, key, -0x04c11db7);
1465 }
1466 }
1467 }
1468}
1469
1470static void asm_hrefk(ASMState *as, IRIns *ir)
1471{
1472 IRIns *kslot = IR(ir->op2);
1473 IRIns *irkey = IR(kslot->op1);
1474 int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
1475 Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
1476 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
1477 MCLabel l_exit;
1478 lua_assert(ofs % sizeof(Node) == 0);
1479 if (ra_hasreg(dest)) {
1480 if (ofs != 0) {
1481 if (dest == node && !(as->flags & JIT_F_LEA_AGU))
1482 emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
1483 else
1484 emit_rmro(as, XO_LEA, dest, node, ofs);
1485 } else if (dest != node) {
1486 emit_rr(as, XO_MOV, dest, node);
1487 }
1488 }
1489 asm_guardcc(as, CC_NE);
1490 l_exit = emit_label(as);
1491 if (irt_isnum(irkey->t)) {
1492 /* Assumes -0.0 is already canonicalized to +0.0. */
1493 emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1494 ofs + (int32_t)offsetof(Node, key.u32.lo),
1495 (int32_t)ir_knum(irkey)->u32.lo);
1496 emit_sjcc(as, CC_NE, l_exit);
1497 emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1498 ofs + (int32_t)offsetof(Node, key.u32.hi),
1499 (int32_t)ir_knum(irkey)->u32.hi);
1500 } else {
1501 if (!irt_ispri(irkey->t)) {
1502 lua_assert(irt_isgcv(irkey->t));
1503 emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
1504 ofs + (int32_t)offsetof(Node, key.gcr),
1505 ptr2addr(ir_kgc(irkey)));
1506 emit_sjcc(as, CC_NE, l_exit);
1507 }
1508 lua_assert(!irt_isnil(irkey->t));
1509 emit_i8(as, ~irt_type(irkey->t));
1510 emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1511 ofs + (int32_t)offsetof(Node, key.it));
1512 }
1513}
1514
1515static void asm_newref(ASMState *as, IRIns *ir)
1516{
1517 IRRef keyref = ir->op2;
1518 IRIns *irkey = IR(keyref);
1519 RegSet allow = RSET_GPR;
1520 Reg tab, tmp;
1521 ra_destreg(as, ir, RID_RET);
1522 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
1523 tab = ra_alloc1(as, ir->op1, allow);
1524 tmp = ra_scratch(as, rset_clear(allow, tab));
1525 /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
1526 emit_call(as, lj_tab_newkey);
1527 emit_setargr(as, 1, tmp);
1528 emit_setargr(as, 2, tab);
1529 emit_getgl(as, tmp, jit_L);
1530 if (irt_isnum(irkey->t)) {
1531 /* For numbers use the constant itself or a spill slot as a TValue. */
1532 if (irref_isk(keyref)) {
1533 emit_setargp(as, 3, ir_knum(irkey));
1534 } else {
1535 emit_setargr(as, 3, tmp);
1536 emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
1537 }
1538 } else {
1539 /* Otherwise use g->tmptv to hold the TValue. */
1540 lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
1541 emit_setargr(as, 3, tmp);
1542 if (!irref_isk(keyref)) {
1543 Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
1544 emit_movtomro(as, src, tmp, 0);
1545 } else if (!irt_ispri(irkey->t)) {
1546 emit_movmroi(as, tmp, 0, irkey->i);
1547 }
1548 emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
1549 emit_loada(as, tmp, &J2G(as->J)->tmptv);
1550 }
1551}
1552
1553static void asm_uref(ASMState *as, IRIns *ir)
1554{
1555 /* NYI: Check that UREFO is still open and not aliasing a slot. */
1556 if (ra_used(ir)) {
1557 Reg dest = ra_dest(as, ir, RSET_GPR);
1558 if (irref_isk(ir->op1)) {
1559 GCfunc *fn = ir_kfunc(IR(ir->op1));
1560 TValue **v = &gcref(fn->l.uvptr[ir->op2])->uv.v;
1561 emit_rma(as, XO_MOV, dest, v);
1562 } else {
1563 Reg uv = ra_scratch(as, RSET_GPR);
1564 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
1565 if (ir->o == IR_UREFC) {
1566 emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
1567 asm_guardcc(as, CC_NE);
1568 emit_i8(as, 1);
1569 emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
1570 } else {
1571 emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
1572 }
1573 emit_rmro(as, XO_MOV, uv, func,
1574 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)ir->op2);
1575 }
1576 }
1577}
1578
1579static void asm_fref(ASMState *as, IRIns *ir)
1580{
1581 Reg dest = ra_dest(as, ir, RSET_GPR);
1582 asm_fusefref(as, ir, RSET_GPR);
1583 emit_mrm(as, XO_LEA, dest, RID_MRM);
1584}
1585
1586static void asm_strref(ASMState *as, IRIns *ir)
1587{
1588 Reg dest = ra_dest(as, ir, RSET_GPR);
1589 asm_fusestrref(as, ir, RSET_GPR);
1590 if (as->mrm.base == RID_NONE)
1591 emit_loadi(as, dest, as->mrm.ofs);
1592 else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
1593 emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
1594 else
1595 emit_mrm(as, XO_LEA, dest, RID_MRM);
1596}
1597
1598/* -- Loads and stores ---------------------------------------------------- */
1599
1600static void asm_fload(ASMState *as, IRIns *ir)
1601{
1602 Reg dest = ra_dest(as, ir, RSET_GPR);
1603 x86Op xo;
1604 asm_fusefref(as, ir, RSET_GPR);
1605 switch (irt_type(ir->t)) {
1606 case IRT_I8: xo = XO_MOVSXb; break;
1607 case IRT_U8: xo = XO_MOVZXb; break;
1608 case IRT_I16: xo = XO_MOVSXw; break;
1609 case IRT_U16: xo = XO_MOVZXw; break;
1610 default:
1611 lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
1612 xo = XO_MOV;
1613 break;
1614 }
1615 emit_mrm(as, xo, dest, RID_MRM);
1616}
1617
1618static void asm_fstore(ASMState *as, IRIns *ir)
1619{
1620 RegSet allow = RSET_GPR;
1621 Reg src = RID_NONE;
1622 /* The IRT_I16/IRT_U16 stores should never be simplified for constant
1623 ** values since mov word [mem], imm16 has a length-changing prefix.
1624 */
1625 if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t)) {
1626 RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
1627 src = ra_alloc1(as, ir->op2, allow8);
1628 rset_clear(allow, src);
1629 }
1630 asm_fusefref(as, IR(ir->op1), allow);
1631 if (ra_hasreg(src)) {
1632 x86Op xo;
1633 switch (irt_type(ir->t)) {
1634 case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
1635 case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
1636 default:
1637 lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
1638 xo = XO_MOVto;
1639 break;
1640 }
1641 emit_mrm(as, xo, src, RID_MRM);
1642 } else {
1643 if (irt_isi8(ir->t) || irt_isu8(ir->t)) {
1644 emit_i8(as, IR(ir->op2)->i);
1645 emit_mrm(as, XO_MOVmib, 0, RID_MRM);
1646 } else {
1647 lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
1648 emit_i32(as, IR(ir->op2)->i);
1649 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1650 }
1651 }
1652}
1653
1654static void asm_ahuload(ASMState *as, IRIns *ir)
1655{
1656 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1657 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t));
1658 if (ra_used(ir)) {
1659 Reg dest = ra_dest(as, ir, allow);
1660 asm_fuseahuref(as, ir->op1, RSET_GPR);
1661 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
1662 } else {
1663 asm_fuseahuref(as, ir->op1, RSET_GPR);
1664 }
1665 /* Always do the type check, even if the load result is unused. */
1666 asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE);
1667 emit_i8(as, ~irt_type(ir->t));
1668 as->mrm.ofs += 4;
1669 emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
1670}
1671
1672static void asm_ahustore(ASMState *as, IRIns *ir)
1673{
1674 if (irt_isnum(ir->t)) {
1675 Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
1676 asm_fuseahuref(as, ir->op1, RSET_GPR);
1677 emit_mrm(as, XO_MOVSDto, src, RID_MRM);
1678 } else {
1679 IRIns *irr = IR(ir->op2);
1680 RegSet allow = RSET_GPR;
1681 Reg src = RID_NONE;
1682 if (!irref_isk(ir->op2)) {
1683 src = ra_alloc1(as, ir->op2, allow);
1684 rset_clear(allow, src);
1685 }
1686 asm_fuseahuref(as, ir->op1, allow);
1687 if (ra_hasreg(src)) {
1688 emit_mrm(as, XO_MOVto, src, RID_MRM);
1689 } else if (!irt_ispri(irr->t)) {
1690 lua_assert(irt_isaddr(ir->t));
1691 emit_i32(as, irr->i);
1692 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1693 }
1694 as->mrm.ofs += 4;
1695 emit_i32(as, (int32_t)~irt_type(ir->t));
1696 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1697 }
1698}
1699
1700static void asm_sload(ASMState *as, IRIns *ir)
1701{
1702 int32_t ofs = 8*((int32_t)ir->op1-1);
1703 IRType1 t = ir->t;
1704 Reg base;
1705 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
1706 if (irt_isint(t)) {
1707 Reg left = ra_scratch(as, RSET_FPR);
1708 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1709 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1710 emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
1711 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1712 } else if (ra_used(ir)) {
1713 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1714 Reg dest = ra_dest(as, ir, allow);
1715 lua_assert(irt_isnum(ir->t) || irt_isaddr(ir->t));
1716 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1717 emit_movrmro(as, dest, base, ofs);
1718 } else {
1719 if (!irt_isguard(ir->t))
1720 return; /* No type check: avoid base alloc. */
1721 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1722 }
1723 if (irt_isguard(ir->t)) {
1724 /* Need type check, even if the load result is unused. */
1725 asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE);
1726 emit_i8(as, ~irt_type(t));
1727 emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
1728 }
1729}
1730
1731static void asm_xload(ASMState *as, IRIns *ir)
1732{
1733 Reg dest = ra_dest(as, ir, RSET_GPR);
1734 x86Op xo;
1735 asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */
1736 /* ir->op2 is ignored -- unaligned loads are ok on x86. */
1737 switch (irt_type(ir->t)) {
1738 case IRT_I8: xo = XO_MOVSXb; break;
1739 case IRT_U8: xo = XO_MOVZXb; break;
1740 case IRT_I16: xo = XO_MOVSXw; break;
1741 case IRT_U16: xo = XO_MOVZXw; break;
1742 default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
1743 }
1744 emit_mrm(as, xo, dest, RID_MRM);
1745}
1746
1747/* -- String ops ---------------------------------------------------------- */
1748
1749static void asm_snew(ASMState *as, IRIns *ir)
1750{
1751 RegSet allow = RSET_GPR;
1752 Reg left, right;
1753 IRIns *irl;
1754 ra_destreg(as, ir, RID_RET);
1755 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
1756 irl = IR(ir->op1);
1757 left = irl->r;
1758 right = IR(ir->op2)->r;
1759 if (ra_noreg(left)) {
1760 lua_assert(irl->o == IR_STRREF);
1761 /* Get register only for non-const STRREF. */
1762 if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
1763 if (ra_hasreg(right)) rset_clear(allow, right);
1764 left = ra_allocref(as, ir->op1, allow);
1765 }
1766 }
1767 if (ra_noreg(right) && !irref_isk(ir->op2)) {
1768 if (ra_hasreg(left)) rset_clear(allow, left);
1769 right = ra_allocref(as, ir->op2, allow);
1770 }
1771 /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
1772 emit_call(as, lj_str_new);
1773 emit_setargr(as, 1, RID_RET);
1774 emit_getgl(as, RID_RET, jit_L);
1775 if (ra_noreg(left)) /* Use immediate for const STRREF. */
1776 emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
1777 (int32_t)sizeof(GCstr));
1778 else
1779 emit_setargr(as, 2, left);
1780 if (ra_noreg(right))
1781 emit_setargi(as, 3, IR(ir->op2)->i);
1782 else
1783 emit_setargr(as, 3, right);
1784 as->gcsteps++;
1785}
1786
1787/* -- Table ops ----------------------------------------------------------- */
1788
1789static void asm_tnew(ASMState *as, IRIns *ir)
1790{
1791 ra_destreg(as, ir, RID_RET);
1792 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
1793 /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
1794 emit_call(as, lj_tab_new);
1795 emit_setargr(as, 1, RID_RET);
1796 emit_setargi(as, 2, ir->op1);
1797 emit_setargi(as, 3, ir->op2);
1798 emit_getgl(as, RID_RET, jit_L);
1799 as->gcsteps++;
1800}
1801
1802static void asm_tdup(ASMState *as, IRIns *ir)
1803{
1804 ra_destreg(as, ir, RID_RET);
1805 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
1806 /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
1807 emit_call(as, lj_tab_dup);
1808 emit_setargr(as, 1, RID_RET);
1809 emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
1810 emit_getgl(as, RID_RET, jit_L);
1811 as->gcsteps++;
1812}
1813
1814static void asm_tlen(ASMState *as, IRIns *ir)
1815{
1816 ra_destreg(as, ir, RID_RET);
1817 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
1818 emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */
1819 emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
1820}
1821
1822static void asm_tbar(ASMState *as, IRIns *ir)
1823{
1824 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1825 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1826 MCLabel l_end = emit_label(as);
1827 emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
1828 emit_setgl(as, tab, gc.grayagain);
1829 emit_getgl(as, tmp, gc.grayagain);
1830 emit_i8(as, ~LJ_GC_BLACK);
1831 emit_rmro(as, XO_ARITHib, XOg_AND, tab, offsetof(GCtab, marked));
1832 emit_sjcc(as, CC_Z, l_end);
1833 emit_i8(as, LJ_GC_BLACK);
1834 emit_rmro(as, XO_GROUP3b, XOg_TEST, tab, offsetof(GCtab, marked));
1835}
1836
1837static void asm_obar(ASMState *as, IRIns *ir)
1838{
1839 RegSet allow = RSET_GPR;
1840 Reg obj, val;
1841 GCobj *valp;
1842 MCLabel l_end;
1843 int32_t ofs;
1844 ra_evictset(as, RSET_SCRATCH);
1845 if (irref_isk(ir->op2)) {
1846 valp = ir_kgc(IR(ir->op2));
1847 val = RID_NONE;
1848 } else {
1849 valp = NULL;
1850 val = ra_alloc1(as, ir->op2, allow);
1851 rset_clear(allow, val);
1852 }
1853 obj = ra_alloc1(as, ir->op1, allow);
1854 l_end = emit_label(as);
1855 /* No need for other object barriers (yet). */
1856 lua_assert(IR(ir->op1)->o == IR_UREFC);
1857 ofs = -(int32_t)offsetof(GCupval, tv);
1858 /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
1859 emit_call(as, lj_gc_barrieruv);
1860 if (ofs == 0) {
1861 emit_setargr(as, 2, obj);
1862 } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
1863 emit_setargr(as, 2, obj);
1864 emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
1865 } else {
1866 emit_setargr(as, 2, RID_RET);
1867 emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
1868 }
1869 emit_setargp(as, 1, J2G(as->J));
1870 if (valp)
1871 emit_setargp(as, 3, valp);
1872 else
1873 emit_setargr(as, 3, val);
1874 emit_sjcc(as, CC_Z, l_end);
1875 emit_i8(as, LJ_GC_WHITES);
1876 if (valp)
1877 emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
1878 else
1879 emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
1880 emit_sjcc(as, CC_Z, l_end);
1881 emit_i8(as, LJ_GC_BLACK);
1882 emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
1883 ofs + (int32_t)offsetof(GChead, marked));
1884}
1885
1886/* -- FP/int arithmetic and logic operations ------------------------------ */
1887
1888/* Load reference onto x87 stack. Force a spill to memory if needed. */
1889static void asm_x87load(ASMState *as, IRRef ref)
1890{
1891 IRIns *ir = IR(ref);
1892 if (ir->o == IR_KNUM) {
1893 cTValue *tv = ir_knum(ir);
1894 if (tvispzero(tv)) /* Use fldz only for +0. */
1895 emit_x87op(as, XI_FLDZ);
1896 else if (tvispone(tv))
1897 emit_x87op(as, XI_FLD1);
1898 else
1899 emit_rma(as, XO_FLDq, XOg_FLDq, tv);
1900 } else if (ir->o == IR_TONUM && !ra_used(ir) &&
1901 !irref_isk(ir->op1) && mayfuse(as, ir->op1)) {
1902 IRIns *iri = IR(ir->op1);
1903 emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri));
1904 } else {
1905 emit_mrm(as, XO_FLDq, XOg_FLDq, asm_fuseload(as, ref, RSET_EMPTY));
1906 }
1907}
1908
1909/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
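/* The compiler front end presumably splits x^y into exp2(y*log2(x)). If the
** EXP2, MUL and LOG2 instructions are still adjacent and otherwise unused,
** they are fused back into a single call to lj_vm_pow().
*/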
1910static int fpmjoin_pow(ASMState *as, IRIns *ir)
1911{
1912 IRIns *irp = IR(ir->op1);
1913 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1914 IRIns *irpp = IR(irp->op1);
1915 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1916 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1917 emit_call(as, lj_vm_pow); /* st0 = lj_vm_pow(st1, st0) */
1918 asm_x87load(as, irp->op2);
1919 asm_x87load(as, irpp->op1);
1920 return 1;
1921 }
1922 }
1923 return 0;
1924}
1925
1926static void asm_fpmath(ASMState *as, IRIns *ir)
1927{
1928 IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
1929 if (fpm == IRFPM_SQRT) {
1930 Reg dest = ra_dest(as, ir, RSET_FPR);
1931 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
1932 emit_mrm(as, XO_SQRTSD, dest, left);
1933 } else if ((as->flags & JIT_F_SSE4_1) && fpm <= IRFPM_TRUNC) {
1934 Reg dest = ra_dest(as, ir, RSET_FPR);
1935 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
1936 /* Round down/up/trunc == 1001/1010/1011. */
1937 emit_i8(as, 0x09 + fpm);
1938 /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op. */
1939 emit_mrm(as, XO_ROUNDSD, dest, left);
1940 /* Let's pretend it's a 3-byte opcode, and compensate afterwards. */
1941 /* This is atrocious, but the alternatives are much worse. */
1942 if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) {
1943 as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */
1944 }
1945 *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */
1946 } else {
1947 int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
1948 Reg dest = ir->r;
1949 if (ra_hasreg(dest)) {
1950 ra_free(as, dest);
1951 ra_modified(as, dest);
1952 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
1953 }
1954 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1955 switch (fpm) { /* st0 = lj_vm_*(st0) */
1956 case IRFPM_FLOOR: emit_call(as, lj_vm_floor); break;
1957 case IRFPM_CEIL: emit_call(as, lj_vm_ceil); break;
1958 case IRFPM_TRUNC: emit_call(as, lj_vm_trunc); break;
1959 case IRFPM_EXP: emit_call(as, lj_vm_exp); break;
1960 case IRFPM_EXP2:
1961 if (fpmjoin_pow(as, ir)) return;
1962 emit_call(as, lj_vm_exp2); /* st0 = lj_vm_exp2(st0) */
1963 break;
1964 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
1965 case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
1966 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
1967 case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
1968 /* Note: the use of fyl2xp1 would be pointless here. When computing
1969 ** log(1.0+eps) the precision is already lost after 1.0 is added.
1970 ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
1971 */
1972 emit_x87op(as, XI_FYL2X); break;
1973 case IRFPM_OTHER:
1974 switch (ir->o) {
1975 case IR_ATAN2:
1976 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
1977 case IR_LDEXP:
1978 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
1979 case IR_POWI:
1980 emit_call(as, lj_vm_powi); /* st0 = lj_vm_powi(st0, [esp]) */
1981 emit_rmro(as, XO_MOVto, ra_alloc1(as, ir->op2, RSET_GPR), RID_ESP, 0);
1982 break;
1983 default: lua_assert(0); break;
1984 }
1985 break;
1986 default: lua_assert(0); break;
1987 }
1988 asm_x87load(as, ir->op1);
1989 switch (fpm) {
1990 case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
1991 case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
1992 case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
1993 case IRFPM_OTHER:
1994 if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
1995 break;
1996 default: break;
1997 }
1998 }
1999}
2000
2001/* Find out whether swapping operands might be beneficial. */
2002static int swapops(ASMState *as, IRIns *ir)
2003{
2004 IRIns *irl = IR(ir->op1);
2005 IRIns *irr = IR(ir->op2);
2006 lua_assert(ra_noreg(irr->r));
2007 if (!irm_iscomm(lj_ir_mode[ir->o]))
2008 return 0; /* Can't swap non-commutative operations. */
2009 if (irref_isk(ir->op2))
2010 return 0; /* Don't swap constants to the left. */
2011 if (ra_hasreg(irl->r))
2012 return 1; /* Swap if left already has a register. */
2013 if (ra_samehint(ir->r, irr->r))
2014 return 1; /* Swap if dest and right have matching hints. */
2015 if (ir->op1 < as->loopref && !irt_isphi(irl->t) &&
2016 !(ir->op2 < as->loopref && !irt_isphi(irr->t)))
2017 return 1; /* Swap invariants to the right. */
2018 if (opisfusableload(irl->o))
2019 return 1; /* Swap fusable loads to the right. */
2020 return 0; /* Otherwise don't swap. */
2021}
2022
2023static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo)
2024{
2025 IRRef lref = ir->op1;
2026 IRRef rref = ir->op2;
2027 RegSet allow = RSET_FPR;
2028 Reg dest;
2029 Reg right = IR(rref)->r;
2030 if (ra_hasreg(right))
2031 rset_clear(allow, right);
2032 dest = ra_dest(as, ir, allow);
2033 if (lref == rref) {
2034 right = dest;
2035 } else if (ra_noreg(right)) {
2036 if (swapops(as, ir)) {
2037 IRRef tmp = lref; lref = rref; rref = tmp;
2038 }
2039 right = asm_fuseload(as, rref, rset_clear(allow, dest));
2040 }
2041 emit_mrm(as, xo, dest, right);
2042 ra_left(as, dest, lref);
2043}
2044
2045static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
2046{
2047 IRRef lref = ir->op1;
2048 IRRef rref = ir->op2;
2049 RegSet allow = RSET_GPR;
2050 Reg dest, right;
2051 if (as->testmcp == as->mcp) { /* Drop test r,r instruction. */
2052 as->testmcp = NULL;
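    /* Skip the 2-byte test r,r, or 3 bytes if it carries a REX prefix. */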
2053 as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2;
2054 }
2055 right = IR(rref)->r;
2056 if (ra_hasreg(right))
2057 rset_clear(allow, right);
2058 dest = ra_dest(as, ir, allow);
2059 if (lref == rref) {
2060 right = dest;
2061 } else if (ra_noreg(right) && !irref_isk(rref)) {
2062 if (swapops(as, ir)) {
2063 IRRef tmp = lref; lref = rref; rref = tmp;
2064 }
2065 right = asm_fuseload(as, rref, rset_clear(allow, dest));
2066 /* Note: fuses only with IR_FLOAD for now. */
2067 }
2068 if (irt_isguard(ir->t)) /* For IR_ADDOV etc. */
2069 asm_guardcc(as, CC_O);
2070 if (ra_hasreg(right))
2071 emit_mrm(as, XO_ARITH(xa), dest, right);
2072 else
2073 emit_gri(as, XG_ARITHi(xa), dest, IR(ir->op2)->i);
2074 ra_left(as, dest, lref);
2075}
2076
2077/* LEA is really a 4-operand ADD with an independent destination register,
2078** up to two source registers and an immediate. One register can be scaled
2079** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
2080** instructions.
2081**
2082** Currently only a few common cases are supported:
2083** - 3-operand ADD: y = a+b; y = a+k with a and b already allocated
2084** - Left ADD fusion: y = (a+b)+k; y = (a+k)+b
2085** - Right ADD fusion: y = a+(b+k)
2086** The omitted variants have already been reduced by FOLD.
2087**
2088** There are more fusion opportunities, like gathering shifts or joining
2089** common references. But these are probably not worth the trouble, since
2090** array indexing is not decomposed and already makes use of all fields
2091** of the ModRM operand.
2092*/
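/* For illustration, with registers a, b and a constant k the supported
** cases map onto (Intel syntax):
**   y = a+b        lea y, [a+b]
**   y = a+k        lea y, [a+k]
**   y = (a+b)+k    lea y, [a+b+k]
**   y = a+(b+k)    lea y, [a+b+k]
*/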
2093static int asm_lea(ASMState *as, IRIns *ir)
2094{
2095 IRIns *irl = IR(ir->op1);
2096 IRIns *irr = IR(ir->op2);
2097 RegSet allow = RSET_GPR;
2098 Reg dest;
2099 as->mrm.base = as->mrm.idx = RID_NONE;
2100 as->mrm.scale = XM_SCALE1;
2101 as->mrm.ofs = 0;
2102 if (ra_hasreg(irl->r)) {
2103 rset_clear(allow, irl->r);
2104 as->mrm.base = irl->r;
2105 if (irref_isk(ir->op2) || ra_hasreg(irr->r)) {
2106 /* The PHI renaming logic does a better job in some cases. */
2107 if (ra_hasreg(ir->r) &&
2108 ((irt_isphi(irl->t) && as->phireg[ir->r] == ir->op1) ||
2109 (irt_isphi(irr->t) && as->phireg[ir->r] == ir->op2)))
2110 return 0;
2111 if (irref_isk(ir->op2)) {
2112 as->mrm.ofs = irr->i;
2113 } else {
2114 rset_clear(allow, irr->r);
2115 as->mrm.idx = irr->r;
2116 }
2117 } else if (irr->o == IR_ADD && mayfuse(as, ir->op2) &&
2118 irref_isk(irr->op2)) {
2119 Reg idx = ra_alloc1(as, irr->op1, allow);
2120 rset_clear(allow, idx);
2121 as->mrm.idx = (uint8_t)idx;
2122 as->mrm.ofs = IR(irr->op2)->i;
2123 } else {
2124 return 0;
2125 }
2126 } else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) &&
2127 (irref_isk(ir->op2) || irref_isk(irl->op2))) {
2128 Reg idx, base = ra_alloc1(as, irl->op1, allow);
2129 rset_clear(allow, base);
2130 as->mrm.base = (uint8_t)base;
2131 if (irref_isk(ir->op2)) {
2132 as->mrm.ofs = irr->i;
2133 idx = ra_alloc1(as, irl->op2, allow);
2134 } else {
2135 as->mrm.ofs = IR(irl->op2)->i;
2136 idx = ra_alloc1(as, ir->op2, allow);
2137 }
2138 rset_clear(allow, idx);
2139 as->mrm.idx = (uint8_t)idx;
2140 } else {
2141 return 0;
2142 }
2143 dest = ra_dest(as, ir, allow);
2144 emit_mrm(as, XO_LEA, dest, RID_MRM);
2145 return 1; /* Success. */
2146}
2147
2148static void asm_add(ASMState *as, IRIns *ir)
2149{
2150 if (irt_isnum(ir->t))
2151 asm_fparith(as, ir, XO_ADDSD);
2152 else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp ||
2153 !asm_lea(as, ir))
2154 asm_intarith(as, ir, XOg_ADD);
2155}
2156
2157static void asm_bitnot(ASMState *as, IRIns *ir)
2158{
2159 Reg dest = ra_dest(as, ir, RSET_GPR);
2160 emit_rr(as, XO_GROUP3, XOg_NOT, dest);
2161 ra_left(as, dest, ir->op1);
2162}
2163
2164static void asm_bitswap(ASMState *as, IRIns *ir)
2165{
2166 Reg dest = ra_dest(as, ir, RSET_GPR);
2167 MCode *p = as->mcp;
2168 p[-1] = (MCode)(XI_BSWAP+(dest&7));
2169 p[-2] = 0x0f;
2170 p -= 2;
2171 REXRB(p, 0, dest);
2172 as->mcp = p;
2173 ra_left(as, dest, ir->op1);
2174}
2175
2176static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2177{
2178 IRRef rref = ir->op2;
2179 IRIns *irr = IR(rref);
2180 Reg dest;
2181 if (irref_isk(rref)) { /* Constant shifts. */
2182 int shift;
2183 dest = ra_dest(as, ir, RSET_GPR);
2184 shift = irr->i & 31; /* Handle shifts of 0..31 bits. */
2185 switch (shift) {
2186 case 0: return;
2187 case 1: emit_rr(as, XO_SHIFT1, (Reg)xs, dest); break;
2188 default: emit_shifti(as, xs, dest, shift); break;
2189 }
2190 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
2191 RegSet allow = rset_exclude(RSET_GPR, RID_ECX);
2192 Reg right = irr->r;
2193 if (ra_noreg(right)) {
2194 right = ra_allocref(as, rref, RID2RSET(RID_ECX));
2195 } else if (right != RID_ECX) {
2196 rset_clear(allow, right);
2197 ra_scratch(as, RID2RSET(RID_ECX));
2198 }
2199 dest = ra_dest(as, ir, allow);
2200 emit_rr(as, XO_SHIFTcl, (Reg)xs, dest);
2201 if (right != RID_ECX)
2202 emit_rr(as, XO_MOV, RID_ECX, right);
2203 }
2204 ra_left(as, dest, ir->op1);
2205 /*
2206 ** Note: avoid using the flags resulting from a shift or rotate!
2207 ** All of them cause a partial flag stall, except for r,1 shifts
2208 ** (but not rotates). And a shift count of 0 leaves the flags unmodified.
2209 */
2210}
2211
2212/* -- Comparisons --------------------------------------------------------- */
2213
2214/* Virtual flags for unordered FP comparisons. */
2215#define VCC_U 0x100 /* Unordered. */
2216#define VCC_P 0x200 /* Needs extra CC_P branch. */
2217#define VCC_S 0x400 /* Swap avoids CC_P branch. */
2218#define VCC_PS (VCC_P|VCC_S)
2219
2220static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2221{
2222 if (irt_isnum(ir->t)) {
2223 IRRef lref = ir->op1;
2224 IRRef rref = ir->op2;
2225 Reg left, right;
2226 MCLabel l_around;
2227 /*
2228 ** An extra CC_P branch is required to preserve ordered/unordered
2229 ** semantics for FP comparisons. This can be avoided by swapping
2230 ** the operands and inverting the condition (except for EQ and UNE).
2231 ** So always try to swap if possible.
2232 **
2233 ** Another option would be to swap operands to achieve better memory
2234 ** operand fusion. But it's unlikely that this outweighs the cost
2235 ** of the extra branches.
2236 */
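    /* E.g. a guarded x < y swaps to ucomisd y, x with the exit taken on
    ** "be": unordered operands set both ZF and CF, so a NaN takes the
    ** exit, too, and no extra jp is needed.
    */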
2237 if (cc & VCC_S) { /* Swap? */
2238 IRRef tmp = lref; lref = rref; rref = tmp;
2239 cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
2240 }
2241 left = ra_alloc1(as, lref, RSET_FPR);
2242 right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2243 l_around = emit_label(as);
2244 asm_guardcc(as, cc >> 4);
2245 if (cc & VCC_P) { /* Extra CC_P branch required? */
2246 if (!(cc & VCC_U)) {
2247 asm_guardcc(as, CC_P); /* Branch to exit for ordered comparisons. */
2248 } else if (l_around != as->invmcp) {
2249 emit_sjcc(as, CC_P, l_around); /* Branch around for unordered. */
2250 } else {
2251 /* Patched to mcloop by asm_loop_fixup. */
2252 as->loopinv = 2;
2253 if (as->realign)
2254 emit_sjcc(as, CC_P, as->mcp);
2255 else
2256 emit_jcc(as, CC_P, as->mcp);
2257 }
2258 }
2259 emit_mrm(as, XO_UCOMISD, left, right);
2260 } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
2261 IRRef lref = ir->op1, rref = ir->op2;
2262 IROp leftop = (IROp)(IR(lref)->o);
2263 lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
2264 /* Swap constants (only for ABC) and fusable loads to the right. */
2265 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
2266 if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
2267 else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */
2268 lref = ir->op2; rref = ir->op1;
2269 }
2270 if (irref_isk(rref)) {
2271 IRIns *irl = IR(lref);
2272 int32_t imm = IR(rref)->i;
2273 /* Check whether we can use test ins. Not for unsigned, since CF=0. */
2274 int usetest = (imm == 0 && (cc & 0xa) != 0x2);
2275 if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
2276 /* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */
2277 Reg right, left = RID_NONE;
2278 RegSet allow = RSET_GPR;
2279 if (!irref_isk(irl->op2)) {
2280 left = ra_alloc1(as, irl->op2, allow);
2281 rset_clear(allow, left);
2282 }
2283 right = asm_fuseload(as, irl->op1, allow);
2284 asm_guardcc(as, cc);
2285 if (irref_isk(irl->op2)) {
2286 emit_i32(as, IR(irl->op2)->i);
2287 emit_mrm(as, XO_GROUP3, XOg_TEST, right);
2288 } else {
2289 emit_mrm(as, XO_TEST, left, right);
2290 }
2291 } else {
2292 Reg left;
2293 if (opisfusableload((IROp)irl->o) &&
2294 ((irt_isi8(irl->t) && checki8(imm)) ||
2295 (irt_isu8(irl->t) && checku8(imm)))) {
2296 /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
2297 ** loads are handled here. The IRT_I16/IRT_U16 loads should never be
2298 ** fused, since cmp word [mem], imm16 has a length-changing prefix.
2299 */
2300 IRType1 origt = irl->t; /* Temporarily flip types. */
2301 irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
2302 left = asm_fuseload(as, lref, RSET_GPR);
2303 irl->t = origt;
2304 if (left == RID_MRM) { /* Fusion succeeded? */
2305 asm_guardcc(as, cc);
2306 emit_i8(as, imm);
2307 emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
2308 return;
2309 } /* Otherwise handle register case as usual. */
2310 } else {
2311 left = asm_fuseload(as, lref, RSET_GPR);
2312 }
2313 asm_guardcc(as, cc);
2314 if (usetest && left != RID_MRM) {
2315 /* Use test r,r instead of cmp r,0. */
2316 if (irl+1 == ir) /* Referencing previous ins? */
2317 as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */
2318 emit_rr(as, XO_TEST, left, left);
2319 } else {
2320 x86Op xo;
2321 if (checki8(imm)) {
2322 emit_i8(as, imm);
2323 xo = XO_ARITHi8;
2324 } else {
2325 emit_i32(as, imm);
2326 xo = XO_ARITHi;
2327 }
2328 emit_mrm(as, xo, XOg_CMP, left);
2329 }
2330 }
2331 } else {
2332 Reg left = ra_alloc1(as, lref, RSET_GPR);
2333 Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
2334 asm_guardcc(as, cc);
2335 emit_mrm(as, XO_CMP, left, right);
2336 }
2337 } else { /* Handle ordered string compares. */
2338 RegSet allow = RSET_GPR;
2339 /* This assumes lj_str_cmp never uses any SSE registers. */
2340 ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
2341 asm_guardcc(as, cc);
2342 emit_rr(as, XO_TEST, RID_RET, RID_RET);
2343 emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
2344 if (irref_isk(ir->op1)) {
2345 emit_setargi(as, 1, IR(ir->op1)->i);
2346 } else {
2347 Reg left = ra_alloc1(as, ir->op1, allow);
2348 rset_clear(allow, left);
2349 emit_setargr(as, 1, left);
2350 }
2351 if (irref_isk(ir->op2)) {
2352 emit_setargi(as, 2, IR(ir->op2)->i);
2353 } else {
2354 Reg right = ra_alloc1(as, ir->op2, allow);
2355 emit_setargr(as, 2, right);
2356 }
2357 }
2358}
2359
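/* Packs the integer cc into bits 0-3, the FP cc into bits 4-7 and the
** VCC_* flags on top of that.
*/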
2360#define asm_comp(as, ir, ci, cf, cu) \
2361 asm_comp_(as, ir, (ci)+((cf)<<4)+(cu))
2362
2363/* -- GC handling --------------------------------------------------------- */
2364
2365/* Sync all live GC values to Lua stack slots. */
2366static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
2367{
2368 IRRef2 *map = &as->T->snapmap[snap->mapofs];
2369 BCReg s, nslots = snap->nslots;
2370 for (s = 0; s < nslots; s++) {
2371 IRRef ref = snap_ref(map[s]);
2372 if (!irref_isk(ref)) {
2373 IRIns *ir = IR(ref);
2374 if (ir->o == IR_FRAME) {
2375 /* NYI: sync the frame, bump base, set topslot, clear new slots. */
2376 lj_trace_err(as->J, LJ_TRERR_NYIGCF);
2377 } else if (irt_isgcv(ir->t) &&
2378 !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) {
2379 Reg src = ra_alloc1(as, ref, allow);
2380 int32_t ofs = 8*(int32_t)(s-1);
2381 emit_movtomro(as, src, base, ofs);
2382 emit_movmroi(as, base, ofs+4, irt_toitype(ir->t));
2383 checkmclim(as);
2384 }
2385 }
2386 }
2387}
2388
2389/* Check GC threshold and do one or more GC steps. */
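/* Note: emission is backwards, so at runtime this reads: load gc.total,
** compare it against gc.threshold and skip ahead if below; otherwise sync
** the stack slots, store L->base and call lj_gc_step_jit.
*/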
2390static void asm_gc_check(ASMState *as, SnapShot *snap)
2391{
2392 MCLabel l_end;
2393 const BCIns *pc;
2394 Reg tmp, base;
2395 RegSet drop = RSET_SCRATCH;
2396 /* Must evict BASE because the stack may be reallocated by the GC. */
2397 if (ra_hasreg(IR(REF_BASE)->r))
2398 drop |= RID2RSET(IR(REF_BASE)->r);
2399 ra_evictset(as, drop);
2400 base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
2401 l_end = emit_label(as);
2402 /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
2403 emit_call(as, lj_gc_step_jit);
2404 emit_movtomro(as, base, RID_RET, offsetof(lua_State, base));
2405 emit_setargr(as, 1, RID_RET);
2406 emit_setargi(as, 3, (int32_t)as->gcsteps);
2407 emit_getgl(as, RID_RET, jit_L);
2408 pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
2409 emit_setargp(as, 2, pc);
2410 asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
2411 if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */
2412 ra_restore(as, REF_BASE); /* Better do it inside the slow path. */
2413 /* Jump around GC step if GC total < GC threshold. */
2414 tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
2415 emit_sjcc(as, CC_B, l_end);
2416 emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
2417 emit_getgl(as, tmp, gc.total);
2418 as->gcsteps = 0;
2419 checkmclim(as);
2420}
2421
2422/* -- PHI and loop handling ----------------------------------------------- */
2423
2424/* Break a PHI cycle by renaming to a free register (evict if needed). */
2425static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby,
2426 RegSet allow)
2427{
2428 RegSet candidates = blocked & allow;
2429 if (candidates) { /* If this register file has candidates. */
2430 /* Note: the set for ra_pick cannot be empty, since each register file
2431 ** has some registers never allocated to PHIs.
2432 */
2433 Reg down, up = ra_pick(as, ~blocked & allow); /* Get a free register. */
2434 if (candidates & ~blockedby) /* Optimize shifts, else it's a cycle. */
2435 candidates = candidates & ~blockedby;
2436 down = rset_picktop(candidates); /* Pick candidate PHI register. */
2437 ra_rename(as, down, up); /* And rename it to the free register. */
2438 }
2439}
2440
2441/* PHI register shuffling.
2442**
2443** The allocator tries hard to preserve PHI register assignments across
2444** the loop body. Most of the time this loop does nothing, since there
2445** are no register mismatches.
2446**
2447** If a register mismatch is detected and ...
2448** - the register is currently free: rename it.
2449** - the register is blocked by an invariant: restore/remat and rename it.
2450** - Otherwise the register is used by another PHI, so mark it as blocked.
2451**
2452** The renames are order-sensitive, so just retry the loop if a register
2453** is marked as blocked, but has been freed in the meantime. A cycle is
2454** detected if all of the blocked registers are allocated. To break the
2455** cycle rename one of them to a free register and retry.
2456**
2457** Note that PHI spill slots are kept in sync and don't need to be shuffled.
2458*/
2459static void asm_phi_shuffle(ASMState *as)
2460{
2461 RegSet work;
2462
2463 /* Find and resolve PHI register mismatches. */
2464 for (;;) {
2465 RegSet blocked = RSET_EMPTY;
2466 RegSet blockedby = RSET_EMPTY;
2467 RegSet phiset = as->phiset;
2468 while (phiset) { /* Check all left PHI operand registers. */
2469 Reg r = rset_picktop(phiset);
2470 IRIns *irl = IR(as->phireg[r]);
2471 Reg left = irl->r;
2472 if (r != left) { /* Mismatch? */
2473 if (!rset_test(as->freeset, r)) { /* PHI register blocked? */
2474 IRRef ref = regcost_ref(as->cost[r]);
2475 if (irt_ismarked(IR(ref)->t)) { /* Blocked by other PHI (w/reg)? */
2476 rset_set(blocked, r);
2477 if (ra_hasreg(left))
2478 rset_set(blockedby, left);
2479 left = RID_NONE;
2480 } else { /* Otherwise grab register from invariant. */
2481 ra_restore(as, ref);
2482 checkmclim(as);
2483 }
2484 }
2485 if (ra_hasreg(left)) {
2486 ra_rename(as, left, r);
2487 checkmclim(as);
2488 }
2489 }
2490 rset_clear(phiset, r);
2491 }
2492 if (!blocked) break; /* Finished. */
2493 if (!(as->freeset & blocked)) { /* Break cycles if none are free. */
2494 asm_phi_break(as, blocked, blockedby, RSET_GPR);
2495 asm_phi_break(as, blocked, blockedby, RSET_FPR);
2496 checkmclim(as);
2497 } /* Else retry some more renames. */
2498 }
2499
2500 /* Restore/remat invariants whose registers are modified inside the loop. */
2501 work = as->modset & ~(as->freeset | as->phiset);
2502 while (work) {
2503 Reg r = rset_picktop(work);
2504 ra_restore(as, regcost_ref(as->cost[r]));
2505 rset_clear(work, r);
2506 checkmclim(as);
2507 }
2508
2509 /* Allocate and save all unsaved PHI regs and clear marks. */
2510 work = as->phiset;
2511 while (work) {
2512 Reg r = rset_picktop(work);
2513 IRRef lref = as->phireg[r];
2514 IRIns *ir = IR(lref);
2515 if (ra_hasspill(ir->s)) { /* Left PHI gained a spill slot? */
2516 irt_clearmark(ir->t); /* Handled here, so clear marker now. */
2517 ra_alloc1(as, lref, RID2RSET(r));
2518 ra_save(as, ir, r); /* Save to spill slot inside the loop. */
2519 checkmclim(as);
2520 }
2521 rset_clear(work, r);
2522 }
2523}
2524
2525/* Emit renames for left PHIs which are only spilled outside the loop. */
2526static void asm_phi_fixup(ASMState *as)
2527{
2528 RegSet work = as->phiset;
2529 while (work) {
2530 Reg r = rset_picktop(work);
2531 IRRef lref = as->phireg[r];
2532 IRIns *ir = IR(lref);
2533 /* Left PHI gained a spill slot before the loop? */
2534 if (irt_ismarked(ir->t) && ra_hasspill(ir->s)) {
2535 IRRef ren;
2536 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
2537 ren = tref_ref(lj_ir_emit(as->J));
2538 as->ir = as->T->ir; /* The IR may have been reallocated. */
2539 IR(ren)->r = (uint8_t)r;
2540 IR(ren)->s = SPS_NONE;
2541 }
2542 irt_clearmark(ir->t); /* Always clear marker. */
2543 rset_clear(work, r);
2544 }
2545}
2546
2547/* Setup right PHI reference. */
2548static void asm_phi(ASMState *as, IRIns *ir)
2549{
2550 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
2551 RegSet afree = (as->freeset & allow);
2552 IRIns *irl = IR(ir->op1);
2553 IRIns *irr = IR(ir->op2);
2554 /* Spill slot shuffling is not implemented yet (but rarely needed). */
2555 if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
2556 lj_trace_err(as->J, LJ_TRERR_NYIPHI);
2557 /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */
2558 if ((afree & (afree-1))) { /* Two or more free registers? */
2559 Reg r;
2560 if (ra_noreg(irr->r)) { /* Get a register for the right PHI. */
2561 r = ra_allocref(as, ir->op2, allow);
2562 } else { /* Duplicate right PHI, need a copy (rare). */
2563 r = ra_scratch(as, allow);
2564 emit_movrr(as, r, irr->r);
2565 }
2566 ir->r = (uint8_t)r;
2567 rset_set(as->phiset, r);
2568 as->phireg[r] = (IRRef1)ir->op1;
2569 irt_setmark(irl->t); /* Marks left PHIs _with_ register. */
2570 if (ra_noreg(irl->r))
2571 ra_sethint(irl->r, r); /* Set register hint for left PHI. */
2572 } else { /* Otherwise allocate a spill slot. */
2573 /* This is overly restrictive, but it triggers only on synthetic code. */
2574 if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
2575 lj_trace_err(as->J, LJ_TRERR_NYIPHI);
2576 ra_spill(as, ir);
2577 irl->s = irr->s = ir->s; /* Sync left/right PHI spill slots. */
2578 }
2579}
2580
2581/* Fixup the loop branch. */
2582static void asm_loop_fixup(ASMState *as)
2583{
2584 MCode *p = as->mctop;
2585 MCode *target = as->mcp;
2586 if (as->realign) { /* Realigned loops use short jumps. */
2587 as->realign = NULL; /* Stop another retry. */
2588 lua_assert(((intptr_t)target & 15) == 0);
2589 if (as->loopinv) { /* Inverted loop branch? */
2590 p -= 5;
2591 p[0] = XI_JMP;
2592 lua_assert(target - p >= -128);
2593 p[-1] = (MCode)(target - p); /* Patch sjcc. */
2594 if (as->loopinv == 2)
2595 p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */
2596 } else {
2597 lua_assert(target - p >= -128);
2598 p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */
2599 p[-2] = XI_JMPs;
2600 }
2601 } else {
2602 MCode *newloop;
2603 p[-5] = XI_JMP;
2604 if (as->loopinv) { /* Inverted loop branch? */
2605 /* asm_guardcc already inverted the jcc and patched the jmp. */
2606 p -= 5;
2607 newloop = target+4;
2608 *(int32_t *)(p-4) = (int32_t)(target - p); /* Patch jcc. */
2609 if (as->loopinv == 2) {
2610 *(int32_t *)(p-10) = (int32_t)(target - p + 6); /* Patch opt. jp. */
2611 newloop = target+8;
2612 }
2613 } else { /* Otherwise just patch jmp. */
2614 *(int32_t *)(p-4) = (int32_t)(target - p);
2615 newloop = target+3;
2616 }
2617 /* Realign small loops and shorten the loop branch. */
2618 if (newloop >= p - 128) {
2619 as->realign = newloop; /* Force a retry and remember alignment. */
2620 as->curins = as->stopins; /* Abort asm_trace now. */
2621 as->T->nins = as->orignins; /* Remove any added renames. */
2622 }
2623 }
2624}
2625
2626/* Middle part of a loop. */
2627static void asm_loop(ASMState *as)
2628{
2629 /* LOOP is a guard, so the snapno is up to date. */
2630 as->loopsnapno = as->snapno;
2631 if (as->gcsteps)
2632 asm_gc_check(as, &as->T->snap[as->loopsnapno]);
2633 /* LOOP marks the transition from the variant to the invariant part. */
2634 as->testmcp = as->invmcp = NULL;
2635 as->sectref = 0;
2636 if (!neverfuse(as)) as->fuseref = 0;
2637 asm_phi_shuffle(as);
2638 asm_loop_fixup(as);
2639 as->mcloop = as->mcp;
2640 RA_DBGX((as, "===== LOOP ====="));
2641 if (!as->realign) RA_DBG_FLUSH();
2642}
2643
2644/* -- Head of trace ------------------------------------------------------- */
2645
2646/* Rematerialize all remaining constants in registers. */
2647static void asm_const_remat(ASMState *as)
2648{
2649 RegSet work = ~as->freeset & RSET_ALL;
2650 while (work) {
2651 Reg r = rset_pickbot(work);
2652 IRRef ref = regcost_ref(as->cost[r]);
2653 if (irref_isk(ref) || ref == REF_BASE) {
2654 ra_rematk(as, IR(ref));
2655 checkmclim(as);
2656 }
2657 rset_clear(work, r);
2658 }
2659}
2660
2661/* Head of a root trace. */
2662static void asm_head_root(ASMState *as)
2663{
2664 int32_t spadj;
2665 emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
2666 spadj = sps_adjust(as);
2667 as->T->spadjust = (uint16_t)spadj;
2668 emit_addptr(as, RID_ESP, -spadj);
2669}
2670
2671/* Handle BASE coalescing for a root trace. */
2672static void asm_head_base(ASMState *as)
2673{
2674 IRIns *ir = IR(REF_BASE);
2675 Reg r = ir->r;
2676 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
2677 ra_free(as, r);
2678 if (r != RID_BASE) {
2679 ra_scratch(as, RID2RSET(RID_BASE));
2680 emit_rr(as, XO_MOV, r, RID_BASE);
2681 }
2682}
2683
2684/* Check Lua stack size for overflow at the start of a side trace.
2685** Stack overflow is rare, so let the regular exit handling fix this up.
2686** This is done in the context of the *parent* trace and parent exitno!
2687*/
2688static void asm_checkstack(ASMState *as, RegSet allow)
2689{
2690 /* Try to get an unused temp. register, otherwise spill/restore eax. */
2691 Reg r = allow ? rset_pickbot(allow) : RID_EAX;
2692 emit_jcc(as, CC_B, exitstub_addr(as->J, as->J->exitno));
2693 if (allow == RSET_EMPTY) /* Restore temp. register. */
2694 emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
2695 emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*as->topslot));
2696 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base));
2697 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
2698 emit_getgl(as, r, jit_L);
2699 if (allow == RSET_EMPTY) /* Spill temp. register. */
2700 emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
2701}
2702
2703/* Head of a side trace.
2704**
2705** The current simplistic algorithm requires that all slots inherited
2706** from the parent are live in a register between pass 2 and pass 3. This
2707** avoids the complexity of stack slot shuffling. But of course this may
2708** overflow the register set in some cases and cause the dreaded error:
2709** "NYI: register coalescing too complex". A refined algorithm is needed.
2710*/
2711static void asm_head_side(ASMState *as)
2712{
2713 IRRef1 sloadins[RID_MAX];
2714 RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */
2715 RegSet live = RSET_EMPTY; /* Live parent registers. */
2716 int32_t spadj, spdelta;
2717 int pass2 = 0;
2718 int pass3 = 0;
2719 IRRef i;
2720
2721 /* Scan all parent SLOADs and collect register dependencies. */
2722 for (i = as->curins; i > REF_BASE; i--) {
2723 IRIns *ir = IR(i);
2724 lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
2725 ir->o == IR_FRAME);
2726 if (ir->o == IR_SLOAD) {
2727 RegSP rs = as->parentmap[ir->op1];
2728 if (ra_hasreg(ir->r)) {
2729 rset_clear(allow, ir->r);
2730 if (ra_hasspill(ir->s))
2731 ra_save(as, ir, ir->r);
2732 } else if (ra_hasspill(ir->s)) {
2733 irt_setmark(ir->t);
2734 pass2 = 1;
2735 }
2736 if (ir->r == rs) { /* Coalesce matching registers right now. */
2737 ra_free(as, ir->r);
2738 } else if (ra_hasspill(regsp_spill(rs))) {
2739 if (ra_hasreg(ir->r))
2740 pass3 = 1;
2741 } else if (ra_used(ir)) {
2742 sloadins[rs] = (IRRef1)i;
2743 rset_set(live, rs); /* Block live parent register. */
2744 }
2745 }
2746 }
2747
2748 /* Calculate stack frame adjustment. */
2749 spadj = sps_adjust(as);
2750 spdelta = spadj - (int32_t)as->parent->spadjust;
2751 if (spdelta < 0) { /* Don't shrink the stack frame. */
2752 spadj = (int32_t)as->parent->spadjust;
2753 spdelta = 0;
2754 }
2755 as->T->spadjust = (uint16_t)spadj;
2756
2757 /* Reload spilled target registers. */
2758 if (pass2) {
2759 for (i = as->curins; i > REF_BASE; i--) {
2760 IRIns *ir = IR(i);
2761 if (irt_ismarked(ir->t)) {
2762 RegSet mask;
2763 Reg r;
2764 RegSP rs;
2765 irt_clearmark(ir->t);
2766 rs = as->parentmap[ir->op1];
2767 if (!ra_hasspill(regsp_spill(rs)))
2768 ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */
2769 else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
2770 continue; /* Same spill slot, do nothing. */
2771 mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow;
2772 if (mask == RSET_EMPTY)
2773 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
2774 r = ra_allocref(as, i, mask);
2775 ra_save(as, ir, r);
2776 rset_clear(allow, r);
2777 if (r == rs) { /* Coalesce matching registers right now. */
2778 ra_free(as, r);
2779 rset_clear(live, r);
2780 } else if (ra_hasspill(regsp_spill(rs))) {
2781 pass3 = 1;
2782 }
2783 checkmclim(as);
2784 }
2785 }
2786 }
2787
2788 /* Store trace number and adjust stack frame relative to the parent. */
2789 emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
2790 emit_addptr(as, RID_ESP, -spdelta);
2791
2792 /* Restore target registers from parent spill slots. */
2793 if (pass3) {
2794 RegSet work = ~as->freeset & RSET_ALL;
2795 while (work) {
2796 Reg r = rset_pickbot(work);
2797 IRIns *ir = IR(regcost_ref(as->cost[r]));
2798 RegSP rs = as->parentmap[ir->op1];
2799 rset_clear(work, r);
2800 if (ra_hasspill(regsp_spill(rs))) {
2801 int32_t ofs = sps_scale(regsp_spill(rs));
2802 ra_free(as, r);
2803 emit_movrmro(as, r, RID_ESP, ofs);
2804 checkmclim(as);
2805 }
2806 }
2807 }
2808
2809 /* Shuffle registers to match up target regs with parent regs. */
2810 for (;;) {
2811 RegSet work;
2812
2813 /* Repeatedly coalesce free live registers by moving to their target. */
2814 while ((work = as->freeset & live) != RSET_EMPTY) {
2815 Reg rp = rset_pickbot(work);
2816 IRIns *ir = IR(sloadins[rp]);
2817 rset_clear(live, rp);
2818 rset_clear(allow, rp);
2819 ra_free(as, ir->r);
2820 emit_movrr(as, ir->r, rp);
2821 checkmclim(as);
2822 }
2823
2824 /* We're done if no live registers remain. */
2825 if (live == RSET_EMPTY)
2826 break;
2827
2828 /* Break cycles by renaming one target to a temp. register. */
2829 if (live & RSET_GPR) {
2830 RegSet tmpset = as->freeset & ~live & allow & RSET_GPR;
2831 if (tmpset == RSET_EMPTY)
2832 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
2833 ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset));
2834 }
2835 if (live & RSET_FPR) {
2836 RegSet tmpset = as->freeset & ~live & allow & RSET_FPR;
2837 if (tmpset == RSET_EMPTY)
2838 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
2839 ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset));
2840 }
2841 checkmclim(as);
2842 /* Continue with coalescing to fix up the broken cycle(s). */
2843 }
2844
2845 /* Check Lua stack size if frames have been added. */
2846 if (as->topslot)
2847 asm_checkstack(as, allow & RSET_GPR);
2848}
2849
2850/* -- Tail of trace ------------------------------------------------------- */
2851
2852/* Sync Lua stack slots to match the last snapshot.
2853** Note: code generation is backwards, so this is best read bottom-up.
2854*/
2855static void asm_tail_sync(ASMState *as)
2856{
2857 SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */
2858 BCReg s, nslots = snap->nslots;
2859 IRRef2 *map = &as->T->snapmap[snap->mapofs];
2860 IRRef2 *flinks = map + nslots + snap->nframelinks;
2861 BCReg newbase = 0;
2862 BCReg secondbase = ~(BCReg)0;
2863 BCReg topslot = 0;
2864
2865 checkmclim(as);
2866 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
2867
2868 /* Must check all frames to find topslot (outer can be larger than inner). */
2869 for (s = 0; s < nslots; s++) {
2870 IRRef ref = snap_ref(map[s]);
2871 if (!irref_isk(ref)) {
2872 IRIns *ir = IR(ref);
2873 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
2874 GCfunc *fn = ir_kfunc(IR(ir->op2));
2875 if (isluafunc(fn)) {
2876 BCReg fs = s + funcproto(fn)->framesize;
2877 newbase = s;
2878 if (secondbase == ~(BCReg)0) secondbase = s;
2879 if (fs > topslot) topslot = fs;
2880 }
2881 }
2882 }
2883 }
2884 as->topslot = topslot; /* Used in asm_head_side(). */
2885
2886 if (as->T->link == TRACE_INTERP) {
2887 /* Setup fixed registers for exit to interpreter. */
2888 emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
2889 emit_loadi(as, RID_PC, (int32_t)map[nslots]);
2890 } else if (newbase) {
2891 /* Save modified BASE for linking to trace with higher start frame. */
2892 emit_setgl(as, RID_BASE, jit_base);
2893 }
2894
2895 emit_addptr(as, RID_BASE, 8*(int32_t)newbase);
2896
2897 /* Clear stack slots of newly added frames. */
2898 if (nslots <= topslot) {
2899 if (nslots < topslot) {
2900 for (s = nslots; s <= topslot; s++) {
2901 emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4);
2902 checkmclim(as);
2903 }
2904 emit_loadi(as, RID_EAX, LJ_TNIL);
2905 } else {
2906 emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL);
2907 }
2908 }
2909
2910 /* Store the value of all modified slots to the Lua stack. */
2911 for (s = 0; s < nslots; s++) {
2912 int32_t ofs = 8*((int32_t)s-1);
2913 IRRef ref = snap_ref(map[s]);
2914 if (ref) {
2915 IRIns *ir = IR(ref);
2916 /* No need to restore readonly slots and unmodified non-parent slots. */
2917 if (ir->o == IR_SLOAD && ir->op1 == s &&
2918 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
2919 continue;
2920 if (irt_isnum(ir->t)) {
2921 Reg src = ra_alloc1(as, ref, RSET_FPR);
2922 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
2923 } else if (ir->o == IR_FRAME) {
2924 emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
2925 if (s != 0) /* Do not overwrite link to previous frame. */
2926 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
2927 } else {
2928 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
2929 if (!irref_isk(ref)) {
2930 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2931 emit_movtomro(as, src, RID_BASE, ofs);
2932 } else if (!irt_ispri(ir->t)) {
2933 emit_movmroi(as, RID_BASE, ofs, ir->i);
2934 }
2935 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
2936 }
2937 } else if (s > secondbase) {
2938 emit_movmroi(as, RID_BASE, ofs+4, LJ_TNIL);
2939 }
2940 checkmclim(as);
2941 }
2942 lua_assert(map + nslots == flinks-1);
2943}
2944
2945/* Fixup the tail code. */
2946static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2947{
2948 /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
2949 MCode *p = as->mctop;
2950 MCode *target, *q;
2951 int32_t spadj = as->T->spadjust;
2952 if (spadj == 0) {
2953 p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
2954 } else {
2955 MCode *p1;
2956 /* Patch stack adjustment. */
2957 if (checki8(spadj)) {
2958 p -= 3;
2959 p1 = p-6;
2960 *p1 = (MCode)spadj;
2961 } else {
2962 p1 = p-9;
2963 *(int32_t *)p1 = spadj;
2964 }
2965 if ((as->flags & JIT_F_LEA_AGU)) {
2966 p1[-3] = (MCode)XI_LEA;
2967 p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
2968 p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
2969 } else {
2970 p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2971 p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
2972 }
2973 }
2974 /* Patch exit branch. */
2975 target = lnk == TRACE_INTERP ? (MCode *)lj_vm_exit_interp :
2976 as->J->trace[lnk]->mcode;
2977 *(int32_t *)(p-4) = (int32_t)(target - p);
2978 p[-5] = XI_JMP;
2979 /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
2980 for (q = as->mctop-1; q >= p; q--)
2981 *q = XI_NOP;
2982 as->mctop = p;
2983}
2984
2985/* -- Instruction dispatch ------------------------------------------------ */
2986
2987/* Assemble a single instruction. */
2988static void asm_ir(ASMState *as, IRIns *ir)
2989{
2990 switch ((IROp)ir->o) {
2991 /* Miscellaneous ops. */
2992 case IR_LOOP: asm_loop(as); break;
2993 case IR_NOP: break;
2994 case IR_PHI: asm_phi(as, ir); break;
2995
2996 /* Guarded assertions. */
2997 case IR_LT: asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break;
2998 case IR_GE: asm_comp(as, ir, CC_L, CC_B, 0); break;
2999 case IR_LE: asm_comp(as, ir, CC_G, CC_A, VCC_PS); break;
3000 case IR_GT: asm_comp(as, ir, CC_LE, CC_BE, 0); break;
3001 case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break;
3002 case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break;
3003 case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break;
3004 case IR_ABC:
3005 case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break;
3006
3007 case IR_FRAME:
3008 if (ir->op1 == ir->op2) break; /* No check needed for placeholder. */
3009 /* fallthrough */
3010 case IR_EQ: asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break;
3011 case IR_NE: asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break;
3012
3013 /* Bit ops. */
3014 case IR_BNOT: asm_bitnot(as, ir); break;
3015 case IR_BSWAP: asm_bitswap(as, ir); break;
3016
3017 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
3018 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
3019 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
3020
3021 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
3022 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
3023 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
3024 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
3025 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
3026
3027 /* Arithmetic ops. */
3028 case IR_ADD: asm_add(as, ir); break;
3029 case IR_SUB:
3030 if (irt_isnum(ir->t))
3031 asm_fparith(as, ir, XO_SUBSD);
3032 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
3033 asm_intarith(as, ir, XOg_SUB);
3034 break;
3035 case IR_MUL: asm_fparith(as, ir, XO_MULSD); break;
3036 case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break;
3037
3038 case IR_NEG: asm_fparith(as, ir, XO_XORPS); break;
3039 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
3040
3041 case IR_MIN: asm_fparith(as, ir, XO_MINSD); break;
3042 case IR_MAX: asm_fparith(as, ir, XO_MAXSD); break;
3043
3044 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: case IR_POWI:
3045 asm_fpmath(as, ir);
3046 break;
3047
3048 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
3049 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
3050 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
3051
3052 /* Memory references. */
3053 case IR_AREF: asm_aref(as, ir); break;
3054 case IR_HREF: asm_href(as, ir); break;
3055 case IR_HREFK: asm_hrefk(as, ir); break;
3056 case IR_NEWREF: asm_newref(as, ir); break;
3057 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
3058 case IR_FREF: asm_fref(as, ir); break;
3059 case IR_STRREF: asm_strref(as, ir); break;
3060
3061 /* Loads and stores. */
3062 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
3063 case IR_FLOAD: asm_fload(as, ir); break;
3064 case IR_SLOAD: asm_sload(as, ir); break;
3065 case IR_XLOAD: asm_xload(as, ir); break;
3066
3067 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
3068 case IR_FSTORE: asm_fstore(as, ir); break;
3069
3070 /* String ops. */
3071 case IR_SNEW: asm_snew(as, ir); break;
3072
3073 /* Table ops. */
3074 case IR_TNEW: asm_tnew(as, ir); break;
3075 case IR_TDUP: asm_tdup(as, ir); break;
3076 case IR_TLEN: asm_tlen(as, ir); break;
3077 case IR_TBAR: asm_tbar(as, ir); break;
3078 case IR_OBAR: asm_obar(as, ir); break;
3079
3080 /* Type conversions. */
3081 case IR_TONUM: asm_tonum(as, ir); break;
3082 case IR_TOINT:
3083 if (irt_isguard(ir->t))
3084 asm_tointg(as, ir, ra_alloc1(as, ir->op1, RSET_FPR));
3085 else
3086 asm_toint(as, ir);
3087 break;
3088 case IR_TOBIT: asm_tobit(as, ir); break;
3089 case IR_TOSTR: asm_tostr(as, ir); break;
3090 case IR_STRTO: asm_strto(as, ir); break;
3091
3092 default:
3093 setintV(&as->J->errinfo, ir->o);
3094 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
3095 break;
3096 }
3097}
3098
3099/* Assemble a trace in linear backwards order. */
3100static void asm_trace(ASMState *as)
3101{
3102 for (as->curins--; as->curins > as->stopins; as->curins--) {
3103 IRIns *ir = IR(as->curins);
3104 if (irt_isguard(ir->t))
3105 asm_snap_prep(as);
3106 else if (!ra_used(ir) && !irm_sideeff(lj_ir_mode[ir->o]) &&
3107 (as->flags & JIT_F_OPT_DCE))
3108 continue; /* Dead-code elimination can be soooo easy. */
3109 RA_DBG_REF();
3110 checkmclim(as);
3111 asm_ir(as, ir);
3112 }
3113}
3114
3115/* -- Trace setup --------------------------------------------------------- */
3116
3117/* Clear reg/sp for all instructions and add register hints. */
3118static void asm_setup_regsp(ASMState *as, Trace *T)
3119{
3120 IRRef i, nins;
3121 int inloop;
3122
3123 /* Clear reg/sp for constants. */
3124 for (i = T->nk; i < REF_BIAS; i++)
3125 IR(i)->prev = REGSP_INIT;
3126
3127 /* REF_BASE is used for implicit references to the BASE register. */
3128 IR(REF_BASE)->prev = REGSP_HINT(RID_BASE);
3129
3130 nins = T->nins;
3131 if (IR(nins-1)->o == IR_RENAME) {
3132 do { nins--; } while (IR(nins-1)->o == IR_RENAME);
3133 T->nins = nins; /* Remove any renames left over from ASM restart. */
3134 }
3135 as->snaprename = nins;
3136 as->snapref = nins;
3137 as->snapno = T->nsnap;
3138
3139 as->stopins = REF_BASE;
3140 as->orignins = nins;
3141 as->curins = nins;
3142
3143 inloop = 0;
3144 for (i = REF_FIRST; i < nins; i++) {
3145 IRIns *ir = IR(i);
3146 switch (ir->o) {
3147 case IR_LOOP:
3148 inloop = 1;
3149 break;
3150 /* Set hints for slot loads from a parent trace. */
3151 case IR_SLOAD:
3152 if ((ir->op2 & IRSLOAD_PARENT)) {
3153 RegSP rs = as->parentmap[ir->op1];
3154 lua_assert(regsp_used(rs));
3155 as->stopins = i;
3156 if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) {
3157 ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
3158 continue;
3159 }
3160 }
3161 break;
3162 case IR_FRAME:
3163 if (i == as->stopins+1 && ir->op1 == ir->op2)
3164 as->stopins++;
3165 break;
3166 /* C calls evict all scratch regs and return results in RID_RET. */
3167 case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
3168 case IR_NEWREF:
3169 ir->prev = REGSP_HINT(RID_RET);
3170 if (inloop)
3171 as->modset = RSET_SCRATCH;
3172 continue;
3173 case IR_STRTO: case IR_OBAR:
3174 if (inloop)
3175 as->modset = RSET_SCRATCH;
3176 break;
3177 /* Ordered string compares evict all integer scratch registers. */
3178 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
3179 if (irt_isstr(ir->t) && inloop)
3180 as->modset |= (RSET_SCRATCH & RSET_GPR);
3181 break;
3182 /* Non-constant shift counts need to be in RID_ECX. */
3183 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
3184 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
3185 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
3186 break;
3187 /* Do not propagate hints across type conversions. */
3188 case IR_TONUM: case IR_TOINT: case IR_TOBIT:
3189 break;
3190 default:
3191 /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
3192 if (irref_isk(ir->op2) && !irref_isk(ir->op1)) {
3193 ir->prev = IR(ir->op1)->prev;
3194 continue;
3195 }
3196 break;
3197 }
3198 ir->prev = REGSP_INIT;
3199 }
3200}
3201
3202/* -- Assembler core ------------------------------------------------------ */
3203
3204/* Define this if you want to run LuaJIT with Valgrind. */
3205#ifdef LUAJIT_USE_VALGRIND
3206#include <valgrind/valgrind.h>
3207#define VG_INVALIDATE(p, sz) VALGRIND_DISCARD_TRANSLATIONS(p, sz)
3208#else
3209#define VG_INVALIDATE(p, sz) ((void)0)
3210#endif
3211
3212/* Assemble a trace. */
3213void lj_asm_trace(jit_State *J, Trace *T)
3214{
3215 ASMState as_;
3216 ASMState *as = &as_;
3217
3218 /* Setup initial state. Copy some fields to reduce indirections. */
3219 as->J = J;
3220 as->T = T;
3221 as->ir = T->ir;
3222 as->flags = J->flags;
3223 as->loopref = J->loopref;
3224 as->realign = NULL;
3225 as->loopinv = 0;
3226 if (J->parent) {
3227 as->parent = J->trace[J->parent];
3228 lj_snap_regspmap(as->parentmap, as->parent, J->exitno);
3229 } else {
3230 as->parent = NULL;
3231 }
3232 as->mctop = lj_mcode_reserve(J, &as->mcbot); /* Reserve MCode memory. */
3233 as->mcp = as->mctop;
3234 as->mclim = as->mcbot + MCLIM_REDZONE;
3235 asm_exitstub_setup(as, T->nsnap);
3236
3237 do {
3238 as->mcp = as->mctop;
3239 as->curins = T->nins;
3240 RA_DBG_START();
3241 RA_DBGX((as, "===== STOP ====="));
3242 /* Realign and leave room for backwards loop branch or exit branch. */
3243 if (as->realign) {
3244 int i = ((int)(intptr_t)as->realign) & 15;
3245 MCode *p = as->mctop;
3246 /* Fill unused mcode tail with NOPs to make the prefetcher happy. */
3247 while (i-- > 0)
3248 *--p = XI_NOP;
3249 as->mctop = p;
3250 as->mcp = p - (as->loopinv ? 5 : 2); /* Space for short/near jmp. */
3251 } else {
3252 as->mcp = as->mctop - 5; /* Space for exit branch (near jmp). */
3253 }
3254 as->invmcp = as->mcp;
3255 as->mcloop = NULL;
3256 as->testmcp = NULL;
3257 as->topslot = 0;
3258 as->gcsteps = 0;
3259 as->sectref = as->loopref;
3260 as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
3261
3262 /* Setup register allocation. */
3263 ra_setup(as);
3264 asm_setup_regsp(as, T);
3265
3266 if (!as->loopref) {
3267 /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
3268 as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
3269 as->invmcp = NULL;
3270 asm_tail_sync(as);
3271 }
3272 asm_trace(as);
3273 } while (as->realign); /* Retry in case the MCode needs to be realigned. */
3274
3275 RA_DBG_REF();
3276 checkmclim(as);
3277 if (as->gcsteps)
3278 asm_gc_check(as, &as->T->snap[0]);
3279 if (!J->parent)
3280 asm_head_base(as);
3281 asm_const_remat(as);
3282 if (J->parent)
3283 asm_head_side(as);
3284 else
3285 asm_head_root(as);
3286 asm_phi_fixup(as);
3287
3288 RA_DBGX((as, "===== START ===="));
3289 RA_DBG_FLUSH();
3290 if (as->freeset != RSET_ALL)
3291 lj_trace_err(as->J, LJ_TRERR_BADRA); /* Ouch! Should never happen. */
3292
3293 /* Set trace entry point before fixing up tail to allow link to self. */
3294 T->mcode = as->mcp;
3295 T->mcloop = as->mcloop ? (MSize)(as->mcloop - as->mcp) : 0;
3296 if (!as->loopref)
3297 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
3298 T->szmcode = (MSize)(as->mctop - as->mcp);
3299 VG_INVALIDATE(T->mcode, T->szmcode);
3300}
3301
3302/* Patch exit jumps of existing machine code to a new target. */
3303void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno, MCode *target)
3304{
3305 MCode *p = T->mcode;
3306 MCode *mcarea = lj_mcode_patch(J, p, 0);
3307 MSize len = T->szmcode;
3308 MCode *px = exitstub_addr(J, exitno) - 6;
3309 MCode *pe = p+len-6;
3310 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
3311 *(int32_t *)(p+len-4) = (int32_t)(target - (p+len));
3312 for (; p < pe; p++) {
3313 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
3314 *(int32_t *)(p+2) = (int32_t)(target - (p+6));
3315 p += 5;
3316 }
3317 }
3318 lj_mcode_patch(J, mcarea, 1);
3319 VG_INVALIDATE(T->mcode, T->szmcode);
3320}
3321
3322#undef IR
3323
3324#endif
diff --git a/src/lj_asm.h b/src/lj_asm.h
new file mode 100644
index 00000000..84122b43
--- /dev/null
+++ b/src/lj_asm.h
@@ -0,0 +1,17 @@
1/*
2** IR assembler (SSA IR -> machine code).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_ASM_H
7#define _LJ_ASM_H
8
9#include "lj_jit.h"
10
11#if LJ_HASJIT
12LJ_FUNC void lj_asm_trace(jit_State *J, Trace *T);
13LJ_FUNC void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno,
14 MCode *target);
15#endif
16
17#endif
diff --git a/src/lj_bc.c b/src/lj_bc.c
new file mode 100644
index 00000000..79846325
--- /dev/null
+++ b/src/lj_bc.c
@@ -0,0 +1,17 @@
1/*
2** Bytecode instruction modes.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_bc_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_bc.h"
11
12/* Bytecode instruction modes. */
13LJ_DATADEF const uint16_t lj_bc_mode[BC__MAX+1] = {
14BCDEF(BCMODE)
15 0
16};
17
diff --git a/src/lj_bc.h b/src/lj_bc.h
new file mode 100644
index 00000000..d906011c
--- /dev/null
+++ b/src/lj_bc.h
@@ -0,0 +1,235 @@
1/*
2** Bytecode instruction format.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BC_H
7#define _LJ_BC_H
8
9#include "lj_def.h"
10#include "lj_arch.h"
11
12/* Bytecode instruction format, 32 bit wide, fields of 8 or 16 bit:
13**
14** +----+----+----+----+
15** | B | C | A | OP | Format ABC
16** +----+----+----+----+
17** | D | A | OP | Format AD
18** +--------------------
19** MSB LSB
20**
21** In-memory instructions are always stored in host byte order.
22*/
23
24/* Operand ranges and related constants. */
25#define BCMAX_A 0xff
26#define BCMAX_B 0xff
27#define BCMAX_C 0xff
28#define BCMAX_D 0xffff
29#define BCBIAS_J 0x8000
30#define NO_REG BCMAX_A
31#define NO_JMP (~(BCPos)0)
32
33/* Macros to get instruction fields. */
34#define bc_op(i) (cast(BCOp, (i)&0xff))
35#define bc_a(i) (cast(BCReg, ((i)>>8)&0xff))
36#define bc_b(i) (cast(BCReg, (i)>>24))
37#define bc_c(i) (cast(BCReg, ((i)>>16)&0xff))
38#define bc_d(i) (cast(BCReg, (i)>>16))
39#define bc_j(i) ((ptrdiff_t)bc_d(i)-BCBIAS_J)
40
41/* Macros to set instruction fields. */
42#define setbc_byte(p, x, ofs) \
43 ((uint8_t *)(p))[LJ_ENDIAN_SELECT(ofs, 3-ofs)] = cast_byte(x)
44#define setbc_op(p, x) setbc_byte(p, (x), 0)
45#define setbc_a(p, x) setbc_byte(p, (x), 1)
46#define setbc_b(p, x) setbc_byte(p, (x), 3)
47#define setbc_c(p, x) setbc_byte(p, (x), 2)
48#define setbc_d(p, x) \
49 ((uint16_t *)(p))[LJ_ENDIAN_SELECT(1, 0)] = cast(uint16_t, (x))
50#define setbc_j(p, x) setbc_d(p, (BCPos)((int32_t)(x)+BCBIAS_J))
51
52/* Macros to compose instructions. */
53#define BCINS_ABC(o, a, b, c) \
54 (cast(BCIns, o)|(cast(BCIns, a)<<8)|\
55 (cast(BCIns, b)<<24)|(cast(BCIns, c)<<16))
56#define BCINS_AD(o, a, d) \
57 (cast(BCIns, o)|(cast(BCIns, a)<<8)|(cast(BCIns, d)<<16))
58#define BCINS_AJ(o, a, j) BCINS_AD(o, a, (BCPos)((int32_t)(j)+BCBIAS_J))
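/* E.g. BCINS_ABC(BC_ADDVV, 2, 0, 1) ("slot 2 = slot 0 + slot 1") yields
** 0x00010200|BC_ADDVV: B in the top byte, C below it, A in bits 8-15 and
** the opcode in the low byte. The bc_a() etc. macros extract them again.
*/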
59
60/* Bytecode instruction definition. Order matters, see below.
61**
62** (name, filler, Amode, Bmode, Cmode or Dmode, metamethod)
63**
64** The opcode name suffixes specify the type for RB/RC or RD:
65** V = variable slot
66** S = string const
67** N = number const
68** P = primitive type (~itype)
69** B = unsigned byte literal
70** M = multiple args/results
71*/
72#define BCDEF(_) \
73 /* Comparison ops. ORDER OPR. */ \
74 _(ISLT, var, ___, var, lt) \
75 _(ISGE, var, ___, var, lt) \
76 _(ISLE, var, ___, var, le) \
77 _(ISGT, var, ___, var, le) \
78 \
79 _(ISEQV, var, ___, var, eq) \
80 _(ISNEV, var, ___, var, eq) \
81 _(ISEQS, var, ___, str, eq) \
82 _(ISNES, var, ___, str, eq) \
83 _(ISEQN, var, ___, num, eq) \
84 _(ISNEN, var, ___, num, eq) \
85 _(ISEQP, var, ___, pri, eq) \
86 _(ISNEP, var, ___, pri, eq) \
87 \
88 /* Unary test and copy ops. */ \
89 _(ISTC, dst, ___, var, ___) \
90 _(ISFC, dst, ___, var, ___) \
91 _(IST, ___, ___, var, ___) \
92 _(ISF, ___, ___, var, ___) \
93 \
94 /* Unary ops. */ \
95 _(MOV, dst, ___, var, ___) \
96 _(NOT, dst, ___, var, ___) \
97 _(UNM, dst, ___, var, unm) \
98 _(LEN, dst, ___, var, len) \
99 \
100 /* Binary ops. ORDER OPR. VV last, POW must be next. */ \
101 _(ADDVN, dst, var, num, add) \
102 _(SUBVN, dst, var, num, sub) \
103 _(MULVN, dst, var, num, mul) \
104 _(DIVVN, dst, var, num, div) \
105 _(MODVN, dst, var, num, mod) \
106 \
107 _(ADDNV, dst, var, num, add) \
108 _(SUBNV, dst, var, num, sub) \
109 _(MULNV, dst, var, num, mul) \
110 _(DIVNV, dst, var, num, div) \
111 _(MODNV, dst, var, num, mod) \
112 \
113 _(ADDVV, dst, var, var, add) \
114 _(SUBVV, dst, var, var, sub) \
115 _(MULVV, dst, var, var, mul) \
116 _(DIVVV, dst, var, var, div) \
117 _(MODVV, dst, var, var, mod) \
118 \
119 _(POW, dst, var, var, pow) \
120 _(CAT, dst, rbase, rbase, concat) \
121 \
122 /* Constant ops. */ \
123 _(KSTR, dst, ___, str, ___) \
124 _(KSHORT, dst, ___, lits, ___) \
125 _(KNUM, dst, ___, num, ___) \
126 _(KPRI, dst, ___, pri, ___) \
127 _(KNIL, base, ___, base, ___) \
128 \
129 /* Upvalue and function ops. */ \
130 _(UGET, dst, ___, uv, ___) \
131 _(USETV, uv, ___, var, ___) \
132 _(USETS, uv, ___, str, ___) \
133 _(USETN, uv, ___, num, ___) \
134 _(USETP, uv, ___, pri, ___) \
135 _(UCLO, rbase, ___, jump, ___) \
136 _(FNEW, dst, ___, func, gc) \
137 \
138 /* Table ops. */ \
139 _(TNEW, dst, ___, lit, gc) \
140 _(TDUP, dst, ___, tab, gc) \
141 _(GGET, dst, ___, str, index) \
142 _(GSET, var, ___, str, newindex) \
143 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \
146 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \
150 \
151 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \
153 _(CALL, base, lit, lit, call) \
154 _(CALLMT, base, ___, lit, call) \
155 _(CALLT, base, ___, lit, call) \
156 _(ITERC, base, lit, lit, call) \
157 _(VARG, base, lit, lit, ___) \
158 \
159 /* Returns. */ \
160 _(RETM, base, ___, lit, ___) \
161 _(RET, rbase, ___, lit, ___) \
162 _(RET0, rbase, ___, lit, ___) \
163 _(RET1, rbase, ___, lit, ___) \
164 \
165 /* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \
166 _(FORI, base, ___, jump, ___) \
167 _(JFORI, base, ___, jump, ___) \
168 \
169 _(FORL, base, ___, jump, ___) \
170 _(IFORL, base, ___, jump, ___) \
171 _(JFORL, base, ___, lit, ___) \
172 \
173 _(ITERL, base, ___, jump, ___) \
174 _(IITERL, base, ___, jump, ___) \
175 _(JITERL, base, ___, lit, ___) \
176 \
177 _(LOOP, rbase, ___, jump, ___) \
178 _(ILOOP, rbase, ___, jump, ___) \
179 _(JLOOP, rbase, ___, lit, ___) \
180 \
181 _(JMP, rbase, ___, jump, ___)
182
183/* Bytecode opcode numbers. */
184typedef enum {
185#define BCENUM(name, ma, mb, mc, mt) BC_##name,
186BCDEF(BCENUM)
187#undef BCENUM
188 BC__MAX
189} BCOp;
190
191LJ_STATIC_ASSERT((int)BC_ISEQV+1 == (int)BC_ISNEV);
192LJ_STATIC_ASSERT(((int)BC_ISEQV^1) == (int)BC_ISNEV);
193LJ_STATIC_ASSERT(((int)BC_ISEQS^1) == (int)BC_ISNES);
194LJ_STATIC_ASSERT(((int)BC_ISEQN^1) == (int)BC_ISNEN);
195LJ_STATIC_ASSERT(((int)BC_ISEQP^1) == (int)BC_ISNEP);
196LJ_STATIC_ASSERT(((int)BC_ISLT^1) == (int)BC_ISGE);
197LJ_STATIC_ASSERT(((int)BC_ISLE^1) == (int)BC_ISGT);
198LJ_STATIC_ASSERT(((int)BC_ISLT^3) == (int)BC_ISGT);
199LJ_STATIC_ASSERT((int)BC_IST-(int)BC_ISTC == (int)BC_ISF-(int)BC_ISFC);
200LJ_STATIC_ASSERT((int)BC_CALLT-(int)BC_CALL == (int)BC_CALLMT-(int)BC_CALLM);
201LJ_STATIC_ASSERT((int)BC_CALLMT + 1 == (int)BC_CALLT);
202LJ_STATIC_ASSERT((int)BC_RETM + 1 == (int)BC_RET);
203LJ_STATIC_ASSERT((int)BC_FORL + 1 == (int)BC_IFORL);
204LJ_STATIC_ASSERT((int)BC_FORL + 2 == (int)BC_JFORL);
205LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL);
206LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL);
207LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP);
208LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP);
209
210/* Stack slots used by FORI/FORL, relative to operand A. */
211enum {
212 FORL_IDX, FORL_STOP, FORL_STEP, FORL_EXT
213};
214
215/* Bytecode operand modes. ORDER BCMode */
216typedef enum {
217 BCMnone, BCMdst, BCMbase, BCMvar, BCMrbase, BCMuv, /* Mode A must be <= 7 */
218 BCMlit, BCMlits, BCMpri, BCMnum, BCMstr, BCMtab, BCMfunc, BCMjump,
219 BCM_max
220} BCMode;
221#define BCM___ BCMnone
222
223#define bcmode_a(op) (cast(BCMode, lj_bc_mode[op] & 7))
224#define bcmode_b(op) (cast(BCMode, (lj_bc_mode[op]>>3) & 15))
225#define bcmode_c(op) (cast(BCMode, (lj_bc_mode[op]>>7) & 15))
226#define bcmode_d(op) bcmode_c(op)
227#define bcmode_hasd(op) ((lj_bc_mode[op] & (15<<3)) == (BCMnone<<3))
228#define bcmode_mm(op) (cast(MMS, lj_bc_mode[op]>>11))
229
230#define BCMODE(name, ma, mb, mc, mm) \
231 (BCM##ma|(BCM##mb<<3)|(BCM##mc<<7)|(MM_##mm<<11)),
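/* E.g. the BCDEF entry _(ADDVV, dst, var, var, add) expands to
** (BCMdst|(BCMvar<<3)|(BCMvar<<7)|(MM_add<<11)), so bcmode_a(BC_ADDVV)
** is BCMdst and bcmode_mm(BC_ADDVV) is MM_add.
*/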
232
233LJ_DATA const uint16_t lj_bc_mode[BC__MAX+1];
234
235#endif
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
new file mode 100644
index 00000000..9f19b879
--- /dev/null
+++ b/src/lj_ctype.c
@@ -0,0 +1,44 @@
1/*
2** Internal CTYPE replacement.
3** Donated to the public domain.
4**
5** This is intended to replace the problematic libc single-byte NLS functions.
6** These just don't make sense anymore with UTF-8 locales becoming the norm
7** on POSIX systems. It never worked too well on Windows systems since hardly
8** anyone bothered to call setlocale().
9**
10** Instead this table is hardcoded for ASCII, except for identifiers. These
11** include the characters 128-255, too. This allows for the use of all
12** non-ASCII chars as identifiers in the lexer. This is a broad definition,
13** but works well in practice for both UTF-8 locales and most single-byte
14** locales (such as ISO-8859-*).
15**
16** If you really need proper ctypes for UTF-8 strings, please use an add-on
17** library such as slnunicode: http://luaforge.net/projects/sln/
18*/
19
20#define lj_ctype_c
21#define LUA_CORE
22
23#include "lj_ctype.h"
24
25LJ_DATADEF const uint8_t lj_ctype_bits[257] = {
26 0,
27 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 1, 1,
28 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
29 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
30 152,152,152,152,152,152,152,152,152,152, 4, 4, 4, 4, 4, 4,
31 4,176,176,176,176,176,176,160,160,160,160,160,160,160,160,160,
32 160,160,160,160,160,160,160,160,160,160,160, 4, 4, 4, 4,132,
33 4,208,208,208,208,208,208,192,192,192,192,192,192,192,192,192,
34 192,192,192,192,192,192,192,192,192,192,192, 4, 4, 4, 4, 1,
35 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
36 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
37 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
38 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
39 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
40 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
41 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
42 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
43};
44
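
Each lj_ctype_bits entry is just an OR of the LJ_CTYPE_* flags declared in lj_ctype.h (the next file in this commit); the extra leading 0 shifts the table so that indexing with c+1 also covers EOF == -1. A standalone check that decodes a few of the values above (flag values copied from the header, abbreviated names, nothing new):

#include <assert.h>

/* Flag values match LJ_CTYPE_* from lj_ctype.h. */
#define CNTRL 0x01
#define SPACE 0x02
#define PUNCT 0x04
#define DIGIT 0x08
#define XDIGIT 0x10
#define UPPER 0x20
#define LOWER 0x40
#define IDENT 0x80

int main(void)
{
  assert(152 == (IDENT|XDIGIT|DIGIT));   /* '0'..'9' */
  assert(176 == (IDENT|UPPER|XDIGIT));   /* 'A'..'F' */
  assert(160 == (IDENT|UPPER));          /* 'G'..'Z' */
  assert(208 == (IDENT|LOWER|XDIGIT));   /* 'a'..'f' */
  assert(132 == (IDENT|PUNCT));          /* '_' */
  assert(128 == IDENT);                  /* bytes 128..255: identifiers only */
  return 0;
}
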
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
new file mode 100644
index 00000000..c4cdff84
--- /dev/null
+++ b/src/lj_ctype.h
@@ -0,0 +1,40 @@
1/*
2** Internal CTYPE replacement.
3** Donated to the public domain.
4*/
5
6#ifndef _LJ_CTYPE_H
7#define _LJ_CTYPE_H
8
9#include "lj_def.h"
10
11#define LJ_CTYPE_CNTRL 0x01
12#define LJ_CTYPE_SPACE 0x02
13#define LJ_CTYPE_PUNCT 0x04
14#define LJ_CTYPE_DIGIT 0x08
15#define LJ_CTYPE_XDIGIT 0x10
16#define LJ_CTYPE_UPPER 0x20
17#define LJ_CTYPE_LOWER 0x40
18#define LJ_CTYPE_IDENT 0x80
19#define LJ_CTYPE_ALPHA (LJ_CTYPE_LOWER|LJ_CTYPE_UPPER)
20#define LJ_CTYPE_ALNUM (LJ_CTYPE_ALPHA|LJ_CTYPE_DIGIT)
21
22/* Only pass -1 or 0..255 to these macros. Never pass a signed char! */
23#define lj_ctype_isa(c, t) (lj_ctype_bits[(c)+1] & t)
24#define lj_ctype_iscntrl(c) lj_ctype_isa((c), LJ_CTYPE_CNTRL)
25#define lj_ctype_isspace(c) lj_ctype_isa((c), LJ_CTYPE_SPACE)
26#define lj_ctype_ispunct(c) lj_ctype_isa((c), LJ_CTYPE_PUNCT)
27#define lj_ctype_isdigit(c) lj_ctype_isa((c), LJ_CTYPE_DIGIT)
28#define lj_ctype_isxdigit(c) lj_ctype_isa((c), LJ_CTYPE_XDIGIT)
29#define lj_ctype_isupper(c) lj_ctype_isa((c), LJ_CTYPE_UPPER)
30#define lj_ctype_islower(c) lj_ctype_isa((c), LJ_CTYPE_LOWER)
31#define lj_ctype_isident(c) lj_ctype_isa((c), LJ_CTYPE_IDENT)
32#define lj_ctype_isalpha(c) lj_ctype_isa((c), LJ_CTYPE_ALPHA)
33#define lj_ctype_isalnum(c) lj_ctype_isa((c), LJ_CTYPE_ALNUM)
34
35#define lj_ctype_toupper(c) ((c) - (lj_ctype_islower(c) >> 1))
36#define lj_ctype_tolower(c) ((c) + lj_ctype_isupper(c))
37
38LJ_DATA const uint8_t lj_ctype_bits[257];
39
40#endif
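
The case-conversion macros above lean on the flag encoding instead of a second table: lj_ctype_islower(c) evaluates to 0x40 for a lowercase letter and 0 otherwise, and 0x40 >> 1 is exactly the 32 that separates 'a' from 'A' in ASCII, while lj_ctype_isupper(c) already yields 0x20. A minimal standalone check of that arithmetic (flag values copied from above, ASCII assumed):

#include <assert.h>

int main(void)
{
  int lower_flag = 0x40, upper_flag = 0x20;         /* LJ_CTYPE_LOWER/_UPPER */
  assert('a' - (lower_flag >> 1) == 'A');           /* lj_ctype_toupper('a') */
  assert('A' + upper_flag == 'a');                  /* lj_ctype_tolower('A') */
  assert('5' - (0 >> 1) == '5' && '5' + 0 == '5');  /* non-letters unchanged */
  return 0;
}
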
diff --git a/src/lj_def.h b/src/lj_def.h
new file mode 100644
index 00000000..dbfd5bf5
--- /dev/null
+++ b/src/lj_def.h
@@ -0,0 +1,226 @@
1/*
2** LuaJIT common internal definitions.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_DEF_H
7#define _LJ_DEF_H
8
9#include "lua.h"
10
11#ifdef _MSC_VER
12/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */
13typedef __int8 int8_t;
14typedef __int16 int16_t;
15typedef __int32 int32_t;
16typedef __int64 int64_t;
17typedef unsigned __int8 uint8_t;
18typedef unsigned __int16 uint16_t;
19typedef unsigned __int32 uint32_t;
20typedef unsigned __int64 uint64_t;
21#ifdef _WIN64
22typedef __int64 intptr_t;
23typedef unsigned __int64 uintptr_t;
24#else
25typedef __int32 intptr_t;
26typedef unsigned __int32 uintptr_t;
27#endif
28#else
29#include <stdint.h>
30#endif
31
32/* Needed everywhere. */
33#include <string.h>
34#include <stdlib.h>
35
36/* Various VM limits. */
37#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */
38#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
39#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */
40#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */
41
42#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
43#define LJ_MAX_HBITS 26 /* Max. hash bits. */
44#define LJ_MAX_ABITS 28 /* Max. bits of array key. */
45#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
46#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
47
48#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */
49#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
50#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
51#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
52#define LJ_MAX_LOCVAR 200 /* Max. # of local variables. */
53#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
54
55#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
56#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */
57
58/* Minimum table/buffer sizes. */
59#define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
60#define LJ_MIN_REGISTRY 2 /* Min. registry size (hbits). */
61#define LJ_MIN_STRTAB 256 /* Min. string table size (pow2). */
62#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
63#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
64#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
65#define LJ_MIN_KNUMSZ 16 /* Min. size for chained KNUM array. */
66
67/* JIT compiler limits. */
68#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
69#define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */
70#define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups. */
71
72/* Various macros. */
73#ifndef UNUSED
74#define UNUSED(x) ((void)(x)) /* to avoid warnings */
75#endif
76
77#ifndef cast
78#define cast(t, exp) ((t)(exp))
79#endif
80
81#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
82#define cast_byte(i) cast(uint8_t, (i))
83#define cast_num(i) cast(lua_Number, (i))
84#define cast_int(i) cast(int, (i))
85#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
86#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
87
88#define checki8(x) ((x) == (int32_t)(int8_t)(x))
89#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
90#define checki16(x) ((x) == (int32_t)(int16_t)(x))
91
92/* Every half-decent C compiler transforms this into a rotate instruction. */
93#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n))))
94#define lj_ror(x, n) (((x)<<(32-(n))) | ((x)>>(n)))
95
96/* A really naive Bloom filter. But sufficient for our needs. */
97typedef uintptr_t BloomFilter;
98#define BLOOM_MASK (8*sizeof(BloomFilter) - 1)
99#define bloombit(x) ((uintptr_t)1 << ((x) & BLOOM_MASK))
100#define bloomset(b, x) ((b) |= bloombit((x)))
101#define bloomtest(b, x) ((b) & bloombit((x)))
102
103#if defined(__GNUC__)
104
105#if (__GNUC__ < 3) || ((__GNUC__ == 3) && __GNUC_MINOR__ < 4)
106#error "sorry, need GCC 3.4 or newer"
107#endif
108
109#define LJ_NORET __attribute__((noreturn))
110#define LJ_ALIGN(n) __attribute__((aligned(n)))
111#define LJ_INLINE inline
112#define LJ_AINLINE inline __attribute__((always_inline))
113#define LJ_NOINLINE __attribute__((noinline))
114
115#if defined(__ELF__) || defined(__MACH__)
116#define LJ_NOAPI extern __attribute__((visibility("hidden")))
117#endif
118
119/* Note: it's only beneficial to use fastcall on x86 and then only for up to
120** two non-FP args. The amalgamated compile covers all LJ_FUNC cases. Only
121** indirect calls and related tail-called C functions are marked as fastcall.
122*/
123#if defined(__i386__)
124#define LJ_FASTCALL __attribute__((fastcall))
125#endif
126
127#define LJ_LIKELY(x) __builtin_expect(!!(x), 1)
128#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0)
129
130#define lj_ffs(x) ((uint32_t)__builtin_ctz(x))
131/* Don't ask ... */
132#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__))
133static LJ_AINLINE uint32_t lj_fls(uint32_t x)
134{
135 uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r;
136}
137#else
138#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31))
139#endif
140
141#if defined(__i386__) || defined(__x86_64__)
142static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
143{
144 uint32_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r;
145}
146#else
147#error "missing define for lj_bswap()"
148#endif
149
150#elif defined(_MSC_VER)
151
152#define LJ_NORET __declspec(noreturn)
153#define LJ_ALIGN(n) __declspec(align(n))
154#define LJ_INLINE __inline
155#define LJ_AINLINE __forceinline
156#define LJ_NOINLINE __declspec(noinline)
157#if defined(_M_IX86)
158#define LJ_FASTCALL __fastcall
159#endif
160
161static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
162{
163 uint32_t r; _BitScanForward(&r, x); return r;
164}
165
166static LJ_AINLINE uint32_t lj_fls(uint32_t x)
167{
168 uint32_t r; _BitScanReverse(&r, x); return r;
169}
170
171#define lj_bswap(x) (_byteswap_ulong((x)))
172
173#else
174#error "missing defines for your compiler"
175#endif
176
177/* Optional defines. */
178#ifndef LJ_FASTCALL
179#define LJ_FASTCALL
180#endif
181#ifndef LJ_NORET
182#define LJ_NORET
183#endif
184#ifndef LJ_NOAPI
185#define LJ_NOAPI extern
186#endif
187#ifndef LJ_LIKELY
188#define LJ_LIKELY(x) (x)
189#define LJ_UNLIKELY(x) (x)
190#endif
191
192/* Attributes for internal functions. */
193#if defined(ljamalg_c)
194#define LJ_DATA static
195#define LJ_DATADEF static
196#define LJ_FUNC static
197#define LJ_ASMF LJ_NOAPI
198#define LJ_FUNCA LJ_NOAPI
199#else
200#define LJ_DATA LJ_NOAPI
201#define LJ_DATADEF
202#define LJ_FUNC LJ_NOAPI
203#define LJ_ASMF LJ_NOAPI
204#define LJ_FUNCA LJ_NOAPI
205#endif
206#define LJ_FUNC_NORET LJ_FUNC LJ_NORET
207#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET
208#define LJ_ASMF_NORET LJ_ASMF LJ_NORET
209
210/* Runtime assertions. */
211#ifdef lua_assert
212#define check_exp(c, e) (lua_assert(c), (e))
213#define api_check(l, e) lua_assert(e)
214#else
215#define lua_assert(c) ((void)0)
216#define check_exp(c, e) (e)
217#define api_check luai_apicheck
218#endif
219
220/* Static assertions. */
221#define LJ_ASSERT_NAME2(name, line) name ## line
222#define LJ_ASSERT_NAME(line) LJ_ASSERT_NAME2(lj_assert_, line)
223#define LJ_STATIC_ASSERT(cond) \
224 extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1])
225
226#endif
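
LJ_STATIC_ASSERT gets compile-time checking without any C11 support: it declares a function (never defined, never called) whose parameter is an array of size 1 when the condition holds and size -1 when it does not, and a negative array size is a hard constraint violation. Roughly what one expansion looks like, with 42 standing in for the __LINE__ part of the generated name:

#include <stdint.h>

/* LJ_STATIC_ASSERT(sizeof(int32_t) == 4) expands to something like: */
extern void lj_assert_42(int STATIC_ASSERTION_FAILED[(sizeof(int32_t) == 4) ? 1 : -1]);

/* A true condition leaves a harmless declaration that generates no code;
** a false one makes the compiler reject the array size, and the identifier
** STATIC_ASSERTION_FAILED shows up in the error message.
*/
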
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
new file mode 100644
index 00000000..d2fce2e0
--- /dev/null
+++ b/src/lj_dispatch.c
@@ -0,0 +1,284 @@
1/*
2** Instruction dispatch handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_dispatch_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_err.h"
11#include "lj_state.h"
12#include "lj_frame.h"
13#include "lj_bc.h"
14#if LJ_HASJIT
15#include "lj_jit.h"
16#endif
17#include "lj_trace.h"
18#include "lj_dispatch.h"
19#include "lj_vm.h"
20#include "luajit.h"
21
22/* -- Dispatch table management ------------------------------------------- */
23
24/* Initialize instruction dispatch table and hot counters. */
25void lj_dispatch_init(GG_State *GG)
26{
27 uint32_t i;
28 ASMFunction *disp = GG->dispatch;
29 for (i = 0; i < BC__MAX; i++)
30 disp[GG_DISP_STATIC+i] = disp[i] = makeasmfunc(lj_vm_op_ofs[i]);
31 /* The JIT engine is off by default. luaopen_jit() turns it on. */
32 disp[BC_FORL] = disp[BC_IFORL];
33 disp[BC_ITERL] = disp[BC_IITERL];
34 disp[BC_LOOP] = disp[BC_ILOOP];
35}
36
37/* Update dispatch table depending on various flags. */
38void lj_dispatch_update(global_State *g)
39{
40 uint8_t oldmode = g->dispatchmode;
41 uint8_t mode = 0;
42#if LJ_HASJIT
43 mode |= (G2J(g)->flags & JIT_F_ON) ? 1 : 0;
44 mode |= G2J(g)->state != LJ_TRACE_IDLE ? 6 : 0;
45#endif
46 mode |= (g->hookmask & HOOK_EVENTMASK) ? 2 : 0;
47 if (oldmode != mode) { /* Mode changed? */
48 ASMFunction *disp = G2GG(g)->dispatch;
49 ASMFunction f_forl, f_iterl, f_loop;
50 g->dispatchmode = mode;
51 if ((mode & 5) == 1) { /* Hotcount if JIT is on, but not when recording. */
52 f_forl = makeasmfunc(lj_vm_op_ofs[BC_FORL]);
53 f_iterl = makeasmfunc(lj_vm_op_ofs[BC_ITERL]);
54 f_loop = makeasmfunc(lj_vm_op_ofs[BC_LOOP]);
55 } else { /* Otherwise use the non-hotcounting instructions. */
56 f_forl = disp[GG_DISP_STATIC+BC_IFORL];
57 f_iterl = disp[GG_DISP_STATIC+BC_IITERL];
58 f_loop = disp[GG_DISP_STATIC+BC_ILOOP];
59 }
60 /* Set static loop ins first (may be copied below). */
61 disp[GG_DISP_STATIC+BC_FORL] = f_forl;
62 disp[GG_DISP_STATIC+BC_ITERL] = f_iterl;
63 disp[GG_DISP_STATIC+BC_LOOP] = f_loop;
64 if ((oldmode & 6) != (mode & 6)) { /* Need to change whole table? */
65 if ((mode & 6) == 0) { /* No hooks and no recording? */
66 /* Copy static dispatch table to dynamic dispatch table. */
67 memcpy(&disp[0], &disp[GG_DISP_STATIC], sizeof(ASMFunction)*BC__MAX);
68 } else {
69 /* The recording dispatch also checks for hooks. */
70 ASMFunction f = (mode & 6) == 6 ? lj_vm_record : lj_vm_hook;
71 uint32_t i;
72 for (i = 0; i < BC__MAX; i++)
73 disp[i] = f;
74 }
75 } else if ((mode & 6) == 0) { /* Fix dynamic loop ins unless overridden. */
76 disp[BC_FORL] = f_forl;
77 disp[BC_ITERL] = f_iterl;
78 disp[BC_LOOP] = f_loop;
79 }
80 }
81}
82
83/* -- JIT mode setting ---------------------------------------------------- */
84
85#if LJ_HASJIT
86/* Set JIT mode for a single prototype. */
87static void setptmode(global_State *g, GCproto *pt, int mode)
88{
89 if ((mode & LUAJIT_MODE_ON)) { /* (Re-)enable JIT compilation. */
90 pt->flags &= ~PROTO_NO_JIT;
91 lj_trace_reenableproto(pt); /* Unpatch all ILOOP etc. bytecodes. */
92 } else { /* Flush and/or disable JIT compilation. */
93 if (!(mode & LUAJIT_MODE_FLUSH))
94 pt->flags |= PROTO_NO_JIT;
95 lj_trace_flushproto(g, pt); /* Flush all traces of prototype. */
96 }
97}
98
99/* Recursively set the JIT mode for all children of a prototype. */
100static void setptmode_all(global_State *g, GCproto *pt, int mode)
101{
102 ptrdiff_t i;
103 for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) {
104 GCobj *o = gcref(pt->k.gc[i]);
105 if (o->gch.gct == ~LJ_TPROTO) {
106 setptmode(g, gco2pt(o), mode);
107 setptmode_all(g, gco2pt(o), mode);
108 }
109 }
110}
111#endif
112
113/* Public API function: control the JIT engine. */
114int luaJIT_setmode(lua_State *L, int idx, int mode)
115{
116 global_State *g = G(L);
117 int mm = mode & LUAJIT_MODE_MASK;
118 lj_trace_abort(g); /* Abort recording on any state change. */
119 /* Avoid pulling the rug from under our own feet. */
120 if ((g->hookmask & HOOK_GC))
121 lj_err_caller(L, LJ_ERR_NOGCMM);
122 switch (mm) {
123#if LJ_HASJIT
124 case LUAJIT_MODE_ENGINE:
125 if ((mode & LUAJIT_MODE_FLUSH)) {
126 lj_trace_flushall(L);
127 } else {
128 if ((mode & LUAJIT_MODE_ON))
129 G2J(g)->flags |= (uint32_t)JIT_F_ON;
130 else
131 G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
132 lj_dispatch_update(g);
133 }
134 break;
135 case LUAJIT_MODE_FUNC:
136 case LUAJIT_MODE_ALLFUNC:
137 case LUAJIT_MODE_ALLSUBFUNC: {
138 cTValue *tv = idx == 0 ? frame_prev(L->base-1) :
139 idx > 0 ? L->base + (idx-1) : L->top + idx;
140 GCproto *pt;
141 if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn))
142 pt = funcproto(&gcval(tv)->fn); /* Cannot use funcV() for frame slot. */
143 else if (tvisproto(tv))
144 pt = protoV(tv);
145 else
146 return 0; /* Failed. */
147 if (mm != LUAJIT_MODE_ALLSUBFUNC)
148 setptmode(g, pt, mode);
149 if (mm != LUAJIT_MODE_FUNC)
150 setptmode_all(g, pt, mode);
151 break;
152 }
153 case LUAJIT_MODE_TRACE:
154 if (!(mode & LUAJIT_MODE_FLUSH))
155 return 0; /* Failed. */
156 lj_trace_flush(G2J(g), idx);
157 break;
158#else
159 case LUAJIT_MODE_ENGINE:
160 case LUAJIT_MODE_FUNC:
161 case LUAJIT_MODE_ALLFUNC:
162 case LUAJIT_MODE_ALLSUBFUNC:
163 UNUSED(idx);
164 if ((mode & LUAJIT_MODE_ON))
165 return 0; /* Failed. */
166 break;
167#endif
168 default:
169 return 0; /* Failed. */
170 }
171 return 1; /* OK. */
172}
173
174/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */
175LUA_API void LUAJIT_VERSION_SYM(void)
176{
177}
178
179/* -- Hooks --------------------------------------------------------------- */
180
181/* This function can be called asynchronously (e.g. during a signal). */
182LUA_API int lua_sethook(lua_State *L, lua_Hook func, int mask, int count)
183{
184 global_State *g = G(L);
185 mask &= HOOK_EVENTMASK;
186 if (func == NULL || mask == 0) { mask = 0; func = NULL; } /* Consistency. */
187 g->hookf = func;
188 g->hookcount = g->hookcstart = (int32_t)count;
189 g->hookmask = (uint8_t)((g->hookmask & ~HOOK_EVENTMASK) | mask);
190 lj_trace_abort(g); /* Abort recording on any hook change. */
191 lj_dispatch_update(g);
192 return 1;
193}
194
195LUA_API lua_Hook lua_gethook(lua_State *L)
196{
197 return G(L)->hookf;
198}
199
200LUA_API int lua_gethookmask(lua_State *L)
201{
202 return G(L)->hookmask & HOOK_EVENTMASK;
203}
204
205LUA_API int lua_gethookcount(lua_State *L)
206{
207 return (int)G(L)->hookcstart;
208}
209
210/* Call a hook. */
211static void callhook(lua_State *L, int event, BCLine line)
212{
213 global_State *g = G(L);
214 lua_Hook hookf = g->hookf;
215 if (hookf && !hook_active(g)) {
216 lua_Debug ar;
217 lj_trace_abort(g); /* Abort recording on any hook call. */
218 ar.event = event;
219 ar.currentline = line;
220 ar.i_ci = cast_int((L->base-1) - L->stack); /* Top frame, nextframe=NULL. */
221 lj_state_checkstack(L, 1+LUA_MINSTACK);
222 hook_enter(g);
223 hookf(L, &ar);
224 lua_assert(hook_active(g));
225 hook_leave(g);
226 }
227}
228
229/* -- Instruction dispatch callbacks -------------------------------------- */
230
231/* Calculate number of used stack slots in the current frame. */
232static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
233{
234 BCIns ins = pc[-1];
235 for (;;) {
236 switch (bc_op(ins)) {
237 case BC_UCLO: ins = pc[bc_j(ins)]; break;
238 case BC_CALLM:
239 case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1;
240 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
241 case BC_TSETM: return bc_a(ins) + nres-1;
242 default: return pt->framesize;
243 }
244 }
245}
246
247/* Instruction dispatch callback for instr/line hooks or when recording. */
248void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres)
249{
250 GCfunc *fn = curr_func(L);
251 GCproto *pt = funcproto(fn);
252 BCReg slots = cur_topslot(pt, pc, nres);
253 global_State *g = G(L);
254 const BCIns *oldpc = cframe_Lpc(L);
255 cframe_Lpc(L) = pc;
256 L->top = L->base + slots; /* Fix top. */
257#if LJ_HASJIT
258 {
259 jit_State *J = G2J(g);
260 if (J->state != LJ_TRACE_IDLE) {
261 J->L = L;
262 J->pc = pc-1;
263 J->fn = fn;
264 J->pt = pt;
265 lj_trace_ins(J);
266 }
267 }
268#endif
269 if ((g->hookmask & LUA_MASKCOUNT) && g->hookcount == 0) {
270 g->hookcount = g->hookcstart;
271 callhook(L, LUA_HOOKCOUNT, -1);
272 }
273 if ((g->hookmask & LUA_MASKLINE) && pt->lineinfo) {
274 BCPos npc = (BCPos)(pc - pt->bc)-1;
275 BCPos opc = (BCPos)(oldpc - pt->bc)-1;
276 BCLine line = pt->lineinfo[npc];
277 if (npc == 0 || pc <= oldpc ||
278 opc >= pt->sizebc || line != pt->lineinfo[opc]) {
279 L->top = L->base + slots; /* Fix top again after instruction hook. */
280 callhook(L, LUA_HOOKLINE, line);
281 }
282 }
283}
284
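
lj_dispatch_update() condenses the VM state into a small bit set: bit 0 is set while the JIT is on, bit 1 while any event hook is active, and bits 1+2 (value 6) while the trace recorder is busy. That is why (mode & 5) == 1 selects the hot-counting FORL/ITERL/LOOP variants (JIT on, not recording), and any non-zero mode & 6 points the whole dynamic table at lj_vm_record or lj_vm_hook. A throwaway enumeration of the resulting decisions (bit 2 never occurs without bit 1, so modes 4 and 5 cannot actually arise):

#include <stdio.h>

/* Print which dispatch setup each mode value from lj_dispatch_update()
** selects. Purely illustrative; the real decisions are made above.
*/
int main(void)
{
  for (int mode = 0; mode < 8; mode++) {
    const char *entry = (mode & 6) == 6 ? "lj_vm_record" :
                        (mode & 6) != 0 ? "lj_vm_hook" : "static table copy";
    printf("mode=%d  hot-counting loops=%s  dispatch entries=%s\n",
           mode, (mode & 5) == 1 ? "yes" : "no", entry);
  }
  return 0;
}
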
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
new file mode 100644
index 00000000..298aa166
--- /dev/null
+++ b/src/lj_dispatch.h
@@ -0,0 +1,64 @@
1/*
2** Instruction dispatch handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_DISPATCH_H
7#define _LJ_DISPATCH_H
8
9#include "lj_obj.h"
10#include "lj_bc.h"
11#if LJ_HASJIT
12#include "lj_jit.h"
13#endif
14
15/* Type of hot counter. Must match the code in the assembler VM. */
16/* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */
17typedef uint16_t HotCount;
18
19/* Number of hot counter hash table entries (must be a power of two). */
20#define HOTCOUNT_SIZE 64
21#define HOTCOUNT_PCMASK ((HOTCOUNT_SIZE-1)*sizeof(HotCount))
22#define HOTCOUNT_MIN_PENALTY 103
23#define HOTCOUNT_MAX_PENALTY 60000
24
25/* Global state, main thread and extra fields are allocated together. */
26typedef struct GG_State {
27 lua_State L; /* Main thread. */
28 global_State g; /* Global state. */
29#if LJ_HASJIT
30 jit_State J; /* JIT state. */
31 HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
32#endif
33 ASMFunction dispatch[2*BC__MAX]; /* Instruction dispatch tables. */
34} GG_State;
35
36#define GG_DISP_STATIC BC__MAX
37
38#define GG_OFS(field) ((int)offsetof(GG_State, field))
39#define G2GG(gl) \
40 ((GG_State *)(((char *)(gl))-((char *)(&((GG_State *)0)->g))))
41#define J2GG(j) \
42 ((GG_State *)(((char *)(j))-((char *)(&((GG_State *)0)->J))))
43#define L2GG(L) G2GG(G(L))
44#define J2G(J) (&J2GG(J)->g)
45#define G2J(gl) (&G2GG(gl)->J)
46#define L2J(L) (&L2GG(L)->J)
47#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g))
48#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch))
49#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch))
50#define GG_DISP2HOT (GG_OFS(hotcount) - GG_OFS(dispatch))
51
52#define hotcount_get(gg, pc) \
53 (gg)->hotcount[(u32ptr(pc)>>2) & (HOTCOUNT_SIZE-1)]
54#define hotcount_set(gg, pc, val) \
55 (hotcount_get((gg), (pc)) = (HotCount)(val))
56
57/* Dispatch table management. */
58LJ_FUNC void lj_dispatch_init(GG_State *GG);
59LJ_FUNC void lj_dispatch_update(global_State *g);
60
61/* Instruction dispatch callback for instr/line hooks or when recording. */
62LJ_FUNCA void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres);
63
64#endif
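
Since lua_State, global_State, jit_State, the hot counters and both dispatch tables live in one GG_State allocation, the assembler VM reaches all of them from a single base pointer through the GG_DISP2G/GG_DISP2J/GG_DISP2HOT byte offsets. Hotness itself is tracked in a 64-entry hash of counters indexed by the bytecode PC; a sketch of the hotcount_get() index computation with a made-up address:

#include <stdint.h>
#include <stdio.h>

#define HOTCOUNT_SIZE 64

int main(void)
{
  /* Hypothetical, 4-byte-aligned bytecode PC. The >>2 drops the always-zero
  ** alignment bits before masking, matching (u32ptr(pc)>>2) & 63 above.
  */
  uintptr_t pc = 0x7f3a15c8;
  unsigned slot = (unsigned)(((uint32_t)pc >> 2) & (HOTCOUNT_SIZE-1));
  printf("pc %#lx -> hot counter slot %u\n", (unsigned long)pc, slot);
  return 0;
}

Distinct PCs can hash to the same counter; that is harmless, at worst a loop trips the hotness threshold a little earlier than its penalty alone would suggest.
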
diff --git a/src/lj_err.c b/src/lj_err.c
new file mode 100644
index 00000000..a723af48
--- /dev/null
+++ b/src/lj_err.c
@@ -0,0 +1,763 @@
1/*
2** Error handling and debugging API.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_err_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h"
16#include "lj_func.h"
17#include "lj_state.h"
18#include "lj_frame.h"
19#include "lj_bc.h"
20#include "lj_trace.h"
21#include "lj_vm.h"
22
23/* -- Error messages ------------------------------------------------------ */
24
25/* Error message strings. */
26static const char *lj_err_allmsg =
27#define ERRDEF(name, msg) msg "\0"
28#include "lj_errmsg.h"
29;
30
31#define err2msg(em) (lj_err_allmsg+(int)(em))
32
33/* -- Frame and function introspection ------------------------------------ */
34
35static BCPos currentpc(lua_State *L, GCfunc *fn, cTValue *nextframe)
36{
37 const BCIns *ins;
38 lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD);
39 if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */
40 return ~(BCPos)0;
41 } else if (nextframe == NULL) { /* Lua function on top. */
42 ins = cframe_Lpc(L); /* Only happens during error/hook handling. */
43 } else {
44 if (frame_islua(nextframe)) {
45 ins = frame_pc(nextframe);
46 } else if (frame_iscont(nextframe)) {
47 ins = frame_contpc(nextframe);
48 } else {
49 /* Lua function below errfunc/gc/hook: find cframe to get the PC. */
50 void *cf = cframe_raw(L->cframe);
51 TValue *f = L->base-1;
52 while (f > nextframe) {
53 if (frame_islua(f)) {
54 f = frame_prevl(f);
55 } else {
56 if (frame_isc(f))
57 cf = cframe_raw(cframe_prev(cf));
58 f = frame_prevd(f);
59 }
60 }
61 if (cframe_prev(cf))
62 cf = cframe_raw(cframe_prev(cf));
63 ins = cframe_pc(cf);
64 }
65 }
66 return (BCPos)((ins - funcproto(fn)->bc) - 1);
67}
68
69static BCLine currentline(lua_State *L, GCfunc *fn, cTValue *nextframe)
70{
71 BCPos pc = currentpc(L, fn, nextframe);
72 if (pc != ~(BCPos)0) {
73 GCproto *pt = funcproto(fn);
74 lua_assert(pc < pt->sizebc);
75 return pt->lineinfo ? pt->lineinfo[pc] : 0;
76 } else {
77 return -1;
78 }
79}
80
81static const char *getvarname(const GCproto *pt, BCPos pc, BCReg slot)
82{
83 MSize i;
84 for (i = 0; i < pt->sizevarinfo && pt->varinfo[i].startpc <= pc; i++)
85 if (pc < pt->varinfo[i].endpc && slot-- == 0)
86 return strdata(pt->varinfo[i].name);
87 return NULL;
88}
89
90static const char *getobjname(GCproto *pt, const BCIns *ip, BCReg slot,
91 const char **name)
92{
93 const char *lname;
94restart:
95 lname = getvarname(pt, (BCPos)(ip - pt->bc), slot);
96 if (lname != NULL) { *name = lname; return "local"; }
97 while (--ip >= pt->bc) {
98 BCIns ins = *ip;
99 BCOp op = bc_op(ins);
100 BCReg ra = bc_a(ins);
101 if (bcmode_a(op) == BCMbase) {
102 if (slot >= ra && (op != BC_KNIL || slot <= bc_d(ins)))
103 return NULL;
104 } else if (bcmode_a(op) == BCMdst && ra == slot) {
105 switch (bc_op(ins)) {
106 case BC_MOV:
107 if (ra == slot) { slot = bc_d(ins); goto restart; }
108 break;
109 case BC_GGET:
110 *name = strdata(gco2str(gcref(pt->k.gc[~bc_d(ins)])));
111 return "global";
112 case BC_TGETS:
113 *name = strdata(gco2str(gcref(pt->k.gc[~bc_c(ins)])));
114 if (ip > pt->bc) {
115 BCIns insp = ip[-1];
116 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 &&
117 bc_d(insp) == bc_b(ins))
118 return "method";
119 }
120 return "field";
121 case BC_UGET:
122 *name = pt->uvname ? strdata(pt->uvname[bc_d(ins)]) : "?";
123 return "upvalue";
124 default:
125 return NULL;
126 }
127 }
128 }
129 return NULL;
130}
131
132static const char *getfuncname(lua_State *L, TValue *frame, const char **name)
133{
134 MMS mm;
135 const BCIns *ip;
136 TValue *pframe;
137 GCfunc *fn;
138 BCPos pc;
139 if (frame_isvarg(frame))
140 frame = frame_prevd(frame);
141 pframe = frame_prev(frame);
142 fn = frame_func(pframe);
143 pc = currentpc(L, fn, frame);
144 if (pc == ~(BCPos)0)
145 return NULL;
146 lua_assert(pc < funcproto(fn)->sizebc);
147 ip = &funcproto(fn)->bc[pc];
148 mm = bcmode_mm(bc_op(*ip));
149 if (mm == MM_call) {
150 BCReg slot = bc_a(*ip);
151 if (bc_op(*ip) == BC_ITERC) slot -= 3;
152 return getobjname(funcproto(fn), ip, slot, name);
153 } else if (mm != MM_MAX) {
154 *name = strdata(strref(G(L)->mmname[mm]));
155 return "metamethod";
156 } else {
157 return NULL;
158 }
159}
160
161void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc)
162{
163 GCstr *name = pt->chunkname;
164 if (name) {
165 const char *s = strdata(name);
166 MSize i, len = name->len;
167 BCLine line;
168 if (pc)
169 line = pt->lineinfo ? pt->lineinfo[pc-1] : 0;
170 else
171 line = pt->linedefined;
172 if (*s == '@') {
173 s++; len--;
174 for (i = len; i > 0; i--)
175 if (s[i] == '/' || s[i] == '\\') {
176 s += i+1;
177 break;
178 }
179 lj_str_pushf(L, "%s:%d", s, line);
180 } else if (len > 40) {
181 lj_str_pushf(L, "%p:%d", pt, line);
182 } else if (*s == '=') {
183 lj_str_pushf(L, "%s:%d", s+1, line);
184 } else {
185 lj_str_pushf(L, "\"%s\":%d", s, line);
186 }
187 } else {
188 lj_str_pushf(L, "%p:%u", pt, pc);
189 }
190}
191
192static void err_chunkid(char *out, const char *src)
193{
194 if (*src == '=') {
195 strncpy(out, src+1, LUA_IDSIZE); /* remove first char */
196 out[LUA_IDSIZE-1] = '\0'; /* ensures null termination */
197 } else if (*src == '@') { /* out = "source", or "...source" */
198 size_t l = strlen(++src); /* skip the `@' */
199 if (l >= LUA_IDSIZE) {
200 src += l-(LUA_IDSIZE-4); /* get last part of file name */
201 strcpy(out, "...");
202 out += 3;
203 }
204 strcpy(out, src);
205 } else { /* out = [string "string"] */
206 size_t len; /* Length, up to first control char. */
207 for (len = 0; len < LUA_IDSIZE-11; len++)
208 if (((const unsigned char *)src)[len] < ' ') break;
209 strcpy(out, "[string \""); out += 9;
210 if (src[len] != '\0') { /* must truncate? */
211 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
212 strncpy(out, src, len); out += len;
213 strcpy(out, "..."); out += 3;
214 } else {
215 strcpy(out, src); out += len;
216 }
217 strcpy(out, "\"]");
218 }
219}
220
221/* -- Public debug API ---------------------------------------------------- */
222
223static TValue *findlocal(lua_State *L, const lua_Debug *ar,
224 const char **name, BCReg slot)
225{
226 uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
227 uint32_t size = (uint32_t)ar->i_ci >> 16;
228 TValue *frame = L->stack + offset;
229 TValue *nextframe = size ? frame + size : NULL;
230 GCfunc *fn = frame_func(frame);
231 BCPos pc = currentpc(L, fn, nextframe);
232 if (pc != ~(BCPos)0 &&
233 (*name = getvarname(funcproto(fn), pc, slot-1)) != NULL)
234 ;
235 else if (slot > 0 && frame + slot < (nextframe ? nextframe : L->top))
236 *name = "(*temporary)";
237 else
238 *name = NULL;
239 return frame+slot;
240}
241
242LUA_API const char *lua_getlocal(lua_State *L, const lua_Debug *ar, int n)
243{
244 const char *name;
245 TValue *o = findlocal(L, ar, &name, (BCReg)n);
246 if (name) {
247 copyTV(L, L->top, o);
248 incr_top(L);
249 }
250 return name;
251}
252
253
254LUA_API const char *lua_setlocal(lua_State *L, const lua_Debug *ar, int n)
255{
256 const char *name;
257 TValue *o = findlocal(L, ar, &name, (BCReg)n);
258 if (name)
259 copyTV(L, o, L->top-1);
260 L->top--;
261 return name;
262}
263
264LUA_API int lua_getinfo(lua_State *L, const char *what, lua_Debug *ar)
265{
266 int status = 1;
267 TValue *frame = NULL;
268 TValue *nextframe = NULL;
269 GCfunc *fn;
270 if (*what == '>') {
271 TValue *func = L->top - 1;
272 api_check(L, tvisfunc(func));
273 fn = funcV(func);
274 L->top--;
275 what++;
276 } else {
277 uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
278 uint32_t size = (uint32_t)ar->i_ci >> 16;
279 lua_assert(offset != 0);
280 frame = L->stack + offset;
281 if (size) nextframe = frame + size;
282 lua_assert(frame<=L->maxstack && (!nextframe || nextframe<=L->maxstack));
283 fn = frame_func(frame);
284 lua_assert(fn->c.gct == ~LJ_TFUNC);
285 }
286 for (; *what; what++) {
287 switch (*what) {
288 case 'S':
289 if (isluafunc(fn)) {
290 ar->source = strdata(funcproto(fn)->chunkname);
291 ar->linedefined = cast_int(funcproto(fn)->linedefined);
292 ar->lastlinedefined = cast_int(funcproto(fn)->lastlinedefined);
293 ar->what = (ar->linedefined == 0) ? "main" : "Lua";
294 } else {
295 ar->source = "=[C]";
296 ar->linedefined = -1;
297 ar->lastlinedefined = -1;
298 ar->what = "C";
299 }
300 err_chunkid(ar->short_src, ar->source);
301 break;
302 case 'l':
303 ar->currentline = frame ? currentline(L, fn, nextframe) : -1;
304 break;
305 case 'u':
306 ar->nups = fn->c.nupvalues;
307 break;
308 case 'n':
309 ar->namewhat = frame ? getfuncname(L, frame, &ar->name) : NULL;
310 if (ar->namewhat == NULL) {
311 ar->namewhat = "";
312 ar->name = NULL;
313 }
314 break;
315 case 'f':
316 setfuncV(L, L->top, fn);
317 incr_top(L);
318 break;
319 case 'L':
320 if (isluafunc(fn)) {
321 GCtab *t = lj_tab_new(L, 0, 0);
322 BCLine *lineinfo = funcproto(fn)->lineinfo;
323 uint32_t i, szl = funcproto(fn)->sizelineinfo;
324 for (i = 0; i < szl; i++)
325 setboolV(lj_tab_setint(L, t, lineinfo[i]), 1);
326 settabV(L, L->top, t);
327 } else {
328 setnilV(L->top);
329 }
330 incr_top(L);
331 break;
332 default:
333 status = 0; /* Bad option. */
334 break;
335 }
336 }
337 return status;
338}
339
340cTValue *lj_err_getframe(lua_State *L, int level, int *size)
341{
342 cTValue *frame, *nextframe;
343 /* Traverse frames backwards. */
344 for (nextframe = frame = L->base-1; frame > L->stack; ) {
345 if (frame_gc(frame) == obj2gco(L))
346 level++; /* Skip dummy frames. See lj_meta_call(). */
347 if (level-- == 0) {
348 *size = cast_int(nextframe - frame);
349 return frame; /* Level found. */
350 }
351 nextframe = frame;
352 if (frame_islua(frame)) {
353 frame = frame_prevl(frame);
354 } else {
355 if (frame_isvarg(frame))
356 level++; /* Skip vararg pseudo-frame. */
357 frame = frame_prevd(frame);
358 }
359 }
360 *size = level;
361 return NULL; /* Level not found. */
362}
363
364LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
365{
366 int size;
367 cTValue *frame = lj_err_getframe(L, level, &size);
368 if (frame) {
369 ar->i_ci = (size << 16) + cast_int(frame - L->stack);
370 return 1;
371 } else {
372 ar->i_ci = level - size;
373 return 0;
374 }
375}
376
377/* -- Error handling ------------------------------------------------------ */
378
379/* Return string object for error message. */
380LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em)
381{
382 return lj_str_newz(L, err2msg(em));
383}
384
385/* Unwind Lua stack and add error message on top. */
386LJ_NOINLINE static void unwindstack(lua_State *L, TValue *top, int errcode)
387{
388 lj_func_closeuv(L, top);
389 switch (errcode) {
390 case LUA_ERRMEM:
391 setstrV(L, top, lj_err_str(L, LJ_ERR_ERRMEM));
392 break;
393 case LUA_ERRERR:
394 setstrV(L, top, lj_err_str(L, LJ_ERR_ERRERR));
395 break;
396 case LUA_ERRSYNTAX:
397 case LUA_ERRRUN:
398 copyTV(L, top, L->top - 1);
399 break;
400 default:
401 lua_assert(0);
402 break;
403 }
404 L->top = top+1;
405 lj_state_relimitstack(L);
406}
407
408/* Throw error. Find catch frame, unwind stack and continue. */
409LJ_NOINLINE void lj_err_throw(lua_State *L, int errcode)
410{
411 TValue *frame = L->base-1;
412 void *cf = L->cframe;
413 global_State *g = G(L);
414 if (L->status == LUA_ERRERR+1) { /* Don't touch the stack during lua_open. */
415 lj_vm_unwind_c(cf, errcode);
416 goto uncaught; /* unreachable */
417 }
418 lj_trace_abort(g);
419 setgcrefnull(g->jit_L);
420 L->status = 0;
421 while (cf) {
422 if (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
423 TValue *top = restorestack(L, -cframe_nres(cf));
424 if (frame < top) {
425 L->cframe = cframe_prev(cf);
426 L->base = frame+1;
427 unwindstack(L, top, errcode);
428 lj_vm_unwind_c(cf, errcode);
429 goto uncaught; /* unreachable */
430 }
431 }
432 if (frame <= L->stack)
433 break;
434 switch (frame_typep(frame)) {
435 case FRAME_LUA:
436 case FRAME_LUAP:
437 frame = frame_prevl(frame);
438 break;
439 case FRAME_C:
440 if (cframe_canyield(cf)) goto uncaught;
441 cf = cframe_prev(cf);
442 /* fallthrough */
443 case FRAME_CONT:
444 case FRAME_VARG:
445 frame = frame_prevd(frame);
446 break;
447 case FRAME_CP:
448 L->cframe = cframe_prev(cf);
449 L->base = frame_prevd(frame) + 1;
450 unwindstack(L, frame, errcode);
451 lj_vm_unwind_c(cf, errcode);
452 goto uncaught; /* unreachable */
453 case FRAME_PCALL:
454 hook_leave(g);
455 /* fallthrough */
456 case FRAME_PCALLH:
457 L->cframe = cf;
458 L->base = frame_prevd(frame) + 1;
459 unwindstack(L, L->base, errcode);
460 lj_vm_unwind_ff(cf);
461 goto uncaught; /* unreachable */
462 default:
463 lua_assert(0);
464 goto uncaught;
465 }
466 }
467 /* No catch frame found. Must be a resume or an unprotected error. */
468uncaught:
469 L->status = cast_byte(errcode);
470 L->cframe = NULL;
471 if (cframe_canyield(cf)) { /* Resume? */
472 unwindstack(L, L->top, errcode);
473 lj_vm_unwind_c(cf, errcode);
474 }
475 /* Better rethrow on main thread than panic. */
476 {
477 if (L != mainthread(g))
478 lj_err_throw(mainthread(g), errcode);
479 if (g->panic) {
480 L->base = L->stack+1;
481 unwindstack(L, L->base, errcode);
482 g->panic(L);
483 }
484 }
485 exit(EXIT_FAILURE);
486}
487
488/* Find error function for runtime errors. Requires an extra stack traversal. */
489static ptrdiff_t finderrfunc(lua_State *L)
490{
491 TValue *frame = L->base-1;
492 void *cf = L->cframe;
493 while (frame > L->stack) {
494 lua_assert(cf != NULL);
495 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
496 if (frame >= restorestack(L, -cframe_nres(cf)))
497 break;
498 if (cframe_errfunc(cf) >= 0) /* Error handler not inherited (-1)? */
499 return cframe_errfunc(cf);
500 cf = cframe_prev(cf); /* Else unwind cframe and continue searching. */
501 if (cf == NULL)
502 return 0;
503 }
504 switch (frame_typep(frame)) {
505 case FRAME_LUA:
506 case FRAME_LUAP:
507 frame = frame_prevl(frame);
508 break;
509 case FRAME_C:
510 if (cframe_canyield(cf)) return 0;
511 cf = cframe_prev(cf);
512 /* fallthrough */
513 case FRAME_CONT:
514 case FRAME_VARG:
515 frame = frame_prevd(frame);
516 break;
517 case FRAME_CP:
518 if (cframe_errfunc(cf) >= 0)
519 return cframe_errfunc(cf);
520 frame = frame_prevd(frame);
521 break;
522 case FRAME_PCALL:
523 case FRAME_PCALLH:
524 if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */
525 return savestack(L, frame-1); /* Point to xpcall's errorfunc. */
526 return 0;
527 default:
528 lua_assert(0);
529 return 0;
530 }
531 }
532 return 0;
533}
534
535/* Runtime error. */
536LJ_NOINLINE void lj_err_run(lua_State *L)
537{
538 ptrdiff_t ef = finderrfunc(L);
539 if (ef) {
540 TValue *errfunc = restorestack(L, ef);
541 TValue *top = L->top;
542 lj_trace_abort(G(L));
543 if (!tvisfunc(errfunc) || L->status == LUA_ERRERR)
544 lj_err_throw(L, LUA_ERRERR);
545 L->status = LUA_ERRERR;
546 copyTV(L, top, top-1);
547 copyTV(L, top-1, errfunc);
548 L->top = top+1;
549 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
550 }
551 lj_err_throw(L, LUA_ERRRUN);
552}
553
554/* Add location to error message. */
555LJ_NOINLINE static void err_loc(lua_State *L, const char *msg,
556 cTValue *frame, cTValue *nextframe)
557{
558 if (frame) {
559 GCfunc *fn = frame_func(frame);
560 if (isluafunc(fn)) {
561 char buff[LUA_IDSIZE];
562 BCLine line = currentline(L, fn, nextframe);
563 err_chunkid(buff, strdata(funcproto(fn)->chunkname));
564 lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
565 return;
566 }
567 }
568 lj_str_pushf(L, "%s", msg);
569}
570
571/* Formatted runtime error message. */
572LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
573{
574 const char *msg;
575 va_list argp;
576 va_start(argp, em);
577 if (curr_funcisL(L)) L->top = curr_topL(L);
578 msg = lj_str_pushvf(L, err2msg(em), argp);
579 va_end(argp);
580 err_loc(L, msg, L->base-1, NULL);
581 lj_err_run(L);
582}
583
584/* Non-vararg variant for better calling conventions. */
585LJ_NOINLINE void lj_err_msg(lua_State *L, ErrMsg em)
586{
587 err_msgv(L, em);
588}
589
590/* Lexer error. */
591LJ_NOINLINE void lj_err_lex(lua_State *L, const char *src, const char *tok,
592 BCLine line, ErrMsg em, va_list argp)
593{
594 char buff[LUA_IDSIZE];
595 const char *msg;
596 err_chunkid(buff, src);
597 msg = lj_str_pushvf(L, err2msg(em), argp);
598 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
599 if (tok)
600 lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
601 lj_err_throw(L, LUA_ERRSYNTAX);
602}
603
604/* Typecheck error for operands. */
605LJ_NOINLINE void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm)
606{
607 const char *tname = typename(o);
608 const char *oname = NULL;
609 const char *opname = err2msg(opm);
610 if (curr_funcisL(L)) {
611 GCproto *pt = curr_proto(L);
612 const BCIns *pc = cframe_Lpc(L) - 1;
613 const char *kind = getobjname(pt, pc, (BCReg)(o - L->base), &oname);
614 if (kind)
615 err_msgv(L, LJ_ERR_BADOPRT, opname, kind, oname, tname);
616 }
617 err_msgv(L, LJ_ERR_BADOPRV, opname, tname);
618}
619
620/* Typecheck error for ordered comparisons. */
621LJ_NOINLINE void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2)
622{
623 const char *t1 = typename(o1);
624 const char *t2 = typename(o2);
625 err_msgv(L, t1 == t2 ? LJ_ERR_BADCMPV : LJ_ERR_BADCMPT, t1, t2);
626 /* This assumes the two "boolean" entries are commoned by the C compiler. */
627}
628
629/* Typecheck error for __call. */
630LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
631{
632 /* Gross hack if lua_[p]call or pcall/xpcall fail for a non-callable object:
633 ** L->base still points to the caller. So add a dummy frame with L instead
634 ** of a function. See lua_getstack().
635 */
636 const BCIns *pc = cframe_Lpc(L);
637 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
638 const char *tname = typename(o);
639 setframe_pc(o, pc);
640 setframe_gc(o, obj2gco(L));
641 L->top = L->base = o+1;
642 err_msgv(L, LJ_ERR_BADCALL, tname);
643 }
644 lj_err_optype(L, o, LJ_ERR_OPCALL);
645}
646
647/* Error in context of caller. */
648LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
649{
650 cTValue *frame = L->base-1;
651 cTValue *pframe = frame_islua(frame) ? frame_prevl(frame) : NULL;
652 err_loc(L, msg, pframe, frame);
653 lj_err_run(L);
654}
655
656/* Formatted error in context of caller. */
657LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
658{
659 const char *msg;
660 va_list argp;
661 va_start(argp, em);
662 msg = lj_str_pushvf(L, err2msg(em), argp);
663 va_end(argp);
664 lj_err_callermsg(L, msg);
665}
666
667/* Error in context of caller. */
668LJ_NOINLINE void lj_err_caller(lua_State *L, ErrMsg em)
669{
670 lj_err_callermsg(L, err2msg(em));
671}
672
673/* Argument error message. */
674LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
675 const char *msg)
676{
677 const char *fname = "?";
678 const char *ftype = getfuncname(L, L->base - 1, &fname);
679 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
680 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
681 else
682 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
683 lj_err_callermsg(L, msg);
684}
685
686/* Formatted argument error. */
687LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
688{
689 const char *msg;
690 va_list argp;
691 va_start(argp, em);
692 msg = lj_str_pushvf(L, err2msg(em), argp);
693 va_end(argp);
694 err_argmsg(L, narg, msg);
695}
696
697/* Argument error. */
698LJ_NOINLINE void lj_err_arg(lua_State *L, int narg, ErrMsg em)
699{
700 err_argmsg(L, narg, err2msg(em));
701}
702
703/* Typecheck error for arguments. */
704LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
705{
706 TValue *o = L->base + narg-1;
707 const char *tname = o < L->top ? typename(o) : lj_obj_typename[0];
708 const char *msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
709 err_argmsg(L, narg, msg);
710}
711
712/* Typecheck error for arguments. */
713LJ_NOINLINE void lj_err_argt(lua_State *L, int narg, int tt)
714{
715 lj_err_argtype(L, narg, lj_obj_typename[tt+1]);
716}
717
718/* -- Public error handling API ------------------------------------------- */
719
720LUA_API lua_CFunction lua_atpanic(lua_State *L, lua_CFunction panicf)
721{
722 lua_CFunction old = G(L)->panic;
723 G(L)->panic = panicf;
724 return old;
725}
726
727/* Forwarders for the public API (C calling convention and no LJ_NORET). */
728LUA_API int lua_error(lua_State *L)
729{
730 lj_err_run(L);
731 return 0; /* unreachable */
732}
733
734LUALIB_API int luaL_argerror(lua_State *L, int narg, const char *msg)
735{
736 err_argmsg(L, narg, msg);
737 return 0; /* unreachable */
738}
739
740LUALIB_API int luaL_typerror(lua_State *L, int narg, const char *xname)
741{
742 lj_err_argtype(L, narg, xname);
743 return 0; /* unreachable */
744}
745
746LUALIB_API void luaL_where(lua_State *L, int level)
747{
748 int size;
749 cTValue *frame = lj_err_getframe(L, level, &size);
750 err_loc(L, "", frame, size ? frame+size : NULL);
751}
752
753LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
754{
755 const char *msg;
756 va_list argp;
757 va_start(argp, fmt);
758 msg = lj_str_pushvf(L, fmt, argp);
759 va_end(argp);
760 lj_err_callermsg(L, msg);
761 return 0; /* unreachable */
762}
763
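
lua_getstack() has to squeeze its frame reference into the int-sized ar->i_ci field of the public lua_Debug struct: the low 16 bits carry the frame's offset from the stack base and the high 16 bits its size (distance to the next frame, 0 for the topmost frame), which findlocal() and lua_getinfo() unpack again. A minimal round trip of that encoding with made-up numbers:

#include <assert.h>
#include <stdint.h>

int main(void)
{
  uint32_t offset = 37, size = 5;            /* hypothetical frame slot/size */
  int i_ci = (int)((size << 16) + offset);   /* as in lua_getstack() */
  assert(((uint32_t)i_ci & 0xffff) == offset);   /* as in findlocal() */
  assert(((uint32_t)i_ci >> 16) == size);
  return 0;
}
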
diff --git a/src/lj_err.h b/src/lj_err.h
new file mode 100644
index 00000000..e794d44c
--- /dev/null
+++ b/src/lj_err.h
@@ -0,0 +1,40 @@
1/*
2** Error handling and debugging support.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_ERR_H
7#define _LJ_ERR_H
8
9#include <stdarg.h>
10
11#include "lj_obj.h"
12
13typedef enum {
14#define ERRDEF(name, msg) \
15 LJ_ERR_##name, LJ_ERR_##name##_ = LJ_ERR_##name + sizeof(msg)-1,
16#include "lj_errmsg.h"
17 LJ_ERR__MAX
18} ErrMsg;
19
20LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
21LJ_FUNC_NORET void lj_err_throw(lua_State *L, int errcode);
22LJ_FUNC_NORET void lj_err_run(lua_State *L);
23LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
24LJ_FUNC_NORET void lj_err_lex(lua_State *L, const char *src, const char *tok,
25 BCLine line, ErrMsg em, va_list argp);
26LJ_FUNC_NORET void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm);
27LJ_FUNC_NORET void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2);
28LJ_FUNC_NORET void lj_err_optype_call(lua_State *L, TValue *o);
29LJ_FUNC_NORET void lj_err_callermsg(lua_State *L, const char *msg);
30LJ_FUNC_NORET void lj_err_callerv(lua_State *L, ErrMsg em, ...);
31LJ_FUNC_NORET void lj_err_caller(lua_State *L, ErrMsg em);
32LJ_FUNC_NORET void lj_err_arg(lua_State *L, int narg, ErrMsg em);
33LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
34LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
35LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
36
37LJ_FUNC void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc);
38LJ_FUNC cTValue *lj_err_getframe(lua_State *L, int level, int *size);
39
40#endif
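
The ErrMsg enum above is not a dense 0,1,2,... sequence: ERRDEF emits two enumerators per message, so the second one advances the counter by sizeof(msg)-1 and each LJ_ERR_* ends up being the byte offset of its message inside the single lj_err_allmsg string that lj_err.c pastes together with embedded "\0" separators; err2msg() is then plain pointer arithmetic. A tiny standalone imitation of the trick with two made-up messages (the real list lives in lj_errmsg.h):

#include <assert.h>
#include <string.h>

/* Two fake messages instead of lj_errmsg.h; the mechanics are the same. */
#define MYERRDEF(_) _(FOO, "foo failed") _(BAR, "bar broke")

static const char *allmsg =
#define X(name, msg) msg "\0"
MYERRDEF(X)
#undef X
;

typedef enum {
#define X(name, msg) ERR_##name, ERR_##name##_ = ERR_##name + sizeof(msg)-1,
MYERRDEF(X)
#undef X
  ERR__MAX
} MyErrMsg;

#define err2msg(em) (allmsg + (int)(em))

int main(void)
{
  assert(ERR_FOO == 0);
  assert(ERR_BAR == (int)sizeof("foo failed"));        /* 11 = strlen+1 */
  assert(strcmp(err2msg(ERR_BAR), "bar broke") == 0);
  return 0;
}
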
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
new file mode 100644
index 00000000..03abd59b
--- /dev/null
+++ b/src/lj_errmsg.h
@@ -0,0 +1,134 @@
1/*
2** VM error messages.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6/* This file may be included multiple times with different ERRDEF macros. */
7
8/* Basic error handling. */
9ERRDEF(ERRMEM, "not enough memory")
10ERRDEF(ERRERR, "error in error handling")
11
12/* Allocations. */
13ERRDEF(STROV, "string length overflow")
14ERRDEF(UDATAOV, "userdata length overflow")
15ERRDEF(STKOV, "stack overflow")
16ERRDEF(STKOVM, "stack overflow (%s)")
17ERRDEF(TABOV, "table overflow")
18
19/* Table indexing. */
20ERRDEF(NANIDX, "table index is NaN")
21ERRDEF(NILIDX, "table index is nil")
22ERRDEF(NEXTIDX, "invalid key to " LUA_QL("next"))
23
24/* Metamethod resolving. */
25ERRDEF(BADCALL, "attempt to call a %s value")
26ERRDEF(BADOPRT, "attempt to %s %s " LUA_QS " (a %s value)")
27ERRDEF(BADOPRV, "attempt to %s a %s value")
28ERRDEF(BADCMPT, "attempt to compare %s with %s")
29ERRDEF(BADCMPV, "attempt to compare two %s values")
30ERRDEF(GETLOOP, "loop in gettable")
31ERRDEF(SETLOOP, "loop in settable")
32ERRDEF(OPCALL, "call")
33ERRDEF(OPINDEX, "index")
34ERRDEF(OPARITH, "perform arithmetic on")
35ERRDEF(OPCAT, "concatenate")
36ERRDEF(OPLEN, "get length of")
37
38/* Type checks. */
39ERRDEF(BADSELF, "calling " LUA_QS " on bad self (%s)")
40ERRDEF(BADARG, "bad argument #%d to " LUA_QS " (%s)")
41ERRDEF(BADTYPE, "%s expected, got %s")
42ERRDEF(BADVAL, "invalid value")
43ERRDEF(NOVAL, "value expected")
44ERRDEF(NOCORO, "coroutine expected")
45ERRDEF(NOTABN, "nil or table expected")
46ERRDEF(NOLFUNC, "Lua function expected")
47ERRDEF(NOFUNCL, "function or level expected")
48ERRDEF(NOSFT, "string/function/table expected")
49ERRDEF(NOPROXY, "boolean or proxy expected")
50ERRDEF(FORINIT, LUA_QL("for") " initial value must be a number")
51ERRDEF(FORLIM, LUA_QL("for") " limit must be a number")
52ERRDEF(FORSTEP, LUA_QL("for") " step must be a number")
53
54/* C API checks. */
55ERRDEF(NOENV, "no calling environment")
56ERRDEF(CYIELD, "attempt to yield across C-call boundary")
57ERRDEF(BADLU, "bad light userdata pointer")
58ERRDEF(NOGCMM, "bad action while in __gc metamethod")
59
60/* Standard library function errors. */
61ERRDEF(ASSERT, "assertion failed!")
62ERRDEF(PROTMT, "cannot change a protected metatable")
63ERRDEF(UNPACK, "too many results to unpack")
64ERRDEF(RDRSTR, "reader function must return a string")
65ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print"))
66ERRDEF(IDXRNG, "index out of range")
67ERRDEF(BASERNG, "base out of range")
68ERRDEF(LVLRNG, "level out of range")
69ERRDEF(INVLVL, "invalid level")
70ERRDEF(INVOPT, "invalid option")
71ERRDEF(INVOPTM, "invalid option " LUA_QS)
72ERRDEF(INVFMT, "invalid format")
73ERRDEF(SETFENV, LUA_QL("setfenv") " cannot change environment of given object")
74ERRDEF(CORUN, "cannot resume running coroutine")
75ERRDEF(CODEAD, "cannot resume dead coroutine")
76ERRDEF(COSUSP, "cannot resume non-suspended coroutine")
77ERRDEF(TABINS, "wrong number of arguments to " LUA_QL("insert"))
78ERRDEF(TABCAT, "invalid value (%s) at index %d in table for " LUA_QL("concat"))
79ERRDEF(TABSORT, "invalid order function for sorting")
80ERRDEF(IOCLFL, "attempt to use a closed file")
81ERRDEF(IOSTDCL, "standard file is closed")
82ERRDEF(OSUNIQF, "unable to generate a unique filename")
83ERRDEF(OSDATEF, "field " LUA_QS " missing in date table")
84ERRDEF(STRDUMP, "cannot dump functions")
85ERRDEF(STRSLC, "string slice too long")
86ERRDEF(STRPATB, "missing " LUA_QL("[") " after " LUA_QL("%f") " in pattern")
87ERRDEF(STRPATC, "invalid pattern capture")
88ERRDEF(STRPATE, "malformed pattern (ends with " LUA_QL("%") ")")
89ERRDEF(STRPATM, "malformed pattern (missing " LUA_QL("]") ")")
90ERRDEF(STRPATU, "unbalanced pattern")
91ERRDEF(STRCAPI, "invalid capture index")
92ERRDEF(STRCAPN, "too many captures")
93ERRDEF(STRCAPU, "unfinished capture")
94ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format"))
95ERRDEF(STRFMTR, "invalid format (repeated flags)")
96ERRDEF(STRFMTW, "invalid format (width or precision too long)")
97ERRDEF(STRGSRV, "invalid replacement value (a %s)")
98ERRDEF(BADMODN, "name conflict for module " LUA_QS)
99ERRDEF(NOJIT, "JIT compiler permanently disabled")
100ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
101
102/* Lexer/parser errors. */
103ERRDEF(XNEAR, "%s near " LUA_QS)
104ERRDEF(XELEM, "lexical element too long")
105ERRDEF(XLINES, "chunk has too many lines")
106ERRDEF(XLEVELS, "chunk has too many syntax levels")
107ERRDEF(XNUMBER, "malformed number")
108ERRDEF(XLSTR, "unfinished long string")
109ERRDEF(XLCOM, "unfinished long comment")
110ERRDEF(XSTR, "unfinished string")
111ERRDEF(XESC, "escape sequence too large")
112ERRDEF(XLDELIM, "invalid long string delimiter")
113ERRDEF(XBCLOAD, "cannot load Lua bytecode")
114ERRDEF(XTOKEN, LUA_QS " expected")
115ERRDEF(XJUMP, "control structure too long")
116ERRDEF(XSLOTS, "function or expression too complex")
117ERRDEF(XLIMM, "main function has more than %d %s")
118ERRDEF(XLIMF, "function at line %d has more than %d %s")
119ERRDEF(XMATCH, LUA_QS " expected (to close " LUA_QS " at line %d)")
120ERRDEF(XFIXUP, "function too long for return fixup")
121ERRDEF(XPARAM, "<name> or " LUA_QL("...") " expected")
122ERRDEF(XAMBIG, "ambiguous syntax (function call x new statement)")
123ERRDEF(XFUNARG, "function arguments expected")
124ERRDEF(XSYMBOL, "unexpected symbol")
125ERRDEF(XDOTS, "cannot use " LUA_QL("...") " outside a vararg function")
126ERRDEF(XSYNTAX, "syntax error")
127ERRDEF(XBREAK, "no loop to break")
128ERRDEF(XFOR, LUA_QL("=") " or " LUA_QL("in") " expected")
129
130#undef ERRDEF
131
132/* Detecting unused error messages:
133 awk -F, '/^ERRDEF/ { gsub(/ERRDEF./, ""); printf "grep -q LJ_ERR_%s *.[ch] || echo %s\n", $1, $1}' lj_errmsg.h | sh
134*/
diff --git a/src/lj_ff.h b/src/lj_ff.h
new file mode 100644
index 00000000..6dfd73a7
--- /dev/null
+++ b/src/lj_ff.h
@@ -0,0 +1,18 @@
1/*
2** Fast function IDs.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_FF_H
7#define _LJ_FF_H
8
9/* Fast function ID. */
10typedef enum {
11 FF_LUA_ = FF_LUA, /* Lua function (must be 0). */
12 FF_C_ = FF_C, /* Regular C function (must be 1). */
13#define FFDEF(name) FF_##name,
14#include "lj_ffdef.h"
15 FF__MAX
16} FastFunc;
17
18#endif
diff --git a/src/lj_frame.h b/src/lj_frame.h
new file mode 100644
index 00000000..1c03e3e1
--- /dev/null
+++ b/src/lj_frame.h
@@ -0,0 +1,84 @@
1/*
2** Stack frames.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_FRAME_H
7#define _LJ_FRAME_H
8
9#include "lj_obj.h"
10#include "lj_bc.h"
11
12/* -- Lua stack frame ----------------------------------------------------- */
13
14/* Frame type markers in callee function slot (callee base-1). */
15enum {
16 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
17 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
18};
19#define FRAME_TYPE 3
20#define FRAME_P 4
21#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
22
23/* Macros to access and modify Lua frames. */
24#define frame_gc(f) (gcref((f)->fr.func))
25#define frame_func(f) (&frame_gc(f)->fn)
26#define frame_ftsz(f) ((f)->fr.tp.ftsz)
27
28#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
29#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
30#define frame_islua(f) (frame_type(f) == FRAME_LUA)
31#define frame_isc(f) (frame_type(f) == FRAME_C)
32#define frame_iscont(f) (frame_typep(f) == FRAME_CONT)
33#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
34#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
35
36#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
37#define frame_contpc(f) (frame_pc((f)-1))
38#if LJ_64
39#define frame_contf(f) \
40 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin+(((f)-1)->u64 & 0xffffffff)))
41#else
42#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
43#endif
44#define frame_delta(f) (frame_ftsz(f) >> 3)
45#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
46
47#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1])))
48#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
49#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
50/* Note: this macro does not skip over FRAME_VARG. */
51
52#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
53#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
54
55/* -- C stack frame ------------------------------------------------------- */
56
57/* Macros to access and modify the C stack frame chain. */
58
59/* These definitions must match with the arch-specific *.dasc files. */
60#if LJ_TARGET_X86
61#define CFRAME_OFS_ERRF (15*sizeof(void *))
62#define CFRAME_OFS_NRES (14*sizeof(void *))
63#define CFRAME_OFS_PREV (13*sizeof(void *))
64#define CFRAME_OFS_L (12*sizeof(void *))
65#define CFRAME_OFS_PC (6*sizeof(void *))
66#define CFRAME_SIZE (12*sizeof(void *))
67#else
68#error "Missing CFRAME_* definitions for this architecture"
69#endif
70
71#define CFRAME_RESUME 1
72#define CFRAME_CANYIELD ((intptr_t)(CFRAME_RESUME))
73#define CFRAME_RAWMASK (~CFRAME_CANYIELD)
74
75#define cframe_errfunc(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_ERRF))
76#define cframe_nres(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_NRES))
77#define cframe_prev(cf) (*(void **)(((char *)cf)+CFRAME_OFS_PREV))
78#define cframe_L(cf) (*(lua_State **)(((char *)cf)+CFRAME_OFS_L))
79#define cframe_pc(cf) (*(const BCIns **)(((char *)cf)+CFRAME_OFS_PC))
80#define cframe_canyield(cf) ((intptr_t)(cf) & CFRAME_CANYIELD)
81#define cframe_raw(cf) ((void *)((intptr_t)(cf) & CFRAME_RAWMASK))
82#define cframe_Lpc(L) cframe_pc(cframe_raw(L->cframe))
83
84#endif
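
The frame links come in two flavours: a FRAME_LUA slot holds the caller's return PC, and the previous frame is recovered from the A operand of the CALL instruction just before that PC (frame_prevl()), while every other frame type stores its size directly in ftsz with the low three bits reused for the type/flag markers. A standalone check of the second encoding, assuming the usual 8-byte TValue and an invented frame size:

#include <assert.h>
#include <stdint.h>

enum { FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
       FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH };
#define FRAME_TYPE  3
#define FRAME_TYPEP 7

int main(void)
{
  /* Hypothetical C frame spanning 4 stack slots of 8 bytes each. */
  intptr_t ftsz = 4*8 | FRAME_C;
  assert((ftsz & FRAME_TYPE) == FRAME_C);          /* frame_type()        */
  assert((ftsz & ~(intptr_t)FRAME_TYPEP) == 4*8);  /* frame_sized() bytes */
  assert((ftsz >> 3) == 4);                        /* frame_delta() slots */
  return 0;
}
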
diff --git a/src/lj_func.c b/src/lj_func.c
new file mode 100644
index 00000000..92cdeda2
--- /dev/null
+++ b/src/lj_func.c
@@ -0,0 +1,185 @@
1/*
2** Function handling (prototypes, functions and upvalues).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_func_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_func.h"
15#include "lj_trace.h"
16#include "lj_vm.h"
17
18/* -- Prototypes ---------------------------------------------------------- */
19
20GCproto *lj_func_newproto(lua_State *L)
21{
22 GCproto *pt = lj_mem_newobj(L, GCproto);
23 pt->gct = ~LJ_TPROTO;
24 pt->numparams = 0;
25 pt->framesize = 0;
26 pt->sizeuv = 0;
27 pt->flags = 0;
28 pt->trace = 0;
29 pt->k.n = NULL;
30 pt->bc = NULL;
31 pt->uv = NULL;
32 pt->sizebc = 0;
33 pt->sizekgc = 0;
34 pt->sizekn = 0;
35 pt->sizelineinfo = 0;
36 pt->sizevarinfo = 0;
37 pt->sizeuvname = 0;
38 pt->linedefined = 0;
39 pt->lastlinedefined = 0;
40 pt->lineinfo = NULL;
41 pt->varinfo = NULL;
42 pt->uvname = NULL;
43 pt->chunkname = NULL;
44 return pt;
45}
46
47void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt)
48{
49 MSize nkgc = round_nkgc(pt->sizekgc);
50 MSize sizek = nkgc*(MSize)sizeof(GCobj *) +
51 pt->sizekn*(MSize)sizeof(lua_Number);
52 lj_mem_free(g, pt->k.gc - nkgc, sizek);
53 lj_mem_freevec(g, pt->bc, pt->sizebc, BCIns);
54 lj_mem_freevec(g, pt->uv, pt->sizeuv, int16_t);
55 lj_mem_freevec(g, pt->lineinfo, pt->sizelineinfo, int32_t);
56 lj_mem_freevec(g, pt->varinfo, pt->sizevarinfo, struct VarInfo);
57 lj_mem_freevec(g, pt->uvname, pt->sizeuvname, GCstr *);
58 lj_trace_freeproto(g, pt);
59 lj_mem_freet(g, pt);
60}
61
62/* -- Upvalues ------------------------------------------------------------ */
63
64static void unlinkuv(GCupval *uv)
65{
66 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
67 setgcrefr(uvnext(uv)->prev, uv->prev);
68 setgcrefr(uvprev(uv)->next, uv->next);
69}
70
71/* Find existing open upvalue for a stack slot or create a new one. */
72static GCupval *func_finduv(lua_State *L, TValue *slot)
73{
74 global_State *g = G(L);
75 GCRef *pp = &L->openupval;
76 GCupval *p;
77 GCupval *uv;
78 /* Search the sorted list of open upvalues. */
79 while (gcref(*pp) != NULL && (p = gco2uv(gcref(*pp)))->v >= slot) {
80 lua_assert(!p->closed && p->v != &p->tv);
81 if (p->v == slot) { /* Found open upvalue pointing to same slot? */
82 if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */
83 flipwhite(obj2gco(p));
84 return p;
85 }
86 pp = &p->nextgc;
87 }
88 /* No matching upvalue found. Create a new one. */
89 uv = lj_mem_newt(L, sizeof(GCupval), GCupval);
90 newwhite(g, uv);
91 uv->gct = ~LJ_TUPVAL;
92 uv->closed = 0; /* Still open. */
93 uv->v = slot; /* Pointing to the stack slot. */
94 /* NOBARRIER: The GCupval is new (marked white) and open. */
95 setgcrefr(uv->nextgc, *pp); /* Insert into sorted list of open upvalues. */
96 setgcref(*pp, obj2gco(uv));
97 setgcref(uv->prev, obj2gco(&g->uvhead)); /* Insert into GC list, too. */
98 setgcrefr(uv->next, g->uvhead.next);
99 setgcref(uvnext(uv)->prev, obj2gco(uv));
100 setgcref(g->uvhead.next, obj2gco(uv));
101 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
102 return uv;
103}
104
105/* Close all open upvalues pointing to some stack level or above. */
106void lj_func_closeuv(lua_State *L, TValue *level)
107{
108 GCupval *uv;
109 global_State *g = G(L);
110 while (gcref(L->openupval) != NULL &&
111 (uv = gco2uv(gcref(L->openupval)))->v >= level) {
112 GCobj *o = obj2gco(uv);
113 lua_assert(!isblack(o) && !uv->closed && uv->v != &uv->tv);
114 setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */
115 if (isdead(g, o)) {
116 lj_func_freeuv(g, uv);
117 } else {
118 unlinkuv(uv);
119 lj_gc_closeuv(g, uv);
120 }
121 }
122}
123
124void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv)
125{
126 if (!uv->closed)
127 unlinkuv(uv);
128 lj_mem_freet(g, uv);
129}
130
131/* -- Functions (closures) ------------------------------------------------ */
132
133GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env)
134{
135 GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeCfunc(nelems)));
136 fn->c.gct = ~LJ_TFUNC;
137 fn->c.ffid = FF_C;
138 fn->c.nupvalues = cast_byte(nelems);
139 /* NOBARRIER: The GCfunc is new (marked white). */
140 setgcref(fn->c.env, obj2gco(env));
141 fn->c.gate = lj_gate_c;
142 return fn;
143}
144
145GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env)
146{
147 GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv)));
148 fn->l.gct = ~LJ_TFUNC;
149 fn->l.ffid = FF_LUA;
150 fn->l.nupvalues = cast_byte(pt->sizeuv);
151 /* NOBARRIER: The GCfunc is new (marked white). */
152 setgcref(fn->l.pt, obj2gco(pt));
153 setgcref(fn->l.env, obj2gco(env));
154 fn->l.gate = (pt->flags & PROTO_IS_VARARG) ? lj_gate_lv : lj_gate_lf;
155 return fn;
156}
157
158/* Do a GC check and create a new Lua function with inherited upvalues. */
159GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent)
160{
161 GCfunc *fn;
162 GCRef *puv;
163 uint32_t i, nuv;
164 TValue *base;
165 lj_gc_check_fixtop(L);
166 fn = lj_func_newL(L, pt, tabref(parent->env));
167 /* NOBARRIER: The GCfunc is new (marked white). */
168 puv = parent->uvptr;
169 nuv = fn->l.nupvalues;
170 base = L->base;
171 for (i = 0; i < nuv; i++) {
172 int v = pt->uv[i];
173 GCupval *uv = v < 0 ? &gcref(puv[~v])->uv : func_finduv(L, base + v);
174 setgcref(fn->l.uvptr[i], obj2gco(uv));
175 }
176 return fn;
177}
178
179void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *fn)
180{
181 MSize size = isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) :
182 sizeCfunc((MSize)fn->c.nupvalues);
183 lj_mem_free(g, fn, size);
184}
185
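The loop at the end of lj_func_newL_gc above decodes the per-upvalue entries of pt->uv: a negative entry ~v names an upvalue inherited from the parent closure, while a non-negative entry names a stack slot of the enclosing frame that needs an open upvalue. A hedged sketch of that decoding step in isolation (func_finduv is static to lj_func.c, so this helper could only live there; it is an illustration, not part of the file):

/* Illustration only: resolve one pt->uv entry the way lj_func_newL_gc does. */
static GCupval *resolve_upvalue(lua_State *L, GCfuncL *parent, int v,
                                TValue *base)
{
  if (v < 0)
    return &gcref(parent->uvptr[~v])->uv;  /* Inherit parent upvalue ~v. */
  else
    return func_finduv(L, base + v);  /* Find/create open upvalue on slot v. */
}
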
diff --git a/src/lj_func.h b/src/lj_func.h
new file mode 100644
index 00000000..ee7942ea
--- /dev/null
+++ b/src/lj_func.h
@@ -0,0 +1,25 @@
1/*
2** Function handling (prototypes, functions and upvalues).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_FUNC_H
7#define _LJ_FUNC_H
8
9#include "lj_obj.h"
10
11/* Prototypes. */
12LJ_FUNC GCproto *lj_func_newproto(lua_State *L);
13LJ_FUNC void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt);
14
15/* Upvalues. */
16LJ_FUNCA void lj_func_closeuv(lua_State *L, TValue *level);
17LJ_FUNC void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv);
18
19/* Functions (closures). */
20LJ_FUNC GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env);
21LJ_FUNC GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env);
22LJ_FUNCA GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent);
23LJ_FUNC void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *c);
24
25#endif
diff --git a/src/lj_gc.c b/src/lj_gc.c
new file mode 100644
index 00000000..e479b567
--- /dev/null
+++ b/src/lj_gc.c
@@ -0,0 +1,800 @@
1/*
2** Garbage collector.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_gc_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_tab.h"
17#include "lj_func.h"
18#include "lj_udata.h"
19#include "lj_meta.h"
20#include "lj_state.h"
21#include "lj_frame.h"
22#include "lj_trace.h"
23#include "lj_vm.h"
24
25#define GCSTEPSIZE 1024u
26#define GCSWEEPMAX 40
27#define GCSWEEPCOST 10
28#define GCFINALIZECOST 100
29
30/* Macros to set GCobj colors and flags. */
31#define white2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_WHITES))
32#define black2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_BLACK))
33#define gray2black(x) ((x)->gch.marked |= LJ_GC_BLACK)
34#define makewhite(g, x) \
35 ((x)->gch.marked = ((x)->gch.marked & cast_byte(~LJ_GC_COLORS)) | curwhite(g))
36#define isfinalized(u) ((u)->marked & LJ_GC_FINALIZED)
37#define markfinalized(u) ((u)->marked |= LJ_GC_FINALIZED)
38
39/* -- Mark phase ---------------------------------------------------------- */
40
41/* Mark a TValue (if needed). */
42#define gc_marktv(g, tv) \
43 { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \
44 if (tviswhite(tv)) gc_mark(g, gcV(tv)); }
45
46/* Mark a GCobj (if needed). */
47#define gc_markobj(g, o) \
48 { if (iswhite(obj2gco(o))) gc_mark(g, obj2gco(o)); }
49
50/* Mark a string object. */
51#define gc_mark_str(s) ((s)->marked &= cast_byte(~LJ_GC_WHITES))
52
53/* Mark a white GCobj. */
54static void gc_mark(global_State *g, GCobj *o)
55{
56 lua_assert(iswhite(o) && !isdead(g, o));
57 white2gray(o);
58 if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUDATA)) {
59 GCtab *mt = tabref(gco2ud(o)->metatable);
60 gray2black(o); /* Userdata are never gray. */
61 if (mt) gc_markobj(g, mt);
62 gc_markobj(g, tabref(gco2ud(o)->env));
63 } else if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUPVAL)) {
64 GCupval *uv = gco2uv(o);
65 gc_marktv(g, uv->v);
66 if (uv->closed)
67 gray2black(o); /* Closed upvalues are never gray. */
68 } else if (o->gch.gct != ~LJ_TSTR) {
69 lua_assert(o->gch.gct == ~LJ_TFUNC || o->gch.gct == ~LJ_TTAB ||
70 o->gch.gct == ~LJ_TTHREAD || o->gch.gct == ~LJ_TPROTO);
71 setgcrefr(o->gch.gclist, g->gc.gray);
72 setgcref(g->gc.gray, o);
73 }
74}
75
76/* Mark the base metatables. */
77static void gc_mark_basemt(global_State *g)
78{
79 int i;
80 for (i = 0; i < BASEMT_MAX; i++)
81 if (tabref(g->basemt[i]) != NULL)
82 gc_markobj(g, tabref(g->basemt[i]));
83}
84
85/* Start a GC cycle and mark the root set. */
86static void gc_mark_start(global_State *g)
87{
88 setgcrefnull(g->gc.gray);
89 setgcrefnull(g->gc.grayagain);
90 setgcrefnull(g->gc.weak);
91 gc_markobj(g, mainthread(g));
92 gc_markobj(g, tabref(mainthread(g)->env));
93 gc_marktv(g, &g->registrytv);
94 gc_mark_basemt(g);
95 g->gc.state = GCSpropagate;
96}
97
98/* Mark open upvalues. */
99static void gc_mark_uv(global_State *g)
100{
101 GCupval *uv;
102 for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) {
103 lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
104 if (isgray(obj2gco(uv)))
105 gc_marktv(g, uv->v);
106 }
107}
108
109/* Mark userdata in mmudata list. */
110static void gc_mark_mmudata(global_State *g)
111{
112 GCobj *root = gcref(g->gc.mmudata);
113 GCobj *u = root;
114 if (u) {
115 do {
116 u = gcnext(u);
117 makewhite(g, u); /* Could be from previous GC. */
118 gc_mark(g, u);
119 } while (u != root);
120 }
121}
122
123/* Separate userdata which needs finalization to mmudata list. */
124size_t lj_gc_separateudata(global_State *g, int all)
125{
126 size_t m = 0;
127 GCRef *p = &mainthread(g)->nextgc;
128 GCobj *o;
129 while ((o = gcref(*p)) != NULL) {
130 if (!(iswhite(o) || all) || isfinalized(gco2ud(o))) {
131 p = &o->gch.nextgc; /* Nothing to do. */
132 } else if (!lj_meta_fastg(g, tabref(gco2ud(o)->metatable), MM_gc)) {
133 markfinalized(gco2ud(o)); /* Done, as there's no __gc metamethod. */
134 p = &o->gch.nextgc;
135 } else { /* Otherwise move userdata to be finalized to mmudata list. */
136 m += sizeudata(gco2ud(o));
137 markfinalized(gco2ud(o));
138 *p = o->gch.nextgc;
139 if (gcref(g->gc.mmudata)) { /* Link to end of mmudata list. */
140 GCobj *root = gcref(g->gc.mmudata);
141 setgcrefr(o->gch.nextgc, root->gch.nextgc);
142 setgcref(root->gch.nextgc, o);
143 setgcref(g->gc.mmudata, o);
144 } else { /* Create circular list. */
145 setgcref(o->gch.nextgc, o);
146 setgcref(g->gc.mmudata, o);
147 }
148 }
149 }
150 return m;
151}
152
153/* -- Propagation phase --------------------------------------------------- */
154
155/* Traverse a table. */
156static int gc_traverse_tab(global_State *g, GCtab *t)
157{
158 int weak = 0;
159 cTValue *mode;
160 GCtab *mt = tabref(t->metatable);
161 if (mt)
162 gc_markobj(g, mt);
163 mode = lj_meta_fastg(g, mt, MM_mode);
164 if (mode && tvisstr(mode)) { /* Valid __mode field? */
165 const char *modestr = strVdata(mode);
166 int c;
167 while ((c = *modestr++)) {
168 if (c == 'k') weak |= LJ_GC_WEAKKEY;
169 else if (c == 'v') weak |= LJ_GC_WEAKVAL;
170 }
171 if (weak) { /* Weak tables are cleared in the atomic phase. */
172 t->marked = cast_byte((t->marked & ~LJ_GC_WEAK) | weak);
173 setgcrefr(t->gclist, g->gc.weak);
174 setgcref(g->gc.weak, obj2gco(t));
175 }
176 }
177 if (weak == LJ_GC_WEAK) /* Nothing to mark if both keys/values are weak. */
178 return 1;
179 if (!(weak & LJ_GC_WEAKVAL)) { /* Mark array part. */
180 MSize i, asize = t->asize;
181 for (i = 0; i < asize; i++)
182 gc_marktv(g, arrayslot(t, i));
183 }
184 if (t->hmask > 0) { /* Mark hash part. */
185 Node *node = noderef(t->node);
186 MSize i, hmask = t->hmask;
187 for (i = 0; i <= hmask; i++) {
188 Node *n = &node[i];
189 lua_assert(itype(&n->key) != LJ_TDEADKEY || tvisnil(&n->val));
190 if (!tvisnil(&n->val)) { /* Mark non-empty slot. */
191 lua_assert(!tvisnil(&n->key));
192 if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key);
193 if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val);
194 } else if (tvisgcv(&n->key)) { /* Leave GC key in, but mark as dead. */
195 setitype(&n->key, LJ_TDEADKEY);
196 }
197 }
198 }
199 return weak;
200}
201
202/* Traverse a function. */
203static void gc_traverse_func(global_State *g, GCfunc *fn)
204{
205 gc_markobj(g, tabref(fn->c.env));
206 if (isluafunc(fn)) {
207 uint32_t i;
208 lua_assert(fn->l.nupvalues == funcproto(fn)->sizeuv);
209 gc_markobj(g, funcproto(fn));
210 for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */
211 gc_markobj(g, &gcref(fn->l.uvptr[i])->uv);
212 } else {
213 uint32_t i;
214 for (i = 0; i < fn->c.nupvalues; i++) /* Mark C function upvalues. */
215 gc_marktv(g, &fn->c.upvalue[i]);
216 }
217}
218
219#if LJ_HASJIT
220/* Traverse a trace. */
221static void gc_traverse_trace(global_State *g, Trace *T)
222{
223 IRRef ref;
224 for (ref = T->nk; ref < REF_TRUE; ref++) {
225 IRIns *ir = &T->ir[ref];
226 if (ir->o == IR_KGC)
227 gc_markobj(g, ir_kgc(ir));
228 }
229}
230
231/* The current trace is a GC root while not anchored in the prototype (yet). */
232#define gc_mark_curtrace(g) \
233 { if (G2J(g)->state != LJ_TRACE_IDLE && G2J(g)->curtrace != 0) \
234 gc_traverse_trace(g, &G2J(g)->cur); }
235#else
236#define gc_mark_curtrace(g) UNUSED(g)
237#endif
238
239/* Traverse a prototype. */
240static void gc_traverse_proto(global_State *g, GCproto *pt)
241{
242 ptrdiff_t i;
243#if LJ_HASJIT
244 jit_State *J = G2J(g);
245 TraceNo root, side;
246 /* Mark all root traces and attached side traces. */
247 for (root = pt->trace; root != 0; root = J->trace[root]->nextroot) {
248 for (side = J->trace[root]->nextside; side != 0;
249 side = J->trace[side]->nextside)
250 gc_traverse_trace(g, J->trace[side]);
251 gc_traverse_trace(g, J->trace[root]);
252 }
253#endif
254 /* GC during prototype creation could cause NULL fields. */
255 if (pt->chunkname)
256 gc_mark_str(pt->chunkname);
257 for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) /* Mark collectable consts. */
258 gc_markobj(g, gcref(pt->k.gc[i]));
259 for (i = 0; i < (ptrdiff_t)pt->sizeuvname; i++) /* Mark upvalue names. */
260 if (pt->uvname[i])
261 gc_mark_str(pt->uvname[i]);
262 for (i = 0; i < (ptrdiff_t)pt->sizevarinfo; i++) /* Mark names of locals. */
263 if (pt->varinfo[i].name)
264 gc_mark_str(pt->varinfo[i].name);
265}
266
267/* Traverse the frame structure of a stack. */
268static TValue *gc_traverse_frames(global_State *g, lua_State *th)
269{
270 TValue *frame, *top = th->top-1;
271 /* Note: extra vararg frame not skipped, marks function twice (harmless). */
272 for (frame = th->base-1; frame > th->stack; frame = frame_prev(frame)) {
273 GCfunc *fn = frame_func(frame);
274 TValue *ftop = frame;
275 if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
276 if (ftop > top) top = ftop;
277 gc_markobj(g, frame_gc(frame)); /* Need to mark hidden function (or L). */
278 }
279 top++; /* Correct bias of -1 (frame == base-1). */
280 if (top > th->maxstack) top = th->maxstack;
281 return top;
282}
283
284/* Traverse a thread object. */
285static void gc_traverse_thread(global_State *g, lua_State *th)
286{
287 TValue *o, *lim;
288 gc_markobj(g, tabref(th->env));
289 for (o = th->stack+1; o < th->top; o++)
290 gc_marktv(g, o);
291 lim = gc_traverse_frames(g, th);
292 /* Extra cleanup required to avoid this marking problem:
293 **
294 ** [aa[bb.X| X created.
295 ** [aa[cc| GC called from (small) inner frame, X destroyed.
296 ** [aa....X.| GC called again in (larger) outer frame, X resurrected (ouch).
297 **
298 ** During GC in step 2 the stack must be cleaned up to the max. frame extent:
299 **
300 ** ***| Slots cleaned
301 ** [cc| from top of last frame
302 ** [aa......| to max. frame extent.
303 */
304 for (; o <= lim; o++)
305 setnilV(o);
306 lj_state_shrinkstack(th, (MSize)(lim - th->stack));
307}
308
309/* Propagate one gray object. Traverse it and turn it black. */
310static size_t propagatemark(global_State *g)
311{
312 GCobj *o = gcref(g->gc.gray);
313 lua_assert(isgray(o));
314 gray2black(o);
315 setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */
316 if (LJ_LIKELY(o->gch.gct == ~LJ_TTAB)) {
317 GCtab *t = gco2tab(o);
318 if (gc_traverse_tab(g, t))
319 black2gray(o); /* Keep weak tables gray. */
320 return sizeof(GCtab) + sizeof(TValue) * t->asize +
321 sizeof(Node) * (t->hmask + 1);
322 } else if (LJ_LIKELY(o->gch.gct == ~LJ_TFUNC)) {
323 GCfunc *fn = gco2func(o);
324 gc_traverse_func(g, fn);
325 return isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) :
326 sizeCfunc((MSize)fn->c.nupvalues);
327 } else if (LJ_LIKELY(o->gch.gct == ~LJ_TPROTO)) {
328 GCproto *pt = gco2pt(o);
329 gc_traverse_proto(g, pt);
330 return sizeof(GCproto) + sizeof(BCIns) * pt->sizebc +
331 sizeof(GCobj *) * pt->sizekgc +
332 sizeof(lua_Number) * pt->sizekn +
333 sizeof(int16_t) * pt->sizeuv +
334 sizeof(int32_t) * pt->sizelineinfo +
335 sizeof(VarInfo) * pt->sizevarinfo +
336 sizeof(GCstr *) * pt->sizeuvname;
337 } else {
338 lua_State *th = gco2th(o);
339 setgcrefr(th->gclist, g->gc.grayagain);
340 setgcref(g->gc.grayagain, o);
341 black2gray(o); /* Threads are never black. */
342 gc_traverse_thread(g, th);
343 return sizeof(lua_State) + sizeof(TValue) * th->stacksize;
344 }
345}
346
347/* Propagate all gray objects. */
348static size_t gc_propagate_gray(global_State *g)
349{
350 size_t m = 0;
351 while (gcref(g->gc.gray) != NULL)
352 m += propagatemark(g);
353 return m;
354}
355
356/* -- Sweep phase --------------------------------------------------------- */
357
358/* Try to shrink some common data structures. */
359static void gc_shrink(global_State *g, lua_State *L)
360{
361 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
362 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
363 if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
364 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
365}
366
367/* Type of GC free functions. */
368typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
369
370/* GC free functions for LJ_TSTR .. LJ_TUDATA. ORDER LJ_T */
371static const GCFreeFunc gc_freefunc[] = {
372 (GCFreeFunc)lj_str_free,
373 (GCFreeFunc)lj_func_freeuv,
374 (GCFreeFunc)lj_state_free,
375 (GCFreeFunc)lj_func_freeproto,
376 (GCFreeFunc)lj_func_free,
377 (GCFreeFunc)0,
378 (GCFreeFunc)lj_tab_free,
379 (GCFreeFunc)lj_udata_free
380};
381
382/* Full sweep of a GC list. */
383#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM)
384
385/* Partial sweep of a GC list. */
386static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
387{
388 /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
389 int ow = otherwhite(g);
390 GCobj *o;
391 while ((o = gcref(*p)) != NULL && lim-- > 0) {
392 if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */
393 gc_fullsweep(g, &gco2th(o)->openupval);
394 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
395 lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED));
396 makewhite(g, o); /* Value is alive, change to the current white. */
397 p = &o->gch.nextgc;
398 } else { /* Otherwise value is dead, free it. */
399 lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED);
400 setgcrefr(*p, o->gch.nextgc);
401 if (o == gcref(g->gc.root))
402 setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */
403 gc_freefunc[o->gch.gct - ~LJ_TSTR](g, o);
404 }
405 }
406 return p;
407}
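
/* Editor's sketch (not part of lj_gc.c): the liveness test inside gc_sweep,
** (marked ^ LJ_GC_WHITES) & otherwhite(g), is non-zero exactly when an object
** is black or carries the current white; an object still carrying the other
** (previous) white is dead and gets freed. On shutdown lj_gc_freeall widens
** currentwhite with LJ_GC_SFIXED, so only super-fixed objects survive.
*/
static int gc_survives_sweep(global_State *g, GCobj *o)
{
  return ((o->gch.marked ^ LJ_GC_WHITES) & otherwhite(g)) != 0;
}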
408
409/* Check whether we can clear a key or a value slot from a table. */
410static int gc_mayclear(cTValue *o, int val)
411{
412 if (tvisgcv(o)) { /* Only collectable objects can be weak references. */
413 if (tvisstr(o)) { /* But strings cannot be used as weak references. */
414 gc_mark_str(strV(o)); /* And need to be marked. */
415 return 0;
416 }
417 if (iswhite(gcV(o)))
418 return 1; /* Object is about to be collected. */
419 if (tvisudata(o) && val && isfinalized(udataV(o)))
420 return 1; /* Finalized userdata is dropped only from values. */
421 }
422 return 0; /* Cannot clear. */
423}
424
425/* Clear collected entries from weak tables. */
426static void gc_clearweak(GCobj *o)
427{
428 while (o) {
429 GCtab *t = gco2tab(o);
430 lua_assert((t->marked & LJ_GC_WEAK));
431 if ((t->marked & LJ_GC_WEAKVAL)) {
432 MSize i, asize = t->asize;
433 for (i = 0; i < asize; i++) {
434 /* Clear array slot when value is about to be collected. */
435 TValue *tv = arrayslot(t, i);
436 if (gc_mayclear(tv, 1))
437 setnilV(tv);
438 }
439 }
440 if (t->hmask > 0) {
441 Node *node = noderef(t->node);
442 MSize i, hmask = t->hmask;
443 for (i = 0; i <= hmask; i++) {
444 Node *n = &node[i];
445 /* Clear hash slot when key or value is about to be collected. */
446 if (!tvisnil(&n->val) && (gc_mayclear(&n->key, 0) ||
447 gc_mayclear(&n->val, 1))) {
448 setnilV(&n->val);
449 if (tvisgcv(&n->key)) /* Leave GC key in, but mark as dead. */
450 setitype(&n->key, LJ_TDEADKEY);
451 }
452 }
453 }
454 o = gcref(t->gclist);
455 }
456}
457
458/* Finalize one userdata object from mmudata list. */
459static void gc_finalize(lua_State *L)
460{
461 global_State *g = G(L);
462 GCobj *o = gcnext(gcref(g->gc.mmudata));
463 GCudata *ud = gco2ud(o);
464 cTValue *mo;
465 /* Unchain from list of userdata to be finalized. */
466 if (o == gcref(g->gc.mmudata))
467 setgcrefnull(g->gc.mmudata);
468 else
469 setgcrefr(gcref(g->gc.mmudata)->gch.nextgc, ud->nextgc);
470 /* Add it back to the main userdata list and make it white. */
471 setgcrefr(ud->nextgc, mainthread(g)->nextgc);
472 setgcref(mainthread(g)->nextgc, o);
473 makewhite(g, o);
474 /* Resolve the __gc metamethod. */
475 mo = lj_meta_fastg(g, tabref(ud->metatable), MM_gc);
476 if (mo) {
477 /* Save and restore lots of state around the __gc callback. */
478 uint8_t oldh = hook_save(g);
479 MSize oldt = g->gc.threshold;
480 GCobj *oldjl = gcref(g->jit_L);
481 MSize oldjs = 0;
482 ptrdiff_t oldjb = 0;
483 int errcode;
484 TValue *top;
485 if (oldjl) {
486 oldjs = gco2th(oldjl)->stacksize;
487 oldjb = savestack(gco2th(oldjl), mref(g->jit_base, TValue ));
488 setgcrefnull(g->jit_L);
489 }
490 lj_trace_abort(g);
491 top = L->top;
492 L->top = top+2;
493 hook_entergc(g); /* Disable hooks and new traces during __gc. */
494 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
495 copyTV(L, top, mo);
496 setudataV(L, top+1, ud);
497 errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|ud| -> | */
498 hook_restore(g, oldh);
499 g->gc.threshold = oldt; /* Restore GC threshold. */
500 if (oldjl) {
501 if (gco2th(oldjl)->stacksize < oldjs)
502 lj_state_growstack(gco2th(oldjl), oldjs - gco2th(oldjl)->stacksize);
503 setgcref(g->jit_L, oldjl);
504 setmref(g->jit_base, restorestack(gco2th(oldjl), oldjb));
505 }
506 if (errcode)
507 lj_err_throw(L, errcode); /* Propagate errors. */
508 }
509}
510
511/* Finalize all userdata objects from mmudata list. */
512void lj_gc_finalizeudata(lua_State *L)
513{
514 while (gcref(G(L)->gc.mmudata) != NULL)
515 gc_finalize(L);
516}
517
518/* Free all remaining GC objects. */
519void lj_gc_freeall(global_State *g)
520{
521 MSize i, strmask;
522 /* Free everything, except super-fixed objects (the main thread). */
523 g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED;
524 gc_fullsweep(g, &g->gc.root);
525 strmask = g->strmask;
526 for (i = 0; i <= strmask; i++) /* Free all string hash chains. */
527 gc_fullsweep(g, &g->strhash[i]);
528}
529
530/* -- Collector ----------------------------------------------------------- */
531
532/* Atomic part of the GC cycle, transitioning from mark to sweep phase. */
533static void atomic(global_State *g, lua_State *L)
534{
535 size_t udsize;
536
537 gc_mark_uv(g); /* Need to remark open upvalues (the thread may be dead). */
538 gc_propagate_gray(g); /* Propagate any left-overs. */
539
540 setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */
541 setgcrefnull(g->gc.weak);
542 lua_assert(!iswhite(obj2gco(mainthread(g))));
543 gc_markobj(g, L); /* Mark running thread. */
544 gc_mark_curtrace(g); /* Mark current trace. */
545 gc_mark_basemt(g); /* Mark base metatables (again). */
546 gc_propagate_gray(g); /* Propagate all of the above. */
547
548 setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */
549 setgcrefnull(g->gc.grayagain);
550 gc_propagate_gray(g); /* Propagate it. */
551
552 udsize = lj_gc_separateudata(g, 0); /* Separate userdata to be finalized. */
553 gc_mark_mmudata(g); /* Mark them. */
554 udsize += gc_propagate_gray(g); /* And propagate the marks. */
555
556 /* All marking done, clear weak tables. */
557 gc_clearweak(gcref(g->gc.weak));
558
559 /* Prepare for sweep phase. */
560 g->gc.currentwhite = cast_byte(otherwhite(g)); /* Flip current white. */
561 g->gc.sweepstr = 0;
562 g->gc.sweep = &g->gc.root;
563 g->gc.state = GCSsweepstring;
564 g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */
565}
566
567/* GC state machine. Returns a cost estimate for each step performed. */
568static size_t gc_onestep(lua_State *L)
569{
570 global_State *g = G(L);
571 switch (g->gc.state) {
572 case GCSpause:
573 gc_mark_start(g); /* Start a new GC cycle by marking all GC roots. */
574 return 0;
575 case GCSpropagate:
576 if (gcref(g->gc.gray) != NULL)
577 return propagatemark(g); /* Propagate one gray object. */
578 atomic(g, L); /* End of mark phase. */
579 return 0;
580 case GCSsweepstring: {
581 MSize old = g->gc.total;
582 gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
583 if (g->gc.sweepstr > g->strmask)
584      g->gc.state = GCSsweep;  /* All string hash chains swept. */
585 lua_assert(old >= g->gc.total);
586 g->gc.estimate -= old - g->gc.total;
587 return GCSWEEPCOST;
588 }
589 case GCSsweep: {
590 MSize old = g->gc.total;
591 g->gc.sweep = gc_sweep(g, g->gc.sweep, GCSWEEPMAX); /* Partial sweep. */
592 if (gcref(*g->gc.sweep) == NULL) {
593 gc_shrink(g, L);
594 g->gc.state = GCSfinalize; /* End of sweep phase. */
595 }
596 lua_assert(old >= g->gc.total);
597 g->gc.estimate -= old - g->gc.total;
598 return GCSWEEPMAX*GCSWEEPCOST;
599 }
600 case GCSfinalize:
601 if (gcref(g->gc.mmudata) != NULL) {
602 gc_finalize(L); /* Finalize one userdata object. */
603 if (g->gc.estimate > GCFINALIZECOST)
604 g->gc.estimate -= GCFINALIZECOST;
605 return GCFINALIZECOST;
606 }
607 g->gc.state = GCSpause; /* End of GC cycle. */
608 g->gc.debt = 0;
609 return 0;
610 default:
611 lua_assert(0);
612 return 0;
613 }
614}
615
616/* Perform a limited amount of incremental GC steps. */
617int lj_gc_step(lua_State *L)
618{
619 global_State *g = G(L);
620 MSize lim;
621 int32_t ostate = g->vmstate;
622 setvmstate(g, GC);
623 lim = (GCSTEPSIZE/100) * g->gc.stepmul;
624 if (lim == 0)
625 lim = LJ_MAX_MEM;
626 g->gc.debt += g->gc.total - g->gc.threshold;
627 do {
628 lim -= (MSize)gc_onestep(L);
629 if (g->gc.state == GCSpause) {
630 lua_assert(g->gc.total >= g->gc.estimate);
631 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
632 g->vmstate = ostate;
633 return 1; /* Finished a GC cycle. */
634 }
635 } while ((int32_t)lim > 0);
636 if (g->gc.debt < GCSTEPSIZE) {
637 g->gc.threshold = g->gc.total + GCSTEPSIZE;
638 } else {
639 g->gc.debt -= GCSTEPSIZE;
640 g->gc.threshold = g->gc.total;
641 }
642 g->vmstate = ostate;
643 return 0;
644}
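
/* Editor's sketch (not part of lj_gc.c): the pacing arithmetic used above,
** spelled out with the assumed default knobs gc.stepmul = 200 and
** gc.pause = 200 (cf. LUAI_GCMUL/LUAI_GCPAUSE in luaconf.h).
*/
static MSize gc_step_budget(MSize stepmul)
{
  MSize lim = (GCSTEPSIZE/100) * stepmul;  /* 1024/100 * 200 = 2000 units. */
  return lim ? lim : LJ_MAX_MEM;           /* stepmul == 0 removes the cap. */
}

static MSize gc_pause_threshold(MSize estimate, MSize pause)
{
  return (estimate/100) * pause;  /* pause == 200 -> restart at 2x estimate. */
}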
645
646/* Ditto, but fix the stack top first. */
647void lj_gc_step_fixtop(lua_State *L)
648{
649 if (curr_funcisL(L)) L->top = curr_topL(L);
650 lj_gc_step(L);
651}
652
653/* Perform multiple GC steps. Called from JIT-compiled code. */
654void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps)
655{
656 cframe_pc(cframe_raw(L->cframe)) = pc;
657 L->top = curr_topL(L);
658 while (steps-- > 0 && lj_gc_step(L) == 0)
659 ;
660}
661
662/* Perform a full GC cycle. */
663void lj_gc_fullgc(lua_State *L)
664{
665 global_State *g = G(L);
666 int32_t ostate = g->vmstate;
667 setvmstate(g, GC);
668 if (g->gc.state <= GCSpropagate) { /* Caught somewhere in the middle. */
669 g->gc.sweepstr = 0;
670 g->gc.sweep = &g->gc.root; /* Sweep everything (preserving it). */
671 setgcrefnull(g->gc.gray); /* Reset lists from partial propagation. */
672 setgcrefnull(g->gc.grayagain);
673 setgcrefnull(g->gc.weak);
674 g->gc.state = GCSsweepstring; /* Fast forward to the sweep phase. */
675 }
676 lua_assert(g->gc.state != GCSpause && g->gc.state != GCSpropagate);
677 while (g->gc.state != GCSfinalize) { /* Finish sweep. */
678 lua_assert(g->gc.state == GCSsweepstring || g->gc.state == GCSsweep);
679 gc_onestep(L);
680 }
681 /* Now perform a full GC. */
682 gc_mark_start(g);
683 while (g->gc.state != GCSpause)
684 gc_onestep(L);
685 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
686 g->vmstate = ostate;
687}
688
689/* -- Write barriers ------------------------------------------------------ */
690
691/* Move the GC propagation frontier back for tables (make it gray again). */
692void lj_gc_barrierback(global_State *g, GCtab *t)
693{
694 GCobj *o = obj2gco(t);
695 lua_assert(isblack(o) && !isdead(g, o));
696 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
697 black2gray(o);
698 setgcrefr(t->gclist, g->gc.grayagain);
699 setgcref(g->gc.grayagain, o);
700}
701
702/* Move the GC propagation frontier forward. */
703void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
704{
705 lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
706 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
707 lua_assert(o->gch.gct != ~LJ_TTAB);
708 /* Preserve invariant during propagation. Otherwise it doesn't matter. */
709 if (g->gc.state == GCSpropagate)
710 gc_mark(g, v); /* Move frontier forward. */
711 else
712 makewhite(g, o); /* Make it white to avoid the following barrier. */
713}
714
715/* The reason for duplicating this is that it needs to be visible from ASM. */
716void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v)
717{
718 lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
719 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
720 lua_assert(o->gch.gct == ~LJ_TUPVAL);
721 /* Preserve invariant during propagation. Otherwise it doesn't matter. */
722 if (g->gc.state == GCSpropagate)
723 gc_mark(g, v); /* Move frontier forward. */
724 else
725 makewhite(g, o); /* Make it white to avoid the following barrier. */
726}
727
728/* Close upvalue. Also needs a write barrier. */
729void lj_gc_closeuv(global_State *g, GCupval *uv)
730{
731 GCobj *o = obj2gco(uv);
732 /* Copy stack slot to upvalue itself and point to the copy. */
733 copyTV(mainthread(g), &uv->tv, uv->v);
734 uv->v = &uv->tv;
735 uv->closed = 1;
736 setgcrefr(o->gch.nextgc, g->gc.root);
737 setgcref(g->gc.root, o);
738 if (isgray(o)) { /* A closed upvalue is never gray, so fix this. */
739 if (g->gc.state == GCSpropagate) {
740 gray2black(o); /* Make it black and preserve invariant. */
741 if (tviswhite(uv->v))
742 lj_gc_barrierf(g, o, gcV(uv->v));
743 } else {
744 makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */
745 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
746 }
747 }
748}
749
750#if LJ_HASJIT
751/* Mark a trace if it's saved during the propagation phase. */
752void lj_gc_barriertrace(global_State *g, void *T)
753{
754 if (g->gc.state == GCSpropagate)
755 gc_traverse_trace(g, (Trace *)T);
756}
757#endif
758
759/* -- Allocator ----------------------------------------------------------- */
760
761/* Call pluggable memory allocator to allocate or resize a fragment. */
762void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
763{
764 global_State *g = G(L);
765 lua_assert((osz == 0) == (p == NULL));
766 p = g->allocf(g->allocd, p, osz, nsz);
767 if (p == NULL && nsz > 0)
768 lj_err_throw(L, LUA_ERRMEM);
769 lua_assert((nsz == 0) == (p == NULL));
770 g->gc.total = (g->gc.total - osz) + nsz;
771 return p;
772}
773
774/* Allocate new GC object and link it to the root set. */
775void *lj_mem_newgco(lua_State *L, MSize size)
776{
777 global_State *g = G(L);
778 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
779 if (o == NULL)
780 lj_err_throw(L, LUA_ERRMEM);
781 g->gc.total += size;
782 setgcrefr(o->gch.nextgc, g->gc.root);
783 setgcref(g->gc.root, o);
784 newwhite(g, o);
785 return o;
786}
787
788/* Resize growable vector. */
789void *lj_mem_grow(lua_State *L, void *p, MSize *szp, MSize lim, MSize esz)
790{
791 MSize sz = (*szp) << 1;
792 if (sz < LJ_MIN_VECSZ)
793 sz = LJ_MIN_VECSZ;
794 if (sz > lim)
795 sz = lim;
796 p = lj_mem_realloc(L, p, (*szp)*esz, sz*esz);
797 *szp = sz;
798 return p;
799}
800
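lj_mem_grow doubles a vector (never below LJ_MIN_VECSZ, never above the given limit) and updates the size variable through szp; internal code normally reaches it through the lj_mem_growvec macro declared in lj_gc.h. A hedged usage sketch with invented names -- BCBuf and bcbuf_append are illustrations, not LuaJIT APIs, and LJ_MAX_BCINS is assumed to come from lj_def.h:

/* Hypothetical growable bytecode buffer, grown the way internal code does. */
typedef struct BCBuf { BCIns *bc; MSize n, sz; } BCBuf;

static void bcbuf_append(lua_State *L, BCBuf *b, BCIns ins)
{
  if (LJ_UNLIKELY(b->n >= b->sz))
    lj_mem_growvec(L, b->bc, b->sz, LJ_MAX_BCINS, BCIns);  /* Doubles b->sz. */
  b->bc[b->n++] = ins;
}
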
diff --git a/src/lj_gc.h b/src/lj_gc.h
new file mode 100644
index 00000000..192066d3
--- /dev/null
+++ b/src/lj_gc.h
@@ -0,0 +1,102 @@
1/*
2** Garbage collector.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_GC_H
7#define _LJ_GC_H
8
9#include "lj_obj.h"
10
11/* Garbage collector states. Order matters. */
12enum { GCSpause, GCSpropagate, GCSsweepstring, GCSsweep, GCSfinalize };
13
14/* Bitmasks for marked field of GCobj. */
15#define LJ_GC_WHITE0 0x01
16#define LJ_GC_WHITE1 0x02
17#define LJ_GC_BLACK 0x04
18#define LJ_GC_FINALIZED 0x08
19#define LJ_GC_WEAKKEY 0x08
20#define LJ_GC_WEAKVAL 0x10
21#define LJ_GC_FIXED 0x20
22#define LJ_GC_SFIXED 0x40
23
24#define LJ_GC_WHITES (LJ_GC_WHITE0 | LJ_GC_WHITE1)
25#define LJ_GC_COLORS (LJ_GC_WHITES | LJ_GC_BLACK)
26#define LJ_GC_WEAK (LJ_GC_WEAKKEY | LJ_GC_WEAKVAL)
27
28/* Macros to test and set GCobj colors. */
29#define iswhite(x) ((x)->gch.marked & LJ_GC_WHITES)
30#define isblack(x) ((x)->gch.marked & LJ_GC_BLACK)
31#define isgray(x) (!((x)->gch.marked & (LJ_GC_BLACK|LJ_GC_WHITES)))
32#define tviswhite(x) (tvisgcv(x) && iswhite(gcV(x)))
33#define otherwhite(g) (g->gc.currentwhite ^ LJ_GC_WHITES)
34#define isdead(g, v) ((v)->gch.marked & otherwhite(g) & LJ_GC_WHITES)
35
36#define curwhite(g) ((g)->gc.currentwhite & LJ_GC_WHITES)
37#define newwhite(g, x) (obj2gco(x)->gch.marked = (uint8_t)curwhite(g))
38#define flipwhite(x) ((x)->gch.marked ^= LJ_GC_WHITES)
39#define fixstring(s) ((s)->marked |= LJ_GC_FIXED)
40
41/* Collector. */
42LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
43LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
44LJ_FUNC void lj_gc_freeall(global_State *g);
45LJ_FUNCA int lj_gc_step(lua_State *L);
46LJ_FUNCA void lj_gc_step_fixtop(lua_State *L);
47LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps);
48LJ_FUNC void lj_gc_fullgc(lua_State *L);
49
50/* GC check: drive collector forward if the GC threshold has been reached. */
51#define lj_gc_check(L) \
52 { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \
53 lj_gc_step(L); }
54#define lj_gc_check_fixtop(L) \
55 { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \
56 lj_gc_step_fixtop(L); }
57
58/* Write barriers. */
59LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t);
60LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v);
61LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v);
62LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv);
63LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T);
64
65/* Barrier for stores to table objects. TValue and GCobj variant. */
66#define lj_gc_barriert(L, t, tv) \
67 { if (tviswhite(tv) && isblack(obj2gco(t))) \
68 lj_gc_barrierback(G(L), (t)); }
69#define lj_gc_objbarriert(L, t, o) \
70 { if (iswhite(obj2gco(o)) && isblack(obj2gco(t))) \
71 lj_gc_barrierback(G(L), (t)); }
72
73/* Barrier for stores to any other object. TValue and GCobj variant. */
74#define lj_gc_barrier(L, p, tv) \
75 { if (tviswhite(tv) && isblack(obj2gco(p))) \
76 lj_gc_barrierf(G(L), obj2gco(p), gcV(tv)); }
77#define lj_gc_objbarrier(L, p, o) \
78 { if (iswhite(obj2gco(o)) && isblack(obj2gco(p))) \
79 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
80
81/* Allocator. */
82LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz);
83LJ_FUNC void *lj_mem_newgco(lua_State *L, MSize size);
84LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
85 MSize *szp, MSize lim, MSize esz);
86
87#define lj_mem_new(L, s) lj_mem_realloc(L, NULL, 0, (s))
88#define lj_mem_free(g, p, osize) \
89 (g->gc.total -= (MSize)(osize), g->allocf(g->allocd, (p), (osize), 0))
90
91#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t))))
92#define lj_mem_reallocvec(L, p, on, n, t) \
93 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t))))
94#define lj_mem_growvec(L, p, n, m, t) \
95 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
96#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
97
98#define lj_mem_newobj(L, t) ((t *)lj_mem_newgco(L, sizeof(t)))
99#define lj_mem_newt(L, s, t) ((t *)lj_mem_new(L, (s)))
100#define lj_mem_freet(g, p) lj_mem_free(g, (p), sizeof(*(p)))
101
102#endif
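
The barrier macros above encode the usual incremental-GC store rule: after a collectable value is written into an object, a black-object/white-value pair must be repaired, backwards for tables (re-gray the table) and forwards for everything else (mark the value). A minimal usage sketch for a table store from internal C code; store_into_table is an invented name for illustration, assuming t is a GCtab * and slot points into t:

/* Illustration only: store a TValue into a table slot from internal C code. */
static void store_into_table(lua_State *L, GCtab *t, TValue *slot, cTValue *val)
{
  copyTV(L, slot, val);       /* Raw store into the slot. */
  lj_gc_barriert(L, t, val);  /* Back barrier if t is black and val is white. */
  /* Allocation sites would additionally call lj_gc_check(L). */
}
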
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
new file mode 100644
index 00000000..dfec188a
--- /dev/null
+++ b/src/lj_gdbjit.c
@@ -0,0 +1,739 @@
1/*
2** Client for the GDB JIT API.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_gdbjit_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_frame.h"
17#include "lj_jit.h"
18#include "lj_dispatch.h"
19
20/* This is not compiled in by default.
21** Enable with -DLUAJIT_USE_GDBJIT in the Makefile and recompile everything.
22*/
23#ifdef LUAJIT_USE_GDBJIT
24
25/* The GDB JIT API allows JIT compilers to pass debug information about
26** JIT-compiled code back to GDB. You need at least GDB 7.0 or higher
27** to see it in action.
28**
29** This is a passive API, so it works even when not running under GDB
30** or when attaching to an already running process. Alas, this implies
31** enabling it always has a non-negligible overhead -- do not use in
32** release mode!
33**
34** The LuaJIT GDB JIT client is rather minimal at the moment. It gives
35** each trace a symbol name and adds a source location and frame unwind
36** information. Obviously LuaJIT itself and any embedding C application
37** should be compiled with debug symbols, too (see the Makefile).
38**
39** Traces are named TRACE_1, TRACE_2, ... these correspond to the trace
40** numbers from -jv or -jdump. Use "break TRACE_1" or "tbreak TRACE_1" etc.
41** to set breakpoints on specific traces (even ahead of their creation).
42**
43** The source location for each trace allows listing the corresponding
44** source lines with the GDB command "list" (but only if the Lua source
45** has been loaded from a file). Currently this is always set to the
46** location where the trace has been started.
47**
48** Frame unwind information can be inspected with the GDB command
49** "info frame". This also allows proper backtraces across JIT-compiled
50** code with the GDB command "bt".
51**
52** You probably want to add the following settings to a .gdbinit file
53** (or add them to ~/.gdbinit):
54** set disassembly-flavor intel
55** set breakpoint pending on
56**
57** Here's a sample GDB session:
58** ------------------------------------------------------------------------
59
60$ cat >x.lua
61for outer=1,100 do
62 for inner=1,100 do end
63end
64^D
65
66$ luajit -jv x.lua
67[TRACE 1 x.lua:2]
68[TRACE 2 (1/3) x.lua:1 -> 1]
69
70$ gdb --quiet --args luajit x.lua
71(gdb) tbreak TRACE_1
72Function "TRACE_1" not defined.
73Temporary breakpoint 1 (TRACE_1) pending.
74(gdb) run
75Starting program: luajit x.lua
76
77Temporary breakpoint 1, TRACE_1 () at x.lua:2
782 for inner=1,100 do end
79(gdb) list
801 for outer=1,100 do
812 for inner=1,100 do end
823 end
83(gdb) bt
84#0 TRACE_1 () at x.lua:2
85#1 0x08053690 in lua_pcall [...]
86[...]
87#7 0x0806ff90 in main [...]
88(gdb) disass TRACE_1
89Dump of assembler code for function TRACE_1:
900xf7fd9fba <TRACE_1+0>: mov DWORD PTR ds:0xf7e0e2a0,0x1
910xf7fd9fc4 <TRACE_1+10>: movsd xmm7,QWORD PTR [edx+0x20]
92[...]
930xf7fd9ff8 <TRACE_1+62>: jmp 0xf7fd2014
94End of assembler dump.
95(gdb) tbreak TRACE_2
96Function "TRACE_2" not defined.
97Temporary breakpoint 2 (TRACE_2) pending.
98(gdb) cont
99Continuing.
100
101Temporary breakpoint 2, TRACE_2 () at x.lua:1
1021 for outer=1,100 do
103(gdb) info frame
104Stack level 0, frame at 0xffffd7c0:
105 eip = 0xf7fd9f60 in TRACE_2 (x.lua:1); saved eip 0x8053690
106 called by frame at 0xffffd7e0
107 source language unknown.
108 Arglist at 0xffffd78c, args:
109 Locals at 0xffffd78c, Previous frame's sp is 0xffffd7c0
110 Saved registers:
111 ebx at 0xffffd7ac, ebp at 0xffffd7b8, esi at 0xffffd7b0, edi at 0xffffd7b4,
112 eip at 0xffffd7bc
113(gdb)
114
115** ------------------------------------------------------------------------
116*/
117
118/* -- GDB JIT API --------------------------------------------------------- */
119
120/* GDB JIT actions. */
121enum {
122 GDBJIT_NOACTION = 0,
123 GDBJIT_REGISTER,
124 GDBJIT_UNREGISTER
125};
126
127/* GDB JIT entry. */
128typedef struct GDBJITentry {
129 struct GDBJITentry *next_entry;
130 struct GDBJITentry *prev_entry;
131 const char *symfile_addr;
132 uint64_t symfile_size;
133} GDBJITentry;
134
135/* GDB JIT descriptor. */
136typedef struct GDBJITdesc {
137 uint32_t version;
138 uint32_t action_flag;
139 GDBJITentry *relevant_entry;
140 GDBJITentry *first_entry;
141} GDBJITdesc;
142
143GDBJITdesc __jit_debug_descriptor = {
144 1, GDBJIT_NOACTION, NULL, NULL
145};
146
147/* GDB sets a breakpoint at this function. */
148void LJ_NOINLINE __jit_debug_register_code()
149{
150 __asm__ __volatile__("");
151};
152
153/* -- In-memory ELF object definitions ------------------------------------ */
154
155/* ELF definitions. */
156typedef struct ELFheader {
157 uint8_t emagic[4];
158 uint8_t eclass;
159 uint8_t eendian;
160 uint8_t eversion;
161 uint8_t eosabi;
162 uint8_t eabiversion;
163 uint8_t epad[7];
164 uint16_t type;
165 uint16_t machine;
166 uint32_t version;
167 uintptr_t entry;
168 uintptr_t phofs;
169 uintptr_t shofs;
170 uint32_t flags;
171 uint16_t ehsize;
172 uint16_t phentsize;
173 uint16_t phnum;
174 uint16_t shentsize;
175 uint16_t shnum;
176 uint16_t shstridx;
177} ELFheader;
178
179typedef struct ELFsectheader {
180 uint32_t name;
181 uint32_t type;
182 uintptr_t flags;
183 uintptr_t addr;
184 uintptr_t ofs;
185 uintptr_t size;
186 uint32_t link;
187 uint32_t info;
188 uintptr_t align;
189 uintptr_t entsize;
190} ELFsectheader;
191
192#define ELFSECT_IDX_ABS 0xfff1
193
194enum {
195 ELFSECT_TYPE_PROGBITS = 1,
196 ELFSECT_TYPE_SYMTAB = 2,
197 ELFSECT_TYPE_STRTAB = 3,
198 ELFSECT_TYPE_NOBITS = 8
199};
200
201#define ELFSECT_FLAGS_WRITE 1
202#define ELFSECT_FLAGS_ALLOC 2
203#define ELFSECT_FLAGS_EXEC 4
204
205typedef struct ELFsymbol {
206#if LJ_64
207 uint32_t name;
208 uint8_t info;
209 uint8_t other;
210 uint16_t sectidx;
211 uintptr_t value;
212 uint64_t size;
213#else
214 uint32_t name;
215 uintptr_t value;
216 uint32_t size;
217 uint8_t info;
218 uint8_t other;
219 uint16_t sectidx;
220#endif
221} ELFsymbol;
222
223enum {
224 ELFSYM_TYPE_FUNC = 2,
225 ELFSYM_TYPE_FILE = 4,
226 ELFSYM_BIND_LOCAL = 0 << 4,
227 ELFSYM_BIND_GLOBAL = 1 << 4,
228};
229
230/* DWARF definitions. */
231#define DW_CIE_VERSION 1
232
233enum {
234 DW_CFA_nop = 0x0,
235 DW_CFA_def_cfa = 0xc,
236 DW_CFA_def_cfa_offset = 0xe,
237 DW_CFA_advance_loc = 0x40,
238 DW_CFA_offset = 0x80
239};
240
241enum {
242 DW_EH_PE_udata4 = 3,
243 DW_EH_PE_textrel = 0x20
244};
245
246enum {
247 DW_TAG_compile_unit = 0x11
248};
249
250enum {
251 DW_children_no = 0,
252 DW_children_yes = 1
253};
254
255enum {
256 DW_AT_name = 0x03,
257 DW_AT_stmt_list = 0x10,
258 DW_AT_low_pc = 0x11,
259 DW_AT_high_pc = 0x12
260};
261
262enum {
263 DW_FORM_addr = 0x01,
264 DW_FORM_data4 = 0x06,
265 DW_FORM_string = 0x08
266};
267
268enum {
269 DW_LNS_extended_op = 0,
270 DW_LNS_copy = 1,
271 DW_LNS_advance_pc = 2,
272 DW_LNS_advance_line = 3
273};
274
275enum {
276 DW_LNE_end_sequence = 1,
277 DW_LNE_set_address = 2
278};
279
280enum {
281#if LJ_TARGET_X86
282 DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX,
283 DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI,
284 DW_REG_RA,
285#elif LJ_TARGET_X64
286 /* Yes, the order is strange, but correct. */
287 DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX,
288 DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP,
289 DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11,
290 DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15,
291 DW_REG_RA,
292#else
293#error "Unsupported target architecture"
294#endif
295};
296
297/* Minimal list of sections for the in-memory ELF object. */
298enum {
299 GDBJIT_SECT_NULL,
300 GDBJIT_SECT_text,
301 GDBJIT_SECT_eh_frame,
302 GDBJIT_SECT_shstrtab,
303 GDBJIT_SECT_strtab,
304 GDBJIT_SECT_symtab,
305 GDBJIT_SECT_debug_info,
306 GDBJIT_SECT_debug_abbrev,
307 GDBJIT_SECT_debug_line,
308 GDBJIT_SECT__MAX
309};
310
311enum {
312 GDBJIT_SYM_UNDEF,
313 GDBJIT_SYM_FILE,
314 GDBJIT_SYM_FUNC,
315 GDBJIT_SYM__MAX
316};
317
318/* In-memory ELF object. */
319typedef struct GDBJITobj {
320 ELFheader hdr; /* ELF header. */
321 ELFsectheader sect[GDBJIT_SECT__MAX]; /* ELF sections. */
322 ELFsymbol sym[GDBJIT_SYM__MAX]; /* ELF symbol table. */
323 uint8_t space[4096]; /* Space for various section data. */
324} GDBJITobj;
325
326/* Combined structure for GDB JIT entry and ELF object. */
327typedef struct GDBJITentryobj {
328 GDBJITentry entry;
329 size_t sz;
330 GDBJITobj obj;
331} GDBJITentryobj;
332
333/* Template for in-memory ELF header. */
334static const ELFheader elfhdr_template = {
335 .emagic = { 0x7f, 'E', 'L', 'F' },
336 .eclass = LJ_64 ? 2 : 1,
337 .eendian = LJ_ENDIAN_SELECT(1, 2),
338 .eversion = 1,
339#if defined(__linux__)
340 .eosabi = 0, /* Nope, it's not 3. */
341#elif defined(__FreeBSD__)
342 .eosabi = 9,
343#elif defined(__NetBSD__)
344 .eosabi = 2,
345#elif defined(__OpenBSD__)
346 .eosabi = 12,
347#elif defined(__solaris__)
348 .eosabi = 6,
349#else
350 .eosabi = 0,
351#endif
352 .eabiversion = 0,
353 .epad = { 0, 0, 0, 0, 0, 0, 0 },
354 .type = 1,
355#if LJ_TARGET_X86
356 .machine = 3,
357#elif LJ_TARGET_X64
358 .machine = 62,
359#else
360#error "Unsupported target architecture"
361#endif
362 .version = 1,
363 .entry = 0,
364 .phofs = 0,
365 .shofs = offsetof(GDBJITobj, sect),
366 .flags = 0,
367 .ehsize = sizeof(ELFheader),
368 .phentsize = 0,
369 .phnum = 0,
370 .shentsize = sizeof(ELFsectheader),
371 .shnum = GDBJIT_SECT__MAX,
372 .shstridx = GDBJIT_SECT_shstrtab
373};
374
375/* -- In-memory ELF object generation ------------------------------------- */
376
377/* Context for generating the ELF object for the GDB JIT API. */
378typedef struct GDBJITctx {
379 uint8_t *p; /* Pointer to next address in obj.space. */
380 uint8_t *startp; /* Pointer to start address in obj.space. */
381 Trace *T; /* Generate symbols for this trace. */
382 uintptr_t mcaddr; /* Machine code address. */
383 MSize szmcode; /* Size of machine code. */
384 MSize spadjp; /* Stack adjustment for parent trace or interpreter. */
385 MSize spadj; /* Stack adjustment for trace itself. */
386 BCLine lineno; /* Starting line number. */
387 const char *filename; /* Starting file name. */
388 const char *trname; /* Name of trace. */
389 size_t objsize; /* Final size of ELF object. */
390 GDBJITobj obj; /* In-memory ELF object. */
391} GDBJITctx;
392
393/* Add a zero-terminated string. */
394static uint32_t gdbjit_strz(GDBJITctx *ctx, const char *str)
395{
396 uint8_t *p = ctx->p;
397 uint32_t ofs = (uint32_t)(p - ctx->startp);
398 do {
399 *p++ = (uint8_t)*str;
400 } while (*str++);
401 ctx->p = p;
402 return ofs;
403}
404
405/* Add a ULEB128 value. */
406static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
407{
408 uint8_t *p = ctx->p;
409 for (; v >= 0x80; v >>= 7)
410 *p++ = (uint8_t)((v & 0x7f) | 0x80);
411 *p++ = (uint8_t)v;
412 ctx->p = p;
413}
414
415/* Add a SLEB128 value. */
416static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
417{
418 uint8_t *p = ctx->p;
419 for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7)
420 *p++ = (uint8_t)((v & 0x7f) | 0x80);
421 *p++ = (uint8_t)(v & 0x7f);
422 ctx->p = p;
423}
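
/* Editor's sketch (not part of lj_gdbjit.c): the emitters above produce the
** standard DWARF LEB128 format, 7 payload bits per byte with the 0x80
** continuation bit set on all but the last byte. Standalone version, with
** the classic worked example from the DWARF spec in the comment:
*/
static size_t uleb128_encode(uint8_t *dst, uint32_t v)
{
  uint8_t *p = dst;
  for (; v >= 0x80; v >>= 7)
    *p++ = (uint8_t)((v & 0x7f) | 0x80);  /* More bytes follow. */
  *p++ = (uint8_t)v;                      /* Final byte. */
  return (size_t)(p - dst);          /* E.g. 624485 -> e5 8e 26 (3 bytes). */
}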
424
425/* Shortcuts to generate DWARF structures. */
426#define DB(x) (*p++ = (x))
427#define DI8(x) (*(int8_t *)p = (x), p++)
428#define DU16(x) (*(uint16_t *)p = (x), p += 2)
429#define DU32(x) (*(uint32_t *)p = (x), p += 4)
430#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
431#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p)
432#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
433#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
434#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
435#define DSECT(name, stmt) \
436 { uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \
437 *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } \
438
439/* Initialize ELF section headers. */
440static void LJ_FASTCALL gdbjit_secthdr(GDBJITctx *ctx)
441{
442 ELFsectheader *sect;
443
444 *ctx->p++ = '\0'; /* Empty string at start of string table. */
445
446#define SECTDEF(id, tp, al) \
447 sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \
448 sect->name = gdbjit_strz(ctx, "." #id); \
449 sect->type = ELFSECT_TYPE_##tp; \
450 sect->align = (al)
451
452 SECTDEF(text, NOBITS, 16);
453 sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC;
454 sect->addr = ctx->mcaddr;
455 sect->ofs = 0;
456 sect->size = ctx->szmcode;
457
458 SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t));
459 sect->flags = ELFSECT_FLAGS_ALLOC;
460
461 SECTDEF(shstrtab, STRTAB, 1);
462 SECTDEF(strtab, STRTAB, 1);
463
464 SECTDEF(symtab, SYMTAB, sizeof(uintptr_t));
465 sect->ofs = offsetof(GDBJITobj, sym);
466 sect->size = sizeof(ctx->obj.sym);
467 sect->link = GDBJIT_SECT_strtab;
468 sect->entsize = sizeof(ELFsymbol);
469 sect->info = GDBJIT_SYM_FUNC;
470
471 SECTDEF(debug_info, PROGBITS, 1);
472 SECTDEF(debug_abbrev, PROGBITS, 1);
473 SECTDEF(debug_line, PROGBITS, 1);
474
475#undef SECTDEF
476}
477
478/* Initialize symbol table. */
479static void LJ_FASTCALL gdbjit_symtab(GDBJITctx *ctx)
480{
481 ELFsymbol *sym;
482
483 *ctx->p++ = '\0'; /* Empty string at start of string table. */
484
485 sym = &ctx->obj.sym[GDBJIT_SYM_FILE];
486 sym->name = gdbjit_strz(ctx, "JIT mcode");
487 sym->sectidx = ELFSECT_IDX_ABS;
488 sym->info = ELFSYM_TYPE_FILE|ELFSYM_BIND_LOCAL;
489
490 sym = &ctx->obj.sym[GDBJIT_SYM_FUNC];
491 sym->name = gdbjit_strz(ctx, ctx->trname);
492 sym->sectidx = GDBJIT_SECT_text;
493 sym->value = 0;
494 sym->size = ctx->szmcode;
495 sym->info = ELFSYM_TYPE_FUNC|ELFSYM_BIND_GLOBAL;
496}
497
498/* Initialize .eh_frame section. */
499static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
500{
501 uint8_t *p = ctx->p;
502 uint8_t *framep = p;
503
504 /* Emit DWARF EH CIE. */
505 DSECT(CIE,
506 DU32(0); /* Offset to CIE itself. */
507 DB(DW_CIE_VERSION);
508 DSTR("zR"); /* Augmentation. */
509 DUV(1); /* Code alignment factor. */
510 DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */
511 DB(DW_REG_RA); /* Return address register. */
512 DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. */
513 DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t));
514 DB(DW_CFA_offset|DW_REG_RA); DUV(1);
515 DALIGNNOP(sizeof(uintptr_t));
516 )
517
518 /* Emit DWARF EH FDE. */
519 DSECT(FDE,
520 DU32((uint32_t)(p-framep)); /* Offset to CIE. */
521 DU32(0); /* Machine code offset relative to .text. */
522 DU32(ctx->szmcode); /* Machine code length. */
523 DB(0); /* Augmentation data. */
524 /* Registers saved in CFRAME. */
525#if LJ_TARGET_X86
526 DB(DW_CFA_offset|DW_REG_BP); DUV(2);
527 DB(DW_CFA_offset|DW_REG_DI); DUV(3);
528 DB(DW_CFA_offset|DW_REG_SI); DUV(4);
529 DB(DW_CFA_offset|DW_REG_BX); DUV(5);
530#elif LJ_TARGET_X64
531 /* Add saved registers for x64 CFRAME. */
532#else
533#error "Unsupported target architecture"
534#endif
535 if (ctx->spadjp != ctx->spadj) { /* Parent/interpreter stack frame size. */
536 DB(DW_CFA_def_cfa_offset); DUV(ctx->spadjp);
537 DB(DW_CFA_advance_loc|1); /* Only an approximation. */
538 }
539 DB(DW_CFA_def_cfa_offset); DUV(ctx->spadj); /* Trace stack frame size. */
540 DALIGNNOP(sizeof(uintptr_t));
541 )
542
543 ctx->p = p;
544}
545
546/* Initialize .debug_info section. */
547static void LJ_FASTCALL gdbjit_debuginfo(GDBJITctx *ctx)
548{
549 uint8_t *p = ctx->p;
550
551 DSECT(info,
552 DU16(2); /* DWARF version. */
553 DU32(0); /* Abbrev offset. */
554 DB(sizeof(uintptr_t)); /* Pointer size. */
555
556 DUV(1); /* Abbrev #1: DW_TAG_compile_unit. */
557 DSTR(ctx->filename); /* DW_AT_name. */
558 DADDR(ctx->mcaddr); /* DW_AT_low_pc. */
559 DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */
560 DU32(0); /* DW_AT_stmt_list. */
561 )
562
563 ctx->p = p;
564}
565
566/* Initialize .debug_abbrev section. */
567static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx)
568{
569 uint8_t *p = ctx->p;
570
571 /* Abbrev #1: DW_TAG_compile_unit. */
572 DUV(1); DUV(DW_TAG_compile_unit);
573 DB(DW_children_no);
574 DUV(DW_AT_name); DUV(DW_FORM_string);
575 DUV(DW_AT_low_pc); DUV(DW_FORM_addr);
576 DUV(DW_AT_high_pc); DUV(DW_FORM_addr);
577 DUV(DW_AT_stmt_list); DUV(DW_FORM_data4);
578 DB(0); DB(0);
579
580 ctx->p = p;
581}
582
583#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op)))
584
585/* Initialize .debug_line section. */
586static void LJ_FASTCALL gdbjit_debugline(GDBJITctx *ctx)
587{
588 uint8_t *p = ctx->p;
589
590 DSECT(line,
591 DU16(2); /* DWARF version. */
592 DSECT(header,
593 DB(1); /* Minimum instruction length. */
594 DB(1); /* is_stmt. */
595 DI8(0); /* Line base for special opcodes. */
596 DB(2); /* Line range for special opcodes. */
597 DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */
598 DB(0); DB(1); DB(1); /* Standard opcode lengths. */
599 /* Directory table. */
600 DB(0);
601 /* File name table. */
602 DSTR(ctx->filename); DUV(0); DUV(0); DUV(0);
603 DB(0);
604 )
605
606 DLNE(DW_LNE_set_address, sizeof(uintptr_t)); DADDR(ctx->mcaddr);
607 if (ctx->lineno) {
608 DB(DW_LNS_advance_line); DSV(ctx->lineno-1);
609 }
610 DB(DW_LNS_copy);
611 DB(DW_LNS_advance_pc); DUV(ctx->szmcode);
612 DLNE(DW_LNE_end_sequence, 0);
613 )
614
615 ctx->p = p;
616}
617
618#undef DLNE
619
620/* Undef shortcuts. */
621#undef DB
622#undef DI8
623#undef DU16
624#undef DU32
625#undef DADDR
626#undef DUV
627#undef DSV
628#undef DSTR
629#undef DALIGNNOP
630#undef DSECT
631
632/* Type of a section initializer callback. */
633typedef void (LJ_FASTCALL *GDBJITinitf)(GDBJITctx *ctx);
634
635/* Call section initializer and set the section offset and size. */
636static void gdbjit_initsect(GDBJITctx *ctx, int sect, GDBJITinitf initf)
637{
638 ctx->startp = ctx->p;
639 ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj);
640 initf(ctx);
641 ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp);
642}
643
644#define SECTALIGN(p, a) \
645 ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1)))
646
647/* Build in-memory ELF object. */
648static void gdbjit_buildobj(GDBJITctx *ctx)
649{
650 GDBJITobj *obj = &ctx->obj;
651 /* Fill in ELF header and clear structures. */
652 memcpy(&obj->hdr, &elfhdr_template, sizeof(ELFheader));
653 memset(&obj->sect, 0, sizeof(ELFsectheader)*GDBJIT_SECT__MAX);
654 memset(&obj->sym, 0, sizeof(ELFsymbol)*GDBJIT_SYM__MAX);
655 /* Initialize sections. */
656 ctx->p = obj->space;
657 gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab, gdbjit_secthdr);
658 gdbjit_initsect(ctx, GDBJIT_SECT_strtab, gdbjit_symtab);
659 gdbjit_initsect(ctx, GDBJIT_SECT_debug_info, gdbjit_debuginfo);
660 gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev, gdbjit_debugabbrev);
661 gdbjit_initsect(ctx, GDBJIT_SECT_debug_line, gdbjit_debugline);
662 SECTALIGN(ctx->p, sizeof(uintptr_t));
663 gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe);
664 ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
665 lua_assert(ctx->objsize < sizeof(GDBJITobj));
666}
667
668#undef SECTALIGN
669
670/* -- Interface to GDB JIT API -------------------------------------------- */
671
672/* Add new entry to GDB JIT symbol chain. */
673static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
674{
675 /* Allocate memory for GDB JIT entry and ELF object. */
676 MSize sz = (MSize)(sizeof(GDBJITentryobj) - sizeof(GDBJITobj) + ctx->objsize);
677 GDBJITentryobj *eo = lj_mem_newt(L, sz, GDBJITentryobj);
678 memcpy(&eo->obj, &ctx->obj, ctx->objsize); /* Copy ELF object. */
679 eo->sz = sz;
680 ctx->T->gdbjit_entry = (void *)eo;
681 /* Link new entry to chain and register it. */
682 eo->entry.prev_entry = NULL;
683 eo->entry.next_entry = __jit_debug_descriptor.first_entry;
684 if (eo->entry.next_entry)
685 eo->entry.next_entry->prev_entry = &eo->entry;
686 eo->entry.symfile_addr = (const char *)&eo->obj;
687 eo->entry.symfile_size = ctx->objsize;
688 __jit_debug_descriptor.first_entry = &eo->entry;
689 __jit_debug_descriptor.relevant_entry = &eo->entry;
690 __jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
691 __jit_debug_register_code();
692}
693
694/* Add debug info for newly compiled trace and notify GDB. */
695void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
696{
697 GDBJITctx ctx;
698 lua_State *L = J->L;
699 GCproto *pt = &gcref(T->startpt)->pt;
700 TraceNo parent = T->ir[REF_BASE].op1;
701 uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots);
702 const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs];
703 ctx.T = T;
704 ctx.mcaddr = (uintptr_t)T->mcode;
705 ctx.szmcode = T->szmcode;
706 ctx.spadjp = CFRAME_SIZE + (MSize)(parent ? J->trace[parent]->spadjust : 0);
707 ctx.spadj = CFRAME_SIZE + T->spadjust;
708 ctx.lineno = pt->lineinfo ? pt->lineinfo[startpc - pt->bc] : 0;
709 ctx.filename = strdata(pt->chunkname);
710 if (*ctx.filename == '@' || *ctx.filename == '=')
711 ctx.filename++;
712 else
713 ctx.filename = "(string)";
714 ctx.trname = lj_str_pushf(L, "TRACE_%d", traceno);
715 L->top--;
716 gdbjit_buildobj(&ctx);
717 gdbjit_newentry(L, &ctx);
718}
719
720/* Delete debug info for trace and notify GDB. */
721void lj_gdbjit_deltrace(jit_State *J, Trace *T)
722{
723 GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
724 if (eo) {
725 if (eo->entry.prev_entry)
726 eo->entry.prev_entry->next_entry = eo->entry.next_entry;
727 else
728 __jit_debug_descriptor.first_entry = eo->entry.next_entry;
729 if (eo->entry.next_entry)
730 eo->entry.next_entry->prev_entry = eo->entry.prev_entry;
731 __jit_debug_descriptor.relevant_entry = &eo->entry;
732 __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
733 __jit_debug_register_code();
734 lj_mem_free(J2G(J), eo, eo->sz);
735 }
736}
737
738#endif
739#endif
diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h
new file mode 100644
index 00000000..2221948f
--- /dev/null
+++ b/src/lj_gdbjit.h
@@ -0,0 +1,22 @@
1/*
2** Client for the GDB JIT API.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_GDBJIT_H
7#define _LJ_GDBJIT_H
8
9#include "lj_obj.h"
10#include "lj_jit.h"
11
12#if LJ_HASJIT && defined(LUAJIT_USE_GDBJIT)
13
14LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno);
15LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, Trace *T);
16
17#else
18#define lj_gdbjit_addtrace(J, T, tn) UNUSED(T)
19#define lj_gdbjit_deltrace(J, T) UNUSED(T)
20#endif
21
22#endif
diff --git a/src/lj_ir.c b/src/lj_ir.c
new file mode 100644
index 00000000..2ff54821
--- /dev/null
+++ b/src/lj_ir.c
@@ -0,0 +1,461 @@
1/*
2** SSA IR (Intermediate Representation) emitter.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_ir_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_str.h"
15#include "lj_ir.h"
16#include "lj_jit.h"
17#include "lj_iropt.h"
18#include "lj_trace.h"
19
20/* Some local macros to save typing. Undef'd at the end. */
21#define IR(ref) (&J->cur.ir[(ref)])
22#define fins (&J->fold.ins)
23
24/* Pass IR on to next optimization in chain (FOLD). */
25#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
26
27/* -- IR tables ----------------------------------------------------------- */
28
29/* IR instruction modes. */
30LJ_DATADEF const uint8_t lj_ir_mode[IR__MAX+1] = {
31IRDEF(IRMODE)
32 0
33};
34
35/* -- IR emitter ---------------------------------------------------------- */
36
37/* Grow IR buffer at the top. */
38void LJ_FASTCALL lj_ir_growtop(jit_State *J)
39{
40 IRIns *baseir = J->irbuf + J->irbotlim;
41 MSize szins = J->irtoplim - J->irbotlim;
42 if (szins) {
43 baseir = (IRIns *)lj_mem_realloc(J->L, baseir, szins*sizeof(IRIns),
44 2*szins*sizeof(IRIns));
45 J->irtoplim = J->irbotlim + 2*szins;
46 } else {
47 baseir = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns));
48 J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4;
49 J->irtoplim = J->irbotlim + LJ_MIN_IRSZ;
50 }
51 J->cur.ir = J->irbuf = baseir - J->irbotlim;
52}
53
54/* Grow IR buffer at the bottom or shift it up. */
55static void lj_ir_growbot(jit_State *J)
56{
57 IRIns *baseir = J->irbuf + J->irbotlim;
58 MSize szins = J->irtoplim - J->irbotlim;
59 lua_assert(szins != 0);
60 lua_assert(J->cur.nk == J->irbotlim);
61 if (J->cur.nins + (szins >> 1) < J->irtoplim) {
62 /* More than half of the buffer is free on top: shift up by a quarter. */
63 MSize ofs = szins >> 2;
64 memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
65 J->irbotlim -= ofs;
66 J->irtoplim -= ofs;
67 J->cur.ir = J->irbuf = baseir - J->irbotlim;
68 } else {
69 /* Double the buffer size, but split the growth amongst top/bottom. */
70 IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns);
71 MSize ofs = szins >= 256 ? 128 : (szins >> 1); /* Limit bottom growth. */
72 memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
73 lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns));
74 J->irbotlim -= ofs;
75 J->irtoplim = J->irbotlim + 2*szins;
76 J->cur.ir = J->irbuf = newbase - J->irbotlim;
77 }
78}
79
80/* Emit IR without any optimizations. */
81TRef LJ_FASTCALL lj_ir_emit(jit_State *J)
82{
83 IRRef ref = lj_ir_nextins(J);
84 IRIns *ir = IR(ref);
85 IROp op = fins->o;
86 ir->prev = J->chain[op];
87 J->chain[op] = (IRRef1)ref;
88 ir->o = op;
89 ir->op1 = fins->op1;
90 ir->op2 = fins->op2;
91 J->guardemit.irt |= fins->t.irt;
92 return TREF(ref, irt_t((ir->t = fins->t)));
93}
94
95/* -- Interning of constants ---------------------------------------------- */
96
97/*
98** IR instructions for constants are kept in the range J->cur.nk <= ref < REF_BIAS.
99** They are chained like all other instructions, but grow downwards.
100** They are interned (like strings in the VM) to facilitate reference
101** comparisons. The same constant must get the same reference.
102*/
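/* Editor's illustrative sketch, not part of the original source: interning
** means the same constant always yields the same reference, so later passes
** can compare constants by reference alone. Assumes a live jit_State *J
** inside the recorder; the guard macro and function name are hypothetical
** and never defined, so this is compiled out.
*/
#ifdef LJ_IR_EXAMPLE_SKETCH
static void ir_intern_example(jit_State *J)
{
  TRef a = lj_ir_kint(J, 42);  /* First use interns the constant. */
  TRef b = lj_ir_kint(J, 42);  /* Second use finds it in the IR_KINT chain. */
  lua_assert(tref_ref(a) == tref_ref(b));  /* Identical interned reference. */
}
#endif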
103
104/* Get ref of next IR constant and optionally grow IR.
105** Note: this may invalidate all IRIns *!
106*/
107static LJ_AINLINE IRRef ir_nextk(jit_State *J)
108{
109 IRRef ref = J->cur.nk;
110 if (LJ_UNLIKELY(ref <= J->irbotlim)) lj_ir_growbot(J);
111 J->cur.nk = --ref;
112 return ref;
113}
114
115/* Intern int32_t constant. */
116TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
117{
118 IRIns *ir, *cir = J->cur.ir;
119 IRRef ref;
120 for (ref = J->chain[IR_KINT]; ref; ref = cir[ref].prev)
121 if (cir[ref].i == k)
122 goto found;
123 ref = ir_nextk(J);
124 ir = IR(ref);
125 ir->i = k;
126 ir->t.irt = IRT_INT;
127 ir->o = IR_KINT;
128 ir->prev = J->chain[IR_KINT];
129 J->chain[IR_KINT] = (IRRef1)ref;
130found:
131 return TREF(ref, IRT_INT);
132}
133
134/* The MRef inside the KNUM IR instruction holds the address of the constant
135** (an aligned double or a special 64 bit pattern). The KNUM constants
136** themselves are stored in a chained array and shared across traces.
137**
138** Rationale for choosing this data structure:
139** - The address of the constants is embedded in the generated machine code
140** and must never move. A resizable array or hash table wouldn't work.
141** - Most apps need very few non-integer constants (less than a dozen).
142** - Linear search is hard to beat in terms of speed and low complexity.
143*/
144typedef struct KNumArray {
145 MRef next; /* Pointer to next list. */
146 MSize numk; /* Number of used elements in this array. */
147 TValue k[LJ_MIN_KNUMSZ]; /* Array of constants. */
148} KNumArray;
149
150/* Free all chained arrays. */
151void lj_ir_knum_freeall(jit_State *J)
152{
153 KNumArray *kn;
154 for (kn = mref(J->knum, KNumArray); kn; ) {
155 KNumArray *next = mref(kn->next, KNumArray);
156 lj_mem_free(J2G(J), kn, sizeof(KNumArray));
157 kn = next;
158 }
159}
160
161/* Find KNUM constant in chained array or add it. */
162static cTValue *ir_knum_find(jit_State *J, uint64_t nn)
163{
164 KNumArray *kn, *knp = NULL;
165 TValue *ntv;
166 MSize idx;
167 /* Search for the constant in the whole chain of arrays. */
168 for (kn = mref(J->knum, KNumArray); kn; kn = mref(kn->next, KNumArray)) {
169 knp = kn; /* Remember previous element in list. */
170 for (idx = 0; idx < kn->numk; idx++) { /* Search one array. */
171 TValue *tv = &kn->k[idx];
172 if (tv->u64 == nn) /* Needed for +-0/NaN/absmask. */
173 return tv;
174 }
175 }
176 /* Constant was not found, need to add it. */
177 if (!(knp && knp->numk < LJ_MIN_KNUMSZ)) { /* Allocate a new array. */
178 KNumArray *nkn = lj_mem_newt(J->L, sizeof(KNumArray), KNumArray);
179 setmref(nkn->next, NULL);
180 nkn->numk = 0;
181 if (knp)
182 setmref(knp->next, nkn); /* Chain to the end of the list. */
183 else
184 setmref(J->knum, nkn); /* Link first array. */
185 knp = nkn;
186 }
187 ntv = &knp->k[knp->numk++]; /* Add to current array. */
188 ntv->u64 = nn;
189 return ntv;
190}
191
192/* Intern FP constant, given by its address. */
193TRef lj_ir_knum_addr(jit_State *J, cTValue *tv)
194{
195 IRIns *ir, *cir = J->cur.ir;
196 IRRef ref;
197 for (ref = J->chain[IR_KNUM]; ref; ref = cir[ref].prev)
198 if (ir_knum(&cir[ref]) == tv)
199 goto found;
200 ref = ir_nextk(J);
201 ir = IR(ref);
202 setmref(ir->ptr, tv);
203 ir->t.irt = IRT_NUM;
204 ir->o = IR_KNUM;
205 ir->prev = J->chain[IR_KNUM];
206 J->chain[IR_KNUM] = (IRRef1)ref;
207found:
208 return TREF(ref, IRT_NUM);
209}
210
211/* Intern FP constant, given by its 64 bit pattern. */
212TRef lj_ir_knum_nn(jit_State *J, uint64_t nn)
213{
214 return lj_ir_knum_addr(J, ir_knum_find(J, nn));
215}
216
217/* Special 16 byte aligned SIMD constants. */
218LJ_DATADEF LJ_ALIGN(16) cTValue lj_ir_knum_tv[4] = {
219 { U64x(7fffffff,ffffffff) }, { U64x(7fffffff,ffffffff) },
220 { U64x(80000000,00000000) }, { U64x(80000000,00000000) }
221};
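/* Editor's note, not part of the original source: the first pair is an
** absolute-value mask (all bits of a double set except the sign bit) and the
** second pair is a sign mask (only the sign bit set). Duplicating each value
** with 16 byte alignment presumably lets the backend use them directly as
** packed AND/XOR operands for FP abs and negation (cf. lj_ir_knum_abs and
** lj_ir_knum_neg in lj_iropt.h).
*/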
222
223/* Check whether a number is int and return it. -0 is NOT considered an int. */
224static int numistrueint(lua_Number n, int32_t *kp)
225{
226 int32_t k = lj_num2int(n);
227 if (n == cast_num(k)) {
228 if (kp) *kp = k;
229 if (k == 0) { /* Special check for -0. */
230 TValue tv;
231 setnumV(&tv, n);
232 if (tv.u32.hi != 0)
233 return 0;
234 }
235 return 1;
236 }
237 return 0;
238}
239
240/* Intern number as int32_t constant if possible, otherwise as FP constant. */
241TRef lj_ir_knumint(jit_State *J, lua_Number n)
242{
243 int32_t k;
244 if (numistrueint(n, &k))
245 return lj_ir_kint(J, k);
246 else
247 return lj_ir_knum(J, n);
248}
249
250/* Intern GC object "constant". */
251TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
252{
253 IRIns *ir, *cir = J->cur.ir;
254 IRRef ref;
255 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
256 if (ir_kgc(&cir[ref]) == o)
257 goto found;
258 ref = ir_nextk(J);
259 ir = IR(ref);
260 /* NOBARRIER: Current trace is a GC root. */
261 setgcref(ir->gcr, o);
262 ir->t.irt = (uint8_t)t;
263 ir->o = IR_KGC;
264 ir->prev = J->chain[IR_KGC];
265 J->chain[IR_KGC] = (IRRef1)ref;
266found:
267 return TREF(ref, t);
268}
269
270/* Intern 32 bit pointer constant. */
271TRef lj_ir_kptr(jit_State *J, void *ptr)
272{
273 IRIns *ir, *cir = J->cur.ir;
274 IRRef ref;
275 lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr);
276 for (ref = J->chain[IR_KPTR]; ref; ref = cir[ref].prev)
277 if (mref(cir[ref].ptr, void) == ptr)
278 goto found;
279 ref = ir_nextk(J);
280 ir = IR(ref);
281 setmref(ir->ptr, ptr);
282 ir->t.irt = IRT_PTR;
283 ir->o = IR_KPTR;
284 ir->prev = J->chain[IR_KPTR];
285 J->chain[IR_KPTR] = (IRRef1)ref;
286found:
287 return TREF(ref, IRT_PTR);
288}
289
290/* Intern typed NULL constant. */
291TRef lj_ir_knull(jit_State *J, IRType t)
292{
293 IRIns *ir, *cir = J->cur.ir;
294 IRRef ref;
295 for (ref = J->chain[IR_KNULL]; ref; ref = cir[ref].prev)
296 if (irt_t(cir[ref].t) == t)
297 goto found;
298 ref = ir_nextk(J);
299 ir = IR(ref);
300 ir->i = 0;
301 ir->t.irt = (uint8_t)t;
302 ir->o = IR_KNULL;
303 ir->prev = J->chain[IR_KNULL];
304 J->chain[IR_KNULL] = (IRRef1)ref;
305found:
306 return TREF(ref, t);
307}
308
309/* Intern key slot. */
310TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot)
311{
312 IRIns *ir, *cir = J->cur.ir;
313 IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot);
314 IRRef ref;
315 /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */
316 lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot);
317 for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev)
318 if (cir[ref].op12 == op12)
319 goto found;
320 ref = ir_nextk(J);
321 ir = IR(ref);
322 ir->op12 = op12;
323 ir->t.irt = IRT_PTR;
324 ir->o = IR_KSLOT;
325 ir->prev = J->chain[IR_KSLOT];
326 J->chain[IR_KSLOT] = (IRRef1)ref;
327found:
328 return TREF(ref, IRT_PTR);
329}
330
331/* -- Access to IR constants ---------------------------------------------- */
332
333/* Copy value of IR constant. */
334void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
335{
336 UNUSED(L);
337 lua_assert(ir->o != IR_KSLOT); /* Common mistake. */
338 if (irt_isint(ir->t)) {
339 lua_assert(ir->o == IR_KINT);
340 setintV(tv, ir->i);
341 } else if (irt_isnum(ir->t)) {
342 lua_assert(ir->o == IR_KNUM);
343 setnumV(tv, ir_knum(ir)->n);
344 } else if (irt_ispri(ir->t)) {
345 lua_assert(ir->o == IR_KPRI);
346 setitype(tv, irt_toitype(ir->t));
347 } else {
348 if (ir->o == IR_KGC) {
349 lua_assert(irt_isgcv(ir->t));
350 setgcV(L, tv, &ir_kgc(ir)->gch, irt_toitype(ir->t));
351 } else {
352 lua_assert(ir->o == IR_KPTR || ir->o == IR_KNULL);
353 setlightudV(tv, mref(ir->ptr, void));
354 }
355 }
356}
357
358/* -- Convert IR operand types -------------------------------------------- */
359
360/* Convert from integer or string to number. */
361TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr)
362{
363 if (!tref_isnum(tr)) {
364 if (tref_isinteger(tr))
365 tr = emitir(IRTN(IR_TONUM), tr, 0);
366 else if (tref_isstr(tr))
367 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
368 else
369 lj_trace_err(J, LJ_TRERR_BADTYPE);
370 }
371 return tr;
372}
373
374/* Convert from integer or number to string. */
375TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
376{
377 if (!tref_isstr(tr)) {
378 if (!tref_isnumber(tr))
379 lj_trace_err(J, LJ_TRERR_BADTYPE);
380 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
381 }
382 return tr;
383}
384
385/* Convert from number or string to bitop operand (overflow wrapped). */
386TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr)
387{
388 if (!tref_isinteger(tr)) {
389 if (tref_isstr(tr))
390 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
391 else if (!tref_isnum(tr))
392 lj_trace_err(J, LJ_TRERR_BADTYPE);
393 tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
394 }
395 return tr;
396}
397
398/* Convert from number or string to integer (overflow undefined). */
399TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr)
400{
401 if (!tref_isinteger(tr)) {
402 if (tref_isstr(tr))
403 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
404 else if (!tref_isnum(tr))
405 lj_trace_err(J, LJ_TRERR_BADTYPE);
406 tr = emitir(IRTI(IR_TOINT), tr, IRTOINT_ANY);
407 }
408 return tr;
409}
410
411/* -- Miscellaneous IR ops ------------------------------------------------ */
412
413/* Evaluate numeric comparison. */
414int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op)
415{
416 switch (op) {
417 case IR_EQ: return (a == b);
418 case IR_NE: return (a != b);
419 case IR_LT: return (a < b);
420 case IR_GE: return (a >= b);
421 case IR_LE: return (a <= b);
422 case IR_GT: return (a > b);
423 case IR_ULT: return !(a >= b);
424 case IR_UGE: return !(a < b);
425 case IR_ULE: return !(a > b);
426 case IR_UGT: return !(a <= b);
427 default: lua_assert(0); return 0;
428 }
429}
430
431/* Evaluate string comparison. */
432int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op)
433{
434 int res = lj_str_cmp(a, b);
435 switch (op) {
436 case IR_LT: return (res < 0);
437 case IR_GE: return (res >= 0);
438 case IR_LE: return (res <= 0);
439 case IR_GT: return (res > 0);
440 default: lua_assert(0); return 0;
441 }
442}
443
444/* Rollback IR to previous state. */
445void lj_ir_rollback(jit_State *J, IRRef ref)
446{
447 IRRef nins = J->cur.nins;
448 while (nins > ref) {
449 IRIns *ir;
450 nins--;
451 ir = IR(nins);
452 J->chain[ir->o] = ir->prev;
453 }
454 J->cur.nins = nins;
455}
456
457#undef IR
458#undef fins
459#undef emitir
460
461#endif
diff --git a/src/lj_ir.h b/src/lj_ir.h
new file mode 100644
index 00000000..a6973a81
--- /dev/null
+++ b/src/lj_ir.h
@@ -0,0 +1,429 @@
1/*
2** SSA IR (Intermediate Representation) format.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_IR_H
7#define _LJ_IR_H
8
9#include "lj_obj.h"
10
11/* IR instruction definition. Order matters, see below. */
12#define IRDEF(_) \
13 /* Miscellaneous ops. */ \
14 _(NOP, N , ___, ___) \
15 _(BASE, N , lit, lit) \
16 _(LOOP, G , ___, ___) \
17 _(PHI, S , ref, ref) \
18 _(RENAME, S , ref, lit) \
19 \
20 /* Constants. */ \
21 _(KPRI, N , ___, ___) \
22 _(KINT, N , cst, ___) \
23 _(KGC, N , cst, ___) \
24 _(KPTR, N , cst, ___) \
25 _(KNULL, N , cst, ___) \
26 _(KNUM, N , cst, ___) \
27 _(KSLOT, N , ref, lit) \
28 \
29 /* Guarded assertions. */ \
30 /* Must be properly aligned to flip opposites (^1) and (un)ordered (^4). */ \
31 _(EQ, GC, ref, ref) \
32 _(NE, GC, ref, ref) \
33 \
34 _(ABC, G , ref, ref) \
35 _(FRAME, G , ref, ref) \
36 \
37 _(LT, G , ref, ref) \
38 _(GE, G , ref, ref) \
39 _(LE, G , ref, ref) \
40 _(GT, G , ref, ref) \
41 \
42 _(ULT, G , ref, ref) \
43 _(UGE, G , ref, ref) \
44 _(ULE, G , ref, ref) \
45 _(UGT, G , ref, ref) \
46 \
47 /* Bit ops. */ \
48 _(BNOT, N , ref, ___) \
49 _(BSWAP, N , ref, ___) \
50 _(BAND, C , ref, ref) \
51 _(BOR, C , ref, ref) \
52 _(BXOR, C , ref, ref) \
53 _(BSHL, N , ref, ref) \
54 _(BSHR, N , ref, ref) \
55 _(BSAR, N , ref, ref) \
56 _(BROL, N , ref, ref) \
57 _(BROR, N , ref, ref) \
58 \
59 /* Arithmetic ops. ORDER ARITH (FPMATH/POWI take the space for MOD/POW). */ \
60 _(ADD, C , ref, ref) \
61 _(SUB, N , ref, ref) \
62 _(MUL, C , ref, ref) \
63 _(DIV, N , ref, ref) \
64 \
65 _(FPMATH, N , ref, lit) \
66 _(POWI, N , ref, ref) \
67 \
68 _(NEG, N , ref, ref) \
69 _(ABS, N , ref, ref) \
70 _(ATAN2, N , ref, ref) \
71 _(LDEXP, N , ref, ref) \
72 _(MIN, C , ref, ref) \
73 _(MAX, C , ref, ref) \
74 \
75 /* Overflow-checking arithmetic ops. */ \
76 _(ADDOV, GC, ref, ref) \
77 _(SUBOV, G , ref, ref) \
78 \
79 /* Memory ops. A = array, H = hash, U = upvalue, F = field, S = stack. */ \
80 \
81 /* Memory references. */ \
82 _(AREF, R , ref, ref) \
83 _(HREFK, RG, ref, ref) \
84 _(HREF, L , ref, ref) \
85 _(NEWREF, S , ref, ref) \
86 _(UREFO, LG, ref, lit) \
87 _(UREFC, LG, ref, lit) \
88 _(FREF, R , ref, lit) \
89 _(STRREF, N , ref, ref) \
90 \
91 /* Loads and Stores. These must be in the same order. */ \
92 _(ALOAD, LG, ref, ___) \
93 _(HLOAD, LG, ref, ___) \
94 _(ULOAD, LG, ref, ___) \
95 _(FLOAD, L , ref, lit) \
96 _(SLOAD, LG, lit, lit) \
97 _(XLOAD, L , ref, lit) \
98 \
99 _(ASTORE, S , ref, ref) \
100 _(HSTORE, S , ref, ref) \
101 _(USTORE, S , ref, ref) \
102 _(FSTORE, S , ref, ref) \
103 \
104 /* String ops. */ \
105 _(SNEW, N , ref, ref) \
106 \
107 /* Table ops. */ \
108 _(TNEW, A , lit, lit) \
109 _(TDUP, A , ref, ___) \
110 _(TLEN, L , ref, ___) \
111 _(TBAR, S , ref, ___) \
112 _(OBAR, S , ref, ref) \
113 \
114 /* Type conversions. */ \
115 _(TONUM, N , ref, ___) \
116 _(TOINT, N , ref, lit) \
117 _(TOBIT, N , ref, ref) \
118 _(TOSTR, N , ref, ___) \
119 _(STRTO, G , ref, ___) \
120 \
121 /* End of list. */
122
123/* IR opcodes (max. 256). */
124typedef enum {
125#define IRENUM(name, m, m1, m2) IR_##name,
126IRDEF(IRENUM)
127#undef IRENUM
128 IR__MAX
129} IROp;
130
131/* Stored opcode. */
132typedef uint8_t IROp1;
133
134LJ_STATIC_ASSERT(((int)IR_EQ^1) == (int)IR_NE);
135LJ_STATIC_ASSERT(((int)IR_LT^1) == (int)IR_GE);
136LJ_STATIC_ASSERT(((int)IR_LE^1) == (int)IR_GT);
137LJ_STATIC_ASSERT(((int)IR_LT^3) == (int)IR_GT);
138LJ_STATIC_ASSERT(((int)IR_LT^4) == (int)IR_ULT);
139
140/* Delta between xLOAD and xSTORE. */
141#define IRDELTA_L2S ((int)IR_ASTORE - (int)IR_ALOAD)
142
143LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
144LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
145LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
146
147/* FPMATH sub-functions. ORDER FPM. */
148#define IRFPMDEF(_) \
149 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
150 _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \
151 _(SIN) _(COS) _(TAN) \
152 _(OTHER)
153
154typedef enum {
155#define FPMENUM(name) IRFPM_##name,
156IRFPMDEF(FPMENUM)
157#undef FPMENUM
158 IRFPM__MAX
159} IRFPMathOp;
160
161/* FLOAD field IDs. */
162#define IRFLDEF(_) \
163 _(STR_LEN, GCstr, len) \
164 _(FUNC_ENV, GCfunc, l.env) \
165 _(TAB_META, GCtab, metatable) \
166 _(TAB_ARRAY, GCtab, array) \
167 _(TAB_NODE, GCtab, node) \
168 _(TAB_ASIZE, GCtab, asize) \
169 _(TAB_HMASK, GCtab, hmask) \
170 _(TAB_NOMM, GCtab, nomm) \
171 _(UDATA_META, GCudata, metatable)
172
173typedef enum {
174#define FLENUM(name, type, field) IRFL_##name,
175IRFLDEF(FLENUM)
176#undef FLENUM
177 IRFL__MAX
178} IRFieldID;
179
180/* SLOAD mode bits, stored in op2. */
181#define IRSLOAD_INHERIT 1 /* Inherited by exits/side traces. */
182#define IRSLOAD_READONLY 2 /* Read-only, omit slot store. */
183#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */
184
185/* XLOAD mode, stored in op2. */
186#define IRXLOAD_UNALIGNED 1
187
188/* TOINT mode, stored in op2. Ordered by strength of the checks. */
189#define IRTOINT_CHECK 0 /* Number checked for integerness. */
190#define IRTOINT_INDEX 1 /* Checked + special backprop rules. */
191#define IRTOINT_ANY 2 /* Any FP number is ok. */
192#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */
193
194/* IR operand mode (2 bit). */
195typedef enum {
196 IRMref, /* IR reference. */
197 IRMlit, /* 16 bit unsigned literal. */
198 IRMcst, /* Constant literal: i, gcr or ptr. */
199 IRMnone /* Unused operand. */
200} IRMode;
201#define IRM___ IRMnone
202
203/* Mode bits: Commutative, {Normal/Ref, Alloc, Load, Store}, Guard. */
204#define IRM_C 0x10
205
206#define IRM_N 0x00
207#define IRM_R IRM_N
208#define IRM_A 0x20
209#define IRM_L 0x40
210#define IRM_S 0x60
211
212#define IRM_G 0x80
213
214#define IRM_GC (IRM_G|IRM_C)
215#define IRM_RG (IRM_R|IRM_G)
216#define IRM_LG (IRM_L|IRM_G)
217
218#define irm_op1(m) (cast(IRMode, (m)&3))
219#define irm_op2(m) (cast(IRMode, ((m)>>2)&3))
220#define irm_iscomm(m) ((m) & IRM_C)
221#define irm_kind(m) ((m) & IRM_S)
222#define irm_isguard(m) ((m) & IRM_G)
223/* Stores or any other op with a guard has a side-effect. */
224#define irm_sideeff(m) ((m) >= IRM_S)
225
226#define IRMODE(name, m, m1, m2) ((IRM##m1)|((IRM##m2)<<2)|(IRM_##m)),
227
228LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
229
230/* IR result type and flags (8 bit). */
231typedef enum {
232 /* Map of itypes to non-negative numbers. ORDER LJ_T */
233 IRT_NIL,
234 IRT_FALSE,
235 IRT_TRUE,
236 IRT_LIGHTUD,
237 /* GCobj types are from here ... */
238 IRT_STR,
239 IRT_PTR, /* IRT_PTR never escapes the IR (map of LJ_TUPVAL). */
240 IRT_THREAD,
241 IRT_PROTO,
242 IRT_FUNC,
243 IRT_9, /* LJ_TDEADKEY is never used in the IR. */
244 IRT_TAB,
245 IRT_UDATA,
246 /* ... until here. */
247 IRT_NUM,
248 /* The various integers are only used in the IR and can only escape to
249 ** a TValue after implicit or explicit conversion (TONUM). Their types
250 ** must be contiguous and next to IRT_NUM (see the typerange macros below).
251 */
252 IRT_INT,
253 IRT_I8,
254 IRT_U8,
255 IRT_I16,
256 IRT_U16,
257 /* There is room for 14 more types. */
258
259 /* Additional flags. */
260 IRT_MARK = 0x20, /* Marker for misc. purposes. */
261 IRT_GUARD = 0x40, /* Instruction is a guard. */
262 IRT_ISPHI = 0x80, /* Instruction is left or right PHI operand. */
263
264 /* Masks. */
265 IRT_TYPE = 0x1f,
266 IRT_T = 0xff
267} IRType;
268
269#define irtype_ispri(irt) ((uint32_t)(irt) <= IRT_TRUE)
270
271/* Stored IRType. */
272typedef struct IRType1 { uint8_t irt; } IRType1;
273
274#define IRT(o, t) ((uint32_t)(((o)<<8) | (t)))
275#define IRTI(o) (IRT((o), IRT_INT))
276#define IRTN(o) (IRT((o), IRT_NUM))
277#define IRTG(o, t) (IRT((o), IRT_GUARD|(t)))
278#define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT))
279
280#define irt_t(t) (cast(IRType, (t).irt))
281#define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE))
282#define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0)
283#define irt_typerange(t, first, last) \
284 ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first))
285
286#define irt_isnil(t) (irt_type(t) == IRT_NIL)
287#define irt_ispri(t) ((uint32_t)irt_type(t) <= IRT_TRUE)
288#define irt_isstr(t) (irt_type(t) == IRT_STR)
289#define irt_isfunc(t) (irt_type(t) == IRT_FUNC)
290#define irt_istab(t) (irt_type(t) == IRT_TAB)
291#define irt_isnum(t) (irt_type(t) == IRT_NUM)
292#define irt_isint(t) (irt_type(t) == IRT_INT)
293#define irt_isi8(t) (irt_type(t) == IRT_I8)
294#define irt_isu8(t) (irt_type(t) == IRT_U8)
295#define irt_isi16(t) (irt_type(t) == IRT_I16)
296#define irt_isu16(t) (irt_type(t) == IRT_U16)
297
298#define irt_isinteger(t) (irt_typerange((t), IRT_INT, IRT_U16))
299#define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA))
300#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
301
302#define itype2irt(tv) \
303 (~uitype(tv) < IRT_NUM ? cast(IRType, ~uitype(tv)) : IRT_NUM)
304#define irt_toitype(t) ((int32_t)~(uint32_t)irt_type(t))
305
306#define irt_isguard(t) ((t).irt & IRT_GUARD)
307#define irt_ismarked(t) ((t).irt & IRT_MARK)
308#define irt_setmark(t) ((t).irt |= IRT_MARK)
309#define irt_clearmark(t) ((t).irt &= ~IRT_MARK)
310#define irt_isphi(t) ((t).irt & IRT_ISPHI)
311#define irt_setphi(t) ((t).irt |= IRT_ISPHI)
312#define irt_clearphi(t) ((t).irt &= ~IRT_ISPHI)
313
314/* Stored combined IR opcode and type. */
315typedef uint16_t IROpT;
316
317/* IR references. */
318typedef uint16_t IRRef1; /* One stored reference. */
319typedef uint32_t IRRef2; /* Two stored references. */
320typedef uint32_t IRRef; /* Used to pass around references. */
321
322/* Fixed references. */
323enum {
324 REF_BIAS = 0x8000,
325 REF_TRUE = REF_BIAS-3,
326 REF_FALSE = REF_BIAS-2,
327 REF_NIL = REF_BIAS-1, /* \--- Constants grow downwards. */
328 REF_BASE = REF_BIAS, /* /--- IR grows upwards. */
329 REF_FIRST = REF_BIAS+1,
330 REF_DROP = 0xffff
331};
332
333/* Note: IRMlit operands must be < REF_BIAS, too!
334** This allows for fast and uniform manipulation of all operands
335** without looking up the operand mode in lj_ir_mode:
336** - CSE calculates the maximum reference of two operands.
337** This must work with mixed reference/literal operands, too.
338** - DCE marking only checks for operand >= REF_BIAS.
339** - LOOP needs to substitute reference operands.
340** Constant references and literals must not be modified.
341*/
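/* Editor's illustrative sketch, not part of the original source: since both
** constant references and IRMlit literals stay below REF_BIAS, a pass can
** treat the two operands uniformly without consulting lj_ir_mode. The helper
** name and guard macro are hypothetical.
*/
#ifdef LJ_IR_EXAMPLE_SKETCH
static LJ_AINLINE IRRef ir_maxoperand(IRRef op1, IRRef op2)
{
  return op1 > op2 ? op1 : op2;  /* Max works for mixed ref/literal operands. */
}
#endif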
342
343#define IRREF2(lo, hi) ((IRRef2)(lo) | ((IRRef2)(hi) << 16))
344
345#define irref_isk(ref) ((ref) < REF_BIAS)
346
347/* Tagged IR references. */
348typedef uint32_t TRef;
349
350#define TREF(ref, t) (cast(TRef, (ref) + ((t)<<16)))
351
352#define tref_ref(tr) (cast(IRRef1, (tr)))
353#define tref_t(tr) (cast(IRType, (tr)>>16))
354#define tref_type(tr) (cast(IRType, ((tr)>>16) & IRT_TYPE))
355#define tref_typerange(tr, first, last) \
356 ((((tr)>>16) & IRT_TYPE) - (TRef)(first) <= (TRef)(last-first))
357
358#define tref_istype(tr, t) (((tr) & (IRT_TYPE<<16)) == ((t)<<16))
359#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
360#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
361#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
362#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
363#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
364#define tref_istab(tr) (tref_istype((tr), IRT_TAB))
365#define tref_isudata(tr) (tref_istype((tr), IRT_UDATA))
366#define tref_isnum(tr) (tref_istype((tr), IRT_NUM))
367#define tref_isint(tr) (tref_istype((tr), IRT_INT))
368
369#define tref_isbool(tr) (tref_typerange((tr), IRT_FALSE, IRT_TRUE))
370#define tref_ispri(tr) (tref_typerange((tr), IRT_NIL, IRT_TRUE))
371#define tref_istruecond(tr) (!tref_typerange((tr), IRT_NIL, IRT_FALSE))
372#define tref_isinteger(tr) (tref_typerange((tr), IRT_INT, IRT_U16))
373#define tref_isnumber(tr) (tref_typerange((tr), IRT_NUM, IRT_U16))
374#define tref_isnumber_str(tr) (tref_isnumber((tr)) || tref_isstr((tr)))
375#define tref_isgcv(tr) (tref_typerange((tr), IRT_STR, IRT_UDATA))
376
377#define tref_isk(tr) (irref_isk(tref_ref((tr))))
378#define tref_isk2(tr1, tr2) (irref_isk(tref_ref((tr1) | (tr2))))
379
380#define TREF_PRI(t) (TREF(REF_NIL-(t), (t)))
381#define TREF_NIL (TREF_PRI(IRT_NIL))
382#define TREF_FALSE (TREF_PRI(IRT_FALSE))
383#define TREF_TRUE (TREF_PRI(IRT_TRUE))
384
385/* IR instruction format (64 bit).
386**
387** 16 16 8 8 8 8
388** +-------+-------+---+---+---+---+
389** | op1 | op2 | t | o | r | s |
390** +-------+-------+---+---+---+---+
391** | op12/i/gco | ot | prev | (alternative fields in union)
392** +---------------+-------+-------+
393** 32 16 16
394**
395** prev is only valid prior to register allocation and then reused for r + s.
396*/
397
398typedef union IRIns {
399 struct {
400 LJ_ENDIAN_LOHI(
401 IRRef1 op1; /* IR operand 1. */
402 , IRRef1 op2; /* IR operand 2. */
403 )
404 IROpT ot; /* IR opcode and type (overlaps t and o). */
405 IRRef1 prev; /* Previous ins in same chain (overlaps r and s). */
406 };
407 struct {
408 IRRef2 op12; /* IR operand 1 and 2 (overlaps op1 and op2). */
409 LJ_ENDIAN_LOHI(
410 IRType1 t; /* IR type. */
411 , IROp1 o; /* IR opcode. */
412 )
413 LJ_ENDIAN_LOHI(
414 uint8_t r; /* Register allocation (overlaps prev). */
415 , uint8_t s; /* Spill slot allocation (overlaps prev). */
416 )
417 };
418 int32_t i; /* 32 bit signed integer literal (overlaps op12). */
419 GCRef gcr; /* GCobj constant (overlaps op12). */
420 MRef ptr; /* Pointer constant (overlaps op12). */
421} IRIns;
422
423#define ir_kgc(ir) (gcref((ir)->gcr))
424#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
425#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
426#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
427#define ir_knum(ir) (mref((ir)->ptr, cTValue))
428
429#endif
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
new file mode 100644
index 00000000..69b0a955
--- /dev/null
+++ b/src/lj_iropt.h
@@ -0,0 +1,128 @@
1/*
2** Common header for IR emitter and optimizations.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_IROPT_H
7#define _LJ_IROPT_H
8
9#include "lj_obj.h"
10#include "lj_jit.h"
11
12#if LJ_HASJIT
13/* IR emitter. */
14LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J);
15LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J);
16
17/* Save current IR in J->fold.ins, but do not emit it (yet). */
18static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
19{
20 J->fold.ins.ot = ot; J->fold.ins.op1 = a; J->fold.ins.op2 = b;
21}
22
23#define lj_ir_set(J, ot, a, b) \
24 lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b))
25
26/* Get ref of next IR instruction and optionally grow IR.
27** Note: this may invalidate all IRIns*!
28*/
29static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
30{
31 IRRef ref = J->cur.nins;
32 if (LJ_UNLIKELY(ref >= J->irtoplim)) lj_ir_growtop(J);
33 J->cur.nins = ref + 1;
34 return ref;
35}
36
37/* Interning of constants. */
38LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
39LJ_FUNC void lj_ir_knum_freeall(jit_State *J);
40LJ_FUNC TRef lj_ir_knum_addr(jit_State *J, cTValue *tv);
41LJ_FUNC TRef lj_ir_knum_nn(jit_State *J, uint64_t nn);
42LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
43LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
44LJ_FUNC TRef lj_ir_kptr(jit_State *J, void *ptr);
45LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
46LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
47
48static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
49{
50 TValue tv;
51 tv.n = n;
52 return lj_ir_knum_nn(J, tv.u64);
53}
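/* Editor's note, not part of the original source: lj_ir_knum interns by the
** raw 64 bit pattern (see ir_knum_find in lj_ir.c), not by numeric equality.
** So +0 and -0 intern as distinct constants, and NaNs with different bit
** patterns stay distinct, which is exactly what the "+-0/NaN/absmask" check
** in ir_knum_find relies on.
*/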
54
55#define lj_ir_kstr(J, str) lj_ir_kgc(J, obj2gco((str)), IRT_STR)
56#define lj_ir_ktab(J, tab) lj_ir_kgc(J, obj2gco((tab)), IRT_TAB)
57#define lj_ir_kfunc(J, func) lj_ir_kgc(J, obj2gco((func)), IRT_FUNC)
58
59/* Special FP constants. */
60#define lj_ir_knum_zero(J) lj_ir_knum_nn(J, U64x(00000000,00000000))
61#define lj_ir_knum_one(J) lj_ir_knum_nn(J, U64x(3ff00000,00000000))
62#define lj_ir_knum_tobit(J) lj_ir_knum_nn(J, U64x(43380000,00000000))
63
64/* Special 16 byte aligned SIMD constants. */
65LJ_DATA LJ_ALIGN(16) cTValue lj_ir_knum_tv[4];
66#define lj_ir_knum_abs(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[0])
67#define lj_ir_knum_neg(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[2])
68
69/* Access to constants. */
70LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
71
72/* Convert IR operand types. */
73LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr);
74LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr);
75LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr);
76LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr);
77
78/* Miscellaneous IR ops. */
79LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op);
80LJ_FUNC int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op);
81LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref);
82
83/* Emit IR instructions with on-the-fly optimizations. */
84LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J);
85LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J);
86
87/* Special return values for the fold functions. */
88enum {
89 NEXTFOLD, /* Couldn't fold, pass on. */
90 RETRYFOLD, /* Retry fold with modified fins. */
91 KINTFOLD, /* Return ref for int constant in fins->i. */
92 FAILFOLD, /* Guard would always fail. */
93 DROPFOLD, /* Guard eliminated. */
94 MAX_FOLD
95};
96
97#define INTFOLD(k) ((J->fold.ins.i = (k)), (TRef)KINTFOLD)
98#define CONDFOLD(cond) ((TRef)FAILFOLD + (TRef)(cond))
99#define LEFTFOLD (J->fold.ins.op1)
100#define RIGHTFOLD (J->fold.ins.op2)
101#define CSEFOLD (lj_opt_cse(J))
102#define EMITFOLD (lj_ir_emit(J))
103
104/* Load/store forwarding. */
105LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J);
106LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
107LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
108LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
109LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J);
110LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
111
112/* Dead-store elimination. */
113LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J);
114LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J);
115LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J);
116
117/* Narrowing. */
118LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J);
119LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc);
120LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc);
121LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase);
122
123/* Optimization passes. */
124LJ_FUNC void lj_opt_dce(jit_State *J);
125LJ_FUNC int lj_opt_loop(jit_State *J);
126#endif
127
128#endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
new file mode 100644
index 00000000..280eff41
--- /dev/null
+++ b/src/lj_jit.h
@@ -0,0 +1,279 @@
1/*
2** Common definitions for the JIT compiler.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_JIT_H
7#define _LJ_JIT_H
8
9#include "lj_obj.h"
10#include "lj_ir.h"
11
12/* JIT engine flags. */
13#define JIT_F_ON 0x00000001
14
15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000100
18#define JIT_F_SSE2 0x00000200
19#define JIT_F_SSE4_1 0x00000400
20#define JIT_F_P4 0x00000800
21#define JIT_F_PREFER_IMUL 0x00001000
22#define JIT_F_SPLIT_XMM 0x00002000
23#define JIT_F_LEA_AGU 0x00004000
24
25/* Names for the CPU-specific flags. Must match the order above. */
26#define JIT_F_CPU_FIRST JIT_F_CMOV
27#define JIT_F_CPUSTRING "\4CMOV\4SSE2\6SSE4.1\2P4\3AMD\2K8\4ATOM"
28#else
29#error "Missing CPU-specific JIT engine flags"
30#endif
31
32/* Optimization flags. */
33#define JIT_F_OPT_MASK 0x00ff0000
34
35#define JIT_F_OPT_FOLD 0x00010000
36#define JIT_F_OPT_CSE 0x00020000
37#define JIT_F_OPT_DCE 0x00040000
38#define JIT_F_OPT_FWD 0x00080000
39#define JIT_F_OPT_DSE 0x00100000
40#define JIT_F_OPT_NARROW 0x00200000
41#define JIT_F_OPT_LOOP 0x00400000
42#define JIT_F_OPT_FUSE 0x00800000
43
44/* Optimization names for -O. Must match the order above. */
45#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
46#define JIT_F_OPTSTRING \
47 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse"
48
49/* Optimization levels set a fixed combination of flags. */
50#define JIT_F_OPT_0 0
51#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
52#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
53#define JIT_F_OPT_3 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE)
54#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
55
56#ifdef LUA_USE_WIN
57/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
58#define JIT_P_sizemcode_DEFAULT 64
59#else
60/* Could go as low as 4K, but the mmap() overhead would be rather high. */
61#define JIT_P_sizemcode_DEFAULT 32
62#endif
63
64/* Optimization parameters and their defaults. Length is a char in octal! */
65#define JIT_PARAMDEF(_) \
66 _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
67 _(\011, maxrecord, 2000) /* Max. # of recorded IR instructions. */ \
68 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
69 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
70 _(\007, maxsnap, 100) /* Max. # of snapshots for a trace. */ \
71 \
72 _(\007, hotloop, 57) /* # of iterations to detect a hot loop. */ \
73 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
74 _(\007, tryside, 4) /* # of attempts to compile a side trace. */ \
75 \
76  _(\012, instunroll, 4) /* Max. unroll for unstable loops. */ \
77 _(\012, loopunroll, 7) /* Max. unroll for loop ops in side traces. */ \
78 _(\012, callunroll, 3) /* Max. unroll for recursive calls. */ \
79 _(\011, recunroll, 0) /* Max. unroll for true recursion. */ \
80 \
81 /* Size of each machine code area (in KBytes). */ \
82 _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
83 /* Max. total size of all machine code areas (in KBytes). */ \
84 _(\010, maxmcode, 512) \
85 /* End of list. */
86
87enum {
88#define JIT_PARAMENUM(len, name, value) JIT_P_##name,
89JIT_PARAMDEF(JIT_PARAMENUM)
90#undef JIT_PARAMENUM
91 JIT_P__MAX
92};
93
94#define JIT_PARAMSTR(len, name, value) #len #name
95#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
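/* Editor's worked example, not part of the original source: JIT_PARAMSTR
** turns each entry into an octal length byte followed by the parameter name,
** so JIT_P_STRING concatenates to a single scannable string starting with
**   "\010maxtrace" "\011maxrecord" "\012maxirconst" ...
** where \010 (decimal 8) is the length of "maxtrace". A parser can walk the
** string by reading one length byte, then that many name characters.
*/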
96
97/* Trace compiler state. */
98typedef enum {
99 LJ_TRACE_IDLE, /* Trace compiler idle. */
100 LJ_TRACE_ACTIVE = 0x10,
101 LJ_TRACE_RECORD, /* Bytecode recording active. */
102 LJ_TRACE_START, /* New trace started. */
103 LJ_TRACE_END, /* End of trace. */
104 LJ_TRACE_ASM, /* Assemble trace. */
105 LJ_TRACE_ERR, /* Trace aborted with error. */
106} TraceState;
107
108/* Machine code type. */
109typedef uint8_t MCode;
110
111/* Stack snapshot header. */
112typedef struct SnapShot {
113 uint16_t mapofs; /* Offset into snapshot map. */
114 IRRef1 ref; /* First IR ref for this snapshot. */
115 uint8_t nslots; /* Number of stack slots. */
116 uint8_t nframelinks; /* Number of frame links. */
117 uint8_t count; /* Count of taken exits for this snapshot. */
118 uint8_t unused1;
119} SnapShot;
120
121#define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */
122#define snap_ref(sn) ((IRRef)(IRRef1)(sn))
123#define snap_ridsp(sn) ((sn) >> 16)
124
125/* Snapshot and exit numbers. */
126typedef uint32_t SnapNo;
127typedef uint32_t ExitNo;
128
129/* Trace number. */
130typedef uint32_t TraceNo; /* Used to pass around trace numbers. */
131typedef uint16_t TraceNo1; /* Stored trace number. */
132
133#define TRACE_INTERP 0 /* Fallback to interpreter. */
134
135/* Trace anchor. */
136typedef struct Trace {
137 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
138 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
139 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
140 SnapShot *snap; /* Snapshot array. */
141 IRRef2 *snapmap; /* Snapshot map. */
142 uint16_t nsnap; /* Number of snapshots. */
143 uint16_t nsnapmap; /* Number of snapshot map elements. */
144 GCRef startpt; /* Starting prototype. */
145 BCIns startins; /* Original bytecode of starting instruction. */
146 MCode *mcode; /* Start of machine code. */
147 MSize szmcode; /* Size of machine code. */
148 MSize mcloop; /* Offset of loop start in machine code. */
149 TraceNo1 link; /* Linked trace (or self for loops). */
150 TraceNo1 root; /* Root trace of side trace (or 0 for root traces). */
151 TraceNo1 nextroot; /* Next root trace for same prototype. */
152 TraceNo1 nextside; /* Next side trace of same root trace. */
153 uint16_t nchild; /* Number of child traces (root trace only). */
154 uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */
155#ifdef LUAJIT_USE_GDBJIT
156 void *gdbjit_entry; /* GDB JIT entry. */
157#endif
158} Trace;
159
160/* Round-robin penalty cache for bytecodes leading to aborted traces. */
161typedef struct HotPenalty {
162 const BCIns *pc; /* Starting bytecode PC. */
163 uint16_t val; /* Penalty value, i.e. hotcount start. */
164 uint16_t reason; /* Abort reason (really TraceErr). */
165} HotPenalty;
166
167/* Number of slots for the penalty cache. Must be a power of 2. */
168#define PENALTY_SLOTS 16
169
170/* Round-robin backpropagation cache for narrowing conversions. */
171typedef struct BPropEntry {
172 IRRef1 key; /* Key: original reference. */
173 IRRef1 val; /* Value: reference after conversion. */
174 IRRef mode; /* Mode for this entry (currently IRTOINT_*). */
175} BPropEntry;
176
177/* Number of slots for the backpropagation cache. Must be a power of 2. */
178#define BPROP_SLOTS 16
179
180/* Fold state is used to fold instructions on-the-fly. */
181typedef struct FoldState {
182 IRIns ins; /* Currently emitted instruction. */
183 IRIns left; /* Instruction referenced by left operand. */
184 IRIns right; /* Instruction referenced by right operand. */
185} FoldState;
186
187/* JIT compiler state. */
188typedef struct jit_State {
189 Trace cur; /* Current trace. */
190
191 lua_State *L; /* Current Lua state. */
192 const BCIns *pc; /* Current PC. */
193 BCReg maxslot; /* Relative to baseslot. */
194
195 uint32_t flags; /* JIT engine flags. */
196 TRef *base; /* Current frame base, points into J->slots. */
197 BCReg baseslot; /* Current frame base, offset into J->slots. */
198 GCfunc *fn; /* Current function. */
199 GCproto *pt; /* Current prototype. */
200
201 FoldState fold; /* Fold state. */
202
203 uint8_t mergesnap; /* Allowed to merge with next snapshot. */
204 uint8_t needsnap; /* Need snapshot before recording next bytecode. */
205 IRType1 guardemit; /* Accumulated IRT_GUARD for emitted instructions. */
206 uint8_t unused1;
207
208 const BCIns *bc_min; /* Start of allowed bytecode range for root trace. */
209 MSize bc_extent; /* Extent of the range. */
210
211 TraceState state; /* Trace compiler state. */
212
213  int32_t instunroll; /* Unroll counter for unstable loops. */
214 int32_t loopunroll; /* Unroll counter for loop ops in side traces. */
215 int32_t tailcalled; /* Number of successive tailcalls. */
216 int32_t framedepth; /* Current frame depth. */
217
218 MRef knum; /* Pointer to chained array of KNUM constants. */
219
220 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
221  IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
222  IRRef irbotlim; /* Lower limit of instruction buffer (biased). */
223 IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */
224
225 SnapShot *snapbuf; /* Temp. snapshot buffer. */
226 IRRef2 *snapmapbuf; /* Temp. snapshot map buffer. */
227 MSize sizesnap; /* Size of temp. snapshot buffer. */
228 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
229
230 Trace **trace; /* Array of traces. */
231 TraceNo curtrace; /* Current trace number (if not 0). Kept in J->cur. */
232 TraceNo freetrace; /* Start of scan for next free trace. */
233 MSize sizetrace; /* Size of trace array. */
234
235 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
236 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
237
238 int32_t param[JIT_P__MAX]; /* JIT engine parameters. */
239
240 MCode *exitstubgroup[LJ_MAX_EXITSTUBGR]; /* Exit stub group addresses. */
241
242 HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
243 uint32_t penaltyslot; /* Round-robin index into penalty slots. */
244
245 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
246 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
247
248 const BCIns *startpc; /* Bytecode PC of starting instruction. */
249 TraceNo parent; /* Parent of current side trace (0 for root traces). */
250 ExitNo exitno; /* Exit number in parent of current side trace. */
251
252 TValue errinfo; /* Additional info element for trace errors. */
253
254 MCode *mcarea; /* Base of current mcode area. */
255 MCode *mctop; /* Top of current mcode area. */
256 MCode *mcbot; /* Bottom of current mcode area. */
257 size_t szmcarea; /* Size of current mcode area. */
258 size_t szallmcarea; /* Total size of all allocated mcode areas. */
259 int mcprot; /* Protection of current mcode area. */
260} jit_State;
261
262/* Exit stubs. */
263#if LJ_TARGET_X86ORX64
264/* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */
265#define EXITSTUB_SPACING (2+2)
266#define EXITSTUBS_PER_GROUP 32
267#else
268#error "Missing CPU-specific exit stub definitions"
269#endif
270
271/* Return the address of an exit stub. */
272static LJ_AINLINE MCode *exitstub_addr(jit_State *J, ExitNo exitno)
273{
274 lua_assert(J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] != NULL);
275 return J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] +
276 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
277}
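/* Editor's worked example, not part of the original source: with
** EXITSTUB_SPACING == 4 and EXITSTUBS_PER_GROUP == 32, exit number 33 maps to
** group 33/32 == 1 and lands 4*(33%32) == 4 bytes past that group's base
** address. The group base itself must have been allocated beforehand, as the
** assertion above requires.
*/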
278
279#endif
diff --git a/src/lj_lex.c b/src/lj_lex.c
new file mode 100644
index 00000000..38b0a7d4
--- /dev/null
+++ b/src/lj_lex.c
@@ -0,0 +1,393 @@
1/*
2** Lexical analyzer.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_lex_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_lex.h"
17#include "lj_parse.h"
18#include "lj_ctype.h"
19
20/* Lua lexer token names. */
21static const char *const tokennames[] = {
22#define TKSTR1(name) #name,
23#define TKSTR2(name, sym) #sym,
24TKDEF(TKSTR1, TKSTR2)
25#undef TKSTR1
26#undef TKSTR2
27 NULL
28};
29
30/* -- Buffer handling ----------------------------------------------------- */
31
32#define char2int(c) cast(int, cast(uint8_t, (c)))
33#define next(ls) \
34 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
35#define save_and_next(ls) (save(ls, ls->current), next(ls))
36#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
37#define END_OF_STREAM (-1)
38
39static int fillbuf(LexState *ls)
40{
41 size_t sz;
42 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
43 if (buf == NULL || sz == 0) return END_OF_STREAM;
44 ls->n = (MSize)sz - 1;
45 ls->p = buf;
46 return char2int(*(ls->p++));
47}
48
49static void save(LexState *ls, int c)
50{
51 if (ls->sb.n + 1 > ls->sb.sz) {
52 MSize newsize;
53 if (ls->sb.sz >= LJ_MAX_STR/2)
54 lj_lex_error(ls, 0, LJ_ERR_XELEM);
55 newsize = ls->sb.sz * 2;
56 lj_str_resizebuf(ls->L, &ls->sb, newsize);
57 }
58 ls->sb.buf[ls->sb.n++] = cast(char, c);
59}
60
61static int check_next(LexState *ls, const char *set)
62{
63 if (!strchr(set, ls->current))
64 return 0;
65 save_and_next(ls);
66 return 1;
67}
68
69static void inclinenumber(LexState *ls)
70{
71 int old = ls->current;
72 lua_assert(currIsNewline(ls));
73 next(ls); /* skip `\n' or `\r' */
74 if (currIsNewline(ls) && ls->current != old)
75 next(ls); /* skip `\n\r' or `\r\n' */
76 if (++ls->linenumber >= LJ_MAX_LINE)
77 lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
78}
79
80/* -- Scanner for terminals ----------------------------------------------- */
81
82static void read_numeral(LexState *ls, TValue *tv)
83{
84 lua_assert(lj_ctype_isdigit(ls->current));
85 do {
86 save_and_next(ls);
87 } while (lj_ctype_isdigit(ls->current) || ls->current == '.');
88 if (check_next(ls, "Ee")) /* `E'? */
89 check_next(ls, "+-"); /* optional exponent sign */
90 while (lj_ctype_isident(ls->current))
91 save_and_next(ls);
92 save(ls, '\0');
93 if (!lj_str_numconv(ls->sb.buf, tv))
94 lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
95}
96
97static int skip_sep(LexState *ls)
98{
99 int count = 0;
100 int s = ls->current;
101 lua_assert(s == '[' || s == ']');
102 save_and_next(ls);
103 while (ls->current == '=') {
104 save_and_next(ls);
105 count++;
106 }
107 return (ls->current == s) ? count : (-count) - 1;
108}
109
110static void read_long_string(LexState *ls, TValue *tv, int sep)
111{
112 save_and_next(ls); /* skip 2nd `[' */
113 if (currIsNewline(ls)) /* string starts with a newline? */
114 inclinenumber(ls); /* skip it */
115 for (;;) {
116 switch (ls->current) {
117 case END_OF_STREAM:
118 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
119 break;
120 case ']':
121 if (skip_sep(ls) == sep) {
122 save_and_next(ls); /* skip 2nd `]' */
123 goto endloop;
124 }
125 break;
126 case '\n':
127 case '\r':
128 save(ls, '\n');
129 inclinenumber(ls);
130 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */
131 break;
132 default:
133 if (tv) save_and_next(ls);
134 else next(ls);
135 break;
136 }
137 } endloop:
138 if (tv) {
139 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
140 ls->sb.n - 2*(2 + (MSize)sep));
141 setstrV(ls->L, tv, str);
142 }
143}
144
145static void read_string(LexState *ls, int delim, TValue *tv)
146{
147 save_and_next(ls);
148 while (ls->current != delim) {
149 switch (ls->current) {
150 case END_OF_STREAM:
151 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
152 continue;
153 case '\n':
154 case '\r':
155 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
156 continue;
157 case '\\': {
158 int c;
159 next(ls); /* do not save the `\' */
160 switch (ls->current) {
161 case 'a': c = '\a'; break;
162 case 'b': c = '\b'; break;
163 case 'f': c = '\f'; break;
164 case 'n': c = '\n'; break;
165 case 'r': c = '\r'; break;
166 case 't': c = '\t'; break;
167 case 'v': c = '\v'; break;
168 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;
169 case END_OF_STREAM: continue; /* will raise an error next loop */
170 default:
171 if (!lj_ctype_isdigit(ls->current)) {
172 save_and_next(ls); /* handles \\, \", \', and \? */
173 } else { /* \xxx */
174 int i = 0;
175 c = 0;
176 do {
177 c = 10*c + (ls->current-'0');
178 next(ls);
179 } while (++i<3 && lj_ctype_isdigit(ls->current));
180 if (c > UCHAR_MAX)
181 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
182 save(ls, c);
183 }
184 continue;
185 }
186 save(ls, c);
187 next(ls);
188 continue;
189 }
190 default:
191 save_and_next(ls);
192 break;
193 }
194 }
195 save_and_next(ls); /* skip delimiter */
196 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
197}
198
199/* -- Main lexical scanner ------------------------------------------------ */
200
201static int llex(LexState *ls, TValue *tv)
202{
203 lj_str_resetbuf(&ls->sb);
204 for (;;) {
205 if (lj_ctype_isident(ls->current)) {
206 GCstr *s;
207 if (lj_ctype_isdigit(ls->current)) { /* Numeric literal. */
208 read_numeral(ls, tv);
209 return TK_number;
210 }
211 /* Identifier or reserved word. */
212 do {
213 save_and_next(ls);
214 } while (lj_ctype_isident(ls->current));
215 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
216 if (s->reserved > 0) /* Reserved word? */
217 return TK_OFS + s->reserved;
218 setstrV(ls->L, tv, s);
219 return TK_name;
220 }
221 switch (ls->current) {
222 case '\n':
223 case '\r':
224 inclinenumber(ls);
225 continue;
226 case ' ':
227 case '\t':
228 case '\v':
229 case '\f':
230 next(ls);
231 continue;
232 case '-':
233 next(ls);
234 if (ls->current != '-') return '-';
235 /* else is a comment */
236 next(ls);
237 if (ls->current == '[') {
238 int sep = skip_sep(ls);
239 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
240 if (sep >= 0) {
241 read_long_string(ls, NULL, sep); /* long comment */
242 lj_str_resetbuf(&ls->sb);
243 continue;
244 }
245 }
246 /* else short comment */
247 while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
248 next(ls);
249 continue;
250 case '[': {
251 int sep = skip_sep(ls);
252 if (sep >= 0) {
253 read_long_string(ls, tv, sep);
254 return TK_string;
255 } else if (sep == -1) {
256 return '[';
257 } else {
258 lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
259 continue;
260 }
261 }
262 case '=':
263 next(ls);
264 if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
265 case '<':
266 next(ls);
267 if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
268 case '>':
269 next(ls);
270 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
271 case '~':
272 next(ls);
273 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
274 case '"':
275 case '\'':
276 read_string(ls, ls->current, tv);
277 return TK_string;
278 case '.':
279 save_and_next(ls);
280 if (check_next(ls, ".")) {
281 if (check_next(ls, "."))
282 return TK_dots; /* ... */
283 else
284 return TK_concat; /* .. */
285 } else if (!lj_ctype_isdigit(ls->current)) {
286 return '.';
287 } else {
288 read_numeral(ls, tv);
289 return TK_number;
290 }
291 case END_OF_STREAM:
292 return TK_eof;
293 default: {
294 int c = ls->current;
295 next(ls);
296 return c; /* Single-char tokens (+ - / ...). */
297 }
298 }
299 }
300}
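/* Editor's note, not part of the original source: single-character tokens are
** returned as their own character codes (below TK_OFS == 256, see lj_lex.h),
** while named tokens start above TK_OFS. Reserved words map to
** TK_OFS + s->reserved, which lj_lex_token2str below reverses when printing
** a token name.
*/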
301
302/* -- Lexer API ----------------------------------------------------------- */
303
304void lj_lex_start(lua_State *L, LexState *ls)
305{
306 ls->L = L;
307 ls->fs = NULL;
308 ls->n = 0;
309 ls->p = NULL;
310 ls->lookahead = TK_eof; /* No look-ahead token. */
311 ls->linenumber = 1;
312 ls->lastline = 1;
313 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
314 next(ls); /* Read-ahead first char. */
315 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
316 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
317 ls->n -= 2;
318 ls->p += 2;
319 next(ls);
320 }
321 if (ls->current == '#') { /* Skip POSIX #! header line. */
322 do {
323 next(ls);
324 if (ls->current == END_OF_STREAM) return;
325 } while (!currIsNewline(ls));
326 inclinenumber(ls);
327 }
328 if (ls->current == LUA_SIGNATURE[0]) {
329 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XBCLOAD));
330 lj_err_throw(L, LUA_ERRSYNTAX);
331 }
332 /* This is an unanchored GCstr before it's stored in the prototype.
333 ** Do this last since next() calls the reader which may call the GC.
334 */
335 ls->chunkname = lj_str_newz(L, ls->chunkarg);
336}
337
338void lj_lex_next(LexState *ls)
339{
340 ls->lastline = ls->linenumber;
341 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
342 ls->token = llex(ls, &ls->tokenval); /* Get next token. */
343 } else { /* Otherwise return lookahead token. */
344 ls->token = ls->lookahead;
345 ls->lookahead = TK_eof;
346 ls->tokenval = ls->lookaheadval;
347 }
348}
349
350LexToken lj_lex_lookahead(LexState *ls)
351{
352 lua_assert(ls->lookahead == TK_eof);
353 ls->lookahead = llex(ls, &ls->lookaheadval);
354 return ls->lookahead;
355}
356
357const char *lj_lex_token2str(LexState *ls, LexToken token)
358{
359 if (token > TK_OFS)
360 return tokennames[token-TK_OFS-1];
361 else if (!lj_ctype_iscntrl(token))
362 return lj_str_pushf(ls->L, "%c", token);
363 else
364 return lj_str_pushf(ls->L, "char(%d)", token);
365}
366
367void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
368{
369 const char *tok;
370 va_list argp;
371 if (token == 0) {
372 tok = NULL;
373 } else if (token == TK_name || token == TK_string || token == TK_number) {
374 save(ls, '\0');
375 tok = ls->sb.buf;
376 } else {
377 tok = lj_lex_token2str(ls, token);
378 }
379 va_start(argp, em);
380 lj_err_lex(ls->L, strdata(ls->chunkname), tok, ls->linenumber, em, argp);
381 va_end(argp);
382}
383
384void lj_lex_init(lua_State *L)
385{
386 uint32_t i;
387 for (i = 0; i < TK_RESERVED; i++) {
388 GCstr *s = lj_str_newz(L, tokennames[i]);
389 fixstring(s); /* Reserved words are never collected. */
390 s->reserved = cast_byte(i+1);
391 }
392}
393
diff --git a/src/lj_lex.h b/src/lj_lex.h
new file mode 100644
index 00000000..cc5d5a9f
--- /dev/null
+++ b/src/lj_lex.h
@@ -0,0 +1,63 @@
1/*
2** Lexical analyzer.
3** Major parts taken verbatim from the Lua interpreter.
4** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
5*/
6
7#ifndef _LJ_LEX_H
8#define _LJ_LEX_H
9
10#include <stdarg.h>
11
12#include "lj_obj.h"
13#include "lj_err.h"
14
15/* Lua lexer tokens. */
16#define TKDEF(_, __) \
17 _(and) _(break) _(do) _(else) _(elseif) _(end) _(false) \
18 _(for) _(function) _(if) _(in) _(local) _(nil) _(not) _(or) \
19 _(repeat) _(return) _(then) _(true) _(until) _(while) \
20 __(concat, ..) __(dots, ...) __(eq, ==) __(ge, >=) __(le, <=) __(ne, ~=) \
21 __(number, <number>) __(name, <name>) __(string, <string>) __(eof, <eof>)
22
23enum {
24 TK_OFS = 256,
25#define TKENUM1(name) TK_##name,
26#define TKENUM2(name, sym) TK_##name,
27TKDEF(TKENUM1, TKENUM2)
28#undef TKENUM1
29#undef TKENUM2
30 TK_RESERVED = TK_while - TK_OFS
31};
32
33typedef int LexToken;
34
35/* Lua lexer state. */
36typedef struct LexState {
37 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
38 struct lua_State *L; /* Lua state. */
39 TValue tokenval; /* Current token value. */
40 TValue lookaheadval; /* Lookahead token value. */
41 int current; /* Current character (charint). */
42 LexToken token; /* Current token. */
43 LexToken lookahead; /* Lookahead token. */
44 SBuf sb; /* String buffer for tokens. */
45 const char *p; /* Current position in input buffer. */
46 MSize n; /* Bytes left in input buffer. */
47 lua_Reader rfunc; /* Reader callback. */
48 void *rdata; /* Reader callback data. */
49 BCLine linenumber; /* Input line counter. */
50 BCLine lastline; /* Line of last token. */
51 GCstr *chunkname; /* Current chunk name (interned string). */
52 const char *chunkarg; /* Chunk name argument. */
53 uint32_t level; /* Syntactical nesting level. */
54} LexState;
55
56LJ_FUNC void lj_lex_start(lua_State *L, LexState *ls);
57LJ_FUNC void lj_lex_next(LexState *ls);
58LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
59LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token);
60LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...);
61LJ_FUNC void lj_lex_init(lua_State *L);
62
63#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
new file mode 100644
index 00000000..683c66d6
--- /dev/null
+++ b/src/lj_lib.c
@@ -0,0 +1,216 @@
1/*
2** Library function support.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_lib_c
7#define LUA_CORE
8
9#include "lauxlib.h"
10
11#include "lj_obj.h"
12#include "lj_gc.h"
13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h"
16#include "lj_func.h"
17#include "lj_vm.h"
18#include "lj_lib.h"
19
20/* -- Library initialization ---------------------------------------------- */
21
22static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
23{
24 if (libname) {
25 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
26 lua_getfield(L, -1, libname);
27 if (!tvistab(L->top-1)) {
28 L->top--;
29 if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, hsize) != NULL)
30 lj_err_callerv(L, LJ_ERR_BADMODN, libname);
31 settabV(L, L->top, tabV(L->top-1));
32 L->top++;
33 lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
34 }
35 L->top--;
36 settabV(L, L->top-1, tabV(L->top));
37 } else {
38 lua_createtable(L, 0, hsize);
39 }
40 return tabV(L->top-1);
41}
42
43void lj_lib_register(lua_State *L, const char *libname,
44 const uint8_t *p, const lua_CFunction *cf)
45{
46 GCtab *env = tabref(L->env);
47 GCfunc *ofn = NULL;
48 int ffid = *p++;
49 GCtab *tab = lib_create_table(L, libname, *p++);
50 ptrdiff_t tpos = L->top - L->base;
51
52 /* Avoid barriers further down. */
53 if (isblack(obj2gco(tab))) lj_gc_barrierback(G(L), tab);
54 tab->nomm = 0;
55
56 for (;;) {
57 uint32_t tag = *p++;
58 MSize len = tag & LIBINIT_LENMASK;
59 tag &= LIBINIT_TAGMASK;
60 if (tag != LIBINIT_STRING) {
61 const char *name;
62 MSize nuv = (MSize)(L->top - L->base - tpos);
63 GCfunc *fn = lj_func_newC(L, nuv, env);
64 if (nuv) {
65 L->top = L->base + tpos;
66 memcpy(fn->c.upvalue, L->top, sizeof(TValue)*nuv);
67 }
68 fn->c.ffid = (uint8_t)(ffid++);
69 name = (const char *)p;
70 p += len;
71 if (tag != LIBINIT_CF) {
72 fn->c.gate = makeasmfunc(p[0] + (p[1] << 8));
73 p += 2;
74 }
75 if (tag == LIBINIT_ASM_)
76 fn->c.f = ofn->c.f; /* Copy handler from previous function. */
77 else
78 fn->c.f = *cf++; /* Get cf or handler from C function table. */
79 if (len) {
80 /* NOBARRIER: See above for common barrier. */
81 setfuncV(L, lj_tab_setstr(L, tab, lj_str_new(L, name, len)), fn);
82 }
83 ofn = fn;
84 } else {
85 switch (tag | len) {
86 case LIBINIT_SET:
87 L->top -= 2;
88 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
89 env = tabV(L->top);
90 else /* NOBARRIER: See above for common barrier. */
91 copyTV(L, lj_tab_set(L, tab, L->top+1), L->top);
92 break;
93 case LIBINIT_NUMBER:
94 memcpy(&L->top->n, p, sizeof(double));
95 L->top++;
96 p += sizeof(double);
97 break;
98 case LIBINIT_COPY:
99 copyTV(L, L->top, L->top - *p++);
100 L->top++;
101 break;
102 case LIBINIT_LASTCL:
103 setfuncV(L, L->top++, ofn);
104 break;
105 case LIBINIT_FFID:
106 ffid++;
107 break;
108 case LIBINIT_END:
109 return;
110 default:
111 setstrV(L, L->top++, lj_str_new(L, (const char *)p, len));
112 p += len;
113 break;
114 }
115 }
116 }
117}
118
119/* -- Type checks --------------------------------------------------------- */
120
121TValue *lj_lib_checkany(lua_State *L, int narg)
122{
123 TValue *o = L->base + narg-1;
124 if (o >= L->top)
125 lj_err_arg(L, narg, LJ_ERR_NOVAL);
126 return o;
127}
128
129GCstr *lj_lib_checkstr(lua_State *L, int narg)
130{
131 TValue *o = L->base + narg-1;
132 if (o < L->top) {
133 if (LJ_LIKELY(tvisstr(o))) {
134 return strV(o);
135 } else if (tvisnum(o)) {
136 GCstr *s = lj_str_fromnum(L, &o->n);
137 setstrV(L, o, s);
138 return s;
139 }
140 }
141 lj_err_argt(L, narg, LUA_TSTRING);
142 return NULL; /* unreachable */
143}
144
145GCstr *lj_lib_optstr(lua_State *L, int narg)
146{
147 TValue *o = L->base + narg-1;
148 return (o < L->top && !tvisnil(o)) ? lj_lib_checkstr(L, narg) : NULL;
149}
150
151lua_Number lj_lib_checknum(lua_State *L, int narg)
152{
153 TValue *o = L->base + narg-1;
154 if (!(o < L->top &&
155 (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o)))))
156 lj_err_argt(L, narg, LUA_TNUMBER);
157 return numV(o);
158}
159
160int32_t lj_lib_checkint(lua_State *L, int narg)
161{
162 return lj_num2int(lj_lib_checknum(L, narg));
163}
164
165int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
166{
167 TValue *o = L->base + narg-1;
168 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
169}
170
171GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
172{
173 TValue *o = L->base + narg-1;
174 if (!(o < L->top && tvisfunc(o)))
175 lj_err_argt(L, narg, LUA_TFUNCTION);
176 return funcV(o);
177}
178
179GCtab *lj_lib_checktab(lua_State *L, int narg)
180{
181 TValue *o = L->base + narg-1;
182 if (!(o < L->top && tvistab(o)))
183 lj_err_argt(L, narg, LUA_TTABLE);
184 return tabV(o);
185}
186
187GCtab *lj_lib_checktabornil(lua_State *L, int narg)
188{
189 TValue *o = L->base + narg-1;
190 if (o < L->top) {
191 if (tvistab(o))
192 return tabV(o);
193 else if (tvisnil(o))
194 return NULL;
195 }
196 lj_err_arg(L, narg, LJ_ERR_NOTABN);
197 return NULL; /* unreachable */
198}
199
200int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
201{
202 GCstr *s = def >= 0 ? lj_lib_optstr(L, narg) : lj_lib_checkstr(L, narg);
203 if (s) {
204 const char *opt = strdata(s);
205 MSize len = s->len;
206 int i;
207 for (i = 0; *(const uint8_t *)lst; i++) {
208 if (*(const uint8_t *)lst == len && memcmp(opt, lst+1, len) == 0)
209 return i;
210 lst += 1+*(const uint8_t *)lst;
211 }
212 lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt);
213 }
214 return def;
215}
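For reference, the option lists scanned by lj_lib_checkopt() are sequences of length-prefixed strings terminated by a zero byte. A standalone sketch of the same scan, with an invented option list:

#include <stdio.h>
#include <string.h>

static int checkopt(const char *opt, const char *lst)
{
  size_t len = strlen(opt);
  int i;
  for (i = 0; *(const unsigned char *)lst; i++) {
    if (*(const unsigned char *)lst == len && memcmp(opt, lst+1, len) == 0)
      return i;                               /* Index of matching option. */
    lst += 1 + *(const unsigned char *)lst;   /* Skip length byte + string. */
  }
  return -1;                                  /* Not found. */
}

int main(void)
{
  printf("%d\n", checkopt("end", "\3cur\3end\3set"));  /* Prints: 1 */
  return 0;
}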
216
diff --git a/src/lj_lib.h b/src/lj_lib.h
new file mode 100644
index 00000000..1cba3778
--- /dev/null
+++ b/src/lj_lib.h
@@ -0,0 +1,84 @@
1/*
2** Library function support.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_LIB_H
7#define _LJ_LIB_H
8
9#include "lj_obj.h"
10
11/*
12** A fallback handler is called by the assembler VM if the fast path fails:
13**
14** - too few arguments: unrecoverable.
15** - wrong argument type: recoverable, if coercion succeeds.
16** - bad argument value: unrecoverable.
17** - stack overflow: recoverable, if stack reallocation succeeds.
18** - extra handling: recoverable.
19**
20** The unrecoverable cases throw an error with lj_err_arg(), lj_err_argtype(),
21** lj_err_caller() or lj_err_callermsg().
22** The recoverable cases return 0 or the number of results + 1.
23** The assembler VM retries the fast path only if 0 is returned.
24** This time the fallback must not be called again or it gets stuck in a loop.
25*/
26
27/* Return values from fallback handler. */
28#define FFH_RETRY 0
29#define FFH_UNREACHABLE FFH_RETRY
30#define FFH_RES(n) ((n)+1)
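A tiny standalone model of the retry protocol described in the comment above: a pretend fast path fails, the fallback fixes up the argument and returns the retry value, and the fast path is tried exactly once more. Everything here is invented for illustration; only the return-value convention mirrors the header.

#include <stdio.h>

#define MY_FFH_RETRY  0
#define MY_FFH_RES(n) ((n)+1)

static int arg_is_number = 0;                 /* Pretend coercion is needed. */

static int fastpath(void) { return arg_is_number; }       /* 1 = handled. */

static int fallback(void)
{
  if (!arg_is_number) {                       /* Recoverable: coerce, retry. */
    arg_is_number = 1;
    return MY_FFH_RETRY;
  }
  return MY_FFH_RES(1);                       /* Or deliver one result itself. */
}

int main(void)
{
  if (!fastpath()) {
    int r = fallback();
    if (r == MY_FFH_RETRY && fastpath())
      puts("fast path succeeded on retry");   /* This branch runs. */
    else
      printf("fallback delivered %d result(s)\n", r - 1);
  }
  return 0;
}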
31
32LJ_FUNC TValue *lj_lib_checkany(lua_State *L, int narg);
33LJ_FUNC GCstr *lj_lib_checkstr(lua_State *L, int narg);
34LJ_FUNC GCstr *lj_lib_optstr(lua_State *L, int narg);
35LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
36LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
37LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
38LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
39LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
40LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
41LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
42
43#define lj_lib_opt(L, narg, gotarg, noarg) \
44 { TValue *_o = L->base + (narg)-1; \
45 if (_o < L->top && !tvisnil(_o)) { gotarg } else { noarg } }
46
47/* Avoid including lj_frame.h. */
48#define lj_lib_upvalue(L, n) \
49 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
50
51/* Library function declarations. Scanned by buildvm. */
52#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
53#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
54#define LJLIB_ASM_(name)
55#define LJLIB_SET(name)
56#define LJLIB_PUSH(arg)
57#define LJLIB_REC(handler)
58#define LJLIB_NOREGUV
59#define LJLIB_NOREG
60
61#define LJ_LIB_REG(L, name) \
62 lj_lib_register(L, #name, lj_lib_init_##name, lj_lib_cf_##name)
63#define LJ_LIB_REG_(L, regname, name) \
64 lj_lib_register(L, regname, lj_lib_init_##name, lj_lib_cf_##name)
65
66LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
67 const uint8_t *init, const lua_CFunction *cf);
68
69/* Library init data tags. */
70#define LIBINIT_LENMASK 0x3f
71#define LIBINIT_TAGMASK 0xc0
72#define LIBINIT_CF 0x00
73#define LIBINIT_ASM 0x40
74#define LIBINIT_ASM_ 0x80
75#define LIBINIT_STRING 0xc0
76#define LIBINIT_MAXSTR 0x39
77#define LIBINIT_SET 0xfa
78#define LIBINIT_NUMBER 0xfb
79#define LIBINIT_COPY 0xfc
80#define LIBINIT_LASTCL 0xfd
81#define LIBINIT_FFID 0xfe
82#define LIBINIT_END 0xff
83
84#endif
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
new file mode 100644
index 00000000..e5791e9f
--- /dev/null
+++ b/src/lj_mcode.c
@@ -0,0 +1,260 @@
1/*
2** Machine code management.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_mcode_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_jit.h"
15#include "lj_mcode.h"
16#include "lj_trace.h"
17#include "lj_dispatch.h"
18
19/* -- OS-specific functions ----------------------------------------------- */
20
21#if defined(LUA_USE_WIN)
22
23#define WIN32_LEAN_AND_MEAN
24#include <windows.h>
25
26#define MCPROT_RW PAGE_READWRITE
27#define MCPROT_RX PAGE_EXECUTE_READ
28#define MCPROT_RWX PAGE_EXECUTE_READWRITE
29
30static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, DWORD prot)
31{
32 void *p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
33 if (!p)
34 lj_trace_err(J, LJ_TRERR_MCODEAL);
35 return p;
36}
37
38static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz)
39{
40 UNUSED(J); UNUSED(sz);
41 VirtualFree(p, 0, MEM_RELEASE);
42}
43
44static LJ_AINLINE void mcode_setprot(void *p, size_t sz, DWORD prot)
45{
46 DWORD oprot;
47 VirtualProtect(p, sz, prot, &oprot);
48}
49
50#elif defined(LUA_USE_POSIX)
51
52#include <sys/mman.h>
53
54#ifndef MAP_ANONYMOUS
55#define MAP_ANONYMOUS MAP_ANON
56#endif
57
58#define MCPROT_RW (PROT_READ|PROT_WRITE)
59#define MCPROT_RX (PROT_READ|PROT_EXEC)
60#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC)
61
62static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot)
63{
64 void *p = mmap(NULL, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
65 if (p == MAP_FAILED)
66 lj_trace_err(J, LJ_TRERR_MCODEAL);
67 return p;
68}
69
70static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz)
71{
72 UNUSED(J);
73 munmap(p, sz);
74}
75
76static LJ_AINLINE void mcode_setprot(void *p, size_t sz, int prot)
77{
78 mprotect(p, sz, prot);
79}
80
81#else
82
83/* Fallback allocator. This will fail if memory is not executable by default. */
84#define LUAJIT_UNPROTECT_MCODE
85#define MCPROT_RW 0
86#define MCPROT_RX 0
87#define MCPROT_RWX 0
88
89static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot)
90{
91 UNUSED(prot);
92 return lj_mem_new(J->L, sz);
93}
94
95static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz)
96{
97 lj_mem_free(J2G(J), p, sz);
98}
99
100#define mcode_setprot(p, sz, prot) UNUSED(p)
101
102#endif
103
104/* -- MCode area management ----------------------------------------------- */
105
106/* Define this ONLY if the page protection twiddling becomes a bottleneck. */
107#ifdef LUAJIT_UNPROTECT_MCODE
108
109/* It's generally considered to be a potential security risk to have
110** pages with simultaneous write *and* execute access in a process.
111**
112** Do not even think about using this mode for server processes or
113** apps handling untrusted external data (such as a browser).
114**
115** The security risk is not in LuaJIT itself -- but if an adversary finds
116** any *other* flaw in your C application logic, then any RWX memory page
117** simplifies writing an exploit considerably.
118*/
119#define MCPROT_GEN MCPROT_RWX
120#define MCPROT_RUN MCPROT_RWX
121
122#else
123
124/* This is the default behaviour and much safer:
125**
126** Most of the time the memory pages holding machine code are executable,
127** but NONE of them is writable.
128**
129** The current memory area is marked read-write (but NOT executable) only
130** during the short time window while the assembler generates machine code.
131*/
132#define MCPROT_GEN MCPROT_RW
133#define MCPROT_RUN MCPROT_RX
134
135#endif
136
137/* Change protection of MCode area. */
138static void mcode_protect(jit_State *J, int prot)
139{
140#ifdef LUAJIT_UNPROTECT_MCODE
141 UNUSED(J); UNUSED(prot);
142#else
143 if (J->mcprot != prot) {
144 mcode_setprot(J->mcarea, J->szmcarea, prot);
145 J->mcprot = prot;
146 }
147#endif
148}
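A self-contained POSIX sketch of the same write-xor-execute discipline: the area is writable only while code is being emitted and executable the rest of the time. The bytes written are x86 NOPs but are never actually executed here; error handling is deliberately minimal.

#include <sys/mman.h>
#include <string.h>
#include <stdio.h>

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

int main(void)
{
  size_t sz = 4096;                           /* Assume one page. */
  unsigned char *area = mmap(NULL, sz, PROT_READ|PROT_WRITE,
                             MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
  if (area == MAP_FAILED) return 1;
  memset(area, 0x90, sz);                     /* "Emit" machine code. */
  mprotect(area, sz, PROT_READ|PROT_EXEC);    /* Commit: RX, no longer W. */
  /* ... the VM would run the code here ... */
  mprotect(area, sz, PROT_READ|PROT_WRITE);   /* Reopen only for patching. */
  area[0] = 0x90;
  mprotect(area, sz, PROT_READ|PROT_EXEC);    /* Seal it again. */
  munmap(area, sz);
  puts("ok");
  return 0;
}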
149
150/* Linked list of MCode areas. */
151typedef struct MCLink {
152 MCode *next; /* Next area. */
153 size_t size; /* Size of current area. */
154} MCLink;
155
156/* Allocate a new MCode area. */
157static void mcode_allocarea(jit_State *J)
158{
159 MCode *oldarea = J->mcarea;
160 size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10;
161 sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
162 J->mcarea = (MCode *)mcode_alloc(J, sz, MCPROT_GEN);
163 J->szmcarea = sz;
164 J->mcprot = MCPROT_GEN;
165 J->mctop = (MCode *)((char *)J->mcarea + J->szmcarea);
166 J->mcbot = (MCode *)((char *)J->mcarea + sizeof(MCLink));
167 ((MCLink *)J->mcarea)->next = oldarea;
168 ((MCLink *)J->mcarea)->size = sz;
169 J->szallmcarea += sz;
170}
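The size computation above relies on the usual power-of-two round-up: (sz + PAGE-1) & ~(PAGE-1) bumps sz to the next multiple of the page size. A quick standalone check of that expression, with the page size assumed to be 4096:

#include <stdio.h>

int main(void)
{
  size_t page = 4096, sz;
  for (sz = 3000; sz <= 9000; sz += 3000)
    printf("%zu -> %zu\n", sz, (sz + page-1) & ~(page-1));
  /* Prints: 3000 -> 4096, 6000 -> 8192, 9000 -> 12288 */
  return 0;
}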
171
172/* Free all MCode areas. */
173void lj_mcode_free(jit_State *J)
174{
175 MCode *mc = J->mcarea;
176 J->mcarea = NULL;
177 J->szallmcarea = 0;
178 while (mc) {
179 MCode *next = ((MCLink *)mc)->next;
180 mcode_free(J, mc, ((MCLink *)mc)->size);
181 mc = next;
182 }
183}
184
185/* -- MCode transactions -------------------------------------------------- */
186
187/* Reserve the remainder of the current MCode area. */
188MCode *lj_mcode_reserve(jit_State *J, MCode **lim)
189{
190 if (!J->mcarea)
191 mcode_allocarea(J);
192 else
193 mcode_protect(J, MCPROT_GEN);
194 *lim = J->mcbot;
195 return J->mctop;
196}
197
198/* Commit the top part of the current MCode area. */
199void lj_mcode_commit(jit_State *J, MCode *top)
200{
201 J->mctop = top;
202 mcode_protect(J, MCPROT_RUN);
203}
204
205/* Abort the reservation. */
206void lj_mcode_abort(jit_State *J)
207{
208 mcode_protect(J, MCPROT_RUN);
209}
210
211/* Set/reset protection to allow patching of MCode areas. */
212MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
213{
214#ifdef LUAJIT_UNPROTECT_MCODE
215 UNUSED(J); UNUSED(ptr); UNUSED(finish);
216 return NULL;
217#else
218 if (finish) {
219 if (J->mcarea == ptr)
220 mcode_protect(J, MCPROT_RUN);
221 else
222 mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN);
223 return NULL;
224 } else {
225 MCode *mc = J->mcarea;
226 /* Try current area first to use the protection cache. */
227 if (ptr >= mc && ptr < mc + J->szmcarea) {
228 mcode_protect(J, MCPROT_GEN);
229 return mc;
230 }
231 /* Otherwise search through the list of MCode areas. */
232 for (;;) {
233 mc = ((MCLink *)mc)->next;
234 lua_assert(mc != NULL);
235 if (ptr >= mc && ptr < mc + ((MCLink *)mc)->size) {
236 mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN);
237 return mc;
238 }
239 }
240 }
241#endif
242}
243
244/* Limit of MCode reservation reached. */
245void lj_mcode_limiterr(jit_State *J, size_t need)
246{
247 size_t sizemcode, maxmcode;
248 lj_mcode_abort(J);
249 sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
250 sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
251 maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
252 if ((size_t)need > sizemcode)
253 lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */
254 if (J->szallmcarea + sizemcode > maxmcode)
255 lj_trace_err(J, LJ_TRERR_MCODEAL);
256 mcode_allocarea(J);
257 lj_trace_err(J, LJ_TRERR_MCODELM); /* Retry with new area. */
258}
259
260#endif
diff --git a/src/lj_mcode.h b/src/lj_mcode.h
new file mode 100644
index 00000000..d4573bf4
--- /dev/null
+++ b/src/lj_mcode.h
@@ -0,0 +1,23 @@
1/*
2** Machine code management.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_MCODE_H
7#define _LJ_MCODE_H
8
9#include "lj_jit.h"
10
11#if LJ_HASJIT
12LJ_FUNC void lj_mcode_free(jit_State *J);
13LJ_FUNC MCode *lj_mcode_reserve(jit_State *J, MCode **lim);
14LJ_FUNC void lj_mcode_commit(jit_State *J, MCode *m);
15LJ_FUNC void lj_mcode_abort(jit_State *J);
16LJ_FUNC MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish);
17LJ_FUNC_NORET void lj_mcode_limiterr(jit_State *J, size_t need);
18
19#define lj_mcode_commitbot(J, m) (J->mcbot = (m))
20
21#endif
22
23#endif
diff --git a/src/lj_meta.c b/src/lj_meta.c
new file mode 100644
index 00000000..dff01f85
--- /dev/null
+++ b/src/lj_meta.c
@@ -0,0 +1,358 @@
1/*
2** Metamethod handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_meta_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_tab.h"
17#include "lj_meta.h"
18#include "lj_bc.h"
19#include "lj_vm.h"
20
21/* -- Metamethod handling ------------------------------------------------- */
22
23/* String interning of metamethod names for fast indexing. */
24void lj_meta_init(lua_State *L)
25{
26#define MMNAME(name) "__" #name
27 const char *metanames = MMDEF(MMNAME);
28#undef MMNAME
29 global_State *g = G(L);
30 const char *p, *q;
31 uint32_t i;
32 for (i = 0, p = metanames; *p; i++, p = q) {
33 GCstr *s;
34 for (q = p+2; *q && *q != '_'; q++) ;
35 s = lj_str_new(L, p, (size_t)(q-p));
36 fixstring(s); /* Never collect these names. */
37 /* NOBARRIER: g->mmname[] is a GC root. */
38 setgcref(g->mmname[i], obj2gco(s));
39 }
40}
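A standalone sketch of the name-splitting loop above: MMDEF pastes all metamethod names into a single literal, and each name is recovered by scanning from p+2 (past the leading "__") to the next '_'. The shortened list here is illustrative only.

#include <stdio.h>

int main(void)
{
  const char *metanames = "__index" "__newindex" "__gc";  /* As MMDEF pastes them. */
  const char *p, *q;
  for (p = metanames; *p; p = q) {
    for (q = p+2; *q && *q != '_'; q++) ;  /* Find start of the next name. */
    printf("%.*s\n", (int)(q-p), p);       /* __index, __newindex, __gc */
  }
  return 0;
}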
41
42/* Negative caching of a few fast metamethods. See the lj_meta_fast() macro. */
43cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name)
44{
45 cTValue *mo = lj_tab_getstr(mt, name);
46 lua_assert(mm <= MM_FAST);
47 if (!mo || tvisnil(mo)) { /* No metamethod? */
48 mt->nomm |= cast_byte(1u<<mm); /* Set negative cache flag. */
49 return NULL;
50 }
51 return mo;
52}
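A standalone model of the negative cache: one bit per fast metamethod in the table header, set on the first miss so later lookups skip the hash table entirely (the bits are cleared again when the metatable changes). All names below are invented for illustration.

#include <stdio.h>
#include <string.h>

enum { MYMM_index, MYMM_newindex, MYMM_eq, MYMM_FAST = MYMM_eq };

typedef struct {
  unsigned char nomm;               /* Negative cache, one bit per fast mm. */
  const char *mm[MYMM_FAST+1];      /* Stand-in for the real hash lookup. */
} Tab;

static const char *meta_fast(Tab *t, int mm)
{
  if (t->nomm & (1u << mm))         /* Cached miss: no lookup at all. */
    return NULL;
  if (t->mm[mm] == NULL) {          /* Lookup missed, too... */
    t->nomm |= (unsigned char)(1u << mm);   /* ...so cache the miss. */
    return NULL;
  }
  return t->mm[mm];
}

int main(void)
{
  Tab t;
  const char *r;
  memset(&t, 0, sizeof(t));
  t.mm[MYMM_index] = "__index handler";
  r = meta_fast(&t, MYMM_index);
  printf("%s\n", r ? r : "miss");                       /* __index handler */
  r = meta_fast(&t, MYMM_eq);
  printf("%s, nomm=0x%02x\n", r ? r : "miss", t.nomm);  /* miss, nomm=0x04 */
  return 0;
}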
53
54/* Lookup metamethod for object. */
55cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm)
56{
57 GCtab *mt;
58 if (tvistab(o))
59 mt = tabref(tabV(o)->metatable);
60 else if (tvisudata(o))
61 mt = tabref(udataV(o)->metatable);
62 else
63 mt = tabref(G(L)->basemt[itypemap(o)]);
64 if (mt) {
65 cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm]));
66 if (mo)
67 return mo;
68 }
69 return niltv(L);
70}
71
72/* Setup call to metamethod to be run by Assembler VM. */
73static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
74 cTValue *a, cTValue *b)
75{
76 /*
77 ** |-- framesize -> top top+1 top+2 top+3
78 ** before: [func slots ...]
79 ** mm setup: [func slots ...] [cont|?] [mo|tmtype] [a] [b]
80 ** in asm: [func slots ...] [cont|PC] [mo|delta] [a] [b]
81 ** ^-- func base ^-- mm base
82 ** after mm: [func slots ...] [result]
83 ** ^-- copy to base[PC_RA] --/ for lj_cont_ra
84 ** istruecond + branch for lj_cont_cond*
85 ** ignore for lj_cont_nop
86 ** next PC: [func slots ...]
87 */
88 TValue *top = L->top;
89 if (curr_funcisL(L)) top = curr_topL(L);
90 setcont(top, cont); /* Assembler VM stores PC in upper word. */
91 copyTV(L, top+1, mo); /* Store metamethod and two arguments. */
92 copyTV(L, top+2, a);
93 copyTV(L, top+3, b);
94 return top+2; /* Return new base. */
95}
96
97/* -- C helpers for some instructions, called from assembler VM ----------- */
98
99/* Helper for TGET*. __index chain and metamethod. */
100cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k)
101{
102 int loop;
103 for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) {
104 cTValue *mo;
105 if (tvistab(o)) {
106 GCtab *t = tabV(o);
107 cTValue *tv = lj_tab_get(L, t, k);
108 if (!tvisnil(tv) ||
109 !(mo = lj_meta_fast(L, tabref(t->metatable), MM_index)))
110 return tv;
111 } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_index))) {
112 lj_err_optype(L, o, LJ_ERR_OPINDEX);
113 return NULL; /* unreachable */
114 }
115 if (tvisfunc(mo)) {
116 L->top = mmcall(L, lj_cont_ra, mo, o, k);
117 return NULL; /* Trigger metamethod call. */
118 }
119 o = mo;
120 }
121 lj_err_msg(L, LJ_ERR_GETLOOP);
122 return NULL; /* unreachable */
123}
124
125/* Helper for TSET*. __newindex chain and metamethod. */
126TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k)
127{
128 TValue tmp;
129 int loop;
130 for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) {
131 cTValue *mo;
132 if (tvistab(o)) {
133 GCtab *t = tabV(o);
134 TValue *tv = lj_tab_set(L, t, k);
135 if (!tvisnil(tv) ||
136 !(mo = lj_meta_fast(L, tabref(t->metatable), MM_newindex))) {
137 if (isblack(obj2gco(t))) lj_gc_barrierback(G(L), t);
138 return tv;
139 }
140 } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_newindex))) {
141 lj_err_optype(L, o, LJ_ERR_OPINDEX);
142 return NULL; /* unreachable */
143 }
144 if (tvisfunc(mo)) {
145 L->top = mmcall(L, lj_cont_nop, mo, o, k);
146 /* L->top+2 = v filled in by caller. */
147 return NULL; /* Trigger metamethod call. */
148 }
149 copyTV(L, &tmp, mo);
150 o = &tmp;
151 }
152 lj_err_msg(L, LJ_ERR_SETLOOP);
153 return NULL; /* unreachable */
154}
155
156static cTValue *str2num(cTValue *o, TValue *n)
157{
158 if (tvisnum(o))
159 return o;
160 else if (tvisstr(o) && lj_str_numconv(strVdata(o), n))
161 return n;
162 else
163 return NULL;
164}
165
166/* Helper for arithmetic instructions. Coercion, metamethod. */
167TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
168 BCReg op)
169{
170 MMS mm = bcmode_mm(op);
171 TValue tempb, tempc;
172 cTValue *b, *c;
173 if ((b = str2num(rb, &tempb)) != NULL &&
174 (c = str2num(rc, &tempc)) != NULL) { /* Try coercion first. */
175 setnumV(ra, lj_vm_foldarith(numV(b), numV(c), (int)mm-MM_add));
176 return NULL;
177 } else {
178 cTValue *mo = lj_meta_lookup(L, rb, mm);
179 if (tvisnil(mo)) {
180 mo = lj_meta_lookup(L, rc, mm);
181 if (tvisnil(mo)) {
182 if (str2num(rb, &tempb) == NULL) rc = rb;
183 lj_err_optype(L, rc, LJ_ERR_OPARITH);
184 return NULL; /* unreachable */
185 }
186 }
187 return mmcall(L, lj_cont_ra, mo, rb, rc);
188 }
189}
190
191/* In-place coercion of a number to a string. */
192static LJ_AINLINE int tostring(lua_State *L, TValue *o)
193{
194 if (tvisstr(o)) {
195 return 1;
196 } else if (tvisnum(o)) {
197 setstrV(L, o, lj_str_fromnum(L, &o->n));
198 return 1;
199 } else {
200 return 0;
201 }
202}
203
204/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
205TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
206{
207 do {
208 int n = 1;
209 if (!(tvisstr(top-1) || tvisnum(top-1)) || !tostring(L, top)) {
210 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
211 if (tvisnil(mo)) {
212 mo = lj_meta_lookup(L, top, MM_concat);
213 if (tvisnil(mo)) {
214 if (tvisstr(top-1) || tvisnum(top-1)) top++;
215 lj_err_optype(L, top-1, LJ_ERR_OPCAT);
216 return NULL; /* unreachable */
217 }
218 }
219    /* One of the top two elements is not a string, call __concat metamethod:
220 **
221 ** before: [...][CAT stack .........................]
222 ** top-1 top top+1 top+2
223 ** pick two: [...][CAT stack ...] [o1] [o2]
224 ** setup mm: [...][CAT stack ...] [cont|?] [mo|tmtype] [o1] [o2]
225 ** in asm: [...][CAT stack ...] [cont|PC] [mo|delta] [o1] [o2]
226 ** ^-- func base ^-- mm base
227 ** after mm: [...][CAT stack ...] <--push-- [result]
228 ** next step: [...][CAT stack .............]
229 */
230 copyTV(L, top+2, top) /* Careful with the order of stack copies! */
231 copyTV(L, top+1, top-1)
232 copyTV(L, top, mo)
233 setcont(top-1, lj_cont_cat);
234 return top+1; /* Trigger metamethod call. */
235 } else if (strV(top)->len == 0) { /* Shortcut. */
236 (void)tostring(L, top-1);
237 } else {
238 /* Pick as many strings as possible from the top and concatenate them:
239 **
240 ** before: [...][CAT stack ...........................]
241 ** pick str: [...][CAT stack ...] [...... strings ......]
242 ** concat: [...][CAT stack ...] [result]
243 ** next step: [...][CAT stack ............]
244 */
245 MSize tlen = strV(top)->len;
246 char *buffer;
247 int i;
248 for (n = 1; n <= left && tostring(L, top-n); n++) {
249 MSize len = strV(top-n)->len;
250 if (len >= LJ_MAX_STR - tlen)
251 lj_err_msg(L, LJ_ERR_STROV);
252 tlen += len;
253 }
254 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen);
255 n--;
256 tlen = 0;
257 for (i = n; i >= 0; i--) {
258 MSize len = strV(top-i)->len;
259 memcpy(buffer + tlen, strVdata(top-i), len);
260 tlen += len;
261 }
262 setstrV(L, top-n, lj_str_new(L, buffer, tlen));
263 }
264 left -= n;
265 top -= n;
266 } while (left >= 1);
267 lj_gc_check_fixtop(L);
268 return NULL;
269}
270
271/* Helper for LEN. __len metamethod. */
272TValue *lj_meta_len(lua_State *L, cTValue *o)
273{
274 cTValue *mo = lj_meta_lookup(L, o, MM_len);
275 if (tvisnil(mo)) {
276 lj_err_optype(L, o, LJ_ERR_OPLEN);
277 return NULL; /* unreachable */
278 }
279 return mmcall(L, lj_cont_ra, mo, o, niltv(L));
280}
281
282/* Helper for equality comparisons. __eq metamethod. */
283TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
284{
285 /* Field metatable must be at same offset for GCtab and GCudata! */
286 cTValue *mo = lj_meta_fast(L, tabref(o1->gch.metatable), MM_eq);
287 if (mo) {
288 TValue *top;
289 int it;
290 if (tabref(o1->gch.metatable) != tabref(o2->gch.metatable)) {
291 cTValue *mo2 = lj_meta_fast(L, tabref(o2->gch.metatable), MM_eq);
292 if (mo2 == NULL || !lj_obj_equal(mo, mo2))
293 return cast(TValue *, (intptr_t)ne);
294 }
295 top = curr_top(L);
296 setcont(top, ne ? lj_cont_condf : lj_cont_condt);
297 copyTV(L, top+1, mo);
298 it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA;
299 setgcV(L, top+2, &o1->gch, it);
300 setgcV(L, top+3, &o2->gch, it);
301 return top+2; /* Trigger metamethod call. */
302 }
303 return cast(TValue *, (intptr_t)ne);
304}
305
306/* Helper for ordered comparisons. String compare, __lt/__le metamethods. */
307TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
308{
309 if (itype(o1) == itype(o2)) { /* Never called with two numbers. */
310 if (tvisstr(o1) && tvisstr(o2)) {
311 int32_t res = lj_str_cmp(strV(o1), strV(o2));
312 return cast(TValue *, (intptr_t)(((op&2) ? res <= 0 : res < 0) ^ (op&1)));
313 } else {
314 trymt:
315 while (1) {
316 ASMFunction cont = (op & 1) ? lj_cont_condf : lj_cont_condt;
317 MMS mm = (op & 2) ? MM_le : MM_lt;
318 cTValue *mo = lj_meta_lookup(L, o1, mm);
319 cTValue *mo2 = lj_meta_lookup(L, o2, mm);
320 if (tvisnil(mo) || !lj_obj_equal(mo, mo2)) {
321 if (op & 2) { /* MM_le not found: retry with MM_lt. */
322 cTValue *ot = o1; o1 = o2; o2 = ot; /* Swap operands. */
323 op ^= 3; /* Use LT and flip condition. */
324 continue;
325 }
326 goto err;
327 }
328 return mmcall(L, cont, mo, o1, o2);
329 }
330 }
331 } else if (tvisbool(o1) && tvisbool(o2)) {
332 goto trymt;
333 } else {
334 err:
335 lj_err_comp(L, o1, o2);
336 return NULL;
337 }
338}
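The op ^= 3 retry above leans on a simple identity: when only a less-than handler exists, a <= b can be evaluated as !(b < a), i.e. swap the operands and negate the result. A quick standalone check of that identity over a few integers:

#include <stdio.h>

static int lt(int a, int b) { return a < b; }

int main(void)
{
  int a, b, ok = 1;
  for (a = -2; a <= 2; a++)
    for (b = -2; b <= 2; b++)
      ok &= ((a <= b) == !lt(b, a));          /* Compare against plain <=. */
  printf("a <= b  ==  !(b < a): %s\n", ok ? "holds" : "broken");
  return 0;
}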
339
340/* Helper for calls. __call metamethod. */
341void lj_meta_call(lua_State *L, TValue *func, TValue *top)
342{
343 cTValue *mo = lj_meta_lookup(L, func, MM_call);
344 TValue *p;
345 if (!tvisfunc(mo))
346 lj_err_optype_call(L, func);
347 for (p = top; p > func; p--) copyTV(L, p, p-1);
348 copyTV(L, func, mo);
349}
350
351/* Helper for FORI. Coercion. */
352void lj_meta_for(lua_State *L, TValue *base)
353{
354 if (!str2num(base, base)) lj_err_msg(L, LJ_ERR_FORINIT);
355 if (!str2num(base+1, base+1)) lj_err_msg(L, LJ_ERR_FORLIM);
356 if (!str2num(base+2, base+2)) lj_err_msg(L, LJ_ERR_FORSTEP);
357}
358
diff --git a/src/lj_meta.h b/src/lj_meta.h
new file mode 100644
index 00000000..60d1e79e
--- /dev/null
+++ b/src/lj_meta.h
@@ -0,0 +1,33 @@
1/*
2** Metamethod handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_META_H
7#define _LJ_META_H
8
9#include "lj_obj.h"
10
11/* Metamethod handling */
12LJ_FUNC void lj_meta_init(lua_State *L);
13LJ_FUNC cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name);
14LJ_FUNC cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm);
15
16#define lj_meta_fastg(g, mt, mm) \
17 ((mt) == NULL ? NULL : ((mt)->nomm & (1u<<(mm))) ? NULL : \
18 lj_meta_cache(mt, mm, strref((g)->mmname[mm])))
19#define lj_meta_fast(L, mt, mm) lj_meta_fastg(G(L), mt, mm)
20
21/* C helpers for some instructions, called from assembler VM. */
22LJ_FUNCA cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k);
23LJ_FUNCA TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k);
24LJ_FUNCA TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb,
25 cTValue *rc, BCReg op);
26LJ_FUNCA TValue *lj_meta_cat(lua_State *L, TValue *top, int left);
27LJ_FUNCA TValue *lj_meta_len(lua_State *L, cTValue *o);
28LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
29LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
30LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
31LJ_FUNCA void lj_meta_for(lua_State *L, TValue *base);
32
33#endif
diff --git a/src/lj_obj.c b/src/lj_obj.c
new file mode 100644
index 00000000..d26a6b38
--- /dev/null
+++ b/src/lj_obj.c
@@ -0,0 +1,41 @@
1/*
2** Miscellaneous object handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_obj_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11/* Object type names. */
12LJ_DATADEF const char *const lj_obj_typename[] = { /* ORDER LUA_T */
13 "no value", "nil", "boolean", "userdata", "number", "string",
14 "table", "function", "userdata", "thread", "proto", "upval"
15};
16
17LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
18 "nil", "boolean", "boolean", "userdata", "string", "upval", "thread",
19 "proto", "function", "deadkey", "table", "userdata", "number"
20};
21
22/* Compare two objects without calling metamethods. */
23int lj_obj_equal(cTValue *o1, cTValue *o2)
24{
25 if (itype(o1) == itype(o2)) {
26 if (tvispri(o1))
27 return 1;
28 if (!tvisnum(o1)) {
29#if LJ_64
30 if (tvislightud(o1))
31 return o1->u64 == o2->u64;
32 else
33#endif
34 return gcrefeq(o1->gcr, o2->gcr);
35 }
36 } else if (!tvisnum(o1) || !tvisnum(o2)) {
37 return 0;
38 }
39 return numV(o1) == numV(o2);
40}
41
diff --git a/src/lj_obj.h b/src/lj_obj.h
new file mode 100644
index 00000000..e5ea713d
--- /dev/null
+++ b/src/lj_obj.h
@@ -0,0 +1,676 @@
1/*
2** LuaJIT VM tags, values and objects.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#ifndef _LJ_OBJ_H
10#define _LJ_OBJ_H
11
12#include "lua.h"
13#include "lj_def.h"
14#include "lj_arch.h"
15
16/* -- Memory references (32 bit address space) ---------------------------- */
17
18/* Memory size. */
19typedef uint32_t MSize;
20
21/* Memory reference */
22typedef struct MRef {
23 uint32_t ptr32; /* Pseudo 32 bit pointer. */
24} MRef;
25
26#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
27
28#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
29#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
30
31/* -- GC object references (32 bit address space) ------------------------- */
32
33/* GCobj reference */
34typedef struct GCRef {
35 uint32_t gcptr32; /* Pseudo 32 bit pointer. */
36} GCRef;
37
38/* Common GC header for all collectable objects. */
39#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
40/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
41
42#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
43#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
44#define gcrefu(r) ((r).gcptr32)
45#define gcrefi(r) ((int32_t)(r).gcptr32)
46#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
47#define gcnext(gc) (gcref((gc)->gch.nextgc))
48
49#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
50#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
51#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
52#define setgcrefnull(r) ((r).gcptr32 = 0)
53#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
54
55/* IMPORTANT NOTE:
56**
57** All uses of the setgcref* macros MUST be accompanied with a write barrier.
58**
59** This is to ensure the integrity of the incremental GC. The invariant
60** to preserve is that a black object never points to a white object.
61** I.e. never store a white object into a field of a black object.
62**
63** It's ok to LEAVE OUT the write barrier ONLY in the following cases:
64** - The source is not a GC object (NULL).
65** - The target is a GC root. I.e. everything in global_State.
66** - The target is a lua_State field (threads are never black).
67** - The target is a stack slot, see setgcV et al.
68** - The target is an open upvalue, i.e. pointing to a stack slot.
69** - The target is a newly created object (i.e. marked white). But make
70** sure nothing invokes the GC in between.
71** - The target and the source are the same object (self-reference).
72** - The target already contains the object (e.g. moving elements around).
73**
74** The most common case is a store to a stack slot. All other cases where
75** a barrier has been omitted are annotated with a NOBARRIER comment.
76**
77** The same logic applies for stores to table slots (array part or hash
78** part). ALL uses of lj_tab_set* require a barrier for the stored *value*
79** (if it's a GC object). The barrier for the *key* is already handled
80** internally by lj_tab_newkey.
81*/
82
83/* -- Common type definitions --------------------------------------------- */
84
85/* Types for handling bytecodes. Need this here, details in lj_bc.h. */
86typedef uint32_t BCIns; /* Bytecode instruction. */
87typedef uint32_t BCPos; /* Bytecode position. */
88typedef uint32_t BCReg; /* Bytecode register. */
89typedef int32_t BCLine; /* Bytecode line number. */
90
91/* Internal assembler functions. Never call these directly from C. */
92typedef void (*ASMFunction)(void);
93
94/* Resizable string buffer. Need this here, details in lj_str.h. */
95typedef struct SBuf {
96 char *buf; /* String buffer base. */
97 MSize n; /* String buffer length. */
98 MSize sz; /* String buffer size. */
99} SBuf;
100
101/* -- Tags and values ----------------------------------------------------- */
102
103/* Frame link. */
104typedef union {
105 int32_t ftsz; /* Frame type and size of previous frame. */
106 MRef pcr; /* Overlaps PC for Lua frames. */
107} FrameLink;
108
109/* Tagged value. */
110typedef LJ_ALIGN(8) union TValue {
111 uint64_t u64; /* 64 bit pattern overlaps number. */
112 lua_Number n; /* Number object overlaps split tag/value object. */
113 struct {
114 LJ_ENDIAN_LOHI(
115 GCRef gcr; /* GCobj reference (if any). */
116 , int32_t it; /* Internal object tag. Must overlap MSW of number. */
117 )
118 };
119 struct {
120 LJ_ENDIAN_LOHI(
121 GCRef func; /* Function for next frame (or dummy L). */
122 , FrameLink tp; /* Link to previous frame. */
123 )
124 } fr;
125 struct {
126 LJ_ENDIAN_LOHI(
127 uint32_t lo; /* Lower 32 bits of number. */
128 , uint32_t hi; /* Upper 32 bits of number. */
129 )
130 } u32;
131} TValue;
132
133typedef const TValue cTValue;
134
135#define tvref(r) (mref(r, TValue))
136
137/* More external and GCobj tags for internal objects. */
138#define LAST_TT LUA_TTHREAD
139
140#define LUA_TPROTO (LAST_TT+1)
141#define LUA_TUPVAL (LAST_TT+2)
142#define LUA_TDEADKEY (LAST_TT+3)
143
144/* Internal object tags.
145**
146** Internal tags overlap the MSW of a number object (must be a double).
147** Interpreted as a double these are special NaNs. The FPU only generates
148** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
149** for use as internal tags. Small negative numbers are used to shorten the
150** encoding of type comparisons (reg/mem against sign-ext. 8 bit immediate).
151**
152** ---MSW---.---LSW---
153** primitive types | itype | |
154** lightuserdata | itype | void * | (32 bit platforms)
155** lightuserdata |fffc| void * | (64 bit platforms, 48 bit pointers)
156** GC objects | itype | GCRef |
157** number -------double------
158**
159** ORDER LJ_T
160** Primitive types nil/false/true must be first, lightuserdata next.
161** GC objects are at the end, table/userdata must be lowest.
162** Also check lj_ir.h for similar ordering constraints.
163*/
164#define LJ_TNIL (-1)
165#define LJ_TFALSE (-2)
166#define LJ_TTRUE (-3)
167#define LJ_TLIGHTUD (-4)
168#define LJ_TSTR (-5)
169#define LJ_TUPVAL (-6)
170#define LJ_TTHREAD (-7)
171#define LJ_TPROTO (-8)
172#define LJ_TFUNC (-9)
173#define LJ_TDEADKEY (-10)
174#define LJ_TTAB (-11)
175#define LJ_TUDATA (-12)
176/* This is just the canonical number type used in some places. */
177#define LJ_TNUMX (-13)
178
179#if LJ_64
180#define LJ_TISNUM ((uint32_t)0xfff80000)
181#else
182#define LJ_TISNUM ((uint32_t)LJ_TNUMX)
183#endif
184#define LJ_TISTRUECOND ((uint32_t)LJ_TFALSE)
185#define LJ_TISPRI ((uint32_t)LJ_TTRUE)
186#define LJ_TISGCV ((uint32_t)(LJ_TSTR+1))
187#define LJ_TISTABUD ((uint32_t)LJ_TTAB)
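A standalone demonstration of the tagging scheme described above: writing any tag above 0xfff80000 into the high word of a double yields a NaN bit pattern, so a tag plus a 32 bit payload share one 8 byte slot with plain numbers, and a number is recognized by its high word alone. The union below assumes a little-endian IEEE-754 machine (LJ_ENDIAN_LOHI hides that in the real header) and its field names are illustrative.

#include <stdio.h>
#include <stdint.h>

typedef union {
  double n;
  struct { uint32_t lo; uint32_t hi; } u32;   /* Little-endian layout. */
} Slot;

int main(void)
{
  Slot num, str;
  num.n = 42.5;                        /* An ordinary number. */
  str.u32.hi = 0xfffffffbu;            /* A tag like LJ_TSTR (-5)... */
  str.u32.lo = 0x12345678u;            /* ...plus a 32 bit object reference. */
  printf("num tagged as number: %d\n", num.u32.hi <= 0xfff80000u);  /* 1 */
  printf("str tagged as number: %d\n", str.u32.hi <= 0xfff80000u);  /* 0 */
  printf("str reads back as NaN: %d\n", str.n != str.n);            /* 1 */
  return 0;
}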
188
189/* -- TValue getters/setters ---------------------------------------------- */
190
191/* Macros to test types. */
192#define itype(o) ((o)->it)
193#define uitype(o) ((uint32_t)itype(o))
194#define tvisnil(o) (itype(o) == LJ_TNIL)
195#define tvisfalse(o) (itype(o) == LJ_TFALSE)
196#define tvistrue(o) (itype(o) == LJ_TTRUE)
197#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
198#if LJ_64
199#define tvislightud(o) ((itype(o) >> 16) == LJ_TLIGHTUD)
200#else
201#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
202#endif
203#define tvisstr(o) (itype(o) == LJ_TSTR)
204#define tvisfunc(o) (itype(o) == LJ_TFUNC)
205#define tvisthread(o) (itype(o) == LJ_TTHREAD)
206#define tvisproto(o) (itype(o) == LJ_TPROTO)
207#define tvistab(o) (itype(o) == LJ_TTAB)
208#define tvisudata(o) (itype(o) == LJ_TUDATA)
209#define tvisnum(o) (uitype(o) <= LJ_TISNUM)
210
211#define tvistruecond(o) (uitype(o) < LJ_TISTRUECOND)
212#define tvispri(o) (uitype(o) >= LJ_TISPRI)
213#define tvistabud(o) (uitype(o) <= LJ_TISTABUD) /* && !tvisnum() */
214#define tvisgcv(o) \
215 ((uitype(o) - LJ_TISGCV) > ((uint32_t)LJ_TNUMX - LJ_TISGCV))
216
217/* Special macros to test numbers for NaN, +0, -0, +1 and raw equality. */
218#define tvisnan(o) ((o)->n != (o)->n)
219#define tvispzero(o) ((o)->u64 == 0)
220#define tvismzero(o) ((o)->u64 == U64x(80000000,00000000))
221#define tvispone(o) ((o)->u64 == U64x(3ff00000,00000000))
222#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
223
224/* Macros to convert type ids. */
225#if LJ_64
226#define itypemap(o) \
227 (tvisnum(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
228#else
229#define itypemap(o) (tvisnum(o) ? ~LJ_TNUMX : ~itype(o))
230#endif
231
232/* Macros to get tagged values. */
233#define gcval(o) (gcref((o)->gcr))
234#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it))
235#if LJ_64
236#define lightudV(o) check_exp(tvislightud(o), \
237 (void *)((o)->u64 & U64x(0000ffff,ffffffff)))
238#else
239#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
240#endif
241#define gcV(o) check_exp(tvisgcv(o), gcval(o))
242#define strV(o) check_exp(tvisstr(o), &gcval(o)->str)
243#define funcV(o) check_exp(tvisfunc(o), &gcval(o)->fn)
244#define threadV(o) check_exp(tvisthread(o), &gcval(o)->th)
245#define protoV(o) check_exp(tvisproto(o), &gcval(o)->pt)
246#define tabV(o) check_exp(tvistab(o), &gcval(o)->tab)
247#define udataV(o) check_exp(tvisudata(o), &gcval(o)->ud)
248#define numV(o) check_exp(tvisnum(o), (o)->n)
249
250/* Macros to set tagged values. */
251#define setitype(o, i) ((o)->it = (i))
252#define setnilV(o) ((o)->it = LJ_TNIL)
253#define setboolV(o, x) ((o)->it = LJ_TFALSE-(x))
254
255#if LJ_64
256#define checklightudptr(L, p) \
257 (((uint64_t)(p) >> 48) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
258#define setlightudV(o, x) \
259 ((o)->u64 = (uint64_t)(x) | (((uint64_t)LJ_TLIGHTUD) << 48))
260#define setcont(o, x) \
261 ((o)->u64 = (uint64_t)(x) - (uint64_t)lj_vm_asm_begin)
262#else
263#define checklightudptr(L, p) (p)
264#define setlightudV(o, x) \
265 { TValue *i_o = (o); \
266 setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; }
267#define setcont(o, x) \
268 { TValue *i_o = (o); \
269 setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; }
270#endif
271
272#define tvchecklive(g, o) \
273 lua_assert(!tvisgcv(o) || \
274 ((~itype(o) == gcval(o)->gch.gct) && !isdead(g, gcval(o))))
275
276#define setgcV(L, o, x, itype) \
277 { TValue *i_o = (o); \
278 setgcrefp(i_o->gcr, &(x)->nextgc); i_o->it = itype; \
279 tvchecklive(G(L), i_o); }
280#define setstrV(L, o, x) setgcV(L, o, x, LJ_TSTR)
281#define setthreadV(L, o, x) setgcV(L, o, x, LJ_TTHREAD)
282#define setprotoV(L, o, x) setgcV(L, o, x, LJ_TPROTO)
283#define setfuncV(L, o, x) setgcV(L, o, &(x)->l, LJ_TFUNC)
284#define settabV(L, o, x) setgcV(L, o, x, LJ_TTAB)
285#define setudataV(L, o, x) setgcV(L, o, x, LJ_TUDATA)
286
287#define setnumV(o, x) ((o)->n = (x))
288#define setnanV(o) ((o)->u64 = U64x(fff80000,00000000))
289#define setintV(o, i) ((o)->n = cast_num((int32_t)(i)))
290
291/* Copy tagged values. */
292#define copyTV(L, o1, o2) \
293 { cTValue *i_o2 = (o2); TValue *i_o1 = (o1); \
294 *i_o1 = *i_o2; tvchecklive(G(L), i_o1); }
295
296/* -- String object ------------------------------------------------------- */
297
298/* String object header. String payload follows. */
299typedef struct GCstr {
300 GCHeader;
301 uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
302 uint8_t unused;
303 MSize hash; /* Hash of string. */
304 MSize len; /* Size of string. */
305} GCstr;
306
307#define strref(r) (&gcref((r))->str)
308#define strdata(s) ((const char *)((s)+1))
309#define strdatawr(s) ((char *)((s)+1))
310#define strVdata(o) strdata(strV(o))
311#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1)
312
313/* -- Userdata object ----------------------------------------------------- */
314
315/* Userdata object. Payload follows. */
316typedef struct GCudata {
317 GCHeader;
318 uint8_t unused1;
319 uint8_t unused2;
320 GCRef env; /* Should be at same offset in GCfunc. */
321 MSize len; /* Size of payload. */
322 GCRef metatable; /* Must be at same offset in GCtab. */
323 uint32_t align1; /* To force 8 byte alignment of the payload. */
324} GCudata;
325
326#define uddata(u) ((void *)((u)+1))
327#define sizeudata(u) (sizeof(struct GCudata)+(u)->len)
328
329/* -- Prototype object ---------------------------------------------------- */
330
331/* Split constant array. Collectables are below, numbers above pointer. */
332typedef union ProtoK {
333 lua_Number *n; /* Numbers. */
334 GCRef *gc; /* Collectable objects (strings/table/proto). */
335} ProtoK;
336
337#define SCALE_NUM_GCO ((int32_t)sizeof(lua_Number)/sizeof(GCRef))
338#define round_nkgc(n) (((n) + SCALE_NUM_GCO-1) & ~(SCALE_NUM_GCO-1))
339
340typedef struct VarInfo {
341 GCstr *name; /* Local variable name. */
342 BCPos startpc; /* First point where the local variable is active. */
343 BCPos endpc; /* First point where the local variable is dead. */
344} VarInfo;
345
346typedef struct GCproto {
347 GCHeader;
348 uint8_t numparams; /* Number of parameters. */
349 uint8_t framesize; /* Fixed frame size. */
350 MSize sizebc; /* Number of bytecode instructions. */
351 GCRef gclist;
352 ProtoK k; /* Split constant array (points to the middle). */
353 BCIns *bc; /* Array of bytecode instructions. */
354 int16_t *uv; /* Upvalue list. local >= 0. parent uv < 0. */
355 MSize sizekgc; /* Number of collectable constants. */
356 MSize sizekn; /* Number of lua_Number constants. */
357 uint8_t sizeuv; /* Number of upvalues. */
358 uint8_t flags; /* Miscellaneous flags (see below). */
359 uint16_t trace; /* Anchor for chain of root traces. */
360 /* ------ The following fields are for debugging/tracebacks only ------ */
361 MSize sizelineinfo; /* Size of lineinfo array (may be 0). */
362 MSize sizevarinfo; /* Size of local var info array (may be 0). */
363 MSize sizeuvname; /* Size of upvalue names array (may be 0). */
364 BCLine linedefined; /* First line of the function definition. */
365 BCLine lastlinedefined; /* Last line of the function definition. */
366 BCLine *lineinfo; /* Map from bytecode instructions to source lines. */
367 struct VarInfo *varinfo; /* Names and extents of local variables. */
368 GCstr **uvname; /* Upvalue names. */
369 GCstr *chunkname; /* Name of the chunk this function was defined in. */
370} GCproto;
371
372#define PROTO_IS_VARARG 0x01
373#define PROTO_HAS_FNEW 0x02
374#define PROTO_HAS_RETURN 0x04
375#define PROTO_FIXUP_RETURN 0x08
376#define PROTO_NO_JIT 0x10
377#define PROTO_HAS_ILOOP 0x20
378
379/* -- Upvalue object ------------------------------------------------------ */
380
381typedef struct GCupval {
382 GCHeader;
383 uint8_t closed; /* Set if closed (i.e. uv->v == &uv->u.value). */
384 uint8_t unused;
385 union {
386 TValue tv; /* If closed: the value itself. */
387 struct { /* If open: double linked list, anchored at thread. */
388 GCRef prev;
389 GCRef next;
390 };
391 };
392 TValue *v; /* Points to stack slot (open) or above (closed). */
393#if LJ_32
394 int32_t unusedv; /* For consistent alignment (32 bit only). */
395#endif
396} GCupval;
397
398#define uvprev(uv_) (&gcref((uv_)->prev)->uv)
399#define uvnext(uv_) (&gcref((uv_)->next)->uv)
400
401/* -- Function object (closures) ------------------------------------------ */
402
403/* Common header for functions. env should be at same offset in GCudata. */
404#define GCfuncHeader \
405 GCHeader; uint8_t ffid; uint8_t nupvalues; \
406 GCRef env; GCRef gclist; ASMFunction gate
407
408typedef struct GCfuncC {
409 GCfuncHeader;
410 lua_CFunction f; /* C function to be called. */
411 TValue upvalue[1]; /* Array of upvalues (TValue). */
412} GCfuncC;
413
414typedef struct GCfuncL {
415 GCfuncHeader;
416 GCRef pt; /* Link to prototype this function is based on. */
417 GCRef uvptr[1]; /* Array of _pointers_ to upvalue objects (GCupval). */
418} GCfuncL;
419
420typedef union GCfunc {
421 GCfuncC c;
422 GCfuncL l;
423} GCfunc;
424
425#define FF_LUA 0
426#define FF_C 1
427#define isluafunc(fn) ((fn)->c.ffid == FF_LUA)
428#define iscfunc(fn) ((fn)->c.ffid == FF_C)
429#define isffunc(fn) ((fn)->c.ffid > FF_C)
430#define funcproto(fn) check_exp(isluafunc(fn), &gcref((fn)->l.pt)->pt)
431#define sizeCfunc(n) (sizeof(GCfuncC) + sizeof(TValue)*((n)-1))
432#define sizeLfunc(n) (sizeof(GCfuncL) + sizeof(TValue *)*((n)-1))
433
434/* -- Table object -------------------------------------------------------- */
435
436/* Hash node. */
437typedef struct Node {
438 TValue val; /* Value object. Must be first field. */
439 TValue key; /* Key object. */
440 MRef next; /* Hash chain. */
441 int32_t unused; /* For consistent alignment. */
442} Node;
443
444LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
445
446typedef struct GCtab {
447 GCHeader;
448 uint8_t nomm; /* Negative cache for fast metamethods. */
449 int8_t colo; /* Array colocation. */
450 MRef array; /* Array part. */
451 GCRef gclist;
452 GCRef metatable; /* Must be at same offset in GCudata. */
453 MRef node; /* Hash part. */
454 uint32_t asize; /* Size of array part (keys [0, asize-1]). */
455 uint32_t hmask; /* Hash part mask (size of hash part - 1). */
456 MRef lastfree; /* Any free position is before this position. */
457} GCtab;
458
459#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
460#define tabref(r) (&gcref((r))->tab)
461#define noderef(r) (mref((r), Node))
462#define nextnode(n) (mref((n)->next, Node))
463
464/* -- State objects ------------------------------------------------------- */
465
466/* VM states. */
467enum {
468 LJ_VMST_INTERP, /* Interpreter. */
469 LJ_VMST_C, /* C function. */
470 LJ_VMST_GC, /* Garbage collector. */
471 LJ_VMST_EXIT, /* Trace exit handler. */
472 LJ_VMST_RECORD, /* Trace recorder. */
473 LJ_VMST_OPT, /* Optimizer. */
474 LJ_VMST_ASM, /* Assembler. */
475 LJ_VMST__MAX
476};
477
478#define setvmstate(g, st) ((g)->vmstate = ~LJ_VMST_##st)
479
480/* Metamethods. */
481#define MMDEF(_) \
482 _(index) _(newindex) _(gc) _(mode) _(eq) \
483 /* Only the above (fast) metamethods are negative cached (max. 8). */ \
484 _(len) _(lt) _(le) _(concat) _(call) \
485 /* The following must be in ORDER ARITH. */ \
486 _(add) _(sub) _(mul) _(div) _(mod) _(pow) _(unm) \
487 /* The following are used in the standard libraries. */ \
488 _(metatable) _(tostring)
489
490typedef enum {
491#define MMENUM(name) MM_##name,
492MMDEF(MMENUM)
493#undef MMENUM
494 MM_MAX,
495 MM____ = MM_MAX,
496 MM_FAST = MM_eq
497} MMS;
498
499#define BASEMT_MAX ((~LJ_TNUMX)+1)
500
501typedef struct GCState {
502 MSize total; /* Memory currently allocated. */
503 MSize threshold; /* Memory threshold. */
504 uint8_t currentwhite; /* Current white color. */
505 uint8_t state; /* GC state. */
506 uint8_t unused1;
507 uint8_t unused2;
508 MSize sweepstr; /* Sweep position in string table. */
509 GCRef root; /* List of all collectable objects. */
510 GCRef *sweep; /* Sweep position in root list. */
511 GCRef gray; /* List of gray objects. */
512 GCRef grayagain; /* List of objects for atomic traversal. */
513 GCRef weak; /* List of weak tables (to be cleared). */
514 GCRef mmudata; /* List of userdata (to be finalized). */
515 MSize stepmul; /* Incremental GC step granularity. */
516 MSize debt; /* Debt (how much GC is behind schedule). */
517 MSize estimate; /* Estimate of memory actually in use. */
518 MSize pause; /* Pause between successive GC cycles. */
519} GCState;
520
521/* Global state, shared by all threads of a Lua universe. */
522typedef struct global_State {
523 GCRef *strhash; /* String hash table (hash chain anchors). */
524 MSize strmask; /* String hash mask (size of hash table - 1). */
525 MSize strnum; /* Number of strings in hash table. */
526 lua_Alloc allocf; /* Memory allocator. */
527 void *allocd; /* Memory allocator data. */
528 GCState gc; /* Garbage collector. */
529 SBuf tmpbuf; /* Temporary buffer for string concatenation. */
530 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
531 uint8_t hookmask; /* Hook mask. */
532 uint8_t dispatchmode; /* Dispatch mode. */
533 uint8_t vmevmask; /* VM event mask. */
534 uint8_t unused1;
535 GCRef mainthref; /* Link to main thread. */
536 TValue registrytv; /* Anchor for registry. */
537 TValue tmptv; /* Temporary TValue. */
538 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
539 int32_t hookcount; /* Instruction hook countdown. */
540 int32_t hookcstart; /* Start count for instruction hook counter. */
541 lua_Hook hookf; /* Hook function. */
542 lua_CFunction panic; /* Called as a last resort for errors. */
543 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
544 GCRef jit_L; /* Current JIT code lua_State or NULL. */
545 MRef jit_base; /* Current JIT code L->base. */
546 GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */
547 GCRef mmname[MM_MAX]; /* Array holding metamethod names. */
548} global_State;
549
550#define mainthread(g) (&gcref(g->mainthref)->th)
551#define niltv(L) \
552 check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val)
553#define niltvg(g) \
554 check_exp(tvisnil(&(g)->nilnode.val), &(g)->nilnode.val)
555
556/* Hook management. Hook event masks are defined in lua.h. */
557#define HOOK_EVENTMASK 0x0f
558#define HOOK_ACTIVE 0x10
559#define HOOK_VMEVENT 0x20
560#define HOOK_GC 0x40
561#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
562#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
563#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
564#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
565#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
566#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
567#define hook_restore(g, h) \
568 ((g)->hookmask = ((g)->hookmask & HOOK_EVENTMASK) | (h))
569
570/* Per-thread state object. */
571struct lua_State {
572 GCHeader;
573 uint8_t dummy_ffid; /* Fake FF_C for curr_funcisL() on dummy frames. */
574 uint8_t status; /* Thread status. */
575 MRef glref; /* Link to global state. */
576 GCRef gclist; /* GC chain. */
577 TValue *base; /* Base of currently executing function. */
578 TValue *top; /* First free slot in the stack. */
579 TValue *maxstack; /* Last free slot in the stack. */
580 TValue *stack; /* Stack base. */
581 GCRef openupval; /* List of open upvalues in the stack. */
582 GCRef env; /* Thread environment (table of globals). */
583 void *cframe; /* End of C stack frame chain. */
584 MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */
585};
586
587#define G(L) (mref(L->glref, global_State))
588#define registry(L) (&G(L)->registrytv)
589
590/* Macros to access the currently executing (Lua) function. */
591#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
592#define curr_funcisL(L) (isluafunc(curr_func(L)))
593#define curr_proto(L) (funcproto(curr_func(L)))
594#define curr_topL(L) (L->base + curr_proto(L)->framesize)
595#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top)
596
597/* -- GC object definition and conversions -------------------------------- */
598
599/* GC header for generic access to common fields of GC objects. */
600typedef struct GChead {
601 GCHeader;
602 uint8_t unused1;
603 uint8_t unused2;
604 GCRef env;
605 GCRef gclist;
606 GCRef metatable;
607} GChead;
608
609/* The env field SHOULD be at the same offset for all GC objects. */
610LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCfuncL, env));
611LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCudata, env));
612
613/* The metatable field MUST be at the same offset for all GC objects. */
614LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCtab, metatable));
615LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCudata, metatable));
616
617/* The gclist field MUST be at the same offset for all GC objects. */
618LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(lua_State, gclist));
619LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCproto, gclist));
620LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCfuncL, gclist));
621LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtab, gclist));
622
623typedef union GCobj {
624 GChead gch;
625 GCstr str;
626 GCupval uv;
627 lua_State th;
628 GCproto pt;
629 GCfunc fn;
630 GCtab tab;
631 GCudata ud;
632} GCobj;
633
634/* Macros to convert a GCobj pointer into a specific value. */
635#define gco2str(o) check_exp((o)->gch.gct == ~LJ_TSTR, &(o)->str)
636#define gco2uv(o) check_exp((o)->gch.gct == ~LJ_TUPVAL, &(o)->uv)
637#define gco2th(o) check_exp((o)->gch.gct == ~LJ_TTHREAD, &(o)->th)
638#define gco2pt(o) check_exp((o)->gch.gct == ~LJ_TPROTO, &(o)->pt)
639#define gco2func(o) check_exp((o)->gch.gct == ~LJ_TFUNC, &(o)->fn)
640#define gco2tab(o) check_exp((o)->gch.gct == ~LJ_TTAB, &(o)->tab)
641#define gco2ud(o) check_exp((o)->gch.gct == ~LJ_TUDATA, &(o)->ud)
642
643/* Macro to convert any collectable object into a GCobj pointer. */
644#define obj2gco(v) (cast(GCobj *, (v)))
645
646/* -- Number to integer conversion ---------------------------------------- */
647
648static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
649{
650 TValue o;
651 o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */
652 return (int32_t)o.u32.lo;
653}
654
655#if (defined(__i386__) || defined(_M_IX86)) && !defined(__SSE2__)
656#define lj_num2int(n) lj_num2bit((n))
657#else
658#define lj_num2int(n) ((int32_t)(n))
659#endif
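A standalone check of the bias trick in lj_num2bit() above: adding 2^52+2^51 forces the integer value of n into the low mantissa word of the double, so the low 32 bits can be read back directly (note it rounds to nearest rather than truncating). Assumes little-endian IEEE-754 doubles; for demonstration only.

#include <stdio.h>
#include <stdint.h>

static int32_t num2bit(double n)
{
  union { double d; struct { uint32_t lo; uint32_t hi; } u32; } o;
  o.d = n + 6755399441055744.0;  /* 2^52 + 2^51 */
  return (int32_t)o.u32.lo;
}

int main(void)
{
  printf("%d %d %d\n", num2bit(7.0), num2bit(-3.0), num2bit(2.6));
  /* Prints: 7 -3 3  (round-to-nearest, not truncation) */
  return 0;
}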
660
661/* -- Miscellaneous object handling --------------------------------------- */
662
663/* Names and maps for internal and external object tags. */
664LJ_DATA const char *const lj_obj_typename[1+LUA_TUPVAL+1];
665LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
666
667#define typename(o) (lj_obj_itypename[itypemap(o)])
668
669/* Compare two objects without calling metamethods. */
670LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2);
671
672#ifdef LUA_USE_ASSERT
673#include "lj_gc.h"
674#endif
675
676#endif
diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c
new file mode 100644
index 00000000..0cd60830
--- /dev/null
+++ b/src/lj_opt_dce.c
@@ -0,0 +1,79 @@
1/*
2** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_opt_dce_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_ir.h"
14#include "lj_jit.h"
15#include "lj_iropt.h"
16
17/* Some local macros to save typing. Undef'd at the end. */
18#define IR(ref) (&J->cur.ir[(ref)])
19
20/* Scan through all snapshots and mark all referenced instructions. */
21static void dce_marksnap(jit_State *J)
22{
23 SnapNo i, nsnap = J->cur.nsnap;
24 for (i = 0; i < nsnap; i++) {
25 SnapShot *snap = &J->cur.snap[i];
26 IRRef2 *map = &J->cur.snapmap[snap->mapofs];
27 BCReg s, nslots = snap->nslots;
28 for (s = 0; s < nslots; s++) {
29 IRRef ref = snap_ref(map[s]);
30 if (!irref_isk(ref))
31 irt_setmark(IR(ref)->t);
32 }
33 }
34}
35
36/* Backwards propagate marks. Replace unused instructions with NOPs. */
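/* A short note on the rerouting below: pchain[o] holds the address of the
** chain link (J->chain[o] or the prev field of a live instruction already
** visited above) that points at the next instruction with opcode o the
** backward scan will reach. Splicing a dead instruction out of its opcode
** chain is then a single store through that link, and the freed slot is
** appended to the NOP chain in the same way.
*/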
37static void dce_propagate(jit_State *J)
38{
39 IRRef1 *pchain[IR__MAX];
40 IRRef ins;
41 uint32_t i;
42 for (i = 0; i < IR__MAX; i++) pchain[i] = &J->chain[i];
43 for (ins = J->cur.nins-1; ins >= REF_FIRST; ins--) {
44 IRIns *ir = IR(ins);
45 if (irt_ismarked(ir->t)) {
46 irt_clearmark(ir->t);
47 pchain[ir->o] = &ir->prev;
48 } else if (!(irt_isguard(ir->t) || irm_sideeff(lj_ir_mode[ir->o]))) {
49 *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */
50 *pchain[IR_NOP] = (IRRef1)ins;
51 ir->t.irt = IRT_NIL;
52 ir->o = IR_NOP; /* Replace instruction with NOP. */
53 ir->op1 = ir->op2 = 0;
54 pchain[IR_NOP] = &ir->prev;
55 continue;
56 }
57 if (!irref_isk(ir->op1)) irt_setmark(IR(ir->op1)->t);
58 if (!irref_isk(ir->op2)) irt_setmark(IR(ir->op2)->t);
59 }
60 *pchain[IR_NOP] = 0; /* Terminate NOP chain. */
61}
62
63/* Dead Code Elimination.
64**
65** First backpropagate marks for all used instructions. Then replace
66** the unused ones with a NOP. Note that compressing the IR to eliminate
67** the NOPs does not pay off.
68*/
69void lj_opt_dce(jit_State *J)
70{
71 if ((J->flags & JIT_F_OPT_DCE)) {
72 dce_marksnap(J);
73 dce_propagate(J);
74 }
75}
76
77#undef IR
78
79#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
new file mode 100644
index 00000000..e5d98162
--- /dev/null
+++ b/src/lj_opt_fold.c
@@ -0,0 +1,1415 @@
1/*
2** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
3** CSE: Common-Subexpression Elimination.
4** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
5*/
6
7#define lj_opt_fold_c
8#define LUA_CORE
9
10#include "lj_obj.h"
11
12#if LJ_HASJIT
13
14#include "lj_str.h"
15#include "lj_ir.h"
16#include "lj_jit.h"
17#include "lj_iropt.h"
18#include "lj_trace.h"
19#include "lj_vm.h"
20
21/* Here's a short description of how the FOLD engine processes instructions:
22**
23** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
24** The instruction and its operands are used to select matching fold rules.
25** These are applied iteratively until a fixed point is reached.
26**
27** The 8 bit opcode of the instruction itself plus the opcodes of the
28** two instructions referenced by its operands form a 24 bit key
29** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
30**
31** This key is used for partial matching against the fold rules. The
32** left/right operand fields of the key are successively masked with
33** the 'any' wildcard, from most specific to least specific:
34**
35** ins left right
36** ins any right
37** ins left any
38** ins any any
39**
40** The masked key is used to look up a matching fold rule in a semi-perfect
41** hash table. If a matching rule is found, the related fold function is run.
42** Multiple rules can share the same fold function. A fold rule may return
43** one of several special values:
44**
45** - NEXTFOLD means no folding was applied, because an additional test
46** inside the fold function failed. Matching continues against less
47** specific fold rules. Finally the instruction is passed on to CSE.
48**
49** - RETRYFOLD means the instruction was modified in-place. Folding is
50** retried as if this instruction had just been received.
51**
52** All other return values are terminal actions -- no further folding is
53** applied:
54**
55** - INTFOLD(i) returns a reference to the integer constant i.
56**
57** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
58** without emitting an instruction.
59**
60** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
61** it without passing through any further optimizations.
62**
63** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
64** no result (e.g. guarded assertions): FAILFOLD means the guard would
65** always fail, i.e. the current trace is pointless. DROPFOLD means
66** the guard is always true and has been eliminated. CONDFOLD is a
67** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
68**
69** - Any other return value is interpreted as an IRRef or TRef. This
70** can be a reference to an existing or a newly created instruction.
71** Only the least-significant 16 bits (IRRef1) are used to form a TRef
72** which is finally returned to the caller.
73**
74** The FOLD engine receives instructions both from the trace recorder and
75** substituted instructions from LOOP unrolling. This means all types
76** of instructions may end up here, even though the recorder bypasses
77** FOLD in some cases. Thus all loads, stores and allocations must have
78** an any/any rule to avoid being passed on to CSE.
79**
80** Carefully read the following requirements before adding or modifying
81** any fold rules:
82**
83** Requirement #1: All fold rules must preserve their destination type.
84**
85** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
86** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
87**
88** Requirement #2: Fold rules should not create *new* instructions which
89** reference operands *across* PHIs.
90**
91** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
92** left operand is a PHI. Then fleft->op1 would point across the PHI
93** frontier to an invariant instruction. Adding a PHI for this instruction
94** would be counterproductive. The solution is to add a barrier which
95** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
96** The only exception is for recurrences with high latencies like
97** repeated int->num->int conversions.
98**
99** One could relax this condition a bit if the referenced instruction is
100** a PHI, too. But this often leads to worse code due to excessive
101** register shuffling.
102**
103** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
104** Even returning fleft->op1 would be ok, because a new PHI will be added,
105** if needed. But again, this leads to excessive register shuffling and
106** should be avoided.
107**
108** Requirement #3: The set of all fold rules must be monotonic to guarantee
109** termination.
110**
111** The goal is optimization, so one primarily wants to add strength-reducing
112** rules. This means eliminating an instruction or replacing an instruction
113** with one or more simpler instructions. Don't add fold rules which point
114** in the other direction.
115**
116** Some rules (like commutativity) do not directly reduce the strength of
117** an instruction, but enable other fold rules (e.g. by moving constants
118** to the right operand). These rules must be made unidirectional to avoid
119** cycles.
120**
121** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
122*/
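/* A minimal sketch of the key construction and wildcard masking described
** above (it mirrors the lookup loop in lj_opt_fold at the end of this file):
**
**   key = (fins->o << 16) + (leftop << 8) + rightop;  -- 'ins left right',
**                                                        rightop may be the
**                                                        low 8 bits of a literal
**   try key | 0x0000   -- ins left right
**   try key | 0xff00   -- ins any  right
**   try key | 0x00ff   -- ins left any
**   try key | 0xffff   -- ins any  any, then give up and pass on to CSE
**
** The mask is advanced with any = (any | (any >> 8)) ^ 0xff00.
*/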
123
124/* Some local macros to save typing. Undef'd at the end. */
125#define IR(ref) (&J->cur.ir[(ref)])
126#define fins (&J->fold.ins)
127#define fleft (&J->fold.left)
128#define fright (&J->fold.right)
129#define knumleft (ir_knum(fleft)->n)
130#define knumright (ir_knum(fright)->n)
131
132/* Pass IR on to next optimization in chain (FOLD). */
133#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
134
135/* Fold function type. Fastcall on x86 significantly reduces their size. */
136typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
137
138/* Macros for the fold specs, so buildvm can recognize them. */
139#define LJFOLD(x)
140#define LJFOLDX(x)
141#define LJFOLDF(name) static TRef LJ_FASTCALL name(jit_State *J)
142/* Note: They must be at the start of a line or buildvm ignores them! */
143
144/* Barrier to prevent using operands across PHIs. */
145#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD
146
147/* Barrier to prevent folding across a GC step.
148** GC steps can only happen at the head of a trace and at LOOP.
149** And the GC is only driven forward if there is at least one allocation.
150*/
151#define gcstep_barrier(J, ref) \
152 ((ref) < J->chain[IR_LOOP] && \
153 (J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
154 J->chain[IR_SNEW] || J->chain[IR_TOSTR]))
155
156/* -- Constant folding ---------------------------------------------------- */
157
158LJFOLD(ADD KNUM KNUM)
159LJFOLD(SUB KNUM KNUM)
160LJFOLD(MUL KNUM KNUM)
161LJFOLD(DIV KNUM KNUM)
162LJFOLD(NEG KNUM KNUM)
163LJFOLD(ABS KNUM KNUM)
164LJFOLD(ATAN2 KNUM KNUM)
165LJFOLD(LDEXP KNUM KNUM)
166LJFOLD(MIN KNUM KNUM)
167LJFOLD(MAX KNUM KNUM)
168LJFOLDF(kfold_numarith)
169{
170 lua_Number a = knumleft;
171 lua_Number b = knumright;
172 lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
173 return lj_ir_knum(J, y);
174}
175
176LJFOLD(FPMATH KNUM any)
177LJFOLDF(kfold_fpmath)
178{
179 lua_Number a = knumleft;
180 lua_Number y = lj_vm_foldfpm(a, fins->op2);
181 return lj_ir_knum(J, y);
182}
183
184LJFOLD(POWI KNUM KINT)
185LJFOLDF(kfold_powi)
186{
187 lua_Number a = knumleft;
188 lua_Number b = cast_num(fright->i);
189 lua_Number y = lj_vm_foldarith(a, b, IR_POWI - IR_ADD);
190 return lj_ir_knum(J, y);
191}
192
193static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
194{
195 switch (op) {
196 case IR_ADD: k1 += k2; break;
197 case IR_SUB: k1 -= k2; break;
198 case IR_BAND: k1 &= k2; break;
199 case IR_BOR: k1 |= k2; break;
200 case IR_BXOR: k1 ^= k2; break;
201 case IR_BSHL: k1 <<= (k2 & 31); break;
202 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
203 case IR_BSAR: k1 >>= (k2 & 31); break;
204 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
205 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
206 default: lua_assert(0); break;
207 }
208 return k1;
209}
210
211LJFOLD(ADD KINT KINT)
212LJFOLD(SUB KINT KINT)
213LJFOLD(BAND KINT KINT)
214LJFOLD(BOR KINT KINT)
215LJFOLD(BXOR KINT KINT)
216LJFOLD(BSHL KINT KINT)
217LJFOLD(BSHR KINT KINT)
218LJFOLD(BSAR KINT KINT)
219LJFOLD(BROL KINT KINT)
220LJFOLD(BROR KINT KINT)
221LJFOLDF(kfold_intarith)
222{
223 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
224}
225
226LJFOLD(BNOT KINT)
227LJFOLDF(kfold_bnot)
228{
229 return INTFOLD(~fleft->i);
230}
231
232LJFOLD(BSWAP KINT)
233LJFOLDF(kfold_bswap)
234{
235 return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
236}
237
238LJFOLD(TONUM KINT)
239LJFOLDF(kfold_tonum)
240{
241 return lj_ir_knum(J, cast_num(fleft->i));
242}
243
244LJFOLD(TOBIT KNUM KNUM)
245LJFOLDF(kfold_tobit)
246{
247 TValue tv;
248 tv.n = knumleft + knumright;
249 return INTFOLD((int32_t)tv.u32.lo);
250}
251
252LJFOLD(TOINT KNUM any)
253LJFOLDF(kfold_toint)
254{
255 lua_Number n = knumleft;
256 int32_t k = lj_num2int(n);
257 if (irt_isguard(fins->t) && n != cast_num(k)) {
258 /* We're about to create a guard which always fails, like TOINT +1.5.
259 ** Some pathological loops cause this during LICM, e.g.:
260 ** local x,k,t = 0,1.5,{1,[1.5]=2}
261 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
262 ** assert(x == 300)
263 */
264 return FAILFOLD;
265 }
266 return INTFOLD(k);
267}
268
269LJFOLD(TOSTR KNUM)
270LJFOLDF(kfold_tostr_knum)
271{
272 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
273}
274
275LJFOLD(TOSTR KINT)
276LJFOLDF(kfold_tostr_kint)
277{
278 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
279}
280
281LJFOLD(STRTO KGC)
282LJFOLDF(kfold_strto)
283{
284 TValue n;
285 if (lj_str_numconv(strdata(ir_kstr(fleft)), &n))
286 return lj_ir_knum(J, numV(&n));
287 return FAILFOLD;
288}
289
290LJFOLD(SNEW STRREF KINT)
291LJFOLDF(kfold_snew)
292{
293 if (fright->i == 0)
294 return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
295 PHIBARRIER(fleft);
296 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
297 const char *s = strdata(ir_kstr(IR(fleft->op1)));
298 int32_t ofs = IR(fleft->op2)->i;
299 return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i));
300 }
301 return NEXTFOLD;
302}
303
304/* Must not use kfold_kref for numbers (could be NaN). */
305LJFOLD(EQ KNUM KNUM)
306LJFOLD(NE KNUM KNUM)
307LJFOLD(LT KNUM KNUM)
308LJFOLD(GE KNUM KNUM)
309LJFOLD(LE KNUM KNUM)
310LJFOLD(GT KNUM KNUM)
311LJFOLD(ULT KNUM KNUM)
312LJFOLD(UGE KNUM KNUM)
313LJFOLD(ULE KNUM KNUM)
314LJFOLD(UGT KNUM KNUM)
315LJFOLDF(kfold_numcomp)
316{
317 return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
318}
319
320LJFOLD(LT KINT KINT)
321LJFOLD(GE KINT KINT)
322LJFOLD(LE KINT KINT)
323LJFOLD(GT KINT KINT)
324LJFOLD(ULT KINT KINT)
325LJFOLD(UGE KINT KINT)
326LJFOLD(ULE KINT KINT)
327LJFOLD(UGT KINT KINT)
328LJFOLD(ABC KINT KINT)
329LJFOLDF(kfold_intcomp)
330{
331 int32_t a = fleft->i, b = fright->i;
332 switch ((IROp)fins->o) {
333 case IR_LT: return CONDFOLD(a < b);
334 case IR_GE: return CONDFOLD(a >= b);
335 case IR_LE: return CONDFOLD(a <= b);
336 case IR_GT: return CONDFOLD(a > b);
337 case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
338 case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
339 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
340 case IR_ABC:
341 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
342 default: lua_assert(0); return FAILFOLD;
343 }
344}
345
346LJFOLD(LT KGC KGC)
347LJFOLD(GE KGC KGC)
348LJFOLD(LE KGC KGC)
349LJFOLD(GT KGC KGC)
350LJFOLDF(kfold_strcomp)
351{
352 if (irt_isstr(fins->t)) {
353 GCstr *a = ir_kstr(fleft);
354 GCstr *b = ir_kstr(fright);
355 return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o));
356 }
357 return NEXTFOLD;
358}
359
360/* Don't constant-fold away FLOAD checks against KNULL. */
361LJFOLD(EQ FLOAD KNULL)
362LJFOLD(NE FLOAD KNULL)
363LJFOLDX(lj_opt_cse)
364
365/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
366LJFOLD(EQ any KNULL)
367LJFOLD(NE any KNULL)
368LJFOLD(EQ KNULL any)
369LJFOLD(NE KNULL any)
370LJFOLD(EQ KINT KINT) /* Constants are unique, so same refs <==> same value. */
371LJFOLD(NE KINT KINT)
372LJFOLD(EQ KGC KGC)
373LJFOLD(NE KGC KGC)
374LJFOLDF(kfold_kref)
375{
376 return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
377}
378
379/* -- Algebraic shortcuts ------------------------------------------------- */
380
381LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
382LJFOLD(FPMATH FPMATH IRFPM_CEIL)
383LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
384LJFOLDF(shortcut_round)
385{
386 IRFPMathOp op = (IRFPMathOp)fleft->op2;
387 if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
388 return LEFTFOLD; /* round(round_left(x)) = round_left(x) */
389 return NEXTFOLD;
390}
391
392LJFOLD(FPMATH TONUM IRFPM_FLOOR)
393LJFOLD(FPMATH TONUM IRFPM_CEIL)
394LJFOLD(FPMATH TONUM IRFPM_TRUNC)
395LJFOLD(ABS ABS KNUM)
396LJFOLDF(shortcut_left)
397{
398 return LEFTFOLD; /* f(g(x)) ==> g(x) */
399}
400
401LJFOLD(ABS NEG KNUM)
402LJFOLDF(shortcut_dropleft)
403{
404 PHIBARRIER(fleft);
405 fins->op1 = fleft->op1; /* abs(neg(x)) ==> abs(x) */
406 return RETRYFOLD;
407}
408
409/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
410LJFOLD(NEG NEG KNUM)
411LJFOLD(BNOT BNOT)
412LJFOLD(BSWAP BSWAP)
413LJFOLDF(shortcut_leftleft)
414{
415 PHIBARRIER(fleft); /* See above. Fold would be ok, but not beneficial. */
416 return fleft->op1; /* f(g(x)) ==> x */
417}
418
419LJFOLD(TONUM TOINT)
420LJFOLDF(shortcut_leftleft_toint)
421{
422 PHIBARRIER(fleft);
423 if (irt_isguard(fleft->t)) /* Only safe with a guarded TOINT. */
424 return fleft->op1; /* f(g(x)) ==> x */
425 return NEXTFOLD;
426}
427
428LJFOLD(TOINT TONUM any)
429LJFOLD(TOBIT TONUM KNUM) /* The inverse must NOT be shortcut! */
430LJFOLDF(shortcut_leftleft_across_phi)
431{
432 /* Fold even across PHI to avoid expensive int->num->int conversions. */
433 return fleft->op1; /* f(g(x)) ==> x */
434}
435
436/* -- FP algebraic simplifications ---------------------------------------- */
437
438/* FP arithmetic is tricky -- there's not much to simplify.
439** Please note the following common pitfalls before sending "improvements":
440** x+0 ==> x is INVALID for x=-0
441** 0-x ==> -x is INVALID for x=+0
442** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN
443*/
444
445LJFOLD(ADD NEG any)
446LJFOLDF(simplify_numadd_negx)
447{
448 PHIBARRIER(fleft);
449 fins->o = IR_SUB; /* (-a) + b ==> b - a */
450 fins->op1 = fins->op2;
451 fins->op2 = fleft->op1;
452 return RETRYFOLD;
453}
454
455LJFOLD(ADD any NEG)
456LJFOLDF(simplify_numadd_xneg)
457{
458 PHIBARRIER(fright);
459 fins->o = IR_SUB; /* a + (-b) ==> a - b */
460 fins->op2 = fright->op1;
461 return RETRYFOLD;
462}
463
464LJFOLD(SUB any KNUM)
465LJFOLDF(simplify_numsub_k)
466{
467 lua_Number n = knumright;
468 if (n == 0.0) /* x - (+-0) ==> x */
469 return LEFTFOLD;
470 return NEXTFOLD;
471}
472
473LJFOLD(SUB NEG KNUM)
474LJFOLDF(simplify_numsub_negk)
475{
476 PHIBARRIER(fleft);
477 fins->op2 = fleft->op1; /* (-x) - k ==> (-k) - x */
478 fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
479 return RETRYFOLD;
480}
481
482LJFOLD(SUB any NEG)
483LJFOLDF(simplify_numsub_xneg)
484{
485 PHIBARRIER(fright);
486 fins->o = IR_ADD; /* a - (-b) ==> a + b */
487 fins->op2 = fright->op1;
488 return RETRYFOLD;
489}
490
491LJFOLD(MUL any KNUM)
492LJFOLD(DIV any KNUM)
493LJFOLDF(simplify_nummuldiv_k)
494{
495 lua_Number n = knumright;
496 if (n == 1.0) { /* x o 1 ==> x */
497 return LEFTFOLD;
498 } else if (n == -1.0) { /* x o -1 ==> -x */
499 fins->o = IR_NEG;
500 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
501 return RETRYFOLD;
502 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
503 fins->o = IR_ADD;
504 fins->op2 = fins->op1;
505 return RETRYFOLD;
506 }
507 return NEXTFOLD;
508}
509
510LJFOLD(MUL NEG KNUM)
511LJFOLD(DIV NEG KNUM)
512LJFOLDF(simplify_nummuldiv_negk)
513{
514 PHIBARRIER(fleft);
515 fins->op1 = fleft->op1; /* (-a) o k ==> a o (-k) */
516 fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
517 return RETRYFOLD;
518}
519
520LJFOLD(MUL NEG NEG)
521LJFOLD(DIV NEG NEG)
522LJFOLDF(simplify_nummuldiv_negneg)
523{
524 PHIBARRIER(fleft);
525 PHIBARRIER(fright);
526 fins->op1 = fleft->op1; /* (-a) o (-b) ==> a o b */
527 fins->op2 = fright->op1;
528 return RETRYFOLD;
529}
530
531LJFOLD(POWI any KINT)
532LJFOLDF(simplify_powi_xk)
533{
534 int32_t k = fright->i;
535 TRef ref = fins->op1;
536 if (k == 0) /* x ^ 0 ==> 1 */
537 return lj_ir_knum_one(J); /* Result must be a number, not an int. */
538 if (k == 1) /* x ^ 1 ==> x */
539 return LEFTFOLD;
540 if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
541 return NEXTFOLD;
542 if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
543 ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
544 k = -k;
545 }
546 /* Unroll x^k for 1 <= k <= 65536. */
547 for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
548 ref = emitir(IRTN(IR_MUL), ref, ref);
549 if ((k >>= 1) != 0) { /* Handle trailing bits. */
550 TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
551 for (; k != 1; k >>= 1) {
552 if (k & 1)
553 ref = emitir(IRTN(IR_MUL), ref, tmp);
554 tmp = emitir(IRTN(IR_MUL), tmp, tmp);
555 }
556 ref = emitir(IRTN(IR_MUL), ref, tmp);
557 }
558 return ref;
559}
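/* A worked example of the unrolling above, e.g. k = 10 (binary 1010):
**   leading zeros:  ref = x*x              (= x^2), k = 5
**   k >>= 1 -> 2:   tmp = ref*ref          (= x^4)
**   loop, k = 2:    bit clear, tmp = tmp*tmp (= x^8), k = 1
**   final:          ref = ref*tmp          (= x^2 * x^8 = x^10)
** i.e. 4 multiplies instead of 9, bounded by roughly 2*log2(k) in general.
*/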
560
561LJFOLD(POWI KNUM any)
562LJFOLDF(simplify_powi_kx)
563{
564 lua_Number n = knumleft;
565 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
566 fins->o = IR_TONUM;
567 fins->op1 = fins->op2;
568 fins->op2 = 0;
569 fins->op2 = (IRRef1)lj_opt_fold(J);
570 fins->op1 = (IRRef1)lj_ir_knum_one(J);
571 fins->o = IR_LDEXP;
572 return RETRYFOLD;
573 }
574 return NEXTFOLD;
575}
576
577/* -- FP conversion narrowing --------------------------------------------- */
578
579LJFOLD(TOINT ADD any)
580LJFOLD(TOINT SUB any)
581LJFOLD(TOBIT ADD KNUM)
582LJFOLD(TOBIT SUB KNUM)
583LJFOLDF(narrow_convert)
584{
585 PHIBARRIER(fleft);
586 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
587 if (J->chain[IR_LOOP])
588 return NEXTFOLD;
589 return lj_opt_narrow_convert(J);
590}
591
592/* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */
593LJFOLD(TOINT any any)
594LJFOLDF(cse_toint)
595{
596 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
597 IRRef ref, op1 = fins->op1;
598 uint8_t guard = irt_isguard(fins->t);
599 for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev)
600 if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard)
601 return ref;
602 }
603 return EMITFOLD; /* No fallthrough to regular CSE. */
604}
605
606/* -- Integer algebraic simplifications ----------------------------------- */
607
608LJFOLD(ADD any KINT)
609LJFOLD(ADDOV any KINT)
610LJFOLD(SUBOV any KINT)
611LJFOLDF(simplify_intadd_k)
612{
613 if (fright->i == 0) /* i o 0 ==> i */
614 return LEFTFOLD;
615 return NEXTFOLD;
616}
617
618LJFOLD(SUB any KINT)
619LJFOLDF(simplify_intsub_k)
620{
621 if (fright->i == 0) /* i - 0 ==> i */
622 return LEFTFOLD;
623 fins->o = IR_ADD; /* i - k ==> i + (-k) */
624 fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */
625 return RETRYFOLD;
626}
627
628LJFOLD(SUB any any)
629LJFOLD(SUBOV any any)
630LJFOLDF(simplify_intsub)
631{
632 if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */
633 return INTFOLD(0);
634 return NEXTFOLD;
635}
636
637LJFOLD(SUB ADD any)
638LJFOLDF(simplify_intsubadd_leftcancel)
639{
640 if (!irt_isnum(fins->t)) {
641 PHIBARRIER(fleft);
642 if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */
643 return fleft->op2;
644 if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */
645 return fleft->op1;
646 }
647 return NEXTFOLD;
648}
649
650LJFOLD(SUB SUB any)
651LJFOLDF(simplify_intsubsub_leftcancel)
652{
653 if (!irt_isnum(fins->t)) {
654 PHIBARRIER(fleft);
655 if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */
656 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
657 fins->op2 = fleft->op2;
658 return RETRYFOLD;
659 }
660 }
661 return NEXTFOLD;
662}
663
664LJFOLD(SUB any SUB)
665LJFOLDF(simplify_intsubsub_rightcancel)
666{
667 if (!irt_isnum(fins->t)) {
668 PHIBARRIER(fright);
669 if (fins->op1 == fright->op1) /* i - (i - j) ==> j */
670 return fright->op2;
671 }
672 return NEXTFOLD;
673}
674
675LJFOLD(SUB any ADD)
676LJFOLDF(simplify_intsubadd_rightcancel)
677{
678 if (!irt_isnum(fins->t)) {
679 PHIBARRIER(fright);
680 if (fins->op1 == fright->op1) { /* i - (i + j) ==> 0 - j */
681 fins->op2 = fright->op2;
682 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
683 return RETRYFOLD;
684 }
685 if (fins->op1 == fright->op2) { /* i - (j + i) ==> 0 - j */
686 fins->op2 = fright->op1;
687 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
688 return RETRYFOLD;
689 }
690 }
691 return NEXTFOLD;
692}
693
694LJFOLD(SUB ADD ADD)
695LJFOLDF(simplify_intsubaddadd_cancel)
696{
697 if (!irt_isnum(fins->t)) {
698 PHIBARRIER(fleft);
699 PHIBARRIER(fright);
700 if (fleft->op1 == fright->op1) { /* (i + j1) - (i + j2) ==> j1 - j2 */
701 fins->op1 = fleft->op2;
702 fins->op2 = fright->op2;
703 return RETRYFOLD;
704 }
705 if (fleft->op1 == fright->op2) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
706 fins->op1 = fleft->op2;
707 fins->op2 = fright->op1;
708 return RETRYFOLD;
709 }
710 if (fleft->op2 == fright->op1) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
711 fins->op1 = fleft->op1;
712 fins->op2 = fright->op2;
713 return RETRYFOLD;
714 }
715 if (fleft->op2 == fright->op2) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
716 fins->op1 = fleft->op1;
717 fins->op2 = fright->op1;
718 return RETRYFOLD;
719 }
720 }
721 return NEXTFOLD;
722}
723
724LJFOLD(BAND any KINT)
725LJFOLDF(simplify_band_k)
726{
727 if (fright->i == 0) /* i & 0 ==> 0 */
728 return RIGHTFOLD;
729 if (fright->i == -1) /* i & -1 ==> i */
730 return LEFTFOLD;
731 return NEXTFOLD;
732}
733
734LJFOLD(BOR any KINT)
735LJFOLDF(simplify_bor_k)
736{
737 if (fright->i == 0) /* i | 0 ==> i */
738 return LEFTFOLD;
739 if (fright->i == -1) /* i | -1 ==> -1 */
740 return RIGHTFOLD;
741 return NEXTFOLD;
742}
743
744LJFOLD(BXOR any KINT)
745LJFOLDF(simplify_bxor_k)
746{
747 if (fright->i == 0) /* i xor 0 ==> i */
748 return LEFTFOLD;
749 if (fright->i == -1) { /* i xor -1 ==> ~i */
750 fins->o = IR_BNOT;
751 fins->op2 = 0;
752 return RETRYFOLD;
753 }
754 return NEXTFOLD;
755}
756
757LJFOLD(BSHL any KINT)
758LJFOLD(BSHR any KINT)
759LJFOLD(BSAR any KINT)
760LJFOLD(BROL any KINT)
761LJFOLD(BROR any KINT)
762LJFOLDF(simplify_shift_ik)
763{
764 int32_t k = (fright->i & 31);
765 if (k == 0) /* i o 0 ==> i */
766 return LEFTFOLD;
767 if (k != fright->i) { /* i o k ==> i o (k & 31) */
768 fins->op2 = (IRRef1)lj_ir_kint(J, k);
769 return RETRYFOLD;
770 }
771 if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&31) */
772 fins->o = IR_BROL;
773 fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31);
774 return RETRYFOLD;
775 }
776 return NEXTFOLD;
777}
778
779LJFOLD(BSHL any BAND)
780LJFOLD(BSHR any BAND)
781LJFOLD(BSAR any BAND)
782LJFOLD(BROL any BAND)
783LJFOLD(BROR any BAND)
784LJFOLDF(simplify_shift_andk)
785{
786#if LJ_TARGET_MASKEDSHIFT
787 IRIns *irk = IR(fright->op2);
788 PHIBARRIER(fright);
789 if (irk->o == IR_KINT) { /* i o (j & 31) ==> i o j */
790 int32_t k = irk->i & 31;
791 if (k == 31) {
792 fins->op2 = fright->op1;
793 return RETRYFOLD;
794 }
795 }
796#endif
797 return NEXTFOLD;
798}
799
800LJFOLD(BSHL KINT any)
801LJFOLD(BSHR KINT any)
802LJFOLDF(simplify_shift1_ki)
803{
804 if (fleft->i == 0) /* 0 o i ==> 0 */
805 return LEFTFOLD;
806 return NEXTFOLD;
807}
808
809LJFOLD(BSAR KINT any)
810LJFOLD(BROL KINT any)
811LJFOLD(BROR KINT any)
812LJFOLDF(simplify_shift2_ki)
813{
814 if (fleft->i == 0 || fleft->i == -1) /* 0 o i ==> 0; -1 o i ==> -1 */
815 return LEFTFOLD;
816 return NEXTFOLD;
817}
818
819/* -- Reassociation ------------------------------------------------------- */
820
821LJFOLD(ADD ADD KINT)
822LJFOLD(BAND BAND KINT)
823LJFOLD(BOR BOR KINT)
824LJFOLD(BXOR BXOR KINT)
825LJFOLDF(reassoc_intarith_k)
826{
827 IRIns *irk = IR(fleft->op2);
828 if (irk->o == IR_KINT) {
829 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
830 if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
831 return LEFTFOLD;
832 PHIBARRIER(fleft);
833 fins->op1 = fleft->op1;
834 fins->op2 = (IRRef1)lj_ir_kint(J, k);
835 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
836 }
837 return NEXTFOLD;
838}
839
840LJFOLD(MIN MIN any)
841LJFOLD(MAX MAX any)
842LJFOLD(BAND BAND any)
843LJFOLD(BOR BOR any)
844LJFOLDF(reassoc_dup)
845{
846 PHIBARRIER(fleft);
847 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
848 return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
849 return NEXTFOLD;
850}
851
852LJFOLD(BXOR BXOR any)
853LJFOLDF(reassoc_bxor)
854{
855 PHIBARRIER(fleft);
856 if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */
857 return fleft->op2;
858 if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */
859 return fleft->op1;
860 return NEXTFOLD;
861}
862
863LJFOLD(BSHL BSHL KINT)
864LJFOLD(BSHR BSHR KINT)
865LJFOLD(BSAR BSAR KINT)
866LJFOLD(BROL BROL KINT)
867LJFOLD(BROR BROR KINT)
868LJFOLDF(reassoc_shift)
869{
870 IRIns *irk = IR(fleft->op2);
871 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
872 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
873 int32_t k = (irk->i & 31) + (fright->i & 31);
874 if (k > 31) { /* Combined shift too wide? */
875 if (fins->o == IR_BSHL || fins->o == IR_BSHR)
876 return INTFOLD(0);
877 else if (fins->o == IR_BSAR)
878 k = 31;
879 else
880 k &= 31;
881 }
882 fins->op1 = fleft->op1;
883 fins->op2 = (IRRef1)lj_ir_kint(J, k);
884 return RETRYFOLD;
885 }
886 return NEXTFOLD;
887}
888
889LJFOLD(MIN MIN KNUM)
890LJFOLD(MAX MAX KNUM)
891LJFOLDF(reassoc_minmax_k)
892{
893 IRIns *irk = IR(fleft->op2);
894 if (irk->o == IR_KNUM) {
895 lua_Number a = ir_knum(irk)->n;
896 lua_Number b = knumright;
897 lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
898 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
899 return LEFTFOLD;
900 PHIBARRIER(fleft);
901 fins->op1 = fleft->op1;
902 fins->op2 = (IRRef1)lj_ir_knum(J, y);
903 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
904 }
905 return NEXTFOLD;
906}
907
908LJFOLD(MIN MAX any)
909LJFOLD(MAX MIN any)
910LJFOLDF(reassoc_minmax_left)
911{
912 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
913 return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
914 return NEXTFOLD;
915}
916
917LJFOLD(MIN any MAX)
918LJFOLD(MAX any MIN)
919LJFOLDF(reassoc_minmax_right)
920{
921 if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
922 return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
923 return NEXTFOLD;
924}
925
926/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
927** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
928** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
929*/
930LJFOLD(ABC any ADD)
931LJFOLDF(reassoc_abc)
932{
933 if (irref_isk(fright->op2)) {
934 IRIns *add2 = IR(fright->op1);
935 if (add2->o == IR_ADD && irref_isk(add2->op2) &&
936 IR(fright->op2)->i == -IR(add2->op2)->i) {
937 IRRef ref = J->chain[IR_ABC];
938 IRRef lim = add2->op1;
939 if (fins->op1 > lim) lim = fins->op1;
940 while (ref > lim) {
941 IRIns *ir = IR(ref);
942 if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
943 return DROPFOLD;
944 ref = ir->prev;
945 }
946 }
947 }
948 return NEXTFOLD;
949}
950
951/* -- Commutativity ------------------------------------------------------- */
952
953/* The refs of commutative ops are canonicalized. Lower refs go to the right.
954** Rationale behind this:
955** - It (also) moves constants to the right.
956** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
957** - It helps CSE to find more matches.
958** - The assembler generates better code with constants at the right.
959*/
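/* Note on "moves constants to the right": constant and literal refs sort
** below REF_BIAS while instruction refs grow upwards from it (cf. "Relies on
** lit < REF_BIAS" in lj_opt_cse below and the substitution table bounds in
** lj_opt_loop.c), so a constant operand always has the lower ref and ends up
** as op2 after canonicalization.
*/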
960
961LJFOLD(ADD any any)
962LJFOLD(MUL any any)
963LJFOLD(ADDOV any any)
964LJFOLDF(comm_swap)
965{
966 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
967 IRRef1 tmp = fins->op1;
968 fins->op1 = fins->op2;
969 fins->op2 = tmp;
970 return RETRYFOLD;
971 }
972 return NEXTFOLD;
973}
974
975LJFOLD(EQ any any)
976LJFOLD(NE any any)
977LJFOLDF(comm_equal)
978{
979 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
980 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
981 return CONDFOLD(fins->o == IR_EQ);
982 return comm_swap(J);
983}
984
985LJFOLD(LT any any)
986LJFOLD(GE any any)
987LJFOLD(LE any any)
988LJFOLD(GT any any)
989LJFOLD(ULT any any)
990LJFOLD(UGE any any)
991LJFOLD(ULE any any)
992LJFOLD(UGT any any)
993LJFOLDF(comm_comp)
994{
995 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
996 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
997 return CONDFOLD(fins->o & 1);
998 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
999 IRRef1 tmp = fins->op1;
1000 fins->op1 = fins->op2;
1001 fins->op2 = tmp;
1002 fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
1003 return RETRYFOLD;
1004 }
1005 return NEXTFOLD;
1006}
1007
1008LJFOLD(BAND any any)
1009LJFOLD(BOR any any)
1010LJFOLD(MIN any any)
1011LJFOLD(MAX any any)
1012LJFOLDF(comm_dup)
1013{
1014 if (fins->op1 == fins->op2) /* x o x ==> x */
1015 return LEFTFOLD;
1016 return comm_swap(J);
1017}
1018
1019LJFOLD(BXOR any any)
1020LJFOLDF(comm_bxor)
1021{
1022 if (fins->op1 == fins->op2) /* i xor i ==> 0 */
1023 return INTFOLD(0);
1024 return comm_swap(J);
1025}
1026
1027/* -- Simplification of compound expressions ------------------------------ */
1028
1029static int32_t kfold_xload(IRIns *ir, const void *p)
1030{
1031#if !LJ_TARGET_X86ORX64
1032#error "Missing support for unaligned loads"
1033#endif
1034 switch (irt_type(ir->t)) {
1035 case IRT_I8: return (int32_t)*(int8_t *)p;
1036 case IRT_U8: return (int32_t)*(uint8_t *)p;
1037 case IRT_I16: return (int32_t)*(int16_t *)p;
1038 case IRT_U16: return (int32_t)*(uint16_t *)p;
1039 default: lua_assert(irt_isint(ir->t)); return (int32_t)*(int32_t *)p;
1040 }
1041}
1042
1043/* Turn: string.sub(str, a, b) == kstr
1044** into: string.byte(str, a) == string.byte(kstr, 1) etc.
1045** Note: this creates unaligned XLOADs!
1046*/
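/* A sketch of the result for string.sub(str, a, b) == "ab" (len == 2; load
** width and byte order depend on the target):
**   guard:  EQ on the SNEW length operand against +2
**   XLOAD   U16 from the STRREF (unaligned)
**   EQ      that load against the 16 bit constant read from "ab"
** i.e. the comparison is done on the raw bytes instead of on an interned
** temporary substring.
*/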
1047LJFOLD(EQ SNEW KGC)
1048LJFOLD(NE SNEW KGC)
1049LJFOLDF(merge_eqne_snew_kgc)
1050{
1051 GCstr *kstr = ir_kstr(fright);
1052 int32_t len = (int32_t)kstr->len;
1053 lua_assert(irt_isstr(fins->t));
1054 if (len <= 4) { /* Handle string lengths 0, 1, 2, 3, 4. */
1055 IROp op = (IROp)fins->o;
1056 IRRef strref = fleft->op1;
1057 lua_assert(IR(strref)->o == IR_STRREF);
1058 if (op == IR_EQ) {
1059 emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
1060 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
1061 } else {
1062 /* NE is not expanded since this would need an OR of two conds. */
1063 if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */
1064 return NEXTFOLD;
1065 if (IR(fleft->op2)->i != len)
1066 return DROPFOLD;
1067 }
1068 if (len > 0) {
1069 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
1070 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
1071 len == 2 ? IRT(IR_XLOAD, IRT_U16) :
1072 IRTI(IR_XLOAD));
1073 TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0);
1074 TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
1075 if (len == 3)
1076 tmp = emitir(IRTI(IR_BAND), tmp,
1077 lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
1078 fins->op1 = (IRRef1)tmp;
1079 fins->op2 = (IRRef1)val;
1080 fins->ot = (IROpT)IRTGI(op);
1081 return RETRYFOLD;
1082 } else {
1083 return DROPFOLD;
1084 }
1085 }
1086 return NEXTFOLD;
1087}
1088
1089/* -- Loads --------------------------------------------------------------- */
1090
1091/* Loads cannot be folded or passed on to CSE in general.
1092** Alias analysis is needed to check for forwarding opportunities.
1093**
1094** Caveat: *all* loads must be listed here or they end up at CSE!
1095*/
1096
1097LJFOLD(ALOAD any)
1098LJFOLDX(lj_opt_fwd_aload)
1099
1100LJFOLD(HLOAD any)
1101LJFOLDX(lj_opt_fwd_hload)
1102
1103LJFOLD(ULOAD any)
1104LJFOLDX(lj_opt_fwd_uload)
1105
1106LJFOLD(TLEN any)
1107LJFOLDX(lj_opt_fwd_tlen)
1108
1109/* Upvalue refs are really loads, but there are no corresponding stores.
1110** So CSE is ok for them, except for UREFO across a GC step (see below).
1111** If the referenced function is const, its upvalue addresses are const, too.
1112** This can be used to improve CSE by looking for the same address,
1113** even if the upvalues originate from a different function.
1114*/
1115LJFOLD(UREFO KGC any)
1116LJFOLD(UREFC KGC any)
1117LJFOLDF(cse_uref)
1118{
1119 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1120 IRRef ref = J->chain[fins->o];
1121 GCfunc *fn = ir_kfunc(fleft);
1122 GCupval *uv = gco2uv(gcref(fn->l.uvptr[fins->op2]));
1123 while (ref > 0) {
1124 IRIns *ir = IR(ref);
1125 if (irref_isk(ir->op1)) {
1126 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
1127 if (gco2uv(gcref(fn2->l.uvptr[ir->op2])) == uv) {
1128 if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
1129 break;
1130 return ref;
1131 }
1132 }
1133 ref = ir->prev;
1134 }
1135 }
1136 return EMITFOLD;
1137}
1138
1139/* We can safely FOLD/CSE array/hash refs and field loads, since there
1140** are no corresponding stores. But NEWREF may invalidate all of them.
1141** Lacking better disambiguation for table references, these optimizations
1142** are simply disabled across any NEWREF.
1143** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1144** FLOADs. And NEWREF itself is treated like a store (see below).
1145*/
1146LJFOLD(HREF any any)
1147LJFOLDF(cse_href)
1148{
1149 TRef tr = lj_opt_cse(J);
1150 return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
1151}
1152
1153LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1154LJFOLDF(fload_tab_tnew_asize)
1155{
1156 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
1157 return INTFOLD(fleft->op1);
1158 return NEXTFOLD;
1159}
1160
1161LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
1162LJFOLDF(fload_tab_tnew_hmask)
1163{
1164 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
1165 return INTFOLD((1 << fleft->op2)-1);
1166 return NEXTFOLD;
1167}
1168
1169LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
1170LJFOLDF(fload_tab_tdup_asize)
1171{
1172 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
1173 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
1174 return NEXTFOLD;
1175}
1176
1177LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
1178LJFOLDF(fload_tab_tdup_hmask)
1179{
1180 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
1181 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
1182 return NEXTFOLD;
1183}
1184
1185LJFOLD(FLOAD any IRFL_TAB_ARRAY)
1186LJFOLD(FLOAD any IRFL_TAB_NODE)
1187LJFOLD(FLOAD any IRFL_TAB_ASIZE)
1188LJFOLD(FLOAD any IRFL_TAB_HMASK)
1189LJFOLDF(fload_tab_ah)
1190{
1191 TRef tr = lj_opt_cse(J);
1192 return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
1193}
1194
1195/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1196LJFOLD(FLOAD KGC IRFL_STR_LEN)
1197LJFOLDF(fload_str_len)
1198{
1199 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1200 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1201 return NEXTFOLD;
1202}
1203
1204LJFOLD(FLOAD any IRFL_STR_LEN)
1205LJFOLDX(lj_opt_cse)
1206
1207/* All other field loads need alias analysis. */
1208LJFOLD(FLOAD any any)
1209LJFOLDX(lj_opt_fwd_fload)
1210
1211/* This is for LOOP only. Recording handles SLOADs internally. */
1212LJFOLD(SLOAD any any)
1213LJFOLDF(fwd_sload)
1214{
1215 lua_assert(J->slot[fins->op1] != 0);
1216 return J->slot[fins->op1];
1217}
1218
1219/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */
1220LJFOLD(XLOAD STRREF any)
1221LJFOLDF(xload_str)
1222{
1223 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
1224 GCstr *str = ir_kstr(IR(fleft->op1));
1225 int32_t ofs = IR(fleft->op2)->i;
1226 lua_assert((MSize)ofs < str->len);
1227 lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len);
1228 return INTFOLD(kfold_xload(fins, strdata(str)+ofs));
1229 }
1230 return CSEFOLD;
1231}
1232/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */
1233
1234/* -- Write barriers ------------------------------------------------------ */
1235
1236/* Write barriers are amenable to CSE, but not across any incremental
1237** GC steps.
1238**
1239** The same logic applies to open upvalue references, because the stack
1240** may be resized during a GC step.
1241*/
1242LJFOLD(TBAR any)
1243LJFOLD(OBAR any any)
1244LJFOLD(UREFO any any)
1245LJFOLDF(barrier_tab)
1246{
1247 TRef tr = lj_opt_cse(J);
1248 if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */
1249 return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */
1250 return tr;
1251}
1252
1253LJFOLD(TBAR TNEW)
1254LJFOLD(TBAR TDUP)
1255LJFOLDF(barrier_tnew_tdup)
1256{
1257 /* New tables are always white and never need a barrier. */
1258 if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */
1259 return NEXTFOLD;
1260 return DROPFOLD;
1261}
1262
1263/* -- Stores and allocations ---------------------------------------------- */
1264
1265/* Stores and allocations cannot be folded or passed on to CSE in general.
1266** But some stores can be eliminated with dead-store elimination (DSE).
1267**
1268** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
1269*/
1270
1271LJFOLD(ASTORE any any)
1272LJFOLD(HSTORE any any)
1273LJFOLDX(lj_opt_dse_ahstore)
1274
1275LJFOLD(USTORE any any)
1276LJFOLDX(lj_opt_dse_ustore)
1277
1278LJFOLD(FSTORE any any)
1279LJFOLDX(lj_opt_dse_fstore)
1280
1281LJFOLD(NEWREF any any) /* Treated like a store. */
1282LJFOLD(TNEW any any)
1283LJFOLD(TDUP any)
1284LJFOLDF(store_raw)
1285{
1286 return EMITFOLD;
1287}
1288
1289/* ------------------------------------------------------------------------ */
1290
1291/* Every entry in the generated hash table is a 32 bit pattern:
1292**
1293** xxxxxxxx iiiiiiii llllllll rrrrrrrr
1294**
1295** xxxxxxxx = 8 bit index into fold function table
1296** iiiiiiii = 8 bit folded instruction opcode
1297** llllllll = 8 bit left instruction opcode
1298** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field
1299*/
1300
1301#include "lj_folddef.h"
1302
1303/* ------------------------------------------------------------------------ */
1304
1305/* Fold IR instruction. */
1306TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
1307{
1308 uint32_t key, any;
1309 IRRef ref;
1310
1311 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
1312 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
1313 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
1314 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
1315 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
1316 return lj_opt_cse(J);
1317
1318 /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
1319 if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
1320 (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
1321 irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
1322 return lj_ir_emit(J);
1323
1324 /* DSE disabled? Emit raw IR for stores. */
1325 if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
1326 return lj_ir_emit(J);
1327 }
1328
1329 /* Fold engine start/retry point. */
1330retry:
1331 /* Construct key from opcode and operand opcodes (unless literal/none). */
1332 key = ((uint32_t)fins->o << 16);
1333 if (fins->op1 >= J->cur.nk) {
1334 key += (uint32_t)IR(fins->op1)->o << 8;
1335 *fleft = *IR(fins->op1);
1336 }
1337 if (fins->op2 >= J->cur.nk) {
1338 key += (uint32_t)IR(fins->op2)->o;
1339 *fright = *IR(fins->op2);
1340 } else {
1341 key += (fins->op2 & 0xffu); /* For IRFPM_* and IRFL_*. */
1342 }
1343
1344 /* Check for a match in order from most specific to least specific. */
1345 any = 0;
1346 for (;;) {
1347 uint32_t k = key | any;
1348 uint32_t h = fold_hashkey(k);
1349 uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
1350 if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
1351 ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
1352 if (ref != NEXTFOLD)
1353 break;
1354 }
1355 if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */
1356 return lj_opt_cse(J);
1357 any = (any | (any >> 8)) ^ 0xff00;
1358 }
1359
1360 /* Return value processing, ordered by frequency. */
1361 if (LJ_LIKELY(ref >= MAX_FOLD))
1362 return TREF(ref, irt_t(IR(ref)->t));
1363 if (ref == RETRYFOLD)
1364 goto retry;
1365 if (ref == KINTFOLD)
1366 return lj_ir_kint(J, fins->i);
1367 if (ref == FAILFOLD)
1368 lj_trace_err(J, LJ_TRERR_GFAIL);
1369 lua_assert(ref == DROPFOLD);
1370 return REF_DROP;
1371}
1372
1373/* -- Common-Subexpression Elimination ------------------------------------ */
1374
1375/* CSE an IR instruction. This is very fast due to the skip-list chains. */
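/* The search may stop at lim = max(op1, op2): the IR is in SSA order, so any
** instruction with identical operands must have been emitted after both of
** them, i.e. at a higher ref (constants and literals sort below REF_BIAS and
** thus below any instruction ref).
*/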
1376TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
1377{
1378 /* Avoid narrow to wide store-to-load forwarding stall */
1379 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
1380 IROp op = fins->o;
1381 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1382 /* Limited search for same operands in per-opcode chain. */
1383 IRRef ref = J->chain[op];
1384 IRRef lim = fins->op1;
1385 if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */
1386 while (ref > lim) {
1387 if (IR(ref)->op12 == op12)
1388 return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */
1389 ref = IR(ref)->prev;
1390 }
1391 }
1392 /* Otherwise emit IR (inlined for speed). */
1393 {
1394 IRRef ref = lj_ir_nextins(J);
1395 IRIns *ir = IR(ref);
1396 ir->prev = J->chain[op];
1397 ir->op12 = op12;
1398 J->chain[op] = (IRRef1)ref;
1399 ir->o = fins->o;
1400 J->guardemit.irt |= fins->t.irt;
1401 return TREF(ref, irt_t((ir->t = fins->t)));
1402 }
1403}
1404
1405/* ------------------------------------------------------------------------ */
1406
1407#undef IR
1408#undef fins
1409#undef fleft
1410#undef fright
1411#undef knumleft
1412#undef knumright
1413#undef emitir
1414
1415#endif
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
new file mode 100644
index 00000000..adc0c476
--- /dev/null
+++ b/src/lj_opt_loop.c
@@ -0,0 +1,358 @@
1/*
2** LOOP: Loop Optimizations.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_opt_loop_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_ir.h"
17#include "lj_jit.h"
18#include "lj_iropt.h"
19#include "lj_trace.h"
20#include "lj_snap.h"
21#include "lj_vm.h"
22
23/* Loop optimization:
24**
25** Traditional Loop-Invariant Code Motion (LICM) splits the instructions
26** of a loop into invariant and variant instructions. The invariant
27** instructions are hoisted out of the loop and only the variant
28** instructions remain inside the loop body.
29**
30** Unfortunately LICM is mostly useless for compiling dynamic languages.
31** The IR has many guards and most of the subsequent instructions are
32** control-dependent on them. The first non-hoistable guard would
33** effectively prevent hoisting of all subsequent instructions.
34**
35** That's why we use a special form of unrolling using copy-substitution,
36** combined with redundancy elimination:
37**
38** The recorded instruction stream is re-emitted to the compiler pipeline
39** with substituted operands. The substitution table is filled with the
40** refs returned by re-emitting each instruction. This can be done
41** on-the-fly, because the IR is in strict SSA form, where every ref is
42** defined before its use.
43**
44** This approach generates two code sections, separated by the LOOP
45** instruction:
46**
47** 1. The recorded instructions form a kind of pre-roll for the loop. It
48** contains a mix of invariant and variant instructions and performs
49** exactly one loop iteration (but not necessarily the 1st iteration).
50**
51** 2. The loop body contains only the variant instructions and performs
52** all remaining loop iterations.
53**
54** At first sight that looks like a waste of space, because the variant
55** instructions are present twice. But the key insight is that the
56** pre-roll honors the control-dependencies for *both* the pre-roll itself
57** *and* the loop body!
58**
59** It also means one doesn't have to explicitly model control-dependencies
60** (which, BTW, wouldn't help LICM much). And it's much easier to
61** integrate sparse snapshotting with this approach.
62**
63** One of the nicest aspects of this approach is that all of the
64** optimizations of the compiler pipeline (FOLD, CSE, FWD, etc.) can be
65** reused with only minor restrictions (e.g. one should not fold
66** instructions across loop-carried dependencies).
67**
68** But in general all optimizations can be applied which only need to look
69** backwards into the generated instruction stream. At any point in time
70** during the copy-substitution process this contains both a static loop
71** iteration (the pre-roll) and a dynamic one (from the to-be-copied
72** instruction up to the end of the partial loop body).
73**
74** Since control-dependencies are implicitly kept, CSE also applies to all
75** kinds of guards. The major advantage is that all invariant guards can
76** be hoisted, too.
77**
78** Load/store forwarding works across loop iterations, too. This is
79** important if loop-carried dependencies are kept in upvalues or tables.
80** E.g. 'self.idx = self.idx + 1' deep down in some OO-style method may
81** become a forwarded loop-recurrence after inlining.
82**
83** Since the IR is in SSA form, loop-carried dependencies have to be
84** modeled with PHI instructions. The potential candidates for PHIs are
85** collected on-the-fly during copy-substitution. After eliminating the
86** redundant ones, PHI instructions are emitted *below* the loop body.
87**
88** Note that this departure from traditional SSA form doesn't change the
89** semantics of the PHI instructions themselves. But it greatly simplifies
90** on-the-fly generation of the IR and the machine code.
91*/
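/* A minimal sketch of copy-substitution for a counter loop 'i = i + 1'
** (refs, slot numbers and flags are illustrative only):
**
**   0001  SLOAD #1               <- pre-roll, one full iteration
**   0002  ADD   0001  +1
**   0003  LOOP
**   0004  ADD   0002  +1         <- copy of 0002 with subst[0001] = 0002
**   0005  PHI   0002  0004       <- loop-carried dependency, emitted below
**                                   the loop body
**
** The copied SLOAD is substituted by the current slot ref (see fwd_sload in
** lj_opt_fold.c), so the loop body only re-emits the variant instructions.
*/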
92
93/* Some local macros to save typing. Undef'd at the end. */
94#define IR(ref) (&J->cur.ir[(ref)])
95
96/* Pass IR on to next optimization in chain (FOLD). */
97#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
98
99/* Emit raw IR without passing through optimizations. */
100#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
101
102/* -- PHI elimination ----------------------------------------------------- */
103
104/* Emit or eliminate collected PHIs. */
105static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
106{
107 int pass2 = 0;
108 IRRef i, nslots;
109 IRRef invar = J->chain[IR_LOOP];
110 /* Pass #1: mark redundant and potentially redundant PHIs. */
111 for (i = 0; i < nphi; i++) {
112 IRRef lref = phi[i];
113 IRRef rref = subst[lref];
114 if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */
115 irt_setmark(IR(lref)->t);
116 } else if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) {
117 /* Quick check for simple recurrences failed, need pass2. */
118 irt_setmark(IR(lref)->t);
119 pass2 = 1;
120 }
121 }
122 /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */
123 if (pass2) {
124 for (i = J->cur.nins-1; i > invar; i--) {
125 IRIns *ir = IR(i);
126 if (!irref_isk(ir->op1)) irt_clearmark(IR(ir->op1)->t);
127 if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t);
128 }
129 }
130 /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */
131 nslots = J->baseslot+J->maxslot;
132 for (i = 1; i < nslots; i++) {
133 IRRef ref = tref_ref(J->slot[i]);
134 if (!irref_isk(ref) && ref != subst[ref]) {
135 IRIns *ir = IR(ref);
136 irt_clearmark(ir->t); /* Unmark potential uses, too. */
137 if (!irt_isphi(ir->t) && !irt_ispri(ir->t)) {
138 irt_setphi(ir->t);
139 if (nphi >= LJ_MAX_PHI)
140 lj_trace_err(J, LJ_TRERR_PHIOV);
141 phi[nphi++] = (IRRef1)ref;
142 }
143 }
144 }
145 /* Pass #4: emit PHI instructions or eliminate PHIs. */
146 for (i = 0; i < nphi; i++) {
147 IRRef lref = phi[i];
148 IRIns *ir = IR(lref);
149 if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */
150 IRRef rref = subst[lref];
151 if (rref > invar)
152 irt_setphi(IR(rref)->t);
153 emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref);
154 } else { /* Otherwise eliminate PHI. */
155 irt_clearmark(ir->t);
156 irt_clearphi(ir->t);
157 }
158 }
159}
160
161/* -- Loop unrolling using copy-substitution ------------------------------ */
162
163/* Unroll loop. */
164static void loop_unroll(jit_State *J)
165{
166 IRRef1 phi[LJ_MAX_PHI];
167 uint32_t nphi = 0;
168 IRRef1 *subst;
169 SnapShot *osnap, *snap;
170 IRRef2 *loopmap;
171 BCReg loopslots;
172 MSize nsnap, nsnapmap;
173 IRRef ins, invar, osnapref;
174
175 /* Use temp buffer for substitution table.
176 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
177 ** Note: don't call into the VM or run the GC or the buffer may be gone.
178 */
179 invar = J->cur.nins;
180 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
181 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
182 subst[REF_BASE] = REF_BASE;
183
184 /* LOOP separates the pre-roll from the loop body. */
185 emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
186
187 /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */
188 nsnap = J->cur.nsnap;
189 if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) {
190 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
191 if (2*nsnap-2 > maxsnap)
192 lj_trace_err(J, LJ_TRERR_SNAPOV);
193 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
194 J->cur.snap = J->snapbuf;
195 }
196 nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */
197 if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) {
198 J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf,
199 J->sizesnapmap*sizeof(IRRef2),
200 2*J->sizesnapmap*sizeof(IRRef2));
201 J->cur.snapmap = J->snapmapbuf;
202 J->sizesnapmap *= 2;
203 }
204
205 /* The loop snapshot is used for fallback substitutions. */
206 snap = &J->cur.snap[nsnap-1];
207 loopmap = &J->cur.snapmap[snap->mapofs];
208 loopslots = snap->nslots;
209 /* The PC of snapshot #0 and the loop snapshot must match. */
210 lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]);
211
212 /* Start substitution with snapshot #1 (#0 is empty for root traces). */
213 osnap = &J->cur.snap[1];
214 osnapref = osnap->ref;
215
216 /* Copy and substitute all recorded instructions and snapshots. */
217 for (ins = REF_FIRST; ins < invar; ins++) {
218 IRIns *ir;
219 IRRef op1, op2;
220
221 /* Copy-substitute snapshot. */
222 if (ins >= osnapref) {
223 IRRef2 *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
224 BCReg s, nslots;
225 uint32_t nmapofs, nframelinks;
226 if (irt_isguard(J->guardemit)) { /* Guard in between? */
227 nmapofs = nsnapmap;
228 snap++; /* Add new snapshot. */
229 } else {
230 nmapofs = snap->mapofs; /* Overwrite previous snapshot. */
231 }
232 J->guardemit.irt = 0;
233 nslots = osnap->nslots;
234 nframelinks = osnap->nframelinks;
235 snap->mapofs = (uint16_t)nmapofs;
236 snap->ref = (IRRef1)J->cur.nins;
237 snap->nslots = (uint8_t)nslots;
238 snap->nframelinks = (uint8_t)nframelinks;
239 snap->count = 0;
240 osnap++;
241 osnapref = osnap->ref;
242 nsnapmap = nmapofs + nslots + nframelinks;
243 nmap = &J->cur.snapmap[nmapofs];
244 /* Substitute snapshot slots. */
245 for (s = 0; s < nslots; s++) {
246 IRRef ref = snap_ref(omap[s]);
247 if (ref) {
248 if (!irref_isk(ref))
249 ref = subst[ref];
250 } else if (s < loopslots) {
251 ref = loopmap[s];
252 }
253 nmap[s] = ref;
254 }
255 /* Copy frame links. */
256 nmap += nslots;
257 omap += nslots;
258 for (s = 0; s < nframelinks; s++)
259 nmap[s] = omap[s];
260 }
261
262 /* Substitute instruction operands. */
263 ir = IR(ins);
264 op1 = ir->op1;
265 if (!irref_isk(op1)) op1 = subst[op1];
266 op2 = ir->op2;
267 if (!irref_isk(op2)) op2 = subst[op2];
268 if (irm_kind(lj_ir_mode[ir->o]) == IRM_N &&
269 op1 == ir->op1 && op2 == ir->op2) { /* Regular invariant ins? */
270 subst[ins] = (IRRef1)ins; /* Shortcut. */
271 } else {
272 /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */
273 IRType1 t = ir->t; /* Get this first, since emitir may invalidate ir. */
274 IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2));
275 subst[ins] = (IRRef1)ref;
276 if (ref != ins && ref < invar) { /* Loop-carried dependency? */
277 IRIns *irr = IR(ref);
278 /* Potential PHI? */
279 if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) {
280 irt_setphi(irr->t);
281 if (nphi >= LJ_MAX_PHI)
282 lj_trace_err(J, LJ_TRERR_PHIOV);
283 phi[nphi++] = (IRRef1)ref;
284 }
285 /* Check all loop-carried dependencies for type instability. */
286 if (!irt_sametype(t, irr->t)) {
287 if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */
288 subst[ins] = tref_ref(emitir(IRTN(IR_TONUM), ref, 0));
289 else
290 lj_trace_err(J, LJ_TRERR_TYPEINS);
291 }
292 }
293 }
294 }
295 if (irt_isguard(J->guardemit)) { /* Guard in between? */
296 J->cur.nsnapmap = (uint16_t)nsnapmap;
297 snap++;
298 } else {
299 J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */
300 }
301 J->cur.nsnap = (uint16_t)(snap - J->cur.snap);
302 lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
303
304 loop_emit_phi(J, subst, phi, nphi);
305}
306
307/* Undo any partial changes made by the loop optimization. */
308static void loop_undo(jit_State *J, IRRef ins)
309{
310 lj_ir_rollback(J, ins);
311 for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */
312 IRIns *ir = IR(ins);
313 irt_clearphi(ir->t);
314 irt_clearmark(ir->t);
315 }
316}
317
318/* Protected callback for loop optimization. */
319static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
320{
321 UNUSED(L); UNUSED(dummy);
322 loop_unroll((jit_State *)ud);
323 return NULL;
324}
325
326/* Loop optimization. */
327int lj_opt_loop(jit_State *J)
328{
329 IRRef nins = J->cur.nins;
330 int errcode = lj_vm_cpcall(J->L, cploop_opt, NULL, J);
331 if (LJ_UNLIKELY(errcode)) {
332 lua_State *L = J->L;
333 if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */
334 int32_t e = lj_num2int(numV(L->top-1));
335 switch ((TraceError)e) {
336 case LJ_TRERR_TYPEINS: /* Type instability. */
337 case LJ_TRERR_GFAIL: /* Guard would always fail. */
338 /* Unrolling via recording fixes many cases, e.g. a flipped boolean. */
339 if (--J->instunroll < 0) /* But do not unroll forever. */
340 break;
341 L->top--; /* Remove error object. */
342 J->guardemit.irt = 0;
343 loop_undo(J, nins);
344 return 1; /* Loop optimization failed, continue recording. */
345 default:
346 break;
347 }
348 }
349 lj_err_throw(L, errcode); /* Propagate all other errors. */
350 }
351 return 0; /* Loop optimization is ok. */
352}
353
354#undef IR
355#undef emitir
356#undef emitir_raw
357
358#endif
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
new file mode 100644
index 00000000..77a9c0e7
--- /dev/null
+++ b/src/lj_opt_mem.c
@@ -0,0 +1,550 @@
1/*
2** Memory access optimizations.
3** AA: Alias Analysis using high-level semantic disambiguation.
4** FWD: Load Forwarding (L2L) + Store Forwarding (S2L).
5** DSE: Dead-Store Elimination.
6** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
7*/
8
9#define lj_opt_mem_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13
14#if LJ_HASJIT
15
16#include "lj_tab.h"
17#include "lj_ir.h"
18#include "lj_jit.h"
19#include "lj_iropt.h"
20
21/* Some local macros to save typing. Undef'd at the end. */
22#define IR(ref) (&J->cur.ir[(ref)])
23#define fins (&J->fold.ins)
24
25/*
26** Caveat #1: return value is not always a TRef -- only use with tref_ref().
27** Caveat #2: FWD relies on active CSE for xREF operands -- see lj_opt_fold().
28*/
29
30/* Return values from alias analysis. */
31typedef enum {
32 ALIAS_NO, /* The two refs CANNOT alias (exact). */
33 ALIAS_MAY, /* The two refs MAY alias (inexact). */
34 ALIAS_MUST /* The two refs MUST alias (exact). */
35} AliasRet;
36
37/* -- ALOAD/HLOAD forwarding and ASTORE/HSTORE elimination ---------------- */
38
39/* Alias analysis for array and hash access using key-based disambiguation. */
40static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
41{
42 IRRef ka = refa->op2;
43 IRRef kb = refb->op2;
44 IRIns *keya, *keyb;
45 if (refa == refb)
46 return ALIAS_MUST; /* Shortcut for same refs. */
47 keya = IR(ka);
48 if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); }
49 keyb = IR(kb);
50 if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); }
51 if (ka == kb) {
52 /* Same key. Check for same table with different ref (NEWREF vs. HREF). */
53 IRIns *ta = refa;
54 IRIns *tb = refb;
55 if (ta->o == IR_HREFK || ta->o == IR_AREF) ta = IR(ta->op1);
56 if (tb->o == IR_HREFK || tb->o == IR_AREF) tb = IR(tb->op1);
57 if (ta->op1 == tb->op1)
58 return ALIAS_MUST; /* Same key, same table. */
59 else
60 return ALIAS_MAY; /* Same key, possibly different table. */
61 }
62 if (irref_isk(ka) && irref_isk(kb))
63 return ALIAS_NO; /* Different constant keys. */
64 if (refa->o == IR_AREF) {
65 /* Disambiguate array references based on index arithmetic. */
66 lua_assert(refb->o == IR_AREF);
67 if (refa->op1 == refb->op1) {
68 /* Same table, different non-const array keys. */
69 int32_t ofsa = 0, ofsb = 0;
70 IRRef basea = ka, baseb = kb;
71 /* Gather base and offset from t[base] or t[base+-ofs]. */
72 if (keya->o == IR_ADD && irref_isk(keya->op2)) {
73 basea = keya->op1;
74 ofsa = IR(keya->op2)->i;
75 if (basea == kb && ofsa != 0)
76 return ALIAS_NO; /* t[base+-ofs] vs. t[base]. */
77 }
78 if (keyb->o == IR_ADD && irref_isk(keyb->op2)) {
79 baseb = keyb->op1;
80 ofsb = IR(keyb->op2)->i;
81 if (ka == baseb && ofsb != 0)
82 return ALIAS_NO; /* t[base] vs. t[base+-ofs]. */
83 }
84 if (basea == baseb && ofsa != ofsb)
85 return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
86 }
87 } else {
88 /* Disambiguate hash references based on the type of their keys. */
89 lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
90 (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF));
91 if (!irt_sametype(keya->t, keyb->t))
92 return ALIAS_NO; /* Different key types. */
93 }
94 return ALIAS_MAY; /* Anything else: we just don't know. */
95}
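/* A few illustrative outcomes of the rules above (not exhaustive):
**
**   t[i+1] vs. t[i+2], same table:  same base 'i', offsets 1 != 2
**                                   ==> ALIAS_NO
**   t[i]   vs. t[j],   same table:  different non-constant keys
**                                   ==> ALIAS_MAY
**   h[k]   vs. h["x"], number key vs. string key
**                                   ==> ALIAS_NO (different key types)
**   same key and the table refs resolve to the same object
**                                   ==> ALIAS_MUST
*/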
96
97/* Array and hash load forwarding. */
98static TRef fwd_ahload(jit_State *J, IRRef xref)
99{
100 IRIns *xr = IR(xref);
101 IRRef lim = xref; /* Search limit. */
102 IRRef ref;
103
104 /* Search for conflicting stores. */
105 ref = J->chain[fins->o+IRDELTA_L2S];
106 while (ref > xref) {
107 IRIns *store = IR(ref);
108 switch (aa_ahref(J, xr, IR(store->op1))) {
109 case ALIAS_NO: break; /* Continue searching. */
110 case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */
111 case ALIAS_MUST: return store->op2; /* Store forwarding. */
112 }
113 ref = store->prev;
114 }
115
116 /* No conflicting store (yet): const-fold loads from allocations. */
117 {
118 IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
119 IRRef tab = ir->op1;
120 ir = IR(tab);
121 if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
122 /* A NEWREF with a number key may end up pointing to the array part.
123 ** But it's referenced from HSTORE and not found in the ASTORE chain.
124 ** For now simply consider this a conflict without forwarding anything.
125 */
126 if (xr->o == IR_AREF) {
127 IRRef ref2 = J->chain[IR_NEWREF];
128 while (ref2 > tab) {
129 IRIns *newref = IR(ref2);
130 if (irt_isnum(IR(newref->op2)->t))
131 goto conflict;
132 ref2 = newref->prev;
133 }
134 }
135 /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
136 ** But the above search for conflicting stores was limited by xref.
137 ** So continue searching, limited by the TNEW/TDUP. Store forwarding
138 ** is ok, too. A conflict does NOT limit the search for a matching load.
139 */
140 while (ref > tab) {
141 IRIns *store = IR(ref);
142 switch (aa_ahref(J, xr, IR(store->op1))) {
143 case ALIAS_NO: break; /* Continue searching. */
144 case ALIAS_MAY: goto conflict; /* Conflicting store. */
145 case ALIAS_MUST: return store->op2; /* Store forwarding. */
146 }
147 ref = store->prev;
148 }
149 lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t));
150 if (irt_ispri(fins->t)) {
151 return TREF_PRI(irt_type(fins->t));
152 } else if (irt_isnum(fins->t) || irt_isstr(fins->t)) {
153 TValue keyv;
154 cTValue *tv;
155 IRIns *key = IR(xr->op2);
156 if (key->o == IR_KSLOT) key = IR(key->op1);
157 lj_ir_kvalue(J->L, &keyv, key);
158 tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
159 lua_assert(itype2irt(tv) == irt_type(fins->t));
160 if (irt_isnum(fins->t))
161 return lj_ir_knum_nn(J, tv->u64);
162 else
163 return lj_ir_kstr(J, strV(tv));
164 }
165 /* Otherwise: don't intern as a constant. */
166 }
167 }
168
169conflict:
170 /* Try to find a matching load. Below the conflicting store, if any. */
171 ref = J->chain[fins->o];
172 while (ref > lim) {
173 IRIns *load = IR(ref);
174 if (load->op1 == xref)
175 return ref; /* Load forwarding. */
176 ref = load->prev;
177 }
178 return 0; /* Conflict or no match. */
179}
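/* Two typical cases resolved by the search above (illustrative):
**
**   t[i] = x; y = t[i]     -- the load hits an ALIAS_MUST store and is
**                             replaced by the stored value x (S2L).
**   local t = {}; y = t.k  -- the table comes from a TNEW on the trace and
**                             no store aliases the key, so the load folds
**                             to the constant nil.
*/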
180
181/* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case. */
182static TRef fwd_aload_reassoc(jit_State *J)
183{
184 IRIns *irx = IR(fins->op1);
185 IRIns *key = IR(irx->op2);
186 if (key->o == IR_ADD && irref_isk(key->op2)) {
187 IRIns *add2 = IR(key->op1);
188 if (add2->o == IR_ADD && irref_isk(add2->op2) &&
189 IR(key->op2)->i == -IR(add2->op2)->i) {
190 IRRef ref = J->chain[IR_AREF];
191 IRRef lim = add2->op1;
192 if (irx->op1 > lim) lim = irx->op1;
193 while (ref > lim) {
194 IRIns *ir = IR(ref);
195 if (ir->op1 == irx->op1 && ir->op2 == add2->op1)
196 return fwd_ahload(J, ref);
197 ref = ir->prev;
198 }
199 }
200 }
201 return 0;
202}
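/* This covers the common loop pattern (illustrative):
**
**   t[i] = x
**   i = i + 1
**   y = t[i-1]   -- key is ADD(ADD(i0, +1), -1)
**
** The two inner constants cancel, so the lookup is redirected to an
** earlier AREF with key i0 and the stored value x can still be forwarded.
*/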
203
204/* ALOAD forwarding. */
205TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J)
206{
207 IRRef ref;
208 if ((ref = fwd_ahload(J, fins->op1)) ||
209 (ref = fwd_aload_reassoc(J)))
210 return ref;
211 return EMITFOLD;
212}
213
214/* HLOAD forwarding. */
215TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J)
216{
217 IRRef ref = fwd_ahload(J, fins->op1);
218 if (ref)
219 return ref;
220 return EMITFOLD;
221}
222
223/* ASTORE/HSTORE elimination. */
224TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
225{
226 IRRef xref = fins->op1; /* xREF reference. */
227 IRRef val = fins->op2; /* Stored value reference. */
228 IRIns *xr = IR(xref);
229 IRRef1 *refp = &J->chain[fins->o];
230 IRRef ref = *refp;
231 while (ref > xref) { /* Search for redundant or conflicting stores. */
232 IRIns *store = IR(ref);
233 switch (aa_ahref(J, xr, IR(store->op1))) {
234 case ALIAS_NO:
235 break; /* Continue searching. */
236 case ALIAS_MAY: /* Store to MAYBE the same location. */
237 if (store->op2 != val) /* Conflict if the value is different. */
238 goto doemit;
239 break; /* Otherwise continue searching. */
240 case ALIAS_MUST: /* Store to the same location. */
241 if (store->op2 == val) /* Same value: drop the new store. */
242 return DROPFOLD;
243 /* Different value: try to eliminate the redundant store. */
244 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
245 IRIns *ir;
246 /* Check for any intervening guards (includes conflicting loads). */
247 for (ir = IR(J->cur.nins-1); ir > store; ir--)
248 if (irt_isguard(ir->t))
249 goto doemit; /* No elimination possible. */
250 /* Remove redundant store from chain and replace with NOP. */
251 *refp = store->prev;
252 store->o = IR_NOP; /* Unchained NOP -- does anybody care? */
253 store->t.irt = IRT_NIL;
254 store->op1 = store->op2 = 0;
255 store->prev = 0;
256 /* Now emit the new store instead. */
257 }
258 goto doemit;
259 }
260 ref = *(refp = &store->prev);
261 }
262doemit:
263 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
264}
265
266/* -- ULOAD forwarding ---------------------------------------------------- */
267
268/* The current alias analysis for upvalues is very simplistic. It only
269** disambiguates between the unique upvalues of the same function.
270** This is good enough for now, since most upvalues are read-only.
271**
272** A more precise analysis would be feasible with the help of the parser:
273** generate a unique key for every upvalue, even across all prototypes.
274** Lacking a realistic use-case, it's unclear whether this is beneficial.
275*/
276static AliasRet aa_uref(IRIns *refa, IRIns *refb)
277{
278 if (refa->o != refb->o)
279 return ALIAS_NO; /* Different UREFx type. */
280 if (refa->op1 != refb->op1)
281 return ALIAS_MAY; /* Different function. */
282 else if (refa->op2 == refb->op2)
283 return ALIAS_MUST; /* Same function, same upvalue idx. */
284 else
285 return ALIAS_NO; /* Same function, different upvalue idx. */
286}
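/* Examples: two refs to different upvalue indexes of the same function
** cannot alias (ALIAS_NO); the same index on two different function refs
** may still denote the same upvalue (ALIAS_MAY); the same function and
** the same index is ALIAS_MUST.
*/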
287
288/* ULOAD forwarding. */
289TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
290{
291 IRRef uref = fins->op1;
292 IRRef lim = uref; /* Search limit. */
293 IRIns *xr = IR(uref);
294 IRRef ref;
295
296 /* Search for conflicting stores. */
297 ref = J->chain[IR_USTORE];
298 while (ref > uref) {
299 IRIns *store = IR(ref);
300 switch (aa_uref(xr, IR(store->op1))) {
301 case ALIAS_NO: break; /* Continue searching. */
302 case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */
303 case ALIAS_MUST: return store->op2; /* Store forwarding. */
304 }
305 ref = store->prev;
306 }
307
308conflict:
309 /* Try to find a matching load. Below the conflicting store, if any. */
310 ref = J->chain[IR_ULOAD];
311 while (ref > lim) {
312 IRIns *load = IR(ref);
313 if (load->op1 == uref)
314 return ref; /* Load forwarding. */
315 ref = load->prev;
316 }
317 return EMITFOLD; /* Conflict or no match. */
318}
319
320/* USTORE elimination. */
321TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J)
322{
323 IRRef xref = fins->op1; /* xREF reference. */
324 IRRef val = fins->op2; /* Stored value reference. */
325 IRIns *xr = IR(xref);
326 IRRef1 *refp = &J->chain[IR_USTORE];
327 IRRef ref = *refp;
328 while (ref > xref) { /* Search for redundant or conflicting stores. */
329 IRIns *store = IR(ref);
330 switch (aa_uref(xr, IR(store->op1))) {
331 case ALIAS_NO:
332 break; /* Continue searching. */
333 case ALIAS_MAY: /* Store to MAYBE the same location. */
334 if (store->op2 != val) /* Conflict if the value is different. */
335 goto doemit;
336 break; /* Otherwise continue searching. */
337 case ALIAS_MUST: /* Store to the same location. */
338 if (store->op2 == val) /* Same value: drop the new store. */
339 return DROPFOLD;
340 /* Different value: try to eliminate the redundant store. */
341 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
342 IRIns *ir;
343 /* Check for any intervening guards (includes conflicting loads). */
344 for (ir = IR(J->cur.nins-1); ir > store; ir--)
345 if (irt_isguard(ir->t))
346 goto doemit; /* No elimination possible. */
347 /* Remove redundant store from chain and replace with NOP. */
348 *refp = store->prev;
349 store->o = IR_NOP; /* Unchained NOP -- does anybody care? */
350 store->t.irt = IRT_NIL;
351 store->op1 = store->op2 = 0;
352 store->prev = 0;
353 /* Now emit the new store instead. */
354 }
355 goto doemit;
356 }
357 ref = *(refp = &store->prev);
358 }
359doemit:
360 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
361}
362
363/* -- FLOAD forwarding and FSTORE elimination ----------------------------- */
364
365/* Alias analysis for field access.
366** Field loads are cheap and field stores are rare.
367** Simple disambiguation based on field types is good enough.
368*/
369static AliasRet aa_fref(IRIns *refa, IRIns *refb)
370{
371 if (refa->op2 != refb->op2)
372 return ALIAS_NO; /* Different fields. */
373 if (refa->op1 == refb->op1)
374 return ALIAS_MUST; /* Same field, same object. */
375 else
376 return ALIAS_MAY; /* Same field, possibly different object. */
377}
378
379/* Only the loads for mutable fields end up here (see FOLD). */
380TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
381{
382 IRRef oref = fins->op1; /* Object reference. */
383 IRRef fid = fins->op2; /* Field ID. */
384 IRRef lim = oref; /* Search limit. */
385 IRRef ref;
386
387 /* Search for conflicting stores. */
388 ref = J->chain[IR_FSTORE];
389 while (ref > oref) {
390 IRIns *store = IR(ref);
391 switch (aa_fref(fins, IR(store->op1))) {
392 case ALIAS_NO: break; /* Continue searching. */
393 case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */
394 case ALIAS_MUST: return store->op2; /* Store forwarding. */
395 }
396 ref = store->prev;
397 }
398
399 /* No conflicting store: const-fold field loads from allocations. */
400 if (fid == IRFL_TAB_META) {
401 IRIns *ir = IR(oref);
402 if (ir->o == IR_TNEW || ir->o == IR_TDUP)
403 return lj_ir_knull(J, IRT_TAB);
404 }
405
406conflict:
407 /* Try to find a matching load. Below the conflicting store, if any. */
408 ref = J->chain[IR_FLOAD];
409 while (ref > lim) {
410 IRIns *load = IR(ref);
411 if (load->op1 == oref && load->op2 == fid)
412 return ref; /* Load forwarding. */
413 ref = load->prev;
414 }
415 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
416}
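/* Example of the const-fold above: a table allocated on the trace (TNEW
** or TDUP) starts out without a metatable, so a load of its metatable
** field yields a NULL constant and any dependent metamethod check folds
** away.
*/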
417
418/* FSTORE elimination. */
419TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
420{
421 IRRef fref = fins->op1; /* FREF reference. */
422 IRRef val = fins->op2; /* Stored value reference. */
423 IRIns *xr = IR(fref);
424 IRRef1 *refp = &J->chain[IR_FSTORE];
425 IRRef ref = *refp;
426 while (ref > fref) { /* Search for redundant or conflicting stores. */
427 IRIns *store = IR(ref);
428 switch (aa_fref(xr, IR(store->op1))) {
429 case ALIAS_NO:
430 break; /* Continue searching. */
431 case ALIAS_MAY:
432 if (store->op2 != val) /* Conflict if the value is different. */
433 goto doemit;
434 break; /* Otherwise continue searching. */
435 case ALIAS_MUST:
436 if (store->op2 == val) /* Same value: drop the new store. */
437 return DROPFOLD;
438 /* Different value: try to eliminate the redundant store. */
439 if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
440 IRIns *ir;
441 /* Check for any intervening guards or conflicting loads. */
442 for (ir = IR(J->cur.nins-1); ir > store; ir--)
443 if (irt_isguard(ir->t) || (ir->o == IR_FLOAD && ir->op2 == xr->op2))
444 goto doemit; /* No elimination possible. */
445 /* Remove redundant store from chain and replace with NOP. */
446 *refp = store->prev;
447 store->o = IR_NOP; /* Unchained NOP -- does anybody care? */
448 store->t.irt = IRT_NIL;
449 store->op1 = store->op2 = 0;
450 store->prev = 0;
451 /* Now emit the new store instead. */
452 }
453 goto doemit;
454 }
455 ref = *(refp = &store->prev);
456 }
457doemit:
458 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
459}
460
461/* -- TLEN forwarding ----------------------------------------------------- */
462
463/* This is rather simplistic right now, but better than nothing. */
464TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
465{
466 IRRef tab = fins->op1; /* Table reference. */
467 IRRef lim = tab; /* Search limit. */
468 IRRef ref;
469
470 /* Any ASTORE is a conflict and limits the search. */
471 if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
472
473 /* Search for conflicting HSTORE with numeric key. */
474 ref = J->chain[IR_HSTORE];
475 while (ref > lim) {
476 IRIns *store = IR(ref);
477 IRIns *href = IR(store->op1);
478 IRIns *key = IR(href->op2);
479 if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
480 lim = ref; /* Conflicting store found, limits search for TLEN. */
481 break;
482 }
483 ref = store->prev;
484 }
485
486 /* Try to find a matching load. Below the conflicting store, if any. */
487 ref = J->chain[IR_TLEN];
488 while (ref > lim) {
489 IRIns *tlen = IR(ref);
490 if (tlen->op1 == tab)
491 return ref; /* Load forwarding. */
492 ref = tlen->prev;
493 }
494 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
495}
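/* Example: computing '#t' twice with no intervening array store and no
** hash store with a number-typed key forwards the second TLEN to the
** first. Any ASTORE, or an HSTORE whose key is a number, conservatively
** limits the search instead.
*/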
496
497/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
498
499/* Check whether the previous value for a table store is non-nil.
500** This can be derived either from a previous store or from a previous
501** load (because all loads from tables perform a type check).
502**
503** The result of the analysis can be used to avoid the metatable check
504** and the guard against HREF returning niltv. Both of these are cheap,
505** so let's not spend too much effort on the analysis.
506**
507** A result of 1 is exact: previous value CANNOT be nil.
508** A result of 0 is inexact: previous value MAY be nil.
509*/
510int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref)
511{
512 /* First check stores. */
513 IRRef ref = J->chain[loadop+IRDELTA_L2S];
514 while (ref > xref) {
515 IRIns *store = IR(ref);
516 if (store->op1 == xref) { /* Same xREF. */
517 /* A nil store MAY alias, but a non-nil store MUST alias. */
518 return !irt_isnil(store->t);
519 } else if (irt_isnil(store->t)) { /* Must check any nil store. */
520 IRRef skref = IR(store->op1)->op2;
521 IRRef xkref = IR(xref)->op2;
522 /* Same key type MAY alias. */
523 if (irt_sametype(IR(skref)->t, IR(xkref)->t)) {
524 if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref))
525 return 0; /* A nil store with same const key or var key MAY alias. */
526 /* Different const keys CANNOT alias. */
527 } /* Different key types CANNOT alias. */
528 } /* Other non-nil stores MAY alias. */
529 ref = store->prev;
530 }
531
532 /* Check loads since nothing could be derived from stores. */
533 ref = J->chain[loadop];
534 while (ref > xref) {
535 IRIns *load = IR(ref);
536 if (load->op1 == xref) { /* Same xREF. */
537 /* A nil load MAY alias, but a non-nil load MUST alias. */
538 return !irt_isnil(load->t);
539 } /* Other non-nil loads MAY alias. */
540 ref = load->prev;
541 }
542 return 0; /* Nothing derived at all, previous value MAY be nil. */
543}
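/* Example: in 't.x = 1; t.x = 2' the second store finds the earlier
** non-nil store to the same xREF, so the analysis returns 1 and the
** recorder can skip the metatable check and the niltv guard. With an
** intervening 't.x = nil' the nil store is found first and 0 is returned.
*/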
544
545/* ------------------------------------------------------------------------ */
546
547#undef IR
548#undef fins
549
550#endif
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
new file mode 100644
index 00000000..60a6afb8
--- /dev/null
+++ b/src/lj_opt_narrow.c
@@ -0,0 +1,430 @@
1/*
2** NARROW: Narrowing of numbers to integers (double to int32_t).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_opt_narrow_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_str.h"
14#include "lj_bc.h"
15#include "lj_ir.h"
16#include "lj_jit.h"
17#include "lj_iropt.h"
18#include "lj_trace.h"
19
20/* Rationale for narrowing optimizations:
21**
22** Lua has only a single number type and this is a FP double by default.
23** Narrowing doubles to integers does not pay off for the interpreter on a
24** current-generation x86/x64 machine. Most FP operations need the same
25** amount of execution resources as their integer counterparts, except
26** with slightly longer latencies. Longer latencies are a non-issue for
27** the interpreter, since they are usually hidden by other overhead.
28**
29** The total CPU execution bandwidth is the sum of the bandwidth of the FP
30** and the integer units, because they execute in parallel. The FP units
31** have an equal or higher bandwidth than the integer units. Not using
32** them means losing execution bandwidth. Moving work away from them to
33** the already quite busy integer units is a losing proposition.
34**
35** The situation for JIT-compiled code is a bit different: the higher code
36** density makes the extra latencies much more visible. Tight loops expose
37** the latencies for updating the induction variables. Array indexing
38** requires narrowing conversions with high latencies and additional
39** guards (to check that the index is really an integer). And many common
40** optimizations only work on integers.
41**
42** One solution would be speculative, eager narrowing of all number loads.
43** This causes many problems, like losing -0 or the need to resolve type
44** mismatches between traces. It also effectively forces the integer type
45** to have overflow-checking semantics. This impedes many basic
46** optimizations and requires adding overflow checks to all integer
47** arithmetic operations (whereas FP arithmetic can do without).
48**
49** Always replacing an FP op with an integer op plus an overflow check is
50** counter-productive on a current-generation super-scalar CPU. Although
51** the overflow check branches are highly predictable, they will clog the
52** execution port for the branch unit and tie up reorder buffers. This is
53** turning a pure data-flow dependency into a different data-flow
54** dependency (with slightly lower latency) *plus* a control dependency.
55** In general, you don't want to do this since latencies due to data-flow
56** dependencies can be well hidden by out-of-order execution.
57**
58** A better solution is to keep all numbers as FP values and only narrow
59** when it's beneficial to do so. LuaJIT uses predictive narrowing for
60** induction variables and demand-driven narrowing for index expressions
61** and bit operations. Additionally it can eliminate or hoist most of the
62** resulting overflow checks. Regular arithmetic computations are never
63** narrowed to integers.
64**
65** The integer type in the IR has convenient wrap-around semantics and
66** ignores overflow. Extra operations have been added for
67** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type.
68** Apart from reducing overall complexity of the compiler, this also
69** nicely solves the problem where you want to apply algebraic
70** simplifications to ADD, but not to ADDOV. And the assembler can use lea
71** instead of an add for integer ADD, but not for ADDOV (lea does not
72** affect the flags, but it helps to avoid register moves).
73**
74** Note that all of the above has to be reconsidered if LuaJIT is to be
75** ported to architectures with slow FP operations or with no hardware FPU
76** at all. In the latter case an integer-only port may be the best overall
77** solution (if this still meets user demands).
78*/
79
80/* Some local macros to save typing. Undef'd at the end. */
81#define IR(ref) (&J->cur.ir[(ref)])
82#define fins (&J->fold.ins)
83
84/* Pass IR on to next optimization in chain (FOLD). */
85#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
86
87#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
88
89/* -- Elimination of narrowing type conversions --------------------------- */
90
91/* Narrowing of index expressions and bit operations is demand-driven. The
92** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in
93** all of these cases (e.g. array indexing or string indexing). FOLD
94** already takes care of eliminating simple redundant conversions like
95** TOINT(TONUM(x)) ==> x.
96**
97** But the surrounding code is FP-heavy and all arithmetic operations are
98** performed on FP numbers. Consider a common example such as 'x=t[i+1]',
99** with 'i' already an integer (due to induction variable narrowing). The
100** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is
101** clearly suboptimal.
102**
103** One can do better by recursively backpropagating the narrowing type
104** conversion across FP arithmetic operations. This turns FP ops into
105** their corresponding integer counterparts. Depending on the semantics of
106** the conversion they also need to check for overflow. Currently only ADD
107** and SUB are supported.
108**
109** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and
110** then into ADDOV(i, 1) after folding of the conversions. The original FP
111** ops remain in the IR and are eliminated by DCE since all references to
112** them are gone.
113**
114** Special care has to be taken to avoid narrowing across an operation
115** which is potentially operating on non-integral operands. One obvious
116** case is when an expression contains a non-integral constant, but ends
117** up as an integer index at runtime (like t[x+1.5] with x=0.5).
118**
119** Operations with two non-constant operands illustrate a similar problem
120** (like t[a+b] with a=1.5 and b=2.5). Backpropagation has to stop there,
121** unless it can be proven that either operand is integral (e.g. by CSEing
122** a previous conversion). As a not-so-obvious corollary this logic also
123** applies for a whole expression tree (e.g. t[(a+1)+(b+1)]).
124**
125** Correctness of the transformation is guaranteed by never expanding the
126** tree with more conversions than the single one we would need to emit
127** if not backpropagating. TOBIT employs a more optimistic rule, because
128** the conversion has special semantics, designed to make the life of the
129** compiler writer easier. ;-)
130**
131** Using on-the-fly backpropagation of an expression tree doesn't work
132** because it's unknown whether the transform is correct until the end.
133** This either requires IR rollback and cache invalidation for every
134** subtree or a two-pass algorithm. The former didn't work out too well,
135** so the code now combines a recursive collector with a stack-based
136** emitter.
137**
138** [A recursive backpropagation algorithm with backtracking, employing
139** skip-list lookup and round-robin caching, emitting stack operations
140** on-the-fly for a stack-based interpreter -- and all of that in a meager
141** kilobyte? Yep, compilers are a great treasure chest. Throw away your
142** textbooks and read the codebase of a compiler today!]
143**
144** There's another optimization opportunity for array indexing: it's
145** always accompanied by an array bounds-check. The outermost overflow
146** check may be delegated to the ABC operation. This works because ABC is
147** an unsigned comparison and wrap-around due to overflow creates negative
148** numbers.
149**
150** But this optimization is only valid for constants that cannot overflow
151** an int32_t into the range of valid array indexes [0..2^27+1). A check
152** for +-2^30 is safe since -2^31 - 2^30 wraps to 2^30 and 2^31-1 + 2^30
153** wraps to -2^30-1.
154**
155** It's also good enough in practice, since e.g. t[i+1] or t[i-10] are
156** quite common. So the above example finally ends up as ADD(i, 1)!
157**
158** Later on, the assembler is able to fuse the whole array reference and
159** the ADD into the memory operands of loads and other instructions. This
160** is why LuaJIT is able to generate very pretty (and fast) machine code
161** for array indexing. And that, my dear, concludes another story about
162** one of the hidden secrets of LuaJIT ...
163*/
164
165/* Maximum backpropagation depth and maximum stack size. */
166#define NARROW_MAX_BACKPROP 100
167#define NARROW_MAX_STACK 256
168
169/* Context used for narrowing of type conversions. */
170typedef struct NarrowConv {
171 jit_State *J; /* JIT compiler state. */
172 IRRef2 *sp; /* Current stack pointer. */
173 IRRef2 *maxsp; /* Maximum stack pointer minus redzone. */
174 int lim; /* Limit on the number of emitted conversions. */
175 IRRef mode; /* Conversion mode (IRTOINT_*). */
176 IRRef2 stack[NARROW_MAX_STACK]; /* Stack holding the stack-machine code. */
177} NarrowConv;
178
179/* The stack machine has a 32 bit instruction format: [IROpT | IRRef1]
180** The lower 16 bits hold a reference (or 0). The upper 16 bits hold
181** the IR opcode + type or one of the following special opcodes:
182*/
183enum {
184 NARROW_REF, /* Push ref. */
185 NARROW_CONV, /* Push conversion of ref. */
186 NARROW_INT /* Push KINT ref. The next code holds an int32_t. */
187};
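/* Example encoding: backpropagating TOINT(ADD(TONUM(i), 1)) with an
** already-integer 'i' collects
**
**   [NARROW_REF  | i]      -- push the integer operand directly
**   [NARROW_INT  | 0]
**   [1]                    -- push KINT(1)
**   [IRTI(IR_ADD)| ref]    -- pop both operands, emit ADD/ADDOV
**
** which narrow_conv_emit() below replays to produce ADDOV(i, 1), or a
** plain ADD when the overflow check can be delegated to ABC.
*/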
188
189/* Lookup a reference in the backpropagation cache. */
190static IRRef narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode)
191{
192 ptrdiff_t i;
193 for (i = 0; i < BPROP_SLOTS; i++) {
194 BPropEntry *bp = &J->bpropcache[i];
195 if (bp->key == key && bp->mode <= mode) /* Stronger checks are ok, too. */
196 return bp->val;
197 }
198 return 0;
199}
200
201/* Add an entry to the backpropagation cache. */
202static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode)
203{
204 uint32_t slot = J->bpropslot;
205 BPropEntry *bp = &J->bpropcache[slot];
206 J->bpropslot = (slot + 1) & (BPROP_SLOTS-1);
207 bp->key = key;
208 bp->val = val;
209 bp->mode = mode;
210}
211
212/* Backpropagate narrowing conversion. Return number of needed conversions. */
213static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
214{
215 jit_State *J = nc->J;
216 IRIns *ir = IR(ref);
217 IRRef cref;
218
219 /* Check the easy cases first. */
220 if (ir->o == IR_TONUM) { /* Undo inverse conversion. */
221 *nc->sp++ = IRREF2(ir->op1, NARROW_REF);
222 return 0;
223 } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
224 lua_Number n = ir_knum(ir)->n;
225 if (nc->mode == IRTOINT_TOBIT) { /* Allows a wider range of constants. */
226 int64_t k64 = (int64_t)n;
227 if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */
228 *nc->sp++ = IRREF2(0, NARROW_INT);
229 *nc->sp++ = (IRRef2)k64; /* But always truncate to 32 bits. */
230 return 0;
231 }
232 } else {
233 int32_t k = lj_num2int(n);
234 if (n == cast_num(k)) { /* Only if constant is really an integer. */
235 *nc->sp++ = IRREF2(0, NARROW_INT);
236 *nc->sp++ = (IRRef2)k;
237 return 0;
238 }
239 }
240 return 10; /* Never narrow other FP constants (this is rare). */
241 }
242
243 /* Try to CSE the conversion. Stronger checks are ok, too. */
244 for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev)
245 if (IR(cref)->op1 == ref &&
246 irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) {
247 *nc->sp++ = IRREF2(cref, NARROW_REF);
248 return 0; /* Already there, no additional conversion needed. */
249 }
250
251 /* Backpropagate across ADD/SUB. */
252 if (ir->o == IR_ADD || ir->o == IR_SUB) {
253 /* Try cache lookup first. */
254 IRRef bpref, mode = nc->mode;
255 if (mode == IRTOINT_INDEX && depth > 0)
256 mode = IRTOINT_CHECK; /* Inner conversions need a stronger check. */
257 bpref = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
258 if (bpref) {
259 *nc->sp++ = IRREF2(bpref, NARROW_REF);
260 return 0;
261 }
262 if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
263 IRRef2 *savesp = nc->sp;
264 int count = narrow_conv_backprop(nc, ir->op1, depth);
265 count += narrow_conv_backprop(nc, ir->op2, depth);
266 if (count <= nc->lim) { /* Limit total number of conversions. */
267 *nc->sp++ = IRREF2(ref, IRTI(ir->o));
268 return count;
269 }
270 nc->sp = savesp; /* Too many conversions, need to backtrack. */
271 }
272 }
273
274 /* Otherwise add a conversion. */
275 *nc->sp++ = IRREF2(ref, NARROW_CONV);
276 return 1;
277}
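/* Example of the limit check: for 't[a+b]' with two non-constant FP
** operands both sub-results need a conversion, so the count (2) exceeds
** nc->lim (1 for index expressions), the stack is rolled back and a
** single conversion of the whole ADD is pushed instead -- exactly what
** would have been emitted without backpropagation.
*/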
278
279/* Emit the conversions collected during backpropagation. */
280static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
281{
282 /* The fins fields must be saved now -- emitir() overwrites them. */
283 IROpT guardot = irt_isguard(fins->t) ? IRTG(IR_ADDOV-IR_ADD, 0) : 0;
284 IROpT convot = fins->ot;
285 IRRef1 convop2 = fins->op2;
286 IRRef2 *next = nc->stack; /* List of instructions from backpropagation. */
287 IRRef2 *last = nc->sp;
288 IRRef2 *sp = nc->stack; /* Recycle the stack to store operands. */
289 while (next < last) { /* Simple stack machine to process the ins. list. */
290 IRRef2 ref = *next++;
291 IROpT op = ref >> 16;
292 if (op == NARROW_REF) {
293 *sp++ = ref;
294 } else if (op == NARROW_CONV) {
295 *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
296 } else if (op == NARROW_INT) {
297 lua_assert(next < last);
298 *sp++ = lj_ir_kint(J, *next++);
299 } else { /* Regular IROpT. Pops two operands and pushes one result. */
300 IRRef mode = nc->mode;
301 lua_assert(sp >= nc->stack+2);
302 sp--;
303 /* Omit some overflow checks for array indexing. See comments above. */
304 if (mode == IRTOINT_INDEX) {
305 if (next == last && irref_isk((IRRef1)sp[0]) &&
306 (uint32_t)IR((IRRef1)sp[0])->i + 0x40000000 < 0x80000000)
307 guardot = 0;
308 else
309 mode = IRTOINT_CHECK; /* Otherwise cache a stronger check. */
310 }
311 sp[-1] = emitir(op+guardot, sp[-1], sp[0]);
312 narrow_bpc_set(J, (IRRef1)ref, (IRRef1)sp[-1], mode); /* Add to cache. */
313 }
314 }
315 lua_assert(sp == nc->stack+1);
316 return nc->stack[0];
317}
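/* The '(uint32_t)... + 0x40000000 < 0x80000000' test above is a single
** unsigned range check for the constant lying in [-2^30, 2^30), the
** +-2^30 window described in the big comment above. A standalone sketch
** of the same check (illustrative helper name, not compiled into this
** file):
*/
#if 0
#include <stdint.h>
static int index_const_in_safe_range(int32_t k)
{
  /* Equivalent to: -0x40000000 <= k && k < 0x40000000. */
  return (uint32_t)k + 0x40000000u < 0x80000000u;
}
#endif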
318
319/* Narrow a type conversion of an arithmetic operation. */
320TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
321{
322 if ((J->flags & JIT_F_OPT_NARROW)) {
323 NarrowConv nc;
324 nc.J = J;
325 nc.sp = nc.stack;
326 nc.maxsp = &nc.stack[NARROW_MAX_STACK-4];
327 if (fins->o == IR_TOBIT) {
328 nc.mode = IRTOINT_TOBIT; /* Used only in the backpropagation cache. */
329 nc.lim = 2; /* TOBIT can use a more optimistic rule. */
330 } else {
331 nc.mode = fins->op2;
332 nc.lim = 1;
333 }
334 if (narrow_conv_backprop(&nc, fins->op1, 0) <= nc.lim)
335 return narrow_conv_emit(J, &nc);
336 }
337 return NEXTFOLD;
338}
339
340/* -- Narrowing of arithmetic operators ----------------------------------- */
341
342/* Check whether a number fits into an int32_t (-0 is ok, too). */
343static int numisint(lua_Number n)
344{
345 return (n == cast_num(lj_num2int(n)));
346}
347
348/* Narrowing of modulo operator. */
349TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
350{
351 TRef tmp;
352 if ((J->flags & JIT_F_OPT_NARROW) &&
353 tref_isk(rc) && tref_isint(rc)) { /* Optimize x % k. */
354 int32_t k = IR(tref_ref(rc))->i;
355 if (k > 0 && (k & (k-1)) == 0) { /* i % 2^k ==> band(i, 2^k-1) */
356 if (tref_isint(rb))
357 return emitir(IRTI(IR_BAND), rb, lj_ir_kint(J, k-1));
358 }
359 }
360 /* b % c ==> b - floor(b/c)*c */
361 rb = lj_ir_tonum(J, rb);
362 rc = lj_ir_tonum(J, rc);
363 tmp = emitir(IRTN(IR_DIV), rb, rc);
364 tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_FLOOR);
365 tmp = emitir(IRTN(IR_MUL), tmp, rc);
366 return emitir(IRTN(IR_SUB), rb, tmp);
367}
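/* The power-of-two case above relies on Lua's floored modulo agreeing
** with a two's-complement AND for positive power-of-two divisors. A
** standalone sketch of that identity (illustrative helper names, not
** compiled into this file):
*/
#if 0
#include <math.h>
#include <stdint.h>
/* Floored modulo as Lua defines it: a % b == a - floor(a/b)*b. */
static double lua_floored_mod(double a, double b)
{
  return a - floor(a/b)*b;
}
/* Holds for any int32_t i and k == 2^n > 0, e.g. i = -3, k = 4 gives 1. */
static int mod_matches_band(int32_t i, int32_t k)
{
  return lua_floored_mod((double)i, (double)k) == (double)(i & (k-1));
}
#endif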
368
369/* Narrowing of power operator or math.pow. */
370TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
371{
372 lua_Number n;
373 if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc))
374 lj_trace_err(J, LJ_TRERR_BADTYPE);
375 n = numV(vc);
376 /* Limit narrowing for pow to small exponents (or for two constants). */
377 if ((tref_isint(rc) && tref_isk(rc) && tref_isk(rb)) ||
378 ((J->flags & JIT_F_OPT_NARROW) &&
379 (numisint(n) && n >= -65536.0 && n <= 65536.0))) {
380 TRef tmp;
381 if (!tref_isinteger(rc)) {
382 if (tref_isstr(rc))
383 rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
384 rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK); /* Guarded TOINT! */
385 }
386 if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
387 tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
388 emitir(IRTGI(IR_LE), tmp, lj_ir_kint(J, 2*65536-2147483647-1));
389 }
390 return emitir(IRTN(IR_POWI), rb, rc);
391 }
392 /* FOLD covers most cases, but some are easier to do here. */
393 if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
394 return rb; /* 1 ^ x ==> 1 */
395 rc = lj_ir_tonum(J, rc);
396 if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
397 return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */
398 /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
399 rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
400 rc = emitir(IRTN(IR_MUL), rb, rc);
401 return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
402}
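/* Examples: with narrowing enabled 'x^2' (small integral exponent) takes
** the POWI path, a non-constant integral exponent additionally gets the
** -65536..65536 range guard, '1^x' folds to 1, 'x^0.5' becomes FPMATH
** sqrt, and anything else is split into exp2(c*log2(b)) as noted above.
*/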
403
404/* -- Predictive narrowing of induction variables ------------------------- */
405
406/* Narrow the FORL index type by looking at the runtime values. */
407IRType lj_opt_narrow_forl(cTValue *forbase)
408{
409 lua_assert(tvisnum(&forbase[FORL_IDX]) &&
410 tvisnum(&forbase[FORL_STOP]) &&
411 tvisnum(&forbase[FORL_STEP]));
412 /* Narrow only if the runtime values of start/stop/step are all integers. */
413 if (numisint(numV(&forbase[FORL_IDX])) &&
414 numisint(numV(&forbase[FORL_STOP])) &&
415 numisint(numV(&forbase[FORL_STEP]))) {
416 /* And if the loop index can't possibly overflow. */
417 lua_Number step = numV(&forbase[FORL_STEP]);
418 lua_Number sum = numV(&forbase[FORL_STOP]) + step;
419 if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0)
420 return IRT_INT;
421 }
422 return IRT_NUM;
423}
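/* Examples: 'for i=1,100 do ... end' has integral start/stop/step and
** 100+1 stays well inside the int32_t range, so the index is narrowed to
** IRT_INT. A fractional step like 0.5, or a stop value for which
** stop+step leaves the int32_t range, keeps the index at IRT_NUM.
*/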
424
425#undef IR
426#undef fins
427#undef emitir
428#undef emitir_raw
429
430#endif
diff --git a/src/lj_parse.c b/src/lj_parse.c
new file mode 100644
index 00000000..663525ab
--- /dev/null
+++ b/src/lj_parse.c
@@ -0,0 +1,2198 @@
1/*
2** Lua parser (source code -> bytecode).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_parse_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_tab.h"
17#include "lj_func.h"
18#include "lj_state.h"
19#include "lj_bc.h"
20#include "lj_lex.h"
21#include "lj_parse.h"
22#include "lj_vm.h"
23#include "lj_vmevent.h"
24
25/* -- Parser structures and definitions ----------------------------------- */
26
27/* Expression kinds. */
28typedef enum {
29 /* Constant expressions must be first and in this order: */
30 VKNIL,
31 VKFALSE,
32 VKTRUE,
33 VKSTR, /* sval = string value */
34 VKNUM, /* nval = numerical value */
35 VKLAST = VKNUM,
36 /* Non-constant expressions follow: */
37 VLOCAL, /* info = local register */
38 VUPVAL, /* info = upvalue index */
39 VGLOBAL, /* sval = string value */
40 VINDEXED, /* info = table register, aux = index reg/byte/string const */
41 VJMP, /* info = instruction PC */
42 VRELOCABLE, /* info = instruction PC */
43 VNONRELOC, /* info = result register */
44 VCALL, /* info = instruction PC, aux = base */
45 VVOID
46} ExpKind;
47
48/* Expression descriptor. */
49typedef struct ExpDesc {
50 union {
51 struct { uint32_t info, aux; } s;
52 TValue nval;
53 GCstr *sval;
54 } u;
55 ExpKind k;
56 BCPos t; /* true condition exit list */
57 BCPos f; /* false condition exit list */
58} ExpDesc;
59
60/* Tests for expression types */
61#define isK(e) ((uint32_t)((e)->k) <= VKLAST)
62#define isnumK(e) ((e)->k == VKNUM)
63#define isstrK(e) ((e)->k == VKSTR)
64#define expnumV(e) check_exp(isnumK((e)), numV(&(e)->u.nval))
65
66#define hasjumps(e) ((e)->t != (e)->f)
67#define isKexp(e) (isK(e) && !hasjumps(e))
68#define isnumKexp(e) (isnumK(e) && !hasjumps(e))
69
70#define priKk(k) check_exp((k) <= VKTRUE, (k) - VKNIL)
71#define priK(e) priKk((e)->k)
72
73/* Per-function linked list of blocks. */
74typedef struct FuncBlock {
75 struct FuncBlock *previous; /* chain */
76 BCPos breaklist; /* list of jumps out of this loop */
77 uint8_t nactvar; /* # active locals outside the breakable structure */
78 uint8_t upval; /* true if some variable in the block is an upvalue */
79 uint8_t isbreakable; /* true if `block' is a loop */
80} FuncBlock;
81
82typedef struct UpValDesc {
83 uint8_t k;
84 uint8_t info;
85} UpValDesc;
86
87/* Per-function state. */
88typedef struct FuncState {
89 GCproto *pt; /* current function header */
90 GCtab *kt; /* table to find (and reuse) elements in `k' */
91 struct FuncState *prev; /* enclosing function */
92 struct LexState *ls; /* lexical state */
93 struct lua_State *L; /* copy of the Lua state */
94 struct FuncBlock *bl; /* chain of current blocks */
95 BCPos pc; /* next bytecode position */
96 BCPos lasttarget; /* PC of last jump target */
97 BCPos jpc; /* list of pending jumps to PC */
98 BCReg freereg; /* first free register */
99 BCReg nkn, nkgc; /* number of lua_Number/GCobj constants */
100 uint16_t nlocvars; /* number of elements in `locvars' */
101 uint8_t nactvar; /* number of active local variables */
102 uint8_t nuv; /* number of upvalues */
103 UpValDesc upvalues[LJ_MAX_UPVAL]; /* upvalues */
104 uint16_t actvar[LJ_MAX_LOCVAR]; /* declared-variable stack */
105} FuncState;
106
107/* Binary and unary operators. ORDER OPR */
108typedef enum BinOpr {
109 OPR_ADD, OPR_SUB, OPR_MUL, OPR_DIV, OPR_MOD, OPR_POW, /* ORDER ARITH */
110 OPR_CONCAT,
111 OPR_NE, OPR_EQ,
112 OPR_LT, OPR_GE, OPR_LE, OPR_GT,
113 OPR_AND, OPR_OR,
114 OPR_NOBINOPR
115} BinOpr;
116
117LJ_STATIC_ASSERT((int)BC_ISGE-(int)BC_ISLT == (int)OPR_GE-(int)OPR_LT);
118LJ_STATIC_ASSERT((int)BC_ISLE-(int)BC_ISLT == (int)OPR_LE-(int)OPR_LT);
119LJ_STATIC_ASSERT((int)BC_ISGT-(int)BC_ISLT == (int)OPR_GT-(int)OPR_LT);
120LJ_STATIC_ASSERT((int)BC_SUBVV-(int)BC_ADDVV == (int)OPR_SUB-(int)OPR_ADD);
121LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD);
122LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD);
123LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
124
125typedef enum UnOpr { OPR_MINUS, OPR_NOT, OPR_LEN, OPR_NOUNOPR } UnOpr;
126
127/* -- Error handling ------------------------------------------------------ */
128
129LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
130{
131 lj_lex_error(ls, ls->token, em);
132}
133
134LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token)
135{
136 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token));
137}
138
139LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
140{
141 if (fs->pt->linedefined == 0)
142 lj_lex_error(fs->ls, 0, LJ_ERR_XLIMM, limit, what);
143 else
144 lj_lex_error(fs->ls, 0, LJ_ERR_XLIMF, fs->pt->linedefined, limit, what);
145}
146
147#define checklimit(fs, v, l, m) if ((v) >= (l)) err_limit(fs, l, m)
148#define checklimitgt(fs, v, l, m) if ((v) > (l)) err_limit(fs, l, m)
149#define checkcond(ls, c, em) { if (!(c)) err_syntax(ls, em); }
150
151/* -- Code emitter: branches ---------------------------------------------- */
152
153static BCPos getjump(FuncState *fs, BCPos pc)
154{
155 ptrdiff_t delta = bc_j(fs->pt->bc[pc]);
156 if ((BCPos)delta == NO_JMP)
157 return NO_JMP;
158 else
159 return (BCPos)(((ptrdiff_t)pc+1)+delta);
160}
161
162static int need_value(FuncState *fs, BCPos list)
163{
164 for (; list != NO_JMP; list = getjump(fs, list)) {
165 BCOp op = bc_op(fs->pt->bc[list >= 1 ? list-1 : list]);
166 if (!(op == BC_ISTC || op == BC_ISFC)) return 1;
167 }
168 return 0; /* Not found. */
169}
170
171static int patchtestreg(FuncState *fs, BCPos pc, BCReg reg)
172{
173 BCIns *i = &fs->pt->bc[pc >= 1 ? pc-1 : pc];
174 BCOp op = bc_op(*i);
175 if (!(op == BC_ISTC || op == BC_ISFC))
176 return 0; /* cannot patch other instructions */
177 if (reg != NO_REG && reg != bc_d(*i)) {
178 setbc_a(i, reg);
179 } else { /* no register to put value or register already has the value */
180 setbc_op(i, op+(BC_IST-BC_ISTC));
181 setbc_a(i, 0);
182 }
183 return 1;
184}
185
186static void removevalues(FuncState *fs, BCPos list)
187{
188 for (; list != NO_JMP; list = getjump(fs, list))
189 patchtestreg(fs, list, NO_REG);
190}
191
192static void fixjump(FuncState *fs, BCPos pc, BCPos dest)
193{
194 BCIns *jmp = &fs->pt->bc[pc];
195 BCPos offset = dest-(pc+1)+BCBIAS_J;
196 lua_assert(dest != NO_JMP);
197 if (offset > BCMAX_D)
198 err_syntax(fs->ls, LJ_ERR_XJUMP);
199 setbc_d(jmp, offset);
200}
201
202static void concatjumps(FuncState *fs, BCPos *l1, BCPos l2)
203{
204 if (l2 == NO_JMP) return;
205 else if (*l1 == NO_JMP) {
206 *l1 = l2;
207 } else {
208 BCPos list = *l1;
209 BCPos next;
210 while ((next = getjump(fs, list)) != NO_JMP) /* find last element */
211 list = next;
212 fixjump(fs, list, l2);
213 }
214}
215
216static void patchlistaux(FuncState *fs, BCPos list, BCPos vtarget,
217 BCReg reg, BCPos dtarget)
218{
219 while (list != NO_JMP) {
220 BCPos next = getjump(fs, list);
221 if (patchtestreg(fs, list, reg))
222 fixjump(fs, list, vtarget);
223 else
224 fixjump(fs, list, dtarget); /* jump to default target */
225 list = next;
226 }
227}
228
229static void patchtohere(FuncState *fs, BCPos list)
230{
231 fs->lasttarget = fs->pc;
232 concatjumps(fs, &fs->jpc, list);
233}
234
235static void patchlist(FuncState *fs, BCPos list, BCPos target)
236{
237 if (target == fs->pc) {
238 patchtohere(fs, list);
239 } else {
240 lua_assert(target < fs->pc);
241 patchlistaux(fs, list, target, NO_REG, target);
242 }
243}
244
245/* -- Code emitter: instructions ------------------------------------------ */
246
247static BCPos emitINS(FuncState *fs, BCIns i)
248{
249 GCproto *pt;
250 patchlistaux(fs, fs->jpc, fs->pc, NO_REG, fs->pc);
251 fs->jpc = NO_JMP;
252 pt = fs->pt;
253 if (LJ_UNLIKELY(fs->pc >= pt->sizebc)) {
254 checklimit(fs, fs->pc, LJ_MAX_BCINS, "bytecode instructions");
255 lj_mem_growvec(fs->L, pt->bc, pt->sizebc, LJ_MAX_BCINS, BCIns);
256 lj_mem_growvec(fs->L, pt->lineinfo, pt->sizelineinfo, LJ_MAX_BCINS, BCLine);
257 }
258 pt->bc[fs->pc] = i;
259 pt->lineinfo[fs->pc] = fs->ls->lastline;
260 return fs->pc++;
261}
262
263#define emitABC(fs, o, a, b, c) emitINS(fs, BCINS_ABC(o, a, b, c))
264#define emitAD(fs, o, a, d) emitINS(fs, BCINS_AD(o, a, d))
265#define emitAJ(fs, o, a, j) emitINS(fs, BCINS_AJ(o, a, j))
266
267#define bcptr(fs, e) (&(fs)->pt->bc[(e)->u.s.info])
268
269static BCPos emit_jump(FuncState *fs)
270{
271 BCPos jpc = fs->jpc; /* save list of jumps to here */
272 BCPos j = fs->pc - 1;
273 fs->jpc = NO_JMP;
274 if ((int32_t)j >= (int32_t)fs->lasttarget && bc_op(fs->pt->bc[j]) == BC_UCLO)
275 setbc_j(&fs->pt->bc[j], NO_JMP);
276 else
277 j = emitAJ(fs, BC_JMP, fs->freereg, NO_JMP);
278 concatjumps(fs, &j, jpc); /* keep them on hold */
279 return j;
280}
281
282/* -- Code emitter: constants --------------------------------------------- */
283
284static BCReg numK(FuncState *fs, ExpDesc *e)
285{
286 lua_State *L = fs->L;
287 TValue *val;
288 lua_assert(isnumK(e));
289 val = lj_tab_set(L, fs->kt, &e->u.nval);
290 if (tvisnum(val))
291 return val->u32.lo;
292 val->u64 = fs->nkn;
293 return fs->nkn++;
294}
295
296static BCReg gcK(FuncState *fs, GCobj *gc, int itype)
297{
298 lua_State *L = fs->L;
299 TValue o, *val;
300 setgcV(L, &o, &gc->gch, itype);
301 val = lj_tab_set(L, fs->kt, &o);
302 if (tvisnum(val))
303 return val->u32.lo;
304 val->u64 = fs->nkgc;
305 return fs->nkgc++;
306}
307
308static BCReg strK(FuncState *fs, ExpDesc *e)
309{
310 lua_assert(isstrK(e) || e->k == VGLOBAL);
311 return gcK(fs, obj2gco(e->u.sval), LJ_TSTR);
312}
313
314GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len)
315{
316 lua_State *L = ls->L;
317 GCstr *s = lj_str_new(L, str, len);
318 TValue *tv = lj_tab_setstr(L, ls->fs->kt, s);
319 if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
320 return s;
321}
322
323static void keep_token(LexState *ls)
324{
325 if (ls->token == TK_name || ls->token == TK_string) {
326 TValue *tv = lj_tab_setstr(ls->L, ls->fs->kt, strV(&ls->tokenval));
327 if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
328 }
329}
330
331static void nilK(FuncState *fs, BCReg from, BCReg n)
332{
333 BCIns *pr;
334 if (fs->pc > fs->lasttarget) { /* no jumps to current position? */
335 BCReg pfrom, pto;
336 pr = &fs->pt->bc[fs->pc-1];
337 pfrom = bc_a(*pr);
338 switch (bc_op(*pr)) {
339 case BC_KPRI:
340 if (bc_d(*pr) != ~LJ_TNIL) break;
341 if (from == pfrom) {
342 if (n == 1) return;
343 } else if (from == pfrom+1) {
344 from = pfrom;
345 n++;
346 } else {
347 break;
348 }
349 fs->pc--;
350 break;
351 case BC_KNIL:
352 pto = bc_d(*pr);
353 if (pfrom <= from && from <= pto+1) { /* can connect both? */
354 if (from+n-1 > pto)
355 setbc_d(pr, from+n-1);
356 return;
357 }
358 break;
359 default:
360 break;
361 }
362 }
363 emitINS(fs, n == 1 ? BCINS_AD(BC_KPRI, from, priKk(VKNIL))
364 : BCINS_AD(BC_KNIL, from, from+n-1));
365}
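/* Example of the coalescing above: two consecutive nil loads into
** adjacent registers (say from successive local declarations) do not
** emit two instructions; the earlier KPRI nil is widened into a single
** KNIL covering both registers, and an overlapping KNIL simply has its
** range extended -- provided no jump targets the current position.
*/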
366
367/* -- Code emitter: registers --------------------------------------------- */
368
369static void checkframe(FuncState *fs, BCReg n)
370{
371 BCReg sz = fs->freereg + n;
372 if (sz > fs->pt->framesize) {
373 if (sz >= LJ_MAX_SLOTS)
374 err_syntax(fs->ls, LJ_ERR_XSLOTS);
375 fs->pt->framesize = cast_byte(sz);
376 }
377}
378
379static void reserveregs(FuncState *fs, BCReg n)
380{
381 checkframe(fs, n);
382 fs->freereg += n;
383}
384
385static void freereg(FuncState *fs, BCReg reg)
386{
387 if (reg >= fs->nactvar) {
388 fs->freereg--;
389 lua_assert(reg == fs->freereg);
390 }
391}
392
393static void freeexp(FuncState *fs, ExpDesc *e)
394{
395 if (e->k == VNONRELOC)
396 freereg(fs, e->u.s.info);
397}
398
399/* -- Code emitter: expressions ------------------------------------------- */
400
401static void dischargevars(FuncState *fs, ExpDesc *e)
402{
403 BCIns ins;
404 switch (e->k) {
405 case VUPVAL:
406 ins = BCINS_AD(BC_UGET, 0, e->u.s.info);
407 break;
408 case VGLOBAL:
409 ins = BCINS_AD(BC_GGET, 0, strK(fs, e));
410 break;
411 case VINDEXED: {
412 /* TGET[VSB] key = reg, string const or byte const */
413 BCReg rc = e->u.s.aux;
414 if ((int32_t)rc < 0) {
415 ins = BCINS_ABC(BC_TGETS, 0, e->u.s.info, ~rc);
416 } else if (rc > BCMAX_C) {
417 ins = BCINS_ABC(BC_TGETB, 0, e->u.s.info, rc-(BCMAX_C+1));
418 } else {
419 freereg(fs, rc);
420 ins = BCINS_ABC(BC_TGETV, 0, e->u.s.info, rc);
421 }
422 freereg(fs, e->u.s.info);
423 break;
424 }
425 case VCALL:
426 e->u.s.info = e->u.s.aux;
427 /* fallthrough */
428 case VLOCAL:
429 e->k = VNONRELOC;
430 /* fallthrough */
431 default:
432 return;
433 }
434 e->u.s.info = emitINS(fs, ins);
435 e->k = VRELOCABLE;
436}
437
438static void discharge2reg(FuncState *fs, ExpDesc *e, BCReg reg)
439{
440 BCIns ins;
441 dischargevars(fs, e);
442 switch (e->k) {
443 case VKNIL: case VKFALSE: case VKTRUE:
444 ins = BCINS_AD(BC_KPRI, reg, priK(e));
445 break;
446 case VKSTR:
447 ins = BCINS_AD(BC_KSTR, reg, strK(fs, e));
448 break;
449 case VKNUM: {
450 lua_Number n = expnumV(e);
451 int32_t k = lj_num2int(n);
452 if (checki16(k) && n == cast_num(k))
453 ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k);
454 else
455 ins = BCINS_AD(BC_KNUM, reg, numK(fs, e));
456 break;
457 }
458 case VRELOCABLE:
459 setbc_a(bcptr(fs, e), reg);
460 goto noins;
461 case VNONRELOC:
462 if (reg == e->u.s.info)
463 goto noins;
464 ins = BCINS_AD(BC_MOV, reg, e->u.s.info);
465 break;
466 default:
467 lua_assert(e->k == VVOID || e->k == VJMP);
468 return; /* nothing to do... */
469 }
470 emitINS(fs, ins);
471noins:
472 e->u.s.info = reg;
473 e->k = VNONRELOC;
474}
475
476static void exp2reg(FuncState *fs, ExpDesc *e, BCReg reg)
477{
478 discharge2reg(fs, e, reg);
479 if (e->k == VJMP)
480 concatjumps(fs, &e->t, e->u.s.info); /* put this jump in `t' list */
481 if (hasjumps(e)) {
482 BCPos final; /* position after whole expression */
484 BCPos p_f = NO_JMP; /* position of a possible LOAD false */
485 BCPos p_t = NO_JMP; /* position of a possible LOAD true */
485 if (need_value(fs, e->t) || need_value(fs, e->f)) {
486 BCPos fj = (e->k == VJMP) ? NO_JMP : emit_jump(fs);
487 p_f = emitAD(fs, BC_KPRI, reg, priKk(VKFALSE));
488 emitAJ(fs, BC_JMP, fs->freereg, 1);
489 p_t = emitAD(fs, BC_KPRI, reg, priKk(VKTRUE));
490 patchtohere(fs, fj);
491 }
492 final = fs->pc;
493 fs->lasttarget = final;
494 patchlistaux(fs, e->f, final, reg, p_f);
495 patchlistaux(fs, e->t, final, reg, p_t);
496 }
497 e->f = e->t = NO_JMP;
498 e->u.s.info = reg;
499 e->k = VNONRELOC;
500}
501
502static void exp2nextreg(FuncState *fs, ExpDesc *e)
503{
504 dischargevars(fs, e);
505 freeexp(fs, e);
506 reserveregs(fs, 1);
507 exp2reg(fs, e, fs->freereg - 1);
508}
509
510static BCReg exp2anyreg(FuncState *fs, ExpDesc *e)
511{
512 dischargevars(fs, e);
513 if (e->k == VNONRELOC) {
514 if (!hasjumps(e)) return e->u.s.info; /* exp is already in a register */
515 if (e->u.s.info >= fs->nactvar) { /* reg. is not a local? */
516 exp2reg(fs, e, e->u.s.info); /* put value on it */
517 return e->u.s.info;
518 }
519 }
520 exp2nextreg(fs, e); /* default */
521 return e->u.s.info;
522}
523
524static void exp2val(FuncState *fs, ExpDesc *e)
525{
526 if (hasjumps(e))
527 exp2anyreg(fs, e);
528 else
529 dischargevars(fs, e);
530}
531
532static void storevar(FuncState *fs, ExpDesc *var, ExpDesc *e)
533{
534 BCIns ins;
535 switch (var->k) {
536 case VLOCAL:
537 freeexp(fs, e);
538 exp2reg(fs, e, var->u.s.info);
539 return;
540 case VUPVAL:
541 exp2val(fs, e);
542 switch (e->k) {
543 case VKNIL: case VKFALSE: case VKTRUE:
544 ins = BCINS_AD(BC_USETP, var->u.s.info, priK(e));
545 break;
546 case VKSTR:
547 ins = BCINS_AD(BC_USETS, var->u.s.info, strK(fs, e));
548 break;
549 case VKNUM:
550 ins = BCINS_AD(BC_USETN, var->u.s.info, numK(fs, e));
551 break;
552 default:
553 ins = BCINS_AD(BC_USETV, var->u.s.info, exp2anyreg(fs, e));
554 break;
555 }
556 break;
557 case VGLOBAL: {
558 BCReg ra = exp2anyreg(fs, e);
559 ins = BCINS_AD(BC_GSET, ra, strK(fs, var));
560 break;
561 }
562 case VINDEXED: {
563 /* TSET[VSB] key = reg, string const or byte const */
564 BCReg ra = exp2anyreg(fs, e);
565 BCReg rc = var->u.s.aux;
566 if ((int32_t)rc < 0) {
567 ins = BCINS_ABC(BC_TSETS, ra, var->u.s.info, ~rc);
568 } else if (rc > BCMAX_C) {
569 ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1));
570 } else {
571 /* Free a late-allocated key reg to avoid an assert on freeing the value reg. */
572 /* This can only happen when called from constructor(). */
573 lua_assert(e->k != VNONRELOC || ra < fs->nactvar ||
574 rc < ra || (freereg(fs, rc),1));
575 ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc);
576 }
577 break;
578 }
579 default:
580 lua_assert(0); /* invalid var kind to store */
581 return;
582 }
583 emitINS(fs, ins);
584 freeexp(fs, e);
585}
586
587static void indexexp(FuncState *fs, ExpDesc *t, ExpDesc *e)
588{
589 /* already called: exp2val(fs, e) */
590 t->k = VINDEXED;
591 if (isnumK(e)) {
592 lua_Number n = expnumV(e);
593 int32_t k = lj_num2int(n);
594 if (checku8(k) && n == cast_num(k)) {
595 t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */
596 return;
597 }
598 } else if (isstrK(e)) {
599 BCReg idx = strK(fs, e);
600 if (idx <= BCMAX_C) {
601 t->u.s.aux = ~idx; /* -256..-1: const string key */
602 return;
603 }
604 }
605 t->u.s.aux = exp2anyreg(fs, e); /* 0..255: register */
606}
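/* Summary of the aux encoding chosen above:
**
**   string constant with slot <= BCMAX_C  -> aux = ~idx        (-256..-1)
**   integer key 0..255                    -> aux = BCMAX_C+1+k (256..511)
**   anything else                         -> aux = key register (0..255)
**
** dischargevars() and storevar() decode this back into the TGETS/TGETB/
** TGETV and TSETS/TSETB/TSETV variants respectively.
*/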
607
608static void methodexp(FuncState *fs, ExpDesc *e, ExpDesc *key)
609{
610 BCReg idx, func, tab = exp2anyreg(fs, e);
611 freeexp(fs, e);
612 func = fs->freereg;
613 emitAD(fs, BC_MOV, func+1, tab);
614 lua_assert(isstrK(key));
615 idx = strK(fs, key);
616 if (idx <= BCMAX_C) {
617 reserveregs(fs, 2);
618 emitABC(fs, BC_TGETS, func, tab, idx);
619 } else {
620 reserveregs(fs, 3);
621 emitAD(fs, BC_KSTR, func+2, idx);
622 emitABC(fs, BC_TGETV, func, tab, func+2);
623 fs->freereg--;
624 }
625 e->u.s.info = func;
626 e->k = VNONRELOC;
627}
628
629/* -- Code emitter: conditionals ------------------------------------------ */
630
631static void invertjump(FuncState *fs, ExpDesc *e)
632{
633 BCIns *i = bcptr(fs, e) - 1;
634 setbc_op(i, bc_op(*i)^1);
635}
636
637static BCPos jumponcond(FuncState *fs, ExpDesc *e, int cond)
638{
639 if (e->k == VRELOCABLE) {
640 BCIns *i = bcptr(fs, e);
641 if (bc_op(*i) == BC_NOT) {
642 *i = BCINS_AD(cond ? BC_ISF : BC_IST, 0, bc_d(*i));
643 return emit_jump(fs);
644 }
645 /* else go through */
646 }
647 if (e->k != VNONRELOC) {
648 reserveregs(fs, 1);
649 discharge2reg(fs, e, fs->freereg-1);
650 }
651 freeexp(fs, e);
652 emitAD(fs, cond ? BC_ISTC : BC_ISFC, NO_REG, e->u.s.info);
653 return emit_jump(fs);
654}
655
656static void goiftrue(FuncState *fs, ExpDesc *e)
657{
658 BCPos pc; /* PC of last jump. */
659 dischargevars(fs, e);
660 switch (e->k) {
661 case VKSTR: case VKNUM: case VKTRUE:
662 pc = NO_JMP; /* always true; do nothing */
663 break;
664 case VJMP:
665 invertjump(fs, e);
666 pc = e->u.s.info;
667 break;
668 case VKFALSE:
669 if (!hasjumps(e)) {
670 pc = emit_jump(fs); /* always jump */
671 break;
672 }
673 /* fallthrough */
674 default:
675 pc = jumponcond(fs, e, 0);
676 break;
677 }
678 concatjumps(fs, &e->f, pc); /* insert last jump in `f' list */
679 patchtohere(fs, e->t);
680 e->t = NO_JMP;
681}
682
683static void goiffalse(FuncState *fs, ExpDesc *e)
684{
685 BCPos pc; /* PC of last jump. */
686 dischargevars(fs, e);
687 switch (e->k) {
688 case VKNIL: case VKFALSE:
689 pc = NO_JMP; /* always false; do nothing */
690 break;
691 case VJMP:
692 pc = e->u.s.info;
693 break;
694 case VKTRUE:
695 if (!hasjumps(e)) {
696 pc = emit_jump(fs); /* always jump */
697 break;
698 }
699 /* fallthrough */
700 default:
701 pc = jumponcond(fs, e, 1);
702 break;
703 }
704 concatjumps(fs, &e->t, pc); /* insert last jump in `t' list */
705 patchtohere(fs, e->f);
706 e->f = NO_JMP;
707}
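/* The e->t / e->f lists drive short-circuit evaluation: e->t collects jumps
** taken when the value is true, e->f jumps taken when it is false.
** For `a and b`, prepare_binop() below calls goiftrue() on `a`, emitting a
** jump that triggers when `a` is false; emit_binop() then merges that jump
** into b's false list. `a or b` mirrors this with goiffalse() and the t list.
*/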
708
709/* -- Code emitter: operators --------------------------------------------- */
710
711static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2)
712{
713 TValue o;
714 if (!isnumKexp(e1) || !isnumKexp(e2)) return 0;
715 setnumV(&o, lj_vm_foldarith(expnumV(e1), expnumV(e2), (int)opr-OPR_ADD));
716 if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */
717 setnumV(&e1->u.nval, numV(&o));
718 return 1;
719}
720
721static void codearith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
722{
723 BCReg rb, rc, t;
724 uint32_t op;
725 if (foldarith(opr, e1, e2))
726 return;
727 if (opr == OPR_POW) {
728 op = BC_POW;
729 rc = exp2anyreg(fs, e2);
730 rb = exp2anyreg(fs, e1);
731 } else {
732 op = opr-OPR_ADD+BC_ADDVV;
733 /* must discharge 2nd operand first since VINDEXED might free regs */
734 exp2val(fs, e2);
735 if (isnumK(e2) && (rc = numK(fs, e2)) <= BCMAX_C)
736 op -= BC_ADDVV-BC_ADDVN;
737 else
738 rc = exp2anyreg(fs, e2);
739    /* prepare_binop discharges 1st operand, but may need to use KNUM/KSHORT */
740 lua_assert(isnumK(e1) || e1->k == VNONRELOC);
741 exp2val(fs, e1);
742 /* avoid two consts to satisfy bytecode constraints */
743 if (isnumK(e1) && !isnumK(e2) && (t = numK(fs, e1)) <= BCMAX_B) {
744 rb = rc; rc = t; op -= BC_ADDVV-BC_ADDNV;
745 } else {
746 rb = exp2anyreg(fs, e1);
747 }
748 }
749 /* using freeexp might cause asserts if the order is wrong */
750 if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--;
751 if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--;
752 e1->u.s.info = emitABC(fs, op, 0, rb, rc);
753 e1->k = VRELOCABLE;
754}
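/* Opcode variant selection, e.g. for `+`:
**   x + y  -> ADDVV  (both operands in registers)
**   x + 1  -> ADDVN  (2nd operand is a numeric constant with an 8-bit index)
**   1 + x  -> ADDNV  (constant moved into the C operand, register into B)
** x^y always uses POW with two registers, and an expression with two numeric
** constants is folded at parse time by foldarith(), unless the result would
** be NaN or -0.
*/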
755
756static void codecomp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
757{
758 ExpDesc *eret = e1;
759 BCIns ins;
760 exp2val(fs, e1);
761 if (opr == OPR_EQ || opr == OPR_NE) {
762 BCOp op = opr == OPR_EQ ? BC_ISEQV : BC_ISNEV;
763 BCReg ra;
764 if (isK(e1)) { e1 = e2; e2 = eret; } /* need constant in 2nd arg */
765 ra = exp2anyreg(fs, e1); /* first arg must be in a reg */
766 exp2val(fs, e2);
767 switch (e2->k) {
768 case VKNIL: case VKFALSE: case VKTRUE:
769 ins = BCINS_AD(op+(BC_ISEQP-BC_ISEQV), ra, priK(e2));
770 break;
771 case VKSTR:
772 ins = BCINS_AD(op+(BC_ISEQS-BC_ISEQV), ra, strK(fs, e2));
773 break;
774 case VKNUM:
775 ins = BCINS_AD(op+(BC_ISEQN-BC_ISEQV), ra, numK(fs, e2));
776 break;
777 default:
778 ins = BCINS_AD(op, ra, exp2anyreg(fs, e2));
779 break;
780 }
781 } else {
782 uint32_t op = opr-OPR_LT+BC_ISLT;
783 BCReg ra;
784 if ((op-BC_ISLT) & 1) { /* GT -> LT, GE -> LE */
785 e1 = e2; e2 = eret; /* swap operands */
786 op = ((op-BC_ISLT)^3)+BC_ISLT;
787 }
788 ra = exp2anyreg(fs, e1);
789 ins = BCINS_AD(op, ra, exp2anyreg(fs, e2));
790 }
791 /* using freeexp might cause asserts if the order is wrong */
792 if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--;
793 if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--;
794 emitINS(fs, ins);
795 eret->u.s.info = emit_jump(fs);
796 eret->k = VJMP;
797}
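/* Comparisons are normalized before emission, e.g.:
**   a > b   emits ISLT with the operands swapped (tests b < a)
**   a >= b  emits ISLE with the operands swapped (tests b <= a)
**   a == K  uses ISEQP/ISEQS/ISEQN, so a constant operand is always moved
**           into the second argument first.
** Each comparison is followed by a jump; the expression becomes a VJMP whose
** info is that jump, later wired up by goiftrue()/goiffalse().
*/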
798
799static void emit_unop(FuncState *fs, UnOpr uop, ExpDesc *e)
800{
801 BCOp op = BC_LEN;
802 switch (uop) {
803 case OPR_MINUS:
804 if (isnumKexp(e) && expnumV(e) != 0) { /* Avoid const-folding to -0. */
805 setnumV(&e->u.nval, -expnumV(e));
806 return;
807 }
808 op = BC_UNM;
809 /* fallthrough */
810 case OPR_LEN:
811 exp2anyreg(fs, e);
812 break;
813 case OPR_NOT:
814 /* interchange true and false lists */
815 { BCPos temp = e->f; e->f = e->t; e->t = temp; }
816 removevalues(fs, e->f);
817 removevalues(fs, e->t);
818 dischargevars(fs, e);
819 switch (e->k) {
820 case VKNIL: case VKFALSE:
821 e->k = VKTRUE;
822 return;
823 case VKSTR: case VKNUM: case VKTRUE:
824 e->k = VKFALSE;
825 return;
826 case VJMP:
827 invertjump(fs, e);
828 return;
829 case VRELOCABLE:
830 reserveregs(fs, 1);
831 setbc_a(bcptr(fs, e), fs->freereg-1);
832 e->u.s.info = fs->freereg-1;
833 e->k = VNONRELOC;
834 break;
835 case VNONRELOC:
836 break;
837 default: lua_assert(0); return;
838 }
839 op = BC_NOT;
840 break;
841 default: lua_assert(0); return;
842 }
843 freeexp(fs, e);
844 e->u.s.info = emitAD(fs, op, 0, e->u.s.info);
845 e->k = VRELOCABLE;
846}
847
848static void prepare_binop(FuncState *fs, BinOpr op, ExpDesc *e)
849{
850 switch (op) {
851 case OPR_AND:
852 goiftrue(fs, e);
853 break;
854 case OPR_OR:
855 goiffalse(fs, e);
856 break;
857 case OPR_CONCAT:
858 exp2nextreg(fs, e); /* operand must be on the `stack' */
859 break;
860 case OPR_EQ: case OPR_NE:
861 if (!isKexp(e)) exp2anyreg(fs, e);
862 break;
863 default:
864 if (!isnumKexp(e)) exp2anyreg(fs, e);
865 break;
866 }
867}
868
869static void emit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
870{
871 switch (op) {
872 case OPR_AND:
873 lua_assert(e1->t == NO_JMP); /* list must be closed */
874 dischargevars(fs, e2);
875 concatjumps(fs, &e2->f, e1->f);
876 *e1 = *e2;
877 break;
878 case OPR_OR:
879 lua_assert(e1->f == NO_JMP); /* list must be closed */
880 dischargevars(fs, e2);
881 concatjumps(fs, &e2->t, e1->t);
882 *e1 = *e2;
883 break;
884 case OPR_CONCAT:
885 exp2val(fs, e2);
886 if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) {
887 lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1);
888 freeexp(fs, e1);
889 setbc_b(bcptr(fs, e2), e1->u.s.info);
890 e1->u.s.info = e2->u.s.info;
891 } else {
892 exp2nextreg(fs, e2);
893 freeexp(fs, e2);
894 freeexp(fs, e1);
895 e1->u.s.info = emitABC(fs, BC_CAT, 0, e1->u.s.info, e2->u.s.info);
896 }
897 e1->k = VRELOCABLE;
898 break;
899 case OPR_ADD: case OPR_SUB: case OPR_MUL:
900 case OPR_DIV: case OPR_MOD: case OPR_POW:
901 codearith(fs, op, e1, e2);
902 break;
903 case OPR_EQ: case OPR_NE:
904 case OPR_LT: case OPR_LE: case OPR_GT: case OPR_GE:
905 codecomp(fs, op, e1, e2);
906 break;
907 default: lua_assert(0); break;
908 }
909}
910
911/* -- Lexer support ------------------------------------------------------- */
912
913static int testnext(LexState *ls, LexToken tok)
914{
915 if (ls->token == tok) {
916 lj_lex_next(ls);
917 return 1;
918 }
919 return 0;
920}
921
922static void checknext(LexState *ls, LexToken tok)
923{
924 if (ls->token != tok)
925 err_token(ls, tok);
926 lj_lex_next(ls);
927}
928
929static void checkmatch(LexState *ls, LexToken what, LexToken who, BCLine line)
930{
931 if (!testnext(ls, what)) {
932 if (line == ls->linenumber) {
933 err_token(ls, what);
934 } else {
935 const char *swhat = lj_lex_token2str(ls, what);
936 const char *swho = lj_lex_token2str(ls, who);
937 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line);
938 }
939 }
940}
941
942static GCstr *str_checkname(LexState *ls)
943{
944 GCstr *s;
945 if (ls->token != TK_name)
946 err_token(ls, TK_name);
947 s = strV(&ls->tokenval);
948 lj_lex_next(ls);
949 return s;
950}
951
952static void init_exp(ExpDesc *e, ExpKind k, uint32_t info)
953{
954 e->k = k;
955 e->u.s.info = info;
956 e->f = e->t = NO_JMP;
957}
958
959static void checkname(LexState *ls, ExpDesc *e)
960{
961 init_exp(e, VKSTR, 0);
962 e->u.sval = str_checkname(ls);
963}
964
965/* -- Variable handling --------------------------------------------------- */
966
967#define getlocvar(fs, i) ((fs)->pt->varinfo[(fs)->actvar[(i)]])
968
969static BCReg registerlocalvar(LexState *ls, GCstr *name)
970{
971 FuncState *fs = ls->fs;
972 GCproto *pt = fs->pt;
973 if (LJ_UNLIKELY(fs->nlocvars >= pt->sizevarinfo)) {
974 MSize oldsize = pt->sizevarinfo;
975 checklimit(fs, fs->nlocvars, 32767, "local variables");
976 lj_mem_growvec(fs->L, pt->varinfo, pt->sizevarinfo, 32767, VarInfo);
977 while (oldsize < pt->sizevarinfo) pt->varinfo[oldsize++].name = NULL;
978 }
979 pt->varinfo[fs->nlocvars].name = name;
980 lj_gc_objbarrier(ls->L, pt, name);
981 return fs->nlocvars++;
982}
983
984static void new_localvar(LexState *ls, GCstr *name, BCReg n)
985{
986 FuncState *fs = ls->fs;
987 checklimit(fs, fs->nactvar+n, LJ_MAX_LOCVAR, "local variables");
988 fs->actvar[fs->nactvar+n] = cast(uint16_t, registerlocalvar(ls, name));
989}
990
991#define new_localvarliteral(ls,v,n) \
992 new_localvar(ls, lj_parse_keepstr(ls, "" v, sizeof(v)-1), n)
993
994static void adjustlocalvars(LexState *ls, BCReg nvars)
995{
996 FuncState *fs = ls->fs;
997 fs->nactvar = cast_byte(fs->nactvar + nvars);
998 for (; nvars; nvars--)
999 getlocvar(fs, fs->nactvar - nvars).startpc = fs->pc;
1000}
1001
1002static void removevars(LexState *ls, BCReg tolevel)
1003{
1004 FuncState *fs = ls->fs;
1005 while (fs->nactvar > tolevel)
1006 getlocvar(fs, --fs->nactvar).endpc = fs->pc;
1007}
1008
1009static uint32_t indexupvalue(FuncState *fs, GCstr *name, ExpDesc *v)
1010{
1011 uint32_t i;
1012 GCproto *pt = fs->pt;
1013 for (i = 0; i < fs->nuv; i++) {
1014 if (fs->upvalues[i].k == v->k && fs->upvalues[i].info == v->u.s.info) {
1015 lua_assert(pt->uvname[i] == name);
1016 return i;
1017 }
1018 }
1019 /* Not found, create a new upvalue for this name. */
1020 if (LJ_UNLIKELY(fs->nuv >= pt->sizeuvname)) {
1021 MSize oldsize = pt->sizeuvname;
1022 checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues");
1023 lj_mem_growvec(fs->L, pt->uvname, pt->sizeuvname, LJ_MAX_UPVAL, GCstr *);
1024 while (oldsize < pt->sizeuvname) pt->uvname[oldsize++] = NULL;
1025 }
1026 pt->uvname[fs->nuv] = name;
1027 lj_gc_objbarrier(fs->L, pt, name);
1028 lua_assert(v->k == VLOCAL || v->k == VUPVAL);
1029 fs->upvalues[fs->nuv].k = cast_byte(v->k);
1030 fs->upvalues[fs->nuv].info = cast_byte(v->u.s.info);
1031 return fs->nuv++;
1032}
1033
1034static BCReg searchvar(FuncState *fs, GCstr *n)
1035{
1036 int i;
1037 for (i = fs->nactvar-1; i >= 0; i--) {
1038 if (n == getlocvar(fs, i).name)
1039 return (BCReg)i;
1040 }
1041 return (BCReg)-1; /* Not found. */
1042}
1043
1044static void markupval(FuncState *fs, BCReg level)
1045{
1046 FuncBlock *bl = fs->bl;
1047 while (bl && bl->nactvar > level) bl = bl->previous;
1048 if (bl) bl->upval = 1;
1049}
1050
1051static int singlevaraux(FuncState *fs, GCstr *name, ExpDesc *e, int first)
1052{
1053 if (fs == NULL) { /* no more levels? */
1054 init_exp(e, VGLOBAL, 0); /* default is global variable */
1055 e->u.sval = name;
1056 return 1;
1057 } else {
1058 BCReg reg = searchvar(fs, name); /* look up at current level */
1059 if ((int32_t)reg >= 0) {
1060 init_exp(e, VLOCAL, reg);
1061 if (!first)
1062 markupval(fs, reg); /* local will be used as an upval */
1063 return 0;
1064 } else { /* not found at current level; try upper one */
1065 if (singlevaraux(fs->prev, name, e, 0)) /* global? */
1066 return 1;
1067 e->u.s.info = indexupvalue(fs, name, e); /* else was local or upvalue */
1068 e->k = VUPVAL; /* upvalue in this level */
1069 return 0;
1070 }
1071 }
1072}
1073
1074#define singlevar(ls, e) singlevaraux((ls)->fs, str_checkname(ls), (e), 1)
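/* Name resolution walks the FuncState chain outward, e.g. in
**   do local x; f = function() return x end end
** the inner function finds `x` one level up as a VLOCAL; markupval() flags
** the do-block so leaveblock() emits UCLO, and indexupvalue() turns the
** reference into a VUPVAL of the inner prototype. A name not found on any
** level becomes a VGLOBAL.
*/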
1075
1076static void adjust_assign(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e)
1077{
1078 FuncState *fs = ls->fs;
1079 int32_t extra = (int32_t)nvars - (int32_t)nexps;
1080 if (e->k == VCALL) {
1081 extra++; /* includes call itself */
1082 if (extra < 0) extra = 0;
1083 setbc_b(bcptr(fs, e), extra+1);
1084 if (extra > 1) reserveregs(fs, (BCReg)extra-1);
1085 } else {
1086 if (e->k != VVOID) exp2nextreg(fs, e); /* close last expression */
1087 if (extra > 0) {
1088 BCReg reg = fs->freereg;
1089 reserveregs(fs, (BCReg)extra);
1090 nilK(fs, reg, (BCReg)extra);
1091 }
1092 }
1093}
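/* Example: for `local a, b, c = f()` nvars=3 and nexps=1 with a VCALL, so
** the CALL's B operand is patched to 4 (three results wanted) and two more
** registers are reserved for b and c. For `local a, b = 1` the missing
** value is filled in with nil via nilK().
*/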
1094
1095/* -- Function handling --------------------------------------------------- */
1096
1097/* Forward declaration. */
1098static void chunk(LexState *ls);
1099
1100static void open_func(LexState *ls, FuncState *fs)
1101{
1102 lua_State *L = ls->L;
1103 GCproto *pt = lj_func_newproto(L);
1104 fs->pt = pt;
1105 fs->prev = ls->fs; /* linked list of funcstates */
1106 fs->ls = ls;
1107 fs->L = L;
1108 ls->fs = fs;
1109 fs->pc = 0;
1110 fs->lasttarget = 0;
1111 fs->jpc = NO_JMP;
1112 fs->freereg = 0;
1113 fs->nkgc = 0;
1114 fs->nkn = 0;
1115 fs->nlocvars = 0;
1116 fs->nactvar = 0;
1117 fs->nuv = 0;
1118 fs->bl = NULL;
1119 pt->chunkname = ls->chunkname;
1120 pt->framesize = 2; /* registers 0/1 are always valid */
1121 fs->kt = lj_tab_new(L, 0, 0);
1122 /* anchor table of constants and prototype (to avoid being collected) */
1123 settabV(L, L->top, fs->kt);
1124 incr_top(L);
1125 setprotoV(L, L->top, pt);
1126 incr_top(L);
1127}
1128
1129static void collectk(FuncState *fs, GCproto *pt)
1130{
1131 GCtab *kt;
1132 TValue *array;
1133 Node *node;
1134 BCReg nkgc;
1135 MSize i, hmask, sizek;
1136 GCRef *kstart;
1137 checklimitgt(fs, fs->nkn, BCMAX_D+1, "constants");
1138 checklimitgt(fs, fs->nkgc, BCMAX_D+1, "constants");
1139 nkgc = round_nkgc(fs->nkgc);
1140 sizek = (MSize)(nkgc*sizeof(MRef) + fs->nkn*sizeof(lua_Number));
1141 kstart = lj_mem_newt(fs->L, sizek, GCRef);
1142 if (nkgc) setgcrefnull(kstart[0]); /* May be uninitialized otherwise. */
1143 pt->k.gc = kstart + nkgc;
1144 pt->sizekn = fs->nkn;
1145 pt->sizekgc = fs->nkgc;
1146 kt = fs->kt;
1147 array = tvref(kt->array);
1148 for (i = 0; i < kt->asize; i++)
1149 if (tvisnum(&array[i]))
1150 pt->k.n[array[i].u32.lo] = cast_num(i);
1151 node = noderef(kt->node);
1152 hmask = kt->hmask;
1153 for (i = 0; i <= hmask; i++) {
1154 Node *n = &node[i];
1155 if (tvisnum(&n->val)) {
1156 ptrdiff_t kidx = (ptrdiff_t)n->val.u32.lo;
1157 if (tvisnum(&n->key)) {
1158 pt->k.n[kidx] = numV(&n->key);
1159 } else {
1160 GCobj *o = gcV(&n->key);
1161 setgcref(pt->k.gc[~kidx], o);
1162 lj_gc_objbarrier(fs->L, pt, o);
1163 }
1164 }
1165 }
1166}
1167
1168static void collectuv(FuncState *fs, GCproto *pt)
1169{
1170 uint32_t i;
1171 pt->uv = lj_mem_newvec(fs->L, fs->nuv, int16_t);
1172 pt->sizeuv = fs->nuv;
1173 for (i = 0; i < pt->sizeuv; i++) {
1174 uint32_t v = fs->upvalues[i].info;
1175 if (fs->upvalues[i].k == VUPVAL) v = ~v;
1176 pt->uv[i] = (int16_t)v;
1177 }
1178}
1179
1180static void finalret(FuncState *fs, GCproto *pt)
1181{
1182 BCPos lastpc = fs->pc;
1183 if (lastpc > fs->lasttarget) {
1184 switch (bc_op(pt->bc[lastpc-1])) {
1185 case BC_CALLMT: case BC_CALLT:
1186 case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1:
1187 goto suppress_return; /* already got a return */
1188 default: break;
1189 }
1190 }
1191 if (fs->pt->flags & PROTO_HAS_FNEW)
1192 emitAJ(fs, BC_UCLO, 0, 0);
1193 emitAD(fs, BC_RET0, 0, 1); /* final return */
1194suppress_return:
1195 /* may need to fixup returns encoded before first function was created */
1196 if (fs->pt->flags & PROTO_FIXUP_RETURN) {
1197 BCPos pc;
1198 for (pc = 0; pc < lastpc; pc++) {
1199 BCIns i = pt->bc[pc];
1200 BCPos offset;
1201 switch (bc_op(i)) {
1202 case BC_CALLMT: case BC_CALLT:
1203 case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1:
1204 offset = emitINS(fs, i)-(pc+1)+BCBIAS_J; /* copy return ins */
1205 if (offset > BCMAX_D)
1206 err_syntax(fs->ls, LJ_ERR_XFIXUP);
1207 pt->bc[pc] = BCINS_AD(BC_UCLO, 0, offset); /* replace w/ UCLO+branch */
1208 break;
1209 case BC_UCLO: return; /* we're done */
1210 default: break;
1211 }
1212 }
1213 }
1214}
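/* retstat() only prepends UCLO when the function is already known to create
** closures. If a return was emitted before the first FNEW, body() sets
** PROTO_FIXUP_RETURN and the fixup loop above copies each such return to the
** end of the bytecode, replacing the original instruction with a UCLO that
** branches to the copy, so upvalues are always closed before returning.
*/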
1215
1216static void close_func(LexState *ls)
1217{
1218 lua_State *L = ls->L;
1219 FuncState *fs = ls->fs;
1220 GCproto *pt = fs->pt;
1221 removevars(ls, 0);
1222 finalret(fs, pt);
1223 lj_mem_reallocvec(L, pt->bc, pt->sizebc, fs->pc, BCIns);
1224 pt->sizebc = fs->pc;
1225 collectk(fs, pt);
1226 collectuv(fs, pt);
1227 lj_mem_reallocvec(L, pt->lineinfo, pt->sizelineinfo, fs->pc, BCLine);
1228 pt->sizelineinfo = fs->pc;
1229 lj_mem_reallocvec(L, pt->varinfo, pt->sizevarinfo, fs->nlocvars, VarInfo);
1230 pt->sizevarinfo = fs->nlocvars;
1231 lj_mem_reallocvec(L, pt->uvname, pt->sizeuvname, fs->nuv, GCstr *);
1232 pt->sizeuvname = fs->nuv;
1233 lua_assert(fs->bl == NULL);
1234 lj_vmevent_send(L, BC,
1235 setprotoV(L, L->top++, pt);
1236 );
1237 ls->fs = fs->prev;
1238 L->top -= 2; /* Remove table and prototype from the stack. */
1239 lua_assert(ls->fs != NULL || ls->token == TK_eof);
1240 keep_token(ls); /* Re-anchor last token. */
1241}
1242
1243GCproto *lj_parse(LexState *ls)
1244{
1245 struct FuncState fs;
1246 ls->level = 0;
1247 open_func(ls, &fs);
1248 fs.pt->flags |= PROTO_IS_VARARG; /* Main chunk is always a vararg func. */
1249 lj_lex_next(ls); /* Read-ahead first token. */
1250 chunk(ls);
1251 if (ls->token != TK_eof)
1252 err_token(ls, TK_eof);
1253 fs.pt->lastlinedefined = ls->linenumber;
1254 close_func(ls);
1255 lua_assert(fs.prev == NULL);
1256 lua_assert(fs.pt->sizeuv == 0);
1257 lua_assert(ls->fs == NULL);
1258 return fs.pt;
1259}
1260
1261/* -- Expressions --------------------------------------------------------- */
1262
1263/* forward declaration */
1264static void expr(LexState *ls, ExpDesc *v);
1265
1266static void field(LexState *ls, ExpDesc *v)
1267{
1268 /* field -> ['.' | ':'] NAME */
1269 FuncState *fs = ls->fs;
1270 ExpDesc key;
1271 exp2anyreg(fs, v);
1272 lj_lex_next(ls); /* skip the dot or colon */
1273 checkname(ls, &key);
1274 indexexp(fs, v, &key);
1275}
1276
1277static void yindex(LexState *ls, ExpDesc *v)
1278{
1279 /* index -> '[' expr ']' */
1280 lj_lex_next(ls); /* skip the '[' */
1281 expr(ls, v);
1282 exp2val(ls->fs, v);
1283 checknext(ls, ']');
1284}
1285
1286static void kexp2tv(TValue *v, ExpDesc *e)
1287{
1288 switch (e->k) {
1289 case VKNIL: case VKFALSE: case VKTRUE: v->it = ~(int32_t)e->k; break;
1290 case VKSTR:
1291 setgcref(v->gcr, obj2gco(e->u.sval)); v->it = LJ_TSTR; break;
1292 case VKNUM: setnumV(v, expnumV(e)); break;
1293 default: lua_assert(0); break;
1294 }
1295}
1296
1297static void constructor(LexState *ls, ExpDesc *e)
1298{
1299 FuncState *fs = ls->fs;
1300 BCLine line = ls->linenumber;
1301 GCtab *t = NULL;
1302 int vcall = 0, needarr = 0;
1303 int32_t narr = 1; /* first array index */
1304 uint32_t nhash = 0; /* number of hash entries */
1305 BCReg freg = fs->freereg;
1306 BCPos pc = emitAD(fs, BC_TNEW, freg, 0);
1307 init_exp(e, VNONRELOC, freg);
1308 reserveregs(fs, 1);
1309 freg++;
1310 checknext(ls, '{');
1311 while (ls->token != '}') {
1312 ExpDesc key, val;
1313 vcall = 0;
1314 if (ls->token == '[') {
1315 yindex(ls, &key); /* already calls exp2val */
1316 if (!isK(&key)) indexexp(fs, e, &key);
1317 if (isnumK(&key) && expnumV(&key) == 0) needarr = 1; else nhash++;
1318 checknext(ls, '=');
1319 } else if (ls->token == TK_name && lj_lex_lookahead(ls) == '=') {
1320 checkname(ls, &key);
1321 checknext(ls, '=');
1322 nhash++;
1323 } else {
1324 init_exp(&key, VKNUM, 0);
1325 setintV(&key.u.nval, narr);
1326 narr++;
1327 needarr = vcall = 1;
1328 }
1329 expr(ls, &val);
1330 if (isKexp(&val) && isK(&key) && key.k != VKNIL) {
1331 TValue k;
1332 if (!t) { /* create template table on demand */
1333 BCReg kidx;
1334 t = lj_tab_new(fs->L, 0, 0);
1335 kidx = gcK(fs, obj2gco(t), LJ_TTAB);
1336 fs->pt->bc[pc] = BCINS_AD(BC_TDUP, freg-1, kidx);
1337 }
1338 vcall = 0;
1339 kexp2tv(&k, &key);
1340 kexp2tv(lj_tab_set(fs->L, t, &k), &val);
1341 if (val.k == VKSTR)
1342 lj_gc_objbarriert(fs->L, t, val.u.sval);
1343 } else {
1344 if (isK(&key)) indexexp(fs, e, &key);
1345 if (val.k != VCALL) vcall = 0;
1346 storevar(fs, e, &val);
1347 }
1348 fs->freereg = freg;
1349 if (!testnext(ls, ',') && !testnext(ls, ';')) break;
1350 }
1351 checkmatch(ls, '}', '{', line);
1352 if (vcall) {
1353 BCIns *i = &fs->pt->bc[fs->pc-1];
1354 ExpDesc en;
1355 lua_assert(bc_a(*i)==freg && bc_op(*i) == (narr>256?BC_TSETV:BC_TSETB));
1356 init_exp(&en, VKNUM, 0);
1357 setintV(&en.u.nval, narr-1);
1358 if (narr > 256) { fs->pc--; i--; }
1359 *i = BCINS_AD(BC_TSETM, freg, numK(fs, &en));
1360 setbc_b(i-1, 0);
1361 }
1362 if (pc == fs->pc-1) { /* make expr relocable if possible */
1363 e->u.s.info = pc;
1364 fs->freereg--;
1365 e->k = VRELOCABLE;
1366 } else {
1367 e->k = VNONRELOC; /* indexexp may have changed it */
1368 }
1369 if (!t) { /* Construct TNEW RD: hhhhhaaaaaaaaaaa. */
1370 if (!needarr) narr = 0;
1371 else if (narr < 3) narr = 3;
1372 else if (narr > 0x7ff) narr = 0x7ff;
1373 setbc_d(&fs->pt->bc[pc], (uint32_t)narr | (hsize2hbits(nhash) << 11));
1374 }
1375}
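/* Table constructors take one of three shapes, e.g.:
**   {1, 2, x=3}  all constant keys/values: a template table is built at
**                parse time and the TNEW at `pc` is patched to TDUP.
**   {f()}        a trailing call/vararg becomes TSETM, so all of its
**                results land in the array part.
**   otherwise    TNEW is kept and its D operand packs the size hints as
**                hhhhhaaaaaaaaaaa (5 bits hash size, 11 bits array size).
*/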
1376
1377static void parlist(LexState *ls)
1378{
1379 /* parlist -> [ param { `,' param } ] */
1380 FuncState *fs = ls->fs;
1381 GCproto *pt = fs->pt;
1382 BCReg nparams = 0;
1383 if (ls->token != ')') { /* is `parlist' not empty? */
1384 do {
1385 switch (ls->token) {
1386 case TK_name: /* param -> NAME */
1387 new_localvar(ls, str_checkname(ls), nparams++);
1388 break;
1389 case TK_dots: /* param -> `...' */
1390 lj_lex_next(ls);
1391 pt->flags |= PROTO_IS_VARARG;
1392 break;
1393 default:
1394 err_syntax(ls, LJ_ERR_XPARAM);
1395 break;
1396 }
1397 } while (!(pt->flags & PROTO_IS_VARARG) && testnext(ls, ','));
1398 }
1399 adjustlocalvars(ls, nparams);
1400 pt->numparams = cast_byte(fs->nactvar);
1401 reserveregs(fs, fs->nactvar); /* reserve register for parameters */
1402}
1403
1404static void body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1405{
1406 /* body -> `(' parlist `)' chunk END */
1407 FuncState *fs, new_fs;
1408 BCReg kidx;
1409 open_func(ls, &new_fs);
1410 new_fs.pt->linedefined = line;
1411 checknext(ls, '(');
1412 if (needself) {
1413 new_localvarliteral(ls, "self", 0);
1414 adjustlocalvars(ls, 1);
1415 }
1416 parlist(ls);
1417 checknext(ls, ')');
1418 chunk(ls);
1419 new_fs.pt->lastlinedefined = ls->linenumber;
1420 checkmatch(ls, TK_end, TK_function, line);
1421 close_func(ls);
1422 fs = ls->fs;
1423 kidx = gcK(fs, obj2gco(new_fs.pt), LJ_TPROTO);
1424 init_exp(e, VRELOCABLE, emitAD(fs, BC_FNEW, 0, kidx));
1425 if (!(fs->pt->flags & PROTO_HAS_FNEW)) {
1426 if (fs->pt->flags & PROTO_HAS_RETURN)
1427 fs->pt->flags |= PROTO_FIXUP_RETURN;
1428 fs->pt->flags |= PROTO_HAS_FNEW;
1429 }
1430}
1431
1432static BCReg explist1(LexState *ls, ExpDesc *v)
1433{
1434 /* explist1 -> expr { `,' expr } */
1435 BCReg n = 1; /* at least one expression */
1436 expr(ls, v);
1437 while (testnext(ls, ',')) {
1438 exp2nextreg(ls->fs, v);
1439 expr(ls, v);
1440 n++;
1441 }
1442 return n;
1443}
1444
1445static void funcargs(LexState *ls, ExpDesc *e)
1446{
1447 FuncState *fs = ls->fs;
1448 ExpDesc args;
1449 BCIns ins;
1450 BCReg base;
1451 BCLine line = ls->linenumber;
1452 switch (ls->token) {
1453 case '(': { /* funcargs -> `(' [ explist1 ] `)' */
1454 if (line != ls->lastline)
1455 err_syntax(ls, LJ_ERR_XAMBIG);
1456 lj_lex_next(ls);
1457 if (ls->token == ')') { /* arg list is empty? */
1458 args.k = VVOID;
1459 } else {
1460 explist1(ls, &args);
1461 if (args.k == VCALL)
1462 setbc_b(bcptr(fs, &args), 0);
1463 }
1464 checkmatch(ls, ')', '(', line);
1465 break;
1466 }
1467 case '{': { /* funcargs -> constructor */
1468 constructor(ls, &args);
1469 break;
1470 }
1471 case TK_string: { /* funcargs -> STRING */
1472 init_exp(&args, VKSTR, 0);
1473 args.u.sval = strV(&ls->tokenval);
1474    lj_lex_next(ls);  /* must use `tokenval' before `next' */
1475 break;
1476 }
1477 default: {
1478 err_syntax(ls, LJ_ERR_XFUNARG);
1479 return;
1480 }
1481 }
1482 lua_assert(e->k == VNONRELOC);
1483 base = e->u.s.info; /* base register for call */
1484 if (args.k == VCALL) {
1485 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1);
1486 } else {
1487 if (args.k != VVOID)
1488 exp2nextreg(fs, &args); /* close last argument */
1489 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base);
1490 }
1491 init_exp(e, VCALL, emitINS(fs, ins));
1492 e->u.s.aux = base;
1493 fs->pt->lineinfo[fs->pc - 1] = line;
1494 fs->freereg = base+1; /* call removes function and arguments and leaves
1495 (unless changed) one result */
1496}
1497
1498static void prefixexp(LexState *ls, ExpDesc *v)
1499{
1500 /* prefixexp -> NAME | '(' expr ')' */
1501 switch (ls->token) {
1502 case '(': {
1503 BCLine line = ls->linenumber;
1504 lj_lex_next(ls);
1505 expr(ls, v);
1506 checkmatch(ls, ')', '(', line);
1507 dischargevars(ls->fs, v);
1508 return;
1509 }
1510 case TK_name: {
1511 singlevar(ls, v);
1512 return;
1513 }
1514 default: {
1515 err_syntax(ls, LJ_ERR_XSYMBOL);
1516 return;
1517 }
1518 }
1519}
1520
1521static void primaryexp(LexState *ls, ExpDesc *v)
1522{
1523 /* primaryexp ->
1524 prefixexp { `.' NAME | `[' exp `]' | `:' NAME funcargs | funcargs } */
1525 FuncState *fs = ls->fs;
1526 prefixexp(ls, v);
1527 for (;;) {
1528 switch (ls->token) {
1529 case '.': /* field */
1530 field(ls, v);
1531 break;
1532 case '[': { /* `[' exp1 `]' */
1533 ExpDesc key;
1534 exp2anyreg(fs, v);
1535 yindex(ls, &key);
1536 indexexp(fs, v, &key);
1537 break;
1538 }
1539 case ':': { /* `:' NAME funcargs */
1540 ExpDesc key;
1541 lj_lex_next(ls);
1542 checkname(ls, &key);
1543 methodexp(fs, v, &key);
1544 funcargs(ls, v);
1545 break;
1546 }
1547 case '(': case TK_string: case '{': /* funcargs */
1548 exp2nextreg(fs, v);
1549 funcargs(ls, v);
1550 break;
1551 default: return;
1552 }
1553 }
1554}
1555
1556static void simpleexp(LexState *ls, ExpDesc *v)
1557{
1558 /* simpleexp -> NUMBER | STRING | NIL | true | false | ... |
1559 constructor | FUNCTION body | primaryexp */
1560 switch (ls->token) {
1561 case TK_number:
1562 init_exp(v, VKNUM, 0);
1563 setnumV(&v->u.nval, numV(&ls->tokenval));
1564 break;
1565 case TK_string:
1566 init_exp(v, VKSTR, 0);
1567 v->u.sval = strV(&ls->tokenval);
1568 break;
1569 case TK_nil:
1570 init_exp(v, VKNIL, 0);
1571 break;
1572 case TK_true:
1573 init_exp(v, VKTRUE, 0);
1574 break;
1575 case TK_false:
1576 init_exp(v, VKFALSE, 0);
1577 break;
1578 case TK_dots: { /* vararg */
1579 FuncState *fs = ls->fs;
1580 BCReg base;
1581 checkcond(ls, fs->pt->flags & PROTO_IS_VARARG, LJ_ERR_XDOTS);
1582 reserveregs(fs, 1);
1583 base = fs->freereg-1;
1584 init_exp(v, VCALL, emitABC(fs, BC_VARG, base, 2, 1));
1585 v->u.s.aux = base;
1586 break;
1587 }
1588 case '{': /* constructor */
1589 constructor(ls, v);
1590 return;
1591 case TK_function:
1592 lj_lex_next(ls);
1593 body(ls, v, 0, ls->linenumber);
1594 return;
1595 default:
1596 primaryexp(ls, v);
1597 return;
1598 }
1599 lj_lex_next(ls);
1600}
1601
1602static void enterlevel(LexState *ls)
1603{
1604 if (++ls->level >= LJ_MAX_XLEVEL)
1605 lj_lex_error(ls, 0, LJ_ERR_XLEVELS);
1606}
1607
1608#define leavelevel(ls) ((ls)->level--)
1609
1610static UnOpr getunopr(LexToken tok)
1611{
1612 switch (tok) {
1613 case TK_not: return OPR_NOT;
1614 case '-': return OPR_MINUS;
1615 case '#': return OPR_LEN;
1616 default: return OPR_NOUNOPR;
1617 }
1618}
1619
1620static BinOpr getbinopr(LexToken tok)
1621{
1622 switch (tok) {
1623 case '+': return OPR_ADD;
1624 case '-': return OPR_SUB;
1625 case '*': return OPR_MUL;
1626 case '/': return OPR_DIV;
1627 case '%': return OPR_MOD;
1628 case '^': return OPR_POW;
1629 case TK_concat: return OPR_CONCAT;
1630 case TK_ne: return OPR_NE;
1631 case TK_eq: return OPR_EQ;
1632 case '<': return OPR_LT;
1633 case TK_le: return OPR_LE;
1634 case '>': return OPR_GT;
1635 case TK_ge: return OPR_GE;
1636 case TK_and: return OPR_AND;
1637 case TK_or: return OPR_OR;
1638 default: return OPR_NOBINOPR;
1639 }
1640}
1641
1642static const struct {
1643 uint8_t left; /* left priority for each binary operator */
1644 uint8_t right; /* right priority */
1645} priority[] = { /* ORDER OPR */
1646 {6,6}, {6,6}, {7,7}, {7,7}, {7,7}, /* ADD SUB MUL DIV MOD */
1647 {10,9}, {5,4}, /* POW CONCAT (right associative) */
1648 {3,3}, {3,3}, /* EQ NE */
1649  {3,3}, {3,3}, {3,3}, {3,3},   /* LT GE LE GT */
1650 {2,2}, {1,1} /* AND OR */
1651};
1652
1653#define UNARY_PRIORITY 8 /* priority for unary operators */
1654
1655/*
1656** subexpr -> (simpleexp | unop subexpr) { binop subexpr }
1657** where `binop' is any binary operator with a priority higher than `limit'
1658*/
1659static BinOpr subexpr(LexState *ls, ExpDesc *v, uint32_t limit)
1660{
1661 BinOpr op;
1662 UnOpr uop;
1663 enterlevel(ls);
1664 uop = getunopr(ls->token);
1665 if (uop != OPR_NOUNOPR) {
1666 lj_lex_next(ls);
1667 subexpr(ls, v, UNARY_PRIORITY);
1668 emit_unop(ls->fs, uop, v);
1669 } else {
1670 simpleexp(ls, v);
1671 }
1672 /* expand while operators have priorities higher than `limit' */
1673 op = getbinopr(ls->token);
1674 while (op != OPR_NOBINOPR && priority[op].left > limit) {
1675 ExpDesc v2;
1676 BinOpr nextop;
1677 lj_lex_next(ls);
1678 prepare_binop(ls->fs, op, v);
1679 /* read sub-expression with higher priority */
1680 nextop = subexpr(ls, &v2, priority[op].right);
1681 emit_binop(ls->fs, op, v, &v2);
1682 op = nextop;
1683 }
1684 leavelevel(ls);
1685 return op; /* return first untreated operator */
1686}
1687
1688static void expr(LexState *ls, ExpDesc *v)
1689{
1690 subexpr(ls, v, 0);
1691}
1692
1693static BCPos condexpr(LexState *ls)
1694{
1695 /* cond -> exp */
1696 ExpDesc v;
1697 expr(ls, &v); /* read condition */
1698 if (v.k == VKNIL) v.k = VKFALSE; /* `falses' are all equal here */
1699 goiftrue(ls->fs, &v);
1700 return v.f;
1701}
1702
1703/* -- Scope handling ------------------------------------------------------ */
1704
1705static void enterblock(FuncState *fs, FuncBlock *bl, int isbreakable)
1706{
1707 bl->breaklist = NO_JMP;
1708 bl->isbreakable = (uint8_t)isbreakable;
1709 bl->nactvar = fs->nactvar;
1710 bl->upval = 0;
1711 bl->previous = fs->bl;
1712 fs->bl = bl;
1713 lua_assert(fs->freereg == fs->nactvar);
1714}
1715
1716static void leaveblock(FuncState *fs)
1717{
1718 FuncBlock *bl = fs->bl;
1719 fs->bl = bl->previous;
1720 removevars(fs->ls, bl->nactvar);
1721 fs->freereg = fs->nactvar; /* free registers */
1722 lua_assert(bl->nactvar == fs->nactvar);
1723 /* a block either controls scope or breaks (never both) */
1724 lua_assert(!bl->isbreakable || !bl->upval);
1725 if (bl->upval)
1726 emitAJ(fs, BC_UCLO, bl->nactvar, 0);
1727 else /* avoid in upval case, it clears lasttarget and kills UCLO+JMP join */
1728 patchtohere(fs, bl->breaklist);
1729}
1730
1731static void block(LexState *ls)
1732{
1733 /* block -> chunk */
1734 FuncState *fs = ls->fs;
1735 FuncBlock bl;
1736 enterblock(fs, &bl, 0);
1737 chunk(ls);
1738 lua_assert(bl.breaklist == NO_JMP);
1739 leaveblock(fs);
1740}
1741
1742/* -- Statements ---------------------------------------------------------- */
1743
1744/*
1745** structure to chain all variables in the left-hand side of an
1746** assignment
1747*/
1748struct LHS_assign {
1749 ExpDesc v; /* variable (global, local, upvalue, or indexed) */
1750 struct LHS_assign *prev;
1751};
1752
1753/*
1754** check whether, in an assignment to a local variable, the local variable
1755** is needed in a previous assignment (to a table). If so, save original
1756** local value in a safe place and use this safe copy in the previous
1757** assignment.
1758*/
1759static void check_conflict(LexState *ls, struct LHS_assign *lh,
1760 const ExpDesc *v)
1761{
1762 FuncState *fs = ls->fs;
1763 BCReg reg = fs->freereg; /* eventual position to save local variable */
1764 int conflict = 0;
1765 for (; lh; lh = lh->prev) {
1766 if (lh->v.k == VINDEXED) {
1767 if (lh->v.u.s.info == v->u.s.info) { /* conflict? */
1768 conflict = 1;
1769 lh->v.u.s.info = reg; /* previous assignment will use safe copy */
1770 }
1771 if (lh->v.u.s.aux == v->u.s.info) { /* conflict? */
1772 conflict = 1;
1773 lh->v.u.s.aux = reg; /* previous assignment will use safe copy */
1774 }
1775 }
1776 }
1777 if (conflict) {
1778 emitAD(fs, BC_MOV, reg, v->u.s.info); /* make copy */
1779 reserveregs(fs, 1);
1780 }
1781}
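/* Example: in `t[i], i = 10, 20` the store to `i` is emitted before the
** store to `t[i]`, so the original value of `i` is first copied (MOV) into
** a scratch register and the pending t[i] assignment is redirected to that
** copy; t[i] thus still uses the value i had before the assignment.
*/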
1782
1783static void assignment(LexState *ls, struct LHS_assign *lh, BCReg nvars)
1784{
1785 ExpDesc e;
1786 checkcond(ls, VLOCAL <= lh->v.k && lh->v.k <= VINDEXED, LJ_ERR_XSYNTAX);
1787 if (testnext(ls, ',')) { /* assignment -> `,' primaryexp assignment */
1788 struct LHS_assign nv;
1789 nv.prev = lh;
1790 primaryexp(ls, &nv.v);
1791 if (nv.v.k == VLOCAL)
1792 check_conflict(ls, lh, &nv.v);
1793 checklimit(ls->fs, ls->level + nvars, LJ_MAX_XLEVEL, "variable names");
1794 assignment(ls, &nv, nvars+1);
1795 } else { /* assignment -> `=' explist1 */
1796 BCReg nexps;
1797 checknext(ls, '=');
1798 nexps = explist1(ls, &e);
1799 if (nexps == nvars) {
1800 if (e.k == VCALL) {
1801 if (bc_op(*bcptr(ls->fs, &e)) == BC_VARG) {
1802 ls->fs->freereg--;
1803 e.k = VRELOCABLE;
1804 } else {
1805 e.u.s.info = e.u.s.aux;
1806 e.k = VNONRELOC;
1807 }
1808 }
1809 storevar(ls->fs, &lh->v, &e);
1810 return;
1811 }
1812 adjust_assign(ls, nvars, nexps, &e);
1813 if (nexps > nvars)
1814 ls->fs->freereg -= nexps - nvars; /* remove extra values */
1815 }
1816 init_exp(&e, VNONRELOC, ls->fs->freereg-1); /* default assignment */
1817 storevar(ls->fs, &lh->v, &e);
1818}
1819
1820static void breakstat(LexState *ls)
1821{
1822 FuncState *fs = ls->fs;
1823 FuncBlock *bl = fs->bl;
1824 int upval = 0;
1825 while (bl && !bl->isbreakable) {
1826 upval |= bl->upval;
1827 bl = bl->previous;
1828 }
1829 if (!bl)
1830 err_syntax(ls, LJ_ERR_XBREAK);
1831 if (upval)
1832 emitAJ(fs, BC_UCLO, bl->nactvar, 0);
1833 concatjumps(fs, &bl->breaklist, emit_jump(fs));
1834}
1835
1836static void whilestat(LexState *ls, BCLine line)
1837{
1838 /* whilestat -> WHILE cond DO block END */
1839 FuncState *fs = ls->fs;
1840 BCPos start, loop, condexit;
1841 FuncBlock bl;
1842 lj_lex_next(ls); /* skip WHILE */
1843 start = fs->lasttarget = fs->pc;
1844 condexit = condexpr(ls);
1845 enterblock(fs, &bl, 1);
1846 checknext(ls, TK_do);
1847 loop = emitAD(fs, BC_LOOP, fs->nactvar, 0);
1848 block(ls);
1849 patchlist(fs, emit_jump(fs), start);
1850 checkmatch(ls, TK_end, TK_while, line);
1851 leaveblock(fs);
1852 patchtohere(fs, condexit); /* false conditions finish the loop */
1853 fixjump(fs, loop, fs->pc);
1854}
1855
1856static void repeatstat(LexState *ls, BCLine line)
1857{
1858 /* repeatstat -> REPEAT block UNTIL cond */
1859 FuncState *fs = ls->fs;
1860 BCPos loop = fs->lasttarget = fs->pc;
1861 BCPos condexit;
1862 FuncBlock bl1, bl2;
1863 enterblock(fs, &bl1, 1); /* loop block */
1864 enterblock(fs, &bl2, 0); /* scope block */
1865 lj_lex_next(ls); /* skip REPEAT */
1866 emitAD(fs, BC_LOOP, fs->nactvar, 0);
1867 chunk(ls);
1868 checkmatch(ls, TK_until, TK_repeat, line);
1869 condexit = condexpr(ls); /* read condition (inside scope block) */
1870 if (!bl2.upval) { /* no upvalues? */
1871 leaveblock(fs); /* finish scope */
1872 } else { /* complete semantics when there are upvalues */
1873 breakstat(ls); /* if condition then break */
1874 patchtohere(fs, condexit); /* else... */
1875 leaveblock(fs); /* finish scope... */
1876 condexit = emit_jump(fs); /* and repeat */
1877 }
1878 patchlist(fs, condexit, loop); /* close the loop */
1879 fixjump(fs, loop, fs->pc);
1880 leaveblock(fs); /* finish loop */
1881}
1882
1883static void exp1(LexState *ls)
1884{
1885 ExpDesc e;
1886 expr(ls, &e);
1887 exp2nextreg(ls->fs, &e);
1888}
1889
1890static void forbody(LexState *ls, BCReg base, BCLine line, BCReg nvars,
1891 int isnum)
1892{
1893 /* forbody -> DO block */
1894 FuncBlock bl;
1895 FuncState *fs = ls->fs;
1896 BCPos loop, loopend;
1897 adjustlocalvars(ls, 3); /* control variables */
1898 checknext(ls, TK_do);
1899 loop = isnum ? emitAJ(fs, BC_FORI, base, NO_JMP) :
1900 emitAJ(fs, BC_JMP, fs->freereg, NO_JMP);
1901 enterblock(fs, &bl, 0); /* scope for declared variables */
1902 adjustlocalvars(ls, nvars);
1903 reserveregs(fs, nvars);
1904 block(ls);
1905 leaveblock(fs); /* end of scope for declared variables */
1906 if (isnum) {
1907 loopend = emitAJ(fs, BC_FORL, base, NO_JMP);
1908 fixjump(fs, loop, fs->pc);
1909 } else {
1910 fixjump(fs, loop, fs->pc);
1911 emitABC(fs, BC_ITERC, base+3, nvars+1, 2+1);
1912 loopend = emitAJ(fs, BC_ITERL, base+3, NO_JMP);
1913 fs->pt->lineinfo[loopend-1] = line;
1914 }
1915 fs->pt->lineinfo[loopend] = line; /* pretend last op starts the loop */
1916 fixjump(fs, loopend, loop+1);
1917}
1918
1919static void fornum(LexState *ls, GCstr *varname, BCLine line)
1920{
1921 /* fornum -> NAME = exp1,exp1[,exp1] forbody */
1922 FuncState *fs = ls->fs;
1923 BCReg base = fs->freereg;
1924 new_localvarliteral(ls, "(for index)", FORL_IDX);
1925 new_localvarliteral(ls, "(for limit)", FORL_STOP);
1926 new_localvarliteral(ls, "(for step)", FORL_STEP);
1927 new_localvar(ls, varname, FORL_EXT);
1928 checknext(ls, '=');
1929 exp1(ls); /* initial value */
1930 checknext(ls, ',');
1931 exp1(ls); /* limit */
1932 if (testnext(ls, ',')) {
1933 exp1(ls); /* optional step */
1934 } else { /* default step = 1 */
1935 emitAD(fs, BC_KSHORT, fs->freereg, 1);
1936 reserveregs(fs, 1);
1937 }
1938 forbody(ls, base, line, 1, 1);
1939}
1940
1941static void forlist(LexState *ls, GCstr *indexname)
1942{
1943 /* forlist -> NAME {,NAME} IN explist1 forbody */
1944 FuncState *fs = ls->fs;
1945 ExpDesc e;
1946 BCReg nvars = 0;
1947 BCLine line;
1948 BCReg base = fs->freereg;
1949 /* create control variables */
1950 new_localvarliteral(ls, "(for generator)", nvars++);
1951 new_localvarliteral(ls, "(for state)", nvars++);
1952 new_localvarliteral(ls, "(for control)", nvars++);
1953 /* create declared variables */
1954 new_localvar(ls, indexname, nvars++);
1955 while (testnext(ls, ','))
1956 new_localvar(ls, str_checkname(ls), nvars++);
1957 checknext(ls, TK_in);
1958 line = ls->linenumber;
1959 adjust_assign(ls, 3, explist1(ls, &e), &e);
1960 checkframe(fs, 3); /* extra space to call generator */
1961 forbody(ls, base, line, nvars - 3, 0);
1962}
1963
1964static void forstat(LexState *ls, BCLine line)
1965{
1966 /* forstat -> FOR (fornum | forlist) END */
1967 FuncState *fs = ls->fs;
1968 GCstr *varname;
1969 FuncBlock bl;
1970 enterblock(fs, &bl, 1); /* scope for loop and control variables */
1971 lj_lex_next(ls); /* skip `for' */
1972 varname = str_checkname(ls); /* first variable name */
1973 switch (ls->token) {
1974 case '=': fornum(ls, varname, line); break;
1975 case ',': case TK_in: forlist(ls, varname); break;
1976 default: err_syntax(ls, LJ_ERR_XFOR);
1977 }
1978 checkmatch(ls, TK_end, TK_for, line);
1979 leaveblock(fs); /* loop scope (`break' jumps to this point) */
1980}
1981
1982static BCPos test_then_block(LexState *ls)
1983{
1984 /* test_then_block -> [IF | ELSEIF] cond THEN block */
1985 BCPos condexit;
1986 lj_lex_next(ls); /* skip IF or ELSEIF */
1987 condexit = condexpr(ls);
1988 checknext(ls, TK_then);
1989 block(ls); /* `then' part */
1990 return condexit;
1991}
1992
1993static void ifstat(LexState *ls, BCLine line)
1994{
1995 /* ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END */
1996 FuncState *fs = ls->fs;
1997 BCPos flist;
1998 BCPos escapelist = NO_JMP;
1999 flist = test_then_block(ls); /* IF cond THEN block */
2000 while (ls->token == TK_elseif) {
2001 concatjumps(fs, &escapelist, emit_jump(fs));
2002 patchtohere(fs, flist);
2003 flist = test_then_block(ls); /* ELSEIF cond THEN block */
2004 }
2005 if (ls->token == TK_else) {
2006 concatjumps(fs, &escapelist, emit_jump(fs));
2007 patchtohere(fs, flist);
2008 lj_lex_next(ls); /* skip ELSE (after patch, for correct line info) */
2009 block(ls); /* `else' part */
2010 } else {
2011 concatjumps(fs, &escapelist, flist);
2012 }
2013 patchtohere(fs, escapelist);
2014 checkmatch(ls, TK_end, TK_if, line);
2015}
2016
2017static void localfunc(LexState *ls)
2018{
2019 ExpDesc v, b;
2020 FuncState *fs = ls->fs;
2021 new_localvar(ls, str_checkname(ls), 0);
2022 init_exp(&v, VLOCAL, fs->freereg);
2023 reserveregs(fs, 1);
2024 adjustlocalvars(ls, 1);
2025 body(ls, &b, 0, ls->linenumber);
2026 storevar(fs, &v, &b);
2027 /* debug information will only see the variable after this point! */
2028 getlocvar(fs, fs->nactvar - 1).startpc = fs->pc;
2029}
2030
2031static void localstat(LexState *ls)
2032{
2033 /* stat -> LOCAL NAME {`,' NAME} [`=' explist1] */
2034 BCReg nvars = 0;
2035 BCReg nexps;
2036 ExpDesc e;
2037 do {
2038 new_localvar(ls, str_checkname(ls), nvars++);
2039 } while (testnext(ls, ','));
2040 if (testnext(ls, '=')) {
2041 nexps = explist1(ls, &e);
2042 } else {
2043 e.k = VVOID;
2044 nexps = 0;
2045 }
2046 adjust_assign(ls, nvars, nexps, &e);
2047 adjustlocalvars(ls, nvars);
2048}
2049
2050static int func_name(LexState *ls, ExpDesc *v)
2051{
2052 /* func_name -> NAME {field} [`:' NAME] */
2053 int needself = 0;
2054 singlevar(ls, v);
2055 while (ls->token == '.')
2056 field(ls, v);
2057 if (ls->token == ':') {
2058 needself = 1;
2059 field(ls, v);
2060 }
2061 return needself;
2062}
2063
2064static void funcstat(LexState *ls, BCLine line)
2065{
2066 /* funcstat -> FUNCTION func_name body */
2067 FuncState *fs;
2068 int needself;
2069 ExpDesc v, b;
2070 lj_lex_next(ls); /* skip FUNCTION */
2071 needself = func_name(ls, &v);
2072 body(ls, &b, needself, line);
2073 fs = ls->fs;
2074 storevar(fs, &v, &b);
2075 fs->pt->lineinfo[fs->pc - 1] = line;
2076}
2077
2078static void exprstat(LexState *ls)
2079{
2080 /* stat -> func | assignment */
2081 FuncState *fs = ls->fs;
2082 struct LHS_assign v;
2083 primaryexp(ls, &v.v);
2084 if (v.v.k == VCALL) { /* stat -> func */
2085 setbc_b(bcptr(fs, &v.v), 1); /* call statement uses no results */
2086 } else { /* stat -> assignment */
2087 v.prev = NULL;
2088 assignment(ls, &v, 1);
2089 }
2090}
2091
2092static int block_follow(LexToken token)
2093{
2094 switch (token) {
2095 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2096 return 1;
2097 default:
2098 return 0;
2099 }
2100}
2101
2102static void retstat(LexState *ls)
2103{
2104 /* stat -> RETURN explist */
2105 BCIns ins;
2106 FuncState *fs = ls->fs;
2107 lj_lex_next(ls); /* skip RETURN */
2108 fs->pt->flags |= PROTO_HAS_RETURN;
2109 if (block_follow(ls->token) || ls->token == ';') {
2110 ins = BCINS_AD(BC_RET0, 0, 1); /* return no values */
2111 } else {
2112 ExpDesc e;
2113 BCReg nret = explist1(ls, &e); /* optional return values */
2114 if (nret == 1) {
2115 if (e.k == VCALL) {
2116 BCIns *i = bcptr(fs, &e);
2117 /* It doesn't pay off to add BC_VARGT just for 'return ...'. */
2118 if (bc_op(*i) == BC_VARG) goto notailcall;
2119 fs->pc--;
2120 ins = BCINS_AD(bc_op(*i)-BC_CALL+BC_CALLT, bc_a(*i), bc_c(*i));
2121 } else {
2122 ins = BCINS_AD(BC_RET1, exp2anyreg(fs, &e), 2);
2123 }
2124 } else {
2125 if (e.k == VCALL) {
2126 notailcall:
2127 setbc_b(bcptr(fs, &e), 0);
2128 ins = BCINS_AD(BC_RETM, fs->nactvar, e.u.s.aux - fs->nactvar);
2129 } else {
2130 exp2nextreg(fs, &e); /* values must go to the `stack' */
2131 ins = BCINS_AD(BC_RET, fs->nactvar, nret+1);
2132 }
2133 }
2134 }
2135 if (fs->pt->flags & PROTO_HAS_FNEW)
2136 emitAJ(fs, BC_UCLO, 0, 0);
2137 emitINS(fs, ins);
2138}
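/* Examples: `return f(x)` drops the just-emitted CALL and re-emits it as a
** tail call (CALLT/CALLMT); `return x` becomes RET1; `return a, b` becomes
** RET; `return a, f()` or `return ...` end in RETM with the call/vararg's
** B operand set to 0 so all of its results are returned. UCLO is prepended
** when the function creates closures.
*/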
2139
2140static int statement(LexState *ls)
2141{
2142 BCLine line = ls->linenumber; /* may be needed for error messages */
2143 switch (ls->token) {
2144 case TK_if:
2145 ifstat(ls, line);
2146 return 0;
2147 case TK_while:
2148 whilestat(ls, line);
2149 return 0;
2150 case TK_do:
2151 lj_lex_next(ls); /* skip DO */
2152 block(ls);
2153 checkmatch(ls, TK_end, TK_do, line);
2154 return 0;
2155 case TK_for:
2156 forstat(ls, line);
2157 return 0;
2158 case TK_repeat:
2159 repeatstat(ls, line);
2160 return 0;
2161 case TK_function:
2162 funcstat(ls, line);
2163 return 0;
2164 case TK_local:
2165 lj_lex_next(ls); /* skip LOCAL */
2166 if (testnext(ls, TK_function)) /* local function? */
2167 localfunc(ls);
2168 else
2169 localstat(ls);
2170 return 0;
2171 case TK_return:
2172 retstat(ls);
2173 return 1; /* must be last statement */
2174 case TK_break:
2175 lj_lex_next(ls); /* skip BREAK */
2176 breakstat(ls);
2177 return 1; /* must be last statement */
2178 default:
2179 exprstat(ls);
2180 return 0;
2181 }
2182}
2183
2184static void chunk(LexState *ls)
2185{
2186 /* chunk -> { stat [`;'] } */
2187 int islast = 0;
2188 enterlevel(ls);
2189 while (!islast && !block_follow(ls->token)) {
2190 islast = statement(ls);
2191 testnext(ls, ';');
2192 lua_assert(ls->fs->pt->framesize >= ls->fs->freereg &&
2193 ls->fs->freereg >= ls->fs->nactvar);
2194 ls->fs->freereg = ls->fs->nactvar; /* free registers */
2195 }
2196 leavelevel(ls);
2197}
2198
diff --git a/src/lj_parse.h b/src/lj_parse.h
new file mode 100644
index 00000000..72aac2c6
--- /dev/null
+++ b/src/lj_parse.h
@@ -0,0 +1,15 @@
1/*
2** Lua parser (source code -> bytecode).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PARSE_H
7#define _LJ_PARSE_H
8
9#include "lj_obj.h"
10#include "lj_lex.h"
11
12LJ_FUNC GCproto *lj_parse(LexState *ls);
13LJ_FUNC GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t l);
14
15#endif
diff --git a/src/lj_record.c b/src/lj_record.c
new file mode 100644
index 00000000..e101ba23
--- /dev/null
+++ b/src/lj_record.c
@@ -0,0 +1,2136 @@
1/*
2** Trace recorder (bytecode -> SSA IR).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_record_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_tab.h"
17#include "lj_state.h"
18#include "lj_frame.h"
19#include "lj_bc.h"
20#include "lj_ff.h"
21#include "lj_ir.h"
22#include "lj_jit.h"
23#include "lj_iropt.h"
24#include "lj_trace.h"
25#include "lj_record.h"
26#include "lj_snap.h"
27#include "lj_asm.h"
28#include "lj_dispatch.h"
29#include "lj_vm.h"
30
31/* Some local macros to save typing. Undef'd at the end. */
32#define IR(ref) (&J->cur.ir[(ref)])
33
34/* Pass IR on to next optimization in chain (FOLD). */
35#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
36
37/* Emit raw IR without passing through optimizations. */
38#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
39
40/* Context for recording an indexed load/store. */
41typedef struct RecordIndex {
42 TValue tabv; /* Runtime value of table (or indexed object). */
43 TValue keyv; /* Runtime value of key. */
44 TValue valv; /* Runtime value of stored value. */
45 TValue mobjv; /* Runtime value of metamethod object. */
46 GCtab *mtv; /* Runtime value of metatable object. */
47 cTValue *oldv; /* Runtime value of previously stored value. */
48 TRef tab; /* Table (or indexed object) reference. */
49 TRef key; /* Key reference. */
50 TRef val; /* Value reference for a store or 0 for a load. */
51 TRef mt; /* Metatable reference. */
52 TRef mobj; /* Metamethod object reference. */
53 int idxchain; /* Index indirections left or 0 for raw lookup. */
54} RecordIndex;
55
56/* Requested results from rec_call(). */
57enum {
58 /* Non-negative numbers are number of requested results. */
59 CALLRES_MULTI = -1, /* Return multiple results. */
60 CALLRES_TAILCALL = -2, /* Tail call. */
61 CALLRES_PENDING = -3, /* Call is pending, no results yet. */
62 CALLRES_CONT = -4 /* Continuation call. */
63};
64
65/* Forward declarations. */
66static TRef rec_idx(jit_State *J, RecordIndex *ix);
67static int rec_call(jit_State *J, BCReg func, int cres, int nargs);
68
69/* -- Sanity checks ------------------------------------------------------- */
70
71#ifdef LUA_USE_ASSERT
72/* Sanity check the whole IR -- sloooow. */
73static void rec_check_ir(jit_State *J)
74{
75 IRRef i, nins = J->cur.nins, nk = J->cur.nk;
76 lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
77 for (i = nins-1; i >= nk; i--) {
78 IRIns *ir = IR(i);
79 uint32_t mode = lj_ir_mode[ir->o];
80 IRRef op1 = ir->op1;
81 IRRef op2 = ir->op2;
82 switch (irm_op1(mode)) {
83 case IRMnone: lua_assert(op1 == 0); break;
84 case IRMref: lua_assert(op1 >= nk);
85 lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
86 case IRMlit: break;
87 case IRMcst: lua_assert(i < REF_BIAS); continue;
88 }
89 switch (irm_op2(mode)) {
90 case IRMnone: lua_assert(op2 == 0); break;
91 case IRMref: lua_assert(op2 >= nk);
92 lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break;
93 case IRMlit: break;
94 case IRMcst: lua_assert(0); break;
95 }
96 if (ir->prev) {
97 lua_assert(ir->prev >= nk);
98 lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i);
99 lua_assert(IR(ir->prev)->o == ir->o);
100 }
101 }
102}
103
104/* Sanity check the slots. */
105static void rec_check_slots(jit_State *J)
106{
107 BCReg s, nslots = J->baseslot + J->maxslot;
108 lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
109 lua_assert(nslots < LJ_MAX_JSLOTS);
110 for (s = 0; s < nslots; s++) {
111 TRef tr = J->slot[s];
112 if (tr) {
113 IRRef ref = tref_ref(tr);
114 lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
115 lua_assert(irt_t(IR(ref)->t) == tref_t(tr));
116 }
117 }
118}
119#endif
120
121/* -- Type handling and specialization ------------------------------------ */
122
123/* Note: these functions return tagged references (TRef). */
124
125/* Specialize a slot to a specific type. Note: slot can be negative! */
126static TRef sloadt(jit_State *J, int32_t slot, IRType t, int mode)
127{
128 /* No guard, since none of the callers need a type-checking SLOAD. */
129 TRef ref = emitir_raw(IRT(IR_SLOAD, t), (int32_t)J->baseslot+slot, mode);
130 J->base[slot] = ref;
131 return ref;
132}
133
134/* Specialize a slot to the runtime type. Note: slot can be negative! */
135static TRef sload(jit_State *J, int32_t slot)
136{
137 IRType t = itype2irt(&J->L->base[slot]);
138 TRef ref = emitir_raw(IRTG(IR_SLOAD, t), (int32_t)J->baseslot+slot, 0);
139 if (irtype_ispri(t)) ref = TREF_PRI(t); /* Canonicalize primitive refs. */
140 J->base[slot] = ref;
141 return ref;
142}
143
144/* Get TRef from slot. Load slot and specialize if not done already. */
145#define getslot(J, s) (J->base[(s)] ? J->base[(s)] : sload(J, (int32_t)(s)))
146
147/* Get TRef for current function. */
148static TRef getcurrf(jit_State *J)
149{
150 if (J->base[-1]) {
151 IRIns *ir = IR(tref_ref(J->base[-1]));
152 if (ir->o == IR_FRAME) /* Shortcut if already specialized. */
153 return TREF(ir->op2, IRT_FUNC); /* Return TRef of KFUNC. */
154 return J->base[-1];
155 } else {
156 lua_assert(J->baseslot == 1);
157 return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
158 }
159}
160
161/* Compare for raw object equality.
162** Returns 0 if the objects are the same.
163** Returns 1 if they are different, but the same type.
164** Returns 2 for two different types.
165** Comparisons between primitives always return 1 -- no caller cares about it.
166*/
167static int rec_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv)
168{
169 int diff = !lj_obj_equal(av, bv);
170 if (!tref_isk2(a, b)) { /* Shortcut, also handles primitives. */
171 IRType ta = tref_type(a);
172 IRType tb = tref_type(b);
173 if (ta != tb) {
174 /* Widen mixed number/int comparisons to number/number comparison. */
175 if (ta == IRT_INT && tb == IRT_NUM) {
176 a = emitir(IRTN(IR_TONUM), a, 0);
177 ta = IRT_NUM;
178 } else if (ta == IRT_NUM && tb == IRT_INT) {
179 b = emitir(IRTN(IR_TONUM), b, 0);
180 } else {
181 return 2; /* Two different types are never equal. */
182 }
183 }
184 emitir(IRTG(diff ? IR_NE : IR_EQ, ta), a, b);
185 }
186 return diff;
187}
188
189/* -- Record loop ops ----------------------------------------------------- */
190
191/* Loop event. */
192typedef enum {
193 LOOPEV_LEAVE, /* Loop is left or not entered. */
194 LOOPEV_ENTER /* Loop is entered. */
195} LoopEvent;
196
197/* Canonicalize slots: convert integers to numbers. */
198static void canonicalize_slots(jit_State *J)
199{
200 BCReg s;
201 for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
202 TRef tr = J->slot[s];
203 if (tref_isinteger(tr)) {
204 IRIns *ir = IR(tref_ref(tr));
205 if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
206 J->slot[s] = emitir(IRTN(IR_TONUM), tr, 0);
207 }
208 }
209}
210
211/* Stop recording. */
212static void rec_stop(jit_State *J, TraceNo lnk)
213{
214 lj_trace_end(J);
215 J->cur.link = (uint16_t)lnk;
216 if (lnk == J->curtrace) { /* Looping back? */
217 if ((J->flags & JIT_F_OPT_LOOP)) /* Shall we try to create a loop? */
218 goto nocanon; /* Do not canonicalize or we lose the narrowing. */
219 if (J->cur.root) /* Otherwise ensure we always link to the root trace. */
220 J->cur.link = J->cur.root;
221 }
222 canonicalize_slots(J);
223nocanon:
224 /* Note: all loop ops must set J->pc to the following instruction! */
225 lj_snap_add(J); /* Add loop snapshot. */
226 J->needsnap = 0;
227 J->mergesnap = 1; /* In case recording continues. */
228}
229
230/* Peek before FORI to find a const initializer, otherwise load from slot. */
231static TRef fori_arg(jit_State *J, const BCIns *pc, BCReg slot, IRType t)
232{
233 /* A store to slot-1 means there's no conditional assignment for slot. */
234 if (bc_a(pc[-1]) == slot-1 && bcmode_a(bc_op(pc[-1])) == BCMdst) {
235 BCIns ins = pc[0];
236 if (bc_a(ins) == slot) {
237 if (bc_op(ins) == BC_KSHORT) {
238 int32_t k = (int32_t)(int16_t)bc_d(ins);
239 if (t == IRT_INT)
240 return lj_ir_kint(J, k);
241 else
242 return lj_ir_knum(J, cast_num(k));
243 } else if (bc_op(ins) == BC_KNUM) {
244 lua_Number n = J->pt->k.n[bc_d(ins)];
245 if (t == IRT_INT)
246 return lj_ir_kint(J, lj_num2int(n));
247 else
248 return lj_ir_knum(J, n);
249 }
250 }
251 }
252 if (J->base[slot])
253 return J->base[slot];
254 else
255 return sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT);
256}
257
258/* Simulate the runtime behavior of the FOR loop iterator.
259** It's important to exactly reproduce the semantics of the interpreter.
260*/
261static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl)
262{
263 cTValue *forbase = &J->L->base[ra];
264 lua_Number stopv = numV(&forbase[FORL_STOP]);
265 lua_Number idxv = numV(&forbase[FORL_IDX]);
266 if (isforl)
267 idxv += numV(&forbase[FORL_STEP]);
268 if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) {
269 if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; }
270 *op = IR_GT; return LOOPEV_LEAVE;
271 } else {
272 if (stopv <= idxv) { *op = IR_GE; return LOOPEV_ENTER; }
273 *op = IR_LT; return LOOPEV_LEAVE;
274 }
275}
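/* Example: for `for i=1,10,2` the step is non-negative, so the loop is
** entered while idx <= stop (guard IR_LE) and left once idx > stop (guard
** IR_GT); a negative step mirrors this with IR_GE/IR_LT. rec_for() below
** emits the chosen guard against the (possibly incremented) index.
*/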
276
277/* Record FORL/JFORL or FORI/JFORI. */
278static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
279{
280 BCReg ra = bc_a(*fori);
281 IROp op;
282 LoopEvent ev = for_iter(J, &op, ra, isforl);
283 TRef *tr = &J->base[ra];
284 TRef idx, stop;
285 IRType t;
286 if (isforl) { /* Handle FORL/JFORL opcodes. */
287 TRef step;
288 idx = tr[FORL_IDX];
289 if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0);
290 t = tref_type(idx);
291 stop = fori_arg(J, fori-2, ra+FORL_STOP, t);
292 step = fori_arg(J, fori-1, ra+FORL_STEP, t);
293 tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
294 } else { /* Handle FORI/JFORI opcodes. */
295 BCReg i;
296 t = IRT_NUM;
297 for (i = FORL_IDX; i <= FORL_STEP; i++) {
298 lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */
299 tr[i] = lj_ir_tonum(J, J->base[ra+i]);
300 }
301 idx = tr[FORL_IDX];
302 stop = tr[FORL_STOP];
303 if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */
304 emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM),
305 tr[FORL_STEP], lj_ir_knum_zero(J));
306 }
307
308 tr[FORL_EXT] = idx;
309 if (ev == LOOPEV_LEAVE) {
310 J->maxslot = ra+FORL_EXT+1;
311 J->pc = fori+1;
312 } else {
313 J->maxslot = ra;
314 J->pc = fori+bc_j(*fori)+1;
315 }
316 lj_snap_add(J);
317
318 emitir(IRTG(op, t), idx, stop);
319
320 if (ev == LOOPEV_LEAVE) {
321 J->maxslot = ra;
322 J->pc = fori+bc_j(*fori)+1;
323 } else {
324 J->maxslot = ra+FORL_EXT+1;
325 J->pc = fori+1;
326 }
327 J->needsnap = 1;
328 return ev;
329}
330
331/* Record ITERL/JITERL. */
332static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
333{
334 BCReg ra = bc_a(iterins);
335 lua_assert(J->base[ra] != 0);
336 if (!tref_isnil(J->base[ra])) { /* Looping back? */
337 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
338 J->maxslot = ra-1+bc_b(J->pc[-1]);
339 J->pc += bc_j(iterins)+1;
340 return LOOPEV_ENTER;
341 } else {
342 J->maxslot = ra-3;
343 J->pc++;
344 return LOOPEV_LEAVE;
345 }
346}
347
348/* Record LOOP/JLOOP. Now, that was easy. */
349static LoopEvent rec_loop(jit_State *J, BCReg ra)
350{
351 J->maxslot = ra;
352 J->pc++;
353 return LOOPEV_ENTER;
354}
355
356/* Check if a loop repeatedly failed to trace because it didn't loop back. */
357static int innerloopleft(jit_State *J, const BCIns *pc)
358{
359 ptrdiff_t i;
360 for (i = 0; i < PENALTY_SLOTS; i++)
361 if (J->penalty[i].pc == pc) {
362 if (J->penalty[i].reason == LJ_TRERR_LLEAVE &&
363 J->penalty[i].val >= 2*HOTCOUNT_MIN_PENALTY)
364 return 1;
365 break;
366 }
367 return 0;
368}
369
370/* Handle the case when an interpreted loop op is hit. */
371static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
372{
373 if (J->parent == 0) {
374 if (pc == J->startpc && J->framedepth == 0) { /* Same loop? */
375 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
376 lj_trace_err(J, LJ_TRERR_LLEAVE);
377 rec_stop(J, J->curtrace); /* Root trace forms a loop. */
378 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
379 /* It's usually better to abort here and wait until the inner loop
380 ** is traced. But if the inner loop repeatedly didn't loop back,
381 ** this indicates a low trip count. In this case try unrolling
382 ** an inner loop even in a root trace. But it's better to be a bit
383 ** more conservative here and only do it for very short loops.
384 */
385 if (!innerloopleft(J, pc))
386 lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */
387 if ((J->loopref && J->cur.nins - J->loopref > 8) || --J->loopunroll < 0)
388 lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */
389 J->loopref = J->cur.nins;
390 }
391 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters an inner loop. */
392 J->loopref = J->cur.nins;
393 if (--J->loopunroll < 0)
394 lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */
395 } /* Side trace continues across a loop that's left or not entered. */
396}
397
398/* Handle the case when an already compiled loop op is hit. */
399static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
400{
401 if (J->parent == 0) { /* Root trace hit an inner loop. */
402 /* Better let the inner loop spawn a side trace back here. */
403 lj_trace_err(J, LJ_TRERR_LINNER);
404 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
405 J->instunroll = 0; /* Cannot continue across a compiled loop op. */
406 if (J->pc == J->startpc && J->framedepth == 0)
407 lnk = J->curtrace; /* Can form an extra loop. */
408 rec_stop(J, lnk); /* Link to the loop. */
409 } /* Side trace continues across a loop that's left or not entered. */
410}
411
412/* -- Metamethod handling ------------------------------------------------- */
413
414/* Prepare to record call to metamethod. */
415static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
416{
417 BCReg s, top = curr_proto(J->L)->framesize;
418 TRef trcont;
419 setcont(&J->L->base[top], cont);
420#if LJ_64
421 trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
422#else
423 trcont = lj_ir_kptr(J, (void *)cont);
424#endif
425 J->base[top] = emitir(IRTG(IR_FRAME, IRT_PTR), trcont, trcont);
426 for (s = J->maxslot; s < top; s++)
427 J->base[s] = 0;
428 return top+1;
429}
430
431/* Record metamethod lookup. */
432static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
433{
434 RecordIndex mix;
435 GCtab *mt;
436 if (tref_istab(ix->tab)) {
437 mt = tabref(tabV(&ix->tabv)->metatable);
438 mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
439 } else if (tref_isudata(ix->tab)) {
440 mt = tabref(udataV(&ix->tabv)->metatable);
441 mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
442 } else {
443 /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
444 mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]);
445 if (mt == NULL)
446 return 0; /* No metamethod. */
447 mix.tab = lj_ir_ktab(J, mt);
448 goto nocheck;
449 }
450 ix->mt = mix.tab;
451 emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mix.tab, lj_ir_knull(J, IRT_TAB));
452nocheck:
453 if (mt) {
454 GCstr *mmstr = strref(J2G(J)->mmname[mm]);
455 cTValue *mo = lj_tab_getstr(mt, mmstr);
456 if (mo && !tvisnil(mo))
457 copyTV(J->L, &ix->mobjv, mo);
458 ix->mtv = mt;
459 settabV(J->L, &mix.tabv, mt);
460 setstrV(J->L, &mix.keyv, mmstr);
461 mix.key = lj_ir_kstr(J, mmstr);
462 mix.val = 0;
463 mix.idxchain = 0;
464 ix->mobj = rec_idx(J, &mix);
465 return !tref_isnil(ix->mobj); /* 1 if metamethod found, 0 if not. */
466 }
467 return 0; /* No metamethod. */
468}
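
/* Sketch of what this typically records for a table operand (simplified):
**   mix.tab = FLOAD(tab, IRFL_TAB_META)    load the metatable reference
**             guard NE(mix.tab, NULL)      metatable must stay present
**   ix->mobj = rec_idx() lookup of the interned name J2G(J)->mmname[mm]
** For userdata the field is IRFL_UDATA_META; for all other types the base
** metatable is specialized to a constant (see the note above about
** flushing mcode in lua_setmetatable()).
*/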
469
470/* Record call to arithmetic metamethod (and MM_len). */
471static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
472{
473 /* Set up metamethod call first to save ix->tab and ix->tabv. */
474 BCReg func = rec_mm_prep(J, lj_cont_ra);
475 TRef *base = J->base + func;
476 TValue *basev = J->L->base + func;
477 base[1] = ix->tab; base[2] = ix->key;
478 copyTV(J->L, basev+1, &ix->tabv);
479 copyTV(J->L, basev+2, &ix->keyv);
480 if (!rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */
481 if (mm != MM_len) {
482 ix->tab = ix->key;
483 copyTV(J->L, &ix->tabv, &ix->keyv);
484 if (rec_mm_lookup(J, ix, mm)) /* Lookup metamethod on 2nd operand. */
485 goto ok;
486 }
487 lj_trace_err(J, LJ_TRERR_NOMM);
488 }
489ok:
490 base[0] = ix->mobj;
491 copyTV(J->L, basev+0, &ix->mobjv);
492 return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0;
493}
494
495/* Call a comparison metamethod. */
496static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
497{
498 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
499 TRef *base = J->base + func;
500 TValue *tv = J->L->base + func;
501 base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
502 copyTV(J->L, tv+0, &ix->mobjv);
503 copyTV(J->L, tv+1, &ix->valv);
504 copyTV(J->L, tv+2, &ix->keyv);
505 rec_call(J, func, CALLRES_CONT, 2);
506 /* It doesn't matter whether this is immediately resolved or not.
507 ** Type specialization of the return type suffices to specialize
508 ** the control flow.
509 */
510}
511
512/* Record call to equality comparison metamethod (for tab and udata only). */
513static void rec_mm_equal(jit_State *J, RecordIndex *ix, int op)
514{
515 ix->tab = ix->val;
516 copyTV(J->L, &ix->tabv, &ix->valv);
517 if (rec_mm_lookup(J, ix, MM_eq)) { /* Lookup metamethod on 1st operand. */
518 cTValue *bv;
519 TRef mo1 = ix->mobj;
520 TValue mo1v;
521 copyTV(J->L, &mo1v, &ix->mobjv);
522 /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
523 bv = &ix->keyv;
524 if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
525 TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
526 emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
527 } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
528 TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
529 emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
530 } else { /* Lookup metamethod on 2nd operand and compare both. */
531 ix->tab = ix->key;
532 copyTV(J->L, &ix->tabv, bv);
533 if (!rec_mm_lookup(J, ix, MM_eq) ||
534 rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
535 return;
536 }
537 rec_mm_callcomp(J, ix, op);
538 }
539}
540
541/* Record call to ordered comparison metamethods (for arbitrary objects). */
542static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op)
543{
544 ix->tab = ix->val;
545 copyTV(J->L, &ix->tabv, &ix->valv);
546 while (1) {
547 MMS mm = (op & 2) ? MM_le : MM_lt; /* Try __le + __lt or only __lt. */
548 if (rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */
549 cTValue *bv;
550 TRef mo1 = ix->mobj;
551 TValue mo1v;
552 copyTV(J->L, &mo1v, &ix->mobjv);
553 /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
554 bv = &ix->keyv;
555 if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
556 TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
557 emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
558 } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
559 TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
560 emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
561 } else { /* Lookup metamethod on 2nd operand and compare both. */
562 ix->tab = ix->key;
563 copyTV(J->L, &ix->tabv, bv);
564 if (!rec_mm_lookup(J, ix, mm) ||
565 rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
566 goto nomatch;
567 }
568 rec_mm_callcomp(J, ix, op);
569 return;
570 }
571 nomatch:
572 /* First lookup failed. Retry with __lt and swapped operands. */
573 if (!(op & 2)) break; /* Already at __lt. Interpreter will throw. */
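 /* Bit 0 of op selects the negated continuation (condf vs. condt) and
 ** bit 1 selects __le vs. __lt, so swapping the operands and flipping
 ** both bits (op ^= 3) effectively turns "a <= b" into "not (b < a)",
 ** matching the interpreter's fallback when only __lt is defined.
 */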
574 ix->tab = ix->key; ix->key = ix->val; ix->val = ix->tab;
575 copyTV(J->L, &ix->tabv, &ix->keyv);
576 copyTV(J->L, &ix->keyv, &ix->valv);
577 copyTV(J->L, &ix->valv, &ix->tabv);
578 op ^= 3;
579 }
580}
581
582/* -- Indexed access ------------------------------------------------------ */
583
584/* Record indexed key lookup. */
585static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
586{
587 TRef key;
588 GCtab *t = tabV(&ix->tabv);
589 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
590
591 /* Integer keys are looked up in the array part first. */
592 key = ix->key;
593 if (tref_isnumber(key)) {
594 lua_Number n = numV(&ix->keyv);
595 int32_t k = lj_num2int(n);
596 lua_assert(tvisnum(&ix->keyv));
597 /* Potential array key? */
598 if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) {
599 TRef asizeref, ikey = key;
600 if (!tref_isinteger(ikey))
601 ikey = emitir(IRTGI(IR_TOINT), ikey, IRTOINT_INDEX);
602 asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
603 if ((MSize)k < t->asize) { /* Currently an array key? */
604 TRef arrayref;
605 emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */
606 arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY);
607 return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey);
608 } else { /* Currently not in array (may be an array extension)? */
609 emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
610 if (k == 0 && tref_isk(key))
611 key = lj_ir_knum_zero(J); /* Canonicalize 0 or +-0.0 to +0.0. */
612 /* And continue with the hash lookup. */
613 }
614 } else if (!tref_isk(key)) {
615 /* We can rule out const numbers which failed the integerness test
616 ** above. But all other numbers are potential array keys.
617 */
618 if (t->asize == 0) { /* True sparse tables have an empty array part. */
619 /* Guard that the array part stays empty. */
620 TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
621 emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0));
622 } else {
623 lj_trace_err(J, LJ_TRERR_NYITMIX);
624 }
625 }
626 }
627
628 /* Otherwise the key is located in the hash part. */
629 if (tref_isinteger(key)) /* Hash keys are based on numbers, not ints. */
630 ix->key = key = emitir(IRTN(IR_TONUM), key, 0);
631 if (tref_isk(key)) {
632 /* Optimize lookup of constant hash keys. */
633 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
634 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
635 hslot <= 65535*(MSize)sizeof(Node)) {
636 TRef node, kslot;
637 TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
638 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
639 node = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_NODE);
640 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
641 return emitir(IRTG(IR_HREFK, IRT_PTR), node, kslot);
642 }
643 }
644 /* Fall back to a regular hash lookup. */
645 return emitir(IRT(IR_HREF, IRT_PTR), ix->tab, key);
646}
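
/* Sketch for a constant string key, e.g. t.foo whose entry currently sits
** in hash slot 3 of t (simplified):
**   hm   = FLOAD(tab, IRFL_TAB_HMASK)
**          guard EQ(hm, <current hmask>)    hash part must not be resized
**   node = FLOAD(tab, IRFL_TAB_NODE)
**   ref  = HREFK(node, kslot("foo", 3))     direct reference into the node
** Non-constant or out-of-range keys fall back to the generic HREF lookup.
*/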
647
648/* Determine whether a key is NOT one of the fast metamethod names. */
649static int nommstr(jit_State *J, TRef key)
650{
651 if (tref_isstr(key)) {
652 if (tref_isk(key)) {
653 GCstr *str = ir_kstr(IR(tref_ref(key)));
654 uint32_t i;
655 for (i = 0; i <= MM_FAST; i++)
656 if (strref(J2G(J)->mmname[i]) == str)
657 return 0; /* MUST be one of the fast metamethod names. */
658 } else {
659 return 0; /* Variable string key MAY be a metamethod name. */
660 }
661 }
662 return 1; /* CANNOT be a metamethod name. */
663}
664
665/* Record indexed load/store. */
666static TRef rec_idx(jit_State *J, RecordIndex *ix)
667{
668 TRef xref;
669 IROp xrefop, loadop;
670 cTValue *oldv;
671
672 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
673 lua_assert(ix->idxchain != 0); /* Never call raw rec_idx() on non-table. */
674 if (!rec_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
675 lj_trace_err(J, LJ_TRERR_NOMM);
676 handlemm:
677 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
678 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
679 TRef *base = J->base + func;
680 TValue *tv = J->L->base + func;
681 base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
682 setfuncV(J->L, tv+0, funcV(&ix->mobjv));
683 copyTV(J->L, tv+1, &ix->tabv);
684 copyTV(J->L, tv+2, &ix->keyv);
685 if (ix->val) {
686 base[3] = ix->val;
687 copyTV(J->L, tv+3, &ix->valv);
688 rec_call(J, func, CALLRES_CONT, 3); /* mobj(tab, key, val) */
689 return 0;
690 } else {
691 /* res = mobj(tab, key) */
692 return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0;
693 }
694 }
695 /* Otherwise retry lookup with metaobject. */
696 ix->tab = ix->mobj;
697 copyTV(J->L, &ix->tabv, &ix->mobjv);
698 if (--ix->idxchain == 0)
699 lj_trace_err(J, LJ_TRERR_IDXLOOP);
700 }
701
702 /* First catch nil and NaN keys for tables. */
703 if (tvisnil(&ix->keyv) || (tvisnum(&ix->keyv) && tvisnan(&ix->keyv))) {
704 if (ix->val) /* Better fail early. */
705 lj_trace_err(J, LJ_TRERR_STORENN);
706 if (tref_isk(ix->key)) {
707 if (ix->idxchain && rec_mm_lookup(J, ix, MM_index))
708 goto handlemm;
709 return TREF_NIL;
710 }
711 }
712
713 /* Record the key lookup. */
714 xref = rec_idx_key(J, ix);
715 xrefop = IR(tref_ref(xref))->o;
716 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
717 oldv = ix->oldv;
718
719 if (ix->val == 0) { /* Indexed load */
720 IRType t = itype2irt(oldv);
721 TRef res = emitir(IRTG(loadop, t), xref, 0);
722 if (t == IRT_NIL && ix->idxchain && rec_mm_lookup(J, ix, MM_index))
723 goto handlemm;
724 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
725 return res;
726 } else { /* Indexed store. */
727 GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
728 if (tvisnil(oldv)) { /* Previous value was nil? */
729 /* Need to duplicate the hasmm check for the early guards. */
730 int hasmm = 0;
731 if (ix->idxchain && mt) {
732 cTValue *mo = lj_tab_getstr(mt, strref(J2G(J)->mmname[MM_newindex]));
733 hasmm = mo && !tvisnil(mo);
734 }
735 if (hasmm || oldv == niltvg(J2G(J)))
736 emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
737 else if (xrefop == IR_HREF)
738 emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J))));
739 if (ix->idxchain && rec_mm_lookup(J, ix, MM_newindex)) { /* Metamethod? */
740 lua_assert(hasmm);
741 goto handlemm;
742 }
743 lua_assert(!hasmm);
744 if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
745 TRef key = ix->key;
746 if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
747 key = emitir(IRTN(IR_TONUM), key, 0);
748 xref = emitir(IRT(IR_NEWREF, IRT_PTR), ix->tab, key);
749 }
750 } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
751 /* Cannot derive that the previous value was non-nil, must do checks. */
752 if (xrefop == IR_HREF) /* Guard against store to niltv. */
753 emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J))));
754 if (ix->idxchain) { /* Metamethod lookup required? */
755 /* A check for NULL metatable is cheaper (hoistable) than a load. */
756 if (!mt) {
757 TRef mtref = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
758 emitir(IRTG(IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
759 } else {
760 IRType t = itype2irt(oldv);
761 emitir(IRTG(loadop, t), xref, 0); /* Guard for non-nil value. */
762 }
763 }
764 }
765 if (tref_isinteger(ix->val)) /* Convert int to number before storing. */
766 ix->val = emitir(IRTN(IR_TONUM), ix->val, 0);
767 emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val);
768 if (tref_isgcv(ix->val))
769 emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
770 /* Invalidate neg. metamethod cache for stores with certain string keys. */
771 if (!nommstr(J, ix->key)) {
772 TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ix->tab, IRFL_TAB_NOMM);
773 emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
774 }
775 J->needsnap = 1;
776 return 0;
777 }
778}
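
/* Sketch of the store path for a new key, e.g. t.foo = v with "foo" not
** yet present in t (simplified):
**   guard that the previous value is nil (a nil-typed load or a pointer
**   check against niltv), then
**   xref = NEWREF(tab, "foo")     create the key on trace
**          HSTORE(xref, v)        store the value
**          TBAR(tab)              GC write barrier for GC values
** plus an FSTORE that clears the negative metamethod cache whenever the
** key is, or may be, one of the fast metamethod names.
*/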
779
780/* -- Upvalue access ------------------------------------------------------ */
781
782/* Record upvalue load/store. */
783static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
784{
785 GCupval *uvp = &gcref(J->fn->l.uvptr[uv])->uv;
786 TRef fn = getcurrf(J);
787 IRRef uref;
788 int needbarrier = 0;
789 if (!uvp->closed) {
790 /* In current stack? */
791 if (uvp->v >= J->L->stack && uvp->v < J->L->maxstack) {
792 int32_t slot = (int32_t)(uvp->v - (J->L->base - J->baseslot));
793 if (slot >= 0) { /* Aliases an SSA slot? */
794 slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
795 /* NYI: add IR to guard that it's still aliasing the same slot. */
796 if (val == 0) {
797 return getslot(J, slot);
798 } else {
799 J->base[slot] = val;
800 if (slot >= (int32_t)J->maxslot) J->maxslot = (BCReg)(slot+1);
801 return 0;
802 }
803 }
804 }
805 uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PTR), fn, uv));
806 } else {
807 needbarrier = 1;
808 uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PTR), fn, uv));
809 }
810 if (val == 0) { /* Upvalue load */
811 IRType t = itype2irt(uvp->v);
812 TRef res = emitir(IRTG(IR_ULOAD, t), uref, 0);
813 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */
814 return res;
815 } else { /* Upvalue store. */
816 if (tref_isinteger(val)) /* Convert int to number before storing. */
817 val = emitir(IRTN(IR_TONUM), val, 0);
818 emitir(IRT(IR_USTORE, tref_type(val)), uref, val);
819 if (needbarrier && tref_isgcv(val))
820 emitir(IRT(IR_OBAR, IRT_NIL), uref, val);
821 J->needsnap = 1;
822 return 0;
823 }
824}
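
/* In short: an open upvalue that aliases one of the stack slots tracked
** by the trace is read and written directly as an SSA slot; other open
** upvalues are accessed through UREFO references and closed ones through
** UREFC, where stores of GC values also need an OBAR write barrier.
*/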
825
826/* -- Record calls to fast functions -------------------------------------- */
827
828/* Note: The function and the arguments for the bytecode CALL instructions
829** always occupy _new_ stack slots (above the highest active variable).
830** This means they must have been stored there by previous instructions
831** (MOV, K*, ADD etc.) which must be part of the same trace. This in turn
832** means their reference slots are already valid and their types have
833** already been specialized (i.e. getslot() would be redundant).
834** The 1st slot beyond the arguments is set to 0 before calling recff_*.
835*/
836
837/* Data used by handlers to record a fast function. */
838typedef struct RecordFFData {
839 TValue *argv; /* Runtime argument values. */
840 GCfunc *fn; /* The currently recorded function. */
841 int nargs; /* Number of passed arguments. */
842 int nres; /* Number of returned results (defaults to 1). */
843 int cres; /* Wanted number of call results. */
844 uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */
845} RecordFFData;
846
847/* Type of handler to record a fast function. */
848typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd);
849
850/* Avoid carrying two pointers around. */
851#define arg (res+1)
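
/* A hypothetical minimal handler, shown for illustration only (it is not
** part of the dispatch table): it passes its first argument through as
** the result, using the conventions above (arg[] aliases res+1 and
** rd->nres reports the number of results produced).
*/
#if 0
static void recff_identity(jit_State *J, TRef *res, RecordFFData *rd)
{
  if (arg[0]) {           /* At least one argument passed? */
    res[0] = arg[0];      /* Return it unchanged. */
    rd->nres = 1;
  } else {
    rd->nres = 0;         /* No arguments: return no results. */
  }
  UNUSED(J);
}
#endif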
852
853/* Get runtime value of int argument. */
854static int32_t argv2int(jit_State *J, TValue *o)
855{
856 if (tvisstr(o) && !lj_str_numconv(strVdata(o), o))
857 lj_trace_err(J, LJ_TRERR_BADTYPE);
858 return lj_num2bit(numV(o));
859}
860
861/* Get runtime value of string argument. */
862static GCstr *argv2str(jit_State *J, TValue *o)
863{
864 if (LJ_LIKELY(tvisstr(o))) {
865 return strV(o);
866 } else {
867 GCstr *s;
868 lua_assert(tvisnum(o));
869 s = lj_str_fromnum(J->L, &o->n);
870 setstrV(J->L, o, s);
871 return s;
872 }
873}
874
875/* Fallback handler for all fast functions that are not recorded (yet). */
876static void recff_nyi(jit_State *J, TRef *res, RecordFFData *rd)
877{
878 UNUSED(res);
879 setfuncV(J->L, &J->errinfo, rd->fn);
880 lj_trace_err_info(J, LJ_TRERR_NYIFF);
881}
882
883LJ_NORET static void recff_err_ffu(jit_State *J, RecordFFData *rd)
884{
885 setfuncV(J->L, &J->errinfo, rd->fn);
886 lj_trace_err_info(J, LJ_TRERR_NYIFFU);
887}
888
889/* C functions can have arbitrary side-effects and are not recorded (yet). */
890static void recff_c(jit_State *J, TRef *res, RecordFFData *rd)
891{
892 UNUSED(res);
893 setlightudV(&J->errinfo, (void *)rd->fn->c.f);
894 lj_trace_err_info(J, LJ_TRERR_NYICF);
895}
896
897/* -- Base library fast functions ----------------------------------------- */
898
899static void recff_assert(jit_State *J, TRef *res, RecordFFData *rd)
900{
901 /* Arguments already specialized. The interpreter throws for nil/false. */
902 BCReg i;
903 for (i = 0; arg[i]; i++) /* Need to pass through all arguments. */
904 res[i] = arg[i];
905 rd->nres = (int)i;
906 UNUSED(J);
907}
908
909static void recff_type(jit_State *J, TRef *res, RecordFFData *rd)
910{
911 /* Arguments already specialized. Result is a constant string. Neat, huh? */
912 IRType t = tref_isinteger(arg[0]) ? IRT_NUM : tref_type(arg[0]);
913 res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[t]));
914}
915
916static void recff_getmetatable(jit_State *J, TRef *res, RecordFFData *rd)
917{
918 TRef tr = arg[0];
919 if (tref_istab(tr)) {
920 RecordIndex ix;
921 ix.tab = tr;
922 copyTV(J->L, &ix.tabv, &rd->argv[0]);
923 if (rec_mm_lookup(J, &ix, MM_metatable))
924 res[0] = ix.mobj;
925 else
926 res[0] = ix.mt;
927 } /* else: Interpreter will throw. */
928}
929
930static void recff_setmetatable(jit_State *J, TRef *res, RecordFFData *rd)
931{
932 TRef tr = arg[0];
933 TRef mt = arg[1];
934 if (tref_istab(tr) && (tref_istab(mt) || (mt && tref_isnil(mt)))) {
935 TRef fref, mtref;
936 RecordIndex ix;
937 ix.tab = tr;
938 copyTV(J->L, &ix.tabv, &rd->argv[0]);
939 rec_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable field. */
940 fref = emitir(IRT(IR_FREF, IRT_PTR), tr, IRFL_TAB_META);
941 mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
942 emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
943 if (!tref_isnil(mt))
944 emitir(IRT(IR_TBAR, IRT_TAB), tr, 0);
945 res[0] = tr;
946 J->needsnap = 1;
947 } /* else: Interpreter will throw. */
948}
949
950static void recff_rawget(jit_State *J, TRef *res, RecordFFData *rd)
951{
952 if (tref_istab(arg[0]) && arg[1]) {
953 RecordIndex ix;
954 ix.tab = arg[0]; ix.key = arg[1]; ix.val = 0; ix.idxchain = 0;
955 settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
956 copyTV(J->L, &ix.keyv, &rd->argv[1]);
957 res[0] = rec_idx(J, &ix);
958 } /* else: Interpreter will throw. */
959}
960
961static void recff_rawset(jit_State *J, TRef *res, RecordFFData *rd)
962{
963 if (tref_istab(arg[0]) && arg[1] && arg[2]) {
964 RecordIndex ix;
965 ix.tab = arg[0]; ix.key = arg[1]; ix.val = arg[2]; ix.idxchain = 0;
966 settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
967 copyTV(J->L, &ix.keyv, &rd->argv[1]);
968 copyTV(J->L, &ix.valv, &rd->argv[2]);
969 rec_idx(J, &ix);
970 res[0] = arg[0]; /* Returns table. */
971 } /* else: Interpreter will throw. */
972}
973
974static void recff_rawequal(jit_State *J, TRef *res, RecordFFData *rd)
975{
976 if (arg[0] && arg[1]) {
977 int diff = rec_objcmp(J, arg[0], arg[1], &rd->argv[0], &rd->argv[1]);
978 res[0] = diff ? TREF_FALSE : TREF_TRUE;
979 } /* else: Interpreter will throw. */
980}
981
982static void recff_tonumber(jit_State *J, TRef *res, RecordFFData *rd)
983{
984 TRef tr = arg[0];
985 if (tref_isnumber_str(tr)) {
986 if (arg[1]) {
987 TRef base = lj_ir_toint(J, arg[1]);
988 if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
989 recff_err_ffu(J, rd);
990 }
991 if (tref_isstr(tr))
992 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
993 } else {
994 tr = TREF_NIL;
995 }
996 res[0] = tr;
997 UNUSED(rd);
998}
999
1000static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
1001{
1002 TRef tr = arg[0];
1003 if (tref_isstr(tr)) {
1004 /* Ignore __tostring in the string base metatable. */
1005 res[0] = tr;
1006 } else {
1007 RecordIndex ix;
1008 ix.tab = tr;
1009 copyTV(J->L, &ix.tabv, &rd->argv[0]);
1010 if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */
1011 res[0] = ix.mobj;
1012 copyTV(J->L, rd->argv - 1, &ix.mobjv);
1013 if (!rec_call(J, (BCReg)(res - J->base), 1, 1)) /* Pending call? */
1014 rd->cres = CALLRES_PENDING;
1015 /* Otherwise res[0] already contains the result. */
1016 } else if (tref_isnumber(tr)) {
1017 res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
1018 } else {
1019 recff_err_ffu(J, rd);
1020 }
1021 }
1022}
1023
1024static void recff_ipairs_aux(jit_State *J, TRef *res, RecordFFData *rd)
1025{
1026 RecordIndex ix;
1027 ix.tab = arg[0];
1028 if (tref_istab(ix.tab)) {
1029 if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */
1030 lj_trace_err(J, LJ_TRERR_BADTYPE);
1031 setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1);
1032 settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
1033 ix.val = 0; ix.idxchain = 0;
1034 ix.key = lj_ir_toint(J, arg[1]);
1035 res[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1));
1036 res[1] = rec_idx(J, &ix);
1037 rd->nres = tref_isnil(res[1]) ? 0 : 2;
1038 } /* else: Interpreter will throw. */
1039}
1040
1041static void recff_ipairs(jit_State *J, TRef *res, RecordFFData *rd)
1042{
1043 TRef tab = arg[0];
1044 if (tref_istab(tab)) {
1045 res[0] = lj_ir_kfunc(J, funcV(&rd->fn->c.upvalue[0]));
1046 res[1] = tab;
1047 res[2] = lj_ir_kint(J, 0);
1048 rd->nres = 3;
1049 } /* else: Interpreter will throw. */
1050}
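
/* Together these record the stateless ipairs() protocol: for
**   for i, v in ipairs(t) do ... end
** recff_ipairs() returns (aux, t, 0) and each recorded call of the aux
** function adds 1 to the control variable, loads t[i+1] and returns
** (i+1, t[i+1]) until that load yields nil.
*/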
1051
1052static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd)
1053{
1054 if (rd->nargs >= 1) {
1055 BCReg parg = (BCReg)(arg - J->base);
1056 if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */
1057 res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */
1058 rd->nres = (int)((J->maxslot - parg) + 1);
1059 } else { /* Propagate pending call. */
1060 rd->cres = CALLRES_PENDING;
1061 }
1062 } /* else: Interpreter will throw. */
1063}
1064
1065/* Struct to pass context across lj_vm_cpcall. */
1066typedef struct RecordXpcall {
1067 BCReg parg;
1068 int nargs;
1069 int resolved;
1070} RecordXpcall;
1071
1072static TValue *recff_xpcall_cp(lua_State *L, lua_CFunction dummy, void *ud)
1073{
1074 jit_State *J = L2J(L);
1075 RecordXpcall *rx = (RecordXpcall *)ud;
1076 UNUSED(dummy);
1077 rx->resolved = rec_call(J, rx->parg, CALLRES_MULTI, rx->nargs);
1078 return NULL;
1079}
1080
1081static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd)
1082{
1083 if (rd->nargs >= 2) {
1084 RecordXpcall rx;
1085 BCReg parg = (BCReg)(arg - J->base) + 1;
1086 TRef tmp;
1087 TValue argv0, argv1;
1088 ptrdiff_t oargv;
1089 int errcode;
1090 /* Swap function and traceback. */
1091 tmp = arg[0]; arg[0] = arg[1]; arg[1] = tmp;
1092 copyTV(J->L, &argv0, &rd->argv[0]);
1093 copyTV(J->L, &argv1, &rd->argv[1]);
1094 copyTV(J->L, &rd->argv[0], &argv1);
1095 copyTV(J->L, &rd->argv[1], &argv0);
1096 oargv = savestack(J->L, rd->argv);
1097 /* Need to protect rec_call because the recorder may throw. */
1098 rx.parg = parg;
1099 rx.nargs = rd->nargs - 2;
1100 errcode = lj_vm_cpcall(J->L, recff_xpcall_cp, NULL, &rx);
1101 /* Always undo Lua stack swap to avoid confusing the interpreter. */
1102 rd->argv = restorestack(J->L, oargv); /* Stack may have been resized. */
1103 copyTV(J->L, &rd->argv[0], &argv0);
1104 copyTV(J->L, &rd->argv[1], &argv1);
1105 if (errcode)
1106 lj_err_throw(J->L, errcode); /* Propagate errors. */
1107 if (rx.resolved) { /* Resolved call. */
1108 int i, nres = (int)(J->maxslot - parg);
1109 rd->nres = nres + 1;
1110 res[0] = TREF_TRUE; /* Prepend true result. */
1111 for (i = 1; i <= nres; i++) /* Move results down. */
1112 res[i] = res[i+1];
1113 } else { /* Propagate pending call. */
1114 rd->cres = CALLRES_PENDING;
1115 }
1116 } /* else: Interpreter will throw. */
1117}
1118
1119/* -- Math library fast functions ----------------------------------------- */
1120
1121static void recff_math_abs(jit_State *J, TRef *res, RecordFFData *rd)
1122{
1123 TRef tr = lj_ir_tonum(J, arg[0]);
1124 res[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J));
1125 UNUSED(rd);
1126}
1127
1128/* Record rounding functions math.floor and math.ceil. */
1129static void recff_math_round(jit_State *J, TRef *res, RecordFFData *rd)
1130{
1131 if (tref_isinteger(arg[0]))
1132 res[0] = arg[0];
1133 else
1134 res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data);
1135 /* Note: result is integral (or NaN/Inf), but may not fit into an integer. */
1136}
1137
1138/* Record unary math.* functions, mapped to IR_FPMATH opcode. */
1139static void recff_math_unary(jit_State *J, TRef *res, RecordFFData *rd)
1140{
1141 res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data);
1142}
1143
1144/* Record binary math.* functions math.atan2 and math.ldexp. */
1145static void recff_math_binary(jit_State *J, TRef *res, RecordFFData *rd)
1146{
1147 TRef tr = lj_ir_tonum(J, arg[0]);
1148 res[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, arg[1]));
1149}
1150
1151/* Record math.asin, math.acos, math.atan. */
1152static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd)
1153{
1154 TRef y = lj_ir_tonum(J, arg[0]);
1155 TRef x = lj_ir_knum_one(J);
1156 uint32_t ffid = rd->data;
1157 if (ffid != FF_math_atan) {
1158 TRef tmp = emitir(IRTN(IR_MUL), y, y);
1159 tmp = emitir(IRTN(IR_SUB), x, tmp);
1160 tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT);
1161 if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; }
1162 }
1163 res[0] = emitir(IRTN(IR_ATAN2), y, x);
1164}
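
/* The reduction above relies on the identities
**   asin(a) = atan2(a, sqrt(1-a^2))   and   acos(a) = atan2(sqrt(1-a^2), a),
** while math.atan(a) is simply atan2(a, 1).
*/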
1165
1166static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd)
1167{
1168 TRef tr = arg[0];
1169 if (tref_isinteger(arg[0])) {
1170 res[0] = tr;
1171 res[1] = lj_ir_kint(J, 0);
1172 } else {
1173 tr = lj_ir_tonum(J, tr);
1174 res[0] = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC);
1175 res[1] = emitir(IRTN(IR_SUB), tr, res[0]);
1176 }
1177 rd->nres = 2;
1178}
1179
1180static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd)
1181{
1182 TRef tr = lj_ir_tonum(J, arg[0]);
1183 res[0] = emitir(IRTN(IR_MUL), tr, lj_ir_knum(J, numV(&rd->fn->c.upvalue[0])));
1184}
1185
1186static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd)
1187{
1188 if (!tref_isnumber_str(arg[1]))
1189 lj_trace_err(J, LJ_TRERR_BADTYPE);
1190 res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]);
1191 UNUSED(rd);
1192}
1193
1194static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd)
1195{
1196 TRef tr = lj_ir_tonum(J, arg[0]);
1197 uint32_t op = rd->data;
1198 BCReg i;
1199 for (i = 1; arg[i]; i++)
1200 tr = emitir(IRTN(op), tr, lj_ir_tonum(J, arg[i]));
1201 res[0] = tr;
1202}
1203
1204/* -- Bit library fast functions ------------------------------------------ */
1205
1206/* Record unary bit.tobit, bit.bnot, bit.bswap. */
1207static void recff_bit_unary(jit_State *J, TRef *res, RecordFFData *rd)
1208{
1209 TRef tr = lj_ir_tobit(J, arg[0]);
1210 res[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
1211}
1212
1213/* Record N-ary bit.band, bit.bor, bit.bxor. */
1214static void recff_bit_nary(jit_State *J, TRef *res, RecordFFData *rd)
1215{
1216 TRef tr = lj_ir_tobit(J, arg[0]);
1217 uint32_t op = rd->data;
1218 BCReg i;
1219 for (i = 1; arg[i]; i++)
1220 tr = emitir(IRTI(op), tr, lj_ir_tobit(J, arg[i]));
1221 res[0] = tr;
1222}
1223
1224/* Record bit shifts. */
1225static void recff_bit_shift(jit_State *J, TRef *res, RecordFFData *rd)
1226{
1227 TRef tr = lj_ir_tobit(J, arg[0]);
1228 TRef tsh = lj_ir_tobit(J, arg[1]);
1229#if !LJ_TARGET_MASKEDSHIFT
1230 if (!tref_isk(tsh))
1231 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
1232#endif
1233 res[0] = emitir(IRTI(rd->data), tr, tsh);
1234}
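
/* The extra BAND with 31 mirrors the bit.* semantics that only the lower
** 5 bits of the shift count are used; targets whose shift instructions
** already mask the count (LJ_TARGET_MASKEDSHIFT) can omit it.
*/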
1235
1236/* -- String library fast functions --------------------------------------- */
1237
1238static void recff_string_len(jit_State *J, TRef *res, RecordFFData *rd)
1239{
1240 res[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, arg[0]), IRFL_STR_LEN);
1241 UNUSED(rd);
1242}
1243
1244/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
1245static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
1246{
1247 TRef trstr = lj_ir_tostr(J, arg[0]);
1248 TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
1249 TRef tr0 = lj_ir_kint(J, 0);
1250 TRef trstart, trend;
1251 GCstr *str = argv2str(J, &rd->argv[0]);
1252 int32_t start, end;
1253 if (rd->data) { /* string.sub(str, start [,end]) */
1254 trstart = lj_ir_toint(J, arg[1]);
1255 trend = tref_isnil(arg[2]) ? lj_ir_kint(J, -1) : lj_ir_toint(J, arg[2]);
1256 start = argv2int(J, &rd->argv[1]);
1257 end = tref_isnil(arg[2]) ? -1 : argv2int(J, &rd->argv[2]);
1258 } else { /* string.byte(str [,start [,end]]) */
1259 if (arg[1]) {
1260 trstart = lj_ir_toint(J, arg[1]);
1261 trend = tref_isnil(arg[2]) ? trstart : lj_ir_toint(J, arg[2]);
1262 start = argv2int(J, &rd->argv[1]);
1263 end = tref_isnil(arg[2]) ? start : argv2int(J, &rd->argv[2]);
1264 } else {
1265 trend = trstart = lj_ir_kint(J, 1);
1266 end = start = 1;
1267 }
1268 }
1269 if (end < 0) {
1270 emitir(IRTGI(IR_LT), trend, tr0);
1271 trend = emitir(IRTI(IR_ADD), emitir(IRTI(IR_ADD), trlen, trend),
1272 lj_ir_kint(J, 1));
1273 end = end+(int32_t)str->len+1;
1274 } else if ((MSize)end <= str->len) {
1275 emitir(IRTGI(IR_ULE), trend, trlen);
1276 } else {
1277 emitir(IRTGI(IR_GT), trend, trlen);
1278 end = (int32_t)str->len;
1279 trend = trlen;
1280 }
1281 if (start < 0) {
1282 emitir(IRTGI(IR_LT), trstart, tr0);
1283 trstart = emitir(IRTI(IR_ADD), trlen, trstart);
1284 start = start+(int32_t)str->len;
1285 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
1286 if (start < 0) {
1287 trstart = tr0;
1288 start = 0;
1289 }
1290 } else {
1291 if (start == 0) {
1292 emitir(IRTGI(IR_EQ), trstart, tr0);
1293 trstart = tr0;
1294 } else {
1295 trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
1296 emitir(IRTGI(IR_GE), trstart, tr0);
1297 start--;
1298 }
1299 }
1300 if (rd->data) { /* Return string.sub result. */
1301 if (end - start >= 0) {
1302 /* Also handle empty range here, to avoid extra traces. */
1303 TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
1304 emitir(IRTGI(IR_GE), trslen, tr0);
1305 trptr = emitir(IRT(IR_STRREF, IRT_PTR), trstr, trstart);
1306 res[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
1307 } else { /* Range underflow: return empty string. */
1308 emitir(IRTGI(IR_LT), trend, trstart);
1309 res[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0));
1310 }
1311 } else { /* Return string.byte result(s). */
1312 int32_t i, len = end - start;
1313 if (len > 0) {
1314 TRef trslen = emitir(IRTI(IR_SUB), trend, trstart);
1315 emitir(IRTGI(IR_EQ), trslen, lj_ir_kint(J, len));
1316 if (res + len > J->slot + LJ_MAX_JSLOTS)
1317 lj_trace_err(J, LJ_TRERR_STACKOV);
1318 rd->nres = len;
1319 for (i = 0; i < len; i++) {
1320 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i));
1321 tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp);
1322 res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0);
1323 }
1324 } else { /* Empty range or range underflow: return no results. */
1325 emitir(IRTGI(IR_LE), trend, trstart);
1326 rd->nres = 0;
1327 }
1328 }
1329}
1330
1331/* -- Table library fast functions ---------------------------------------- */
1332
1333static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd)
1334{
1335 if (tref_istab(arg[0])) {
1336 res[0] = emitir(IRTI(IR_TLEN), arg[0], 0);
1337 } /* else: Interpreter will throw. */
1338 UNUSED(rd);
1339}
1340
1341/* -- Record calls and returns -------------------------------------------- */
1342
1343#undef arg
1344
1345#include "lj_recdef.h"
1346
1347/* Record return. */
1348static void rec_ret(jit_State *J, BCReg rbase, int gotresults)
1349{
1350 TValue *frame = J->L->base - 1;
1351 TRef *res = J->base + rbase;
1352 J->tailcalled = 0;
1353 while (frame_ispcall(frame)) {
1354 BCReg cbase = (BCReg)frame_delta(frame);
1355 lua_assert(J->baseslot > 1);
1356 J->baseslot -= (BCReg)cbase;
1357 J->base -= cbase;
1358 *--res = TREF_TRUE; /* Prepend true to results. */
1359 gotresults++;
1360 J->framedepth--;
1361 frame = frame_prevd(frame);
1362 }
1363 if (J->framedepth-- <= 0)
1364 lj_trace_err(J, LJ_TRERR_NYIRETL);
1365 lua_assert(J->baseslot > 1);
1366 if (frame_islua(frame)) {
1367 BCIns callins = *(J->pc = frame_pc(frame)-1);
1368 ptrdiff_t nresults = bc_b(callins) ? (int)bc_b(callins)-1 : gotresults;
1369 BCReg cbase = bc_a(callins);
1370 int i;
1371 for (i = 0; i < nresults; i++)
1372 J->base[i-1] = i < gotresults ? res[i] : TREF_NIL;
1373 J->maxslot = cbase+(BCReg)nresults;
1374 J->baseslot -= cbase+1;
1375 J->base -= cbase+1;
1376 } else if (frame_iscont(frame)) {
1377 ASMFunction cont = frame_contf(frame);
1378 BCReg i, cbase = (BCReg)frame_delta(frame);
1379 J->pc = frame_contpc(frame)-1;
1380 J->baseslot -= (BCReg)cbase;
1381 J->base -= cbase;
1382 /* Shrink maxslot as much as possible after return from continuation. */
1383 for (i = cbase-2; i > 0 && J->base[i] == 0; i--) ;
1384 J->maxslot = i;
1385 if (cont == lj_cont_ra) {
1386 /* Copy result to destination slot. */
1387 BCReg dst = bc_a(*J->pc);
1388 J->base[dst] = res[0];
1389 if (dst > J->maxslot) J->maxslot = dst+1;
1390 } else if (cont == lj_cont_nop) {
1391 /* Nothing to do here. */
1392 } else if (cont == lj_cont_cat) {
1393 lua_assert(0);
1394 } else {
1395 /* Result type already specialized. */
1396 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
1397 }
1398 } else {
1399 lua_assert(0);
1400 }
1401 lua_assert(J->baseslot >= 1);
1402}
1403
1404/* Check unroll limits for calls. */
1405static void check_call_unroll(jit_State *J, GCfunc *fn)
1406{
1407 TValue *first = J->L->base - J->baseslot;
1408 TValue *frame = J->L->base - 1;
1409 int count = 0;
1410 while (frame > first) {
1411 if (frame_func(frame) == fn)
1412 count++;
1413 if (frame_isvarg(frame))
1414 frame = frame_prevd(frame);
1415 frame = frame_prev(frame);
1416 }
1417 if (frame_func(first) == fn && bc_op(J->cur.startins) == BC_CALL) {
1418 if (count >= J->param[JIT_P_recunroll])
1419 lj_trace_err(J, LJ_TRERR_NYIRECU);
1420 } else {
1421 if (count >= J->param[JIT_P_callunroll])
1422 lj_trace_err(J, LJ_TRERR_CUNROLL);
1423 }
1424}
1425
1426/* Record call. Returns 0 for pending calls and 1 for resolved calls. */
1427static int rec_call(jit_State *J, BCReg func, int cres, int nargs)
1428{
1429 RecordFFData rd;
1430 TRef *res = &J->base[func];
1431 TValue *tv = &J->L->base[func];
1432
1433 if (tref_isfunc(res[0])) { /* Regular function call. */
1434 rd.fn = funcV(tv);
1435 rd.argv = tv+1;
1436 } else { /* Otherwise resolve __call metamethod for called object. */
1437 RecordIndex ix;
1438 int i;
1439 ix.tab = res[0];
1440 copyTV(J->L, &ix.tabv, tv);
1441 if (!rec_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
1442 lj_trace_err(J, LJ_TRERR_NOMM);
1443 /* Update the recorder state, but not the Lua stack. */
1444 for (i = ++nargs; i > 0; i--)
1445 res[i] = res[i-1];
1446 res[0] = ix.mobj;
1447 rd.fn = funcV(&ix.mobjv);
1448 rd.argv = tv; /* The called object is the 1st arg. */
1449 }
1450
1451 /* Specialize to the runtime value of the called function. */
1452 res[0] = emitir(IRTG(IR_FRAME, IRT_FUNC), res[0], lj_ir_kfunc(J, rd.fn));
1453
1454 if (isluafunc(rd.fn)) { /* Record call to Lua function. */
1455 GCproto *pt = funcproto(rd.fn);
1456 if ((pt->flags & PROTO_NO_JIT))
1457 lj_trace_err(J, LJ_TRERR_CJITOFF);
1458 if ((pt->flags & PROTO_IS_VARARG)) {
1459 if (rd.fn->l.gate != lj_gate_lv)
1460 lj_trace_err(J, LJ_TRERR_NYILNKF);
1461 lj_trace_err(J, LJ_TRERR_NYIVF);
1462 } else {
1463 if (rd.fn->l.gate != lj_gate_lf)
1464 lj_trace_err(J, LJ_TRERR_NYILNKF);
1465 }
1466 check_call_unroll(J, rd.fn);
1467 if (cres == CALLRES_TAILCALL) {
1468 int i;
1469 /* Tailcalls can form a loop, so count towards the loop unroll limit. */
1470 if (++J->tailcalled > J->loopunroll)
1471 lj_trace_err(J, LJ_TRERR_LUNROLL);
1472 for (i = 0; i <= nargs; i++) /* Move func + args down. */
1473 J->base[i-1] = res[i];
1474 /* Note: the new FRAME is now at J->base[-1] (even for slot #0). */
1475 } else { /* Regular call. */
1476 J->base += func+1;
1477 J->baseslot += func+1;
1478 J->framedepth++;
1479 }
1480 if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS)
1481 lj_trace_err(J, LJ_TRERR_STACKOV);
1482 /* Fill up missing args with nil. */
1483 while (nargs < pt->numparams)
1484 J->base[nargs++] = TREF_NIL;
1485 /* The remaining slots should never be read before they are written. */
1486 J->maxslot = pt->numparams;
1487 return 0; /* No result yet. */
1488 } else { /* Record call to C function or fast function. */
1489 uint32_t m = 0;
1490 res[1+nargs] = 0;
1491 rd.nargs = nargs;
1492 if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0]))
1493 m = recff_idmap[rd.fn->c.ffid];
1494 rd.data = m & 0xff;
1495 rd.cres = cres;
1496 rd.nres = 1; /* Default is one result. */
1497 (recff_func[m >> 8])(J, res, &rd); /* Call recff_* handler. */
1498 cres = rd.cres;
1499 if (cres >= 0) {
1500 /* Caller takes fixed number of results: local a,b = f() */
1501 J->maxslot = func + (BCReg)cres;
1502 while (rd.nres < cres) /* Fill up missing results with nil. */
1503 res[rd.nres++] = TREF_NIL;
1504 } else if (cres == CALLRES_MULTI) {
1505 /* Caller takes any number of results: return 1,f() */
1506 J->maxslot = func + (BCReg)rd.nres;
1507 } else if (cres == CALLRES_TAILCALL) {
1508 /* Tail call: return f() */
1509 rec_ret(J, func, rd.nres);
1510 } else if (cres == CALLRES_CONT) {
1511 /* Note: immediately resolved continuations must not change J->maxslot. */
1512 res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */
1513 } else {
1514 J->framedepth++;
1515 lua_assert(cres == CALLRES_PENDING);
1516 return 0; /* Pending call, no result yet. */
1517 }
1518 return 1; /* Result resolved immediately. */
1519 }
1520}
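
/* Result conventions as used above: a non-negative cres asks for exactly
** that many results (missing ones are filled with nil), CALLRES_MULTI
** keeps all results, CALLRES_TAILCALL feeds the results into rec_ret(),
** and CALLRES_CONT is used for metamethod continuations, which must not
** change J->maxslot.  A pending call (an unresolved Lua-level call or a
** handler setting rd->cres to CALLRES_PENDING) makes rec_call() return 0;
** its results are delivered later via rec_ret().
*/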
1521
1522/* -- Record allocations -------------------------------------------------- */
1523
1524static TRef rec_tnew(jit_State *J, uint32_t ah)
1525{
1526 uint32_t asize = ah & 0x7ff;
1527 uint32_t hbits = ah >> 11;
1528 if (asize == 0x7ff) asize = 0x801;
1529 return emitir(IRT(IR_TNEW, IRT_TAB), asize, hbits);
1530}
1531
1532/* -- Record bytecode ops ------------------------------------------------- */
1533
1534/* Optimize state after comparison. */
1535static void optstate_comp(jit_State *J, int cond)
1536{
1537 BCIns jmpins = J->pc[1];
1538 const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0);
1539 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1540 /* Avoid re-recording the comparison in side traces. */
1541 J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc);
1542 J->needsnap = 1;
1543 /* Shrink last snapshot if possible. */
1544 if (bc_a(jmpins) < J->maxslot) {
1545 J->maxslot = bc_a(jmpins);
1546 lj_snap_shrink(J);
1547 }
1548}
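
/* Patching the snapshot's resume pc to the continuation after the
** comparison (rather than the comparison itself) lets an exit from this
** guard, and any side trace spawned from it, skip re-executing and
** re-recording the comparison.
*/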
1549
1550/* Record the next bytecode instruction (_before_ it's executed). */
1551void lj_record_ins(jit_State *J)
1552{
1553 cTValue *lbase;
1554 RecordIndex ix;
1555 const BCIns *pc;
1556 BCIns ins;
1557 BCOp op;
1558 TRef ra, rb, rc;
1559
1560 /* Need snapshot before recording next bytecode (e.g. after a store). */
1561 if (J->needsnap) {
1562 J->needsnap = 0;
1563 lj_snap_add(J);
1564 J->mergesnap = 1;
1565 }
1566
1567 /* Record only closed loops for root traces. */
1568 pc = J->pc;
1569 if (J->framedepth == 0 &&
1570 (MSize)((char *)pc - (char *)J->bc_min) >= J->bc_extent)
1571 lj_trace_err(J, LJ_TRERR_LLEAVE);
1572
1573#ifdef LUA_USE_ASSERT
1574 rec_check_slots(J);
1575 rec_check_ir(J);
1576#endif
1577
1578 /* Keep a copy of the runtime values of var/num/str operands. */
1579#define rav (&ix.valv)
1580#define rbv (&ix.tabv)
1581#define rcv (&ix.keyv)
1582
1583 lbase = J->L->base;
1584 ins = *pc;
1585 op = bc_op(ins);
1586 ra = bc_a(ins);
1587 ix.val = 0;
1588 switch (bcmode_a(op)) {
1589 case BCMvar:
1590 copyTV(J->L, rav, &lbase[ra]); ix.val = ra = getslot(J, ra); break;
1591 default: break; /* Handled later. */
1592 }
1593 rb = bc_b(ins);
1594 rc = bc_c(ins);
1595 switch (bcmode_b(op)) {
1596 case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */
1597 case BCMvar:
1598 copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break;
1599 case BCMnum: { lua_Number n = J->pt->k.n[rb];
1600 setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break;
1601 default: break; /* Handled later. */
1602 }
1603 switch (bcmode_c(op)) {
1604 case BCMvar:
1605 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
1606 case BCMpri: setitype(rcv, (int32_t)~rc); rc = TREF_PRI(IRT_NIL+rc); break;
1607 case BCMnum: { lua_Number n = J->pt->k.n[rc];
1608 setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break;
1609 case BCMstr: { GCstr *s = strref(J->pt->k.gc[~rc]);
1610 setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
1611 default: break; /* Handled later. */
1612 }
1613
1614 switch (op) {
1615
1616 /* -- Comparison ops ---------------------------------------------------- */
1617
1618 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
1619 /* Emit nothing for two numeric or string consts. */
1620 if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) {
1621 IRType ta = tref_type(ra);
1622 IRType tc = tref_type(rc);
1623 int irop;
1624 if (ta != tc) {
1625 /* Widen mixed number/int comparisons to number/number comparison. */
1626 if (ta == IRT_INT && tc == IRT_NUM) {
1627 ra = emitir(IRTN(IR_TONUM), ra, 0);
1628 ta = IRT_NUM;
1629 } else if (ta == IRT_NUM && tc == IRT_INT) {
1630 rc = emitir(IRTN(IR_TONUM), rc, 0);
1631 } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) &&
1632 (tc == IRT_FALSE || tc == IRT_TRUE))) {
1633 break; /* Interpreter will throw for two different types. */
1634 }
1635 }
1636 lj_snap_add(J);
1637 irop = (int)op - (int)BC_ISLT + (int)IR_LT;
1638 if (ta == IRT_NUM) {
1639 if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */
1640 if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5;
1641 } else if (ta == IRT_INT) {
1642 if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1;
1643 } else if (ta == IRT_STR) {
1644 if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
1645 } else {
1646 rec_mm_comp(J, &ix, (int)op);
1647 break;
1648 }
1649 emitir(IRTG(irop, ta), ra, rc);
1650 optstate_comp(J, ((int)op ^ irop) & 1);
1651 }
1652 break;
1653
1654 case BC_ISEQV: case BC_ISNEV:
1655 case BC_ISEQS: case BC_ISNES:
1656 case BC_ISEQN: case BC_ISNEN:
1657 case BC_ISEQP: case BC_ISNEP:
1658 /* Emit nothing for two non-table, non-udata consts. */
1659 if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) {
1660 int diff;
1661 lj_snap_add(J);
1662 diff = rec_objcmp(J, ra, rc, rav, rcv);
1663 if (diff == 1 && (tref_istab(ra) || tref_isudata(ra))) {
1664 /* Only check __eq for different operands of the same type (table or udata). */
1665 rec_mm_equal(J, &ix, (int)op);
1666 break;
1667 }
1668 optstate_comp(J, ((int)op & 1) == !diff);
1669 }
1670 break;
1671
1672 /* -- Unary test and copy ops ------------------------------------------- */
1673
1674 case BC_ISTC: case BC_ISFC:
1675 if ((op & 1) == tref_istruecond(rc))
1676 rc = 0; /* Don't store if condition is not true. */
1677 /* fallthrough */
1678 case BC_IST: case BC_ISF: /* Type specialization suffices. */
1679 if (bc_a(pc[1]) < J->maxslot)
1680 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1681 break;
1682
1683 /* -- Unary ops --------------------------------------------------------- */
1684
1685 case BC_NOT:
1686 /* Type specialization already forces const result. */
1687 rc = tref_istruecond(rc) ? TREF_FALSE : TREF_TRUE;
1688 break;
1689
1690 case BC_LEN:
1691 if (tref_isstr(rc)) {
1692 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
1693 } else if (tref_istab(rc)) {
1694 rc = emitir(IRTI(IR_TLEN), rc, 0);
1695 } else {
1696 ix.tab = rc;
1697 copyTV(J->L, &ix.tabv, &ix.keyv);
1698 ix.key = IRT_NIL;
1699 setnilV(&ix.keyv);
1700 rc = rec_mm_arith(J, &ix, MM_len);
1701 }
1702 break;
1703
1704 /* -- Arithmetic ops ---------------------------------------------------- */
1705
1706 case BC_UNM:
1707 if (tref_isnumber_str(rc)) {
1708 rc = lj_ir_tonum(J, rc);
1709 rc = emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
1710 } else {
1711 ix.tab = rc;
1712 copyTV(J->L, &ix.tabv, &ix.keyv);
1713 rc = rec_mm_arith(J, &ix, MM_unm);
1714 }
1715 break;
1716
1717 case BC_ADDNV: case BC_SUBNV: case BC_MULNV: case BC_DIVNV: case BC_MODNV:
1718 ix.tab = rc; ix.key = rc = rb; rb = ix.tab;
1719 copyTV(J->L, &ix.valv, &ix.tabv);
1720 copyTV(J->L, &ix.tabv, &ix.keyv);
1721 copyTV(J->L, &ix.keyv, &ix.valv);
1722 if (op == BC_MODNV)
1723 goto recmod;
1724 /* fallthrough */
1725 case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN:
1726 case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: {
1727 MMS mm = bcmode_mm(op);
1728 if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) {
1729 rb = lj_ir_tonum(J, rb);
1730 rc = lj_ir_tonum(J, rc);
1731 rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc);
1732 } else {
1733 rc = rec_mm_arith(J, &ix, mm);
1734 }
1735 break;
1736 }
1737
1738 case BC_MODVN: case BC_MODVV:
1739 recmod:
1740 if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
1741 rc = lj_opt_narrow_mod(J, rb, rc);
1742 else
1743 rc = rec_mm_arith(J, &ix, MM_mod);
1744 break;
1745
1746 case BC_POW:
1747 if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
1748 rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv);
1749 else
1750 rc = rec_mm_arith(J, &ix, MM_pow);
1751 break;
1752
1753 /* -- Constant and move ops --------------------------------------------- */
1754
1755 case BC_KSTR: case BC_KNUM: case BC_KPRI: case BC_MOV:
1756 break;
1757 case BC_KSHORT:
1758 rc = lj_ir_kint(J, (int32_t)(int16_t)rc);
1759 break;
1760 case BC_KNIL:
1761 while (ra <= rc)
1762 J->base[ra++] = TREF_NIL;
1763 if (rc >= J->maxslot) J->maxslot = rc+1;
1764 break;
1765
1766 /* -- Upvalue and function ops ------------------------------------------ */
1767
1768 case BC_UGET:
1769 rc = rec_upvalue(J, rc, 0);
1770 break;
1771 case BC_USETV: case BC_USETS: case BC_USETN: case BC_USETP:
1772 rec_upvalue(J, ra, rc);
1773 break;
1774
1775 /* -- Table ops --------------------------------------------------------- */
1776
1777 case BC_GGET: case BC_GSET:
1778 settabV(J->L, &ix.tabv, tabref(J->fn->l.env));
1779 ix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), getcurrf(J), IRFL_FUNC_ENV);
1780 ix.idxchain = LJ_MAX_IDXCHAIN;
1781 rc = rec_idx(J, &ix);
1782 break;
1783
1784 case BC_TGETB: case BC_TSETB:
1785 setintV(&ix.keyv, (int32_t)rc);
1786 ix.key = lj_ir_kint(J, (int32_t)rc);
1787 /* fallthrough */
1788 case BC_TGETV: case BC_TGETS: case BC_TSETV: case BC_TSETS:
1789 ix.idxchain = LJ_MAX_IDXCHAIN;
1790 rc = rec_idx(J, &ix);
1791 break;
1792
1793 case BC_TNEW:
1794 rc = rec_tnew(J, rc);
1795 break;
1796 case BC_TDUP:
1797 rc = emitir(IRT(IR_TDUP, IRT_TAB),
1798 lj_ir_ktab(J, tabref(J->pt->k.gc[~rc])), 0);
1799 break;
1800
1801 /* -- Calls and vararg handling ----------------------------------------- */
1802
1803 case BC_ITERC:
1804 J->base[ra] = getslot(J, ra-3);
1805 J->base[ra+1] = getslot(J, ra-2);
1806 J->base[ra+2] = getslot(J, ra-1);
1807 { /* Have to do the actual copy now because rec_call needs the values. */
1808 TValue *b = &J->L->base[ra];
1809 copyTV(J->L, b, b-3);
1810 copyTV(J->L, b+1, b-2);
1811 copyTV(J->L, b+2, b-1);
1812 }
1813 goto callop;
1814
1815 case BC_CALLMT:
1816 rb = (TRef)(CALLRES_TAILCALL+1);
1817 /* fallthrough */
1818 case BC_CALLM:
1819 /* L->top is set to L->base+ra+rc+NRESULTS-1+1, see lj_dispatch_ins(). */
1820 rc = (BCReg)(J->L->top - J->L->base) - ra;
1821 goto callop;
1822
1823 case BC_CALLT:
1824 rb = (TRef)(CALLRES_TAILCALL+1);
1825 /* fallthrough */
1826 case BC_CALL:
1827 callop:
1828 if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */
1829 }
1830 rec_call(J, ra, (int)(rb-1), (int)(rc-1));
1831 break;
1832
1833 /* -- Returns ----------------------------------------------------------- */
1834
1835 case BC_RETM:
1836 /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */
1837 rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
1838 /* fallthrough */
1839 case BC_RET: case BC_RET0: case BC_RET1:
1840 rec_ret(J, ra, (int)(rc-1));
1841 break;
1842
1843 /* -- Loops and branches ------------------------------------------------ */
1844
1845 case BC_FORI:
1846 if (rec_for(J, pc, 0) != LOOPEV_LEAVE)
1847 J->loopref = J->cur.nins;
1848 break;
1849 case BC_JFORI:
1850 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
1851 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
1852 rec_stop(J, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
1853 /* Continue tracing if the loop is not entered. */
1854 break;
1855
1856 case BC_FORL:
1857 rec_loop_interp(J, pc, rec_for(J, pc+((ptrdiff_t)rc-BCBIAS_J), 1));
1858 break;
1859 case BC_ITERL:
1860 rec_loop_interp(J, pc, rec_iterl(J, *pc));
1861 break;
1862 case BC_LOOP:
1863 rec_loop_interp(J, pc, rec_loop(J, ra));
1864 break;
1865
1866 case BC_JFORL:
1867 rec_loop_jit(J, rc, rec_for(J, pc+bc_j(J->trace[rc]->startins), 1));
1868 break;
1869 case BC_JITERL:
1870 rec_loop_jit(J, rc, rec_iterl(J, J->trace[rc]->startins));
1871 break;
1872 case BC_JLOOP:
1873 rec_loop_jit(J, rc, rec_loop(J, ra));
1874 break;
1875
1876 case BC_IFORL:
1877 case BC_IITERL:
1878 case BC_ILOOP:
1879 lj_trace_err_info(J, LJ_TRERR_LBLACKL);
1880 break;
1881
1882 case BC_JMP:
1883 if (ra < J->maxslot)
1884 J->maxslot = ra; /* Shrink used slots. */
1885 break;
1886
1887 case BC_CAT:
1888 case BC_UCLO:
1889 case BC_FNEW:
1890 case BC_TSETM:
1891 case BC_VARG:
1892 default:
1893 setintV(&J->errinfo, (int32_t)op);
1894 lj_trace_err_info(J, LJ_TRERR_NYIBC);
1895 break;
1896 }
1897
1898 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
1899 if (bcmode_a(op) == BCMdst && rc) {
1900 J->base[ra] = rc;
1901 if (ra >= J->maxslot) J->maxslot = ra+1;
1902 }
1903
1904#undef rav
1905#undef rbv
1906#undef rcv
1907
1908 /* Limit the number of recorded IR instructions. */
1909 if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
1910 lj_trace_err(J, LJ_TRERR_TRACEOV);
1911}
1912
1913/* -- Recording setup ----------------------------------------------------- */
1914
1915/* Setup recording for a FORL loop. */
1916static void rec_setup_forl(jit_State *J, const BCIns *fori)
1917{
1918 BCReg ra = bc_a(*fori);
1919 cTValue *forbase = &J->L->base[ra];
1920 IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase)
1921 : IRT_NUM;
1922 TRef stop = fori_arg(J, fori-2, ra+FORL_STOP, t);
1923 TRef step = fori_arg(J, fori-1, ra+FORL_STEP, t);
1924 int dir = (0 <= numV(&forbase[FORL_STEP]));
1925 lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
1926 if (!tref_isk(step)) {
1927 /* Non-constant step: need a guard for the direction. */
1928 TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
1929 emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
1930 /* Add hoistable overflow checks for a narrowed FORL index. */
1931 if (t == IRT_INT) {
1932 if (tref_isk(stop)) {
1933 /* Constant stop: optimize check away or to a range check for step. */
1934 int32_t k = IR(tref_ref(stop))->i;
1935 if (dir) {
1936 if (k > 0)
1937 emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k));
1938 } else {
1939 if (k < 0)
1940 emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k));
1941 }
1942 } else {
1943 /* Stop+step variable: need full overflow check (with dead result). */
1944 emitir(IRTGI(IR_ADDOV), step, stop);
1945 }
1946 }
1947 } else if (t == IRT_INT && !tref_isk(stop)) {
1948 /* Constant step: optimize overflow check to a range check for stop. */
1949 int32_t k = IR(tref_ref(step))->i;
1950 k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
1951 emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
1952 }
1953 J->base[ra+FORL_EXT] = sloadt(J, (int32_t)(ra+FORL_IDX), t, IRSLOAD_INHERIT);
1954 J->maxslot = ra+FORL_EXT+1;
1955}
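
/* Worked example for the constant-step range check above: with step = +1
** the guard becomes stop <= 0x7ffffffe, so even the final increment of
** the narrowed index (idx <= stop, then idx+1) stays within the int32_t
** range and can never wrap around.
*/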
1956
1957/* Setup recording for a root trace started by a hot loop. */
1958static const BCIns *rec_setup_root(jit_State *J)
1959{
1960 /* Determine the next PC and the bytecode range for the loop. */
1961 const BCIns *pcj, *pc = J->pc;
1962 BCIns ins = *pc;
1963 BCReg ra = bc_a(ins);
1964 switch (bc_op(ins)) {
1965 case BC_FORL:
1966 J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
1967 pc += 1+bc_j(ins);
1968 J->bc_min = pc;
1969 break;
1970 case BC_ITERL:
1971 lua_assert(bc_op(pc[-1]) == BC_ITERC);
1972 J->maxslot = ra + bc_b(pc[-1]) - 1;
1973 J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
1974 pc += 1+bc_j(ins);
1975 lua_assert(bc_op(pc[-1]) == BC_JMP);
1976 J->bc_min = pc;
1977 break;
1978 case BC_LOOP:
1979 /* Only check BC range for real loops, but not for "repeat until true". */
1980 pcj = pc + bc_j(ins);
1981 ins = *pcj;
1982 if (bc_op(ins) == BC_JMP && bc_j(ins) < 0) {
1983 J->bc_min = pcj+1 + bc_j(ins);
1984 J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
1985 }
1986 J->maxslot = ra;
1987 pc++;
1988 break;
1989 default:
1990 lua_assert(0);
1991 break;
1992 }
1993 return pc;
1994}
1995
1996/* Setup recording for a side trace. */
1997static void rec_setup_side(jit_State *J, Trace *T)
1998{
1999 SnapShot *snap = &T->snap[J->exitno];
2000 IRRef2 *map = &T->snapmap[snap->mapofs];
2001 BCReg s, nslots = snap->nslots;
2002 BloomFilter seen = 0;
2003 for (s = 0; s < nslots; s++) {
2004 IRRef ref = snap_ref(map[s]);
2005 if (ref) {
2006 IRIns *ir = &T->ir[ref];
2007 TRef tr = 0;
2008 /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */
2009 if (bloomtest(seen, ref)) {
2010 BCReg j;
2011 for (j = 0; j < s; j++)
2012 if (snap_ref(map[j]) == ref) {
2013 if (ir->o == IR_FRAME && irt_isfunc(ir->t))
2014 J->baseslot = s+1;
2015 tr = J->slot[j];
2016 goto dupslot;
2017 }
2018 }
2019 bloomset(seen, ref);
2020 switch ((IROp)ir->o) {
2021 case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
2022 case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
2023 case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
2024 case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
2025 case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
2026 if (irt_isfunc(ir->t)) {
2027 J->baseslot = s+1;
2028 J->framedepth++;
2029 tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
2030 tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
2031 } else {
2032 tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
2033 tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
2034 }
2035 break;
2036 case IR_SLOAD: /* Inherited SLOADs don't need a guard. */
2037 tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
2038 (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
2039 break;
2040 default: /* Parent refs are already typed and don't need a guard. */
2041 tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
2042 IRSLOAD_INHERIT|IRSLOAD_PARENT);
2043 break;
2044 }
2045 dupslot:
2046 J->slot[s] = tr;
2047 }
2048 }
2049 J->base = J->slot + J->baseslot;
2050 J->maxslot = nslots - J->baseslot;
2051 lj_snap_add(J);
2052}
2053
2054/* Setup for recording a new trace. */
2055void lj_record_setup(jit_State *J)
2056{
2057 uint32_t i;
2058
2059 /* Initialize state related to current trace. */
2060 memset(J->slot, 0, sizeof(J->slot));
2061 memset(J->chain, 0, sizeof(J->chain));
2062 memset(J->bpropcache, 0, sizeof(J->bpropcache));
2063
2064 J->baseslot = 1; /* Invoking function is at base[-1]. */
2065 J->base = J->slot + J->baseslot;
2066 J->maxslot = 0;
2067 J->framedepth = 0;
2068
2069 J->instunroll = J->param[JIT_P_instunroll];
2070 J->loopunroll = J->param[JIT_P_loopunroll];
2071 J->tailcalled = 0;
2072 J->loopref = 0;
2073
2074 J->bc_min = NULL; /* Means no limit. */
2075 J->bc_extent = ~(MSize)0;
2076
2077 /* Emit instructions for fixed references. Also triggers initial IR alloc. */
2078 emitir_raw(IRT(IR_BASE, IRT_PTR), J->parent, J->exitno);
2079 for (i = 0; i <= 2; i++) {
2080 IRIns *ir = IR(REF_NIL-i);
2081 ir->i = 0;
2082 ir->t.irt = (uint8_t)(IRT_NIL+i);
2083 ir->o = IR_KPRI;
2084 ir->prev = 0;
2085 }
2086 J->cur.nk = REF_TRUE;
2087
2088 setgcref(J->cur.startpt, obj2gco(J->pt));
2089 J->startpc = J->pc;
2090 if (J->parent) { /* Side trace. */
2091 Trace *T = J->trace[J->parent];
2092 TraceNo root = T->root ? T->root : J->parent;
2093 J->cur.root = (uint16_t)root;
2094 J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
2095 /* Check whether we could at least potentially form an extra loop. */
2096 if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) {
2097 /* We can narrow a FORL for some side traces, too. */
2098 if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI &&
2099 bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
2100 lj_snap_add(J);
2101 rec_setup_forl(J, J->pc-1);
2102 goto sidecheck;
2103 }
2104 } else {
2105 J->startpc = NULL; /* Prevent forming an extra loop. */
2106 }
2107 rec_setup_side(J, T);
2108 sidecheck:
2109 if (J->trace[J->cur.root]->nchild >= J->param[JIT_P_maxside] ||
2110 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2111 J->param[JIT_P_tryside])
2112 rec_stop(J, TRACE_INTERP);
2113 } else { /* Root trace. */
2114 J->cur.root = 0;
2115 if (J->pc >= J->pt->bc) { /* Not a hot CALL? */
2116 J->cur.startins = *J->pc;
2117 J->pc = rec_setup_root(J);
2118 /* Note: the loop instruction itself is recorded at the end and not
2119 ** at the start! So snapshot #0 needs to point to the *next* instruction.
2120 */
2121 } else {
2122 J->cur.startins = BCINS_ABC(BC_CALL, 0, 0, 0);
2123 }
2124 lj_snap_add(J);
2125 if (bc_op(J->cur.startins) == BC_FORL)
2126 rec_setup_forl(J, J->pc-1);
2127 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2128 lj_trace_err(J, LJ_TRERR_STACKOV);
2129 }
2130}
2131
2132#undef IR
2133#undef emitir_raw
2134#undef emitir
2135
2136#endif
diff --git a/src/lj_record.h b/src/lj_record.h
new file mode 100644
index 00000000..7bb7952c
--- /dev/null
+++ b/src/lj_record.h
@@ -0,0 +1,17 @@
1/*
2** Trace recorder (bytecode -> SSA IR).
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_RECORD_H
7#define _LJ_RECORD_H
8
9#include "lj_obj.h"
10#include "lj_jit.h"
11
12#if LJ_HASJIT
13LJ_FUNC void lj_record_ins(jit_State *J);
14LJ_FUNC void lj_record_setup(jit_State *J);
15#endif
16
17#endif
diff --git a/src/lj_snap.c b/src/lj_snap.c
new file mode 100644
index 00000000..09cd095c
--- /dev/null
+++ b/src/lj_snap.c
@@ -0,0 +1,286 @@
1/*
2** Snapshot handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_snap_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_state.h"
15#include "lj_frame.h"
16#include "lj_ir.h"
17#include "lj_jit.h"
18#include "lj_iropt.h"
19#include "lj_trace.h"
20#include "lj_snap.h"
21#include "lj_target.h"
22
23/* Some local macros to save typing. Undef'd at the end. */
24#define IR(ref) (&J->cur.ir[(ref)])
25
26/* -- Snapshot generation ------------------------------------------------- */
27
28/* NYI: Snapshots are in need of a redesign. The current storage model for
29** snapshot maps is too wasteful. They could be compressed (1D or 2D) and
30** made more flexible at the same time. Iterators should no longer need to
31** skip unmodified slots. IR_FRAME should be eliminated, too.
32*/
33
34/* Add all modified slots to the snapshot. */
35static void snapshot_slots(jit_State *J, IRRef2 *map, BCReg nslots)
36{
37 BCReg s;
38 for (s = 0; s < nslots; s++) {
39 IRRef ref = tref_ref(J->slot[s]);
40 if (ref) {
41 IRIns *ir = IR(ref);
42 if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT))
43 ref = 0;
44 }
45 map[s] = (IRRef2)ref;
46 }
47}
48
49/* Add frame links at the end of the snapshot. */
50static MSize snapshot_framelinks(jit_State *J, IRRef2 *map)
51{
52 cTValue *frame = J->L->base - 1;
53 cTValue *lim = J->L->base - J->baseslot;
54 MSize f = 0;
55 map[f++] = u32ptr(J->pc);
56 while (frame > lim) {
57 if (frame_islua(frame)) {
58 map[f++] = u32ptr(frame_pc(frame));
59 frame = frame_prevl(frame);
60 } else if (frame_ispcall(frame)) {
61 map[f++] = (uint32_t)frame_ftsz(frame);
62 frame = frame_prevd(frame);
63 } else if (frame_iscont(frame)) {
64 map[f++] = (uint32_t)frame_ftsz(frame);
65 map[f++] = u32ptr(frame_contpc(frame));
66 frame = frame_prevd(frame);
67 } else {
68 lua_assert(0);
69 }
70 }
71 return f;
72}
73
74/* Take a snapshot of the current stack. */
75static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
76{
77 BCReg nslots = J->baseslot + J->maxslot;
78 MSize nsm, nframelinks;
79 IRRef2 *p;
80 /* Conservative estimate. Continuation frames need 2 slots. */
81 nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1;
82 if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */
83 if (nsm < 2*J->sizesnapmap)
84 nsm = 2*J->sizesnapmap;
85 else if (nsm < 64)
86 nsm = 64;
87 J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf,
88 J->sizesnapmap*sizeof(IRRef2), nsm*sizeof(IRRef2));
89 J->cur.snapmap = J->snapmapbuf;
90 J->sizesnapmap = nsm;
91 }
92 p = &J->cur.snapmap[nsnapmap];
93 snapshot_slots(J, p, nslots);
94 nframelinks = snapshot_framelinks(J, p + nslots);
95 J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks);
96 snap->mapofs = (uint16_t)nsnapmap;
97 snap->ref = (IRRef1)J->cur.nins;
98 snap->nslots = (uint8_t)nslots;
99 snap->nframelinks = (uint8_t)nframelinks;
100 snap->count = 0;
101}
102
103/* Add or merge a snapshot. */
104void lj_snap_add(jit_State *J)
105{
106 MSize nsnap = J->cur.nsnap;
107 MSize nsnapmap = J->cur.nsnapmap;
108  /* Merge if no ins. in between or if requested and no guard in between. */
109 if (J->mergesnap ? !irt_isguard(J->guardemit) :
110 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
111 nsnapmap = J->cur.snap[--nsnap].mapofs;
112 } else {
113 /* Need to grow snapshot buffer? */
114 if (LJ_UNLIKELY(nsnap >= J->sizesnap)) {
115 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
116 if (nsnap >= maxsnap)
117 lj_trace_err(J, LJ_TRERR_SNAPOV);
118 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
119 J->cur.snap = J->snapbuf;
120 }
121 J->cur.nsnap = (uint16_t)(nsnap+1);
122 }
123 J->mergesnap = 0;
124 J->guardemit.irt = 0;
125 snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
126}
127
128/* Shrink last snapshot. */
129void lj_snap_shrink(jit_State *J)
130{
131 BCReg nslots = J->baseslot + J->maxslot;
132 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
133 IRRef2 *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots];
134 IRRef2 *nflinks = &J->cur.snapmap[snap->mapofs + nslots];
135 uint32_t s, nframelinks = snap->nframelinks;
136 lua_assert(nslots < snap->nslots);
137 snap->nslots = (uint8_t)nslots;
138 J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks);
139 for (s = 0; s < nframelinks; s++) /* Move frame links down. */
140 nflinks[s] = oflinks[s];
141}
142
143/* -- Snapshot access ----------------------------------------------------- */
144
145/* Initialize a Bloom Filter with all renamed refs.
146** There are very few renames (often none), so the filter has
147** very few bits set. This makes it suitable for negative filtering.
148*/
149static BloomFilter snap_renamefilter(Trace *T, SnapNo lim)
150{
151 BloomFilter rfilt = 0;
152 IRIns *ir;
153 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
154 if (ir->op2 <= lim)
155 bloomset(rfilt, ir->op1);
156 return rfilt;
157}
158
159/* Process matching renames to find the original RegSP. */
160static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
161{
162 IRIns *ir;
163 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
164 if (ir->op1 == ref && ir->op2 <= lim)
165 rs = ir->prev;
166 return rs;
167}
168
169/* Convert a snapshot into a linear slot -> RegSP map. */
170void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
171{
172 SnapShot *snap = &T->snap[snapno];
173 BCReg s, nslots = snap->nslots;
174 IRRef2 *map = &T->snapmap[snap->mapofs];
175 BloomFilter rfilt = snap_renamefilter(T, snapno);
176 for (s = 0; s < nslots; s++) {
177 IRRef ref = snap_ref(map[s]);
178 if (!irref_isk(ref)) {
179 IRIns *ir = &T->ir[ref];
180 uint32_t rs = ir->prev;
181 if (bloomtest(rfilt, ref))
182 rs = snap_renameref(T, snapno, ref, rs);
183 rsmap[s] = (uint16_t)rs;
184 }
185 }
186}
187
188/* Restore interpreter state from exit state with the help of a snapshot. */
189void lj_snap_restore(jit_State *J, void *exptr)
190{
191 ExitState *ex = (ExitState *)exptr;
192 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
193 Trace *T = J->trace[J->parent];
194 SnapShot *snap = &T->snap[snapno];
195 BCReg s, nslots = snap->nslots;
196 IRRef2 *map = &T->snapmap[snap->mapofs];
197 IRRef2 *flinks = map + nslots + snap->nframelinks;
198 TValue *o, *newbase, *ntop;
199 BloomFilter rfilt = snap_renamefilter(T, snapno);
200 lua_State *L = J->L;
201
202 /* Make sure the stack is big enough for the slots from the snapshot. */
203 if (L->base + nslots >= L->maxstack) {
204 L->top = curr_topL(L);
205 lj_state_growstack(L, nslots - curr_proto(L)->framesize);
206 }
207
208 /* Fill stack slots with data from the registers and spill slots. */
209 newbase = NULL;
210 ntop = L->base;
211 for (s = 0, o = L->base-1; s < nslots; s++, o++) {
212 IRRef ref = snap_ref(map[s]);
213 if (ref) {
214 IRIns *ir = &T->ir[ref];
215 if (irref_isk(ref)) { /* Restore constant slot. */
216 lj_ir_kvalue(L, o, ir);
217 } else {
218 IRType1 t = ir->t;
219 RegSP rs = ir->prev;
220 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
221 rs = snap_renameref(T, snapno, ref, rs);
222 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
223 int32_t *sps = &ex->spill[regsp_spill(rs)];
224 if (irt_isinteger(t)) {
225 setintV(o, *sps);
226 } else if (irt_isnum(t)) {
227 o->u64 = *(uint64_t *)sps;
228 } else {
229 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
230 setgcrefi(o->gcr, *sps);
231 setitype(o, irt_toitype(t));
232 }
233 } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
234 Reg r = regsp_reg(rs);
235 if (irt_isinteger(t)) {
236 setintV(o, ex->gpr[r-RID_MIN_GPR]);
237 } else if (irt_isnum(t)) {
238 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
239 } else {
240 if (!irt_ispri(t))
241 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
242 setitype(o, irt_toitype(t));
243 }
244 } else { /* Restore frame slot. */
245 lua_assert(ir->o == IR_FRAME);
246 /* This works for both PTR and FUNC IR_FRAME. */
247 setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
248 if (s != 0) /* Do not overwrite link to previous frame. */
249 o->fr.tp.ftsz = (int32_t)*--flinks;
250 if (irt_isfunc(ir->t)) {
251 GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
252 if (isluafunc(fn)) {
253 TValue *fs;
254 newbase = o+1;
255 fs = newbase + funcproto(fn)->framesize;
256 if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
257 }
258 }
259 }
260 }
261 } else if (newbase) {
262 setnilV(o); /* Clear unreferenced slots of newly added frames. */
263 }
264 }
265 if (newbase) { /* Clear remainder of newly added frames. */
266 L->base = newbase;
267 if (ntop >= L->maxstack) { /* Need to grow the stack again. */
268 MSize need = (MSize)(ntop - o);
269 L->top = o;
270 lj_state_growstack(L, need);
271 o = L->top;
272 ntop = o + need;
273 }
274 L->top = curr_topL(L);
275 for (; o < ntop; o++)
276 setnilV(o);
277 } else { /* Must not clear slots of existing frame. */
278 L->top = curr_topL(L);
279 }
280 lua_assert(map + nslots == flinks-1);
281 J->pc = (const BCIns *)(uintptr_t)(*--flinks);
282}
283
284#undef IR
285
286#endif
diff --git a/src/lj_snap.h b/src/lj_snap.h
new file mode 100644
index 00000000..806047b1
--- /dev/null
+++ b/src/lj_snap.h
@@ -0,0 +1,19 @@
1/*
2** Snapshot handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_SNAP_H
7#define _LJ_SNAP_H
8
9#include "lj_obj.h"
10#include "lj_jit.h"
11
12#if LJ_HASJIT
13LJ_FUNC void lj_snap_add(jit_State *J);
14LJ_FUNC void lj_snap_shrink(jit_State *J);
15LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno);
16LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr);
17#endif
18
19#endif
diff --git a/src/lj_state.c b/src/lj_state.c
new file mode 100644
index 00000000..b4bc7a0c
--- /dev/null
+++ b/src/lj_state.c
@@ -0,0 +1,255 @@
1/*
2** State and stack handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_state_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_tab.h"
17#include "lj_func.h"
18#include "lj_meta.h"
19#include "lj_state.h"
20#include "lj_frame.h"
21#include "lj_trace.h"
22#include "lj_dispatch.h"
23#include "lj_vm.h"
24#include "lj_lex.h"
25#include "lj_alloc.h"
26
27/* -- Stack handling ------------------------------------------------------ */
28
29/* Stack sizes. */
30#define LJ_STACK_MIN LUA_MINSTACK /* Min. stack size. */
31#define LJ_STACK_MAX LUAI_MAXSTACK /* Max. stack size. */
32#define LJ_STACK_START (2*LJ_STACK_MIN) /* Starting stack size. */
33#define LJ_STACK_MAXEX (LJ_STACK_MAX + 1 + LJ_STACK_EXTRA)
34
35/* Explanation of LJ_STACK_EXTRA:
36**
37** Calls to metamethods store their arguments beyond the current top
38** without checking for the stack limit. This avoids stack resizes which
39** would invalidate passed TValue pointers. The stack check is performed
40** later by the call gate. This can safely resize the stack or raise an
41** error. Thus we need some extra slots beyond the current stack limit.
42**
43** Most metamethods need 4 slots above top (cont, mobj, arg1, arg2) plus
44** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
45** slots above top, but then mobj is always a function. So we can get by
46** with 5 extra slots.
47*/
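A minimal sketch of the pattern this reserve enables (not from this file; mo, a and b are hypothetical locals, and the real callers live in lj_meta.c):

  TValue *top = L->top;
  /* cont would occupy top+0 in the scheme described above (sketch only). */
  copyTV(L, top+1, mo);  /* mobj */
  copyTV(L, top+2, a);   /* arg1 */
  copyTV(L, top+3, b);   /* arg2 */
  /* No stack check here: at most 5 slots beyond top are written, which the
  ** LJ_STACK_EXTRA reserve guarantees. The call gate checks and may resize
  ** the stack later, once no TValue pointers into it are held.
  */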
48
49/* Resize stack slots and adjust pointers in state. */
50static void resizestack(lua_State *L, MSize n)
51{
52 TValue *oldst = L->stack;
53 ptrdiff_t delta;
54 MSize realsize = n + 1 + LJ_STACK_EXTRA;
55 GCobj *up;
56 lua_assert((MSize)(L->maxstack-L->stack) == L->stacksize-LJ_STACK_EXTRA-1);
57 lj_mem_reallocvec(L, L->stack, L->stacksize, realsize, TValue);
58 delta = (char *)L->stack - (char *)oldst;
59 L->maxstack = L->stack + n;
60 L->stacksize = realsize;
61 L->base = (TValue *)((char *)L->base + delta);
62 L->top = (TValue *)((char *)L->top + delta);
63 for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
64 gco2uv(up)->v = (TValue *)((char *)gco2uv(up)->v + delta);
65 if (obj2gco(L) == gcref(G(L)->jit_L))
66 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
67}
68
69/* Relimit stack after error, in case the limit was overdrawn. */
70void lj_state_relimitstack(lua_State *L)
71{
72 if (L->stacksize > LJ_STACK_MAXEX && L->top - L->stack < LJ_STACK_MAX-1)
73 resizestack(L, LJ_STACK_MAX);
74}
75
76/* Try to shrink the stack (called from GC). */
77void lj_state_shrinkstack(lua_State *L, MSize used)
78{
79 if (L->stacksize > LJ_STACK_MAXEX)
80 return; /* Avoid stack shrinking while handling stack overflow. */
81 if (4*used < L->stacksize &&
82 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
83 obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */
84 resizestack(L, L->stacksize >> 1);
85}
86
87/* Try to grow stack. */
88void lj_state_growstack(lua_State *L, MSize need)
89{
90 if (L->stacksize > LJ_STACK_MAXEX) /* overflow while handling overflow? */
91 lj_err_throw(L, LUA_ERRERR);
92 resizestack(L, L->stacksize + (need > L->stacksize ? need : L->stacksize));
93 if (L->stacksize > LJ_STACK_MAXEX) {
94 if (curr_funcisL(L)) { /* Clear slots of incomplete Lua frame. */
95 TValue *top = curr_topL(L);
96 while (--top >= L->top) setnilV(top);
97 }
98 lj_err_msg(L, LJ_ERR_STKOV); /* ... to allow L->top = curr_topL(L). */
99 }
100}
101
102void lj_state_growstack1(lua_State *L)
103{
104 lj_state_growstack(L, 1);
105}
106
107/* Allocate basic stack for new state. */
108static void stack_init(lua_State *L1, lua_State *L)
109{
110 L1->stack = lj_mem_newvec(L, LJ_STACK_START + LJ_STACK_EXTRA, TValue);
111 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
112 L1->top = L1->stack;
113 L1->maxstack = L1->stack+(L1->stacksize - LJ_STACK_EXTRA)-1;
114 setthreadV(L1, L1->top, L1); /* needed for curr_funcisL() on empty stack */
115 setnilV(L1->top); /* but clear its type */
116 L1->base = ++L1->top;
117}
118
119/* -- State handling ------------------------------------------------------ */
120
121/* Open parts that may cause memory-allocation errors. */
122static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
123{
124 global_State *g = G(L);
125 UNUSED(dummy);
126 UNUSED(ud);
127 stack_init(L, L);
128 /* NOBARRIER: State initialization, all objects are white. */
129 setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL)));
130 settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY));
131 lj_str_resize(L, LJ_MIN_STRTAB-1);
132 lj_meta_init(L);
133 lj_lex_init(L);
134 fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */
135 g->gc.threshold = 4*g->gc.total;
136 return NULL;
137}
138
139static void close_state(lua_State *L)
140{
141 global_State *g = G(L);
142#ifndef LUAJIT_USE_SYSMALLOC
143 if (g->allocf == lj_alloc_f) {
144 lj_alloc_destroy(g->allocd);
145 } else
146#endif
147 {
148 lj_func_closeuv(L, L->stack);
149 lj_gc_freeall(g);
150 lua_assert(gcref(g->gc.root) == obj2gco(L));
151 lua_assert(g->strnum == 0);
152 lj_trace_freestate(g);
153 lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *);
154 lj_str_freebuf(g, &g->tmpbuf);
155 lj_mem_freevec(g, L->stack, L->stacksize, TValue);
156 lua_assert(g->gc.total == sizeof(GG_State));
157 g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
158 }
159}
160
161LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
162{
163 GG_State *GG = cast(GG_State *, f(ud, NULL, 0, sizeof(GG_State)));
164 lua_State *L = &GG->L;
165 global_State *g = &GG->g;
166 if (GG == NULL) return NULL;
167 memset(GG, 0, sizeof(GG_State));
168 L->gct = ~LJ_TTHREAD;
169 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
170 L->dummy_ffid = FF_C;
171 setmref(L->glref, g);
172 g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED;
173 g->allocf = f;
174 g->allocd = ud;
175 setgcref(g->mainthref, obj2gco(L));
176 setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
177 setgcref(g->uvhead.next, obj2gco(&g->uvhead));
178 g->strmask = ~(MSize)0;
179 setnilV(registry(L));
180 setnilV(&g->nilnode.val);
181 setnilV(&g->nilnode.key);
182 lj_str_initbuf(L, &g->tmpbuf);
183 g->gc.state = GCSpause;
184 setgcref(g->gc.root, obj2gco(L));
185 g->gc.sweep = &g->gc.root;
186 g->gc.total = sizeof(GG_State);
187 g->gc.pause = LUAI_GCPAUSE;
188 g->gc.stepmul = LUAI_GCMUL;
189 lj_dispatch_init((GG_State *)L);
190 L->status = LUA_ERRERR+1; /* Avoid touching the stack upon memory error. */
191 if (lj_vm_cpcall(L, cpluaopen, NULL, NULL) != 0) {
192 /* Memory allocation error: free partial state. */
193 close_state(L);
194 return NULL;
195 }
196 L->status = 0;
197 return L;
198}
199
200static TValue *cpfinalize(lua_State *L, lua_CFunction dummy, void *ud)
201{
202 UNUSED(dummy);
203 UNUSED(ud);
204 lj_gc_finalizeudata(L);
205 /* Frame pop omitted. */
206 return NULL;
207}
208
209LUA_API void lua_close(lua_State *L)
210{
211 global_State *g = G(L);
212 L = mainthread(g); /* Only the main thread can be closed. */
213 lj_func_closeuv(L, L->stack);
214 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
215#if LJ_HASJIT
216 G2J(g)->flags &= ~JIT_F_ON;
217 G2J(g)->state = LJ_TRACE_IDLE;
218 lj_dispatch_update(g);
219#endif
220 do {
221 hook_enter(g);
222 L->status = 0;
223 L->cframe = NULL;
224 L->base = L->top = L->stack + 1;
225 } while (lj_vm_cpcall(L, cpfinalize, NULL, NULL) != 0);
226 close_state(L);
227}
228
229lua_State *lj_state_new(lua_State *L)
230{
231 lua_State *L1 = lj_mem_newobj(L, lua_State);
232 L1->gct = ~LJ_TTHREAD;
233 L1->dummy_ffid = FF_C;
234 L1->status = 0;
235 L1->stacksize = 0;
236 L1->stack = NULL;
237 L1->cframe = NULL;
238 /* NOBARRIER: The lua_State is new (marked white). */
239 setgcrefnull(L1->openupval);
240 setmrefr(L1->glref, L->glref);
241 setgcrefr(L1->env, L->env);
242 stack_init(L1, L); /* init stack */
243 lua_assert(iswhite(obj2gco(L1)));
244 return L1;
245}
246
247void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
248{
249 lua_assert(L != mainthread(g));
250 lj_func_closeuv(L, L->stack);
251 lua_assert(gcref(L->openupval) == NULL);
252 lj_mem_freevec(g, L->stack, L->stacksize, TValue);
253 lj_mem_freet(g, L);
254}
255
diff --git a/src/lj_state.h b/src/lj_state.h
new file mode 100644
index 00000000..54e85405
--- /dev/null
+++ b/src/lj_state.h
@@ -0,0 +1,31 @@
1/*
2** State and stack handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STATE_H
7#define _LJ_STATE_H
8
9#include "lj_obj.h"
10
11#define incr_top(L) \
12 (++L->top >= L->maxstack && (lj_state_growstack1(L), 0))
13
14#define savestack(L, p) ((char *)(p) - (char *)L->stack)
15#define restorestack(L, n) ((TValue *)((char *)L->stack + (n)))
16
17LJ_FUNC void lj_state_relimitstack(lua_State *L);
18LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
19LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
20LJ_FUNCA void lj_state_growstack1(lua_State *L);
21
22static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
23{
24 if ((MSize)((char *)L->maxstack-(char *)L->top) <= need*(MSize)sizeof(TValue))
25 lj_state_growstack(L, need);
26}
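A usage sketch (assumed pattern, mirroring lj_str_pushvf in lj_str.c): a single push stores the value at L->top and bumps the top, while lj_state_checkstack() reserves several slots up front before a batch of stores. Here s is an interned GCstr that is assumed to exist.

  setstrV(L, L->top, s);       /* Store at the current top. */
  incr_top(L);                 /* Checks and grows the stack itself if needed. */
  lj_state_checkstack(L, 4);   /* Or reserve 4 slots before a batch of stores. */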
27
28LJ_FUNC lua_State *lj_state_new(lua_State *L);
29LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L);
30
31#endif
diff --git a/src/lj_str.c b/src/lj_str.c
new file mode 100644
index 00000000..26f91cba
--- /dev/null
+++ b/src/lj_str.c
@@ -0,0 +1,301 @@
1/*
2** String handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <stdio.h>
10
11#define lj_str_c
12#define LUA_CORE
13
14#include "lj_obj.h"
15#include "lj_gc.h"
16#include "lj_err.h"
17#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_ctype.h"
20
21/* -- String interning ---------------------------------------------------- */
22
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t lj_str_cmp(GCstr *a, GCstr *b)
25{
26 MSize i, n = a->len > b->len ? b->len : a->len;
27 for (i = 0; i < n; i += 4) {
28 /* Note: innocuous access up to end of string + 3. */
29 uint32_t va = *(const uint32_t *)(strdata(a)+i);
30 uint32_t vb = *(const uint32_t *)(strdata(b)+i);
31 if (va != vb) {
32#if LJ_ARCH_ENDIAN == LUAJIT_LE
33 va = lj_bswap(va); vb = lj_bswap(vb);
34#endif
35 i -= n;
36 if ((int32_t)i >= -3) {
37 va >>= 32+(i<<3); vb >>= 32+(i<<3);
38 if (va == vb) break;
39 }
40 return (int32_t)(va - vb);
41 }
42 }
43 return (int32_t)(a->len - b->len);
44}
45
46/* Resize the string hash table (grow and shrink). */
47void lj_str_resize(lua_State *L, MSize newmask)
48{
49 global_State *g = G(L);
50 GCRef *newhash;
51 MSize i;
52 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
53 return; /* No resizing during GC traversal or if already too big. */
54 newhash = lj_mem_newvec(L, newmask+1, GCRef);
55 memset(newhash, 0, (newmask+1)*sizeof(GCRef));
56 for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
57 GCobj *p = gcref(g->strhash[i]);
58 while (p) { /* Follow each hash chain and reinsert all strings. */
59 MSize h = gco2str(p)->hash & newmask;
60 GCobj *next = gcnext(p);
61 /* NOBARRIER: The string table is a GC root. */
62 setgcrefr(p->gch.nextgc, newhash[h]);
63 setgcref(newhash[h], p);
64 p = next;
65 }
66 }
67 lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *);
68 g->strmask = newmask;
69 g->strhash = newhash;
70}
71
72/* Intern a string and return string object. */
73GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
74{
75 global_State *g;
76 GCstr *s;
77 GCobj *o;
78 MSize len = (MSize)lenx;
79 MSize h = len;
80 MSize step = (len>>5)+1; /* Partial hash. */
81 MSize l1;
82 if (lenx >= LJ_MAX_STR)
83 lj_err_msg(L, LJ_ERR_STROV);
84 for (l1 = len; l1 >= step; l1 -= step) /* Compute string hash. */
85 h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
86 /* Check if the string has already been interned. */
87 g = G(L);
88 for (o = gcref(g->strhash[h & g->strmask]); o != NULL; o = gcnext(o)) {
89 GCstr *tso = gco2str(o);
90 if (tso->len == len && (memcmp(str, strdata(tso), len) == 0)) {
91 if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
92 return tso; /* Return existing string. */
93 }
94 }
95 /* Nope, create a new string. */
96 s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
97 newwhite(g, s);
98 s->gct = ~LJ_TSTR;
99 s->len = len;
100 s->hash = h;
101 s->reserved = 0;
102 memcpy(strdatawr(s), str, len);
103 strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
104 /* Add it to string hash table. */
105 h &= g->strmask;
106 s->nextgc = g->strhash[h];
107 /* NOBARRIER: The string table is a GC root. */
108 setgcref(g->strhash[h], obj2gco(s));
109 if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
110 lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
111 return s; /* Return newly interned string. */
112}
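Since strings are interned, equal contents always yield the same GCstr object, which is why the lookups in lj_tab.c can compare string keys by pointer. A short usage sketch:

  GCstr *a = lj_str_new(L, "print", 5);
  GCstr *b = lj_str_new(L, "print", 5);
  lua_assert(a == b);  /* Same interned object for equal contents. */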
113
114void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
115{
116 g->strnum--;
117 lj_mem_free(g, s, sizestring(s));
118}
119
120/* -- Type conversions ---------------------------------------------------- */
121
122/* Convert string to number. */
123int lj_str_numconv(const char *s, TValue *n)
124{
125 lua_Number sign = 1;
126 const uint8_t *p = (const uint8_t *)s;
127 while (lj_ctype_isspace(*p)) p++;
128 if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; }
129 if ((uint32_t)(*p - '0') < 10) {
130 uint32_t k = (uint32_t)(*p++ - '0');
131 if (k == 0 && ((*p & ~0x20) == 'X')) {
132 p++;
133 while (lj_ctype_isxdigit(*p)) {
134 if (k >= 0x10000000) goto parsedbl;
135 k = (k << 4) + (*p & 15u);
136 if (!lj_ctype_isdigit(*p)) k += 9;
137 p++;
138 }
139 } else {
140 while ((uint32_t)(*p - '0') < 10) {
141 if (k >= 0x19999999) goto parsedbl;
142 k = k * 10u + (uint32_t)(*p++ - '0');
143 }
144 }
145 while (LJ_UNLIKELY(lj_ctype_isspace(*p))) p++;
146 if (LJ_LIKELY(*p == '\0')) {
147 setnumV(n, sign * cast_num(k));
148 return 1;
149 }
150 }
151parsedbl:
152 {
153 TValue tv;
154 char *endptr;
155 setnumV(&tv, lua_str2number(s, &endptr));
156 if (endptr == s) return 0; /* conversion failed */
157 if (LJ_UNLIKELY(*endptr != '\0')) {
158 while (lj_ctype_isspace((uint8_t)*endptr)) endptr++;
159 if (*endptr != '\0') return 0; /* invalid trailing characters? */
160 }
161 if (LJ_LIKELY(!tvisnan(&tv)))
162 setnumV(n, numV(&tv));
163 else
164 setnanV(n); /* Canonicalize injected NaNs. */
165 return 1;
166 }
167}
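A small sketch of the conversion rules implemented above: leading/trailing whitespace and an optional sign are accepted, small decimal and hex literals take the fast paths, anything else falls through to lua_str2number(), and trailing garbage makes the conversion fail.

  TValue n;
  lua_assert(lj_str_numconv("  0x2a ", &n) && numV(&n) == 42.0);  /* Hex fast path. */
  lua_assert(lj_str_numconv("12e3x", &n) == 0);  /* Trailing garbage rejected. */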
168
169/* Convert number to string. */
170GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
171{
172 char s[LUAI_MAXNUMBER2STR];
173 lua_Number n = *np;
174 size_t len = (size_t)lua_number2str(s, n);
175 return lj_str_new(L, s, len);
176}
177
178/* Convert integer to string. */
179GCstr *lj_str_fromint(lua_State *L, int32_t k)
180{
181 char s[1+10];
182 char *p = s+sizeof(s);
183 uint32_t i = (uint32_t)(k < 0 ? -k : k);
184 do { *--p = (char)('0' + i % 10); } while (i /= 10);
185 if (k < 0) *--p = '-';
186 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
187}
188
189/* -- String formatting --------------------------------------------------- */
190
191static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
192{
193 char *p;
194 MSize i;
195 if (sb->n + len > sb->sz) {
196 MSize sz = sb->sz * 2;
197 while (sb->n + len > sz) sz = sz * 2;
198 lj_str_resizebuf(L, sb, sz);
199 }
200 p = sb->buf + sb->n;
201 sb->n += len;
202 for (i = 0; i < len; i++) p[i] = str[i];
203}
204
205static void addchar(lua_State *L, SBuf *sb, int c)
206{
207 if (sb->n + 1 > sb->sz) {
208 MSize sz = sb->sz * 2;
209 lj_str_resizebuf(L, sb, sz);
210 }
211 sb->buf[sb->n++] = cast(char, c);
212}
213
214/* Push formatted message as a string object to Lua stack. va_list variant. */
215const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
216{
217 SBuf *sb = &G(L)->tmpbuf;
218 lj_str_needbuf(L, sb, (MSize)strlen(fmt));
219 lj_str_resetbuf(sb);
220 for (;;) {
221 const char *e = strchr(fmt, '%');
222 if (e == NULL) break;
223 addstr(L, sb, fmt, (MSize)(e-fmt));
224 /* This function only handles %s, %c, %d, %f and %p formats. */
225 switch (e[1]) {
226 case 's': {
227 const char *s = va_arg(argp, char *);
228 if (s == NULL) s = "(null)";
229 addstr(L, sb, s, (MSize)strlen(s));
230 break;
231 }
232 case 'c':
233 addchar(L, sb, va_arg(argp, int));
234 break;
235 case 'd': {
236 char buff[1+10];
237 char *p = buff+sizeof(buff);
238 int32_t k = va_arg(argp, int32_t);
239 uint32_t i = (uint32_t)(k < 0 ? -k : k);
240 do { *--p = (char)('0' + i % 10); } while (i /= 10);
241 if (k < 0) *--p = '-';
242 addstr(L, sb, p, (MSize)(buff+sizeof(buff)-p));
243 break;
244 }
245 case 'f': {
246 char buff[LUAI_MAXNUMBER2STR];
247 lua_Number n = cast_num(va_arg(argp, LUAI_UACNUMBER));
248 MSize len = (MSize)lua_number2str(buff, n);
249 addstr(L, sb, buff, len);
250 break;
251 }
252 case 'p': {
253#define FMTP_CHARS (2*sizeof(ptrdiff_t))
254 char buff[2+FMTP_CHARS];
255 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
256 int i;
257 buff[0] = '0';
258 buff[1] = 'x';
259 for (i = 2+FMTP_CHARS-1; i >= 2; i--, p >>= 4)
260 buff[i] = "0123456789abcdef"[(p & 15)];
261 addstr(L, sb, buff, 2+FMTP_CHARS);
262 break;
263 }
264 case '%':
265 addchar(L, sb, '%');
266 break;
267 default:
268 addchar(L, sb, '%');
269 addchar(L, sb, e[1]);
270 break;
271 }
272 fmt = e+2;
273 }
274 addstr(L, sb, fmt, (MSize)strlen(fmt));
275 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
276 incr_top(L);
277 return strVdata(L->top - 1);
278}
279
280/* Push formatted message as a string object to Lua stack. Vararg variant. */
281const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
282{
283 const char *msg;
284 va_list argp;
285 va_start(argp, fmt);
286 msg = lj_str_pushvf(L, fmt, argp);
287 va_end(argp);
288 return msg;
289}
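A usage sketch; note that only the %s, %c, %d, %f, %p and %% conversions listed above are handled, so this is not a general printf replacement.

  const char *msg = lj_str_pushf(L, "bad key '%s' at index %d", "x", 3);
  /* The formatted string object is now on top of the Lua stack and msg
  ** points to its interned, zero-terminated data.
  */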
290
291/* -- Buffer handling ----------------------------------------------------- */
292
293char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
294{
295 if (sz > sb->sz) {
296 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
297 lj_str_resizebuf(L, sb, sz);
298 }
299 return sb->buf;
300}
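A sketch of the buffer protocol (assumed usage, mirroring lj_str_pushvf above): reserve capacity, reset the length, append bytes, then intern the result.

  SBuf *sb = &G(L)->tmpbuf;
  char *p = lj_str_needbuf(L, sb, 64);  /* Ensure capacity for 64 bytes. */
  lj_str_resetbuf(sb);
  memcpy(p, "hello", 5); sb->n = 5;     /* Append raw bytes (sketch only). */
  setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
  incr_top(L);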
301
diff --git a/src/lj_str.h b/src/lj_str.h
new file mode 100644
index 00000000..f7e56d16
--- /dev/null
+++ b/src/lj_str.h
@@ -0,0 +1,45 @@
1/*
2** String handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STR_H
7#define _LJ_STR_H
8
9#include <stdarg.h>
10
11#include "lj_obj.h"
12
13/* String interning. */
14LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
18
19#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21
22/* Type conversions. */
23LJ_FUNCA int lj_str_numconv(const char *s, TValue *n);
24LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np);
25LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k);
26
27/* String formatting. */
28LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
29LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
30#if defined(__GNUC__)
31 __attribute__ ((format (printf, 2, 3)))
32#endif
33 ;
34
35/* Resizable string buffers. Struct definition in lj_obj.h. */
36LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
37
38#define lj_str_initbuf(L, sb) ((sb)->buf = NULL, (sb)->sz = 0)
39#define lj_str_resetbuf(sb) ((sb)->n = 0)
40#define lj_str_resizebuf(L, sb, size) \
41 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
42 (sb)->sz = (size))
43#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
44
45#endif
diff --git a/src/lj_tab.c b/src/lj_tab.c
new file mode 100644
index 00000000..633ea20c
--- /dev/null
+++ b/src/lj_tab.c
@@ -0,0 +1,618 @@
1/*
2** Table handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#define lj_tab_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_tab.h"
16
17/* -- Object hashing ------------------------------------------------------ */
18
19/* Hash values are masked with the table hash mask and used as an index. */
20#define hashmask(t, x) (&noderef(t->node)[(x) & t->hmask])
21
22/* String hashes are precomputed when they are interned. */
23#define hashstr(t, s) hashmask(t, (s)->hash)
24
25#define hashnum(t, o) hashrot(t, (o)->u32.lo, (o)->u32.hi&0x7fffffff)
26#define hashgcref(t, r) hashrot(t, gcrefu(r), gcrefu(r)-0x04c11db7)
27
28/* Scramble the bits of numbers and pointers. */
29static LJ_AINLINE Node *hashrot(const GCtab *t, uint32_t lo, uint32_t hi)
30{
31 lo ^= hi; hi = lj_rol(hi, 14);
32 lo -= hi; hi = lj_rol(hi, 5);
33 hi ^= lo; hi -= lj_rol(lo, 27);
34 return hashmask(t, hi);
35}
36
37/* Hash an arbitrary key and return its anchor position in the hash table. */
38static Node *hashkey(const GCtab *t, cTValue *key)
39{
40 if (tvisstr(key))
41 return hashstr(t, strV(key));
42 else if (tvisnum(key))
43 return hashnum(t, key);
44 else if (tvisbool(key))
45 return hashmask(t, boolV(key));
46 else
47 return hashgcref(t, key->gcr);
48 /* Only hash 32 bits of lightuserdata on a 64 bit CPU. Good enough? */
49}
50
51/* -- Table creation and destruction -------------------------------------- */
52
53/* Create new hash part for table. */
54static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
55{
56 uint32_t hsize;
57 Node *node;
58 lua_assert(hbits != 0);
59 if (hbits > LJ_MAX_HBITS)
60 lj_err_msg(L, LJ_ERR_TABOV);
61 hsize = 1u << hbits;
62 node = lj_mem_newvec(L, hsize, Node);
63 setmref(t->node, node);
64 t->hmask = hsize-1;
65 setmref(t->lastfree, &node[hsize]);
66}
67
68/*
69** Q: Why all of these copies of t->hmask, t->node etc. to local variables?
70** A: Because alias analysis for C is _really_ tough.
71** Even state-of-the-art C compilers won't produce good code without this.
72*/
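An illustrative counter-example (hypothetical, not from the source): the same loop as clearhpart() below, but reading the fields through t on every iteration. The compiler cannot prove that the stores through n leave t->hmask and t->node unchanged, so it may reload them each time.

  static void clearhpart_noalias(GCtab *t)  /* Hypothetical variant. */
  {
    uint32_t i;
    for (i = 0; i <= t->hmask; i++) {       /* t->hmask may be reloaded. */
      Node *n = &noderef(t->node)[i];       /* t->node may be reloaded. */
      setmref(n->next, NULL);
      setnilV(&n->key);
      setnilV(&n->val);
    }
  }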
73
74/* Clear hash part of table. */
75static LJ_AINLINE void clearhpart(GCtab *t)
76{
77 uint32_t i, hmask = t->hmask;
78 Node *node = noderef(t->node);
79 lua_assert(t->hmask != 0);
80 for (i = 0; i <= hmask; i++) {
81 Node *n = &node[i];
82 setmref(n->next, NULL);
83 setnilV(&n->key);
84 setnilV(&n->val);
85 }
86}
87
88/* Clear array part of table. */
89static LJ_AINLINE void clearapart(GCtab *t)
90{
91 uint32_t i, asize = t->asize;
92 TValue *array = tvref(t->array);
93 for (i = 0; i < asize; i++)
94 setnilV(&array[i]);
95}
96
97/* Create a new table. Note: the slots are not initialized (yet). */
98static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
99{
100 GCtab *t;
101 global_State *g;
102 /* First try to colocate the array part. */
103 if (LJ_MAX_COLOSIZE && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
104 /* This is ugly. (sizeof(GCtab)&7) != 0. So prepend the colocated array. */
105 TValue *array = lj_mem_newt(L, sizetabcolo(asize), TValue);
106 t = cast(GCtab *, array + asize);
107 g = G(L);
108 setgcrefr(t->nextgc, g->gc.root);
109 setgcref(g->gc.root, obj2gco(t));
110 newwhite(g, t);
111 t->gct = ~LJ_TTAB;
112 t->nomm = cast_byte(~0);
113 t->colo = (int8_t)asize;
114 setmref(t->array, array);
115 setgcrefnull(t->metatable);
116 t->asize = asize;
117 t->hmask = 0;
118 setmref(t->node, &g->nilnode);
119 setmref(t->lastfree, &g->nilnode);
120 } else { /* Otherwise separately allocate the array part. */
121 t = lj_mem_newobj(L, GCtab);
122 t->gct = ~LJ_TTAB;
123 t->nomm = cast_byte(~0);
124 t->colo = 0;
125 setmref(t->array, NULL);
126 setgcrefnull(t->metatable);
127 t->asize = 0; /* In case the array allocation fails. */
128 t->hmask = 0;
129 g = G(L);
130 setmref(t->node, &g->nilnode);
131 setmref(t->lastfree, &g->nilnode);
132 if (asize > 0) {
133 if (asize > LJ_MAX_ASIZE)
134 lj_err_msg(L, LJ_ERR_TABOV);
135 setmref(t->array, lj_mem_newvec(L, asize, TValue));
136 t->asize = asize;
137 }
138 }
139 if (hbits)
140 newhpart(L, t, hbits);
141 return t;
142}
143
144/* Create a new table.
145**
146** IMPORTANT NOTE: The API differs from lua_createtable()!
147**
148** The array size is non-inclusive. E.g. asize=128 creates array slots
149** for 0..127, but not for 128. If you need slots 1..128, pass asize=129
150** (slot 0 is wasted in this case).
151**
152** The hash size is given in hash bits. hbits=0 means no hash part.
153** hbits=1 creates 2 hash slots, hbits=2 creates 4 hash slots and so on.
154*/
155GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
156{
157 GCtab *t = newtab(L, asize, hbits);
158 clearapart(t);
159 if (t->hmask > 0) clearhpart(t);
160 return t;
161}
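A usage sketch of the sizing rules in the note above: a table intended to hold t[1..128] plus a handful of non-integer keys.

  GCtab *t = lj_tab_new(L, 129, 2);  /* Array slots 0..128, 4 hash slots. */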
162
163/* Duplicate a table. */
164GCtab *lj_tab_dup(lua_State *L, const GCtab *kt)
165{
166 GCtab *t;
167 uint32_t asize, hmask;
168 t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0);
169 lua_assert(kt->asize == t->asize && kt->hmask == t->hmask);
170 t->nomm = 0; /* Keys with metamethod names may be present. */
171 asize = kt->asize;
172 if (asize > 0) {
173 TValue *array = tvref(t->array);
174 TValue *karray = tvref(kt->array);
175 if (asize < 64) { /* An inlined loop beats memcpy for < 512 bytes. */
176 uint32_t i;
177 for (i = 0; i < asize; i++)
178 copyTV(L, &array[i], &karray[i]);
179 } else {
180 memcpy(array, karray, asize*sizeof(TValue));
181 }
182 }
183 hmask = kt->hmask;
184 if (hmask > 0) {
185 uint32_t i;
186 Node *node = noderef(t->node);
187 Node *knode = noderef(kt->node);
188 ptrdiff_t d = (char *)node - (char *)knode;
189 setmref(t->lastfree, (Node *)((char *)noderef(kt->lastfree) + d));
190 for (i = 0; i <= hmask; i++) {
191 Node *kn = &knode[i];
192 Node *n = &node[i];
193 Node *next = nextnode(kn);
194 copyTV(L, &n->val, &kn->val);
195 copyTV(L, &n->key, &kn->key);
196 setmref(n->next, next == NULL? next : (Node *)((char *)next + d));
197 }
198 }
199 return t;
200}
201
202/* Free a table. */
203void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
204{
205 if (t->hmask > 0)
206 lj_mem_freevec(g, noderef(t->node), t->hmask+1, Node);
207 if (LJ_MAX_COLOSIZE && t->colo) {
208 ptrdiff_t n;
209 if (t->colo < 0 && t->asize > 0) /* Array part was separated. */
210 lj_mem_freevec(g, tvref(t->array), t->asize, TValue);
211 n = t->colo & 0x7f;
212 lj_mem_free(g, (TValue *)t - n, sizetabcolo((uint32_t)n));
213 } else {
214 if (t->asize > 0)
215 lj_mem_freevec(g, tvref(t->array), t->asize, TValue);
216 lj_mem_freet(g, t);
217 }
218}
219
220/* -- Table resizing ------------------------------------------------------ */
221
222/* Resize a table to fit the new array/hash part sizes. */
223static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
224{
225 Node *oldnode = noderef(t->node);
226 uint32_t oldasize = t->asize;
227 uint32_t oldhmask = t->hmask;
228 if (asize > oldasize) { /* Array part grows? */
229 TValue *array;
230 uint32_t i;
231 if (asize > LJ_MAX_ASIZE)
232 lj_err_msg(L, LJ_ERR_TABOV);
233 if (LJ_MAX_COLOSIZE && t->colo > 0) {
234 /* A colocated array must be separated and copied. */
235 TValue *oarray = tvref(t->array);
236 array = lj_mem_newvec(L, asize, TValue);
237 t->colo = (int8_t)(t->colo | 0x80); /* Mark as separated (colo < 0). */
238 for (i = 0; i < oldasize; i++)
239 copyTV(L, &array[i], &oarray[i]);
240 } else {
241 array = (TValue *)lj_mem_realloc(L, tvref(t->array),
242 oldasize*sizeof(TValue), asize*sizeof(TValue));
243 }
244 setmref(t->array, array);
245 t->asize = asize;
246 for (i = oldasize; i < asize; i++) /* Clear newly allocated slots. */
247 setnilV(&array[i]);
248 }
249 /* Create new (empty) hash part. */
250 if (hbits) {
251 newhpart(L, t, hbits);
252 clearhpart(t);
253 } else {
254 global_State *g = G(L);
255 setmref(t->node, &g->nilnode);
256 setmref(t->lastfree, &g->nilnode);
257 t->hmask = 0;
258 }
259 if (asize < oldasize) { /* Array part shrinks? */
260 TValue *array = tvref(t->array);
261 uint32_t i;
262 t->asize = asize; /* Note: This 'shrinks' even colocated arrays. */
263 for (i = asize; i < oldasize; i++) /* Reinsert old array values. */
264 if (!tvisnil(&array[i]))
265 copyTV(L, lj_tab_setinth(L, t, (int32_t)i), &array[i]);
266 /* Physically shrink only separated arrays. */
267 if (LJ_MAX_COLOSIZE && t->colo <= 0)
268 setmref(t->array, lj_mem_realloc(L, array,
269 oldasize*sizeof(TValue), asize*sizeof(TValue)));
270 }
271 if (oldhmask > 0) { /* Reinsert pairs from old hash part. */
272 global_State *g;
273 uint32_t i;
274 for (i = 0; i <= oldhmask; i++) {
275 Node *n = &oldnode[i];
276 if (!tvisnil(&n->val))
277 copyTV(L, lj_tab_set(L, t, &n->key), &n->val);
278 }
279 g = G(L);
280 lj_mem_freevec(g, oldnode, oldhmask+1, Node);
281 }
282}
283
284static uint32_t countint(cTValue *key, uint32_t *bins)
285{
286 if (tvisnum(key)) {
287 lua_Number nk = numV(key);
288 int32_t k = lj_num2int(nk);
289 if ((uint32_t)k < LJ_MAX_ASIZE && nk == cast_num(k)) {
290 bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++;
291 return 1;
292 }
293 }
294 return 0;
295}
296
297static uint32_t countarray(const GCtab *t, uint32_t *bins)
298{
299 uint32_t na, b, i;
300 if (t->asize == 0) return 0;
301 for (na = i = b = 0; b < LJ_MAX_ABITS; b++) {
302 uint32_t n, top = 2u << b;
303 TValue *array;
304 if (top >= t->asize) {
305 top = t->asize-1;
306 if (i > top)
307 break;
308 }
309 array = tvref(t->array);
310 for (n = 0; i <= top; i++)
311 if (!tvisnil(&array[i]))
312 n++;
313 bins[b] += n;
314 na += n;
315 }
316 return na;
317}
318
319static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray)
320{
321 uint32_t total, na, i, hmask = t->hmask;
322 Node *node = noderef(t->node);
323 for (total = na = 0, i = 0; i <= hmask; i++) {
324 Node *n = &node[i];
325 if (!tvisnil(&n->val)) {
326 na += countint(&n->key, bins);
327 total++;
328 }
329 }
330 *narray += na;
331 return total;
332}
333
334static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
335{
336 uint32_t b, sum, na = 0, sz = 0, nn = *narray;
337 for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++)
338 if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) {
339 sz = (2u<<b)+1;
340 na = sum;
341 }
342 *narray = sz;
343 return na;
344}
345
346static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
347{
348 uint32_t bins[LJ_MAX_ABITS];
349 uint32_t total, asize, na, i;
350 for (i = 0; i < LJ_MAX_ABITS; i++) bins[i] = 0;
351 asize = countarray(t, bins);
352 total = 1 + asize + counthash(t, bins, &asize);
353 asize += countint(ek, bins);
354 na = bestasize(bins, &asize);
355 total -= na;
356 resizetab(L, t, asize, hsize2hbits(total));
357}
358
359void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
360{
361 resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
362}
363
364/* -- Table getters ------------------------------------------------------- */
365
366cTValue *lj_tab_getinth(GCtab *t, int32_t key)
367{
368 TValue k;
369 Node *n;
370 k.n = cast_num(key);
371 n = hashnum(t, &k);
372 do {
373 if (tvisnum(&n->key) && n->key.n == k.n)
374 return &n->val;
375 } while ((n = nextnode(n)));
376 return NULL;
377}
378
379cTValue *lj_tab_getstr(GCtab *t, GCstr *key)
380{
381 Node *n = hashstr(t, key);
382 do {
383 if (tvisstr(&n->key) && strV(&n->key) == key)
384 return &n->val;
385 } while ((n = nextnode(n)));
386 return NULL;
387}
388
389cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key)
390{
391 if (tvisstr(key)) {
392 cTValue *tv = lj_tab_getstr(t, strV(key));
393 if (tv)
394 return tv;
395 } else if (tvisnum(key)) {
396 lua_Number nk = numV(key);
397 int32_t k = lj_num2int(nk);
398 if (nk == cast_num(k)) {
399 cTValue *tv = lj_tab_getint(t, k);
400 if (tv)
401 return tv;
402 } else {
403 goto genlookup; /* Else use the generic lookup. */
404 }
405 } else if (!tvisnil(key)) {
406 Node *n;
407 genlookup:
408 n = hashkey(t, key);
409 do {
410 if (lj_obj_equal(&n->key, key))
411 return &n->val;
412 } while ((n = nextnode(n)));
413 }
414 return niltv(L);
415}
416
417/* -- Table setters ------------------------------------------------------- */
418
419static Node *getfreepos(GCtab *t)
420{
421 Node *node = noderef(t->node);
422 Node *lastfree = noderef(t->lastfree);
423 while (lastfree > node) {
424 lastfree--;
425 setmref(t->lastfree, lastfree);
426 if (tvisnil(&lastfree->key))
427 return lastfree;
428 }
429 return NULL; /* could not find a free place */
430}
431
432/*
433** inserts a new key into a hash table; first, check whether key's main
434** position is free. If not, check whether colliding node is in its main
435** position or not: if it is not, move colliding node to an empty place and
436** put new key in its main position; otherwise (colliding node is in its main
437** position), new key goes to an empty position.
438*/
439TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
440{
441 Node *mp = hashkey(t, key);
442 if (!tvisnil(&mp->val) || t->hmask == 0) {
443 Node *othern;
444 Node *n = getfreepos(t); /* get a free place */
445 if (n == NULL) { /* cannot find a free place? */
446 rehashtab(L, t, key); /* grow table */
447 return lj_tab_set(L, t, key); /* re-insert key into grown table */
448 }
449 lua_assert(n != &G(L)->nilnode);
450 othern = hashkey(t, &mp->key);
451 if (othern != mp) { /* is colliding node out of its main position? */
452 /* yes; move colliding node into free position */
453 while (noderef(othern->next) != mp)
454 othern = nextnode(othern); /* find previous */
455 setmref(othern->next, n); /* redo the chain with `n' in place of `mp' */
456 *n = *mp; /* copy colliding node into free pos. (mp->next also goes) */
457 setmref(mp->next, NULL); /* now `mp' is free */
458 setnilV(&mp->val);
459 } else { /* colliding node is in its own main position */
460 /* new node will go into free position */
461 setmrefr(n->next, mp->next); /* chain new position */
462 setmref(mp->next, n);
463 mp = n;
464 }
465 }
466 mp->key.u64 = key->u64;
467 if (LJ_UNLIKELY(tvismzero(&mp->key)))
468 mp->key.u64 = 0;
469 lj_gc_barriert(L, t, key);
470 lua_assert(tvisnil(&mp->val));
471 return &mp->val;
472}
473
474TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
475{
476 TValue k;
477 Node *n;
478 k.n = cast_num(key);
479 n = hashnum(t, &k);
480 do {
481 if (tvisnum(&n->key) && n->key.n == k.n)
482 return &n->val;
483 } while ((n = nextnode(n)));
484 return lj_tab_newkey(L, t, &k);
485}
486
487TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key)
488{
489 TValue k;
490 Node *n = hashstr(t, key);
491 do {
492 if (tvisstr(&n->key) && strV(&n->key) == key)
493 return &n->val;
494 } while ((n = nextnode(n)));
495 setstrV(L, &k, key);
496 return lj_tab_newkey(L, t, &k);
497}
498
499TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
500{
501 Node *n;
502 t->nomm = 0; /* Invalidate negative metamethod cache. */
503 if (tvisstr(key)) {
504 return lj_tab_setstr(L, t, strV(key));
505 } else if (tvisnum(key)) {
506 lua_Number nk = numV(key);
507 int32_t k = lj_num2int(nk);
508 if (nk == cast_num(k))
509 return lj_tab_setint(L, t, k);
510 if (tvisnan(key))
511 lj_err_msg(L, LJ_ERR_NANIDX);
512 /* Else use the generic lookup. */
513 } else if (tvisnil(key)) {
514 lj_err_msg(L, LJ_ERR_NILIDX);
515 }
516 n = hashkey(t, key);
517 do {
518 if (lj_obj_equal(&n->key, key))
519 return &n->val;
520 } while ((n = nextnode(n)));
521 return lj_tab_newkey(L, t, key);
522}
523
524/* -- Table traversal ----------------------------------------------------- */
525
526/* Get the traversal index of a key. */
527static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key)
528{
529 if (tvisnum(key)) {
530 lua_Number nk = numV(key);
531 int32_t k = lj_num2int(nk);
532 if ((uint32_t)k < t->asize && nk == cast_num(k))
533 return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */
534 }
535 if (!tvisnil(key)) {
536 Node *n = hashkey(t, key);
537 do {
538 if (lj_obj_equal(&n->key, key) ||
539 (itype(&n->key) == LJ_TDEADKEY && tvisgcv(key) &&
540 gcV(&n->key) == gcV(key)))
541 return t->asize + (uint32_t)(n - noderef(t->node));
542  /* Hash key indexes: [t->asize..t->asize+t->hmask] */
543 } while ((n = nextnode(n)));
544 lj_err_msg(L, LJ_ERR_NEXTIDX);
545 return 0; /* unreachable */
546 }
547 return ~0u; /* A nil key starts the traversal. */
548}
549
550/* Advance to the next step in a table traversal. */
551int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
552{
553 uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */
554 for (i++; i < t->asize; i++) /* First traverse the array keys. */
555 if (!tvisnil(arrayslot(t, i))) {
556 setintV(key, i);
557 copyTV(L, key+1, arrayslot(t, i));
558 return 1;
559 }
560 for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */
561 Node *n = &noderef(t->node)[i];
562 if (!tvisnil(&n->val)) {
563 copyTV(L, key, &n->key);
564 copyTV(L, key+1, &n->val);
565 return 1;
566 }
567 }
568 return 0; /* End of traversal. */
569}
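A usage sketch (assumes two free, adjacent stack slots at L->top): start with a nil key and call lj_tab_next() until it returns 0.

  TValue *kv = L->top;  /* Key at kv, value at kv+1 after each step. */
  setnilV(kv);          /* A nil key starts the traversal. */
  while (lj_tab_next(L, t, kv)) {
    /* kv now holds the current key and kv+1 the current value. */
  }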
570
571/* -- Table length calculation -------------------------------------------- */
572
573static MSize unbound_search(GCtab *t, MSize j)
574{
575 cTValue *tv;
576 MSize i = j; /* i is zero or a present index */
577 j++;
578 /* find `i' and `j' such that i is present and j is not */
579 while ((tv = lj_tab_getint(t, cast(int32_t, j))) && !tvisnil(tv)) {
580 i = j;
581 j *= 2;
582 if (j > (MSize)(INT_MAX-2)) { /* overflow? */
583      /* Table was built with huge sparse indexes: resort to linear search. */
584 i = 1;
585 while ((tv = lj_tab_getint(t, cast(int32_t, i))) && !tvisnil(tv)) i++;
586 return i - 1;
587 }
588 }
589 /* now do a binary search between them */
590 while (j - i > 1) {
591 MSize m = (i+j)/2;
592 cTValue *tvb = lj_tab_getint(t, cast(int32_t, m));
593 if (tvb && !tvisnil(tvb)) i = m; else j = m;
594 }
595 return i;
596}
597
598/*
599** Try to find a boundary in table `t'. A `boundary' is an integer index
600** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
601*/
602MSize lj_tab_len(GCtab *t)
603{
604 MSize j = (MSize)t->asize;
605 if (j > 1 && tvisnil(arrayslot(t, j-1))) {
606 MSize i = 1;
607 while (j - i > 1) {
608 MSize m = (i+j)/2;
609 if (tvisnil(arrayslot(t, m-1))) j = m; else i = m;
610 }
611 return i-1;
612 }
613 if (j) j--;
614 if (t->hmask <= 0)
615 return j;
616 return unbound_search(t, j);
617}
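A small illustration of the boundary semantics (a sketch; the values are arbitrary):

  GCtab *t = lj_tab_new(L, 0, 2);
  setintV(lj_tab_setint(L, t, 1), 10);
  setintV(lj_tab_setint(L, t, 2), 20);
  setintV(lj_tab_setint(L, t, 4), 40);  /* t[3] stays nil. */
  /* lj_tab_len(t) may legitimately return either 2 or 4 here: both are
  ** boundaries by the rule above.
  */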
618
diff --git a/src/lj_tab.h b/src/lj_tab.h
new file mode 100644
index 00000000..e9e8bcd1
--- /dev/null
+++ b/src/lj_tab.h
@@ -0,0 +1,41 @@
1/*
2** Table handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TAB_H
7#define _LJ_TAB_H
8
9#include "lj_obj.h"
10
11#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
12
13LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
14LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt);
15LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
16LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
17
18/* Caveat: all getters except lj_tab_get() can return NULL! */
19
20LJ_FUNCA cTValue *lj_tab_getinth(GCtab *t, int32_t key);
21LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
22LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
23
24/* Caveat: all setters require a write barrier for the stored value. */
25
26LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
27LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
28LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
29LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
30
31#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize)
32#define arrayslot(t, i) (&tvref((t)->array)[(i)])
33#define lj_tab_getint(t, key) \
34 (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_getinth((t), (key)))
35#define lj_tab_setint(L, t, key) \
36 (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
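A usage sketch of the setter protocol and the barrier caveat above. Here key is an interned GCstr and val a TValue, both assumed to exist, and the barrier macro is assumed to come from lj_gc.h:

  TValue *slot = lj_tab_setstr(L, t, key);
  copyTV(L, slot, val);
  lj_gc_barriert(L, t, val);  /* Write barrier for the stored value. */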
37
38LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
39LJ_FUNCA MSize lj_tab_len(GCtab *t);
40
41#endif
diff --git a/src/lj_target.h b/src/lj_target.h
new file mode 100644
index 00000000..0b464d3f
--- /dev/null
+++ b/src/lj_target.h
@@ -0,0 +1,132 @@
1/*
2** Definitions for target CPU.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TARGET_H
7#define _LJ_TARGET_H
8
9#include "lj_def.h"
10#include "lj_arch.h"
11
12/* -- Registers and spill slots ------------------------------------------- */
13
14/* Register type (uint8_t in ir->r). */
15typedef uint32_t Reg;
16
17/* The hi-bit is NOT set for an allocated register. This means the value
18** can be directly used without masking. The hi-bit is set for a register
19** allocation hint or for RID_INIT.
20*/
21#define RID_NONE 0x80
22#define RID_MASK 0x7f
23#define RID_INIT (RID_NONE|RID_MASK)
24
25#define ra_noreg(r) ((r) & RID_NONE)
26#define ra_hasreg(r) (!((r) & RID_NONE))
27
28/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
29#define ra_hashint(r) ((r) != RID_INIT)
30#define ra_gethint(r) ((Reg)((r) & RID_MASK))
31#define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE)
32#define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0)
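A tiny illustration of the hint encoding (a sketch; register id 5 is arbitrary):

  uint8_t r = RID_INIT;  /* No register allocated and no hint yet. */
  ra_sethint(r, 5);      /* Remember register 5 as a hint; hi-bit stays set. */
  lua_assert(ra_noreg(r) && ra_hashint(r) && ra_gethint(r) == 5);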
33
34/* Spill slot 0 means no spill slot has been allocated. */
35#define SPS_NONE 0
36
37#define ra_hasspill(s) ((s) != SPS_NONE)
38
39/* Combined register and spill slot (uint16_t in ir->prev). */
40typedef uint32_t RegSP;
41
42#define REGSP(r, s) ((r) + ((s) << 8))
43#define REGSP_HINT(r) ((r)|RID_NONE)
44#define REGSP_INIT REGSP(RID_INIT, 0)
45
46#define regsp_reg(rs) ((rs) & 255)
47#define regsp_spill(rs) ((rs) >> 8)
48#define regsp_used(rs) \
49 (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0))
50
51/* -- Register sets ------------------------------------------------------- */
52
53/* Bitset for registers. 32 registers suffice right now.
54** Note that one set holds bits for both GPRs and FPRs.
55*/
56typedef uint32_t RegSet;
57
58#define RID2RSET(r) (((RegSet)1) << (r))
59#define RSET_EMPTY 0
60#define RSET_RANGE(lo, hi) ((RID2RSET((hi)-(lo))-1) << (lo))
61
62#define rset_test(rs, r) (((rs) >> (r)) & 1)
63#define rset_set(rs, r) (rs |= RID2RSET(r))
64#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
65#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
66#define rset_picktop(rs) ((Reg)lj_fls(rs))
67#define rset_pickbot(rs) ((Reg)lj_ffs(rs))
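/* Worked example: RSET_RANGE(2, 5) is ((1<<3)-1)<<2 = 0x1c, i.e. the set
** {2,3,4} (the upper bound is exclusive). Assuming lj_ffs()/lj_fls()
** return the index of the lowest/highest set bit, rset_pickbot() yields 2
** and rset_picktop() yields 4 for this set.
*/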
68
69/* -- Register allocation cost -------------------------------------------- */
70
71/* The register allocation heuristic keeps track of the cost for allocating
72** a specific register:
73**
74** A free register (obviously) has a cost of 0 and a 1-bit in the free mask.
75**
76** An already allocated register has the (non-zero) IR reference in the lowest
77** bits and the result of a blended cost-model in the higher bits.
78**
79** The allocator first checks the free mask for a hit. Otherwise an (unrolled)
80** linear search for the minimum cost is used. The search doesn't need to
81** keep track of the position of the minimum, which makes it very fast.
82** The lowest bits of the minimum cost show the desired IR reference whose
83** register is the one to evict.
84**
85** Without the cost-model this degenerates to the standard heuristics for
86** (reverse) linear-scan register allocation. Since code generation is done
87** in reverse, a live interval extends from the last use to the first def.
88** For an SSA IR the IR reference is the first (and only) def and thus
89** trivially marks the end of the interval. The LSRA heuristic says to pick
90** the register whose live interval has the furthest extent, i.e. the lowest
91** IR reference in our case.
92**
93** A cost-model should take into account other factors, like spill-cost and
94** restore- or rematerialization-cost, which depend on the kind of instruction.
95** E.g. constants have zero spill costs, variant instructions have higher
96** costs than invariants and PHIs should preferably never be spilled.
97**
98** Here's a first cut at a simple, but effective blended cost-model for R-LSRA:
99** - Due to careful design of the IR, constants already have lower IR
100** references than invariants and invariants have lower IR references
101** than variants.
102** - The cost in the upper 16 bits is the sum of the IR reference and a
103** weighted score. The score currently only takes into account whether
104** the IRT_ISPHI bit is set in the instruction type.
105** - The PHI weight is the minimum distance (in IR instructions) a PHI
106** reference has to be further apart from a non-PHI reference to be spilled.
107** - It should be a power of two (for speed) and must be between 2 and 32768.
108** Good values for the PHI weight seem to be between 40 and 150.
109** - Further study is required.
110*/
111#define REGCOST_PHI_WEIGHT 64
112
113/* Cost for allocating a specific register. */
114typedef uint32_t RegCost;
115
116/* Note: assumes 16 bit IRRef1. */
117#define REGCOST(cost, ref) ((RegCost)(ref) + ((RegCost)(cost) << 16))
118#define regcost_ref(rc) ((IRRef1)(rc))
119
120#define REGCOST_T(t) \
121 ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI))
122#define REGCOST_REF_T(ref, t) (REGCOST((ref), (ref)) + REGCOST_T((t)))
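/* Worked example with the default PHI weight of 64: a non-PHI instruction
** at IR reference 0x8010 gets REGCOST_REF_T = 0x80108010, while a PHI at
** the same reference gets 0x80508010, i.e. REGCOST_PHI_WEIGHT<<16 more.
** When the allocator evicts the minimum-cost register, the PHI therefore
** behaves as if its reference were 64 IR instructions higher.
*/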
123
124/* -- Target-specific definitions ----------------------------------------- */
125
126#if LJ_TARGET_X86ORX64
127#include "lj_target_x86.h"
128#else
129#error "Missing include for target CPU"
130#endif
131
132#endif
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
new file mode 100644
index 00000000..3ee4fa00
--- /dev/null
+++ b/src/lj_target_x86.h
@@ -0,0 +1,257 @@
1/*
2** Definitions for x86 and x64 CPUs.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TARGET_X86_H
7#define _LJ_TARGET_X86_H
8
9/* -- Register IDs --------------------------------------------------------- */
10
11#if LJ_64
12#define GPRDEF(_) \
13 _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \
14 _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D)
15#define FPRDEF(_) \
16 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
17 _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
18#else
19#define GPRDEF(_) \
20 _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI)
21#define FPRDEF(_) \
22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
23#endif
24
25#define RIDENUM(name) RID_##name,
26
27enum {
28 GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
29 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
30 RID_MAX,
31 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
32
33 /* Calling conventions. */
34 RID_RET = RID_EAX,
35
36 /* These definitions must match with the *.dasc file(s): */
37 RID_BASE = RID_EDX, /* Interpreter BASE. */
38 RID_PC = RID_ESI, /* Interpreter PC. */
39 RID_DISPATCH = RID_EBX, /* Interpreter DISPATCH table. */
40
41 /* Register ranges [min, max) and number of registers. */
42 RID_MIN_GPR = RID_EAX,
43 RID_MIN_FPR = RID_XMM0,
44 RID_MAX_GPR = RID_MIN_FPR,
45 RID_MAX_FPR = RID_MAX,
46 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
47 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
48};
49
50/* -- Register sets ------------------------------------------------------- */
51
52/* Make use of all registers, except the stack pointer. */
53#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
54#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
55#define RSET_ALL (RSET_GPR|RSET_FPR)
56
57#if LJ_64
58/* Note: this requires the use of FORCE_REX! */
59#define RSET_GPR8 RSET_GPR
60#else
61#define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1))
62#endif
63
64/* ABI-specific register sets. */
65#define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))
66#if LJ_64
67#ifdef _WIN64
68/* Windows x64 ABI. */
69#define RSET_SCRATCH \
70 (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
71#else
72/* The rest of the civilized x64 world has a common ABI. */
73#define RSET_SCRATCH \
74 (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
75#endif
76#else
77/* Common x86 ABI. */
78#define RSET_SCRATCH (RSET_ACD|RSET_FPR)
79#endif
80
81#if LJ_64
82/* Prefer the low 8 regs of each type to reduce REX prefixes. */
83#undef rset_picktop
84#define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18)
85#endif
86
87/* -- Spill slots --------------------------------------------------------- */
88
89/* Stack layout for the compiled machine code (after stack adjustment). */
90enum {
91 SPS_TEMP1, /* Temps (3*dword) for calls and asm_x87load. */
92 SPS_TEMP2,
93 SPS_TEMP3,
94 SPS_FIRST, /* First spill slot for general use. */
95
96 /* This definition must match with the *.dasc file(s). */
97 SPS_FIXED = 6 /* Available fixed spill slots in interpreter frame. */
98};
99
100/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
101#define sps_scale(slot) (4 * (int32_t)(slot))
102#define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3))
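/* Worked example: with as->evenspill == 10 the adjustment is
** sps_scale((10-6+3)&~3) = sps_scale(4) = 16 bytes, i.e. the spill area
** beyond the fixed interpreter slots rounded up to a multiple of four
** slots (presumably to preserve 16 byte stack alignment).
*/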
103
104/* -- Exit state ---------------------------------------------------------- */
105
106/* This definition must match with the *.dasc file(s). */
107typedef struct {
108 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
109 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
110 int32_t spill[256]; /* Spill slots. */
111} ExitState;
112
113/* -- x86 ModRM operand encoding ------------------------------------------ */
114
115typedef enum {
116 XM_OFS0 = 0x00, XM_OFS8 = 0x40, XM_OFS32 = 0x80, XM_REG = 0xc0,
117 XM_SCALE1 = 0x00, XM_SCALE2 = 0x40, XM_SCALE4 = 0x80, XM_SCALE8 = 0xc0,
118 XM_MASK = 0xc0
119} x86Mode;
120
121/* Structure to hold variable ModRM operand. */
122typedef struct {
123 int32_t ofs; /* Offset. */
124 uint8_t base; /* Base register or RID_NONE. */
125 uint8_t idx; /* Index register or RID_NONE. */
126 uint8_t scale; /* Index scale (XM_SCALE1 .. XM_SCALE8). */
127} x86ModRM;
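/* Example (a sketch of the intended encoding): a memory operand like
** [ebp+0x10] would be described as base = RID_EBP, idx = RID_NONE,
** scale = XM_SCALE1, ofs = 0x10; the code emitter can then pick XM_OFS8
** since the offset fits into a signed byte, or XM_OFS32 otherwise.
*/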
128
129/* -- Opcodes ------------------------------------------------------------- */
130
131/* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB. */
132#define XO_(o) ((uint32_t)(0x0000fe + (0x##o<<24)))
133#define XO_FPU(a,b) ((uint32_t)(0x00fd + (0x##a<<16)+(0x##b<<24)))
134#define XO_0f(o) ((uint32_t)(0x0f00fd + (0x##o<<24)))
135#define XO_66(o) ((uint32_t)(0x6600fd + (0x##o<<24)))
136#define XO_660f(o) ((uint32_t)(0x0f66fc + (0x##o<<24)))
137#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
138#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
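/* Worked example: XO_f20f(58) evaluates to 0x580ff2fc. The LSB 0xfc is
** -(3+1) as a signed byte, i.e. a 3-byte opcode, and the remaining bytes
** f2 0f 58 are the ADDSD encoding (cf. XO_ADDSD below).
*/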
139
140/* This list of x86 opcodes is not intended to be complete. Opcodes are only
141** included when needed. Take a look at DynASM or jit.dis_x86 to see the
142** whole mess.
143*/
144typedef enum {
145 /* Fixed length opcodes. XI_* prefix. */
146 XI_NOP = 0x90,
147 XI_CALL = 0xe8,
148 XI_JMP = 0xe9,
149 XI_JMPs = 0xeb,
150 XI_JCCs = 0x70, /* Really 7x. */
151 XI_JCCn = 0x80, /* Really 0f8x. */
152 XI_LEA = 0x8d,
153 XI_MOVri = 0xb8, /* Really b8+r. */
154 XI_ARITHib = 0x80,
155 XI_ARITHi = 0x81,
156 XI_ARITHi8 = 0x83,
157 XI_PUSHi8 = 0x6a,
158 XI_TEST = 0x85,
159 XI_MOVmi = 0xc7,
160 XI_BSWAP = 0xc8, /* Really 0fc8+r. */
161
162 /* Note: little-endian byte-order! */
163 XI_FLDZ = 0xeed9,
164 XI_FLD1 = 0xe8d9,
165 XI_FLDLG2 = 0xecd9,
166 XI_FLDLN2 = 0xedd9,
167 XI_FPOP = 0xd8dd, /* Really fstp st0. */
168 XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
169 XI_FRNDINT = 0xfcd9,
170 XI_FSIN = 0xfed9,
171 XI_FCOS = 0xffd9,
172 XI_FPTAN = 0xf2d9,
173 XI_FPATAN = 0xf3d9,
174 XI_FSCALE = 0xfdd9,
175 XI_FYL2X = 0xf1d9,
176
177 /* Variable-length opcodes. XO_* prefix. */
178 XO_MOV = XO_(8b),
179 XO_MOVto = XO_(89),
180 XO_MOVtow = XO_66(89),
181 XO_MOVtob = XO_(88),
182 XO_MOVmi = XO_(c7),
183 XO_MOVmib = XO_(c6),
184 XO_LEA = XO_(8d),
185 XO_ARITHib = XO_(80),
186 XO_ARITHi = XO_(81),
187 XO_ARITHi8 = XO_(83),
188 XO_SHIFTi = XO_(c1),
189 XO_SHIFT1 = XO_(d1),
190 XO_SHIFTcl = XO_(d3),
191 XO_IMULi8 = XO_(6b),
192 XO_CMP = XO_(3b),
193 XO_TEST = XO_(85),
194 XO_GROUP3b = XO_(f6),
195 XO_GROUP3 = XO_(f7),
196 XO_MOVZXb = XO_0f(b6),
197 XO_MOVZXw = XO_0f(b7),
198 XO_MOVSXb = XO_0f(be),
199 XO_MOVSXw = XO_0f(bf),
200
201 XO_MOVSD = XO_f20f(10),
202 XO_MOVSDto = XO_f20f(11),
203 XO_MOVLPD = XO_660f(12),
204 XO_MOVAPS = XO_0f(28),
205 XO_XORPS = XO_0f(57),
206 XO_ANDPS = XO_0f(54),
207 XO_ADDSD = XO_f20f(58),
208 XO_SUBSD = XO_f20f(5c),
209 XO_MULSD = XO_f20f(59),
210 XO_DIVSD = XO_f20f(5e),
211 XO_SQRTSD = XO_f20f(51),
212 XO_MINSD = XO_f20f(5d),
213 XO_MAXSD = XO_f20f(5f),
214 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
215 XO_UCOMISD = XO_660f(2e),
216 XO_CVTSI2SD = XO_f20f(2a),
217 XO_CVTSD2SI = XO_f20f(2d),
218 XO_CVTTSD2SI= XO_f20f(2c),
219 XO_MOVDto = XO_660f(7e),
220
221 XO_FLDq = XO_(dd), XOg_FLDq = 0,
222 XO_FILDd = XO_(db), XOg_FILDd = 0,
223 XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
224 XO_FISTPq = XO_(df), XOg_FISTPq = 7,
225} x86Op;
226
227/* x86 opcode groups. */
228typedef uint32_t x86Group;
229
230#define XG_(i8, i, g) ((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
231#define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g)
232
233#define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27)))
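/* Worked example: XO_ARITH(XOg_CMP) evaluates to 0x3b0000fe, i.e. the
** single-byte opcode 0x3b, which matches XO_CMP == XO_(3b) above.
*/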
234
235typedef enum {
236 XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP
237} x86Arith;
238
239typedef enum {
240 XOg_ROL, XOg_ROR, XOg_RCL, XOg_RCR, XOg_SHL, XOg_SHR, XOg_SAL, XOg_SAR
241} x86Shift;
242
243typedef enum {
244 XOg_TEST, XOg_TEST_, XOg_NOT, XOg_NEG, XOg_MUL, XOg_IMUL, XOg_DIV, XOg_IDIV
245} x86Group3;
246
247/* x86 condition codes. */
248typedef enum {
249 CC_O, CC_NO, CC_B, CC_NB, CC_E, CC_NE, CC_BE, CC_NBE,
250 CC_S, CC_NS, CC_P, CC_NP, CC_L, CC_NL, CC_LE, CC_NLE,
251 CC_C = CC_B, CC_NAE = CC_C, CC_NC = CC_NB, CC_AE = CC_NB,
252 CC_Z = CC_E, CC_NZ = CC_NE, CC_NA = CC_BE, CC_A = CC_NBE,
253 CC_PE = CC_P, CC_PO = CC_NP, CC_NGE = CC_L, CC_GE = CC_NL,
254 CC_NG = CC_LE, CC_G = CC_NLE
255} x86CC;
256
257#endif
diff --git a/src/lj_trace.c b/src/lj_trace.c
new file mode 100644
index 00000000..6ceb5633
--- /dev/null
+++ b/src/lj_trace.c
@@ -0,0 +1,591 @@
1/*
2** Trace management.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_trace_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_err.h"
15#include "lj_str.h"
16#include "lj_frame.h"
17#include "lj_state.h"
18#include "lj_bc.h"
19#include "lj_ir.h"
20#include "lj_jit.h"
21#include "lj_iropt.h"
22#include "lj_mcode.h"
23#include "lj_trace.h"
24#include "lj_snap.h"
25#include "lj_gdbjit.h"
26#include "lj_record.h"
27#include "lj_asm.h"
28#include "lj_dispatch.h"
29#include "lj_vm.h"
30#include "lj_vmevent.h"
31#include "lj_target.h"
32
33/* -- Error handling ------------------------------------------------------ */
34
35/* Synchronous abort with error message. */
36void lj_trace_err(jit_State *J, TraceError e)
37{
38 setnilV(&J->errinfo); /* No error info. */
39 setintV(J->L->top++, (int32_t)e);
40 lj_err_throw(J->L, LUA_ERRRUN);
41}
42
43/* Synchronous abort with error message and error info. */
44void lj_trace_err_info(jit_State *J, TraceError e)
45{
46 setintV(J->L->top++, (int32_t)e);
47 lj_err_throw(J->L, LUA_ERRRUN);
48}
49
50/* -- Trace management ---------------------------------------------------- */
51
52/* The current trace is first assembled in J->cur. The variable length
53** arrays point to shared, growable buffers (J->irbuf etc.). The trace is
54** kept in this state until a new trace needs to be created. Then the current
55** trace and its data structures are copied to a new (compact) Trace object.
56*/
57
58/* Find a free trace number. */
59static TraceNo trace_findfree(jit_State *J)
60{
61 MSize osz, lim;
62 if (J->freetrace == 0)
63 J->freetrace = 1;
64 for (; J->freetrace < J->sizetrace; J->freetrace++)
65 if (J->trace[J->freetrace] == NULL)
66 return J->freetrace++;
67 /* Need to grow trace array. */
68 lim = (MSize)J->param[JIT_P_maxtrace] + 1;
69 if (lim < 2) lim = 2; else if (lim > 65535) lim = 65535;
70 osz = J->sizetrace;
71 if (osz >= lim)
72 return 0; /* Too many traces. */
73 lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, Trace *);
74 while (osz < J->sizetrace)
75 J->trace[osz++] = NULL;
76 return J->freetrace;
77}
78
79#define TRACE_COPYELEM(field, szfield, tp) \
80 T2->field = (tp *)p; \
81 memcpy(p, T->field, T->szfield*sizeof(tp)); \
82 p += T->szfield*sizeof(tp);
83
84/* Save a trace by copying and compacting it. */
85static Trace *trace_save(jit_State *J, Trace *T)
86{
87 size_t sztr = ((sizeof(Trace)+7)&~7);
88 size_t szins = (T->nins-T->nk)*sizeof(IRIns);
89 size_t sz = sztr + szins +
90 T->nsnap*sizeof(SnapShot) +
91 T->nsnapmap*sizeof(IRRef2);
92 Trace *T2 = lj_mem_newt(J->L, (MSize)sz, Trace);
93 char *p = (char *)T2 + sztr;
94 memcpy(T2, T, sizeof(Trace));
95 T2->ir = (IRIns *)p - T->nk;
96 memcpy(p, T->ir+T->nk, szins);
97 p += szins;
98 TRACE_COPYELEM(snap, nsnap, SnapShot)
99 TRACE_COPYELEM(snapmap, nsnapmap, IRRef2)
100 lj_gc_barriertrace(J2G(J), T);
101 return T2;
102}
103
104/* Free a trace. */
105static void trace_free(jit_State *J, TraceNo traceno)
106{
107 lua_assert(traceno != 0);
108 if (traceno < J->freetrace)
109 J->freetrace = traceno;
110 lj_gdbjit_deltrace(J, J->trace[traceno]);
111 if (traceno == J->curtrace) {
112 lua_assert(J->trace[traceno] == &J->cur);
113 J->trace[traceno] = NULL;
114 J->curtrace = 0;
115 } else {
116 Trace *T = J->trace[traceno];
117 lua_assert(T != NULL && T != &J->cur);
118 J->trace[traceno] = NULL;
119 lj_mem_free(J2G(J), T,
120 ((sizeof(Trace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
121 T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(IRRef2));
122 }
123}
124
125/* Free all traces associated with a prototype. No unpatching needed. */
126void lj_trace_freeproto(global_State *g, GCproto *pt)
127{
128 jit_State *J = G2J(g);
129 TraceNo traceno;
130 /* Free all root traces. */
131 for (traceno = pt->trace; traceno != 0; ) {
132 TraceNo side, nextroot = J->trace[traceno]->nextroot;
133 /* Free all side traces. */
134 for (side = J->trace[traceno]->nextside; side != 0; ) {
135 TraceNo next = J->trace[side]->nextside;
136 trace_free(J, side);
137 side = next;
138 }
139 /* Now free the trace itself. */
140 trace_free(J, traceno);
141 traceno = nextroot;
142 }
143}
144
145/* Re-enable compiling a prototype by unpatching any modified bytecode. */
146void lj_trace_reenableproto(GCproto *pt)
147{
148 if ((pt->flags & PROTO_HAS_ILOOP)) {
149 BCIns *bc = pt->bc;
150 BCPos i, sizebc = pt->sizebc;
151 pt->flags &= ~PROTO_HAS_ILOOP;
152 for (i = 0; i < sizebc; i++) {
153 BCOp op = bc_op(bc[i]);
154 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP)
155 setbc_op(&bc[i], (int)op+(int)BC_LOOP-(int)BC_ILOOP);
156 }
157 }
158}
159
160/* Unpatch the bytecode modified by a root trace. */
161static void trace_unpatch(jit_State *J, Trace *T)
162{
163 BCOp op = bc_op(T->startins);
164 uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots;
165 BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1;
166 switch (op) {
167 case BC_FORL:
168 lua_assert(bc_op(*pc) == BC_JFORI);
169 setbc_op(pc, BC_FORI); /* Unpatch JFORI, too. */
170 pc += bc_j(*pc);
171 lua_assert(bc_op(*pc) == BC_JFORL && J->trace[bc_d(*pc)] == T);
172 *pc = T->startins;
173 break;
174 case BC_LOOP:
175 lua_assert(bc_op(*pc) == BC_JLOOP && J->trace[bc_d(*pc)] == T);
176 *pc = T->startins;
177 break;
178 case BC_ITERL:
179 lua_assert(bc_op(*pc) == BC_JMP);
180 pc += bc_j(*pc)+2;
181 lua_assert(bc_op(*pc) == BC_JITERL && J->trace[bc_d(*pc)] == T);
182 *pc = T->startins;
183 break;
184 case BC_CALL:
185 lj_trace_err(J, LJ_TRERR_NYILNKF);
186 break;
187 case BC_JMP: /* No need to unpatch branches in parent traces (yet). */
188 default:
189 lua_assert(0);
190 break;
191 }
192}
193
194/* Flush a root trace and any attached side traces. */
195void lj_trace_flush(jit_State *J, TraceNo traceno)
196{
197 Trace *T = NULL;
198 GCproto *pt;
199 if (traceno > 0 && traceno <= J->sizetrace)
200 T = J->trace[traceno];
201 if (T == NULL)
202 return;
203 pt = &gcref(T->startpt)->pt;
204 if (T->root == 0 && pt != NULL) {
205 TraceNo side;
206 /* First unpatch any modified bytecode. */
207 trace_unpatch(J, T);
208 /* Unlink root trace from chain anchored in prototype. */
209 if (pt->trace == traceno) { /* Trace is first in chain. Easy. */
210 pt->trace = T->nextroot;
211 } else { /* Otherwise search in chain of root traces. */
212 Trace *T2 = J->trace[pt->trace];
213 while (T2->nextroot != traceno) {
214 lua_assert(T2->nextroot != 0);
215 T2 = J->trace[T2->nextroot];
216 }
217 T2->nextroot = T->nextroot; /* Unlink from chain. */
218 }
219 /* Free all side traces. */
220 for (side = T->nextside; side != 0; ) {
221 TraceNo next = J->trace[side]->nextside;
222 trace_free(J, side);
223 side = next;
224 }
225 /* Now free the trace itself. */
226 trace_free(J, traceno);
227 } /* Flush for non-root traces is currently ignored. */
228}
229
230/* Flush all traces associated with a prototype. */
231void lj_trace_flushproto(global_State *g, GCproto *pt)
232{
233 while (pt->trace != 0)
234 lj_trace_flush(G2J(g), pt->trace);
235}
236
237/* Flush all traces. */
238int lj_trace_flushall(lua_State *L)
239{
240 jit_State *J = L2J(L);
241 ptrdiff_t i;
242 if ((J2G(J)->hookmask & HOOK_GC))
243 return 1;
244 for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--)
245 lj_trace_flush(J, (TraceNo)i);
246#ifdef LUA_USE_ASSERT
247 for (i = 0; i < (ptrdiff_t)J->sizetrace; i++)
248 lua_assert(J->trace[i] == NULL);
249#endif
250 J->freetrace = 0;
251 /* Free the whole machine code and invalidate all exit stub groups. */
252 lj_mcode_free(J);
253 memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
254 lj_vmevent_send(L, TRACE,
255 setstrV(L, L->top++, lj_str_newlit(L, "flush"));
256 );
257 return 0;
258}
259
260/* Free everything associated with the JIT compiler state. */
261void lj_trace_freestate(global_State *g)
262{
263 jit_State *J = G2J(g);
264#ifdef LUA_USE_ASSERT
265 { /* This assumes all traces have already been freed. */
266 ptrdiff_t i;
267 for (i = 0; i < (ptrdiff_t)J->sizetrace; i++)
268 lua_assert(J->trace[i] == NULL);
269 }
270#endif
271 lj_mcode_free(J);
272 lj_ir_knum_freeall(J);
273 lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, IRRef2);
274 lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
275 lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
276 lj_mem_freevec(g, J->trace, J->sizetrace, Trace *);
277}
278
279/* -- Trace compiler state machine ---------------------------------------- */
280
281/* Penalize a bytecode instruction by bumping its hot counter. */
282static void hotpenalty(jit_State *J, const BCIns *pc, TraceError e)
283{
284 uint32_t i, val = HOTCOUNT_MIN_PENALTY;
285 for (i = 0; i < PENALTY_SLOTS; i++)
286 if (J->penalty[i].pc == pc) {
287 val = ((uint32_t)J->penalty[i].val << 1) + 1;
288 if (val > HOTCOUNT_MAX_PENALTY) val = HOTCOUNT_MAX_PENALTY;
289 goto setpenalty;
290 }
291 i = J->penaltyslot;
292 J->penaltyslot = (J->penaltyslot + 1) & (PENALTY_SLOTS-1);
293 J->penalty[i].pc = pc;
294setpenalty:
295 J->penalty[i].val = (uint16_t)val;
296 J->penalty[i].reason = e;
297 hotcount_set(J2GG(J), pc+1, val);
298}
299
300/* Start tracing. */
301static void trace_start(jit_State *J)
302{
303 lua_State *L;
304
305 if (J->curtrace != 0 && J->trace[J->curtrace] == &J->cur) {
306 J->trace[J->curtrace] = trace_save(J, &J->cur); /* Save current trace. */
307 J->curtrace = 0;
308 }
309
310 if ((J->pt->flags & PROTO_NO_JIT)) { /* JIT disabled for this proto? */
311 if (J->parent == 0) {
312 if (J->pc >= J->pt->bc) {
313 /* Lazy bytecode patching to disable hotcount events. */
314 setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP);
315 J->pt->flags |= PROTO_HAS_ILOOP;
316 } else {
317 /* NYI: lazy closure patching to disable hotcall events. */
318 lua_assert(0);
319 }
320 }
321 J->state = LJ_TRACE_IDLE; /* Silently ignored. */
322 return;
323 }
324
325 /* Get a new trace number. */
326 J->curtrace = trace_findfree(J);
327 if (LJ_UNLIKELY(J->curtrace == 0)) { /* No free trace? */
328 lua_assert((J2G(J)->hookmask & HOOK_GC) == 0);
329 lj_trace_flushall(J->L);
330 J->state = LJ_TRACE_IDLE; /* Silently ignored. */
331 return;
332 }
333 J->trace[J->curtrace] = &J->cur;
334
335 /* Setup enough of the current trace to be able to send the vmevent. */
336 memset(&J->cur, 0, sizeof(Trace));
337 J->cur.nins = J->cur.nk = REF_BASE;
338 J->cur.ir = J->irbuf;
339 J->cur.snap = J->snapbuf;
340 J->cur.snapmap = J->snapmapbuf;
341 /* J->cur.nsnapmap = 0; */
342 J->mergesnap = 0;
343 J->needsnap = 0;
344 J->guardemit.irt = 0;
345
346 L = J->L;
347 lj_vmevent_send(L, TRACE,
348 setstrV(L, L->top++, lj_str_newlit(L, "start"));
349 setintV(L->top++, J->curtrace);
350 setfuncV(L, L->top++, J->fn);
351 setintV(L->top++, J->pc - J->pt->bc + 1);
352 if (J->parent) {
353 setintV(L->top++, J->parent);
354 setintV(L->top++, J->exitno);
355 }
356 );
357 lj_record_setup(J);
358}
359
360/* Stop tracing. */
361static void trace_stop(jit_State *J)
362{
363 BCIns *pc = (BCIns *)J->startpc; /* Not const here. */
364 BCOp op = bc_op(J->cur.startins);
365 GCproto *pt = &gcref(J->cur.startpt)->pt;
366 lua_State *L;
367
368 switch (op) {
369 case BC_FORL:
370 setbc_op(pc+bc_j(J->cur.startins), BC_JFORI); /* Patch FORI, too. */
371 /* fallthrough */
372 case BC_LOOP:
373 case BC_ITERL:
374 /* Patch bytecode of starting instruction in root trace. */
375 setbc_op(pc, (int)op+(int)BC_JLOOP-(int)BC_LOOP);
376 setbc_d(pc, J->curtrace);
377 /* Add to root trace chain in prototype. */
378 J->cur.nextroot = pt->trace;
379 pt->trace = (TraceNo1)J->curtrace;
380 break;
381 case BC_CALL:
382 lj_trace_err(J, LJ_TRERR_NYILNKF);
383 break;
384 case BC_JMP:
385 /* Patch exit branch in parent to side trace entry. */
386 lua_assert(J->parent != 0 && J->cur.root != 0);
387 lj_asm_patchexit(J, J->trace[J->parent], J->exitno, J->cur.mcode);
388 /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
389 J->trace[J->parent]->snap[J->exitno].count = SNAPCOUNT_DONE;
390 /* Add to side trace chain in root trace. */
391 {
392 Trace *root = J->trace[J->cur.root];
393 root->nchild++;
394 J->cur.nextside = root->nextside;
395 root->nextside = (TraceNo1)J->curtrace;
396 }
397 break;
398 default:
399 lua_assert(0);
400 break;
401 }
402
403 /* Commit new mcode only after all patching is done. */
404 lj_mcode_commit(J, J->cur.mcode);
405 lj_gdbjit_addtrace(J, &J->cur, J->curtrace);
406
407 L = J->L;
408 lj_vmevent_send(L, TRACE,
409 setstrV(L, L->top++, lj_str_newlit(L, "stop"));
410 setintV(L->top++, J->curtrace);
411 );
412}
413
414/* Abort tracing. */
415static int trace_abort(jit_State *J)
416{
417 lua_State *L = J->L;
418 TraceError e = LJ_TRERR_RECERR;
419 lj_mcode_abort(J);
420 if (tvisnum(L->top-1))
421 e = (TraceError)lj_num2int(numV(L->top-1));
422 if (e == LJ_TRERR_MCODELM) {
423 J->state = LJ_TRACE_ASM;
424 return 1; /* Retry ASM with new MCode area. */
425 }
426 if (J->parent == 0)
427 hotpenalty(J, J->startpc, e); /* Penalize starting instruction. */
428 if (J->curtrace) { /* Is there anything to abort? */
429 ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */
430 lj_vmevent_send(L, TRACE,
431 setstrV(L, L->top++, lj_str_newlit(L, "abort"));
432 setintV(L->top++, J->curtrace);
433 setfuncV(L, L->top++, J->fn);
434 setintV(L->top++, J->pc - J->pt->bc + 1);
435 copyTV(L, L->top++, restorestack(L, errobj));
436 copyTV(L, L->top++, &J->errinfo);
437 );
438 /* Drop aborted trace after the vmevent (which may still access it). */
439 J->trace[J->curtrace] = NULL;
440 if (J->curtrace < J->freetrace)
441 J->freetrace = J->curtrace;
442 J->curtrace = 0;
443 }
444 L->top--; /* Remove error object */
445 if (e == LJ_TRERR_MCODEAL)
446 lj_trace_flushall(L);
447 return 0;
448}
449
450/* State machine for the trace compiler. Protected callback. */
451static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
452{
453 jit_State *J = (jit_State *)ud;
454 UNUSED(dummy);
455 do {
456 switch (J->state) {
457 case LJ_TRACE_START:
458 J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */
459 trace_start(J);
460 lj_dispatch_update(J2G(J));
461 break;
462
463 case LJ_TRACE_RECORD:
464 setvmstate(J2G(J), RECORD);
465 lj_vmevent_send(L, RECORD,
466 setintV(L->top++, J->curtrace);
467 setfuncV(L, L->top++, J->fn);
468 setintV(L->top++, J->pc - J->pt->bc + 1);
469 setintV(L->top++, J->framedepth);
470 if (bcmode_mm(bc_op(*J->pc)) == MM_call) {
471 cTValue *o = &L->base[bc_a(*J->pc)];
472 if (bc_op(*J->pc) == BC_ITERC) o -= 3;
473 copyTV(L, L->top++, o);
474 }
475 );
476 lj_record_ins(J);
477 break;
478
479 case LJ_TRACE_END:
480 J->loopref = 0;
481 if ((J->flags & JIT_F_OPT_LOOP) && J->cur.link == J->curtrace) {
482 setvmstate(J2G(J), OPT);
483 lj_opt_dce(J);
484 if (lj_opt_loop(J)) { /* Loop optimization failed? */
485 J->loopref = J->cur.nins;
486 J->state = LJ_TRACE_RECORD; /* Try to continue recording. */
487 break;
488 }
489 J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */
490 }
491 J->state = LJ_TRACE_ASM;
492 break;
493
494 case LJ_TRACE_ASM:
495 setvmstate(J2G(J), ASM);
496 lj_asm_trace(J, &J->cur);
497 trace_stop(J);
498 setvmstate(J2G(J), INTERP);
499 J->state = LJ_TRACE_IDLE;
500 lj_dispatch_update(J2G(J));
501 return NULL;
502
503 default: /* Trace aborted asynchronously. */
504 setintV(L->top++, (int32_t)LJ_TRERR_RECERR);
505 /* fallthrough */
506 case LJ_TRACE_ERR:
507 if (trace_abort(J))
508 break; /* Retry. */
509 setvmstate(J2G(J), INTERP);
510 J->state = LJ_TRACE_IDLE;
511 lj_dispatch_update(J2G(J));
512 return NULL;
513 }
514 } while (J->state > LJ_TRACE_RECORD);
515 return NULL;
516}
517
518/* -- Event handling ------------------------------------------------------ */
519
520/* A bytecode instruction is about to be executed. Record it. */
521void lj_trace_ins(jit_State *J)
522{
523 while (lj_vm_cpcall(J->L, trace_state, NULL, (void *)J) != 0)
524 J->state = LJ_TRACE_ERR;
525}
526
527/* Start recording a new trace. */
528static void trace_new(jit_State *J)
529{
530 /* Only start a new trace if not inside __gc call or vmevent. */
531 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
532 lua_assert(J->state == LJ_TRACE_IDLE);
533 J->state = LJ_TRACE_START;
534 J->fn = curr_func(J->L);
535 J->pt = funcproto(J->fn);
536 lj_trace_ins(J);
537 }
538}
539
540/* A hotcount triggered. Start recording a root trace. */
541void lj_trace_hot(jit_State *J, const BCIns *pc)
542{
543 lua_State *L = J->L;
544 L->top = curr_topL(L); /* Only called from Lua and NRESULTS is not used. */
545 hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]+1); /* Reset hotcount. */
546 J->parent = 0; /* Root trace. */
547 J->exitno = 0;
548 J->pc = pc-1; /* The interpreter bytecode PC is offset by 1. */
549 trace_new(J);
550}
551
552/* A trace exited. Restore interpreter state and check for hot exits. */
553void *lj_trace_exit(jit_State *J, void *exptr)
554{
555 lua_State *L = J->L;
556 void *cf;
557
558 /* Restore interpreter state. */
559 lj_snap_restore(J, exptr);
560 cf = cframe_raw(L->cframe);
561 cframe_pc(cf) = J->pc;
562
563 lj_vmevent_send(L, TEXIT,
564 ExitState *ex = (ExitState *)exptr;
565 uint32_t i;
566 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
567 setintV(L->top++, J->parent);
568 setintV(L->top++, J->exitno);
569 setintV(L->top++, RID_NUM_GPR);
570 setintV(L->top++, RID_NUM_FPR);
571 for (i = 0; i < RID_NUM_GPR; i++)
572 setintV(L->top++, ex->gpr[i]);
573 for (i = 0; i < RID_NUM_FPR; i++) {
574 setnumV(L->top, ex->fpr[i]);
575 if (LJ_UNLIKELY(tvisnan(L->top)))
576 setnanV(L->top);
577 L->top++;
578 }
579 );
580
581 { /* Check for a hot exit. */
582 SnapShot *snap = &J->trace[J->parent]->snap[J->exitno];
583 if (snap->count != SNAPCOUNT_DONE &&
584 ++snap->count >= J->param[JIT_P_hotexit])
585 trace_new(J); /* Start recording a side trace. */
586 }
587
588 return cf; /* Return the interpreter C frame. */
589}
590
591#endif
diff --git a/src/lj_trace.h b/src/lj_trace.h
new file mode 100644
index 00000000..9d8eb790
--- /dev/null
+++ b/src/lj_trace.h
@@ -0,0 +1,52 @@
1/*
2** Trace management.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TRACE_H
7#define _LJ_TRACE_H
8
9#if LJ_HASJIT
10
11#include "lj_obj.h"
12#include "lj_jit.h"
13#include "lj_dispatch.h"
14
15/* Trace errors. */
16typedef enum {
17#define TREDEF(name, msg) LJ_TRERR_##name,
18#include "lj_traceerr.h"
19 LJ_TRERR__MAX
20} TraceError;
21
22LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
23LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
24
25/* Trace management. */
26LJ_FUNC void lj_trace_freeproto(global_State *g, GCproto *pt);
27LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
28LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
29LJ_FUNC void lj_trace_flush(jit_State *J, TraceNo traceno);
30LJ_FUNC int lj_trace_flushall(lua_State *L);
31LJ_FUNC void lj_trace_freestate(global_State *g);
32
33/* Event handling. */
34LJ_FUNC void lj_trace_ins(jit_State *J);
35LJ_FUNCA void lj_trace_hot(jit_State *J, const BCIns *pc);
36LJ_FUNCA void *lj_trace_exit(jit_State *J, void *exptr);
37
38/* Signal asynchronous abort of trace or end of trace. */
39#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE)
40#define lj_trace_end(J) (J->state = LJ_TRACE_END)
41
42#else
43
44#define lj_trace_flushall(L) (UNUSED(L), 0)
45#define lj_trace_freestate(g) UNUSED(g)
46#define lj_trace_freeproto(g, pt) (UNUSED(g), UNUSED(pt), (void)0)
47#define lj_trace_abort(g) UNUSED(g)
48#define lj_trace_end(J) UNUSED(J)
49
50#endif
51
52#endif
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
new file mode 100644
index 00000000..2c8347b0
--- /dev/null
+++ b/src/lj_traceerr.h
@@ -0,0 +1,59 @@
1/*
2** Trace compiler error messages.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6/* This file may be included multiple times with different TREDEF macros. */
7
8/* Recording. */
9TREDEF(RECERR, "error thrown or hook called during recording")
10TREDEF(TRACEOV, "trace too long")
11TREDEF(STACKOV, "trace too deep")
12TREDEF(SNAPOV, "too many snapshots")
13TREDEF(NYIBC, "NYI: bytecode %d")
14
15/* Recording loop ops. */
16TREDEF(LLEAVE, "leaving loop in root trace")
17TREDEF(LINNER, "inner loop in root trace")
18TREDEF(LUNROLL, "loop unroll limit reached")
19TREDEF(LBLACKL, "blacklisted loop")
20
21/* Recording calls/returns. */
22TREDEF(BADTYPE, "bad argument type")
23TREDEF(CJITOFF, "call to JIT-disabled function")
24TREDEF(CUNROLL, "call unroll limit reached")
25TREDEF(NYIRECU, "NYI: recursive calls")
26TREDEF(NYILNKF, "NYI: linking/patching function calls")
27TREDEF(NYIVF, "NYI: vararg function")
28TREDEF(NYICF, "NYI: C function %p")
29TREDEF(NYIFF, "NYI: FastFunc %s")
30TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
31TREDEF(NYIRETL, "NYI: return to lower frame")
32
33/* Recording indexed load/store. */
34TREDEF(STORENN, "store with nil or NaN key")
35TREDEF(NOMM, "missing metamethod")
36TREDEF(IDXLOOP, "looping index lookup")
37TREDEF(NYITMIX, "NYI: mixed sparse/dense table")
38
39/* Optimizations. */
40TREDEF(GFAIL, "guard would always fail")
41TREDEF(PHIOV, "too many PHIs")
42TREDEF(TYPEINS, "persistent type instability")
43
44/* Assembler. */
45TREDEF(MCODEAL, "failed to allocate mcode memory")
46TREDEF(MCODEOV, "machine code too long")
47TREDEF(MCODELM, "hit mcode limit (retrying)")
48TREDEF(SPILLOV, "too many spill slots")
49TREDEF(BADRA, "inconsistent register allocation")
50TREDEF(NYIIR, "NYI: cannot assemble IR instruction %d")
51TREDEF(NYIPHI, "NYI: PHI shuffling too complex")
52TREDEF(NYICOAL, "NYI: register coalescing too complex")
53TREDEF(NYIGCF, "NYI: gcstep sync with frames")
54
55#undef TREDEF
56
57/* Detecting unused error messages:
58 awk -F, '/^TREDEF/ { gsub(/TREDEF./, ""); printf "grep -q LJ_TRERR_%s *.[ch] || echo %s\n", $1, $1}' lj_traceerr.h | sh
59*/
diff --git a/src/lj_udata.c b/src/lj_udata.c
new file mode 100644
index 00000000..863889c9
--- /dev/null
+++ b/src/lj_udata.c
@@ -0,0 +1,33 @@
1/*
2** Userdata handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_udata_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_udata.h"
12
13GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
14{
15 GCudata *ud = lj_mem_newt(L, sizeof(GCudata) + sz, GCudata);
16 global_State *g = G(L);
17 newwhite(g, ud); /* Not finalized. */
18 ud->gct = ~LJ_TUDATA;
19 ud->len = sz;
20 /* NOBARRIER: The GCudata is new (marked white). */
21 setgcrefnull(ud->metatable);
22 setgcref(ud->env, obj2gco(env));
23 /* Chain to userdata list (after main thread). */
24 setgcrefr(ud->nextgc, mainthread(g)->nextgc);
25 setgcref(mainthread(g)->nextgc, obj2gco(ud));
26 return ud;
27}
28
29void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud)
30{
31 lj_mem_free(g, ud, sizeudata(ud));
32}
33
diff --git a/src/lj_udata.h b/src/lj_udata.h
new file mode 100644
index 00000000..981304f8
--- /dev/null
+++ b/src/lj_udata.h
@@ -0,0 +1,14 @@
1/*
2** Userdata handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_UDATA_H
7#define _LJ_UDATA_H
8
9#include "lj_obj.h"
10
11LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env);
12LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud);
13
14#endif
diff --git a/src/lj_vm.h b/src/lj_vm.h
new file mode 100644
index 00000000..f50614bb
--- /dev/null
+++ b/src/lj_vm.h
@@ -0,0 +1,66 @@
1/*
2** Assembler VM interface definitions.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_VM_H
7#define _LJ_VM_H
8
9#include "lj_obj.h"
10
11/* Entry points for ASM parts of VM. */
12LJ_ASMF void lj_vm_call(lua_State *L, TValue *base, int nres1);
13LJ_ASMF int lj_vm_pcall(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
14typedef TValue *(*lua_CPFunction)(lua_State *L, lua_CFunction func, void *ud);
15LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CPFunction cp, lua_CFunction func,
16 void *ud);
17LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
18LJ_ASMF_NORET void lj_vm_unwind_c(void *cframe, int errcode);
19LJ_ASMF_NORET void lj_vm_unwind_ff(void *cframe);
20
21/* Miscellaneous functions. */
22#if LJ_TARGET_X86ORX64
23LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
24#endif
25LJ_ASMF double lj_vm_foldarith(double x, double y, int op);
26LJ_ASMF double lj_vm_foldfpm(double x, int op);
27
28/* Dispatch targets for recording and hooks. */
29LJ_ASMF void lj_vm_record(void);
30LJ_ASMF void lj_vm_hook(void);
31
32/* Trace exit handling. */
33LJ_ASMF void lj_vm_exit_handler(void);
34LJ_ASMF void lj_vm_exit_interp(void);
35
36/* Handlers callable from compiled code. */
37LJ_ASMF void lj_vm_floor(void);
38LJ_ASMF void lj_vm_ceil(void);
39LJ_ASMF void lj_vm_trunc(void);
40LJ_ASMF void lj_vm_exp(void);
41LJ_ASMF void lj_vm_exp2(void);
42LJ_ASMF void lj_vm_pow(void);
43LJ_ASMF void lj_vm_powi(void);
44
45/* Call gates for functions. */
46LJ_ASMF void lj_gate_lf(void);
47LJ_ASMF void lj_gate_lv(void);
48LJ_ASMF void lj_gate_c(void);
49
50/* Continuations for metamethods. */
51LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */
52LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */
53LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
54LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
55LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
56
57/* Start of the ASM code. */
58LJ_ASMF void lj_vm_asm_begin(void);
59
60/* Opcode handler offsets, relative to lj_vm_asm_begin. */
61LJ_ASMF const uint16_t lj_vm_op_ofs[];
62
63#define makeasmfunc(ofs) \
64 ((ASMFunction)((char *)lj_vm_asm_begin + (ofs)))
65
66#endif
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
new file mode 100644
index 00000000..657eb8d7
--- /dev/null
+++ b/src/lj_vmevent.c
@@ -0,0 +1,56 @@
1/*
2** VM event handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_vmevent_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_str.h"
13#include "lj_tab.h"
14#include "lj_state.h"
15#include "lj_dispatch.h"
16#include "lj_vm.h"
17#include "lj_vmevent.h"
18
19ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
20{
21 global_State *g = G(L);
22 GCstr *s = lj_str_newlit(L, LJ_VMEVENTS_REGKEY);
23 cTValue *tv = lj_tab_getstr(tabV(registry(L)), s);
24 if (tvistab(tv)) {
25 int hash = VMEVENT_HASH(ev);
26 tv = lj_tab_getint(tabV(tv), hash);
27 if (tv && tvisfunc(tv)) {
28 lj_state_checkstack(L, LUA_MINSTACK);
29 setfuncV(L, L->top++, funcV(tv));
30 return savestack(L, L->top);
31 }
32 }
33 g->vmevmask &= ~VMEVENT_MASK(ev); /* No handler: cache this fact. */
34 return 0;
35}
36
37void lj_vmevent_call(lua_State *L, ptrdiff_t argbase)
38{
39 global_State *g = G(L);
40 uint8_t oldmask = g->vmevmask;
41 uint8_t oldh = hook_save(g);
42 int status;
43 g->vmevmask = 0; /* Disable all events. */
44 hook_vmevent(g);
45 status = lj_vm_pcall(L, restorestack(L, argbase), 0+1, 0);
46 if (LJ_UNLIKELY(status)) {
47 /* Really shouldn't use stderr here, but where else to complain? */
48 L->top--;
49 fprintf(stderr, "VM handler failed: %s\n",
50 tvisstr(L->top) ? strVdata(L->top) : "?");
51 }
52 hook_restore(g, oldh);
53 if (g->vmevmask != VMEVENT_NOCACHE)
54 g->vmevmask = oldmask; /* Restore event mask, but only if it wasn't modified. */
55}
56
diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h
new file mode 100644
index 00000000..9eaa52e1
--- /dev/null
+++ b/src/lj_vmevent.h
@@ -0,0 +1,49 @@
1/*
2** VM event handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_VMEVENT_H
7#define _LJ_VMEVENT_H
8
9#include "lj_obj.h"
10
11/* Registry key for VM event handler table. */
12#define LJ_VMEVENTS_REGKEY "_VMEVENTS"
13#define LJ_VMEVENTS_HSIZE 4
14
15#define VMEVENT_MASK(ev) ((uint8_t)1 << ((int)(ev) & 7))
16#define VMEVENT_HASH(ev) ((int)(ev) & ~7)
17#define VMEVENT_HASHIDX(h) ((int)(h) << 3)
18#define VMEVENT_NOCACHE 255
19
20#define VMEVENT_DEF(name, hash) \
21 LJ_VMEVENT_##name##_, \
22 LJ_VMEVENT_##name = ((LJ_VMEVENT_##name##_) & 7)|((hash) << 3)
23
24/* VM event IDs. */
25typedef enum {
26 VMEVENT_DEF(BC, 0x0000140b),
27 VMEVENT_DEF(TRACE, 0x10ea574d),
28 VMEVENT_DEF(RECORD, 0x5698231c),
29 VMEVENT_DEF(TEXIT, 0x12d984a7),
30 LJ_VMEVENT__MAX
31} VMEvent;
32
33#ifdef LUAJIT_DISABLE_VMEVENT
34#define lj_vmevent_send(L, ev, args) UNUSED(L)
35#else
36#define lj_vmevent_send(L, ev, args) \
37 if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \
38 ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \
39 if (argbase) { \
40 args \
41 lj_vmevent_call(L, argbase); \
42 } \
43 }
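/* Typical usage (mirroring the call sites in lj_trace.c): the args block
** pushes the handler arguments and is only evaluated when a handler is
** actually registered for the event, e.g.:
**
**   lj_vmevent_send(L, TRACE,
**     setstrV(L, L->top++, lj_str_newlit(L, "flush"));
**   );
*/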
44
45LJ_FUNC ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev);
46LJ_FUNC void lj_vmevent_call(lua_State *L, ptrdiff_t argbase);
47#endif
48
49#endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
new file mode 100644
index 00000000..46d0e21f
--- /dev/null
+++ b/src/ljamalg.c
@@ -0,0 +1,70 @@
1/*
2** LuaJIT core and libraries amalgamation.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6/*
7+--------------------------------------------------------------------------+
8| WARNING: Compiling the amalgamation needs a lot of virtual memory |
9| (around 160 MB with GCC 4.x)! If you don't have enough physical memory |
10| your machine will start swapping to disk and the compile will not finish |
11| within a reasonable amount of time. |
12| So either compile on a bigger machine or use the non-amalgamated build. |
13+--------------------------------------------------------------------------+
14*/
15
16#define ljamalg_c
17#define LUA_CORE
18
19/* To get the mremap prototype. Must be defined before any system includes. */
20#if defined(__linux__) && !defined(_GNU_SOURCE)
21#define _GNU_SOURCE
22#endif
23
24#include "lua.h"
25#include "lauxlib.h"
26
27#include "lj_gc.c"
28#include "lj_err.c"
29#include "lj_ctype.c"
30#include "lj_bc.c"
31#include "lj_obj.c"
32#include "lj_str.c"
33#include "lj_tab.c"
34#include "lj_func.c"
35#include "lj_udata.c"
36#include "lj_meta.c"
37#include "lj_state.c"
38#include "lj_dispatch.c"
39#include "lj_vmevent.c"
40#include "lj_api.c"
41#include "lj_lex.c"
42#include "lj_parse.c"
43#include "lj_lib.c"
44#include "lj_ir.c"
45#include "lj_opt_mem.c"
46#include "lj_opt_fold.c"
47#include "lj_opt_narrow.c"
48#include "lj_opt_dce.c"
49#include "lj_opt_loop.c"
50#include "lj_mcode.c"
51#include "lj_snap.c"
52#include "lj_record.c"
53#include "lj_asm.c"
54#include "lj_trace.c"
55#include "lj_gdbjit.c"
56#include "lj_alloc.c"
57
58#include "lib_aux.c"
59#include "lib_base.c"
60#include "lib_math.c"
61#include "lib_string.c"
62#include "lib_table.c"
63#include "lib_io.c"
64#include "lib_os.c"
65#include "lib_package.c"
66#include "lib_debug.c"
67#include "lib_bit.c"
68#include "lib_jit.c"
69#include "lib_init.c"
70
diff --git a/src/lua.h b/src/lua.h
new file mode 100644
index 00000000..0e98b374
--- /dev/null
+++ b/src/lua.h
@@ -0,0 +1,388 @@
1/*
2** $Id: lua.h,v 1.218.1.5 2008/08/06 13:30:12 roberto Exp $
3** Lua - An Extensible Extension Language
4** Lua.org, PUC-Rio, Brazil (http://www.lua.org)
5** See Copyright Notice at the end of this file
6*/
7
8
9#ifndef lua_h
10#define lua_h
11
12#include <stdarg.h>
13#include <stddef.h>
14
15
16#include "luaconf.h"
17
18
19#define LUA_VERSION "Lua 5.1"
20#define LUA_RELEASE "Lua 5.1.4"
21#define LUA_VERSION_NUM 501
22#define LUA_COPYRIGHT "Copyright (C) 1994-2008 Lua.org, PUC-Rio"
23#define LUA_AUTHORS "R. Ierusalimschy, L. H. de Figueiredo & W. Celes"
24
25
26/* mark for precompiled code (`<esc>Lua') */
27#define LUA_SIGNATURE "\033Lua"
28
29/* option for multiple returns in `lua_pcall' and `lua_call' */
30#define LUA_MULTRET (-1)
31
32
33/*
34** pseudo-indices
35*/
36#define LUA_REGISTRYINDEX (-10000)
37#define LUA_ENVIRONINDEX (-10001)
38#define LUA_GLOBALSINDEX (-10002)
39#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i))
40
41
42/* thread status; 0 is OK */
43#define LUA_YIELD 1
44#define LUA_ERRRUN 2
45#define LUA_ERRSYNTAX 3
46#define LUA_ERRMEM 4
47#define LUA_ERRERR 5
48
49
50typedef struct lua_State lua_State;
51
52typedef int (*lua_CFunction) (lua_State *L);
53
54
55/*
56** functions that read/write blocks when loading/dumping Lua chunks
57*/
58typedef const char * (*lua_Reader) (lua_State *L, void *ud, size_t *sz);
59
60typedef int (*lua_Writer) (lua_State *L, const void* p, size_t sz, void* ud);
61
62
63/*
64** prototype for memory-allocation functions
65*/
66typedef void * (*lua_Alloc) (void *ud, void *ptr, size_t osize, size_t nsize);
67
68
69/*
70** basic types
71*/
72#define LUA_TNONE (-1)
73
74#define LUA_TNIL 0
75#define LUA_TBOOLEAN 1
76#define LUA_TLIGHTUSERDATA 2
77#define LUA_TNUMBER 3
78#define LUA_TSTRING 4
79#define LUA_TTABLE 5
80#define LUA_TFUNCTION 6
81#define LUA_TUSERDATA 7
82#define LUA_TTHREAD 8
83
84
85
86/* minimum Lua stack available to a C function */
87#define LUA_MINSTACK 20
88
89
90/*
91** generic extra include file
92*/
93#if defined(LUA_USER_H)
94#include LUA_USER_H
95#endif
96
97
98/* type of numbers in Lua */
99typedef LUA_NUMBER lua_Number;
100
101
102/* type for integer functions */
103typedef LUA_INTEGER lua_Integer;
104
105
106
107/*
108** state manipulation
109*/
110LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud);
111LUA_API void (lua_close) (lua_State *L);
112LUA_API lua_State *(lua_newthread) (lua_State *L);
113
114LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf);
115
116
117/*
118** basic stack manipulation
119*/
120LUA_API int (lua_gettop) (lua_State *L);
121LUA_API void (lua_settop) (lua_State *L, int idx);
122LUA_API void (lua_pushvalue) (lua_State *L, int idx);
123LUA_API void (lua_remove) (lua_State *L, int idx);
124LUA_API void (lua_insert) (lua_State *L, int idx);
125LUA_API void (lua_replace) (lua_State *L, int idx);
126LUA_API int (lua_checkstack) (lua_State *L, int sz);
127
128LUA_API void (lua_xmove) (lua_State *from, lua_State *to, int n);
129
130
131/*
132** access functions (stack -> C)
133*/
134
135LUA_API int (lua_isnumber) (lua_State *L, int idx);
136LUA_API int (lua_isstring) (lua_State *L, int idx);
137LUA_API int (lua_iscfunction) (lua_State *L, int idx);
138LUA_API int (lua_isuserdata) (lua_State *L, int idx);
139LUA_API int (lua_type) (lua_State *L, int idx);
140LUA_API const char *(lua_typename) (lua_State *L, int tp);
141
142LUA_API int (lua_equal) (lua_State *L, int idx1, int idx2);
143LUA_API int (lua_rawequal) (lua_State *L, int idx1, int idx2);
144LUA_API int (lua_lessthan) (lua_State *L, int idx1, int idx2);
145
146LUA_API lua_Number (lua_tonumber) (lua_State *L, int idx);
147LUA_API lua_Integer (lua_tointeger) (lua_State *L, int idx);
148LUA_API int (lua_toboolean) (lua_State *L, int idx);
149LUA_API const char *(lua_tolstring) (lua_State *L, int idx, size_t *len);
150LUA_API size_t (lua_objlen) (lua_State *L, int idx);
151LUA_API lua_CFunction (lua_tocfunction) (lua_State *L, int idx);
152LUA_API void *(lua_touserdata) (lua_State *L, int idx);
153LUA_API lua_State *(lua_tothread) (lua_State *L, int idx);
154LUA_API const void *(lua_topointer) (lua_State *L, int idx);
155
156
157/*
158** push functions (C -> stack)
159*/
160LUA_API void (lua_pushnil) (lua_State *L);
161LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n);
162LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n);
163LUA_API void (lua_pushlstring) (lua_State *L, const char *s, size_t l);
164LUA_API void (lua_pushstring) (lua_State *L, const char *s);
165LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt,
166 va_list argp);
167LUA_API const char *(lua_pushfstring) (lua_State *L, const char *fmt, ...);
168LUA_API void (lua_pushcclosure) (lua_State *L, lua_CFunction fn, int n);
169LUA_API void (lua_pushboolean) (lua_State *L, int b);
170LUA_API void (lua_pushlightuserdata) (lua_State *L, void *p);
171LUA_API int (lua_pushthread) (lua_State *L);
172
173
174/*
175** get functions (Lua -> stack)
176*/
177LUA_API void (lua_gettable) (lua_State *L, int idx);
178LUA_API void (lua_getfield) (lua_State *L, int idx, const char *k);
179LUA_API void (lua_rawget) (lua_State *L, int idx);
180LUA_API void (lua_rawgeti) (lua_State *L, int idx, int n);
181LUA_API void (lua_createtable) (lua_State *L, int narr, int nrec);
182LUA_API void *(lua_newuserdata) (lua_State *L, size_t sz);
183LUA_API int (lua_getmetatable) (lua_State *L, int objindex);
184LUA_API void (lua_getfenv) (lua_State *L, int idx);
185
186
187/*
188** set functions (stack -> Lua)
189*/
190LUA_API void (lua_settable) (lua_State *L, int idx);
191LUA_API void (lua_setfield) (lua_State *L, int idx, const char *k);
192LUA_API void (lua_rawset) (lua_State *L, int idx);
193LUA_API void (lua_rawseti) (lua_State *L, int idx, int n);
194LUA_API int (lua_setmetatable) (lua_State *L, int objindex);
195LUA_API int (lua_setfenv) (lua_State *L, int idx);
196
197
198/*
199** `load' and `call' functions (load and run Lua code)
200*/
201LUA_API void (lua_call) (lua_State *L, int nargs, int nresults);
202LUA_API int (lua_pcall) (lua_State *L, int nargs, int nresults, int errfunc);
203LUA_API int (lua_cpcall) (lua_State *L, lua_CFunction func, void *ud);
204LUA_API int (lua_load) (lua_State *L, lua_Reader reader, void *dt,
205 const char *chunkname);
206
207LUA_API int (lua_dump) (lua_State *L, lua_Writer writer, void *data);
208
209
210/*
211** coroutine functions
212*/
213LUA_API int (lua_yield) (lua_State *L, int nresults);
214LUA_API int (lua_resume) (lua_State *L, int narg);
215LUA_API int (lua_status) (lua_State *L);
216
217/*
218** garbage-collection function and options
219*/
220
221#define LUA_GCSTOP 0
222#define LUA_GCRESTART 1
223#define LUA_GCCOLLECT 2
224#define LUA_GCCOUNT 3
225#define LUA_GCCOUNTB 4
226#define LUA_GCSTEP 5
227#define LUA_GCSETPAUSE 6
228#define LUA_GCSETSTEPMUL 7
229
230LUA_API int (lua_gc) (lua_State *L, int what, int data);
231
232
233/*
234** miscellaneous functions
235*/
236
237LUA_API int (lua_error) (lua_State *L);
238
239LUA_API int (lua_next) (lua_State *L, int idx);
240
241LUA_API void (lua_concat) (lua_State *L, int n);
242
243LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud);
244LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud);
245
246
247
248/*
249** ===============================================================
250** some useful macros
251** ===============================================================
252*/
253
254#define lua_pop(L,n) lua_settop(L, -(n)-1)
255
256#define lua_newtable(L) lua_createtable(L, 0, 0)
257
258#define lua_register(L,n,f) (lua_pushcfunction(L, (f)), lua_setglobal(L, (n)))
259
260#define lua_pushcfunction(L,f) lua_pushcclosure(L, (f), 0)
261
262#define lua_strlen(L,i) lua_objlen(L, (i))
263
264#define lua_isfunction(L,n) (lua_type(L, (n)) == LUA_TFUNCTION)
265#define lua_istable(L,n) (lua_type(L, (n)) == LUA_TTABLE)
266#define lua_islightuserdata(L,n) (lua_type(L, (n)) == LUA_TLIGHTUSERDATA)
267#define lua_isnil(L,n) (lua_type(L, (n)) == LUA_TNIL)
268#define lua_isboolean(L,n) (lua_type(L, (n)) == LUA_TBOOLEAN)
269#define lua_isthread(L,n) (lua_type(L, (n)) == LUA_TTHREAD)
270#define lua_isnone(L,n) (lua_type(L, (n)) == LUA_TNONE)
271#define lua_isnoneornil(L, n) (lua_type(L, (n)) <= 0)
272
273#define lua_pushliteral(L, s) \
274 lua_pushlstring(L, "" s, (sizeof(s)/sizeof(char))-1)
275
276#define lua_setglobal(L,s) lua_setfield(L, LUA_GLOBALSINDEX, (s))
277#define lua_getglobal(L,s) lua_getfield(L, LUA_GLOBALSINDEX, (s))
278
279#define lua_tostring(L,i) lua_tolstring(L, (i), NULL)
280
281
282
283/*
284** compatibility macros and functions
285*/
286
287#define lua_open() luaL_newstate()
288
289#define lua_getregistry(L) lua_pushvalue(L, LUA_REGISTRYINDEX)
290
291#define lua_getgccount(L) lua_gc(L, LUA_GCCOUNT, 0)
292
293#define lua_Chunkreader lua_Reader
294#define lua_Chunkwriter lua_Writer
295
296
297/* hack */
298LUA_API void lua_setlevel (lua_State *from, lua_State *to);
299
300
301/*
302** {======================================================================
303** Debug API
304** =======================================================================
305*/
306
307
308/*
309** Event codes
310*/
311#define LUA_HOOKCALL 0
312#define LUA_HOOKRET 1
313#define LUA_HOOKLINE 2
314#define LUA_HOOKCOUNT 3
315#define LUA_HOOKTAILRET 4
316
317
318/*
319** Event masks
320*/
321#define LUA_MASKCALL (1 << LUA_HOOKCALL)
322#define LUA_MASKRET (1 << LUA_HOOKRET)
323#define LUA_MASKLINE (1 << LUA_HOOKLINE)
324#define LUA_MASKCOUNT (1 << LUA_HOOKCOUNT)
325
326typedef struct lua_Debug lua_Debug; /* activation record */
327
328
329/* Functions to be called by the debugger in specific events */
330typedef void (*lua_Hook) (lua_State *L, lua_Debug *ar);
331
332
333LUA_API int lua_getstack (lua_State *L, int level, lua_Debug *ar);
334LUA_API int lua_getinfo (lua_State *L, const char *what, lua_Debug *ar);
335LUA_API const char *lua_getlocal (lua_State *L, const lua_Debug *ar, int n);
336LUA_API const char *lua_setlocal (lua_State *L, const lua_Debug *ar, int n);
337LUA_API const char *lua_getupvalue (lua_State *L, int funcindex, int n);
338LUA_API const char *lua_setupvalue (lua_State *L, int funcindex, int n);
339
340LUA_API int lua_sethook (lua_State *L, lua_Hook func, int mask, int count);
341LUA_API lua_Hook lua_gethook (lua_State *L);
342LUA_API int lua_gethookmask (lua_State *L);
343LUA_API int lua_gethookcount (lua_State *L);
344
345
346struct lua_Debug {
347 int event;
348 const char *name; /* (n) */
349 const char *namewhat; /* (n) `global', `local', `field', `method' */
350 const char *what; /* (S) `Lua', `C', `main', `tail' */
351 const char *source; /* (S) */
352 int currentline; /* (l) */
353 int nups; /* (u) number of upvalues */
354 int linedefined; /* (S) */
355 int lastlinedefined; /* (S) */
356 char short_src[LUA_IDSIZE]; /* (S) */
357 /* private part */
358 int i_ci; /* active function */
359};
360
361/* }====================================================================== */
362
363
364/******************************************************************************
365* Copyright (C) 1994-2008 Lua.org, PUC-Rio. All rights reserved.
366*
367* Permission is hereby granted, free of charge, to any person obtaining
368* a copy of this software and associated documentation files (the
369* "Software"), to deal in the Software without restriction, including
370* without limitation the rights to use, copy, modify, merge, publish,
371* distribute, sublicense, and/or sell copies of the Software, and to
372* permit persons to whom the Software is furnished to do so, subject to
373* the following conditions:
374*
375* The above copyright notice and this permission notice shall be
376* included in all copies or substantial portions of the Software.
377*
378* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
379* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
380* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
381* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
382* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
383* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
384* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
385******************************************************************************/
386
387
388#endif
diff --git a/src/lua.hpp b/src/lua.hpp
new file mode 100644
index 00000000..ec417f59
--- /dev/null
+++ b/src/lua.hpp
@@ -0,0 +1,9 @@
1// lua.hpp
2// Lua header files for C++
3// <<extern "C">> not supplied automatically because Lua also compiles as C++
4
5extern "C" {
6#include "lua.h"
7#include "lualib.h"
8#include "lauxlib.h"
9}
diff --git a/src/luaconf.h b/src/luaconf.h
new file mode 100644
index 00000000..4d4f1099
--- /dev/null
+++ b/src/luaconf.h
@@ -0,0 +1,133 @@
1/*
2** Configuration header.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef luaconf_h
7#define luaconf_h
8
9#include <limits.h>
10#include <stddef.h>
11
12/* Try to determine supported features for a couple of standard platforms. */
13#if defined(_WIN32)
14#define LUA_USE_WIN
15#define LUA_DL_DLL
16#elif defined(__linux__) || defined(__solaris__) || defined(__CYGWIN__) || \
17 defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
18 (defined(__MACH__) && defined(__APPLE__))
19#define LUA_USE_POSIX
20#define LUA_DL_DLOPEN
21#endif
22
23/* Default path for loading Lua and C modules with require(). */
24#ifdef LUA_USE_WIN
25/*
26** In Windows, any exclamation mark ('!') in the path is replaced by the
27** path of the directory of the executable file of the current process.
28*/
29#define LUA_LDIR "!\\lua\\"
30#define LUA_CDIR "!\\"
31#define LUA_PATH_DEFAULT \
32 ".\\?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?\\init.lua;"
33#define LUA_CPATH_DEFAULT \
34 ".\\?.dll;" LUA_CDIR"?.dll;" LUA_CDIR"loadall.dll"
35#else
36#define LUA_ROOT "/usr/local/"
37#define LUA_JDIR LUA_ROOT "share/luajit-2.0.0-beta1/"
38#define LUA_LDIR LUA_ROOT "share/lua/5.1/"
39#define LUA_CDIR LUA_ROOT "lib/lua/5.1/"
40#define LUA_PATH_DEFAULT \
41 "./?.lua;" LUA_JDIR"?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?/init.lua;"
42#define LUA_CPATH_DEFAULT \
43 "./?.so;" LUA_CDIR"?.so;" LUA_CDIR"loadall.so"
44#endif
45
46/* Environment variable names for path overrides and initialization code. */
47#define LUA_PATH "LUA_PATH"
48#define LUA_CPATH "LUA_CPATH"
49#define LUA_INIT "LUA_INIT"
50
51/* Special file system characters. */
52#ifdef LUA_USE_WIN
53#define LUA_DIRSEP "\\"
54#else
55#define LUA_DIRSEP "/"
56#endif
57#define LUA_PATHSEP ";"
58#define LUA_PATH_MARK "?"
59#define LUA_EXECDIR "!"
60#define LUA_IGMARK "-"
61#define LUA_PATH_CONFIG \
62 LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
63 LUA_EXECDIR "\n" LUA_IGMARK
64
65/* Quoting in error messages. */
66#define LUA_QL(x) "'" x "'"
67#define LUA_QS LUA_QL("%s")
68
69/* Various tunables. */
70#define LUAI_MAXSTACK 65500 /* Max. # of stack slots for a thread (<64K). */
71#define LUAI_MAXCSTACK 8000 /* Max. # of stack slots for a C func (<10K). */
72#define LUAI_GCPAUSE 200 /* Pause GC until memory is at 200%. */
73#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
74#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */
75
76/* Compatibility with older library function names. */
77#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
78#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */
79
80/* Configuration for the frontend (the luajit executable). */
81#if defined(luajit_c)
82#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
83#define LUA_PROMPT "> " /* Interactive prompt. */
84#define LUA_PROMPT2 ">> " /* Continuation prompt. */
85#define LUA_MAXINPUT 512 /* Max. input line length. */
86#endif
87
88/* Note: changing the following defines breaks the Lua 5.1 ABI. */
89#define LUA_INTEGER ptrdiff_t
90#define LUA_IDSIZE 60 /* Size of lua_Debug.short_src. */
91#define LUAL_BUFFERSIZE BUFSIZ /* Size of lauxlib and io.* buffers. */
92
93/* The following defines are here only for compatibility with luaconf.h
94** from the standard Lua distribution. They must not be changed for LuaJIT.
95*/
96#define LUA_NUMBER_DOUBLE
97#define LUA_NUMBER double
98#define LUAI_UACNUMBER double
99#define LUA_NUMBER_SCAN "%lf"
100#define LUA_NUMBER_FMT "%.14g"
101#define lua_number2str(s, n) sprintf((s), LUA_NUMBER_FMT, (n))
102#define LUAI_MAXNUMBER2STR 32
103#define lua_str2number(s, p) strtod((s), (p))
104#define LUA_INTFRMLEN "l"
105#define LUA_INTFRM_T long
106
107/* Linkage of public API functions. */
108#if defined(LUA_BUILD_AS_DLL)
109#if defined(LUA_CORE) || defined(LUA_LIB)
110#define LUA_API __declspec(dllexport)
111#else
112#define LUA_API __declspec(dllimport)
113#endif
114#else
115#define LUA_API extern
116#endif
117
118#define LUALIB_API LUA_API
119
120/* Support for internal assertions. */
121#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
122#include <assert.h>
123#endif
124#ifdef LUA_USE_ASSERT
125#define lua_assert(x) assert(x)
126#endif
127#ifdef LUA_USE_APICHECK
128#define luai_apicheck(L, o) { (void)L; assert(o); }
129#else
130#define luai_apicheck(L, o) { (void)L; }
131#endif
132
133#endif
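
The LUA_QL()/LUA_QS macros above are the quoting convention used by the core and the libraries in error messages, and LUA_PATH_DEFAULT/LUA_CPATH_DEFAULT are only compiled-in fallbacks: the LUA_PATH and LUA_CPATH environment variables override them at runtime. A small sketch of the quoting macros in a hypothetical library function (name and check are illustrative):

#include "lua.h"
#include "lauxlib.h"

/* LUA_QS expands to "'%s'", so the message reads: invalid mode 'x' ... */
static int checkmode(lua_State *L)
{
  const char *mode = luaL_checkstring(L, 1);
  if (*mode != 'r' && *mode != 'w')
    return luaL_error(L, "invalid mode " LUA_QS " (expected " LUA_QL("r")
                      " or " LUA_QL("w") ")", mode);
  lua_pushstring(L, mode);
  return 1;
}
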
diff --git a/src/luajit.c b/src/luajit.c
new file mode 100644
index 00000000..9153975b
--- /dev/null
+++ b/src/luajit.c
@@ -0,0 +1,519 @@
1/*
2** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Major portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <signal.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#define luajit_c
15
16#include "lua.h"
17#include "lauxlib.h"
18#include "lualib.h"
19#include "luajit.h"
20
21#if defined(LUA_USE_POSIX)
22#include <unistd.h>
23#define lua_stdin_is_tty() isatty(0)
24#elif defined(LUA_USE_WIN)
25#include <io.h>
26#ifdef __BORLANDC__
27#define lua_stdin_is_tty() isatty(_fileno(stdin))
28#else
29#define lua_stdin_is_tty() _isatty(_fileno(stdin))
30#endif
31#else
32#define lua_stdin_is_tty() 1
33#endif
34
35static lua_State *globalL = NULL;
36static const char *progname = LUA_PROGNAME;
37
38static void lstop(lua_State *L, lua_Debug *ar)
39{
40 (void)ar; /* unused arg. */
41 lua_sethook(L, NULL, 0, 0);
42 /* Avoid luaL_error -- a C hook doesn't add an extra frame. */
43 luaL_where(L, 0);
44 lua_pushfstring(L, "%sinterrupted!", lua_tostring(L, -1));
45 lua_error(L);
46}
47
48static void laction(int i)
49{
50 signal(i, SIG_DFL); /* if another SIGINT happens before lstop,
51 terminate process (default action) */
52 lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1);
53}
54
55static void print_usage(void)
56{
57 fprintf(stderr,
58 "usage: %s [options] [script [args]].\n"
59 "Available options are:\n"
60 " -e stat execute string " LUA_QL("stat") "\n"
61 " -l name require library " LUA_QL("name") "\n"
62 " -j cmd perform LuaJIT control command\n"
63 " -O[lvl] set LuaJIT optimization level\n"
64 " -i enter interactive mode after executing " LUA_QL("script") "\n"
65 " -v show version information\n"
66 " -- stop handling options\n"
67 " - execute stdin and stop handling options\n"
68 ,
69 progname);
70 fflush(stderr);
71}
72
73static void l_message(const char *pname, const char *msg)
74{
75 if (pname) fprintf(stderr, "%s: ", pname);
76 fprintf(stderr, "%s\n", msg);
77 fflush(stderr);
78}
79
80static int report(lua_State *L, int status)
81{
82 if (status && !lua_isnil(L, -1)) {
83 const char *msg = lua_tostring(L, -1);
84 if (msg == NULL) msg = "(error object is not a string)";
85 l_message(progname, msg);
86 lua_pop(L, 1);
87 }
88 return status;
89}
90
91static int traceback(lua_State *L)
92{
93 if (!lua_isstring(L, 1)) /* 'message' not a string? */
94 return 1; /* keep it intact */
95 lua_getfield(L, LUA_GLOBALSINDEX, "debug");
96 if (!lua_istable(L, -1)) {
97 lua_pop(L, 1);
98 return 1;
99 }
100 lua_getfield(L, -1, "traceback");
101 if (!lua_isfunction(L, -1)) {
102 lua_pop(L, 2);
103 return 1;
104 }
105 lua_pushvalue(L, 1); /* pass error message */
106 lua_pushinteger(L, 2); /* skip this function and traceback */
107 lua_call(L, 2, 1); /* call debug.traceback */
108 return 1;
109}
110
111static int docall(lua_State *L, int narg, int clear)
112{
113 int status;
114 int base = lua_gettop(L) - narg; /* function index */
115 lua_pushcfunction(L, traceback); /* push traceback function */
116 lua_insert(L, base); /* put it under chunk and args */
117 signal(SIGINT, laction);
118 status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base);
119 signal(SIGINT, SIG_DFL);
120 lua_remove(L, base); /* remove traceback function */
121 /* force a complete garbage collection in case of errors */
122 if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0);
123 return status;
124}
125
126static void print_version(void)
127{
128 fprintf(stderr,
129 LUAJIT_VERSION " -- " LUAJIT_COPYRIGHT ". " LUAJIT_URL "\n");
130}
131
132static void print_jit_status(lua_State *L)
133{
134 int n;
135 const char *s;
136 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
137 lua_getfield(L, -1, "jit"); /* Get jit.* module table. */
138 lua_remove(L, -2);
139 lua_getfield(L, -1, "status");
140 lua_remove(L, -2);
141 n = lua_gettop(L);
142 lua_call(L, 0, LUA_MULTRET);
143 fputs(lua_toboolean(L, n) ? "JIT: ON" : "JIT: OFF", stderr);
144 for (n++; (s = lua_tostring(L, n)); n++)
145 fprintf(stderr, " %s", s);
146 fputs("\n", stderr);
147}
148
149static int getargs(lua_State *L, char **argv, int n)
150{
151 int narg;
152 int i;
153 int argc = 0;
154 while (argv[argc]) argc++; /* count total number of arguments */
155 narg = argc - (n + 1); /* number of arguments to the script */
156 luaL_checkstack(L, narg + 3, "too many arguments to script");
157 for (i = n+1; i < argc; i++)
158 lua_pushstring(L, argv[i]);
159 lua_createtable(L, narg, n + 1);
160 for (i = 0; i < argc; i++) {
161 lua_pushstring(L, argv[i]);
162 lua_rawseti(L, -2, i - n);
163 }
164 return narg;
165}
166
167static int dofile(lua_State *L, const char *name)
168{
169 int status = luaL_loadfile(L, name) || docall(L, 0, 1);
170 return report(L, status);
171}
172
173static int dostring(lua_State *L, const char *s, const char *name)
174{
175 int status = luaL_loadbuffer(L, s, strlen(s), name) || docall(L, 0, 1);
176 return report(L, status);
177}
178
179static int dolibrary(lua_State *L, const char *name)
180{
181 lua_getglobal(L, "require");
182 lua_pushstring(L, name);
183 return report(L, docall(L, 1, 1));
184}
185
186static void write_prompt(lua_State *L, int firstline)
187{
188 const char *p;
189 lua_getfield(L, LUA_GLOBALSINDEX, firstline ? "_PROMPT" : "_PROMPT2");
190 p = lua_tostring(L, -1);
191 if (p == NULL) p = firstline ? LUA_PROMPT : LUA_PROMPT2;
192 fputs(p, stdout);
193 fflush(stdout);
194 lua_pop(L, 1); /* remove global */
195}
196
197static int incomplete(lua_State *L, int status)
198{
199 if (status == LUA_ERRSYNTAX) {
200 size_t lmsg;
201 const char *msg = lua_tolstring(L, -1, &lmsg);
202 const char *tp = msg + lmsg - (sizeof(LUA_QL("<eof>")) - 1);
203 if (strstr(msg, LUA_QL("<eof>")) == tp) {
204 lua_pop(L, 1);
205 return 1;
206 }
207 }
208 return 0; /* else... */
209}
210
211static int pushline(lua_State *L, int firstline)
212{
213 char buf[LUA_MAXINPUT];
214 write_prompt(L, firstline);
215 if (fgets(buf, LUA_MAXINPUT, stdin)) {
216 size_t len = strlen(buf);
217 if (len > 0 && buf[len-1] == '\n')
218 buf[len-1] = '\0';
219 if (firstline && buf[0] == '=')
220 lua_pushfstring(L, "return %s", buf+1);
221 else
222 lua_pushstring(L, buf);
223 return 1;
224 }
225 return 0;
226}
227
228static int loadline(lua_State *L)
229{
230 int status;
231 lua_settop(L, 0);
232 if (!pushline(L, 1))
233 return -1; /* no input */
234 for (;;) { /* repeat until gets a complete line */
235 status = luaL_loadbuffer(L, lua_tostring(L, 1), lua_strlen(L, 1), "=stdin");
236 if (!incomplete(L, status)) break; /* cannot try to add lines? */
237 if (!pushline(L, 0)) /* no more input? */
238 return -1;
239 lua_pushliteral(L, "\n"); /* add a new line... */
240 lua_insert(L, -2); /* ...between the two lines */
241 lua_concat(L, 3); /* join them */
242 }
243 lua_remove(L, 1); /* remove line */
244 return status;
245}
246
247static void dotty(lua_State *L)
248{
249 int status;
250 const char *oldprogname = progname;
251 progname = NULL;
252 while ((status = loadline(L)) != -1) {
253 if (status == 0) status = docall(L, 0, 0);
254 report(L, status);
255 if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */
256 lua_getglobal(L, "print");
257 lua_insert(L, 1);
258 if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0)
259 l_message(progname,
260 lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)",
261 lua_tostring(L, -1)));
262 }
263 }
264 lua_settop(L, 0); /* clear stack */
265 fputs("\n", stdout);
266 fflush(stdout);
267 progname = oldprogname;
268}
269
270static int handle_script(lua_State *L, char **argv, int n)
271{
272 int status;
273 const char *fname;
274 int narg = getargs(L, argv, n); /* collect arguments */
275 lua_setglobal(L, "arg");
276 fname = argv[n];
277 if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
278 fname = NULL; /* stdin */
279 status = luaL_loadfile(L, fname);
280 lua_insert(L, -(narg+1));
281 if (status == 0)
282 status = docall(L, narg, 0);
283 else
284 lua_pop(L, narg);
285 return report(L, status);
286}
287
288/* Load add-on module. */
289static int loadjitmodule(lua_State *L, const char *notfound)
290{
291 lua_getglobal(L, "require");
292 lua_pushliteral(L, "jit.");
293 lua_pushvalue(L, -3);
294 lua_concat(L, 2);
295 if (lua_pcall(L, 1, 1, 0)) {
296 const char *msg = lua_tostring(L, -1);
297 if (msg && !strncmp(msg, "module ", 7)) {
298 err:
299 l_message(progname, notfound);
300 return 1;
301 } else {
302 return report(L, 1);
303 }
304 }
305 lua_getfield(L, -1, "start");
306 if (lua_isnil(L, -1)) goto err;
307 lua_remove(L, -2); /* Drop module table. */
308 return 0;
309}
310
311/* Run command with options. */
312static int runcmdopt(lua_State *L, const char *opt)
313{
314 int narg = 0;
315 if (opt && *opt) {
316 for (;;) { /* Split arguments. */
317 const char *p = strchr(opt, ',');
318 narg++;
319 if (!p) break;
320 if (p == opt)
321 lua_pushnil(L);
322 else
323 lua_pushlstring(L, opt, (size_t)(p - opt));
324 opt = p + 1;
325 }
326 if (*opt)
327 lua_pushstring(L, opt);
328 else
329 lua_pushnil(L);
330 }
331 return report(L, lua_pcall(L, narg, 0, 0));
332}
333
334/* JIT engine control command: try jit library first or load add-on module. */
335static int dojitcmd(lua_State *L, const char *cmd)
336{
337 const char *opt = strchr(cmd, '=');
338 lua_pushlstring(L, cmd, opt ? (size_t)(opt - cmd) : strlen(cmd));
339 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
340 lua_getfield(L, -1, "jit"); /* Get jit.* module table. */
341 lua_remove(L, -2);
342 lua_pushvalue(L, -2);
343 lua_gettable(L, -2); /* Lookup library function. */
344 if (!lua_isfunction(L, -1)) {
345 lua_pop(L, 2); /* Drop non-function and jit.* table, keep module name. */
346 if (loadjitmodule(L, "unknown luaJIT command"))
347 return 1;
348 } else {
349 lua_remove(L, -2); /* Drop jit.* table. */
350 }
351 lua_remove(L, -2); /* Drop module name. */
352 return runcmdopt(L, opt ? opt+1 : opt);
353}
354
355/* Optimization flags. */
356static int dojitopt(lua_State *L, const char *opt)
357{
358 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
359 lua_getfield(L, -1, "jit.opt"); /* Get jit.opt.* module table. */
360 lua_remove(L, -2);
361 lua_getfield(L, -1, "start");
362 lua_remove(L, -2);
363 return runcmdopt(L, opt);
364}
365
366/* check that argument has no extra characters at the end */
367#define notail(x) {if ((x)[2] != '\0') return -1;}
368
369static int collectargs(char **argv, int *pi, int *pv, int *pe)
370{
371 int i;
372 for (i = 1; argv[i] != NULL; i++) {
373 if (argv[i][0] != '-') /* not an option? */
374 return i;
375 switch (argv[i][1]) { /* option */
376 case '-':
377 notail(argv[i]);
378 return (argv[i+1] != NULL ? i+1 : 0);
379 case '\0':
380 return i;
381 case 'i':
382 notail(argv[i]);
383 *pi = 1; /* go through */
384 case 'v':
385 notail(argv[i]);
386 *pv = 1;
387 break;
388 case 'e':
389 *pe = 1; /* go through */
390 case 'j': /* LuaJIT extension */
391 case 'l':
392 if (argv[i][2] == '\0') {
393 i++;
394 if (argv[i] == NULL) return -1;
395 }
396 break;
397 case 'O': break; /* LuaJIT extension */
398 default: return -1; /* invalid option */
399 }
400 }
401 return 0;
402}
403
404static int runargs(lua_State *L, char **argv, int n)
405{
406 int i;
407 for (i = 1; i < n; i++) {
408 if (argv[i] == NULL) continue;
409 lua_assert(argv[i][0] == '-');
410 switch (argv[i][1]) { /* option */
411 case 'e': {
412 const char *chunk = argv[i] + 2;
413 if (*chunk == '\0') chunk = argv[++i];
414 lua_assert(chunk != NULL);
415 if (dostring(L, chunk, "=(command line)") != 0)
416 return 1;
417 break;
418 }
419 case 'l': {
420 const char *filename = argv[i] + 2;
421 if (*filename == '\0') filename = argv[++i];
422 lua_assert(filename != NULL);
423 if (dolibrary(L, filename))
424 return 1; /* stop if file fails */
425 break;
426 }
427 case 'j': { /* LuaJIT extension */
428 const char *cmd = argv[i] + 2;
429 if (*cmd == '\0') cmd = argv[++i];
430 lua_assert(cmd != NULL);
431 if (dojitcmd(L, cmd))
432 return 1;
433 break;
434 }
435 case 'O': /* LuaJIT extension */
436 if (dojitopt(L, argv[i] + 2))
437 return 1;
438 break;
439 default: break;
440 }
441 }
442 return 0;
443}
444
445static int handle_luainit(lua_State *L)
446{
447 const char *init = getenv(LUA_INIT);
448 if (init == NULL)
449 return 0; /* status OK */
450 else if (init[0] == '@')
451 return dofile(L, init+1);
452 else
453 return dostring(L, init, "=" LUA_INIT);
454}
455
456struct Smain {
457 int argc;
458 char **argv;
459 int status;
460};
461
462static int pmain(lua_State *L)
463{
464 struct Smain *s = (struct Smain *)lua_touserdata(L, 1);
465 char **argv = s->argv;
466 int script;
467 int has_i = 0, has_v = 0, has_e = 0;
468 globalL = L;
469 if (argv[0] && argv[0][0]) progname = argv[0];
470 LUAJIT_VERSION_SYM(); /* linker-enforced version check */
471 lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */
472 luaL_openlibs(L); /* open libraries */
473 lua_gc(L, LUA_GCRESTART, 0);
474 s->status = handle_luainit(L);
475 if (s->status != 0) return 0;
476 script = collectargs(argv, &has_i, &has_v, &has_e);
477 if (script < 0) { /* invalid args? */
478 print_usage();
479 s->status = 1;
480 return 0;
481 }
482 if (has_v) print_version();
483 s->status = runargs(L, argv, (script > 0) ? script : s->argc);
484 if (s->status != 0) return 0;
485 if (script)
486 s->status = handle_script(L, argv, script);
487 if (s->status != 0) return 0;
488 if (has_i) {
489 print_jit_status(L);
490 dotty(L);
491 } else if (script == 0 && !has_e && !has_v) {
492 if (lua_stdin_is_tty()) {
493 print_version();
494 print_jit_status(L);
495 dotty(L);
496 } else {
497 dofile(L, NULL); /* executes stdin as a file */
498 }
499 }
500 return 0;
501}
502
503int main(int argc, char **argv)
504{
505 int status;
506 struct Smain s;
507 lua_State *L = lua_open(); /* create state */
508 if (L == NULL) {
509 l_message(argv[0], "cannot create state: not enough memory");
510 return EXIT_FAILURE;
511 }
512 s.argc = argc;
513 s.argv = argv;
514 status = lua_cpcall(L, pmain, &s);
515 report(L, status);
516 lua_close(L);
517 return (status || s.status) ? EXIT_FAILURE : EXIT_SUCCESS;
518}
519
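
The traceback()/docall() pair above is the usual Lua 5.1 pattern for running a chunk under a message handler, so that errors reach report() already carrying a stack traceback. A condensed sketch of the same pattern for an embedding host (function names are illustrative; the SIGINT hook is omitted):

#include "lua.h"
#include "lauxlib.h"

/* Message handler in the style of traceback(): route string errors
** through debug.traceback() to append a stack dump. */
static int msghandler(lua_State *L)
{
  if (!lua_isstring(L, 1)) return 1;       /* pass non-string errors through */
  lua_getfield(L, LUA_GLOBALSINDEX, "debug");
  if (!lua_istable(L, -1)) { lua_pop(L, 1); return 1; }
  lua_getfield(L, -1, "traceback");
  if (!lua_isfunction(L, -1)) { lua_pop(L, 2); return 1; }
  lua_pushvalue(L, 1);                     /* original error message */
  lua_pushinteger(L, 2);                   /* skip this handler in the dump */
  lua_call(L, 2, 1);
  return 1;
}

/* Run a file under the handler, like docall() for a loaded chunk. */
static int run_file(lua_State *L, const char *fname)
{
  int base, status;
  lua_pushcfunction(L, msghandler);
  base = lua_gettop(L);                    /* handler index for lua_pcall */
  status = luaL_loadfile(L, fname);
  if (status == 0)
    status = lua_pcall(L, 0, 0, base);
  lua_remove(L, base);                     /* error object (if any) stays */
  return status;                           /* 0 on success */
}
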
diff --git a/src/luajit.h b/src/luajit.h
new file mode 100644
index 00000000..01913755
--- /dev/null
+++ b/src/luajit.h
@@ -0,0 +1,68 @@
1/*
2** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
3**
4** Copyright (C) 2005-2009 Mike Pall. All rights reserved.
5**
6** Permission is hereby granted, free of charge, to any person obtaining
7** a copy of this software and associated documentation files (the
8** "Software"), to deal in the Software without restriction, including
9** without limitation the rights to use, copy, modify, merge, publish,
10** distribute, sublicense, and/or sell copies of the Software, and to
11** permit persons to whom the Software is furnished to do so, subject to
12** the following conditions:
13**
14** The above copyright notice and this permission notice shall be
15** included in all copies or substantial portions of the Software.
16**
17** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24**
25** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
26*/
27
28#ifndef _LUAJIT_H
29#define _LUAJIT_H
30
31#include "lua.h"
32
33#define LUAJIT_VERSION "LuaJIT 2.0.0-beta1"
34#define LUAJIT_VERSION_NUM 20000 /* Version 2.0.0 = 02.00.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_0_beta1
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2009 Mike Pall"
37#define LUAJIT_URL "http://luajit.org/"
38
39/* Modes for luaJIT_setmode. */
40#define LUAJIT_MODE_MASK 0x00ff
41
42enum {
43 LUAJIT_MODE_ENGINE, /* Set mode for whole JIT engine. */
44 LUAJIT_MODE_DEBUG, /* Set debug mode (idx = level). */
45
46 LUAJIT_MODE_FUNC, /* Change mode for a function. */
47 LUAJIT_MODE_ALLFUNC, /* Recurse into subroutine protos. */
48 LUAJIT_MODE_ALLSUBFUNC, /* Change only the subroutines. */
49
50 LUAJIT_MODE_TRACE, /* Flush a compiled trace. */
51
52 LUAJIT_MODE_MAX
53};
54
55/* Flags or'ed in to the mode. */
56#define LUAJIT_MODE_OFF 0x0000 /* Disable JIT compilation. */
57#define LUAJIT_MODE_ON 0x0100 /* (Re-)enable JIT compilation. */
58#define LUAJIT_MODE_FLUSH 0x0200 /* Flush JIT-compiled code. */
59
60/* LuaJIT public C API. */
61
62/* Control the JIT engine. */
63LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
64
65/* Enforce (dynamic) linker error for version mismatches. Call from main. */
66LUA_API void LUAJIT_VERSION_SYM(void);
67
68#endif
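
luaJIT_setmode() above is the programmatic counterpart of the -j/-O handling in luajit.c. A minimal sketch of toggling the whole engine from host code (the helper name is illustrative):

#include "lua.h"
#include "luajit.h"

/* Turn the JIT compiler off or back on for the whole VM; idx is ignored
** for LUAJIT_MODE_ENGINE, so 0 is passed. Returns 1 on success. */
static int toggle_jit(lua_State *L, int enable)
{
  return luaJIT_setmode(L, 0, LUAJIT_MODE_ENGINE |
                        (enable ? LUAJIT_MODE_ON : LUAJIT_MODE_OFF));
}

The other LUAJIT_MODE_* values listed above select narrower scopes (a single function, its subroutines, or one compiled trace) through the same entry point.
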
diff --git a/src/lualib.h b/src/lualib.h
new file mode 100644
index 00000000..c1ceb613
--- /dev/null
+++ b/src/lualib.h
@@ -0,0 +1,41 @@
1/*
2** Standard library header.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LUALIB_H
7#define _LUALIB_H
8
9#include "lua.h"
10
11#define LUA_FILEHANDLE "FILE*"
12
13#define LUA_COLIBNAME "coroutine"
14#define LUA_MATHLIBNAME "math"
15#define LUA_STRLIBNAME "string"
16#define LUA_TABLIBNAME "table"
17#define LUA_IOLIBNAME "io"
18#define LUA_OSLIBNAME "os"
19#define LUA_LOADLIBNAME "package"
20#define LUA_DBLIBNAME "debug"
21#define LUA_BITLIBNAME "bit"
22#define LUA_JITLIBNAME "jit"
23
24LUALIB_API int luaopen_base(lua_State *L);
25LUALIB_API int luaopen_math(lua_State *L);
26LUALIB_API int luaopen_string(lua_State *L);
27LUALIB_API int luaopen_table(lua_State *L);
28LUALIB_API int luaopen_io(lua_State *L);
29LUALIB_API int luaopen_os(lua_State *L);
30LUALIB_API int luaopen_package(lua_State *L);
31LUALIB_API int luaopen_debug(lua_State *L);
32LUALIB_API int luaopen_bit(lua_State *L);
33LUALIB_API int luaopen_jit(lua_State *L);
34
35LUALIB_API void luaL_openlibs(lua_State *L);
36
37#ifndef lua_assert
38#define lua_assert(x) ((void)0)
39#endif
40
41#endif
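
luaL_openlibs() opens all of the libraries declared above at once. A host that wants only a subset can call the individual luaopen_* functions, passing each library its name as the single argument, which matches how the stock initialization code invokes them. A small sketch (the chosen subset is arbitrary):

#include "lua.h"
#include "lualib.h"

/* Open only the base, string and bit libraries. The base library is
** registered under the empty name. */
static void open_some_libs(lua_State *L)
{
  lua_pushcfunction(L, luaopen_base);
  lua_pushliteral(L, "");
  lua_call(L, 1, 0);
  lua_pushcfunction(L, luaopen_string);
  lua_pushliteral(L, LUA_STRLIBNAME);
  lua_call(L, 1, 0);
  lua_pushcfunction(L, luaopen_bit);
  lua_pushliteral(L, LUA_BITLIBNAME);
  lua_call(L, 1, 0);
}
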
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
new file mode 100644
index 00000000..8bdc4d8a
--- /dev/null
+++ b/src/msvcbuild.bat
@@ -0,0 +1,53 @@
1@rem Script to build LuaJIT with MSVC.
2@rem Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
3@rem
4@rem Open a "Visual Studio .NET Command Prompt", cd to this directory
5@rem and run this script.
6
7@if not defined INCLUDE goto :FAIL
8
9@setlocal
10@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
11@set LJLINK=link /nologo
12@set LJMT=mt /nologo
13@set DASMDIR=..\dynasm
14@set DASM=lua %DASMDIR%\dynasm.lua
15@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c
16
17if not exist buildvm_x86.h^
18 %DASM% -LN -o buildvm_x86.h buildvm_x86.dasc
19
20%LJCOMPILE% /I "." /I %DASMDIR% buildvm*.c
21%LJLINK% /out:buildvm.exe buildvm*.obj
22if exist buildvm.exe.manifest^
23 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
24
25buildvm -m peobj -o lj_vm.obj
26buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
27buildvm -m libdef -o lj_libdef.h %ALL_LIB%
28buildvm -m recdef -o lj_recdef.h %ALL_LIB%
29buildvm -m vmdef -o ..\lib\vmdef.lua %ALL_LIB%
30buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
31
32@if "%1"=="amalg" goto :AMALGDLL
33%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
34%LJLINK% /DLL /out:lua51.dll lj_*.obj lib_*.obj
35@goto :MTDLL
36:AMALGDLL
37%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
38%LJLINK% /DLL /out:lua51.dll ljamalg.obj lj_vm.obj
39:MTDLL
40if exist lua51.dll.manifest^
41 %LJMT% -manifest lua51.dll.manifest -outputresource:lua51.dll;2
42
43%LJCOMPILE% luajit.c
44%LJLINK% /out:luajit.exe luajit.obj lua51.lib
45if exist luajit.exe.manifest^
46 %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe
47
48del *.obj *.manifest buildvm.exe
49
50@goto :END
51:FAIL
52@echo You must open a "Visual Studio .NET Command Prompt" to run this script
53:END